Add stable domain sort

This commit is contained in:
SukkaW
2023-08-20 16:13:53 +08:00
parent 38475da084
commit 975aa326ef
9 changed files with 149 additions and 65 deletions

View File

@@ -5,6 +5,7 @@ const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
const { processLine } = require('./lib/process-line'); const { processLine } = require('./lib/process-line');
const { withBannerArray } = require('./lib/with-banner'); const { withBannerArray } = require('./lib/with-banner');
const { compareAndWriteFile } = require('./lib/string-array-compare'); const { compareAndWriteFile } = require('./lib/string-array-compare');
const domainSorter = require('./lib/stable-sort-domain');
(async () => { (async () => {
const rl = readFileByLine(path.resolve(__dirname, '../Source/non_ip/domestic.conf')); const rl = readFileByLine(path.resolve(__dirname, '../Source/non_ip/domestic.conf'));
@@ -20,7 +21,7 @@ const { compareAndWriteFile } = require('./lib/string-array-compare');
...Object.entries(DOMESTICS) ...Object.entries(DOMESTICS)
.filter(([key]) => key !== 'SYSTEM') .filter(([key]) => key !== 'SYSTEM')
.flatMap(([, { domains }]) => domains) .flatMap(([, { domains }]) => domains)
.sort() .sort(domainSorter)
.map((domain) => `DOMAIN-SUFFIX,${domain}`) .map((domain) => `DOMAIN-SUFFIX,${domain}`)
); );

View File

@@ -1,9 +1,10 @@
const tldts = require('tldts'); const { parse } = require('tldts');
const { processFilterRules } = require('./lib/parse-filter.js'); const { processFilterRules } = require('./lib/parse-filter.js');
const path = require('path'); const path = require('path');
const { withBannerArray } = require('./lib/with-banner.js'); const { withBannerArray } = require('./lib/with-banner.js');
const { compareAndWriteFile } = require('./lib/string-array-compare'); const { compareAndWriteFile } = require('./lib/string-array-compare');
const { processLine } = require('./lib/process-line.js'); const { processLine } = require('./lib/process-line.js');
const domainSorter = require('./lib/stable-sort-domain');
const WHITELIST_DOMAIN = new Set([ const WHITELIST_DOMAIN = new Set([
'w3s.link', 'w3s.link',
@@ -68,7 +69,9 @@ const BLACK_TLD = new Set([
const domain = line.charCodeAt(0) === 46 ? line.slice(1) : line; const domain = line.charCodeAt(0) === 46 ? line.slice(1) : line;
const apexDomain = tldts.getDomain(domain, { allowPrivateDomains: true }); const parsed = parse(domain, { allowPrivateDomains: true });
const apexDomain = parsed.domain;
if (apexDomain) { if (apexDomain) {
if (WHITELIST_DOMAIN.has(apexDomain)) { if (WHITELIST_DOMAIN.has(apexDomain)) {
@@ -94,7 +97,7 @@ const BLACK_TLD = new Set([
domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5); domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5);
} }
const tld = tldts.getPublicSuffix(domain, { allowPrivateDomains: true }); const tld = parsed.publicSuffix;
if (!tld || !BLACK_TLD.has(tld)) continue; if (!tld || !BLACK_TLD.has(tld)) continue;
domainCountMap[apexDomain] += 1; domainCountMap[apexDomain] += 1;
@@ -114,7 +117,7 @@ const BLACK_TLD = new Set([
} }
if (domainCountMap[apexDomain] < 5) { if (domainCountMap[apexDomain] < 5) {
const subdomain = tldts.getSubdomain(domain, { allowPrivateDomains: true }); const subdomain = parsed.subdomain;
if (subdomain && subdomain.includes('.')) { if (subdomain && subdomain.includes('.')) {
domainCountMap[apexDomain] += 1.5; domainCountMap[apexDomain] += 1.5;
} }
@@ -134,7 +137,7 @@ const BLACK_TLD = new Set([
} }
}); });
results.sort(); results.sort(domainSorter);
await compareAndWriteFile( await compareAndWriteFile(
withBannerArray( withBannerArray(

View File

@@ -4,7 +4,6 @@ const fse = require('fs-extra');
const { resolve: pathResolve } = require('path'); const { resolve: pathResolve } = require('path');
const { processHosts, processFilterRules } = require('./lib/parse-filter'); const { processHosts, processFilterRules } = require('./lib/parse-filter');
const { getDomain } = require('tldts');
const Trie = require('./lib/trie'); const Trie = require('./lib/trie');
const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source'); const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source');
@@ -14,6 +13,7 @@ const { processLine } = require('./lib/process-line');
const { domainDeduper } = require('./lib/domain-deduper'); const { domainDeduper } = require('./lib/domain-deduper');
const createKeywordFilter = require('./lib/aho-corasick'); const createKeywordFilter = require('./lib/aho-corasick');
const { readFileByLine } = require('./lib/fetch-remote-text-by-line'); const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
const domainSorter = require('./lib/stable-sort-domain');
/** Whitelists */ /** Whitelists */
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST); const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
@@ -188,29 +188,8 @@ const domainSuffixSet = new Set();
/** @type {Record<string, number>} */ /** @type {Record<string, number>} */
const rejectDomainsStats = {}; const rejectDomainsStats = {};
const sorter = (a, b) => {
if (a.domain > b.domain) {
return 1;
}
if (a.domain < b.domain) {
return -1;
}
if (a.v > b.v) {
return 1;
}
if (a.v < b.v) {
return -1;
}
return 0;
};
const sortedDomainSets = dudupedDominArray const sortedDomainSets = dudupedDominArray
.map((v) => { .sort(domainSorter);
const domain = getDomain(v[0] === '.' ? v.slice(1) : v) || v;
rejectDomainsStats[domain] = (rejectDomainsStats[domain] || 0) + 1;
return { v, domain };
})
.sort(sorter)
.map((i) => i.v);
await Promise.all([ await Promise.all([
compareAndWriteFile( compareAndWriteFile(

View File

@@ -3,6 +3,7 @@ const { domainDeduper } = require('./lib/domain-deduper');
const path = require('path'); const path = require('path');
const { withBannerArray } = require('./lib/with-banner.js'); const { withBannerArray } = require('./lib/with-banner.js');
const { compareAndWriteFile } = require('./lib/string-array-compare'); const { compareAndWriteFile } = require('./lib/string-array-compare');
const domainSorter = require('./lib/stable-sort-domain');
const { Sema } = require('async-sema'); const { Sema } = require('async-sema');
const s = new Sema(2); const s = new Sema(2);
@@ -100,7 +101,7 @@ const querySpeedtestApi = async (keyword) => {
} }
} }
const reduped = domainDeduper(Array.from(domains)).sort(); const reduped = domainDeduper(Array.from(domains)).sort(domainSorter);
await compareAndWriteFile( await compareAndWriteFile(
withBannerArray( withBannerArray(

View File

@@ -399,16 +399,6 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart
}; };
} }
/**
* @param {string[]} data
*/
function preprocessFullDomainSetBeforeUsedAsWorkerData(data) {
return data
.filter(domain => domain[0] === '.')
.sort((a, b) => a.length - b.length);
}
module.exports.processDomainLists = processDomainLists; module.exports.processDomainLists = processDomainLists;
module.exports.processHosts = processHosts; module.exports.processHosts = processHosts;
module.exports.processFilterRules = processFilterRules; module.exports.processFilterRules = processFilterRules;
module.exports.preprocessFullDomainSetBeforeUsedAsWorkerData = preprocessFullDomainSetBeforeUsedAsWorkerData;

View File

@@ -0,0 +1,104 @@
// @ts-check
const tldts = require('tldts');
// Memo of parse results keyed by the raw input string. The object is created
// without a prototype, so only keys stored here can ever be read back.
const cache1 = Object.create(null);

/**
 * Parse a host with tldts (`allowPrivateDomains` enabled), returning the
 * previously stored result when the same string has been parsed before.
 *
 * @param {string} url
 * @returns {ReturnType<typeof tldts.parse>}
 */
const parse = (url) => {
  const cached = cache1[url];
  if (cached) {
    return cached;
  }

  const result = tldts.parse(url, { allowPrivateDomains: true });
  cache1[url] = result;
  return result;
};
/**
 * Three-way comparison of two optional strings by UTF-16 code unit.
 *
 * A missing value (`null`/`undefined`) orders before any string, and a string
 * orders before every longer string it is a prefix of.
 *
 * @param {string | null | undefined} a
 * @param {string | null | undefined} b
 * @returns {0 | 1 | -1}
 */
const compareNullableStrings = (a, b) => {
  if (a === b) return 0;
  if (b == null) return a == null ? 0 : 1;
  if (a == null) return -1;
  if (a < b) return -1;
  if (a > b) return 1;
  return 0;
};

/**
 * Comparator for `Array.prototype.sort` that orders domains by their parsed
 * components: public suffix first, then the registrable label
 * (`domainWithoutSuffix`), then the subdomain. Within each component a missing
 * value sorts first and strings are compared by code unit.
 *
 * Each component is compared in full, so a value that is a strict prefix of
 * the other (suffix `co` vs `com`, empty subdomain vs `www`) sorts first no
 * matter which side it is passed on. This keeps the comparator antisymmetric,
 * which `sort` requires for a well-defined result.
 *
 * Inputs whose three components are all equal but whose text differs are
 * ordered by the raw strings, so the sorted output does not depend on the
 * order of the input.
 *
 * @param {string} a
 * @param {string} b
 * @returns {0 | 1 | -1}
 */
const domainSorter = (a, b) => {
  if (a === b) return 0;

  const aParsed = parse(a);
  const bParsed = parse(b);

  const bySuffix = compareNullableStrings(aParsed.publicSuffix, bParsed.publicSuffix);
  if (bySuffix !== 0) return bySuffix;

  const byDomain = compareNullableStrings(aParsed.domainWithoutSuffix, bParsed.domainWithoutSuffix);
  if (byDomain !== 0) return byDomain;

  const bySubdomain = compareNullableStrings(aParsed.subdomain, bParsed.subdomain);
  if (bySubdomain !== 0) return bySubdomain;

  // Last resort: the inputs parse identically, so fall back to their text.
  return compareNullableStrings(a, b);
};
module.exports = domainSorter;

View File

@@ -2,21 +2,23 @@
.1fichier.info .1fichier.info
.nitro.download .nitro.download
# >> SourceForge # Microsoft .NET Runtime
download.visualstudio.microsoft.com
# SourceForge
downloads.sourceforge.net downloads.sourceforge.net
.dl.sourceforge.net .dl.sourceforge.net
# >> Atlassian # Atlassian
product-downloads.atlassian.com product-downloads.atlassian.com
# >> Mokee # Mokee
.download.mokeedev.com .download.mokeedev.com
# >> Pixel Experience # Pixel Experience
get.pixelexperience.org get.pixelexperience.org
download.pixelexperience.org download.pixelexperience.org
# >> MEGA # MEGA
.mega.nz .mega.nz
.mega.io .mega.io
.mega.co.nz .mega.co.nz
# >> Filen # Filen
down.filen.net down.filen.net
down.filen-1.net down.filen-1.net
down.filen-2.net down.filen-2.net
@@ -24,44 +26,44 @@ down.filen-3.net
down.filen-4.net down.filen-4.net
down.filen-5.net down.filen-5.net
down.filen.io down.filen.io
# >> APKMirror # APKMirror
downloadr2.apkmirror.com downloadr2.apkmirror.com
# >> Parallels, Inc. # Parallels, Inc.
download.parallels.com download.parallels.com
# >> OrbStack # OrbStack
cdn-updates.orbstack.dev cdn-updates.orbstack.dev
# >> VSCode # VSCode
update.code.visualstudio.com update.code.visualstudio.com
download.visualstudio.microsoft.com download.visualstudio.microsoft.com
az764295.vo.msecnd.net az764295.vo.msecnd.net
# >> XMind # XMind
dl2.xmind.net dl2.xmind.net
dl3.xmind.net dl3.xmind.net
# >> PostMan # PostMan
dl.pstmn.io dl.pstmn.io
# >> Surge # Surge
dl.nssurge.com dl.nssurge.com
# >> Docker # Docker
desktop.docker.com desktop.docker.com
# >> Setapp # Setapp
dl.devmate.com dl.devmate.com
store.setapp.com store.setapp.com
# >> Parsec # Parsec
builds.parsec.app builds.parsec.app
# >> Sketch # Sketch
download.sketch.com download.sketch.com
# >> Wireshark # Wireshark
.dl.wireshark.org .dl.wireshark.org
# >> Mozilla # Mozilla
download.mozilla.org download.mozilla.org
# >> AnyDesk # AnyDesk
download.anydesk.com download.anydesk.com
# >> Arc # Arc
releases.arc.net releases.arc.net
# >> App Uninstaller & Cleaner # App Uninstaller & Cleaner
download.nektony.com download.nektony.com
# >> Beeper # Beeper
download.beeper.com download.beeper.com
download.todesktop.com download.todesktop.com
# >> Motrix # Motrix
dl.motrix.app dl.motrix.app

View File

@@ -317,6 +317,8 @@ amp.azure.net
# >> CodeSandbox # >> CodeSandbox
uploads.codesandbox.io uploads.codesandbox.io
screenshots.codesandbox.io
prod-packager-packages.codesandbox.io
pkg.csb.dev pkg.csb.dev
# Sandpack # Sandpack
sandpack-cdn-staging.blazingly.io sandpack-cdn-staging.blazingly.io

View File

@@ -57,4 +57,6 @@ DOMAIN-SUFFIX,s3.us-west-2.amazonaws.com
DOMAIN-KEYWORD,web-assets.zendesk DOMAIN-KEYWORD,web-assets.zendesk
# >> Cloudinary # >> Cloudinary
DOMAIN-KEYWORD,-res.cloudinary.com DOMAIN-KEYWORD,-res.cloudinary.com
# >> Algolia
DOMAIN-KEYWORD,dsn.algolia.net
# --- [AWS S3 Replace Me] --- # --- [AWS S3 Replace Me] ---