From 975aa326ef6f4a5542fd19a37879c90d08a37eac Mon Sep 17 00:00:00 2001 From: SukkaW Date: Sun, 20 Aug 2023 16:13:53 +0800 Subject: [PATCH] Add stable domain sort --- Build/build-domestic-ruleset.js | 3 +- Build/build-phishing-domainset.js | 15 +++-- Build/build-reject-domainset.js | 25 +------ Build/build-speedtest-domainset.js | 3 +- Build/lib/parse-filter.js | 10 --- Build/lib/stable-sort-domain.js | 104 +++++++++++++++++++++++++++++ List/domainset/download.conf | 50 +++++++------- Source/domainset/cdn.conf | 2 + Source/non_ip/cdn.conf | 2 + 9 files changed, 149 insertions(+), 65 deletions(-) create mode 100644 Build/lib/stable-sort-domain.js diff --git a/Build/build-domestic-ruleset.js b/Build/build-domestic-ruleset.js index 0810f98e..5d7a031e 100644 --- a/Build/build-domestic-ruleset.js +++ b/Build/build-domestic-ruleset.js @@ -5,6 +5,7 @@ const { readFileByLine } = require('./lib/fetch-remote-text-by-line'); const { processLine } = require('./lib/process-line'); const { withBannerArray } = require('./lib/with-banner'); const { compareAndWriteFile } = require('./lib/string-array-compare'); +const domainSorter = require('./lib/stable-sort-domain'); (async () => { const rl = readFileByLine(path.resolve(__dirname, '../Source/non_ip/domestic.conf')); @@ -20,7 +21,7 @@ const { compareAndWriteFile } = require('./lib/string-array-compare'); ...Object.entries(DOMESTICS) .filter(([key]) => key !== 'SYSTEM') .flatMap(([, { domains }]) => domains) - .sort() + .sort(domainSorter) .map((domain) => `DOMAIN-SUFFIX,${domain}`) ); diff --git a/Build/build-phishing-domainset.js b/Build/build-phishing-domainset.js index 998d8007..096cb3dd 100644 --- a/Build/build-phishing-domainset.js +++ b/Build/build-phishing-domainset.js @@ -1,9 +1,10 @@ -const tldts = require('tldts'); +const { parse } = require('tldts'); const { processFilterRules } = require('./lib/parse-filter.js'); const path = require('path'); const { withBannerArray } = require('./lib/with-banner.js'); const { compareAndWriteFile } = require('./lib/string-array-compare'); const { processLine } = require('./lib/process-line.js'); +const domainSorter = require('./lib/stable-sort-domain'); const WHITELIST_DOMAIN = new Set([ 'w3s.link', @@ -13,7 +14,7 @@ const WHITELIST_DOMAIN = new Set([ 'business.site', 'page.link', // Firebase URL Shortener 'fleek.cool', - 'notion.site' + 'notion.site' ]); const BLACK_TLD = new Set([ 'xyz', @@ -68,7 +69,9 @@ const BLACK_TLD = new Set([ const domain = line.charCodeAt(0) === 46 ? line.slice(1) : line; - const apexDomain = tldts.getDomain(domain, { allowPrivateDomains: true }); + const parsed = parse(domain, { allowPrivateDomains: true }); + + const apexDomain = parsed.domain; if (apexDomain) { if (WHITELIST_DOMAIN.has(apexDomain)) { @@ -94,7 +97,7 @@ const BLACK_TLD = new Set([ domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5); } - const tld = tldts.getPublicSuffix(domain, { allowPrivateDomains: true }); + const tld = parsed.publicSuffix; if (!tld || !BLACK_TLD.has(tld)) continue; domainCountMap[apexDomain] += 1; @@ -114,7 +117,7 @@ const BLACK_TLD = new Set([ } if (domainCountMap[apexDomain] < 5) { - const subdomain = tldts.getSubdomain(domain, { allowPrivateDomains: true }); + const subdomain = parsed.subdomain; if (subdomain && subdomain.includes('.')) { domainCountMap[apexDomain] += 1.5; } @@ -134,7 +137,7 @@ const BLACK_TLD = new Set([ } }); - results.sort(); + results.sort(domainSorter); await compareAndWriteFile( withBannerArray( diff --git a/Build/build-reject-domainset.js b/Build/build-reject-domainset.js index c823db2d..264e4200 100644 --- a/Build/build-reject-domainset.js +++ b/Build/build-reject-domainset.js @@ -4,7 +4,6 @@ const fse = require('fs-extra'); const { resolve: pathResolve } = require('path'); const { processHosts, processFilterRules } = require('./lib/parse-filter'); -const { getDomain } = require('tldts'); const Trie = require('./lib/trie'); const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source'); @@ -14,6 +13,7 @@ const { processLine } = require('./lib/process-line'); const { domainDeduper } = require('./lib/domain-deduper'); const createKeywordFilter = require('./lib/aho-corasick'); const { readFileByLine } = require('./lib/fetch-remote-text-by-line'); +const domainSorter = require('./lib/stable-sort-domain'); /** Whitelists */ const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST); @@ -188,29 +188,8 @@ const domainSuffixSet = new Set(); /** @type {Record} */ const rejectDomainsStats = {}; - const sorter = (a, b) => { - if (a.domain > b.domain) { - return 1; - } - if (a.domain < b.domain) { - return -1; - } - if (a.v > b.v) { - return 1; - } - if (a.v < b.v) { - return -1; - } - return 0; - }; const sortedDomainSets = dudupedDominArray - .map((v) => { - const domain = getDomain(v[0] === '.' ? v.slice(1) : v) || v; - rejectDomainsStats[domain] = (rejectDomainsStats[domain] || 0) + 1; - return { v, domain }; - }) - .sort(sorter) - .map((i) => i.v); + .sort(domainSorter); await Promise.all([ compareAndWriteFile( diff --git a/Build/build-speedtest-domainset.js b/Build/build-speedtest-domainset.js index 6215d031..00b1b8a2 100644 --- a/Build/build-speedtest-domainset.js +++ b/Build/build-speedtest-domainset.js @@ -3,6 +3,7 @@ const { domainDeduper } = require('./lib/domain-deduper'); const path = require('path'); const { withBannerArray } = require('./lib/with-banner.js'); const { compareAndWriteFile } = require('./lib/string-array-compare'); +const domainSorter = require('./lib/stable-sort-domain'); const { Sema } = require('async-sema'); const s = new Sema(2); @@ -100,7 +101,7 @@ const querySpeedtestApi = async (keyword) => { } } - const reduped = domainDeduper(Array.from(domains)).sort(); + const reduped = domainDeduper(Array.from(domains)).sort(domainSorter); await compareAndWriteFile( withBannerArray( diff --git a/Build/lib/parse-filter.js b/Build/lib/parse-filter.js index 787adf9d..9124683e 100644 --- a/Build/lib/parse-filter.js +++ b/Build/lib/parse-filter.js @@ -399,16 +399,6 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart }; } -/** - * @param {string[]} data - */ -function preprocessFullDomainSetBeforeUsedAsWorkerData(data) { - return data - .filter(domain => domain[0] === '.') - .sort((a, b) => a.length - b.length); -} - module.exports.processDomainLists = processDomainLists; module.exports.processHosts = processHosts; module.exports.processFilterRules = processFilterRules; -module.exports.preprocessFullDomainSetBeforeUsedAsWorkerData = preprocessFullDomainSetBeforeUsedAsWorkerData; diff --git a/Build/lib/stable-sort-domain.js b/Build/lib/stable-sort-domain.js new file mode 100644 index 00000000..09fdb4c5 --- /dev/null +++ b/Build/lib/stable-sort-domain.js @@ -0,0 +1,104 @@ +// @ts-check +const tldts = require('tldts'); + +const cache1 = Object.create(null); +/** + * @param {string} url + * @returns {ReturnType} + */ +// eslint-disable-next-line no-return-assign -- cache +const parse = (url) => (cache1[url] ||= tldts.parse(url, { allowPrivateDomains: true })); + +/** + * @param {string} a + * @param {string} b + * @returns {0 | 1 | -1} + */ +const domainSorter = (a, b) => { + if (a === b) return 0; + + const aParsed = parse(a); + const bParsed = parse(b); + + const aSuffix = aParsed.publicSuffix; + const bSuffix = bParsed.publicSuffix; + + if (bSuffix !== aSuffix) { + if (bSuffix == null) { + return 1; + } + if (aSuffix == null) { + return -1; + } + + for (let i = 0, l = aSuffix.length; i < l; i++) { + if (bSuffix[i] == null) { + return 1; + } + + if (aSuffix[i] < bSuffix[i]) { + return -1; + } + + if (aSuffix[i] > bSuffix[i]) { + return 1; + } + } + } + + const aDomainWithoutSuffix = aParsed.domainWithoutSuffix; + const bDomainWithoutSuffix = bParsed.domainWithoutSuffix; + + if (aDomainWithoutSuffix !== bDomainWithoutSuffix) { + if (bDomainWithoutSuffix == null) { + return 1; + } + if (aDomainWithoutSuffix == null) { + return -1; + } + + for (let i = 0, l = aDomainWithoutSuffix.length; i < l; i++) { + if (bDomainWithoutSuffix[i] == null) { + return 1; + } + + if (aDomainWithoutSuffix[i] < bDomainWithoutSuffix[i]) { + return -1; + } + + if (aDomainWithoutSuffix[i] > bDomainWithoutSuffix[i]) { + return 1; + } + } + } + + const aSubdomain = aParsed.subdomain; + const bSubdomain = bParsed.subdomain; + + if (aSubdomain !== bSubdomain) { + if (bSubdomain == null) { + return 1; + } + if (aSubdomain == null) { + return -1; + } + + for (let i = 0, l = aSubdomain.length; i < l; i++) { + if (bSubdomain[i] == null) { + return 1; + } + + if (aSubdomain[i] < bSubdomain[i]) { + return -1; + } + + if (aSubdomain[i] > bSubdomain[i]) { + return 1; + } + } + } + + return 0; +}; + +module.exports = domainSorter; diff --git a/List/domainset/download.conf b/List/domainset/download.conf index decdde8a..acd6ea62 100644 --- a/List/domainset/download.conf +++ b/List/domainset/download.conf @@ -2,21 +2,23 @@ .1fichier.info .nitro.download -# >> SourceForge +# Microsoft .NET Runtime +download.visualstudio.microsoft.com +# SourceForge downloads.sourceforge.net .dl.sourceforge.net -# >> Atlassian +# Atlassian product-downloads.atlassian.com -# >> Mokee +# Mokee .download.mokeedev.com -# >> Pixel Experience +# Pixel Experience get.pixelexperience.org download.pixelexperience.org -# >> MEGA +# MEGA .mega.nz .mega.io .mega.co.nz -# >> Filen +# Filen down.filen.net down.filen-1.net down.filen-2.net @@ -24,44 +26,44 @@ down.filen-3.net down.filen-4.net down.filen-5.net down.filen.io -# >> APKMirror +# APKMirror downloadr2.apkmirror.com -# >> Parallels, Inc. +# Parallels, Inc. download.parallels.com -# >> OrbStack +# OrbStack cdn-updates.orbstack.dev -# >> VSCode +# VSCode update.code.visualstudio.com download.visualstudio.microsoft.com az764295.vo.msecnd.net -# >> XMind +# XMind dl2.xmind.net dl3.xmind.net -# >> PostMan +# PostMan dl.pstmn.io -# >> Surge +# Surge dl.nssurge.com -# >> Docker +# Docker desktop.docker.com -# >> Setapp +# Setapp dl.devmate.com store.setapp.com -# >> Parsec +# Parsec builds.parsec.app -# >> Sketch +# Sketch download.sketch.com -# >> Wireshark +# Wireshark .dl.wireshark.org -# >> Mozilla +# Mozilla download.mozilla.org -# >> AnyDesk +# AnyDesk download.anydesk.com -# >> Arc +# Arc releases.arc.net -# >> App Uninstaller & Cleaner +# App Uninstaller & Cleaner download.nektony.com -# >> Beeper +# Beeper download.beeper.com download.todesktop.com -# >> Motrix +# Motrix dl.motrix.app diff --git a/Source/domainset/cdn.conf b/Source/domainset/cdn.conf index 9e12179f..c87b7795 100644 --- a/Source/domainset/cdn.conf +++ b/Source/domainset/cdn.conf @@ -317,6 +317,8 @@ amp.azure.net # >> CodeSandbox uploads.codesandbox.io +screenshots.codesandbox.io +prod-packager-packages.codesandbox.io pkg.csb.dev # Sandpack sandpack-cdn-staging.blazingly.io diff --git a/Source/non_ip/cdn.conf b/Source/non_ip/cdn.conf index bdb02e59..8d5417d2 100644 --- a/Source/non_ip/cdn.conf +++ b/Source/non_ip/cdn.conf @@ -57,4 +57,6 @@ DOMAIN-SUFFIX,s3.us-west-2.amazonaws.com DOMAIN-KEYWORD,web-assets.zendesk # >> Cloudinary DOMAIN-KEYWORD,-res.cloudinary.com +# >> Algolia +DOMAIN-KEYWORD,dsn.algolia.net # --- [AWS S3 Replace Me] ---