From b43c1628d6f9d6933381d5409c1545780276f942 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Thu, 13 Jul 2023 22:18:53 +0800 Subject: [PATCH] Add build step to CDN domainset --- .gitignore | 1 + Build/build-cdn-conf.js | 68 +++++++++++++++++++++++------ Build/build-reject-domainset.js | 31 +++---------- Build/lib/domain-deduper.js | 27 ++++++++++++ Build/lib/should-ignore-line.js | 29 ++++++++++++ Build/lib/trie.js | 20 ++++----- {List => Source}/domainset/cdn.conf | 0 7 files changed, 128 insertions(+), 48 deletions(-) create mode 100644 Build/lib/domain-deduper.js create mode 100644 Build/lib/should-ignore-line.js rename {List => Source}/domainset/cdn.conf (100%) diff --git a/.gitignore b/.gitignore index 643dbab5..8b113c77 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ node_modules public List/domainset/reject.conf +List/domainset/cdn.conf List/domainset/reject_phishing.conf List/domainset/reject_sukka.conf List/domainset/apple_cdn.conf diff --git a/Build/build-cdn-conf.js b/Build/build-cdn-conf.js index 77dc6b6c..1055c3dc 100644 --- a/Build/build-cdn-conf.js +++ b/Build/build-cdn-conf.js @@ -1,18 +1,28 @@ -const { fetchWithRetry } = require('./lib/fetch-retry'); +// @ts-check const fs = require('fs'); const path = require('path'); const { compareAndWriteFile } = require('./lib/string-array-compare'); const { withBannerArray } = require('./lib/with-banner'); const { minifyRules } = require('./lib/minify-rules'); +const { domainDeduper } = require('./lib/domain-deduper'); +const { shouldIgnoreLine } = require('./lib/should-ignore-line'); +const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remote-text-by-line'); + +const readline = require('readline'); (async () => { console.time('Total Time - build-cdn-conf'); - const domains = (await (await fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat')).text()).split('\n'); + /** + * Extract OSS domain from publicsuffix list + * @type {Set} + */ + const S3OSSDomains = new 
Set(); - const S3OSSDomains = domains.filter(line => { - if (line) { - return ( + for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://publicsuffix.org/list/public_suffix_list.dat')) { + if ( + line + && ( line.startsWith('s3-') || line.startsWith('s3.') ) @@ -20,18 +30,16 @@ const { minifyRules } = require('./lib/minify-rules'); line.endsWith('.amazonaws.com') || line.endsWith('.scw.cloud') ) - && !line.includes('cn-'); + && !line.includes('cn-') + ) { + S3OSSDomains.add(line); } + } - return false; - }); - - const filePath = path.resolve(__dirname, '../Source/non_ip/cdn.conf'); - const resultPath = path.resolve(__dirname, '../List/non_ip/cdn.conf'); - const content = (await fs.promises.readFile(filePath, 'utf-8')) + const content = (await fs.promises.readFile(path.resolve(__dirname, '../Source/non_ip/cdn.conf'), 'utf-8')) .replace( '# --- [AWS S3 Replace Me] ---', - S3OSSDomains.map(domain => `DOMAIN-SUFFIX,${domain}`).join('\n') + Array.from(S3OSSDomains).map(domain => `DOMAIN-SUFFIX,${domain}`).join('\n') ); await compareAndWriteFile( @@ -47,7 +55,39 @@ const { minifyRules } = require('./lib/minify-rules'); new Date(), minifyRules(content.split('\n')) ), - resultPath + path.resolve(__dirname, '../List/non_ip/cdn.conf') + ); + + /** + * Dedupe cdn.conf + */ + /** @type {Set} */ + const cdnDomains = new Set(); + + for await (const line of readline.createInterface({ + input: fs.createReadStream(path.resolve(__dirname, '../Source/domainset/cdn.conf'), 'utf-8'), + crlfDelay: Infinity + })) { + const l = shouldIgnoreLine(line); + if (l) { + cdnDomains.add(l); + } + } + + await compareAndWriteFile( + withBannerArray( + 'Sukka\'s Surge Rules - CDN Domains', + [ + 'License: AGPL 3.0', + 'Homepage: https://ruleset.skk.moe', + 'GitHub: https://github.com/SukkaW/Surge', + '', + 'This file contains object storage and static assets CDN domains.' 
+ ], + new Date(), + minifyRules(domainDeduper(Array.from(cdnDomains))) + ), + path.resolve(__dirname, '../List/domainset/cdn.conf') ); console.timeEnd('Total Time - build-cdn-conf'); diff --git a/Build/build-reject-domainset.js b/Build/build-reject-domainset.js index d55e7199..c9ee2dfb 100644 --- a/Build/build-reject-domainset.js +++ b/Build/build-reject-domainset.js @@ -11,6 +11,7 @@ const Trie = require('./lib/trie'); const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source'); const { withBannerArray } = require('./lib/with-banner'); const { compareAndWriteFile } = require('./lib/string-array-compare'); +const { shouldIgnoreLine } = require('./lib/should-ignore-line'); /** Whitelists */ const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST); @@ -109,19 +110,10 @@ const domainSuffixSet = new Set(); }); for await (const line of rl1) { - if ( - line.startsWith('#') - || line.startsWith(' ') - || line.startsWith('\r') - || line.startsWith('\n') - ) { - continue; + const l = shouldIgnoreLine(line); + if (l) { + domainSets.add(l); } - - const trimmed = line.trim(); - if (trimmed === '') continue; - - domainSets.add(trimmed); } previousSize = domainSets.size - previousSize; @@ -146,19 +138,10 @@ const domainSuffixSet = new Set(); crlfDelay: Infinity }); for await (const line of rl3) { - if ( - line.startsWith('#') - || line.startsWith(' ') - || line.startsWith('\r') - || line.startsWith('\n') - ) { - continue; + const l = shouldIgnoreLine(line); + if (l) { + domainSuffixSet.add(l); /* rl3 lines are suffix entries: they belong in domainSuffixSet (whose size is logged below), not in domainSets */ } - - const trimmed = line.trim(); - if (trimmed === '') continue; - - domainSuffixSet.add(trimmed); } console.log(`Import ${domainKeywordsSet.size} black keywords and ${domainSuffixSet.size} black suffixes!`); diff --git a/Build/lib/domain-deduper.js b/Build/lib/domain-deduper.js new file mode 100644 index 00000000..3ead1333 --- /dev/null +++ b/Build/lib/domain-deduper.js @@ -0,0 +1,27 @@ +const Trie = 
require('./trie'); + +/** + * @param {string[]} inputDomains + */ +const domainDeduper = (inputDomains) => { + const trie = Trie.from(inputDomains); + const sets = new Set(inputDomains); + + for (let j = 0, len = inputDomains.length; j < len; j++) { + const d = inputDomains[j]; + if (d[0] !== '.') { + continue; + } + + trie.find(d, false).forEach(f => sets.delete(f)); + + const a = d.slice(1); + if (trie.has(a)) { + sets.delete(a); + } + } + + return Array.from(sets); +}; + +module.exports.domainDeduper = domainDeduper; diff --git a/Build/lib/should-ignore-line.js b/Build/lib/should-ignore-line.js new file mode 100644 index 00000000..4e1fbaee --- /dev/null +++ b/Build/lib/should-ignore-line.js @@ -0,0 +1,29 @@ +/* eslint-disable camelcase -- cache index access */ + +/** + * @param {string} line + */ +module.exports.shouldIgnoreLine = (line) => { + if (line === '') { + return null; + } + + const line_0 = line[0]; + + if ( + line_0 === '#' + || line_0 === ' ' + || line_0 === '\r' + || line_0 === '\n' + || line_0 === '!' 
+ ) { + return null; + } + + const trimmed = line.trim(); + if (trimmed === '') { + return null; + } + + return trimmed; +}; diff --git a/Build/lib/trie.js b/Build/lib/trie.js index ff983976..968cb5d6 100644 --- a/Build/lib/trie.js +++ b/Build/lib/trie.js @@ -81,6 +81,7 @@ class Trie { $suffix = suffixStack.pop(); node = nodeStack.pop(); + // eslint-disable-next-line guard-for-in -- plain object for (k in node) { if (k === SENTINEL) { if (includeEqualWithSuffix) { @@ -89,7 +90,6 @@ class Trie { matches.push($suffix); } - continue; } @@ -161,8 +161,9 @@ class Trie { node = node[token]; // Prefix does not exist - if (typeof node === 'undefined') + if (typeof node === 'undefined') { return false; + } // Keeping track of a potential branch to prune if (toPrune !== null) { @@ -170,12 +171,9 @@ class Trie { toPrune = null; tokenToPrune = null; } - } - else { - if (Object.keys(node).length < 2) { - toPrune = parent; - tokenToPrune = token; - } + } else if (Object.keys(node).length < 2) { + toPrune = parent; + tokenToPrune = token; } } @@ -206,8 +204,9 @@ class Trie { token = suffix[i]; node = node[token]; - if (typeof node === 'undefined') + if (typeof node === 'undefined') { return false; + } } return SENTINEL in node; @@ -217,7 +216,7 @@ class Trie { * @return {string[]} */ dump() { - let node = this.root; + const node = this.root; const nodeStack = []; const prefixStack = []; // Resolving initial prefix @@ -238,6 +237,7 @@ class Trie { currentNode = nodeStack.pop(); currentPrefix = prefixStack.pop(); + // eslint-disable-next-line guard-for-in -- plain object for (k in currentNode) { if (k === SENTINEL) { hasValue = true; diff --git a/List/domainset/cdn.conf b/Source/domainset/cdn.conf similarity index 100% rename from List/domainset/cdn.conf rename to Source/domainset/cdn.conf