From 685427472b8361baf2d6bf295f68c94d91450bd9 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Thu, 13 Jul 2023 22:31:27 +0800 Subject: [PATCH] Refactor build scripts --- Build/build-cdn-conf.js | 4 +-- Build/build-chn-cidr.js | 25 +++++++++---------- Build/build-internal-rules.js | 5 ++-- Build/build-reject-domainset.js | 6 ++--- Build/lib/parse-filter.js | 24 ++++++++---------- ...{should-ignore-line.js => process-line.js} | 7 ++++-- 6 files changed, 35 insertions(+), 36 deletions(-) rename Build/lib/{should-ignore-line.js => process-line.js} (72%) diff --git a/Build/build-cdn-conf.js b/Build/build-cdn-conf.js index 1055c3dc..e0306839 100644 --- a/Build/build-cdn-conf.js +++ b/Build/build-cdn-conf.js @@ -5,7 +5,7 @@ const { compareAndWriteFile } = require('./lib/string-array-compare'); const { withBannerArray } = require('./lib/with-banner'); const { minifyRules } = require('./lib/minify-rules'); const { domainDeduper } = require('./lib/domain-deduper'); -const { shouldIgnoreLine } = require('./lib/should-ignore-line'); +const { processLine } = require('./lib/process-line'); const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remote-text-by-line'); const readline = require('readline'); @@ -68,7 +68,7 @@ const readline = require('readline'); input: fs.createReadStream(path.resolve(__dirname, '../Source/domainset/cdn.conf'), 'utf-8'), crlfDelay: Infinity })) { - const l = shouldIgnoreLine(line); + const l = processLine(line); if (l) { cdnDomains.add(l); } diff --git a/Build/build-chn-cidr.js b/Build/build-chn-cidr.js index 329dddba..b9fdbc0e 100644 --- a/Build/build-chn-cidr.js +++ b/Build/build-chn-cidr.js @@ -1,25 +1,24 @@ -const { fetchWithRetry } = require('./lib/fetch-retry'); +const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remote-text-by-line'); const { withBannerArray } = require('./lib/with-banner'); const { resolve: pathResolve } = require('path'); const { compareAndWriteFile } = require('./lib/string-array-compare'); +const { processLine } = require('./lib/process-line'); (async () => { console.time('Total Time - build-chnroutes-cidr'); + const { merge: mergeCidrs } = await import('cidr-tools'); - const [rawCidr, { merge: mergeCidrs }] = await Promise.all([ - (await fetchWithRetry('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')).text(), - import('cidr-tools') - ]); - const cidr = rawCidr.split('\n'); - - console.log('Before Merge:', cidr.length); - const filteredCidr = mergeCidrs(cidr.filter(line => { - if (line) { - return !line.startsWith('#'); + /** @type {Set} */ + const cidr = new Set(); + for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')) { + const l = processLine(line); + if (l) { + cidr.add(l); } + } - return false; - })); + console.log('Before Merge:', cidr.size); + const filteredCidr = mergeCidrs(Array.from(cidr)); console.log('After Merge:', filteredCidr.length); await compareAndWriteFile( diff --git a/Build/build-internal-rules.js b/Build/build-internal-rules.js index 43b302cf..8069fee8 100644 --- a/Build/build-internal-rules.js +++ b/Build/build-internal-rules.js @@ -5,6 +5,7 @@ const path = require('path'); const readline = require('readline'); const { isDomainLoose } = require('./lib/is-domain-loose'); const tldts = require('tldts'); +const { processLine } = require('./lib/process-line'); (async () => { const set = new Set(); @@ -32,7 +33,7 @@ const tldts = require('tldts'); addApexDomain(line.slice(1)); } else if (isDomainLoose(line)) { addApexDomain(line); - } else if (!line.startsWith('#') && line.trim() !== '') { + } else if (processLine(line)) { console.warn('[drop line from domainset]', line); } } @@ -52,7 +53,7 @@ const tldts = require('tldts'); addApexDomain(line.replace('DOMAIN-SUFFIX,', '')); } else if (line.startsWith('DOMAIN,')) { addApexDomain(line.replace('DOMAIN,', '')); - } else if (!line.startsWith('#') && line.trim() !== '') { + } else if (processLine(line)) { console.warn('[drop line from ruleset]', line); } } diff --git a/Build/build-reject-domainset.js b/Build/build-reject-domainset.js index c9ee2dfb..7e588359 100644 --- a/Build/build-reject-domainset.js +++ b/Build/build-reject-domainset.js @@ -11,7 +11,7 @@ const Trie = require('./lib/trie'); const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source'); const { withBannerArray } = require('./lib/with-banner'); const { compareAndWriteFile } = require('./lib/string-array-compare'); -const { shouldIgnoreLine } = require('./lib/should-ignore-line'); +const { processLine } = require('./lib/process-line'); /** Whitelists */ const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST); @@ -110,7 +110,7 @@ const domainSuffixSet = new Set(); }); for await (const line of rl1) { - const l = shouldIgnoreLine(line); + const l = processLine(line); if (l) { domainSets.add(l); } @@ -138,7 +138,7 @@ const domainSuffixSet = new Set(); crlfDelay: Infinity }); for await (const line of rl3) { - const l = shouldIgnoreLine(line); + const l = processLine(line); if (l) { domainSets.add(l); } diff --git a/Build/lib/parse-filter.js b/Build/lib/parse-filter.js index 6b7a0892..4d598ddb 100644 --- a/Build/lib/parse-filter.js +++ b/Build/lib/parse-filter.js @@ -3,6 +3,7 @@ const { fetchWithRetry } = require('./fetch-retry'); const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-text-by-line'); const { NetworkFilter } = require('@cliqz/adblocker'); const { normalizeDomain } = require('./is-domain-loose'); +const { processLine } = require('./process-line'); const DEBUG_DOMAIN_TO_FIND = null; // example.com | null let foundDebugDomain = false; @@ -31,18 +32,14 @@ async function processDomainLists(domainListsUrl) { const rl = await fetchRemoteTextAndCreateReadlineInterface(domainListsUrl); for await (const line of rl) { - if ( - line.startsWith('#') - || line.startsWith('!') - || line.startsWith(' ') - || line === '' - || line.startsWith('\r') - || line.startsWith('\n') - ) { + if (line.startsWith('!')) { continue; } - const domainToAdd = line.trim(); + const domainToAdd = processLine(line); + if (!domainToAdd) { + continue; + } if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) { warnOnce(domainListsUrl.toString(), false, DEBUG_DOMAIN_TO_FIND); @@ -69,13 +66,12 @@ async function processHosts(hostsUrl, includeAllSubDomain = false) { const domainSets = new Set(); const rl = await fetchRemoteTextAndCreateReadlineInterface(hostsUrl); - for await (const line of rl) { - if (line.includes('#')) { - continue; - } - if (line.startsWith(' ') || line.startsWith('\r') || line.startsWith('\n') || line.trim() === '') { + for await (const _line of rl) { + const line = processLine(_line); + if (!line) { continue; } + const [, ...domains] = line.split(' '); const _domain = domains.join(' ').trim(); diff --git a/Build/lib/should-ignore-line.js b/Build/lib/process-line.js similarity index 72% rename from Build/lib/should-ignore-line.js rename to Build/lib/process-line.js index 4e1fbaee..39012be5 100644 --- a/Build/lib/should-ignore-line.js +++ b/Build/lib/process-line.js @@ -1,10 +1,13 @@ /* eslint-disable camelcase -- cache index access */ /** + * If line is commented out or empty, return null. + * Otherwise, return trimmed line. + * * @param {string} line */ -module.exports.shouldIgnoreLine = (line) => { - if (line === '') { +module.exports.processLine = (line) => { + if (!line) { return null; }