From d5850aa84b1a3792c55a51bd91559fc1428ff264 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Fri, 15 Sep 2023 22:35:46 +0800 Subject: [PATCH] Perf: speed up build --- Build/build-anti-bogus-domain.js | 13 ++- Build/build-apple-cdn.js | 2 +- Build/build-cdn-conf.js | 14 ++- Build/build-chn-cidr.js | 20 ++-- Build/build-common.js | 6 +- Build/build-domestic-ruleset.js | 19 +--- Build/build-internal-cdn-rules.js | 15 ++- Build/build-internal-chn-domains.js | 2 +- Build/build-internal-reverse-chn-cidr.js | 20 ++-- Build/build-phishing-domainset.js | 127 +++++++++++++---------- Build/build-public.js | 2 +- Build/build-reject-domainset.js | 2 +- Build/build-speedtest-domainset.js | 10 +- Build/build-telegram-cidr.js | 18 ++-- Build/download-previous-build.js | 11 +- Build/index.js | 38 ++++++- Build/lib/cached-tld-parse.js | 6 +- Build/lib/get-gorhill-publicsuffix.js | 16 ++- Build/lib/is-domain-loose.js | 24 ----- Build/lib/parse-filter.js | 18 +++- Build/lib/process-line.js | 18 +++- Build/lib/trace-runner.js | 9 +- Build/validate-domainset.js | 15 +-- 23 files changed, 241 insertions(+), 184 deletions(-) delete mode 100644 Build/lib/is-domain-loose.js diff --git a/Build/build-anti-bogus-domain.js b/Build/build-anti-bogus-domain.js index 0375e587..d8eae079 100644 --- a/Build/build-anti-bogus-domain.js +++ b/Build/build-anti-bogus-domain.js @@ -6,7 +6,7 @@ const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('. const { processLine } = require('./lib/process-line'); const { task } = require('./lib/trace-runner'); -const buildAntiBogusDomain = task(__filename, async () => { +const getBogusNxDomainIPs = async () => { /** @type {string[]} */ const res = []; for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')) { @@ -14,20 +14,27 @@ const buildAntiBogusDomain = task(__filename, async () => { res.push(line.replace('bogus-nxdomain=', '')); } } + return res; +}; +const buildAntiBogusDomain = task(__filename, async () => { const filePath = path.resolve(__dirname, '../Source/ip/reject.conf'); + const bogusIpPromise = getBogusNxDomainIPs(); + /** @type {string[]} */ const result = []; for await (const line of readFileByLine(filePath)) { if (line === '# --- [Anti Bogus Domain Replace Me] ---') { - res.forEach(ip => { + (await bogusIpPromise).forEach(ip => { if (isIPv4(ip)) { result.push(`IP-CIDR,${ip}/32,no-resolve`); } else if (isIPv6(ip)) { result.push(`IP-CIDR6,${ip}/128,no-resolve`); } }); + + continue; } else { const l = processLine(line); if (l) { @@ -47,7 +54,7 @@ const buildAntiBogusDomain = task(__filename, async () => { ' - https://github.com/felixonmars/dnsmasq-china-list' ]; - await Promise.all(createRuleset( + return Promise.all(createRuleset( 'Sukka\'s Ruleset - Anti Bogus Domain', description, new Date(), diff --git a/Build/build-apple-cdn.js b/Build/build-apple-cdn.js index 0a1d3866..f6449462 100644 --- a/Build/build-apple-cdn.js +++ b/Build/build-apple-cdn.js @@ -20,7 +20,7 @@ const buildAppleCdn = task(__filename, async () => { const ruleset = res.map(domain => `DOMAIN-SUFFIX,${domain}`); const domainset = res.map(i => `.${i}`); - await Promise.all([ + return Promise.all([ ...createRuleset( 'Sukka\'s Ruleset - Apple CDN', description, diff --git a/Build/build-cdn-conf.js b/Build/build-cdn-conf.js index 9a3b14f3..e3966bd3 100644 --- a/Build/build-cdn-conf.js +++ b/Build/build-cdn-conf.js @@ -9,7 +9,7 @@ const { processLine } = require('./lib/process-line'); const publicSuffixPath = path.resolve(__dirname, '../node_modules/.cache/public_suffix_list_dat.txt'); -const buildCdnConf = task(__filename, async () => { +const getS3OSSDomains = async () => { const trie = new Trie(); if (fs.existsSync(publicSuffixPath)) { @@ -46,13 +46,19 @@ const buildCdnConf = task(__filename, async () => { } }); + return S3OSSDomains; +}; + +const buildCdnConf = task(__filename, async () => { /** @type {string[]} */ const cdnDomainsList = []; + + const getS3OSSDomainsPromise = getS3OSSDomains(); + for await (const l of readFileByLine(path.resolve(__dirname, '../Source/non_ip/cdn.conf'))) { if (l === '# --- [AWS S3 Replace Me] ---') { - S3OSSDomains.forEach(domain => { - cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`); - }); + (await getS3OSSDomainsPromise).forEach(domain => { cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`); }); + continue; } const line = processLine(l); if (line) { diff --git a/Build/build-chn-cidr.js b/Build/build-chn-cidr.js index 3b71b42d..6a3be632 100644 --- a/Build/build-chn-cidr.js +++ b/Build/build-chn-cidr.js @@ -3,7 +3,7 @@ const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remot const { resolve: pathResolve } = require('path'); // This should not use `createRuleset` API since we are going to generate ipcidr for Clash const { compareAndWriteFile, withBannerArray } = require('./lib/create-file'); -const { processLine } = require('./lib/process-line'); +const { processLineFromReadline } = require('./lib/process-line'); const { task } = require('./lib/trace-runner'); // https://github.com/misakaio/chnroutes2/issues/25 @@ -13,20 +13,12 @@ const EXCLUDE_CIDRS = [ ]; const buildChnCidr = task(__filename, async () => { - const { exclude: excludeCidrs } = await import('cidr-tools-wasm'); + const [{ exclude: excludeCidrs }, cidr] = await Promise.all([ + import('cidr-tools-wasm'), + processLineFromReadline(await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')) + ]); - /** @type {string[]} */ - const cidr = []; - for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')) { - const l = processLine(line); - if (l) { - cidr.push(l); - } - } - - console.log('Before Merge:', cidr.length); const filteredCidr = excludeCidrs(cidr, EXCLUDE_CIDRS, true); - console.log('After Merge:', filteredCidr.length); const description = [ 'License: CC BY-SA 2.0', @@ -36,7 +28,7 @@ const buildChnCidr = task(__filename, async () => { 'Data from https://misaka.io (misakaio @ GitHub)' ]; - await Promise.all([ + return Promise.all([ compareAndWriteFile( withBannerArray( 'Sukka\'s Ruleset - Mainland China IPv4 CIDR', diff --git a/Build/build-common.js b/Build/build-common.js index 8a6ce775..22c19d42 100644 --- a/Build/build-common.js +++ b/Build/build-common.js @@ -17,7 +17,7 @@ const outputSurgeDir = path.resolve(__dirname, '../List'); const outputClashDir = path.resolve(__dirname, '../Clash'); const buildCommon = task(__filename, async () => { - /** @type {Promise[]} */ + /** @type {Promise[]} */ const promises = []; const pw = new PathScurry(sourceDir); @@ -107,7 +107,7 @@ async function transformDomainset(sourcePath, relativePath) { ) ]; - await Promise.all(createRuleset( + return Promise.all(createRuleset( title, description, new Date(), @@ -140,7 +140,7 @@ async function transformRuleset(sourcePath, relativePath) { ) ]; - await Promise.all(createRuleset( + return Promise.all(createRuleset( title, description, new Date(), diff --git a/Build/build-domestic-ruleset.js b/Build/build-domestic-ruleset.js index 105c1214..ed446a05 100644 --- a/Build/build-domestic-ruleset.js +++ b/Build/build-domestic-ruleset.js @@ -2,33 +2,22 @@ const path = require('path'); const { DOMESTICS } = require('../Source/non_ip/domestic'); const { readFileByLine } = require('./lib/fetch-remote-text-by-line'); -const { processLine } = require('./lib/process-line'); +const { processLineFromReadline } = require('./lib/process-line'); const { compareAndWriteFile, createRuleset } = require('./lib/create-file'); -const domainSorter = require('./lib/stable-sort-domain'); const { task } = require('./lib/trace-runner'); const buildDomesticRuleset = task(__filename, async () => { - const rl = readFileByLine(path.resolve(__dirname, '../Source/non_ip/domestic.conf')); - const results = []; - for await (const l of rl) { - const line = processLine(l); - if (line) { - results.push(line); - } - } + const results = await processLineFromReadline(readFileByLine(path.resolve(__dirname, '../Source/non_ip/domestic.conf'))); results.push( ...Object.entries(DOMESTICS) .reduce( (acc, [key, { domains }]) => { - if (key === 'SYSTEM') { - return acc; - } + if (key === 'SYSTEM') return acc; return [...acc, ...domains]; }, /** @type {string[]} */([]) ) - .sort(domainSorter) .map((domain) => `DOMAIN-SUFFIX,${domain}`) ); @@ -40,7 +29,7 @@ const buildDomesticRuleset = task(__filename, async () => { 'This file contains known addresses that are avaliable in the Mainland China.' ]; - await Promise.all([ + return Promise.all([ ...createRuleset( 'Sukka\'s Ruleset - Domestic Domains', rulesetDescription, diff --git a/Build/build-internal-cdn-rules.js b/Build/build-internal-cdn-rules.js index eb5daf96..17d13560 100644 --- a/Build/build-internal-cdn-rules.js +++ b/Build/build-internal-cdn-rules.js @@ -4,9 +4,11 @@ const path = require('path'); const tldts = require('tldts'); const { processLine } = require('./lib/process-line'); const { readFileByLine } = require('./lib/fetch-remote-text-by-line'); -const domainSorter = require('./lib/stable-sort-domain'); +const { createDomainSorter } = require('./lib/stable-sort-domain'); const { task } = require('./lib/trace-runner'); const { compareAndWriteFile } = require('./lib/create-file'); +const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix'); +const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse'); /** * @param {string} string @@ -19,11 +21,15 @@ const buildInternalCDNDomains = task(__filename, async () => { const set = new Set(); const keywords = new Set(); + const gorhill = await getGorhillPublicSuffixPromise(); + const getDomain = createCachedGorhillGetDomain(gorhill); + const domainSorter = createDomainSorter(gorhill); + /** * @param {string} input */ const addApexDomain = (input) => { - const d = tldts.getDomain(input, { allowPrivateDomains: true }); + const d = getDomain(input); if (d) { set.add(d); } @@ -35,7 +41,8 @@ const buildInternalCDNDomains = task(__filename, async () => { const processLocalDomainSet = async (domainSetPath) => { for await (const line of readFileByLine(domainSetPath)) { const parsed = tldts.parse(line, { allowPrivateDomains: true }); - if (!parsed.isIp && (parsed.isIcann || parsed.isPrivate)) { + if (parsed.isIp) continue; + if (parsed.isIcann || parsed.isPrivate) { if (parsed.domain) { set.add(parsed.domain); } @@ -80,7 +87,7 @@ const buildInternalCDNDomains = task(__filename, async () => { fse.ensureDir(path.resolve(__dirname, '../List/internal')) ]); - await compareAndWriteFile( + return compareAndWriteFile( [ ...Array.from(set).sort(domainSorter).map(i => `SUFFIX,${i}`), ...Array.from(keywords).sort().map(i => `REGEX,${i}`) diff --git a/Build/build-internal-chn-domains.js b/Build/build-internal-chn-domains.js index cc3a9ec4..30ea0ef1 100644 --- a/Build/build-internal-chn-domains.js +++ b/Build/build-internal-chn-domains.js @@ -11,7 +11,7 @@ const buildInternalChnDomains = task(__filename, async () => { fse.ensureDir(path.resolve(__dirname, '../List/internal')) ]); - await compareAndWriteFile( + return compareAndWriteFile( result.map(line => `SUFFIX,${line}`), path.resolve(__dirname, '../List/internal/accelerated-china-domains.txt') ); diff --git a/Build/build-internal-reverse-chn-cidr.js b/Build/build-internal-reverse-chn-cidr.js index 427dd02d..949354d1 100644 --- a/Build/build-internal-reverse-chn-cidr.js +++ b/Build/build-internal-reverse-chn-cidr.js @@ -1,6 +1,6 @@ // @ts-check const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remote-text-by-line'); -const { processLine } = require('./lib/process-line'); +const { processLineFromReadline } = require('./lib/process-line'); const path = require('path'); const fse = require('fs-extra'); const fs = require('fs'); @@ -25,16 +25,11 @@ const RESERVED_IPV4_CIDR = [ ]; const buildInternalReverseChnCIDR = task(__filename, async () => { - const { exclude } = await import('cidr-tools-wasm'); - - /** @type {string[]} */ - const cidr = []; - for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')) { - const l = processLine(line); - if (l) { - cidr.push(l); - } - } + const [{ exclude }, cidr] = await Promise.all([ + import('cidr-tools-wasm'), + processLineFromReadline(await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')), + fse.ensureDir(path.resolve(__dirname, '../List/internal')) + ]); const reversedCidr = exclude( ['0.0.0.0/0'], @@ -42,8 +37,7 @@ const buildInternalReverseChnCIDR = task(__filename, async () => { true ); - await fse.ensureDir(path.resolve(__dirname, '../List/internal')); - await fs.promises.writeFile( + return fs.promises.writeFile( path.resolve(__dirname, '../List/internal/reversed-chn-cidr.txt'), `${reversedCidr.join('\n')}\n` ); diff --git a/Build/build-phishing-domainset.js b/Build/build-phishing-domainset.js index 2acfc004..26686445 100644 --- a/Build/build-phishing-domainset.js +++ b/Build/build-phishing-domainset.js @@ -1,10 +1,14 @@ -const tldts = require('tldts'); +// @ts-check const { processFilterRules } = require('./lib/parse-filter.js'); const path = require('path'); const { createRuleset } = require('./lib/create-file'); const { processLine } = require('./lib/process-line.js'); -const domainSorter = require('./lib/stable-sort-domain'); +const { createDomainSorter } = require('./lib/stable-sort-domain'); const { traceSync, task } = require('./lib/trace-runner.js'); +const Trie = require('./lib/trie.js'); +const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix.js'); +const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse.js'); +const tldts = require('tldts'); const WHITELIST_DOMAIN = new Set([ 'w3s.link', @@ -61,77 +65,94 @@ const BLACK_TLD = new Set([ ]); const buildPhishingDomainSet = task(__filename, async () => { - const domainSet = Array.from((await processFilterRules( - 'https://phishing-filter.pages.dev/phishing-filter-agh.txt' - // [ - // 'https://malware-filter.gitlab.io/phishing-filter/phishing-filter-agh.txt', - // 'https://malware-filter.pages.dev/phishing-filter-agh.txt', - // 'https://phishing-filter.pages.dev/phishing-filter-agh.txt' - // ] - )).black); + const [{ black: domainSet }, gorhill] = await Promise.all([ + processFilterRules( + 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt', + [ + 'https://malware-filter.gitlab.io/phishing-filter/phishing-filter-agh.txt', + 'https://malware-filter.pages.dev/phishing-filter-agh.txt', + 'https://phishing-filter.pages.dev/phishing-filter-agh.txt' + ] + ), + getGorhillPublicSuffixPromise() + ]); + + traceSync('* whitelist', () => { + const trieForRemovingWhiteListed = Trie.from(domainSet); + WHITELIST_DOMAIN.forEach(white => { + trieForRemovingWhiteListed.find(`.${white}`, false).forEach(f => domainSet.delete(f)); + if (trieForRemovingWhiteListed.has(white)) { + domainSet.delete(white); + } + }); + }); + const domainCountMap = {}; + const getDomain = createCachedGorhillGetDomain(gorhill); traceSync('* process domain set', () => { - for (let i = 0, len = domainSet.length; i < len; i++) { - const line = processLine(domainSet[i]); + const domainArr = Array.from(domainSet); + + for (let i = 0, len = domainArr.length; i < len; i++) { + const line = processLine(domainArr[i]); if (!line) continue; - const parsed = tldts.parse(line, { allowPrivateDomains: true }); - const apexDomain = parsed.domain; + const apexDomain = getDomain(line); + if (!apexDomain) continue; - if (apexDomain) { - if (WHITELIST_DOMAIN.has(apexDomain)) { - continue; + domainCountMap[apexDomain] ||= 0; + + const isPhishingDomainMockingCoJp = line.includes('-co-jp'); + if (isPhishingDomainMockingCoJp) { + domainCountMap[apexDomain] += 0.5; + } + + if (line.startsWith('.amaz')) { + domainCountMap[apexDomain] += 0.5; + + if (line.startsWith('.amazon-')) { + domainCountMap[apexDomain] += 4.5; } + if (isPhishingDomainMockingCoJp) { + domainCountMap[apexDomain] += 4; + } + } else if (line.startsWith('.customer')) { + domainCountMap[apexDomain] += 0.25; + } - domainCountMap[apexDomain] ||= 0; + const tld = gorhill.getPublicSuffix(line[0] === '.' ? line.slice(1) : line); + if (!tld || !BLACK_TLD.has(tld)) continue; - let isPhishingDomainMockingAmazon = false; - if (line.startsWith('.amaz')) { - domainCountMap[apexDomain] += 0.5; + domainCountMap[apexDomain] += 1; - isPhishingDomainMockingAmazon = true; + const lineLen = line.length; - if (line.startsWith('.amazon-')) { - domainCountMap[apexDomain] += 4.5; - } - } else if (line.startsWith('.customer')) { + if (lineLen > 19) { + // Add more weight if the domain is long enough + if (lineLen > 44) { + domainCountMap[apexDomain] += 3.5; + } else if (lineLen > 34) { + domainCountMap[apexDomain] += 2.5; + } else if (lineLen > 29) { + domainCountMap[apexDomain] += 1.5; + } else if (lineLen > 24) { + domainCountMap[apexDomain] += 0.75; + } else { domainCountMap[apexDomain] += 0.25; } - if (line.includes('-co-jp')) { - domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5); - } - const tld = parsed.publicSuffix; - if (!tld || !BLACK_TLD.has(tld)) continue; - - domainCountMap[apexDomain] += 1; - - if (line.length > 19) { - // Add more weight if the domain is long enough - if (line.length > 44) { - domainCountMap[apexDomain] += 3.5; - } else if (line.length > 34) { - domainCountMap[apexDomain] += 2.5; - } else if (line.length > 29) { + if (domainCountMap[apexDomain] < 5) { + const subdomain = tldts.getSubdomain(line); + if (subdomain?.includes('.')) { domainCountMap[apexDomain] += 1.5; - } else if (line.length > 24) { - domainCountMap[apexDomain] += 0.75; - } else if (line.length > 19) { - domainCountMap[apexDomain] += 0.25; - } - - if (domainCountMap[apexDomain] < 5) { - const subdomain = parsed.subdomain; - if (subdomain?.includes('.')) { - domainCountMap[apexDomain] += 1.5; - } } } } } }); + const domainSorter = createDomainSorter(gorhill); + const results = traceSync('* get final results', () => Object.entries(domainCountMap) .reduce((acc, [apexDomain, count]) => { if (count >= 5) { @@ -151,7 +172,7 @@ const buildPhishingDomainSet = task(__filename, async () => { ' - https://gitlab.com/malware-filter/phishing-filter' ]; - await Promise.all(createRuleset( + return Promise.all(createRuleset( 'Sukka\'s Ruleset - Reject Phishing', description, new Date(), diff --git a/Build/build-public.js b/Build/build-public.js index 569bebd3..ecd5899f 100644 --- a/Build/build-public.js +++ b/Build/build-public.js @@ -28,7 +28,7 @@ const buildPublicHtml = task(__filename, async () => { const html = template(list); - await fs.promises.writeFile(path.join(publicPath, 'index.html'), html, 'utf-8'); + return fs.promises.writeFile(path.join(publicPath, 'index.html'), html, 'utf-8'); }); module.exports.buildPublicHtml = buildPublicHtml; diff --git a/Build/build-reject-domainset.js b/Build/build-reject-domainset.js index bd1ecd6b..329ba693 100644 --- a/Build/build-reject-domainset.js +++ b/Build/build-reject-domainset.js @@ -208,7 +208,7 @@ const buildRejectDomainSet = task(__filename, async () => { ...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`) ]; - await Promise.all([ + return Promise.all([ ...createRuleset( 'Sukka\'s Ruleset - Reject Base', description, diff --git a/Build/build-speedtest-domainset.js b/Build/build-speedtest-domainset.js index 9559309a..0881a26c 100644 --- a/Build/build-speedtest-domainset.js +++ b/Build/build-speedtest-domainset.js @@ -65,7 +65,13 @@ const buildSpeedtestDomainSet = task(__filename, async () => { '.speedtest.idv.tw', '.speedtest.frontier.com', '.speedtest.orange.fr', - '.speedtest.centurylink.net' + '.speedtest.centurylink.net', + '.srvr.bell.ca', + '.speedtest.contabo.net', + 'speedtest.hk.chinamobile.com', + 'speedtestbb.hk.chinamobile.com', + '.hizinitestet.com', + '.linknetspeedtest.net.br' ]); const hostnameGroups = await Promise.all([ @@ -114,7 +120,7 @@ const buildSpeedtestDomainSet = task(__filename, async () => { 'GitHub: https://github.com/SukkaW/Surge' ]; - await Promise.all(createRuleset( + return Promise.all(createRuleset( 'Sukka\'s Ruleset - Speedtest Domains', description, new Date(), diff --git a/Build/build-telegram-cidr.js b/Build/build-telegram-cidr.js index f5a0f12a..a7688a0c 100644 --- a/Build/build-telegram-cidr.js +++ b/Build/build-telegram-cidr.js @@ -17,14 +17,14 @@ const buildTelegramCIDR = task(__filename, async () => { for await (const line of createReadlineInterfaceFromResponse(resp)) { const cidr = processLine(line); - if (cidr) { - const [subnet] = cidr.split('/'); - if (isIPv4(subnet)) { - results.push(`IP-CIDR,${cidr},no-resolve`); - } - if (isIPv6(subnet)) { - results.push(`IP-CIDR6,${cidr},no-resolve`); - } + if (!cidr) continue; + + const [subnet] = cidr.split('/'); + if (isIPv4(subnet)) { + results.push(`IP-CIDR,${cidr},no-resolve`); + } + if (isIPv6(subnet)) { + results.push(`IP-CIDR6,${cidr},no-resolve`); } } @@ -40,7 +40,7 @@ const buildTelegramCIDR = task(__filename, async () => { ' - https://core.telegram.org/resources/cidr.txt' ]; - await Promise.all(createRuleset( + return Promise.all(createRuleset( 'Sukka\'s Ruleset - Telegram IP CIDR', description, date, diff --git a/Build/download-previous-build.js b/Build/download-previous-build.js index e0d70f30..543c69d8 100644 --- a/Build/download-previous-build.js +++ b/Build/download-previous-build.js @@ -33,7 +33,6 @@ const downloadPreviousBuild = task(__filename, async () => { if (!isCI) { allFileExists = fs.existsSync(join(__dirname, '..', line)); if (!allFileExists) { - console.log(`File not exists: ${line}`); break; } } @@ -73,33 +72,27 @@ const downloadPreviousBuild = task(__filename, async () => { await Promise.all(filesList.map(async p => { const src = join(extractedPath, 'Surge-gh-pages', p); if (await fileExists(src)) { - const dst = join(__dirname, '..', p); - console.log('Copy', { src, dst }); return fse.copy( src, join(__dirname, '..', p), { overwrite: true } ); } - - console.log('File not exists:', src); })); - await fs.promises.unlink(extractedPath).catch(() => { }); + return fs.promises.unlink(extractedPath).catch(() => { }); }); const downloadPublicSuffixList = task(__filename, async () => { const publicSuffixDir = resolve(__dirname, '../node_modules/.cache'); const publicSuffixPath = join(publicSuffixDir, 'public_suffix_list_dat.txt'); - console.log('Download public suffix list.'); - const [resp] = await Promise.all([ fetch('https://publicsuffix.org/list/public_suffix_list.dat'), fse.ensureDir(publicSuffixDir) ]); - await pipeline( + return pipeline( Readable.fromWeb(resp.body), fs.createWriteStream(publicSuffixPath) ); diff --git a/Build/index.js b/Build/index.js index cb1f6153..ae7c21e3 100644 --- a/Build/index.js +++ b/Build/index.js @@ -1,3 +1,5 @@ +// @ts-check + const { downloadPreviousBuild, downloadPublicSuffixList } = require('./download-previous-build'); const { buildCommon } = require('./build-common'); const { buildAntiBogusDomain } = require('./build-anti-bogus-domain'); @@ -47,7 +49,7 @@ const requireWorker = (path) => { * @param {WithWorker} worker */ const endWorker = async (worker) => { - const { forceExited } = worker.end(); + const { forceExited } = await worker.end(); if (forceExited && worker.__sukka_worker_name) { console.log(worker.__sukka_worker_name, 'forceExited'); } @@ -72,7 +74,10 @@ const endWorker = async (worker) => { downloadPublicSuffixListPromise ]).then(() => buildCdnConf()); // build:phishing-domainset - const buildPhilishingDomainsetPromise = downloadPreviousBuildPromise.then(() => buildPhishingDomainSet()); + const buildPhilishingDomainsetPromise = Promise.all([ + downloadPreviousBuildPromise, + downloadPublicSuffixListPromise + ]).then(() => buildPhishingDomainSet()); // build:reject-domainset const buildRejectDomainSetPromise = Promise.all([ downloadPreviousBuildPromise, @@ -87,6 +92,7 @@ const endWorker = async (worker) => { const buildSpeedtestDomainSetPromise = downloadPreviousBuildPromise.then(() => buildSpeedtestDomainSet()); // build:internal-cdn-rules const buildInternalCDNDomainsPromise = Promise.all([ + downloadPublicSuffixListPromise, buildCommonPromise, buildCdnConfPromise ]).then(() => buildInternalCDNDomains()); @@ -97,7 +103,7 @@ const endWorker = async (worker) => { // build:domestic-ruleset const buildDomesticRulesetPromise = downloadPreviousBuildPromise.then(() => buildDomesticRuleset()); - await Promise.all([ + const stats = await Promise.all([ downloadPreviousBuildPromise, downloadPublicSuffixListPromise, buildCommonPromise, @@ -120,4 +126,30 @@ const endWorker = async (worker) => { validate(), endWorker(buildInternalReverseChnCIDRWorker) ]); + + printStats(stats); })(); + +/** + * @param {Array<{ start: number, end: number, taskName: string }>} stats + */ +function printStats(stats) { + // sort stats by start time + stats.sort((a, b) => a.start - b.start); + + const longestTaskName = Math.max(...stats.map(i => i.taskName.length)); + const realStart = Math.min(...stats.map(i => i.start)); + const realEnd = Math.max(...stats.map(i => i.end)); + + const totalMs = realEnd - realStart; + + const statsStep = (totalMs / 160) | 0; + + stats.forEach(stat => { + console.log( + `[${stat.taskName}]${' '.repeat(longestTaskName - stat.taskName.length)}`, + ' '.repeat(((stat.start - realStart) / statsStep) | 0), + '='.repeat(Math.max(((stat.end - stat.start) / statsStep) | 0, 1)) + ); + }); +} diff --git a/Build/lib/cached-tld-parse.js b/Build/lib/cached-tld-parse.js index fbe7fe50..9630bab4 100644 --- a/Build/lib/cached-tld-parse.js +++ b/Build/lib/cached-tld-parse.js @@ -9,9 +9,7 @@ const sharedConfig = { allowPrivateDomains: true }; * @param {string} domain * @returns {ReturnType} */ -module.exports.parse = (domain) => { - return cache.sync(domain, () => tldts.parse(domain, sharedConfig)); -}; +module.exports.parse = (domain) => cache.sync(domain, () => tldts.parse(domain, sharedConfig)); let gothillGetDomainCache = null; /** @@ -22,5 +20,5 @@ module.exports.createCachedGorhillGetDomain = (gorhill) => { /** * @param {string} domain */ - return (domain) => gothillGetDomainCache.sync(domain, () => gorhill.getDomain(domain[0] === '.' ? domain.slice(1) : domain)); + return (domain) => (/** @type {ReturnType} */ (gothillGetDomainCache)).sync(domain, () => gorhill.getDomain(domain[0] === '.' ? domain.slice(1) : domain)); }; diff --git a/Build/lib/get-gorhill-publicsuffix.js b/Build/lib/get-gorhill-publicsuffix.js index 6d911afa..f2a9ad69 100644 --- a/Build/lib/get-gorhill-publicsuffix.js +++ b/Build/lib/get-gorhill-publicsuffix.js @@ -3,13 +3,6 @@ const fs = require('fs'); const path = require('path'); const publicSuffixPath = path.resolve(__dirname, '../../node_modules/.cache/public_suffix_list_dat.txt'); -const getPublicSuffixListDat = () => { - if (fs.existsSync(publicSuffixPath)) { - return fs.promises.readFile(publicSuffixPath, 'utf-8'); - } - console.log('public_suffix_list.dat not found, fetch directly from remote.'); - return fetch('https://publicsuffix.org/list/public_suffix_list.dat').then(r => r.text()); -}; const getGorhillPublicSuffix = async () => { const customFetch = async (url) => { @@ -20,7 +13,12 @@ const getGorhillPublicSuffix = async () => { }; const [publicSuffixListDat, { default: gorhill }] = await Promise.all([ - getPublicSuffixListDat(), + fs.existsSync(publicSuffixPath) + ? fs.promises.readFile(publicSuffixPath, 'utf-8') + : fetch('https://publicsuffix.org/list/public_suffix_list.dat').then(r => { + console.log('public_suffix_list.dat not found, fetch directly from remote.'); + return r.text(); + }), import('gorhill-publicsuffixlist') ]); @@ -30,7 +28,7 @@ const getGorhillPublicSuffix = async () => { return gorhill; }; -/** @type {Promise} */ +/** @type {Promise | null} */ let gorhillPublicSuffixPromise = null; module.exports.getGorhillPublicSuffixPromise = () => { gorhillPublicSuffixPromise ||= getGorhillPublicSuffix(); diff --git a/Build/lib/is-domain-loose.js b/Build/lib/is-domain-loose.js deleted file mode 100644 index 4197c564..00000000 --- a/Build/lib/is-domain-loose.js +++ /dev/null @@ -1,24 +0,0 @@ -// @ts-check -const tldts = require('./cached-tld-parse'); -/** - * @param {string | null | undefined} domain - */ -module.exports.normalizeDomain = (domain) => { - if (!domain) { - return null; - } - - const { isIcann, isPrivate, hostname, isIp } = tldts.parse(domain); - if (isIp) { - return null; - } - - if (isIcann || isPrivate) { - if (hostname?.[0] === '.') { - return hostname.slice(1); - } - return hostname; - } - - return null; -}; diff --git a/Build/lib/parse-filter.js b/Build/lib/parse-filter.js index 7489c968..39f927f9 100644 --- a/Build/lib/parse-filter.js +++ b/Build/lib/parse-filter.js @@ -1,8 +1,8 @@ // @ts-check const { fetchWithRetry } = require('./fetch-retry'); +const tldts = require('tldts'); const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-text-by-line'); const { NetworkFilter } = require('@cliqz/adblocker'); -const { normalizeDomain } = require('./is-domain-loose'); const { processLine } = require('./process-line'); const { performance } = require('perf_hooks'); @@ -19,6 +19,22 @@ const warnOnce = (url, isWhite, ...message) => { console.warn(url, isWhite ? '(white)' : '(black)', ...message); }; +const normalizeDomain = (domain) => { + if (!domain) return null; + + const { isIcann, isPrivate, hostname, isIp } = tldts.parse(domain); + if (isIp) return null; + + if (isIcann || isPrivate) { + if (hostname?.[0] === '.') { + return hostname.slice(1); + } + return hostname; + } + + return null; +}; + /** * @param {string | URL} domainListsUrl */ diff --git a/Build/lib/process-line.js b/Build/lib/process-line.js index 39012be5..8578343f 100644 --- a/Build/lib/process-line.js +++ b/Build/lib/process-line.js @@ -6,7 +6,7 @@ * * @param {string} line */ -module.exports.processLine = (line) => { +const processLine = (line) => { if (!line) { return null; } @@ -30,3 +30,19 @@ module.exports.processLine = (line) => { return trimmed; }; +module.exports.processLine = processLine; + +/** + * @param {import('readline').ReadLine} rl + */ +module.exports.processLineFromReadline = async (rl) => { + /** @type {string[]} */ + const res = []; + for await (const line of rl) { + const l = processLine(line); + if (l) { + res.push(l); + } + } + return res; +}; diff --git a/Build/lib/trace-runner.js b/Build/lib/trace-runner.js index b6a80d17..fd7ce413 100644 --- a/Build/lib/trace-runner.js +++ b/Build/lib/trace-runner.js @@ -40,8 +40,13 @@ module.exports.traceAsync = traceAsync; */ module.exports.task = (__filename, fn, customname = null) => { const taskName = customname ?? path.basename(__filename, path.extname(__filename)); - return () => { + return async () => { console.log(`🏃 [${taskName}] Start executing`); - return traceAsync(`✅ [${taskName}] Executed successfully`, fn); + const start = performance.now(); + await fn(); + const end = performance.now(); + console.log(`✅ [${taskName}] Executed successfully: ${(end - start).toFixed(3)}ms`); + + return { start, end, taskName }; }; }; diff --git a/Build/validate-domainset.js b/Build/validate-domainset.js index 011f65ea..dd2dc3cf 100644 --- a/Build/validate-domainset.js +++ b/Build/validate-domainset.js @@ -59,14 +59,15 @@ const _validateRuleset = async (filePath) => { }; const validate = task(__filename, async () => { - const [domainsetFiles, _rulesetFiles] = await Promise.all([ - listDir(path.resolve(__dirname, '../List/domainset')), - listDir(path.resolve(__dirname, '../List/non_ip')) - ]); - await Promise.all( - domainsetFiles.map(file => validateDomainSet(file)) + // const [domainsetFiles, _rulesetFiles] = await Promise.all([ + // listDir(path.resolve(__dirname, '../List/domainset')), + // listDir(path.resolve(__dirname, '../List/non_ip')) + // ]); + return Promise.all([ + listDir(path.resolve(__dirname, '../List/domainset')) + .then(domainsetFiles => Promise.all(domainsetFiles.map(file => validateDomainSet(file)))) // rulesetFiles.map(file => validateRuleset(file)) - ); + ]); }); module.exports.validate = validate;