Perf: speed up build

This commit is contained in:
SukkaW 2023-09-15 22:35:46 +08:00
parent 30cab8fc22
commit d5850aa84b
23 changed files with 241 additions and 184 deletions

View File

@ -6,7 +6,7 @@ const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('.
const { processLine } = require('./lib/process-line'); const { processLine } = require('./lib/process-line');
const { task } = require('./lib/trace-runner'); const { task } = require('./lib/trace-runner');
const buildAntiBogusDomain = task(__filename, async () => { const getBogusNxDomainIPs = async () => {
/** @type {string[]} */ /** @type {string[]} */
const res = []; const res = [];
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')) { for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')) {
@ -14,20 +14,27 @@ const buildAntiBogusDomain = task(__filename, async () => {
res.push(line.replace('bogus-nxdomain=', '')); res.push(line.replace('bogus-nxdomain=', ''));
} }
} }
return res;
};
const buildAntiBogusDomain = task(__filename, async () => {
const filePath = path.resolve(__dirname, '../Source/ip/reject.conf'); const filePath = path.resolve(__dirname, '../Source/ip/reject.conf');
const bogusIpPromise = getBogusNxDomainIPs();
/** @type {string[]} */ /** @type {string[]} */
const result = []; const result = [];
for await (const line of readFileByLine(filePath)) { for await (const line of readFileByLine(filePath)) {
if (line === '# --- [Anti Bogus Domain Replace Me] ---') { if (line === '# --- [Anti Bogus Domain Replace Me] ---') {
res.forEach(ip => { (await bogusIpPromise).forEach(ip => {
if (isIPv4(ip)) { if (isIPv4(ip)) {
result.push(`IP-CIDR,${ip}/32,no-resolve`); result.push(`IP-CIDR,${ip}/32,no-resolve`);
} else if (isIPv6(ip)) { } else if (isIPv6(ip)) {
result.push(`IP-CIDR6,${ip}/128,no-resolve`); result.push(`IP-CIDR6,${ip}/128,no-resolve`);
} }
}); });
continue;
} else { } else {
const l = processLine(line); const l = processLine(line);
if (l) { if (l) {
@ -47,7 +54,7 @@ const buildAntiBogusDomain = task(__filename, async () => {
' - https://github.com/felixonmars/dnsmasq-china-list' ' - https://github.com/felixonmars/dnsmasq-china-list'
]; ];
await Promise.all(createRuleset( return Promise.all(createRuleset(
'Sukka\'s Ruleset - Anti Bogus Domain', 'Sukka\'s Ruleset - Anti Bogus Domain',
description, description,
new Date(), new Date(),

View File

@ -20,7 +20,7 @@ const buildAppleCdn = task(__filename, async () => {
const ruleset = res.map(domain => `DOMAIN-SUFFIX,${domain}`); const ruleset = res.map(domain => `DOMAIN-SUFFIX,${domain}`);
const domainset = res.map(i => `.${i}`); const domainset = res.map(i => `.${i}`);
await Promise.all([ return Promise.all([
...createRuleset( ...createRuleset(
'Sukka\'s Ruleset - Apple CDN', 'Sukka\'s Ruleset - Apple CDN',
description, description,

View File

@ -9,7 +9,7 @@ const { processLine } = require('./lib/process-line');
const publicSuffixPath = path.resolve(__dirname, '../node_modules/.cache/public_suffix_list_dat.txt'); const publicSuffixPath = path.resolve(__dirname, '../node_modules/.cache/public_suffix_list_dat.txt');
const buildCdnConf = task(__filename, async () => { const getS3OSSDomains = async () => {
const trie = new Trie(); const trie = new Trie();
if (fs.existsSync(publicSuffixPath)) { if (fs.existsSync(publicSuffixPath)) {
@ -46,13 +46,19 @@ const buildCdnConf = task(__filename, async () => {
} }
}); });
return S3OSSDomains;
};
const buildCdnConf = task(__filename, async () => {
/** @type {string[]} */ /** @type {string[]} */
const cdnDomainsList = []; const cdnDomainsList = [];
const getS3OSSDomainsPromise = getS3OSSDomains();
for await (const l of readFileByLine(path.resolve(__dirname, '../Source/non_ip/cdn.conf'))) { for await (const l of readFileByLine(path.resolve(__dirname, '../Source/non_ip/cdn.conf'))) {
if (l === '# --- [AWS S3 Replace Me] ---') { if (l === '# --- [AWS S3 Replace Me] ---') {
S3OSSDomains.forEach(domain => { (await getS3OSSDomainsPromise).forEach(domain => { cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`); });
cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`); continue;
});
} }
const line = processLine(l); const line = processLine(l);
if (line) { if (line) {

View File

@ -3,7 +3,7 @@ const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remot
const { resolve: pathResolve } = require('path'); const { resolve: pathResolve } = require('path');
// This should not use `createRuleset` API since we are going to generate ipcidr for Clash // This should not use `createRuleset` API since we are going to generate ipcidr for Clash
const { compareAndWriteFile, withBannerArray } = require('./lib/create-file'); const { compareAndWriteFile, withBannerArray } = require('./lib/create-file');
const { processLine } = require('./lib/process-line'); const { processLineFromReadline } = require('./lib/process-line');
const { task } = require('./lib/trace-runner'); const { task } = require('./lib/trace-runner');
// https://github.com/misakaio/chnroutes2/issues/25 // https://github.com/misakaio/chnroutes2/issues/25
@ -13,20 +13,12 @@ const EXCLUDE_CIDRS = [
]; ];
const buildChnCidr = task(__filename, async () => { const buildChnCidr = task(__filename, async () => {
const { exclude: excludeCidrs } = await import('cidr-tools-wasm'); const [{ exclude: excludeCidrs }, cidr] = await Promise.all([
import('cidr-tools-wasm'),
processLineFromReadline(await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt'))
]);
/** @type {string[]} */
const cidr = [];
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')) {
const l = processLine(line);
if (l) {
cidr.push(l);
}
}
console.log('Before Merge:', cidr.length);
const filteredCidr = excludeCidrs(cidr, EXCLUDE_CIDRS, true); const filteredCidr = excludeCidrs(cidr, EXCLUDE_CIDRS, true);
console.log('After Merge:', filteredCidr.length);
const description = [ const description = [
'License: CC BY-SA 2.0', 'License: CC BY-SA 2.0',
@ -36,7 +28,7 @@ const buildChnCidr = task(__filename, async () => {
'Data from https://misaka.io (misakaio @ GitHub)' 'Data from https://misaka.io (misakaio @ GitHub)'
]; ];
await Promise.all([ return Promise.all([
compareAndWriteFile( compareAndWriteFile(
withBannerArray( withBannerArray(
'Sukka\'s Ruleset - Mainland China IPv4 CIDR', 'Sukka\'s Ruleset - Mainland China IPv4 CIDR',

View File

@ -17,7 +17,7 @@ const outputSurgeDir = path.resolve(__dirname, '../List');
const outputClashDir = path.resolve(__dirname, '../Clash'); const outputClashDir = path.resolve(__dirname, '../Clash');
const buildCommon = task(__filename, async () => { const buildCommon = task(__filename, async () => {
/** @type {Promise<void>[]} */ /** @type {Promise<unknown>[]} */
const promises = []; const promises = [];
const pw = new PathScurry(sourceDir); const pw = new PathScurry(sourceDir);
@ -107,7 +107,7 @@ async function transformDomainset(sourcePath, relativePath) {
) )
]; ];
await Promise.all(createRuleset( return Promise.all(createRuleset(
title, title,
description, description,
new Date(), new Date(),
@ -140,7 +140,7 @@ async function transformRuleset(sourcePath, relativePath) {
) )
]; ];
await Promise.all(createRuleset( return Promise.all(createRuleset(
title, title,
description, description,
new Date(), new Date(),

View File

@ -2,33 +2,22 @@
const path = require('path'); const path = require('path');
const { DOMESTICS } = require('../Source/non_ip/domestic'); const { DOMESTICS } = require('../Source/non_ip/domestic');
const { readFileByLine } = require('./lib/fetch-remote-text-by-line'); const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
const { processLine } = require('./lib/process-line'); const { processLineFromReadline } = require('./lib/process-line');
const { compareAndWriteFile, createRuleset } = require('./lib/create-file'); const { compareAndWriteFile, createRuleset } = require('./lib/create-file');
const domainSorter = require('./lib/stable-sort-domain');
const { task } = require('./lib/trace-runner'); const { task } = require('./lib/trace-runner');
const buildDomesticRuleset = task(__filename, async () => { const buildDomesticRuleset = task(__filename, async () => {
const rl = readFileByLine(path.resolve(__dirname, '../Source/non_ip/domestic.conf')); const results = await processLineFromReadline(readFileByLine(path.resolve(__dirname, '../Source/non_ip/domestic.conf')));
const results = [];
for await (const l of rl) {
const line = processLine(l);
if (line) {
results.push(line);
}
}
results.push( results.push(
...Object.entries(DOMESTICS) ...Object.entries(DOMESTICS)
.reduce( .reduce(
(acc, [key, { domains }]) => { (acc, [key, { domains }]) => {
if (key === 'SYSTEM') { if (key === 'SYSTEM') return acc;
return acc;
}
return [...acc, ...domains]; return [...acc, ...domains];
}, },
/** @type {string[]} */([]) /** @type {string[]} */([])
) )
.sort(domainSorter)
.map((domain) => `DOMAIN-SUFFIX,${domain}`) .map((domain) => `DOMAIN-SUFFIX,${domain}`)
); );
@ -40,7 +29,7 @@ const buildDomesticRuleset = task(__filename, async () => {
'This file contains known addresses that are avaliable in the Mainland China.' 'This file contains known addresses that are avaliable in the Mainland China.'
]; ];
await Promise.all([ return Promise.all([
...createRuleset( ...createRuleset(
'Sukka\'s Ruleset - Domestic Domains', 'Sukka\'s Ruleset - Domestic Domains',
rulesetDescription, rulesetDescription,

View File

@ -4,9 +4,11 @@ const path = require('path');
const tldts = require('tldts'); const tldts = require('tldts');
const { processLine } = require('./lib/process-line'); const { processLine } = require('./lib/process-line');
const { readFileByLine } = require('./lib/fetch-remote-text-by-line'); const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
const domainSorter = require('./lib/stable-sort-domain'); const { createDomainSorter } = require('./lib/stable-sort-domain');
const { task } = require('./lib/trace-runner'); const { task } = require('./lib/trace-runner');
const { compareAndWriteFile } = require('./lib/create-file'); const { compareAndWriteFile } = require('./lib/create-file');
const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix');
const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse');
/** /**
* @param {string} string * @param {string} string
@ -19,11 +21,15 @@ const buildInternalCDNDomains = task(__filename, async () => {
const set = new Set(); const set = new Set();
const keywords = new Set(); const keywords = new Set();
const gorhill = await getGorhillPublicSuffixPromise();
const getDomain = createCachedGorhillGetDomain(gorhill);
const domainSorter = createDomainSorter(gorhill);
/** /**
* @param {string} input * @param {string} input
*/ */
const addApexDomain = (input) => { const addApexDomain = (input) => {
const d = tldts.getDomain(input, { allowPrivateDomains: true }); const d = getDomain(input);
if (d) { if (d) {
set.add(d); set.add(d);
} }
@ -35,7 +41,8 @@ const buildInternalCDNDomains = task(__filename, async () => {
const processLocalDomainSet = async (domainSetPath) => { const processLocalDomainSet = async (domainSetPath) => {
for await (const line of readFileByLine(domainSetPath)) { for await (const line of readFileByLine(domainSetPath)) {
const parsed = tldts.parse(line, { allowPrivateDomains: true }); const parsed = tldts.parse(line, { allowPrivateDomains: true });
if (!parsed.isIp && (parsed.isIcann || parsed.isPrivate)) { if (parsed.isIp) continue;
if (parsed.isIcann || parsed.isPrivate) {
if (parsed.domain) { if (parsed.domain) {
set.add(parsed.domain); set.add(parsed.domain);
} }
@ -80,7 +87,7 @@ const buildInternalCDNDomains = task(__filename, async () => {
fse.ensureDir(path.resolve(__dirname, '../List/internal')) fse.ensureDir(path.resolve(__dirname, '../List/internal'))
]); ]);
await compareAndWriteFile( return compareAndWriteFile(
[ [
...Array.from(set).sort(domainSorter).map(i => `SUFFIX,${i}`), ...Array.from(set).sort(domainSorter).map(i => `SUFFIX,${i}`),
...Array.from(keywords).sort().map(i => `REGEX,${i}`) ...Array.from(keywords).sort().map(i => `REGEX,${i}`)

View File

@ -11,7 +11,7 @@ const buildInternalChnDomains = task(__filename, async () => {
fse.ensureDir(path.resolve(__dirname, '../List/internal')) fse.ensureDir(path.resolve(__dirname, '../List/internal'))
]); ]);
await compareAndWriteFile( return compareAndWriteFile(
result.map(line => `SUFFIX,${line}`), result.map(line => `SUFFIX,${line}`),
path.resolve(__dirname, '../List/internal/accelerated-china-domains.txt') path.resolve(__dirname, '../List/internal/accelerated-china-domains.txt')
); );

View File

@ -1,6 +1,6 @@
// @ts-check // @ts-check
const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remote-text-by-line'); const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remote-text-by-line');
const { processLine } = require('./lib/process-line'); const { processLineFromReadline } = require('./lib/process-line');
const path = require('path'); const path = require('path');
const fse = require('fs-extra'); const fse = require('fs-extra');
const fs = require('fs'); const fs = require('fs');
@ -25,16 +25,11 @@ const RESERVED_IPV4_CIDR = [
]; ];
const buildInternalReverseChnCIDR = task(__filename, async () => { const buildInternalReverseChnCIDR = task(__filename, async () => {
const { exclude } = await import('cidr-tools-wasm'); const [{ exclude }, cidr] = await Promise.all([
import('cidr-tools-wasm'),
/** @type {string[]} */ processLineFromReadline(await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')),
const cidr = []; fse.ensureDir(path.resolve(__dirname, '../List/internal'))
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')) { ]);
const l = processLine(line);
if (l) {
cidr.push(l);
}
}
const reversedCidr = exclude( const reversedCidr = exclude(
['0.0.0.0/0'], ['0.0.0.0/0'],
@ -42,8 +37,7 @@ const buildInternalReverseChnCIDR = task(__filename, async () => {
true true
); );
await fse.ensureDir(path.resolve(__dirname, '../List/internal')); return fs.promises.writeFile(
await fs.promises.writeFile(
path.resolve(__dirname, '../List/internal/reversed-chn-cidr.txt'), path.resolve(__dirname, '../List/internal/reversed-chn-cidr.txt'),
`${reversedCidr.join('\n')}\n` `${reversedCidr.join('\n')}\n`
); );

View File

@ -1,10 +1,14 @@
const tldts = require('tldts'); // @ts-check
const { processFilterRules } = require('./lib/parse-filter.js'); const { processFilterRules } = require('./lib/parse-filter.js');
const path = require('path'); const path = require('path');
const { createRuleset } = require('./lib/create-file'); const { createRuleset } = require('./lib/create-file');
const { processLine } = require('./lib/process-line.js'); const { processLine } = require('./lib/process-line.js');
const domainSorter = require('./lib/stable-sort-domain'); const { createDomainSorter } = require('./lib/stable-sort-domain');
const { traceSync, task } = require('./lib/trace-runner.js'); const { traceSync, task } = require('./lib/trace-runner.js');
const Trie = require('./lib/trie.js');
const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix.js');
const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse.js');
const tldts = require('tldts');
const WHITELIST_DOMAIN = new Set([ const WHITELIST_DOMAIN = new Set([
'w3s.link', 'w3s.link',
@ -61,77 +65,94 @@ const BLACK_TLD = new Set([
]); ]);
const buildPhishingDomainSet = task(__filename, async () => { const buildPhishingDomainSet = task(__filename, async () => {
const domainSet = Array.from((await processFilterRules( const [{ black: domainSet }, gorhill] = await Promise.all([
'https://phishing-filter.pages.dev/phishing-filter-agh.txt' processFilterRules(
// [ 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
// 'https://malware-filter.gitlab.io/phishing-filter/phishing-filter-agh.txt', [
// 'https://malware-filter.pages.dev/phishing-filter-agh.txt', 'https://malware-filter.gitlab.io/phishing-filter/phishing-filter-agh.txt',
// 'https://phishing-filter.pages.dev/phishing-filter-agh.txt' 'https://malware-filter.pages.dev/phishing-filter-agh.txt',
// ] 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
)).black); ]
),
getGorhillPublicSuffixPromise()
]);
traceSync('* whitelist', () => {
const trieForRemovingWhiteListed = Trie.from(domainSet);
WHITELIST_DOMAIN.forEach(white => {
trieForRemovingWhiteListed.find(`.${white}`, false).forEach(f => domainSet.delete(f));
if (trieForRemovingWhiteListed.has(white)) {
domainSet.delete(white);
}
});
});
const domainCountMap = {}; const domainCountMap = {};
const getDomain = createCachedGorhillGetDomain(gorhill);
traceSync('* process domain set', () => { traceSync('* process domain set', () => {
for (let i = 0, len = domainSet.length; i < len; i++) { const domainArr = Array.from(domainSet);
const line = processLine(domainSet[i]);
for (let i = 0, len = domainArr.length; i < len; i++) {
const line = processLine(domainArr[i]);
if (!line) continue; if (!line) continue;
const parsed = tldts.parse(line, { allowPrivateDomains: true }); const apexDomain = getDomain(line);
const apexDomain = parsed.domain; if (!apexDomain) continue;
if (apexDomain) { domainCountMap[apexDomain] ||= 0;
if (WHITELIST_DOMAIN.has(apexDomain)) {
continue; const isPhishingDomainMockingCoJp = line.includes('-co-jp');
if (isPhishingDomainMockingCoJp) {
domainCountMap[apexDomain] += 0.5;
}
if (line.startsWith('.amaz')) {
domainCountMap[apexDomain] += 0.5;
if (line.startsWith('.amazon-')) {
domainCountMap[apexDomain] += 4.5;
} }
if (isPhishingDomainMockingCoJp) {
domainCountMap[apexDomain] += 4;
}
} else if (line.startsWith('.customer')) {
domainCountMap[apexDomain] += 0.25;
}
domainCountMap[apexDomain] ||= 0; const tld = gorhill.getPublicSuffix(line[0] === '.' ? line.slice(1) : line);
if (!tld || !BLACK_TLD.has(tld)) continue;
let isPhishingDomainMockingAmazon = false; domainCountMap[apexDomain] += 1;
if (line.startsWith('.amaz')) {
domainCountMap[apexDomain] += 0.5;
isPhishingDomainMockingAmazon = true; const lineLen = line.length;
if (line.startsWith('.amazon-')) { if (lineLen > 19) {
domainCountMap[apexDomain] += 4.5; // Add more weight if the domain is long enough
} if (lineLen > 44) {
} else if (line.startsWith('.customer')) { domainCountMap[apexDomain] += 3.5;
} else if (lineLen > 34) {
domainCountMap[apexDomain] += 2.5;
} else if (lineLen > 29) {
domainCountMap[apexDomain] += 1.5;
} else if (lineLen > 24) {
domainCountMap[apexDomain] += 0.75;
} else {
domainCountMap[apexDomain] += 0.25; domainCountMap[apexDomain] += 0.25;
} }
if (line.includes('-co-jp')) {
domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5);
}
const tld = parsed.publicSuffix; if (domainCountMap[apexDomain] < 5) {
if (!tld || !BLACK_TLD.has(tld)) continue; const subdomain = tldts.getSubdomain(line);
if (subdomain?.includes('.')) {
domainCountMap[apexDomain] += 1;
if (line.length > 19) {
// Add more weight if the domain is long enough
if (line.length > 44) {
domainCountMap[apexDomain] += 3.5;
} else if (line.length > 34) {
domainCountMap[apexDomain] += 2.5;
} else if (line.length > 29) {
domainCountMap[apexDomain] += 1.5; domainCountMap[apexDomain] += 1.5;
} else if (line.length > 24) {
domainCountMap[apexDomain] += 0.75;
} else if (line.length > 19) {
domainCountMap[apexDomain] += 0.25;
}
if (domainCountMap[apexDomain] < 5) {
const subdomain = parsed.subdomain;
if (subdomain?.includes('.')) {
domainCountMap[apexDomain] += 1.5;
}
} }
} }
} }
} }
}); });
const domainSorter = createDomainSorter(gorhill);
const results = traceSync('* get final results', () => Object.entries(domainCountMap) const results = traceSync('* get final results', () => Object.entries(domainCountMap)
.reduce((acc, [apexDomain, count]) => { .reduce((acc, [apexDomain, count]) => {
if (count >= 5) { if (count >= 5) {
@ -151,7 +172,7 @@ const buildPhishingDomainSet = task(__filename, async () => {
' - https://gitlab.com/malware-filter/phishing-filter' ' - https://gitlab.com/malware-filter/phishing-filter'
]; ];
await Promise.all(createRuleset( return Promise.all(createRuleset(
'Sukka\'s Ruleset - Reject Phishing', 'Sukka\'s Ruleset - Reject Phishing',
description, description,
new Date(), new Date(),

View File

@ -28,7 +28,7 @@ const buildPublicHtml = task(__filename, async () => {
const html = template(list); const html = template(list);
await fs.promises.writeFile(path.join(publicPath, 'index.html'), html, 'utf-8'); return fs.promises.writeFile(path.join(publicPath, 'index.html'), html, 'utf-8');
}); });
module.exports.buildPublicHtml = buildPublicHtml; module.exports.buildPublicHtml = buildPublicHtml;

View File

@ -208,7 +208,7 @@ const buildRejectDomainSet = task(__filename, async () => {
...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`) ...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
]; ];
await Promise.all([ return Promise.all([
...createRuleset( ...createRuleset(
'Sukka\'s Ruleset - Reject Base', 'Sukka\'s Ruleset - Reject Base',
description, description,

View File

@ -65,7 +65,13 @@ const buildSpeedtestDomainSet = task(__filename, async () => {
'.speedtest.idv.tw', '.speedtest.idv.tw',
'.speedtest.frontier.com', '.speedtest.frontier.com',
'.speedtest.orange.fr', '.speedtest.orange.fr',
'.speedtest.centurylink.net' '.speedtest.centurylink.net',
'.srvr.bell.ca',
'.speedtest.contabo.net',
'speedtest.hk.chinamobile.com',
'speedtestbb.hk.chinamobile.com',
'.hizinitestet.com',
'.linknetspeedtest.net.br'
]); ]);
const hostnameGroups = await Promise.all([ const hostnameGroups = await Promise.all([
@ -114,7 +120,7 @@ const buildSpeedtestDomainSet = task(__filename, async () => {
'GitHub: https://github.com/SukkaW/Surge' 'GitHub: https://github.com/SukkaW/Surge'
]; ];
await Promise.all(createRuleset( return Promise.all(createRuleset(
'Sukka\'s Ruleset - Speedtest Domains', 'Sukka\'s Ruleset - Speedtest Domains',
description, description,
new Date(), new Date(),

View File

@ -17,14 +17,14 @@ const buildTelegramCIDR = task(__filename, async () => {
for await (const line of createReadlineInterfaceFromResponse(resp)) { for await (const line of createReadlineInterfaceFromResponse(resp)) {
const cidr = processLine(line); const cidr = processLine(line);
if (cidr) { if (!cidr) continue;
const [subnet] = cidr.split('/');
if (isIPv4(subnet)) { const [subnet] = cidr.split('/');
results.push(`IP-CIDR,${cidr},no-resolve`); if (isIPv4(subnet)) {
} results.push(`IP-CIDR,${cidr},no-resolve`);
if (isIPv6(subnet)) { }
results.push(`IP-CIDR6,${cidr},no-resolve`); if (isIPv6(subnet)) {
} results.push(`IP-CIDR6,${cidr},no-resolve`);
} }
} }
@ -40,7 +40,7 @@ const buildTelegramCIDR = task(__filename, async () => {
' - https://core.telegram.org/resources/cidr.txt' ' - https://core.telegram.org/resources/cidr.txt'
]; ];
await Promise.all(createRuleset( return Promise.all(createRuleset(
'Sukka\'s Ruleset - Telegram IP CIDR', 'Sukka\'s Ruleset - Telegram IP CIDR',
description, description,
date, date,

View File

@ -33,7 +33,6 @@ const downloadPreviousBuild = task(__filename, async () => {
if (!isCI) { if (!isCI) {
allFileExists = fs.existsSync(join(__dirname, '..', line)); allFileExists = fs.existsSync(join(__dirname, '..', line));
if (!allFileExists) { if (!allFileExists) {
console.log(`File not exists: ${line}`);
break; break;
} }
} }
@ -73,33 +72,27 @@ const downloadPreviousBuild = task(__filename, async () => {
await Promise.all(filesList.map(async p => { await Promise.all(filesList.map(async p => {
const src = join(extractedPath, 'Surge-gh-pages', p); const src = join(extractedPath, 'Surge-gh-pages', p);
if (await fileExists(src)) { if (await fileExists(src)) {
const dst = join(__dirname, '..', p);
console.log('Copy', { src, dst });
return fse.copy( return fse.copy(
src, src,
join(__dirname, '..', p), join(__dirname, '..', p),
{ overwrite: true } { overwrite: true }
); );
} }
console.log('File not exists:', src);
})); }));
await fs.promises.unlink(extractedPath).catch(() => { }); return fs.promises.unlink(extractedPath).catch(() => { });
}); });
const downloadPublicSuffixList = task(__filename, async () => { const downloadPublicSuffixList = task(__filename, async () => {
const publicSuffixDir = resolve(__dirname, '../node_modules/.cache'); const publicSuffixDir = resolve(__dirname, '../node_modules/.cache');
const publicSuffixPath = join(publicSuffixDir, 'public_suffix_list_dat.txt'); const publicSuffixPath = join(publicSuffixDir, 'public_suffix_list_dat.txt');
console.log('Download public suffix list.');
const [resp] = await Promise.all([ const [resp] = await Promise.all([
fetch('https://publicsuffix.org/list/public_suffix_list.dat'), fetch('https://publicsuffix.org/list/public_suffix_list.dat'),
fse.ensureDir(publicSuffixDir) fse.ensureDir(publicSuffixDir)
]); ]);
await pipeline( return pipeline(
Readable.fromWeb(resp.body), Readable.fromWeb(resp.body),
fs.createWriteStream(publicSuffixPath) fs.createWriteStream(publicSuffixPath)
); );

View File

@ -1,3 +1,5 @@
// @ts-check
const { downloadPreviousBuild, downloadPublicSuffixList } = require('./download-previous-build'); const { downloadPreviousBuild, downloadPublicSuffixList } = require('./download-previous-build');
const { buildCommon } = require('./build-common'); const { buildCommon } = require('./build-common');
const { buildAntiBogusDomain } = require('./build-anti-bogus-domain'); const { buildAntiBogusDomain } = require('./build-anti-bogus-domain');
@ -47,7 +49,7 @@ const requireWorker = (path) => {
* @param {WithWorker<T>} worker * @param {WithWorker<T>} worker
*/ */
const endWorker = async (worker) => { const endWorker = async (worker) => {
const { forceExited } = worker.end(); const { forceExited } = await worker.end();
if (forceExited && worker.__sukka_worker_name) { if (forceExited && worker.__sukka_worker_name) {
console.log(worker.__sukka_worker_name, 'forceExited'); console.log(worker.__sukka_worker_name, 'forceExited');
} }
@ -72,7 +74,10 @@ const endWorker = async (worker) => {
downloadPublicSuffixListPromise downloadPublicSuffixListPromise
]).then(() => buildCdnConf()); ]).then(() => buildCdnConf());
// build:phishing-domainset // build:phishing-domainset
const buildPhilishingDomainsetPromise = downloadPreviousBuildPromise.then(() => buildPhishingDomainSet()); const buildPhilishingDomainsetPromise = Promise.all([
downloadPreviousBuildPromise,
downloadPublicSuffixListPromise
]).then(() => buildPhishingDomainSet());
// build:reject-domainset // build:reject-domainset
const buildRejectDomainSetPromise = Promise.all([ const buildRejectDomainSetPromise = Promise.all([
downloadPreviousBuildPromise, downloadPreviousBuildPromise,
@ -87,6 +92,7 @@ const endWorker = async (worker) => {
const buildSpeedtestDomainSetPromise = downloadPreviousBuildPromise.then(() => buildSpeedtestDomainSet()); const buildSpeedtestDomainSetPromise = downloadPreviousBuildPromise.then(() => buildSpeedtestDomainSet());
// build:internal-cdn-rules // build:internal-cdn-rules
const buildInternalCDNDomainsPromise = Promise.all([ const buildInternalCDNDomainsPromise = Promise.all([
downloadPublicSuffixListPromise,
buildCommonPromise, buildCommonPromise,
buildCdnConfPromise buildCdnConfPromise
]).then(() => buildInternalCDNDomains()); ]).then(() => buildInternalCDNDomains());
@ -97,7 +103,7 @@ const endWorker = async (worker) => {
// build:domestic-ruleset // build:domestic-ruleset
const buildDomesticRulesetPromise = downloadPreviousBuildPromise.then(() => buildDomesticRuleset()); const buildDomesticRulesetPromise = downloadPreviousBuildPromise.then(() => buildDomesticRuleset());
await Promise.all([ const stats = await Promise.all([
downloadPreviousBuildPromise, downloadPreviousBuildPromise,
downloadPublicSuffixListPromise, downloadPublicSuffixListPromise,
buildCommonPromise, buildCommonPromise,
@ -120,4 +126,30 @@ const endWorker = async (worker) => {
validate(), validate(),
endWorker(buildInternalReverseChnCIDRWorker) endWorker(buildInternalReverseChnCIDRWorker)
]); ]);
printStats(stats);
})(); })();
/**
 * Print a text timeline of the build: one row per task, offset by when the
 * task started (relative to the earliest start) and as wide as it ran.
 *
 * Entries without finite numeric `start`/`end` and a string `taskName` are
 * skipped instead of crashing the report, since not every awaited build step
 * is guaranteed to resolve to a stat record. The input array is not mutated.
 *
 * @param {Array<{ start: number, end: number, taskName: string }>} stats
 */
function printStats(stats) {
  // `filter` yields a fresh array, so the sort below never reorders the caller's data.
  const rows = stats
    .filter(stat => stat != null
      && Number.isFinite(stat.start)
      && Number.isFinite(stat.end)
      && typeof stat.taskName === 'string')
    // sort stats by start time
    .sort((a, b) => a.start - b.start);

  // Nothing to draw; also avoids Math.max()/Math.min() on an empty list.
  if (rows.length === 0) return;

  const longestTaskName = Math.max(...rows.map(i => i.taskName.length));
  const realStart = Math.min(...rows.map(i => i.start));
  const realEnd = Math.max(...rows.map(i => i.end));

  const totalMs = realEnd - realStart;
  // The chart targets roughly 160 columns. Clamp the step to at least 1 so a
  // run shorter than 160 time units does not divide by zero and flatten every bar.
  const statsStep = Math.max(Math.trunc(totalMs / 160), 1);

  rows.forEach(stat => {
    console.log(
      `[${stat.taskName}]${' '.repeat(longestTaskName - stat.taskName.length)}`,
      ' '.repeat(Math.trunc((stat.start - realStart) / statsStep)),
      // Always draw at least one character so very short tasks stay visible.
      '='.repeat(Math.max(Math.trunc((stat.end - stat.start) / statsStep), 1))
    );
  });
}

View File

@ -9,9 +9,7 @@ const sharedConfig = { allowPrivateDomains: true };
* @param {string} domain * @param {string} domain
* @returns {ReturnType<import('tldts').parse>} * @returns {ReturnType<import('tldts').parse>}
*/ */
module.exports.parse = (domain) => { module.exports.parse = (domain) => cache.sync(domain, () => tldts.parse(domain, sharedConfig));
return cache.sync(domain, () => tldts.parse(domain, sharedConfig));
};
let gothillGetDomainCache = null; let gothillGetDomainCache = null;
/** /**
@ -22,5 +20,5 @@ module.exports.createCachedGorhillGetDomain = (gorhill) => {
/** /**
* @param {string} domain * @param {string} domain
*/ */
return (domain) => gothillGetDomainCache.sync(domain, () => gorhill.getDomain(domain[0] === '.' ? domain.slice(1) : domain)); return (domain) => (/** @type {ReturnType<typeof createCache>} */ (gothillGetDomainCache)).sync(domain, () => gorhill.getDomain(domain[0] === '.' ? domain.slice(1) : domain));
}; };

View File

@ -3,13 +3,6 @@ const fs = require('fs');
const path = require('path'); const path = require('path');
const publicSuffixPath = path.resolve(__dirname, '../../node_modules/.cache/public_suffix_list_dat.txt'); const publicSuffixPath = path.resolve(__dirname, '../../node_modules/.cache/public_suffix_list_dat.txt');
/**
 * Load the raw text of the public suffix list.
 *
 * Reads the copy at `publicSuffixPath` when that file exists; otherwise logs
 * a notice and downloads the list from publicsuffix.org.
 *
 * @returns {Promise<string>} contents of public_suffix_list.dat
 */
const getPublicSuffixListDat = () => {
  const hasLocalCopy = fs.existsSync(publicSuffixPath);

  if (!hasLocalCopy) {
    console.log('public_suffix_list.dat not found, fetch directly from remote.');
    return fetch('https://publicsuffix.org/list/public_suffix_list.dat').then((response) => response.text());
  }

  return fs.promises.readFile(publicSuffixPath, 'utf-8');
};
const getGorhillPublicSuffix = async () => { const getGorhillPublicSuffix = async () => {
const customFetch = async (url) => { const customFetch = async (url) => {
@ -20,7 +13,12 @@ const getGorhillPublicSuffix = async () => {
}; };
const [publicSuffixListDat, { default: gorhill }] = await Promise.all([ const [publicSuffixListDat, { default: gorhill }] = await Promise.all([
getPublicSuffixListDat(), fs.existsSync(publicSuffixPath)
? fs.promises.readFile(publicSuffixPath, 'utf-8')
: fetch('https://publicsuffix.org/list/public_suffix_list.dat').then(r => {
console.log('public_suffix_list.dat not found, fetch directly from remote.');
return r.text();
}),
import('gorhill-publicsuffixlist') import('gorhill-publicsuffixlist')
]); ]);
@ -30,7 +28,7 @@ const getGorhillPublicSuffix = async () => {
return gorhill; return gorhill;
}; };
/** @type {Promise<import('gorhill-publicsuffixlist').default | null>} */ /** @type {Promise<import('gorhill-publicsuffixlist').default> | null} */
let gorhillPublicSuffixPromise = null; let gorhillPublicSuffixPromise = null;
module.exports.getGorhillPublicSuffixPromise = () => { module.exports.getGorhillPublicSuffixPromise = () => {
gorhillPublicSuffixPromise ||= getGorhillPublicSuffix(); gorhillPublicSuffixPromise ||= getGorhillPublicSuffix();

View File

@ -1,24 +0,0 @@
// @ts-check
const tldts = require('./cached-tld-parse');
/**
 * Normalize a domain based on the result of `tldts.parse`.
 *
 * Yields `null` for empty input, when the parse result is flagged `isIp`, and
 * when neither `isIcann` nor `isPrivate` is set. Otherwise yields the parsed
 * hostname with one leading dot removed (the hostname itself may be nullish).
 *
 * @param {string | null | undefined} domain
 */
module.exports.normalizeDomain = (domain) => {
  if (!domain) {
    return null;
  }

  const { isIcann: icann, isPrivate: priv, hostname: host, isIp: ip } = tldts.parse(domain);

  // IPs and names without a recognised suffix flag are rejected outright.
  if (ip || !(icann || priv)) {
    return null;
  }

  return host?.[0] === '.' ? host.slice(1) : host;
};

View File

@ -1,8 +1,8 @@
// @ts-check // @ts-check
const { fetchWithRetry } = require('./fetch-retry'); const { fetchWithRetry } = require('./fetch-retry');
const tldts = require('tldts');
const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-text-by-line'); const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-text-by-line');
const { NetworkFilter } = require('@cliqz/adblocker'); const { NetworkFilter } = require('@cliqz/adblocker');
const { normalizeDomain } = require('./is-domain-loose');
const { processLine } = require('./process-line'); const { processLine } = require('./process-line');
const { performance } = require('perf_hooks'); const { performance } = require('perf_hooks');
@ -19,6 +19,22 @@ const warnOnce = (url, isWhite, ...message) => {
console.warn(url, isWhite ? '(white)' : '(black)', ...message); console.warn(url, isWhite ? '(white)' : '(black)', ...message);
}; };
/**
 * Normalize a domain based on the result of `tldts.parse`.
 *
 * Returns `null` for falsy input, for results flagged `isIp`, and for results
 * where neither `isIcann` nor `isPrivate` is set; otherwise returns the parsed
 * hostname without a single leading dot.
 *
 * @param {string | null | undefined} domain
 */
const normalizeDomain = (domain) => {
  if (!domain) {
    return null;
  }

  const { isIcann: icann, isPrivate: priv, hostname: host, isIp: ip } = tldts.parse(domain);

  if (ip) {
    return null;
  }
  if (!icann && !priv) {
    return null;
  }

  // Strip one leading dot; a nullish hostname is passed through unchanged.
  return host?.[0] === '.' ? host.slice(1) : host;
};
/** /**
* @param {string | URL} domainListsUrl * @param {string | URL} domainListsUrl
*/ */

View File

@ -6,7 +6,7 @@
* *
* @param {string} line * @param {string} line
*/ */
module.exports.processLine = (line) => { const processLine = (line) => {
if (!line) { if (!line) {
return null; return null;
} }
@ -30,3 +30,19 @@ module.exports.processLine = (line) => {
return trimmed; return trimmed;
}; };
module.exports.processLine = processLine;
/**
 * Consume every line of a readline interface, pass each one through
 * `processLine`, and collect the results that are truthy, in input order.
 *
 * @param {import('readline').ReadLine} rl
 * @returns {Promise<string[]>} the kept `processLine` results
 */
module.exports.processLineFromReadline = async (rl) => {
  /** @type {string[]} */
  const kept = [];

  for await (const rawLine of rl) {
    const processed = processLine(rawLine);
    // Falsy results (e.g. null) are dropped.
    if (!processed) {
      continue;
    }
    kept.push(processed);
  }

  return kept;
};

View File

@ -40,8 +40,13 @@ module.exports.traceAsync = traceAsync;
*/ */
module.exports.task = (__filename, fn, customname = null) => { module.exports.task = (__filename, fn, customname = null) => {
const taskName = customname ?? path.basename(__filename, path.extname(__filename)); const taskName = customname ?? path.basename(__filename, path.extname(__filename));
return () => { return async () => {
console.log(`🏃 [${taskName}] Start executing`); console.log(`🏃 [${taskName}] Start executing`);
return traceAsync(`✅ [${taskName}] Executed successfully`, fn); const start = performance.now();
await fn();
const end = performance.now();
console.log(`✅ [${taskName}] Executed successfully: ${(end - start).toFixed(3)}ms`);
return { start, end, taskName };
}; };
}; };

View File

@ -59,14 +59,15 @@ const _validateRuleset = async (filePath) => {
}; };
const validate = task(__filename, async () => { const validate = task(__filename, async () => {
const [domainsetFiles, _rulesetFiles] = await Promise.all([ // const [domainsetFiles, _rulesetFiles] = await Promise.all([
listDir(path.resolve(__dirname, '../List/domainset')), // listDir(path.resolve(__dirname, '../List/domainset')),
listDir(path.resolve(__dirname, '../List/non_ip')) // listDir(path.resolve(__dirname, '../List/non_ip'))
]); // ]);
await Promise.all( return Promise.all([
domainsetFiles.map(file => validateDomainSet(file)) listDir(path.resolve(__dirname, '../List/domainset'))
.then(domainsetFiles => Promise.all(domainsetFiles.map(file => validateDomainSet(file))))
// rulesetFiles.map(file => validateRuleset(file)) // rulesetFiles.map(file => validateRuleset(file))
); ]);
}); });
module.exports.validate = validate; module.exports.validate = validate;