Perf: speed up build

This commit is contained in:
SukkaW 2023-09-15 22:35:46 +08:00
parent 30cab8fc22
commit d5850aa84b
23 changed files with 241 additions and 184 deletions

View File

@ -6,7 +6,7 @@ const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('.
const { processLine } = require('./lib/process-line');
const { task } = require('./lib/trace-runner');
const buildAntiBogusDomain = task(__filename, async () => {
const getBogusNxDomainIPs = async () => {
/** @type {string[]} */
const res = [];
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')) {
@ -14,20 +14,27 @@ const buildAntiBogusDomain = task(__filename, async () => {
res.push(line.replace('bogus-nxdomain=', ''));
}
}
return res;
};
const buildAntiBogusDomain = task(__filename, async () => {
const filePath = path.resolve(__dirname, '../Source/ip/reject.conf');
const bogusIpPromise = getBogusNxDomainIPs();
/** @type {string[]} */
const result = [];
for await (const line of readFileByLine(filePath)) {
if (line === '# --- [Anti Bogus Domain Replace Me] ---') {
res.forEach(ip => {
(await bogusIpPromise).forEach(ip => {
if (isIPv4(ip)) {
result.push(`IP-CIDR,${ip}/32,no-resolve`);
} else if (isIPv6(ip)) {
result.push(`IP-CIDR6,${ip}/128,no-resolve`);
}
});
continue;
} else {
const l = processLine(line);
if (l) {
@ -47,7 +54,7 @@ const buildAntiBogusDomain = task(__filename, async () => {
' - https://github.com/felixonmars/dnsmasq-china-list'
];
await Promise.all(createRuleset(
return Promise.all(createRuleset(
'Sukka\'s Ruleset - Anti Bogus Domain',
description,
new Date(),

View File

@ -20,7 +20,7 @@ const buildAppleCdn = task(__filename, async () => {
const ruleset = res.map(domain => `DOMAIN-SUFFIX,${domain}`);
const domainset = res.map(i => `.${i}`);
await Promise.all([
return Promise.all([
...createRuleset(
'Sukka\'s Ruleset - Apple CDN',
description,

View File

@ -9,7 +9,7 @@ const { processLine } = require('./lib/process-line');
const publicSuffixPath = path.resolve(__dirname, '../node_modules/.cache/public_suffix_list_dat.txt');
const buildCdnConf = task(__filename, async () => {
const getS3OSSDomains = async () => {
const trie = new Trie();
if (fs.existsSync(publicSuffixPath)) {
@ -46,13 +46,19 @@ const buildCdnConf = task(__filename, async () => {
}
});
return S3OSSDomains;
};
const buildCdnConf = task(__filename, async () => {
/** @type {string[]} */
const cdnDomainsList = [];
const getS3OSSDomainsPromise = getS3OSSDomains();
for await (const l of readFileByLine(path.resolve(__dirname, '../Source/non_ip/cdn.conf'))) {
if (l === '# --- [AWS S3 Replace Me] ---') {
S3OSSDomains.forEach(domain => {
cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`);
});
(await getS3OSSDomainsPromise).forEach(domain => { cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`); });
continue;
}
const line = processLine(l);
if (line) {

View File

@ -3,7 +3,7 @@ const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remot
const { resolve: pathResolve } = require('path');
// This should not use `createRuleset` API since we are going to generate ipcidr for Clash
const { compareAndWriteFile, withBannerArray } = require('./lib/create-file');
const { processLine } = require('./lib/process-line');
const { processLineFromReadline } = require('./lib/process-line');
const { task } = require('./lib/trace-runner');
// https://github.com/misakaio/chnroutes2/issues/25
@ -13,20 +13,12 @@ const EXCLUDE_CIDRS = [
];
const buildChnCidr = task(__filename, async () => {
const { exclude: excludeCidrs } = await import('cidr-tools-wasm');
const [{ exclude: excludeCidrs }, cidr] = await Promise.all([
import('cidr-tools-wasm'),
processLineFromReadline(await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt'))
]);
/** @type {string[]} */
const cidr = [];
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')) {
const l = processLine(line);
if (l) {
cidr.push(l);
}
}
console.log('Before Merge:', cidr.length);
const filteredCidr = excludeCidrs(cidr, EXCLUDE_CIDRS, true);
console.log('After Merge:', filteredCidr.length);
const description = [
'License: CC BY-SA 2.0',
@ -36,7 +28,7 @@ const buildChnCidr = task(__filename, async () => {
'Data from https://misaka.io (misakaio @ GitHub)'
];
await Promise.all([
return Promise.all([
compareAndWriteFile(
withBannerArray(
'Sukka\'s Ruleset - Mainland China IPv4 CIDR',

View File

@ -17,7 +17,7 @@ const outputSurgeDir = path.resolve(__dirname, '../List');
const outputClashDir = path.resolve(__dirname, '../Clash');
const buildCommon = task(__filename, async () => {
/** @type {Promise<void>[]} */
/** @type {Promise<unknown>[]} */
const promises = [];
const pw = new PathScurry(sourceDir);
@ -107,7 +107,7 @@ async function transformDomainset(sourcePath, relativePath) {
)
];
await Promise.all(createRuleset(
return Promise.all(createRuleset(
title,
description,
new Date(),
@ -140,7 +140,7 @@ async function transformRuleset(sourcePath, relativePath) {
)
];
await Promise.all(createRuleset(
return Promise.all(createRuleset(
title,
description,
new Date(),

View File

@ -2,33 +2,22 @@
const path = require('path');
const { DOMESTICS } = require('../Source/non_ip/domestic');
const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
const { processLine } = require('./lib/process-line');
const { processLineFromReadline } = require('./lib/process-line');
const { compareAndWriteFile, createRuleset } = require('./lib/create-file');
const domainSorter = require('./lib/stable-sort-domain');
const { task } = require('./lib/trace-runner');
const buildDomesticRuleset = task(__filename, async () => {
const rl = readFileByLine(path.resolve(__dirname, '../Source/non_ip/domestic.conf'));
const results = [];
for await (const l of rl) {
const line = processLine(l);
if (line) {
results.push(line);
}
}
const results = await processLineFromReadline(readFileByLine(path.resolve(__dirname, '../Source/non_ip/domestic.conf')));
results.push(
...Object.entries(DOMESTICS)
.reduce(
(acc, [key, { domains }]) => {
if (key === 'SYSTEM') {
return acc;
}
if (key === 'SYSTEM') return acc;
return [...acc, ...domains];
},
/** @type {string[]} */([])
)
.sort(domainSorter)
.map((domain) => `DOMAIN-SUFFIX,${domain}`)
);
@ -40,7 +29,7 @@ const buildDomesticRuleset = task(__filename, async () => {
'This file contains known addresses that are avaliable in the Mainland China.'
];
await Promise.all([
return Promise.all([
...createRuleset(
'Sukka\'s Ruleset - Domestic Domains',
rulesetDescription,

View File

@ -4,9 +4,11 @@ const path = require('path');
const tldts = require('tldts');
const { processLine } = require('./lib/process-line');
const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
const domainSorter = require('./lib/stable-sort-domain');
const { createDomainSorter } = require('./lib/stable-sort-domain');
const { task } = require('./lib/trace-runner');
const { compareAndWriteFile } = require('./lib/create-file');
const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix');
const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse');
/**
* @param {string} string
@ -19,11 +21,15 @@ const buildInternalCDNDomains = task(__filename, async () => {
const set = new Set();
const keywords = new Set();
const gorhill = await getGorhillPublicSuffixPromise();
const getDomain = createCachedGorhillGetDomain(gorhill);
const domainSorter = createDomainSorter(gorhill);
/**
* @param {string} input
*/
const addApexDomain = (input) => {
const d = tldts.getDomain(input, { allowPrivateDomains: true });
const d = getDomain(input);
if (d) {
set.add(d);
}
@ -35,7 +41,8 @@ const buildInternalCDNDomains = task(__filename, async () => {
const processLocalDomainSet = async (domainSetPath) => {
for await (const line of readFileByLine(domainSetPath)) {
const parsed = tldts.parse(line, { allowPrivateDomains: true });
if (!parsed.isIp && (parsed.isIcann || parsed.isPrivate)) {
if (parsed.isIp) continue;
if (parsed.isIcann || parsed.isPrivate) {
if (parsed.domain) {
set.add(parsed.domain);
}
@ -80,7 +87,7 @@ const buildInternalCDNDomains = task(__filename, async () => {
fse.ensureDir(path.resolve(__dirname, '../List/internal'))
]);
await compareAndWriteFile(
return compareAndWriteFile(
[
...Array.from(set).sort(domainSorter).map(i => `SUFFIX,${i}`),
...Array.from(keywords).sort().map(i => `REGEX,${i}`)

View File

@ -11,7 +11,7 @@ const buildInternalChnDomains = task(__filename, async () => {
fse.ensureDir(path.resolve(__dirname, '../List/internal'))
]);
await compareAndWriteFile(
return compareAndWriteFile(
result.map(line => `SUFFIX,${line}`),
path.resolve(__dirname, '../List/internal/accelerated-china-domains.txt')
);

View File

@ -1,6 +1,6 @@
// @ts-check
const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remote-text-by-line');
const { processLine } = require('./lib/process-line');
const { processLineFromReadline } = require('./lib/process-line');
const path = require('path');
const fse = require('fs-extra');
const fs = require('fs');
@ -25,16 +25,11 @@ const RESERVED_IPV4_CIDR = [
];
const buildInternalReverseChnCIDR = task(__filename, async () => {
const { exclude } = await import('cidr-tools-wasm');
/** @type {string[]} */
const cidr = [];
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')) {
const l = processLine(line);
if (l) {
cidr.push(l);
}
}
const [{ exclude }, cidr] = await Promise.all([
import('cidr-tools-wasm'),
processLineFromReadline(await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')),
fse.ensureDir(path.resolve(__dirname, '../List/internal'))
]);
const reversedCidr = exclude(
['0.0.0.0/0'],
@ -42,8 +37,7 @@ const buildInternalReverseChnCIDR = task(__filename, async () => {
true
);
await fse.ensureDir(path.resolve(__dirname, '../List/internal'));
await fs.promises.writeFile(
return fs.promises.writeFile(
path.resolve(__dirname, '../List/internal/reversed-chn-cidr.txt'),
`${reversedCidr.join('\n')}\n`
);

View File

@ -1,10 +1,14 @@
const tldts = require('tldts');
// @ts-check
const { processFilterRules } = require('./lib/parse-filter.js');
const path = require('path');
const { createRuleset } = require('./lib/create-file');
const { processLine } = require('./lib/process-line.js');
const domainSorter = require('./lib/stable-sort-domain');
const { createDomainSorter } = require('./lib/stable-sort-domain');
const { traceSync, task } = require('./lib/trace-runner.js');
const Trie = require('./lib/trie.js');
const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix.js');
const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse.js');
const tldts = require('tldts');
const WHITELIST_DOMAIN = new Set([
'w3s.link',
@ -61,77 +65,94 @@ const BLACK_TLD = new Set([
]);
const buildPhishingDomainSet = task(__filename, async () => {
const domainSet = Array.from((await processFilterRules(
'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
// [
// 'https://malware-filter.gitlab.io/phishing-filter/phishing-filter-agh.txt',
// 'https://malware-filter.pages.dev/phishing-filter-agh.txt',
// 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
// ]
)).black);
const [{ black: domainSet }, gorhill] = await Promise.all([
processFilterRules(
'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
[
'https://malware-filter.gitlab.io/phishing-filter/phishing-filter-agh.txt',
'https://malware-filter.pages.dev/phishing-filter-agh.txt',
'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
]
),
getGorhillPublicSuffixPromise()
]);
traceSync('* whitelist', () => {
const trieForRemovingWhiteListed = Trie.from(domainSet);
WHITELIST_DOMAIN.forEach(white => {
trieForRemovingWhiteListed.find(`.${white}`, false).forEach(f => domainSet.delete(f));
if (trieForRemovingWhiteListed.has(white)) {
domainSet.delete(white);
}
});
});
const domainCountMap = {};
const getDomain = createCachedGorhillGetDomain(gorhill);
traceSync('* process domain set', () => {
for (let i = 0, len = domainSet.length; i < len; i++) {
const line = processLine(domainSet[i]);
const domainArr = Array.from(domainSet);
for (let i = 0, len = domainArr.length; i < len; i++) {
const line = processLine(domainArr[i]);
if (!line) continue;
const parsed = tldts.parse(line, { allowPrivateDomains: true });
const apexDomain = parsed.domain;
const apexDomain = getDomain(line);
if (!apexDomain) continue;
if (apexDomain) {
if (WHITELIST_DOMAIN.has(apexDomain)) {
continue;
domainCountMap[apexDomain] ||= 0;
const isPhishingDomainMockingCoJp = line.includes('-co-jp');
if (isPhishingDomainMockingCoJp) {
domainCountMap[apexDomain] += 0.5;
}
if (line.startsWith('.amaz')) {
domainCountMap[apexDomain] += 0.5;
if (line.startsWith('.amazon-')) {
domainCountMap[apexDomain] += 4.5;
}
if (isPhishingDomainMockingCoJp) {
domainCountMap[apexDomain] += 4;
}
} else if (line.startsWith('.customer')) {
domainCountMap[apexDomain] += 0.25;
}
domainCountMap[apexDomain] ||= 0;
const tld = gorhill.getPublicSuffix(line[0] === '.' ? line.slice(1) : line);
if (!tld || !BLACK_TLD.has(tld)) continue;
let isPhishingDomainMockingAmazon = false;
if (line.startsWith('.amaz')) {
domainCountMap[apexDomain] += 0.5;
domainCountMap[apexDomain] += 1;
isPhishingDomainMockingAmazon = true;
const lineLen = line.length;
if (line.startsWith('.amazon-')) {
domainCountMap[apexDomain] += 4.5;
}
} else if (line.startsWith('.customer')) {
if (lineLen > 19) {
// Add more weight if the domain is long enough
if (lineLen > 44) {
domainCountMap[apexDomain] += 3.5;
} else if (lineLen > 34) {
domainCountMap[apexDomain] += 2.5;
} else if (lineLen > 29) {
domainCountMap[apexDomain] += 1.5;
} else if (lineLen > 24) {
domainCountMap[apexDomain] += 0.75;
} else {
domainCountMap[apexDomain] += 0.25;
}
if (line.includes('-co-jp')) {
domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5);
}
const tld = parsed.publicSuffix;
if (!tld || !BLACK_TLD.has(tld)) continue;
domainCountMap[apexDomain] += 1;
if (line.length > 19) {
// Add more weight if the domain is long enough
if (line.length > 44) {
domainCountMap[apexDomain] += 3.5;
} else if (line.length > 34) {
domainCountMap[apexDomain] += 2.5;
} else if (line.length > 29) {
if (domainCountMap[apexDomain] < 5) {
const subdomain = tldts.getSubdomain(line);
if (subdomain?.includes('.')) {
domainCountMap[apexDomain] += 1.5;
} else if (line.length > 24) {
domainCountMap[apexDomain] += 0.75;
} else if (line.length > 19) {
domainCountMap[apexDomain] += 0.25;
}
if (domainCountMap[apexDomain] < 5) {
const subdomain = parsed.subdomain;
if (subdomain?.includes('.')) {
domainCountMap[apexDomain] += 1.5;
}
}
}
}
}
});
const domainSorter = createDomainSorter(gorhill);
const results = traceSync('* get final results', () => Object.entries(domainCountMap)
.reduce((acc, [apexDomain, count]) => {
if (count >= 5) {
@ -151,7 +172,7 @@ const buildPhishingDomainSet = task(__filename, async () => {
' - https://gitlab.com/malware-filter/phishing-filter'
];
await Promise.all(createRuleset(
return Promise.all(createRuleset(
'Sukka\'s Ruleset - Reject Phishing',
description,
new Date(),

View File

@ -28,7 +28,7 @@ const buildPublicHtml = task(__filename, async () => {
const html = template(list);
await fs.promises.writeFile(path.join(publicPath, 'index.html'), html, 'utf-8');
return fs.promises.writeFile(path.join(publicPath, 'index.html'), html, 'utf-8');
});
module.exports.buildPublicHtml = buildPublicHtml;

View File

@ -208,7 +208,7 @@ const buildRejectDomainSet = task(__filename, async () => {
...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
];
await Promise.all([
return Promise.all([
...createRuleset(
'Sukka\'s Ruleset - Reject Base',
description,

View File

@ -65,7 +65,13 @@ const buildSpeedtestDomainSet = task(__filename, async () => {
'.speedtest.idv.tw',
'.speedtest.frontier.com',
'.speedtest.orange.fr',
'.speedtest.centurylink.net'
'.speedtest.centurylink.net',
'.srvr.bell.ca',
'.speedtest.contabo.net',
'speedtest.hk.chinamobile.com',
'speedtestbb.hk.chinamobile.com',
'.hizinitestet.com',
'.linknetspeedtest.net.br'
]);
const hostnameGroups = await Promise.all([
@ -114,7 +120,7 @@ const buildSpeedtestDomainSet = task(__filename, async () => {
'GitHub: https://github.com/SukkaW/Surge'
];
await Promise.all(createRuleset(
return Promise.all(createRuleset(
'Sukka\'s Ruleset - Speedtest Domains',
description,
new Date(),

View File

@ -17,14 +17,14 @@ const buildTelegramCIDR = task(__filename, async () => {
for await (const line of createReadlineInterfaceFromResponse(resp)) {
const cidr = processLine(line);
if (cidr) {
const [subnet] = cidr.split('/');
if (isIPv4(subnet)) {
results.push(`IP-CIDR,${cidr},no-resolve`);
}
if (isIPv6(subnet)) {
results.push(`IP-CIDR6,${cidr},no-resolve`);
}
if (!cidr) continue;
const [subnet] = cidr.split('/');
if (isIPv4(subnet)) {
results.push(`IP-CIDR,${cidr},no-resolve`);
}
if (isIPv6(subnet)) {
results.push(`IP-CIDR6,${cidr},no-resolve`);
}
}
@ -40,7 +40,7 @@ const buildTelegramCIDR = task(__filename, async () => {
' - https://core.telegram.org/resources/cidr.txt'
];
await Promise.all(createRuleset(
return Promise.all(createRuleset(
'Sukka\'s Ruleset - Telegram IP CIDR',
description,
date,

View File

@ -33,7 +33,6 @@ const downloadPreviousBuild = task(__filename, async () => {
if (!isCI) {
allFileExists = fs.existsSync(join(__dirname, '..', line));
if (!allFileExists) {
console.log(`File not exists: ${line}`);
break;
}
}
@ -73,33 +72,27 @@ const downloadPreviousBuild = task(__filename, async () => {
await Promise.all(filesList.map(async p => {
const src = join(extractedPath, 'Surge-gh-pages', p);
if (await fileExists(src)) {
const dst = join(__dirname, '..', p);
console.log('Copy', { src, dst });
return fse.copy(
src,
join(__dirname, '..', p),
{ overwrite: true }
);
}
console.log('File not exists:', src);
}));
await fs.promises.unlink(extractedPath).catch(() => { });
return fs.promises.unlink(extractedPath).catch(() => { });
});
const downloadPublicSuffixList = task(__filename, async () => {
const publicSuffixDir = resolve(__dirname, '../node_modules/.cache');
const publicSuffixPath = join(publicSuffixDir, 'public_suffix_list_dat.txt');
console.log('Download public suffix list.');
const [resp] = await Promise.all([
fetch('https://publicsuffix.org/list/public_suffix_list.dat'),
fse.ensureDir(publicSuffixDir)
]);
await pipeline(
return pipeline(
Readable.fromWeb(resp.body),
fs.createWriteStream(publicSuffixPath)
);

View File

@ -1,3 +1,5 @@
// @ts-check
const { downloadPreviousBuild, downloadPublicSuffixList } = require('./download-previous-build');
const { buildCommon } = require('./build-common');
const { buildAntiBogusDomain } = require('./build-anti-bogus-domain');
@ -47,7 +49,7 @@ const requireWorker = (path) => {
* @param {WithWorker<T>} worker
*/
const endWorker = async (worker) => {
const { forceExited } = worker.end();
const { forceExited } = await worker.end();
if (forceExited && worker.__sukka_worker_name) {
console.log(worker.__sukka_worker_name, 'forceExited');
}
@ -72,7 +74,10 @@ const endWorker = async (worker) => {
downloadPublicSuffixListPromise
]).then(() => buildCdnConf());
// build:phishing-domainset
const buildPhilishingDomainsetPromise = downloadPreviousBuildPromise.then(() => buildPhishingDomainSet());
const buildPhilishingDomainsetPromise = Promise.all([
downloadPreviousBuildPromise,
downloadPublicSuffixListPromise
]).then(() => buildPhishingDomainSet());
// build:reject-domainset
const buildRejectDomainSetPromise = Promise.all([
downloadPreviousBuildPromise,
@ -87,6 +92,7 @@ const endWorker = async (worker) => {
const buildSpeedtestDomainSetPromise = downloadPreviousBuildPromise.then(() => buildSpeedtestDomainSet());
// build:internal-cdn-rules
const buildInternalCDNDomainsPromise = Promise.all([
downloadPublicSuffixListPromise,
buildCommonPromise,
buildCdnConfPromise
]).then(() => buildInternalCDNDomains());
@ -97,7 +103,7 @@ const endWorker = async (worker) => {
// build:domestic-ruleset
const buildDomesticRulesetPromise = downloadPreviousBuildPromise.then(() => buildDomesticRuleset());
await Promise.all([
const stats = await Promise.all([
downloadPreviousBuildPromise,
downloadPublicSuffixListPromise,
buildCommonPromise,
@ -120,4 +126,30 @@ const endWorker = async (worker) => {
validate(),
endWorker(buildInternalReverseChnCIDRWorker)
]);
printStats(stats);
})();
/**
 * Print a plain-text timeline of the executed tasks to the console, one row per task:
 * a left-aligned `[taskName]` label, leading spaces proportional to how long after the
 * earliest task it started, and a bar of `=` proportional to how long it ran.
 *
 * Entries that are not well-formed stat objects are ignored, and the array passed in
 * is left untouched (rows are ordered on a filtered copy).
 *
 * @param {Array<{ start: number, end: number, taskName: string }>} stats
 */
function printStats(stats) {
  // The caller feeds in the raw result of `Promise.all([...])`, which can contain
  // values that are not task stats (e.g. `endWorker()` resolves to `undefined`).
  // Dereferencing those would throw, so keep only usable entries. `filter` also
  // returns a new array, so sorting below does not reorder the caller's array.
  const validStats = stats
    .filter(stat => stat != null
      && typeof stat.taskName === 'string'
      && Number.isFinite(stat.start)
      && Number.isFinite(stat.end))
    // sort stats by start time
    .sort((a, b) => a.start - b.start);

  if (validStats.length === 0) return;

  const longestTaskName = Math.max(...validStats.map(stat => stat.taskName.length));
  // Sorted by start time, so the first entry holds the earliest start.
  const realStart = validStats[0].start;
  const realEnd = Math.max(...validStats.map(stat => stat.end));
  const totalMs = realEnd - realStart;
  // Milliseconds represented by one column; the chart is roughly 160 columns wide.
  // Clamp to 1 so that a total run shorter than 160ms does not produce a zero step
  // (dividing by zero would collapse every bar to a single `=` at offset 0).
  const statsStep = Math.max(Math.floor(totalMs / 160), 1);

  validStats.forEach(stat => {
    console.log(
      `[${stat.taskName}]${' '.repeat(longestTaskName - stat.taskName.length)}`,
      ' '.repeat(Math.floor((stat.start - realStart) / statsStep)),
      // Always draw at least one `=` so very short tasks stay visible.
      '='.repeat(Math.max(Math.floor((stat.end - stat.start) / statsStep), 1))
    );
  });
}

View File

@ -9,9 +9,7 @@ const sharedConfig = { allowPrivateDomains: true };
* @param {string} domain
* @returns {ReturnType<import('tldts').parse>}
*/
module.exports.parse = (domain) => {
return cache.sync(domain, () => tldts.parse(domain, sharedConfig));
};
module.exports.parse = (domain) => cache.sync(domain, () => tldts.parse(domain, sharedConfig));
let gothillGetDomainCache = null;
/**
@ -22,5 +20,5 @@ module.exports.createCachedGorhillGetDomain = (gorhill) => {
/**
* @param {string} domain
*/
return (domain) => gothillGetDomainCache.sync(domain, () => gorhill.getDomain(domain[0] === '.' ? domain.slice(1) : domain));
return (domain) => (/** @type {ReturnType<typeof createCache>} */ (gothillGetDomainCache)).sync(domain, () => gorhill.getDomain(domain[0] === '.' ? domain.slice(1) : domain));
};

View File

@ -3,13 +3,6 @@ const fs = require('fs');
const path = require('path');
const publicSuffixPath = path.resolve(__dirname, '../../node_modules/.cache/public_suffix_list_dat.txt');
const getPublicSuffixListDat = () => {
if (fs.existsSync(publicSuffixPath)) {
return fs.promises.readFile(publicSuffixPath, 'utf-8');
}
console.log('public_suffix_list.dat not found, fetch directly from remote.');
return fetch('https://publicsuffix.org/list/public_suffix_list.dat').then(r => r.text());
};
const getGorhillPublicSuffix = async () => {
const customFetch = async (url) => {
@ -20,7 +13,12 @@ const getGorhillPublicSuffix = async () => {
};
const [publicSuffixListDat, { default: gorhill }] = await Promise.all([
getPublicSuffixListDat(),
fs.existsSync(publicSuffixPath)
? fs.promises.readFile(publicSuffixPath, 'utf-8')
: fetch('https://publicsuffix.org/list/public_suffix_list.dat').then(r => {
console.log('public_suffix_list.dat not found, fetch directly from remote.');
return r.text();
}),
import('gorhill-publicsuffixlist')
]);
@ -30,7 +28,7 @@ const getGorhillPublicSuffix = async () => {
return gorhill;
};
/** @type {Promise<import('gorhill-publicsuffixlist').default | null>} */
/** @type {Promise<import('gorhill-publicsuffixlist').default> | null} */
let gorhillPublicSuffixPromise = null;
module.exports.getGorhillPublicSuffixPromise = () => {
gorhillPublicSuffixPromise ||= getGorhillPublicSuffix();

View File

@ -1,24 +0,0 @@
// @ts-check
const tldts = require('./cached-tld-parse');
/**
* @param {string | null | undefined} domain
*/
module.exports.normalizeDomain = (domain) => {
if (!domain) {
return null;
}
const { isIcann, isPrivate, hostname, isIp } = tldts.parse(domain);
if (isIp) {
return null;
}
if (isIcann || isPrivate) {
if (hostname?.[0] === '.') {
return hostname.slice(1);
}
return hostname;
}
return null;
};

View File

@ -1,8 +1,8 @@
// @ts-check
const { fetchWithRetry } = require('./fetch-retry');
const tldts = require('tldts');
const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-text-by-line');
const { NetworkFilter } = require('@cliqz/adblocker');
const { normalizeDomain } = require('./is-domain-loose');
const { processLine } = require('./process-line');
const { performance } = require('perf_hooks');
@ -19,6 +19,22 @@ const warnOnce = (url, isWhite, ...message) => {
console.warn(url, isWhite ? '(white)' : '(black)', ...message);
};
/**
 * Reduce a raw domain string to its parsed hostname, or reject it.
 *
 * Returns `null` for empty input, for IP addresses, and for anything `tldts.parse`
 * flags as neither ICANN nor private. Otherwise returns the parsed hostname with a
 * single leading dot removed, if one is present.
 *
 * @param {string | null | undefined} domain
 */
const normalizeDomain = (domain) => {
  if (!domain) return null;

  const parsed = tldts.parse(domain);
  const hasKnownSuffix = parsed.isIcann || parsed.isPrivate;
  if (parsed.isIp || !hasKnownSuffix) return null;

  const { hostname } = parsed;
  return hostname?.[0] === '.' ? hostname.slice(1) : hostname;
};
/**
* @param {string | URL} domainListsUrl
*/

View File

@ -6,7 +6,7 @@
*
* @param {string} line
*/
module.exports.processLine = (line) => {
const processLine = (line) => {
if (!line) {
return null;
}
@ -30,3 +30,19 @@ module.exports.processLine = (line) => {
return trimmed;
};
module.exports.processLine = processLine;
/**
 * Drain an async line source, pass every line through `processLine`, and collect
 * the truthy results in their original order.
 *
 * @param {import('readline').ReadLine} rl
 */
module.exports.processLineFromReadline = async (rl) => {
  /** @type {string[]} */
  const kept = [];
  for await (const rawLine of rl) {
    const processed = processLine(rawLine);
    // `processLine` yields a falsy value for lines that should be dropped.
    if (!processed) continue;
    kept.push(processed);
  }
  return kept;
};

View File

@ -40,8 +40,13 @@ module.exports.traceAsync = traceAsync;
*/
module.exports.task = (__filename, fn, customname = null) => {
const taskName = customname ?? path.basename(__filename, path.extname(__filename));
return () => {
return async () => {
console.log(`🏃 [${taskName}] Start executing`);
return traceAsync(`✅ [${taskName}] Executed successfully`, fn);
const start = performance.now();
await fn();
const end = performance.now();
console.log(`✅ [${taskName}] Executed successfully: ${(end - start).toFixed(3)}ms`);
return { start, end, taskName };
};
};

View File

@ -59,14 +59,15 @@ const _validateRuleset = async (filePath) => {
};
const validate = task(__filename, async () => {
const [domainsetFiles, _rulesetFiles] = await Promise.all([
listDir(path.resolve(__dirname, '../List/domainset')),
listDir(path.resolve(__dirname, '../List/non_ip'))
]);
await Promise.all(
domainsetFiles.map(file => validateDomainSet(file))
// const [domainsetFiles, _rulesetFiles] = await Promise.all([
// listDir(path.resolve(__dirname, '../List/domainset')),
// listDir(path.resolve(__dirname, '../List/non_ip'))
// ]);
return Promise.all([
listDir(path.resolve(__dirname, '../List/domainset'))
.then(domainsetFiles => Promise.all(domainsetFiles.map(file => validateDomainSet(file))))
// rulesetFiles.map(file => validateRuleset(file))
);
]);
});
module.exports.validate = validate;