Perf: further speed up infra

This commit is contained in:
SukkaW 2023-09-14 20:15:49 +08:00
parent adb8b43357
commit 78afa595a9
25 changed files with 431 additions and 173 deletions

View File

@ -1,11 +1,21 @@
{ {
"root": true, "root": true,
"extends": ["sukka/node"], "extends": ["sukka/node"],
"rules": { "ignorePatterns": [
"no-console": "off" "node_modules/",
}, // disable for now
"parserOptions": { "**/*.d.ts"
"ecmaVersion": "latest", ],
"sourceType": "module" "overrides": [
} {
"files": ["**/*.js"],
"rules": {
"no-console": "off"
},
"parserOptions": {
"ecmaVersion": "latest",
"sourceType": "module"
}
}
]
} }

View File

@ -5,11 +5,22 @@ const { minifyRules } = require('./lib/minify-rules');
const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('./lib/fetch-remote-text-by-line'); const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('./lib/fetch-remote-text-by-line');
const Trie = require('./lib/trie'); const Trie = require('./lib/trie');
const { runner } = require('./lib/trace-runner'); const { runner } = require('./lib/trace-runner');
const fs = require('fs');
const publicSuffixPath = path.resolve(__dirname, '../node_modules/.cache/public_suffix-list_dat.txt');
runner(__filename, async () => { runner(__filename, async () => {
const trie = new Trie(); const trie = new Trie();
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://publicsuffix.org/list/public_suffix_list.dat')) {
trie.add(line); if (fs.existsSync(publicSuffixPath)) {
for await (const line of readFileByLine(publicSuffixPath)) {
trie.add(line);
}
} else {
console.log('public_suffix_list.dat not found, fetch directly from remote.');
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://publicsuffix.org/list/public_suffix_list.dat')) {
trie.add(line);
}
} }
/** /**
@ -18,13 +29,16 @@ runner(__filename, async () => {
*/ */
const S3OSSDomains = new Set(); const S3OSSDomains = new Set();
trie.find('.amazonaws.com') trie.find('.amazonaws.com').forEach(line => {
.filter(line => (line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-')) if ((line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-')) {
.forEach(line => S3OSSDomains.add(line)); S3OSSDomains.add(line);
}
trie.find('.scw.cloud') });
.filter(line => (line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-')) trie.find('.scw.cloud').forEach(line => {
.forEach(line => S3OSSDomains.add(line)); if ((line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-')) {
S3OSSDomains.add(line);
}
});
/** @type {string[]} */ /** @type {string[]} */
const cdnDomainsList = []; const cdnDomainsList = [];
@ -45,7 +59,7 @@ runner(__filename, async () => {
]; ];
const ruleset = minifyRules(cdnDomainsList); const ruleset = minifyRules(cdnDomainsList);
await Promise.all(createRuleset( return Promise.all(createRuleset(
'Sukka\'s Ruleset - CDN Domains', 'Sukka\'s Ruleset - CDN Domains',
description, description,
new Date(), new Date(),

View File

@ -19,8 +19,15 @@ runner(__filename, async () => {
results.push( results.push(
...Object.entries(DOMESTICS) ...Object.entries(DOMESTICS)
.filter(([key]) => key !== 'SYSTEM') .reduce(
.flatMap(([, { domains }]) => domains) (acc, [key, { domains }]) => {
if (key === 'SYSTEM') {
return acc;
}
return [...acc, ...domains];
},
/** @type {string[]} */([])
)
.sort(domainSorter) .sort(domainSorter)
.map((domain) => `DOMAIN-SUFFIX,${domain}`) .map((domain) => `DOMAIN-SUFFIX,${domain}`)
); );

View File

@ -1,7 +1,6 @@
// @ts-check // @ts-check
const fse = require('fs-extra'); const fse = require('fs-extra');
const path = require('path'); const path = require('path');
const { isDomainLoose } = require('./lib/is-domain-loose');
const tldts = require('tldts'); const tldts = require('tldts');
const { processLine } = require('./lib/process-line'); const { processLine } = require('./lib/process-line');
const { readFileByLine } = require('./lib/fetch-remote-text-by-line'); const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
@ -35,11 +34,15 @@ runner(__filename, async () => {
*/ */
const processLocalDomainSet = async (domainSetPath) => { const processLocalDomainSet = async (domainSetPath) => {
for await (const line of readFileByLine(domainSetPath)) { for await (const line of readFileByLine(domainSetPath)) {
if (line[0] === '.') { const parsed = tldts.parse(line, { allowPrivateDomains: true });
addApexDomain(line.slice(1)); if (!parsed.isIp && (parsed.isIcann || parsed.isPrivate)) {
} else if (isDomainLoose(line)) { if (parsed.domain) {
addApexDomain(line); set.add(parsed.domain);
} else if (processLine(line)) { }
continue;
}
if (processLine(line)) {
console.warn('[drop line from domainset]', line); console.warn('[drop line from domainset]', line);
} }
} }

View File

@ -1,9 +1,9 @@
// @ts-check // @ts-check
const path = require('path'); const path = require('path');
const fse = require('fs-extra'); const fse = require('fs-extra');
const fs = require('fs');
const { parseFelixDnsmasq } = require('./lib/parse-dnsmasq'); const { parseFelixDnsmasq } = require('./lib/parse-dnsmasq');
const { runner } = require('./lib/trace-runner'); const { runner } = require('./lib/trace-runner');
const { compareAndWriteFile } = require('./lib/create-file');
runner(__filename, async () => { runner(__filename, async () => {
const [result] = await Promise.all([ const [result] = await Promise.all([
@ -11,8 +11,8 @@ runner(__filename, async () => {
fse.ensureDir(path.resolve(__dirname, '../List/internal')) fse.ensureDir(path.resolve(__dirname, '../List/internal'))
]); ]);
await fs.promises.writeFile( await compareAndWriteFile(
path.resolve(__dirname, '../List/internal/accelerated-china-domains.txt'), result.map(line => `SUFFIX,${line}`),
`${result.map(line => `SUFFIX,${line}`).join('\n')}\n` path.resolve(__dirname, '../List/internal/accelerated-china-domains.txt')
); );
}); });

View File

@ -1,10 +1,10 @@
const { parse } = require('tldts'); const tldts = require('tldts');
const { processFilterRules } = require('./lib/parse-filter.js'); const { processFilterRules } = require('./lib/parse-filter.js');
const path = require('path'); const path = require('path');
const { createRuleset } = require('./lib/create-file'); const { createRuleset } = require('./lib/create-file');
const { processLine } = require('./lib/process-line.js'); const { processLine } = require('./lib/process-line.js');
const domainSorter = require('./lib/stable-sort-domain'); const domainSorter = require('./lib/stable-sort-domain');
const { runner } = require('./lib/trace-runner.js'); const { runner, traceSync } = require('./lib/trace-runner.js');
const WHITELIST_DOMAIN = new Set([ const WHITELIST_DOMAIN = new Set([
'w3s.link', 'w3s.link',
@ -61,19 +61,14 @@ const BLACK_TLD = new Set([
]); ]);
runner(__filename, async () => { runner(__filename, async () => {
const domainSet = Array.from( const domainSet = Array.from((await processFilterRules('https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt')).black);
(await processFilterRules('https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt')).black
);
const domainCountMap = {}; const domainCountMap = {};
for (let i = 0, len = domainSet.length; i < len; i++) { for (let i = 0, len = domainSet.length; i < len; i++) {
const line = processLine(domainSet[i]); const line = processLine(domainSet[i]);
if (!line) continue; if (!line) continue;
const domain = line.charCodeAt(0) === 46 ? line.slice(1) : line; const parsed = tldts.parse(line, { allowPrivateDomains: true });
const parsed = parse(domain, { allowPrivateDomains: true });
const apexDomain = parsed.domain; const apexDomain = parsed.domain;
if (apexDomain) { if (apexDomain) {
@ -84,19 +79,18 @@ runner(__filename, async () => {
domainCountMap[apexDomain] ||= 0; domainCountMap[apexDomain] ||= 0;
let isPhishingDomainMockingAmazon = false; let isPhishingDomainMockingAmazon = false;
if (line.startsWith('.amaz')) {
if (domain.startsWith('amaz')) {
domainCountMap[apexDomain] += 0.5; domainCountMap[apexDomain] += 0.5;
isPhishingDomainMockingAmazon = true; isPhishingDomainMockingAmazon = true;
if (domain.startsWith('amazon-')) { if (line.startsWith('.amazon-')) {
domainCountMap[apexDomain] += 4.5; domainCountMap[apexDomain] += 4.5;
} }
} else if (domain.startsWith('customer')) { } else if (line.startsWith('.customer')) {
domainCountMap[apexDomain] += 0.25; domainCountMap[apexDomain] += 0.25;
} }
if (domain.includes('-co-jp')) { if (line.includes('-co-jp')) {
domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5); domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5);
} }
@ -105,17 +99,17 @@ runner(__filename, async () => {
domainCountMap[apexDomain] += 1; domainCountMap[apexDomain] += 1;
if (domain.length > 19) { if (line.length > 19) {
// Add more weight if the domain is long enough // Add more weight if the domain is long enough
if (domain.length > 44) { if (line.length > 44) {
domainCountMap[apexDomain] += 3.5; domainCountMap[apexDomain] += 3.5;
} else if (domain.length > 34) { } else if (line.length > 34) {
domainCountMap[apexDomain] += 2.5; domainCountMap[apexDomain] += 2.5;
} else if (domain.length > 29) { } else if (line.length > 29) {
domainCountMap[apexDomain] += 1.5; domainCountMap[apexDomain] += 1.5;
} else if (domain.length > 24) { } else if (line.length > 24) {
domainCountMap[apexDomain] += 0.75; domainCountMap[apexDomain] += 0.75;
} else if (domain.length > 19) { } else if (line.length > 19) {
domainCountMap[apexDomain] += 0.25; domainCountMap[apexDomain] += 0.25;
} }
@ -129,15 +123,14 @@ runner(__filename, async () => {
} }
} }
const results = []; const results = traceSync('* get final results', () => Object.entries(domainCountMap)
.reduce((acc, [apexDomain, count]) => {
Object.entries(domainCountMap).forEach(([domain, count]) => { if (count >= 5) {
if (count >= 5) { acc.push(`.${apexDomain}`);
results.push(`.${domain}`); }
} return acc;
}); }, /** @type {string[]} */([]))
.sort(domainSorter));
results.sort(domainSorter);
const description = [ const description = [
'License: AGPL 3.0', 'License: AGPL 3.0',

View File

@ -1,20 +1,20 @@
// @ts-check // @ts-check
const fs = require('fs');
const fse = require('fs-extra'); const fse = require('fs-extra');
const { resolve: pathResolve } = require('path'); const { resolve: pathResolve } = require('path');
const tldts = require('tldts');
const { processHosts, processFilterRules } = require('./lib/parse-filter'); const { processHosts, processFilterRules } = require('./lib/parse-filter');
const Trie = require('./lib/trie'); const Trie = require('./lib/trie');
const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source'); const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source');
const { createRuleset } = require('./lib/create-file'); const { createRuleset, compareAndWriteFile } = require('./lib/create-file');
const { processLine } = require('./lib/process-line'); const { processLine } = require('./lib/process-line');
const { domainDeduper } = require('./lib/domain-deduper'); const { domainDeduper } = require('./lib/domain-deduper');
const createKeywordFilter = require('./lib/aho-corasick'); const createKeywordFilter = require('./lib/aho-corasick');
const { readFileByLine } = require('./lib/fetch-remote-text-by-line'); const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
const domainSorter = require('./lib/stable-sort-domain'); const { createDomainSorter } = require('./lib/stable-sort-domain');
const { traceSync, runner } = require('./lib/trace-runner');
const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix');
const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse');
/** Whitelists */ /** Whitelists */
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST); const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
@ -22,7 +22,8 @@ const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
const domainKeywordsSet = new Set(); const domainKeywordsSet = new Set();
/** @type {Set<string>} Dedupe domains included by DOMAIN-SUFFIX */ /** @type {Set<string>} Dedupe domains included by DOMAIN-SUFFIX */
const domainSuffixSet = new Set(); const domainSuffixSet = new Set();
(async () => {
runner(__filename, async () => {
/** @type Set<string> */ /** @type Set<string> */
const domainSets = new Set(); const domainSets = new Set();
@ -31,7 +32,8 @@ const domainSuffixSet = new Set();
let shouldStop = false; let shouldStop = false;
await Promise.all([ const [gorhill] = await Promise.all([
getGorhillPublicSuffixPromise,
// Parse from remote hosts & domain lists // Parse from remote hosts & domain lists
...HOSTS.map(entry => processHosts(entry[0], entry[1]).then(hosts => { ...HOSTS.map(entry => processHosts(entry[0], entry[1]).then(hosts => {
hosts.forEach(host => { hosts.forEach(host => {
@ -129,7 +131,7 @@ const domainSuffixSet = new Set();
console.log(`Start deduping from black keywords/suffixes! (${previousSize})`); console.log(`Start deduping from black keywords/suffixes! (${previousSize})`);
console.time('* Dedupe from black keywords/suffixes'); console.time('* Dedupe from black keywords/suffixes');
const kwfilter = createKeywordFilter(Array.from(domainKeywordsSet)); const kwfilter = createKeywordFilter(domainKeywordsSet);
const trie1 = Trie.from(domainSets); const trie1 = Trie.from(domainSets);
domainSuffixSet.forEach(suffix => { domainSuffixSet.forEach(suffix => {
@ -167,19 +169,35 @@ const domainSuffixSet = new Set();
const START_TIME = Date.now(); const START_TIME = Date.now();
const dudupedDominArray = domainDeduper(Array.from(domainSets)); const dudupedDominArray = traceSync('* Dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
console.log(`* Dedupe from covered subdomain - ${(Date.now() - START_TIME) / 1000}s`); console.log(`* Dedupe from covered subdomain - ${(Date.now() - START_TIME) / 1000}s`);
console.log(`Deduped ${previousSize - dudupedDominArray.length} rules!`); console.log(`Deduped ${previousSize - dudupedDominArray.length} rules!`);
/** @type {Record<string, number>} */ // Create reject stats
const rejectDomainsStats = dudupedDominArray.reduce((acc, cur) => { const getDomain = createCachedGorhillGetDomain(gorhill);
const suffix = tldts.getDomain(cur, { allowPrivateDomains: false }); /** @type {[string, number][]} */
if (suffix) { const rejectDomainsStats = traceSync(
acc[suffix] = (acc[suffix] ?? 0) + 1; '* Collect reject domain stats',
} () => Object.entries(
return acc; dudupedDominArray.reduce((acc, cur) => {
}, {}); const suffix = getDomain(cur);
if (suffix) {
acc[suffix] = (acc[suffix] ?? 0) + 1;
}
return acc;
}, {})
).filter(a => a[1] > 2).sort((a, b) => {
const t = b[1] - a[1];
if (t === 0) {
return a[0].localeCompare(b[0]);
}
return t;
})
);
const domainSorter = createDomainSorter(gorhill);
const domainset = traceSync('* Sort reject domainset', () => dudupedDominArray.sort(domainSorter));
const description = [ const description = [
'License: AGPL 3.0', 'License: AGPL 3.0',
@ -192,7 +210,6 @@ const domainSuffixSet = new Set();
...HOSTS.map(host => ` - ${host[0]}`), ...HOSTS.map(host => ` - ${host[0]}`),
...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`) ...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
]; ];
const domainset = dudupedDominArray.sort(domainSorter);
await Promise.all([ await Promise.all([
...createRuleset( ...createRuleset(
@ -204,21 +221,11 @@ const domainSuffixSet = new Set();
pathResolve(__dirname, '../List/domainset/reject.conf'), pathResolve(__dirname, '../List/domainset/reject.conf'),
pathResolve(__dirname, '../Clash/domainset/reject.txt') pathResolve(__dirname, '../Clash/domainset/reject.txt')
), ),
fs.promises.writeFile( compareAndWriteFile(
pathResolve(__dirname, '../List/internal/reject-stats.txt'), rejectDomainsStats.map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`),
Object.entries(rejectDomainsStats) pathResolve(__dirname, '../List/internal/reject-stats.txt')
.filter(a => a[1] > 1)
.sort((a, b) => {
const t = b[1] - a[1];
if (t === 0) {
return a[0].localeCompare(b[0]);
}
return t;
})
.map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`)
.join('\n')
), ),
// Copy reject_sukka.conf for backward compatibility // Copy reject_sukka.conf for backward compatibility
fse.copy(pathResolve(__dirname, '../Source/domainset/reject_sukka.conf'), pathResolve(__dirname, '../List/domainset/reject_sukka.conf')) fse.copy(pathResolve(__dirname, '../Source/domainset/reject_sukka.conf'), pathResolve(__dirname, '../List/domainset/reject_sukka.conf'))
]); ]);
})(); });

View File

@ -42,7 +42,7 @@ runner(__filename, async () => {
} }
} }
await Promise.all(promises); return Promise.all(promises);
}); });
/** /**

View File

@ -16,7 +16,7 @@ const fileExists = (path) => {
.catch(() => false); .catch(() => false);
}; };
runner(__filename, async () => { const downloadPreviousBuild = async () => {
const filesList = ['Clash', 'List']; const filesList = ['Clash', 'List'];
let allFileExists = true; let allFileExists = true;
@ -79,4 +79,28 @@ runner(__filename, async () => {
})); }));
await fs.promises.unlink(extractedPath).catch(() => { }); await fs.promises.unlink(extractedPath).catch(() => { });
};
const downloadPublicSuffixList = async () => {
const publicSuffixDir = resolve(__dirname, '../node_modules/.cache');
const publicSuffixPath = join(publicSuffixDir, 'public_suffix-list_dat.txt');
console.log('Download public suffix list.');
const [resp] = await Promise.all([
fetch('https://publicsuffix.org/list/public_suffix_list.dat'),
fse.ensureDir(publicSuffixDir)
]);
await pipeline(
Readable.fromWeb(resp.body),
fs.createWriteStream(publicSuffixPath)
);
};
runner(__filename, () => {
return Promise.all([
downloadPreviousBuild(),
downloadPublicSuffixList()
]);
}); });

View File

@ -23,7 +23,7 @@ const createNode = (key, depth = 0) => ({
}); });
/** /**
* @param {string[]} keys * @param {string[] | Set<string>} keys
*/ */
const createKeywordFilter = (keys) => { const createKeywordFilter = (keys) => {
const root = createNode('root'); const root = createNode('root');
@ -39,16 +39,18 @@ const createKeywordFilter = (keys) => {
const map = beginNode.children; const map = beginNode.children;
// eslint-disable-next-line guard-for-in -- plain object // eslint-disable-next-line guard-for-in -- plain object
for (const key in beginNode.children) { for (const key in beginNode.children) {
const node = map[key]; const node = map?.[key];
let failNode = beginNode.fail; let failNode = beginNode.fail;
while (failNode && !failNode.children[key]) { while (failNode && !failNode.children?.[key]) {
failNode = failNode.fail; failNode = failNode.fail;
} }
node.fail = failNode?.children[key] || root; if (node) {
node.fail = failNode?.children?.[key] || root;
queue.push(node); queue.push(node);
}
} }
idx++; idx++;
@ -83,10 +85,9 @@ const createKeywordFilter = (keys) => {
} }
}; };
for (let idx = 0, len = keys.length; idx < len; idx++) { keys.forEach(k => {
const key = keys[idx]; put(k, k.length);
put(key, key.length); });
}
build(); build();

47
Build/lib/cache-apply.js Normal file
View File

@ -0,0 +1,47 @@
/**
* @param {string} [namespace]
*/
const createCache = (namespace, printStats = false) => {
const cache = new Map();
let hit = 0;
if (namespace && printStats) {
process.on('exit', () => {
console.log(`🔋 [cache] ${namespace} hit: ${hit}, size: ${cache.size}`);
});
}
return {
/**
* @template T
* @param {string} key
* @param {() => T} fn
* @returns {T}
*/
sync(key, fn) {
if (cache.has(key)) {
hit++;
return cache.get(key);
}
const value = fn();
cache.set(key, value);
return value;
},
/**
* @template T
* @param {string} key
* @param {() => Promise<T>} fn
* @returns {Promise<T>}
*/
async async(key, fn) {
if (cache.has(key)) {
hit++;
return cache.get(key);
}
const value = await fn();
cache.set(key, value);
return value;
}
};
};
module.exports.createCache = createCache;

View File

@ -0,0 +1,25 @@
const tldts = require('tldts');
const { createCache } = require('./cache-apply');
const cache = createCache('cached-tld-parse', true);
const sharedConfig = { allowPrivateDomains: true };
/**
* @param {string} domain
* @returns {ReturnType<import('tldts').parse>}
*/
module.exports.parse = (domain) => {
return cache.sync(domain, () => tldts.parse(domain, sharedConfig));
};
const gothillGetDomainCache = createCache('cached-gorhill-get-domain', true);
/**
* @param {import('gorhill-publicsuffixlist').default | null} gorhill
*/
module.exports.createCachedGorhillGetDomain = (gorhill) => {
/**
* @param {string} domain
*/
return (domain) => gothillGetDomainCache.sync(domain, () => gorhill.getDomain(domain[0] === '.' ? domain.slice(1) : domain));
};

View File

@ -32,10 +32,11 @@ async function compareAndWriteFile(linesA, filePath) {
const stream = fs.createWriteStream(filePath, { encoding: 'utf-8' }); const stream = fs.createWriteStream(filePath, { encoding: 'utf-8' });
for (let i = 0, len = linesA.length; i < len; i++) { for (let i = 0, len = linesA.length; i < len; i++) {
// eslint-disable-next-line no-await-in-loop -- backpressure const p = writeToStream(stream, `${linesA[i]}\n`);
await writeToStream(stream, linesA[i]); if (p) {
// eslint-disable-next-line no-await-in-loop -- backpressure // eslint-disable-next-line no-await-in-loop -- backpressure, besides we only wait for drain
await writeToStream(stream, '\n'); await p;
}
} }
stream.end(); stream.end();
} else { } else {
@ -48,13 +49,13 @@ module.exports.compareAndWriteFile = compareAndWriteFile;
* @param {import('fs').WriteStream} stream * @param {import('fs').WriteStream} stream
* @param {string} data * @param {string} data
*/ */
async function writeToStream(stream, data) { function writeToStream(stream, data) {
if (!stream.write(data)) { if (!stream.write(data)) {
return /** @type {Promise<void>} */(new Promise((resolve) => { return /** @type {Promise<void>} */(new Promise((resolve) => {
stream.once('drain', () => { resolve(); }); stream.once('drain', resolve);
})); }));
} }
return Promise.resolve(); return null;
} }
/** /**

View File

@ -1,3 +1,4 @@
// @ts-check
const Trie = require('./trie'); const Trie = require('./trie');
/** /**
@ -13,8 +14,10 @@ const domainDeduper = (inputDomains) => {
continue; continue;
} }
// delete all included subdomains (ends with `.example.com`)
trie.find(d, false).forEach(f => sets.delete(f)); trie.find(d, false).forEach(f => sets.delete(f));
// if `.example.com` exists, then `example.com` should also be removed
const a = d.slice(1); const a = d.slice(1);
if (trie.has(a)) { if (trie.has(a)) {
sets.delete(a); sets.delete(a);

View File

@ -34,7 +34,7 @@ module.exports.createReadlineInterfaceFromResponse = createReadlineInterfaceFrom
/** /**
* @param {import('undici').RequestInfo} url * @param {import('undici').RequestInfo} url
* @param {import('undici').RequestInit | undefined} [opt] * @param {import('undici').RequestInit} [opt]
*/ */
module.exports.fetchRemoteTextAndCreateReadlineInterface = async (url, opt) => { module.exports.fetchRemoteTextAndCreateReadlineInterface = async (url, opt) => {
const resp = await fetchWithRetry(url, opt); const resp = await fetchWithRetry(url, opt);

View File

@ -1,4 +1,11 @@
// @ts-check // @ts-check
const { fetch } = require('undici'); const undici = require('undici');
const fetchWithRetry = /** @type {fetch} */(require('@vercel/fetch-retry')(fetch));
// Enable HTTP/2 supports
undici.setGlobalDispatcher(new undici.Agent({
allowH2: true,
pipelining: 10
}));
const fetchWithRetry = /** @type {import('undici').fetch} */(require('@vercel/fetch-retry')(undici.fetch));
module.exports.fetchWithRetry = fetchWithRetry; module.exports.fetchWithRetry = fetchWithRetry;

View File

@ -0,0 +1,34 @@
const { toASCII } = require('punycode/');
const fs = require('fs');
const path = require('path');
const publicSuffixPath = path.resolve(__dirname, '../../node_modules/.cache/public_suffix-list_dat.txt');
const getPublicSuffixListDat = () => {
if (fs.existsSync(publicSuffixPath)) {
return fs.promises.readFile(publicSuffixPath, 'utf-8');
}
console.log('public_suffix_list.dat not found, fetch directly from remote.');
return fetch('https://publicsuffix.org/list/public_suffix_list.dat').then(r => r.text());
};
const getGorhillPublicSuffix = async () => {
const customFetch = async (url) => {
const buf = await fs.promises.readFile(url);
return {
arrayBuffer() { return Promise.resolve(buf.buffer); }
};
};
const [publicSuffixListDat, { default: gorhill }] = await Promise.all([
getPublicSuffixListDat(),
import('gorhill-publicsuffixlist')
]);
gorhill.parse(publicSuffixListDat, toASCII);
await gorhill.enableWASM({ customFetch });
return gorhill;
};
const getGorhillPublicSuffixPromise = getGorhillPublicSuffix();
module.exports.getGorhillPublicSuffixPromise = getGorhillPublicSuffixPromise;

View File

@ -1,13 +1,14 @@
// @ts-check // @ts-check
const { parse } = require('tldts'); const tldts = require('./cached-tld-parse');
/** /**
* @param {string} domain * @param {string} domain
*/ */
module.exports.isDomainLoose = (domain) => { module.exports.isDomainLoose = (domain) => {
const { isIcann, isPrivate, isIp } = parse(domain, { allowPrivateDomains: true }); const { isIcann, isPrivate, isIp } = tldts.parse(domain);
return !!(!isIp && (isIcann || isPrivate)); return !!(!isIp && (isIcann || isPrivate));
}; };
/** /**
* @param {string} domain * @param {string} domain
*/ */
@ -16,12 +17,15 @@ module.exports.normalizeDomain = (domain) => {
return null; return null;
} }
const { isIcann, isPrivate, hostname, isIp } = parse(domain, { allowPrivateDomains: true }); const { isIcann, isPrivate, hostname, isIp } = tldts.parse(domain);
if (isIp) { if (isIp) {
return null; return null;
} }
if (isIcann || isPrivate) { if (isIcann || isPrivate) {
if (hostname?.[0] === '.') {
return hostname.slice(1);
}
return hostname; return hostname;
} }

View File

@ -179,19 +179,15 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart
const downloadStart = performance.now(); const downloadStart = performance.now();
try { try {
const controller = new AbortController(); const controller = new AbortController();
const signal = controller.signal;
/** @type string[] */ /** @type string[] */
filterRules = ( filterRules = (
await Promise.any( await Promise.any(
[filterRulesUrl, ...(fallbackUrls || [])].map( [filterRulesUrl, ...(fallbackUrls || [])].map(async url => {
url => fetchWithRetry(url, { signal }) const text = await fetchWithRetry(url, { signal: controller.signal }).then(r => r.text());
.then(r => r.text()) controller.abort();
.then(text => { return text;
controller.abort(); })
return text;
})
)
) )
).split('\n').map(line => line.trim()); ).split('\n').map(line => line.trim());
} catch (e) { } catch (e) {
@ -317,10 +313,7 @@ function parse($line, includeThirdParties) {
if ( if (
// (line.startsWith('@@|') || line.startsWith('@@.')) // (line.startsWith('@@|') || line.startsWith('@@.'))
( (line[2] === '|' || line[2] === '.')
line[2] === '|'
|| line[2] === '.'
)
&& ( && (
lineEndsWithCaret lineEndsWithCaret
|| lineEndsWithCaretVerticalBar || lineEndsWithCaretVerticalBar
@ -374,7 +367,7 @@ function parse($line, includeThirdParties) {
return null; return null;
} }
const lineStartsWithSingleDot = line.startsWith('.'); const lineStartsWithSingleDot = line[0] === '.';
if ( if (
lineStartsWithSingleDot lineStartsWithSingleDot
&& ( && (
@ -437,7 +430,7 @@ function parse($line, includeThirdParties) {
return null; return null;
} }
const tryNormalizeDomain = normalizeDomain(lineStartsWithSingleDot ? line.slice(1) : line); const tryNormalizeDomain = normalizeDomain(line);
if ( if (
tryNormalizeDomain tryNormalizeDomain
&& ( && (

View File

@ -1,14 +1,4 @@
// @ts-check // @ts-check
const tldts = require('tldts');
const cache1 = Object.create(null);
/**
* @param {string} url
* @returns {ReturnType<typeof tldts.parse>}
*/
// eslint-disable-next-line no-return-assign -- cache
const parse = (url) => (cache1[url] ||= tldts.parse(url, { allowPrivateDomains: true }));
/** /**
* @param {string | null} a * @param {string | null} a
* @param {string | null} b * @param {string | null} b
@ -49,32 +39,49 @@ const compare = (a, b) => {
}; };
/** /**
* @param {string} a * @param {import('gorhill-publicsuffixlist').default | null} [gorhill]
* @param {string} b
* @returns {0 | 1 | -1}
*/ */
const domainSorter = (a, b) => { const createDomainSorter = (gorhill = null) => {
if (a === b) return 0; const cached = require('./cached-tld-parse');
const aParsed = parse(a[0] === '.' ? a.slice(1) : a); if (gorhill) {
const bParsed = parse(b[0] === '.' ? b.slice(1) : b); /**
* @param {string} input
*/
const getDomain = cached.createCachedGorhillGetDomain(gorhill);
const resultDomainWithoutSuffix = compare(aParsed.domainWithoutSuffix, bParsed.domainWithoutSuffix); /**
if (resultDomainWithoutSuffix !== 0) { * @param {string} a
return resultDomainWithoutSuffix; * @param {string} b
* @returns {0 | 1 | -1}
*/
return (a, b) => {
if (a === b) return 0;
const aDomain = getDomain(a);
const bDomain = getDomain(b);
const resultDomain = compare(aDomain, bDomain);
return resultDomain !== 0 ? resultDomain : compare(a, b);
};
} }
const resultSuffix = compare(aParsed.publicSuffix, bParsed.publicSuffix); const tldts = cached;
if (resultSuffix !== 0) { /**
return resultSuffix; * @param {string} a
} * @param {string} b
* @returns {0 | 1 | -1}
*/
return (a, b) => {
if (a === b) return 0;
const resultSubdomain = compare(aParsed.subdomain, bParsed.subdomain); const aDomain = tldts.parse(a).domain;
if (resultSubdomain !== 0) { const bDomain = tldts.parse(b).domain;
return resultSubdomain;
}
return 0; const resultDomain = compare(aDomain, bDomain);
return resultDomain !== 0 ? resultDomain : compare(a, b);
};
}; };
module.exports = domainSorter; module.exports = createDomainSorter();
module.exports.createDomainSorter = createDomainSorter;

View File

@ -1,15 +1,42 @@
const path = require('path'); const path = require('path');
const { performance } = require('perf_hooks');
/** /**
* @param {Function} fn * @template T
* @param {string} __filename * @param {string} prefix
* @param {() => T} fn
* @returns {T}
*/ */
module.exports.runner = async (__filename, fn) => { const traceSync = (prefix, fn) => {
const runnerName = path.basename(__filename, path.extname(__filename)); const start = performance.now();
const result = fn();
const start = Date.now(); const end = performance.now();
const result = await fn(); console.log(`${prefix}: ${(end - start).toFixed(3)}ms`);
const end = Date.now();
console.log(`⌛ [${runnerName}]: ${end - start}ms`);
return result; return result;
}; };
module.exports.traceSync = traceSync;
/**
* @template T
* @param {string} prefix
* @param {() => Promise<T>} fn
* @returns {Promise<T>}
*/
const traceAsync = async (prefix, fn) => {
const start = performance.now();
const result = await fn();
const end = performance.now();
console.log(`${prefix}: ${(end - start).toFixed(3)}ms`);
return result;
};
module.exports.traceAsync = traceAsync;
/**
* @template T
* @param {string} __filename
* @param {() => Promise<T>} fn
* @returns {T}
*/
module.exports.runner = async (__filename, fn) => {
return traceAsync(`⌛ [${path.basename(__filename, path.extname(__filename))}]`, fn);
};

36
Build/mod.d.ts vendored Normal file
View File

@ -0,0 +1,36 @@
declare module 'gorhill-publicsuffixlist' {
  /**
   * Serialized snapshot of the parsed suffix list: either an encoded string
   * (produced via an Encoder) or a raw object exposing the internal 32-bit
   * buffer. NOTE(review): `buf32` is declared `number[]` here but may be a
   * typed array (Uint32Array) at runtime — confirm against the library source.
   */
  type Selfie =
    | string
    | {
        magic: number;
        buf32: number[];
      };
  /** Decodes a string-encoded selfie back into a caller-supplied ArrayBuffer. */
  interface Decoder {
    decode: (bufferStr: string, buffer: ArrayBuffer) => void;
    // Size (presumably in bytes) required to hold the decoded data — verify.
    decodeSize: (bufferStr: string) => number;
  }
  /** Encodes the first `length` units of a buffer into a string. */
  interface Encoder {
    encode: (buffer: ArrayBuffer, length: number) => string;
  }
  export interface PublicSuffixList {
    version: string;
    // Parse raw public_suffix_list.dat text; `toAscii` converts IDN labels
    // to ASCII (presumably e.g. punycode.toASCII — confirm against callers).
    parse(text: string, toAscii: (input: string) => string): void;
    getPublicSuffix(hostname: string): string;
    getDomain(hostname: string): string;
    suffixInPSL(hostname: string): boolean;
    // Serialize the parsed list (for caching) and restore it later;
    // fromSelfie returns whether the selfie was accepted.
    toSelfie(encoder?: null | Encoder): Selfie;
    fromSelfie(selfie: Selfie, decoder?: null | Decoder): boolean;
    enableWASM(options?: {
      customFetch?: null | ((url: URL) => Promise<Blob>);
    }): Promise<boolean>;
    disableWASM(): Promise<boolean>;
  }
  // The module's default export is a ready-to-use singleton instance.
  const psl: PublicSuffixList;
  export default psl;
}

View File

@ -1,6 +1,6 @@
// Surge Domain Set can not include root domain from public suffix list. // Surge Domain Set can not include root domain from public suffix list.
const tldts = require('tldts'); const tldts = require('tldts'); // hit ratio way too low, dont cache
const picocolors = require('picocolors'); const picocolors = require('picocolors');
const path = require('path'); const path = require('path');
const listDir = require('@sukka/listdir'); const listDir = require('@sukka/listdir');
@ -21,7 +21,7 @@ const validateDomainSet = async (filePath) => {
if (!line) { if (!line) {
continue; continue;
} }
const domain = line.charCodeAt(0) === 46 ? line.slice(1) : line; const domain = line[0] === '.' ? line.slice(1) : line;
const parsed = tldts.parse(domain, { allowPrivateDomains: true, detectIp: false }); const parsed = tldts.parse(domain, { allowPrivateDomains: true, detectIp: false });
if ( if (

View File

@ -158,9 +158,11 @@
"ci-info": "^3.8.0", "ci-info": "^3.8.0",
"cidr-tools-wasm": "^0.0.11", "cidr-tools-wasm": "^0.0.11",
"fs-extra": "^11.1.1", "fs-extra": "^11.1.1",
"gorhill-publicsuffixlist": "github:gorhill/publicsuffixlist.js",
"mnemonist": "^0.39.5", "mnemonist": "^0.39.5",
"path-scurry": "^1.10.1", "path-scurry": "^1.10.1",
"picocolors": "^1.0.0", "picocolors": "^1.0.0",
"punycode": "^2.3.0",
"table": "^6.8.1", "table": "^6.8.1",
"tar": "^6.2.0", "tar": "^6.2.0",
"tldts": "^6.0.14", "tldts": "^6.0.14",

19
pnpm-lock.yaml generated
View File

@ -34,6 +34,9 @@ dependencies:
fs-extra: fs-extra:
specifier: ^11.1.1 specifier: ^11.1.1
version: 11.1.1 version: 11.1.1
gorhill-publicsuffixlist:
specifier: github:gorhill/publicsuffixlist.js
version: github.com/gorhill/publicsuffixlist.js/3a1bc623073079184ff76933b88b7bf4f5d48978
mnemonist: mnemonist:
specifier: ^0.39.5 specifier: ^0.39.5
version: 0.39.5 version: 0.39.5
@ -43,6 +46,9 @@ dependencies:
picocolors: picocolors:
specifier: ^1.0.0 specifier: ^1.0.0
version: 1.0.0 version: 1.0.0
punycode:
specifier: ^2.3.0
version: 2.3.0
table: table:
specifier: ^6.8.1 specifier: ^6.8.1
version: 6.8.1 version: 6.8.1
@ -1318,8 +1324,8 @@ packages:
signal-exit: 3.0.7 signal-exit: 3.0.7
dev: true dev: true
/punycode@2.1.1: /punycode@2.3.0:
resolution: {integrity: sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==} resolution: {integrity: sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA==}
engines: {node: '>=6'} engines: {node: '>=6'}
/queue-microtask@1.2.3: /queue-microtask@1.2.3:
@ -1571,7 +1577,7 @@ packages:
/uri-js@4.4.1: /uri-js@4.4.1:
resolution: {integrity: sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==} resolution: {integrity: sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==}
dependencies: dependencies:
punycode: 2.1.1 punycode: 2.3.0
/webidl-conversions@3.0.1: /webidl-conversions@3.0.1:
resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==}
@ -1661,3 +1667,10 @@ packages:
resolution: {integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==} resolution: {integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==}
engines: {node: '>=10'} engines: {node: '>=10'}
dev: true dev: true
github.com/gorhill/publicsuffixlist.js/3a1bc623073079184ff76933b88b7bf4f5d48978:
resolution: {tarball: https://codeload.github.com/gorhill/publicsuffixlist.js/tar.gz/3a1bc623073079184ff76933b88b7bf4f5d48978}
name: '@gorhill/publicsuffixlist'
version: 3.0.1
engines: {node: '>=14.0.0', npm: '>=6.14.4'}
dev: false