Perf: further speed up infra

This commit is contained in:
SukkaW 2023-09-14 20:15:49 +08:00
parent adb8b43357
commit 78afa595a9
25 changed files with 431 additions and 173 deletions

View File

@ -1,11 +1,21 @@
{
"root": true,
"extends": ["sukka/node"],
"rules": {
"no-console": "off"
},
"parserOptions": {
"ecmaVersion": "latest",
"sourceType": "module"
}
"ignorePatterns": [
"node_modules/",
// disable for now
"**/*.d.ts"
],
"overrides": [
{
"files": ["**/*.js"],
"rules": {
"no-console": "off"
},
"parserOptions": {
"ecmaVersion": "latest",
"sourceType": "module"
}
}
]
}

View File

@ -5,11 +5,22 @@ const { minifyRules } = require('./lib/minify-rules');
const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('./lib/fetch-remote-text-by-line');
const Trie = require('./lib/trie');
const { runner } = require('./lib/trace-runner');
const fs = require('fs');
const publicSuffixPath = path.resolve(__dirname, '../node_modules/.cache/public_suffix-list_dat.txt');
runner(__filename, async () => {
const trie = new Trie();
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://publicsuffix.org/list/public_suffix_list.dat')) {
trie.add(line);
if (fs.existsSync(publicSuffixPath)) {
for await (const line of readFileByLine(publicSuffixPath)) {
trie.add(line);
}
} else {
console.log('public_suffix_list.dat not found, fetch directly from remote.');
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://publicsuffix.org/list/public_suffix_list.dat')) {
trie.add(line);
}
}
/**
@ -18,13 +29,16 @@ runner(__filename, async () => {
*/
const S3OSSDomains = new Set();
trie.find('.amazonaws.com')
.filter(line => (line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-'))
.forEach(line => S3OSSDomains.add(line));
trie.find('.scw.cloud')
.filter(line => (line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-'))
.forEach(line => S3OSSDomains.add(line));
trie.find('.amazonaws.com').forEach(line => {
if ((line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-')) {
S3OSSDomains.add(line);
}
});
trie.find('.scw.cloud').forEach(line => {
if ((line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-')) {
S3OSSDomains.add(line);
}
});
/** @type {string[]} */
const cdnDomainsList = [];
@ -45,7 +59,7 @@ runner(__filename, async () => {
];
const ruleset = minifyRules(cdnDomainsList);
await Promise.all(createRuleset(
return Promise.all(createRuleset(
'Sukka\'s Ruleset - CDN Domains',
description,
new Date(),

View File

@ -19,8 +19,15 @@ runner(__filename, async () => {
results.push(
...Object.entries(DOMESTICS)
.filter(([key]) => key !== 'SYSTEM')
.flatMap(([, { domains }]) => domains)
.reduce(
(acc, [key, { domains }]) => {
if (key === 'SYSTEM') {
return acc;
}
return [...acc, ...domains];
},
/** @type {string[]} */([])
)
.sort(domainSorter)
.map((domain) => `DOMAIN-SUFFIX,${domain}`)
);

View File

@ -1,7 +1,6 @@
// @ts-check
const fse = require('fs-extra');
const path = require('path');
const { isDomainLoose } = require('./lib/is-domain-loose');
const tldts = require('tldts');
const { processLine } = require('./lib/process-line');
const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
@ -35,11 +34,15 @@ runner(__filename, async () => {
*/
const processLocalDomainSet = async (domainSetPath) => {
for await (const line of readFileByLine(domainSetPath)) {
if (line[0] === '.') {
addApexDomain(line.slice(1));
} else if (isDomainLoose(line)) {
addApexDomain(line);
} else if (processLine(line)) {
const parsed = tldts.parse(line, { allowPrivateDomains: true });
if (!parsed.isIp && (parsed.isIcann || parsed.isPrivate)) {
if (parsed.domain) {
set.add(parsed.domain);
}
continue;
}
if (processLine(line)) {
console.warn('[drop line from domainset]', line);
}
}

View File

@ -1,9 +1,9 @@
// @ts-check
const path = require('path');
const fse = require('fs-extra');
const fs = require('fs');
const { parseFelixDnsmasq } = require('./lib/parse-dnsmasq');
const { runner } = require('./lib/trace-runner');
const { compareAndWriteFile } = require('./lib/create-file');
runner(__filename, async () => {
const [result] = await Promise.all([
@ -11,8 +11,8 @@ runner(__filename, async () => {
fse.ensureDir(path.resolve(__dirname, '../List/internal'))
]);
await fs.promises.writeFile(
path.resolve(__dirname, '../List/internal/accelerated-china-domains.txt'),
`${result.map(line => `SUFFIX,${line}`).join('\n')}\n`
await compareAndWriteFile(
result.map(line => `SUFFIX,${line}`),
path.resolve(__dirname, '../List/internal/accelerated-china-domains.txt')
);
});

View File

@ -1,10 +1,10 @@
const { parse } = require('tldts');
const tldts = require('tldts');
const { processFilterRules } = require('./lib/parse-filter.js');
const path = require('path');
const { createRuleset } = require('./lib/create-file');
const { processLine } = require('./lib/process-line.js');
const domainSorter = require('./lib/stable-sort-domain');
const { runner } = require('./lib/trace-runner.js');
const { runner, traceSync } = require('./lib/trace-runner.js');
const WHITELIST_DOMAIN = new Set([
'w3s.link',
@ -61,19 +61,14 @@ const BLACK_TLD = new Set([
]);
runner(__filename, async () => {
const domainSet = Array.from(
(await processFilterRules('https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt')).black
);
const domainSet = Array.from((await processFilterRules('https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt')).black);
const domainCountMap = {};
for (let i = 0, len = domainSet.length; i < len; i++) {
const line = processLine(domainSet[i]);
if (!line) continue;
const domain = line.charCodeAt(0) === 46 ? line.slice(1) : line;
const parsed = parse(domain, { allowPrivateDomains: true });
const parsed = tldts.parse(line, { allowPrivateDomains: true });
const apexDomain = parsed.domain;
if (apexDomain) {
@ -84,19 +79,18 @@ runner(__filename, async () => {
domainCountMap[apexDomain] ||= 0;
let isPhishingDomainMockingAmazon = false;
if (domain.startsWith('amaz')) {
if (line.startsWith('.amaz')) {
domainCountMap[apexDomain] += 0.5;
isPhishingDomainMockingAmazon = true;
if (domain.startsWith('amazon-')) {
if (line.startsWith('.amazon-')) {
domainCountMap[apexDomain] += 4.5;
}
} else if (domain.startsWith('customer')) {
} else if (line.startsWith('.customer')) {
domainCountMap[apexDomain] += 0.25;
}
if (domain.includes('-co-jp')) {
if (line.includes('-co-jp')) {
domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5);
}
@ -105,17 +99,17 @@ runner(__filename, async () => {
domainCountMap[apexDomain] += 1;
if (domain.length > 19) {
if (line.length > 19) {
// Add more weight if the domain is long enough
if (domain.length > 44) {
if (line.length > 44) {
domainCountMap[apexDomain] += 3.5;
} else if (domain.length > 34) {
} else if (line.length > 34) {
domainCountMap[apexDomain] += 2.5;
} else if (domain.length > 29) {
} else if (line.length > 29) {
domainCountMap[apexDomain] += 1.5;
} else if (domain.length > 24) {
} else if (line.length > 24) {
domainCountMap[apexDomain] += 0.75;
} else if (domain.length > 19) {
} else if (line.length > 19) {
domainCountMap[apexDomain] += 0.25;
}
@ -129,15 +123,14 @@ runner(__filename, async () => {
}
}
const results = [];
Object.entries(domainCountMap).forEach(([domain, count]) => {
if (count >= 5) {
results.push(`.${domain}`);
}
});
results.sort(domainSorter);
const results = traceSync('* get final results', () => Object.entries(domainCountMap)
.reduce((acc, [apexDomain, count]) => {
if (count >= 5) {
acc.push(`.${apexDomain}`);
}
return acc;
}, /** @type {string[]} */([]))
.sort(domainSorter));
const description = [
'License: AGPL 3.0',

View File

@ -1,20 +1,20 @@
// @ts-check
const fs = require('fs');
const fse = require('fs-extra');
const { resolve: pathResolve } = require('path');
const tldts = require('tldts');
const { processHosts, processFilterRules } = require('./lib/parse-filter');
const Trie = require('./lib/trie');
const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source');
const { createRuleset } = require('./lib/create-file');
const { createRuleset, compareAndWriteFile } = require('./lib/create-file');
const { processLine } = require('./lib/process-line');
const { domainDeduper } = require('./lib/domain-deduper');
const createKeywordFilter = require('./lib/aho-corasick');
const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
const domainSorter = require('./lib/stable-sort-domain');
const { createDomainSorter } = require('./lib/stable-sort-domain');
const { traceSync, runner } = require('./lib/trace-runner');
const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix');
const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse');
/** Whitelists */
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
@ -22,7 +22,8 @@ const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
const domainKeywordsSet = new Set();
/** @type {Set<string>} Dedupe domains included by DOMAIN-SUFFIX */
const domainSuffixSet = new Set();
(async () => {
runner(__filename, async () => {
/** @type Set<string> */
const domainSets = new Set();
@ -31,7 +32,8 @@ const domainSuffixSet = new Set();
let shouldStop = false;
await Promise.all([
const [gorhill] = await Promise.all([
getGorhillPublicSuffixPromise,
// Parse from remote hosts & domain lists
...HOSTS.map(entry => processHosts(entry[0], entry[1]).then(hosts => {
hosts.forEach(host => {
@ -129,7 +131,7 @@ const domainSuffixSet = new Set();
console.log(`Start deduping from black keywords/suffixes! (${previousSize})`);
console.time('* Dedupe from black keywords/suffixes');
const kwfilter = createKeywordFilter(Array.from(domainKeywordsSet));
const kwfilter = createKeywordFilter(domainKeywordsSet);
const trie1 = Trie.from(domainSets);
domainSuffixSet.forEach(suffix => {
@ -167,19 +169,35 @@ const domainSuffixSet = new Set();
const START_TIME = Date.now();
const dudupedDominArray = domainDeduper(Array.from(domainSets));
const dudupedDominArray = traceSync('* Dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
console.log(`* Dedupe from covered subdomain - ${(Date.now() - START_TIME) / 1000}s`);
console.log(`Deduped ${previousSize - dudupedDominArray.length} rules!`);
/** @type {Record<string, number>} */
const rejectDomainsStats = dudupedDominArray.reduce((acc, cur) => {
const suffix = tldts.getDomain(cur, { allowPrivateDomains: false });
if (suffix) {
acc[suffix] = (acc[suffix] ?? 0) + 1;
}
return acc;
}, {});
// Create reject stats
const getDomain = createCachedGorhillGetDomain(gorhill);
/** @type {[string, number][]} */
const rejectDomainsStats = traceSync(
'* Collect reject domain stats',
() => Object.entries(
dudupedDominArray.reduce((acc, cur) => {
const suffix = getDomain(cur);
if (suffix) {
acc[suffix] = (acc[suffix] ?? 0) + 1;
}
return acc;
}, {})
).filter(a => a[1] > 2).sort((a, b) => {
const t = b[1] - a[1];
if (t === 0) {
return a[0].localeCompare(b[0]);
}
return t;
})
);
const domainSorter = createDomainSorter(gorhill);
const domainset = traceSync('* Sort reject domainset', () => dudupedDominArray.sort(domainSorter));
const description = [
'License: AGPL 3.0',
@ -192,7 +210,6 @@ const domainSuffixSet = new Set();
...HOSTS.map(host => ` - ${host[0]}`),
...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
];
const domainset = dudupedDominArray.sort(domainSorter);
await Promise.all([
...createRuleset(
@ -204,21 +221,11 @@ const domainSuffixSet = new Set();
pathResolve(__dirname, '../List/domainset/reject.conf'),
pathResolve(__dirname, '../Clash/domainset/reject.txt')
),
fs.promises.writeFile(
pathResolve(__dirname, '../List/internal/reject-stats.txt'),
Object.entries(rejectDomainsStats)
.filter(a => a[1] > 1)
.sort((a, b) => {
const t = b[1] - a[1];
if (t === 0) {
return a[0].localeCompare(b[0]);
}
return t;
})
.map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`)
.join('\n')
compareAndWriteFile(
rejectDomainsStats.map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`),
pathResolve(__dirname, '../List/internal/reject-stats.txt')
),
// Copy reject_sukka.conf for backward compatibility
fse.copy(pathResolve(__dirname, '../Source/domainset/reject_sukka.conf'), pathResolve(__dirname, '../List/domainset/reject_sukka.conf'))
]);
})();
});

View File

@ -42,7 +42,7 @@ runner(__filename, async () => {
}
}
await Promise.all(promises);
return Promise.all(promises);
});
/**

View File

@ -16,7 +16,7 @@ const fileExists = (path) => {
.catch(() => false);
};
runner(__filename, async () => {
const downloadPreviousBuild = async () => {
const filesList = ['Clash', 'List'];
let allFileExists = true;
@ -79,4 +79,28 @@ runner(__filename, async () => {
}));
await fs.promises.unlink(extractedPath).catch(() => { });
};
/**
 * Download the Public Suffix List into `node_modules/.cache` so later build
 * steps can read it from disk instead of fetching it again.
 *
 * The request and the creation of the cache directory run concurrently.
 *
 * @returns {Promise<void>}
 * @throws {Error} when the server answers with a non-2xx status or without a body
 */
const downloadPublicSuffixList = async () => {
  const publicSuffixDir = resolve(__dirname, '../node_modules/.cache');
  const publicSuffixPath = join(publicSuffixDir, 'public_suffix-list_dat.txt');

  console.log('Download public suffix list.');

  const [resp] = await Promise.all([
    fetch('https://publicsuffix.org/list/public_suffix_list.dat'),
    fse.ensureDir(publicSuffixDir)
  ]);

  // Never persist an error page: other scripts treat the mere existence of the
  // cache file as "the list is available" and would parse the garbage.
  if (!resp.ok || !resp.body) {
    throw new Error(`Failed to download public_suffix_list.dat: ${resp.status} ${resp.statusText}`);
  }

  await pipeline(
    Readable.fromWeb(resp.body),
    fs.createWriteStream(publicSuffixPath)
  );
};
runner(__filename, () => {
return Promise.all([
downloadPreviousBuild(),
downloadPublicSuffixList()
]);
});

View File

@ -23,7 +23,7 @@ const createNode = (key, depth = 0) => ({
});
/**
* @param {string[]} keys
* @param {string[] | Set<string>} keys
*/
const createKeywordFilter = (keys) => {
const root = createNode('root');
@ -39,16 +39,18 @@ const createKeywordFilter = (keys) => {
const map = beginNode.children;
// eslint-disable-next-line guard-for-in -- plain object
for (const key in beginNode.children) {
const node = map[key];
const node = map?.[key];
let failNode = beginNode.fail;
while (failNode && !failNode.children[key]) {
while (failNode && !failNode.children?.[key]) {
failNode = failNode.fail;
}
node.fail = failNode?.children[key] || root;
if (node) {
node.fail = failNode?.children?.[key] || root;
queue.push(node);
queue.push(node);
}
}
idx++;
@ -83,10 +85,9 @@ const createKeywordFilter = (keys) => {
}
};
for (let idx = 0, len = keys.length; idx < len; idx++) {
const key = keys[idx];
put(key, key.length);
}
keys.forEach(k => {
put(k, k.length);
});
build();

47
Build/lib/cache-apply.js Normal file
View File

@ -0,0 +1,47 @@
/**
 * Create a small in-memory memoization cache backed by a `Map`.
 *
 * @param {string} [namespace] label used in the optional exit-time stats line
 * @param {boolean} [printStats] when true (and `namespace` is set), log the hit
 * count and cache size once on process exit
 */
const createCache = (namespace, printStats = false) => {
  const cache = new Map();
  // In-flight `async()` computations, so concurrent callers of the same key
  // share a single promise instead of each running `fn`.
  const pending = new Map();
  let hit = 0;

  if (namespace && printStats) {
    process.on('exit', () => {
      console.log(`🔋 [cache] ${namespace} hit: ${hit}, size: ${cache.size}`);
    });
  }

  return {
    /**
     * Return the cached value for `key`, computing and storing it with `fn`
     * on a miss.
     *
     * @template T
     * @param {string} key
     * @param {() => T} fn
     * @returns {T}
     */
    sync(key, fn) {
      if (cache.has(key)) {
        hit++;
        return cache.get(key);
      }
      const value = fn();
      cache.set(key, value);
      return value;
    },
    /**
     * Async counterpart of `sync`. Only fulfilled values are stored; a
     * rejection is passed to every waiting caller and the next call retries.
     *
     * @template T
     * @param {string} key
     * @param {() => Promise<T>} fn
     * @returns {Promise<T>}
     */
    async async(key, fn) {
      if (cache.has(key)) {
        hit++;
        return cache.get(key);
      }

      const inflight = pending.get(key);
      if (inflight) {
        hit++;
        return inflight;
      }

      // `fn` is still invoked synchronously; wrapping it in an async function
      // turns a synchronous throw into a rejection. The `.finally` callback
      // always runs on a later microtask, i.e. after `pending.set` below.
      const task = (async () => fn())()
        .then((value) => {
          cache.set(key, value);
          return value;
        })
        .finally(() => {
          pending.delete(key);
        });

      pending.set(key, task);
      return task;
    }
  };
};
module.exports.createCache = createCache;

View File

@ -0,0 +1,25 @@
const tldts = require('tldts');
const { createCache } = require('./cache-apply');
const cache = createCache('cached-tld-parse', true);
const sharedConfig = { allowPrivateDomains: true };
/**
 * Memoized `tldts.parse` using the module-wide `sharedConfig`; the raw input
 * string is the cache key.
 *
 * @param {string} domain
 * @returns {ReturnType<typeof import('tldts').parse>}
 */
module.exports.parse = (domain) => {
  const compute = () => tldts.parse(domain, sharedConfig);
  return cache.sync(domain, compute);
};
const gothillGetDomainCache = createCache('cached-gorhill-get-domain', true);
/**
 * Build a memoized `getDomain` on top of a gorhill public-suffix-list instance.
 *
 * The cache key is the input exactly as given (leading dot included); one
 * leading `.` is stripped before the value is handed to `gorhill.getDomain`.
 * `gorhill` is dereferenced on every cache miss, so it must not be null by
 * the time the returned function is called.
 *
 * @param {import('gorhill-publicsuffixlist').default | null} gorhill
 */
module.exports.createCachedGorhillGetDomain = (gorhill) => {
  /**
   * @param {string} domain
   */
  const lookup = (domain) => {
    const compute = () => {
      const hostname = domain[0] === '.' ? domain.slice(1) : domain;
      return gorhill.getDomain(hostname);
    };
    return gothillGetDomainCache.sync(domain, compute);
  };

  return lookup;
};

View File

@ -32,10 +32,11 @@ async function compareAndWriteFile(linesA, filePath) {
const stream = fs.createWriteStream(filePath, { encoding: 'utf-8' });
for (let i = 0, len = linesA.length; i < len; i++) {
// eslint-disable-next-line no-await-in-loop -- backpressure
await writeToStream(stream, linesA[i]);
// eslint-disable-next-line no-await-in-loop -- backpressure
await writeToStream(stream, '\n');
const p = writeToStream(stream, `${linesA[i]}\n`);
if (p) {
// eslint-disable-next-line no-await-in-loop -- backpressure, besides we only wait for drain
await p;
}
}
stream.end();
} else {
@ -48,13 +49,13 @@ module.exports.compareAndWriteFile = compareAndWriteFile;
* @param {import('fs').WriteStream} stream
* @param {string} data
*/
async function writeToStream(stream, data) {
function writeToStream(stream, data) {
if (!stream.write(data)) {
return /** @type {Promise<void>} */(new Promise((resolve) => {
stream.once('drain', () => { resolve(); });
stream.once('drain', resolve);
}));
}
return Promise.resolve();
return null;
}
/**

View File

@ -1,3 +1,4 @@
// @ts-check
const Trie = require('./trie');
/**
@ -13,8 +14,10 @@ const domainDeduper = (inputDomains) => {
continue;
}
// delete all included subdomains (ends with `.example.com`)
trie.find(d, false).forEach(f => sets.delete(f));
// if `.example.com` exists, then `example.com` should also be removed
const a = d.slice(1);
if (trie.has(a)) {
sets.delete(a);

View File

@ -34,7 +34,7 @@ module.exports.createReadlineInterfaceFromResponse = createReadlineInterfaceFrom
/**
* @param {import('undici').RequestInfo} url
* @param {import('undici').RequestInit | undefined} [opt]
* @param {import('undici').RequestInit} [opt]
*/
module.exports.fetchRemoteTextAndCreateReadlineInterface = async (url, opt) => {
const resp = await fetchWithRetry(url, opt);

View File

@ -1,4 +1,11 @@
// @ts-check
const { fetch } = require('undici');
const fetchWithRetry = /** @type {fetch} */(require('@vercel/fetch-retry')(fetch));
const undici = require('undici');
// Enable HTTP/2 support
undici.setGlobalDispatcher(new undici.Agent({
allowH2: true,
pipelining: 10
}));
const fetchWithRetry = /** @type {import('undici').fetch} */(require('@vercel/fetch-retry')(undici.fetch));
module.exports.fetchWithRetry = fetchWithRetry;

View File

@ -0,0 +1,34 @@
const { toASCII } = require('punycode/');
const fs = require('fs');
const path = require('path');
const publicSuffixPath = path.resolve(__dirname, '../../node_modules/.cache/public_suffix-list_dat.txt');
/**
 * Load the Public Suffix List as text: from the on-disk cache file when it
 * exists, otherwise straight from publicsuffix.org.
 *
 * @returns {Promise<string>}
 * @throws {Error} when the remote fallback answers with a non-2xx status
 */
const getPublicSuffixListDat = async () => {
  if (fs.existsSync(publicSuffixPath)) {
    return fs.promises.readFile(publicSuffixPath, 'utf-8');
  }

  console.log('public_suffix_list.dat not found, fetch directly from remote.');

  const resp = await fetch('https://publicsuffix.org/list/public_suffix_list.dat');
  // Fail loudly instead of feeding an HTTP error page to the suffix-list parser.
  if (!resp.ok) {
    throw new Error(`Failed to fetch public_suffix_list.dat: ${resp.status} ${resp.statusText}`);
  }
  return resp.text();
};
/**
 * Load the gorhill public-suffix-list module, feed it the list text and
 * switch it to its WASM implementation.
 *
 * @returns {Promise<import('gorhill-publicsuffixlist').PublicSuffixList>}
 */
const getGorhillPublicSuffix = async () => {
  /**
   * File-system stand-in for `fetch`, passed to `enableWASM`.
   * NOTE(review): presumably `url` points at the .wasm binary shipped with the
   * package — confirm against the library.
   *
   * @param {import('fs').PathLike} url
   */
  const customFetch = async (url) => {
    const buf = await fs.promises.readFile(url);
    return {
      arrayBuffer() {
        // A Buffer can be a view into a larger backing ArrayBuffer, so return
        // exactly the bytes of this file rather than `buf.buffer` as a whole.
        return Promise.resolve(buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength));
      }
    };
  };

  // Read the list and load the module concurrently.
  const [publicSuffixListDat, { default: gorhill }] = await Promise.all([
    getPublicSuffixListDat(),
    import('gorhill-publicsuffixlist')
  ]);

  gorhill.parse(publicSuffixListDat, toASCII);
  await gorhill.enableWASM({ customFetch });

  return gorhill;
};
const getGorhillPublicSuffixPromise = getGorhillPublicSuffix();
module.exports.getGorhillPublicSuffixPromise = getGorhillPublicSuffixPromise;

View File

@ -1,13 +1,14 @@
// @ts-check
const { parse } = require('tldts');
const tldts = require('./cached-tld-parse');
/**
* @param {string} domain
*/
module.exports.isDomainLoose = (domain) => {
const { isIcann, isPrivate, isIp } = parse(domain, { allowPrivateDomains: true });
const { isIcann, isPrivate, isIp } = tldts.parse(domain);
return !!(!isIp && (isIcann || isPrivate));
};
/**
* @param {string} domain
*/
@ -16,12 +17,15 @@ module.exports.normalizeDomain = (domain) => {
return null;
}
const { isIcann, isPrivate, hostname, isIp } = parse(domain, { allowPrivateDomains: true });
const { isIcann, isPrivate, hostname, isIp } = tldts.parse(domain);
if (isIp) {
return null;
}
if (isIcann || isPrivate) {
if (hostname?.[0] === '.') {
return hostname.slice(1);
}
return hostname;
}

View File

@ -179,19 +179,15 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart
const downloadStart = performance.now();
try {
const controller = new AbortController();
const signal = controller.signal;
/** @type string[] */
filterRules = (
await Promise.any(
[filterRulesUrl, ...(fallbackUrls || [])].map(
url => fetchWithRetry(url, { signal })
.then(r => r.text())
.then(text => {
controller.abort();
return text;
})
)
[filterRulesUrl, ...(fallbackUrls || [])].map(async url => {
const text = await fetchWithRetry(url, { signal: controller.signal }).then(r => r.text());
controller.abort();
return text;
})
)
).split('\n').map(line => line.trim());
} catch (e) {
@ -317,10 +313,7 @@ function parse($line, includeThirdParties) {
if (
// (line.startsWith('@@|') || line.startsWith('@@.'))
(
line[2] === '|'
|| line[2] === '.'
)
(line[2] === '|' || line[2] === '.')
&& (
lineEndsWithCaret
|| lineEndsWithCaretVerticalBar
@ -374,7 +367,7 @@ function parse($line, includeThirdParties) {
return null;
}
const lineStartsWithSingleDot = line.startsWith('.');
const lineStartsWithSingleDot = line[0] === '.';
if (
lineStartsWithSingleDot
&& (
@ -437,7 +430,7 @@ function parse($line, includeThirdParties) {
return null;
}
const tryNormalizeDomain = normalizeDomain(lineStartsWithSingleDot ? line.slice(1) : line);
const tryNormalizeDomain = normalizeDomain(line);
if (
tryNormalizeDomain
&& (

View File

@ -1,14 +1,4 @@
// @ts-check
const tldts = require('tldts');
const cache1 = Object.create(null);
/**
* @param {string} url
* @returns {ReturnType<typeof tldts.parse>}
*/
// eslint-disable-next-line no-return-assign -- cache
const parse = (url) => (cache1[url] ||= tldts.parse(url, { allowPrivateDomains: true }));
/**
* @param {string | null} a
* @param {string | null} b
@ -49,32 +39,49 @@ const compare = (a, b) => {
};
/**
* @param {string} a
* @param {string} b
* @returns {0 | 1 | -1}
* @param {import('gorhill-publicsuffixlist').default | null} [gorhill]
*/
const domainSorter = (a, b) => {
if (a === b) return 0;
const createDomainSorter = (gorhill = null) => {
const cached = require('./cached-tld-parse');
const aParsed = parse(a[0] === '.' ? a.slice(1) : a);
const bParsed = parse(b[0] === '.' ? b.slice(1) : b);
if (gorhill) {
/**
* @param {string} input
*/
const getDomain = cached.createCachedGorhillGetDomain(gorhill);
const resultDomainWithoutSuffix = compare(aParsed.domainWithoutSuffix, bParsed.domainWithoutSuffix);
if (resultDomainWithoutSuffix !== 0) {
return resultDomainWithoutSuffix;
/**
* @param {string} a
* @param {string} b
* @returns {0 | 1 | -1}
*/
return (a, b) => {
if (a === b) return 0;
const aDomain = getDomain(a);
const bDomain = getDomain(b);
const resultDomain = compare(aDomain, bDomain);
return resultDomain !== 0 ? resultDomain : compare(a, b);
};
}
const resultSuffix = compare(aParsed.publicSuffix, bParsed.publicSuffix);
if (resultSuffix !== 0) {
return resultSuffix;
}
const tldts = cached;
/**
* @param {string} a
* @param {string} b
* @returns {0 | 1 | -1}
*/
return (a, b) => {
if (a === b) return 0;
const resultSubdomain = compare(aParsed.subdomain, bParsed.subdomain);
if (resultSubdomain !== 0) {
return resultSubdomain;
}
const aDomain = tldts.parse(a).domain;
const bDomain = tldts.parse(b).domain;
return 0;
const resultDomain = compare(aDomain, bDomain);
return resultDomain !== 0 ? resultDomain : compare(a, b);
};
};
module.exports = domainSorter;
module.exports = createDomainSorter();
module.exports.createDomainSorter = createDomainSorter;

View File

@ -1,15 +1,42 @@
const path = require('path');
const { performance } = require('perf_hooks');
/**
* @param {Function} fn
* @param {string} __filename
* @template T
* @param {string} prefix
* @param {() => T} fn
* @returns {T}
*/
module.exports.runner = async (__filename, fn) => {
const runnerName = path.basename(__filename, path.extname(__filename));
const start = Date.now();
const result = await fn();
const end = Date.now();
console.log(`⌛ [${runnerName}]: ${end - start}ms`);
const traceSync = (prefix, fn) => {
  // Time one synchronous call of `fn`, log "<prefix>: <ms>ms", hand back its result.
  const startedAt = performance.now();
  const value = fn();
  const elapsedMs = performance.now() - startedAt;

  console.log(`${prefix}: ${elapsedMs.toFixed(3)}ms`);

  return value;
};
module.exports.traceSync = traceSync;
/**
 * Await `fn()`, log how long it took as `<prefix>: <ms>ms`, and resolve with
 * its result. If `fn` throws or rejects, nothing is logged and the error
 * propagates to the caller.
 *
 * @template T
 * @param {string} prefix
 * @param {() => Promise<T>} fn
 * @returns {Promise<T>}
 */
const traceAsync = async (prefix, fn) => {
  const startedAt = performance.now();
  const value = await fn();
  const elapsedMs = performance.now() - startedAt;

  console.log(`${prefix}: ${elapsedMs.toFixed(3)}ms`);

  return value;
};
module.exports.traceAsync = traceAsync;
/**
 * Run a build script's async entry point through `traceAsync`, labelled with
 * the script's file name (directory and extension stripped).
 *
 * @template T
 * @param {string} __filename
 * @param {() => Promise<T>} fn
 * @returns {Promise<T>}
 */
module.exports.runner = async (__filename, fn) => {
  const scriptName = path.basename(__filename, path.extname(__filename));
  return traceAsync(`⌛ [${scriptName}]`, fn);
};

36
Build/mod.d.ts vendored Normal file
View File

@ -0,0 +1,36 @@
// Ambient type declarations for the untyped 'gorhill-publicsuffixlist' package.
// Only the default export (a single PublicSuffixList object) is described.
declare module 'gorhill-publicsuffixlist' {
// Value accepted by `fromSelfie` and produced by `toSelfie`: either a string
// or an object carrying `magic` and `buf32`.
// NOTE(review): presumably a serialized snapshot of the parsed list — confirm against upstream.
type Selfie =
| string
| {
magic: number;
buf32: number[];
};
// Optional helper accepted by `fromSelfie`.
interface Decoder {
decode: (bufferStr: string, buffer: ArrayBuffer) => void;
decodeSize: (bufferStr: string) => number;
}
// Optional helper accepted by `toSelfie`.
interface Encoder {
encode: (buffer: ArrayBuffer, length: number) => string;
}
export interface PublicSuffixList {
version: string;
// `toAscii` converts a string to its ASCII form (callers in this repo pass punycode's `toASCII`).
parse(text: string, toAscii: (input: string) => string): void;
getPublicSuffix(hostname: string): string;
getDomain(hostname: string): string;
suffixInPSL(hostname: string): boolean;
toSelfie(encoder?: null | Encoder): Selfie;
fromSelfie(selfie: Selfie, decoder?: null | Decoder): boolean;
// NOTE(review): `customFetch` is declared as resolving to a `Blob`; confirm
// which members of the result the library actually uses.
enableWASM(options?: {
customFetch?: null | ((url: URL) => Promise<Blob>);
}): Promise<boolean>;
disableWASM(): Promise<boolean>;
}
const psl: PublicSuffixList;
export default psl;
}

View File

@ -1,6 +1,6 @@
// Surge Domain Set can not include root domain from public suffix list.
const tldts = require('tldts');
const tldts = require('tldts'); // hit ratio way too low, dont cache
const picocolors = require('picocolors');
const path = require('path');
const listDir = require('@sukka/listdir');
@ -21,7 +21,7 @@ const validateDomainSet = async (filePath) => {
if (!line) {
continue;
}
const domain = line.charCodeAt(0) === 46 ? line.slice(1) : line;
const domain = line[0] === '.' ? line.slice(1) : line;
const parsed = tldts.parse(domain, { allowPrivateDomains: true, detectIp: false });
if (

View File

@ -158,9 +158,11 @@
"ci-info": "^3.8.0",
"cidr-tools-wasm": "^0.0.11",
"fs-extra": "^11.1.1",
"gorhill-publicsuffixlist": "github:gorhill/publicsuffixlist.js",
"mnemonist": "^0.39.5",
"path-scurry": "^1.10.1",
"picocolors": "^1.0.0",
"punycode": "^2.3.0",
"table": "^6.8.1",
"tar": "^6.2.0",
"tldts": "^6.0.14",

19
pnpm-lock.yaml generated
View File

@ -34,6 +34,9 @@ dependencies:
fs-extra:
specifier: ^11.1.1
version: 11.1.1
gorhill-publicsuffixlist:
specifier: github:gorhill/publicsuffixlist.js
version: github.com/gorhill/publicsuffixlist.js/3a1bc623073079184ff76933b88b7bf4f5d48978
mnemonist:
specifier: ^0.39.5
version: 0.39.5
@ -43,6 +46,9 @@ dependencies:
picocolors:
specifier: ^1.0.0
version: 1.0.0
punycode:
specifier: ^2.3.0
version: 2.3.0
table:
specifier: ^6.8.1
version: 6.8.1
@ -1318,8 +1324,8 @@ packages:
signal-exit: 3.0.7
dev: true
/punycode@2.1.1:
resolution: {integrity: sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==}
/punycode@2.3.0:
resolution: {integrity: sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA==}
engines: {node: '>=6'}
/queue-microtask@1.2.3:
@ -1571,7 +1577,7 @@ packages:
/uri-js@4.4.1:
resolution: {integrity: sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==}
dependencies:
punycode: 2.1.1
punycode: 2.3.0
/webidl-conversions@3.0.1:
resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==}
@ -1661,3 +1667,10 @@ packages:
resolution: {integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==}
engines: {node: '>=10'}
dev: true
github.com/gorhill/publicsuffixlist.js/3a1bc623073079184ff76933b88b7bf4f5d48978:
resolution: {tarball: https://codeload.github.com/gorhill/publicsuffixlist.js/tar.gz/3a1bc623073079184ff76933b88b7bf4f5d48978}
name: '@gorhill/publicsuffixlist'
version: 3.0.1
engines: {node: '>=14.0.0', npm: '>=6.14.4'}
dev: false