Perf: further speed up infra

This commit is contained in:
SukkaW
2023-09-14 20:15:49 +08:00
parent adb8b43357
commit 78afa595a9
25 changed files with 431 additions and 173 deletions

View File

@@ -23,7 +23,7 @@ const createNode = (key, depth = 0) => ({
});
/**
* @param {string[]} keys
* @param {string[] | Set<string>} keys
*/
const createKeywordFilter = (keys) => {
const root = createNode('root');
@@ -39,16 +39,18 @@ const createKeywordFilter = (keys) => {
const map = beginNode.children;
// eslint-disable-next-line guard-for-in -- plain object
for (const key in beginNode.children) {
const node = map[key];
const node = map?.[key];
let failNode = beginNode.fail;
while (failNode && !failNode.children[key]) {
while (failNode && !failNode.children?.[key]) {
failNode = failNode.fail;
}
node.fail = failNode?.children[key] || root;
if (node) {
node.fail = failNode?.children?.[key] || root;
queue.push(node);
queue.push(node);
}
}
idx++;
@@ -83,10 +85,9 @@ const createKeywordFilter = (keys) => {
}
};
for (let idx = 0, len = keys.length; idx < len; idx++) {
const key = keys[idx];
put(key, key.length);
}
keys.forEach(k => {
put(k, k.length);
});
build();

47
Build/lib/cache-apply.js Normal file
View File

@@ -0,0 +1,47 @@
/**
 * Create a small in-memory memoization cache backed by a `Map`.
 *
 * The same underlying map serves both `sync` and `async`, so a value stored
 * through one method is visible through the other.
 *
 * @param {string} [namespace] label used in the statistics line printed on exit
 * @param {boolean} [printStats] when true (and `namespace` is set), log the hit
 * count and cache size on the process `exit` event
 */
const createCache = (namespace, printStats = false) => {
  const cache = new Map();
  // Promises of async computations that have started but not settled yet.
  // Kept apart from `cache` so `sync()` can never observe a pending promise.
  const inflight = new Map();
  let hit = 0;

  if (namespace && printStats) {
    process.on('exit', () => {
      console.log(`🔋 [cache] ${namespace} hit: ${hit}, size: ${cache.size}`);
    });
  }

  return {
    /**
     * Return the cached value for `key`, computing and storing it with `fn` on a miss.
     *
     * @template T
     * @param {string} key
     * @param {() => T} fn
     * @returns {T}
     */
    sync(key, fn) {
      // `has` (not a truthiness check) so cached `undefined`/falsy values still count as hits.
      if (cache.has(key)) {
        hit++;
        return cache.get(key);
      }
      const value = fn();
      cache.set(key, value);
      return value;
    },
    /**
     * Async variant of `sync`. Concurrent calls for the same key share a single
     * invocation of `fn`; a rejection is not cached, so a later call retries.
     *
     * @template T
     * @param {string} key
     * @param {() => Promise<T>} fn
     * @returns {Promise<T>}
     */
    async async(key, fn) {
      if (cache.has(key)) {
        hit++;
        return cache.get(key);
      }

      const pending = inflight.get(key);
      if (pending) {
        // Someone else is already computing this key: join them instead of calling `fn` again.
        hit++;
        return pending;
      }

      // The async wrapper turns a synchronous throw from `fn` into a rejection.
      const promise = (async () => fn())();
      inflight.set(key, promise);
      try {
        const value = await promise;
        cache.set(key, value);
        return value;
      } finally {
        inflight.delete(key);
      }
    }
  };
};
module.exports.createCache = createCache;

View File

@@ -0,0 +1,25 @@
const tldts = require('tldts');
const { createCache } = require('./cache-apply');
// Memo of tldts results keyed by the raw input string; hit statistics are printed on exit.
const cache = createCache('cached-tld-parse', true);
// Single options object reused for every tldts.parse call made through this module.
const sharedConfig = { allowPrivateDomains: true };

/**
 * Memoized wrapper around `tldts.parse` that always passes `allowPrivateDomains: true`.
 * Repeated calls with the same string return the stored result object.
 *
 * @param {string} domain
 * @returns {ReturnType<typeof import('tldts').parse>}
 */
module.exports.parse = (domain) => {
  const compute = () => tldts.parse(domain, sharedConfig);
  return cache.sync(domain, compute);
};
// One cache for the whole module: every function returned by
// createCachedGorhillGetDomain reads and writes these same entries.
const gorhillGetDomainCache = createCache('cached-gorhill-get-domain', true);

/**
 * Build a memoized `getDomain` lookup on top of a gorhill public-suffix-list instance.
 *
 * NOTE(review): the type allows `null`, but the returned function dereferences
 * `gorhill` on a cache miss and would throw in that case.
 *
 * @param {import('gorhill-publicsuffixlist').default | null} gorhill
 */
module.exports.createCachedGorhillGetDomain = (gorhill) => {
  /**
   * @param {string} domain
   */
  return (domain) => {
    const compute = () => {
      // A single leading dot is dropped before the lookup; the cache key keeps it.
      const hostname = domain[0] === '.' ? domain.slice(1) : domain;
      return gorhill.getDomain(hostname);
    };
    return gorhillGetDomainCache.sync(domain, compute);
  };
};

View File

@@ -32,10 +32,11 @@ async function compareAndWriteFile(linesA, filePath) {
const stream = fs.createWriteStream(filePath, { encoding: 'utf-8' });
for (let i = 0, len = linesA.length; i < len; i++) {
// eslint-disable-next-line no-await-in-loop -- backpressure
await writeToStream(stream, linesA[i]);
// eslint-disable-next-line no-await-in-loop -- backpressure
await writeToStream(stream, '\n');
const p = writeToStream(stream, `${linesA[i]}\n`);
if (p) {
// eslint-disable-next-line no-await-in-loop -- backpressure, besides we only wait for drain
await p;
}
}
stream.end();
} else {
@@ -48,13 +49,13 @@ module.exports.compareAndWriteFile = compareAndWriteFile;
* @param {import('fs').WriteStream} stream
* @param {string} data
*/
async function writeToStream(stream, data) {
function writeToStream(stream, data) {
if (!stream.write(data)) {
return /** @type {Promise<void>} */(new Promise((resolve) => {
stream.once('drain', () => { resolve(); });
stream.once('drain', resolve);
}));
}
return Promise.resolve();
return null;
}
/**

View File

@@ -1,3 +1,4 @@
// @ts-check
const Trie = require('./trie');
/**
@@ -13,8 +14,10 @@ const domainDeduper = (inputDomains) => {
continue;
}
// delete all included subdomains (ends with `.example.com`)
trie.find(d, false).forEach(f => sets.delete(f));
// if `.example.com` exists, then `example.com` should also be removed
const a = d.slice(1);
if (trie.has(a)) {
sets.delete(a);

View File

@@ -34,7 +34,7 @@ module.exports.createReadlineInterfaceFromResponse = createReadlineInterfaceFrom
/**
* @param {import('undici').RequestInfo} url
* @param {import('undici').RequestInit | undefined} [opt]
* @param {import('undici').RequestInit} [opt]
*/
module.exports.fetchRemoteTextAndCreateReadlineInterface = async (url, opt) => {
const resp = await fetchWithRetry(url, opt);

View File

@@ -1,4 +1,11 @@
// @ts-check
const { fetch } = require('undici');
const fetchWithRetry = /** @type {fetch} */(require('@vercel/fetch-retry')(fetch));
const undici = require('undici');
// Enable HTTP/2 support
undici.setGlobalDispatcher(new undici.Agent({
allowH2: true,
pipelining: 10
}));
const fetchWithRetry = /** @type {import('undici').fetch} */(require('@vercel/fetch-retry')(undici.fetch));
module.exports.fetchWithRetry = fetchWithRetry;

View File

@@ -0,0 +1,34 @@
const { toASCII } = require('punycode/');
const fs = require('fs');
const path = require('path');
const publicSuffixPath = path.resolve(__dirname, '../../node_modules/.cache/public_suffix-list_dat.txt');
/**
 * Load the public suffix list text.
 *
 * Reads the copy at `publicSuffixPath` first; when that file does not exist,
 * downloads the list from publicsuffix.org instead.
 *
 * @returns {Promise<string>} the raw contents of public_suffix_list.dat
 * @throws {Error} when the local file cannot be read for a reason other than
 * "not found", or when the remote responds with a non-2xx status
 */
const getPublicSuffixListDat = async () => {
  try {
    // Read directly instead of existsSync + read: no blocking stat call and no
    // window in which the file can disappear between the check and the read.
    return await fs.promises.readFile(publicSuffixPath, 'utf-8');
  } catch (err) {
    const isMissing = err?.code === 'ENOENT' || err?.code === 'ENOTDIR';
    if (!isMissing) {
      throw err;
    }
  }

  console.log('public_suffix_list.dat not found, fetch directly from remote.');
  const resp = await fetch('https://publicsuffix.org/list/public_suffix_list.dat');
  if (!resp.ok) {
    // Without this check an HTTP error page would be returned as if it were the list.
    throw new Error(`Failed to fetch public_suffix_list.dat: ${resp.status} ${resp.statusText}`);
  }
  return resp.text();
};
/**
 * Load the gorhill public-suffix-list module, feed it the public suffix list,
 * enable its WASM mode and return the ready instance.
 *
 * @returns {Promise<import('gorhill-publicsuffixlist').default>}
 */
const getGorhillPublicSuffix = async () => {
  /**
   * Minimal fetch replacement handed to `enableWASM`: reads `url` from disk and
   * exposes only `arrayBuffer()`.
   * NOTE(review): assumes gorhill passes something `fs.readFile` accepts
   * (a path or file URL) — confirm against the library.
   */
  const customFetch = async (url) => {
    const buf = await fs.promises.readFile(url);
    return {
      arrayBuffer() {
        // `buf.buffer` is the backing ArrayBuffer, which may be larger than the
        // Buffer and start at a different offset; copy out exactly the file's bytes.
        return Promise.resolve(buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength));
      }
    };
  };

  // The list text and the ESM-only module are independent, so load them in parallel.
  const [publicSuffixListDat, { default: gorhill }] = await Promise.all([
    getPublicSuffixListDat(),
    import('gorhill-publicsuffixlist')
  ]);

  gorhill.parse(publicSuffixListDat, toASCII);
  await gorhill.enableWASM({ customFetch });

  return gorhill;
};
const getGorhillPublicSuffixPromise = getGorhillPublicSuffix();
module.exports.getGorhillPublicSuffixPromise = getGorhillPublicSuffixPromise;

View File

@@ -1,13 +1,14 @@
// @ts-check
const { parse } = require('tldts');
const tldts = require('./cached-tld-parse');
/**
* @param {string} domain
*/
module.exports.isDomainLoose = (domain) => {
const { isIcann, isPrivate, isIp } = parse(domain, { allowPrivateDomains: true });
const { isIcann, isPrivate, isIp } = tldts.parse(domain);
return !!(!isIp && (isIcann || isPrivate));
};
/**
* @param {string} domain
*/
@@ -16,12 +17,15 @@ module.exports.normalizeDomain = (domain) => {
return null;
}
const { isIcann, isPrivate, hostname, isIp } = parse(domain, { allowPrivateDomains: true });
const { isIcann, isPrivate, hostname, isIp } = tldts.parse(domain);
if (isIp) {
return null;
}
if (isIcann || isPrivate) {
if (hostname?.[0] === '.') {
return hostname.slice(1);
}
return hostname;
}

View File

@@ -179,19 +179,15 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart
const downloadStart = performance.now();
try {
const controller = new AbortController();
const signal = controller.signal;
/** @type string[] */
filterRules = (
await Promise.any(
[filterRulesUrl, ...(fallbackUrls || [])].map(
url => fetchWithRetry(url, { signal })
.then(r => r.text())
.then(text => {
controller.abort();
return text;
})
)
[filterRulesUrl, ...(fallbackUrls || [])].map(async url => {
const text = await fetchWithRetry(url, { signal: controller.signal }).then(r => r.text());
controller.abort();
return text;
})
)
).split('\n').map(line => line.trim());
} catch (e) {
@@ -317,10 +313,7 @@ function parse($line, includeThirdParties) {
if (
// (line.startsWith('@@|') || line.startsWith('@@.'))
(
line[2] === '|'
|| line[2] === '.'
)
(line[2] === '|' || line[2] === '.')
&& (
lineEndsWithCaret
|| lineEndsWithCaretVerticalBar
@@ -374,7 +367,7 @@ function parse($line, includeThirdParties) {
return null;
}
const lineStartsWithSingleDot = line.startsWith('.');
const lineStartsWithSingleDot = line[0] === '.';
if (
lineStartsWithSingleDot
&& (
@@ -437,7 +430,7 @@ function parse($line, includeThirdParties) {
return null;
}
const tryNormalizeDomain = normalizeDomain(lineStartsWithSingleDot ? line.slice(1) : line);
const tryNormalizeDomain = normalizeDomain(line);
if (
tryNormalizeDomain
&& (

View File

@@ -1,14 +1,4 @@
// @ts-check
const tldts = require('tldts');
const cache1 = Object.create(null);
/**
* @param {string} url
* @returns {ReturnType<typeof tldts.parse>}
*/
// eslint-disable-next-line no-return-assign -- cache
const parse = (url) => (cache1[url] ||= tldts.parse(url, { allowPrivateDomains: true }));
/**
* @param {string | null} a
* @param {string | null} b
@@ -49,32 +39,49 @@ const compare = (a, b) => {
};
/**
* @param {string} a
* @param {string} b
* @returns {0 | 1 | -1}
* @param {import('gorhill-publicsuffixlist').default | null} [gorhill]
*/
const domainSorter = (a, b) => {
if (a === b) return 0;
const createDomainSorter = (gorhill = null) => {
const cached = require('./cached-tld-parse');
const aParsed = parse(a[0] === '.' ? a.slice(1) : a);
const bParsed = parse(b[0] === '.' ? b.slice(1) : b);
if (gorhill) {
/**
* @param {string} input
*/
const getDomain = cached.createCachedGorhillGetDomain(gorhill);
const resultDomainWithoutSuffix = compare(aParsed.domainWithoutSuffix, bParsed.domainWithoutSuffix);
if (resultDomainWithoutSuffix !== 0) {
return resultDomainWithoutSuffix;
/**
* @param {string} a
* @param {string} b
* @returns {0 | 1 | -1}
*/
return (a, b) => {
if (a === b) return 0;
const aDomain = getDomain(a);
const bDomain = getDomain(b);
const resultDomain = compare(aDomain, bDomain);
return resultDomain !== 0 ? resultDomain : compare(a, b);
};
}
const resultSuffix = compare(aParsed.publicSuffix, bParsed.publicSuffix);
if (resultSuffix !== 0) {
return resultSuffix;
}
const tldts = cached;
/**
* @param {string} a
* @param {string} b
* @returns {0 | 1 | -1}
*/
return (a, b) => {
if (a === b) return 0;
const resultSubdomain = compare(aParsed.subdomain, bParsed.subdomain);
if (resultSubdomain !== 0) {
return resultSubdomain;
}
const aDomain = tldts.parse(a).domain;
const bDomain = tldts.parse(b).domain;
return 0;
const resultDomain = compare(aDomain, bDomain);
return resultDomain !== 0 ? resultDomain : compare(a, b);
};
};
module.exports = domainSorter;
module.exports = createDomainSorter();
module.exports.createDomainSorter = createDomainSorter;

View File

@@ -1,15 +1,42 @@
const path = require('path');
const { performance } = require('perf_hooks');
/**
* @param {Function} fn
* @param {string} __filename
* @template T
* @param {string} prefix
* @param {() => T} fn
* @returns {T}
*/
module.exports.runner = async (__filename, fn) => {
const runnerName = path.basename(__filename, path.extname(__filename));
const start = Date.now();
const result = await fn();
const end = Date.now();
console.log(`⌛ [${runnerName}]: ${end - start}ms`);
const traceSync = (prefix, fn) => {
  // Time only the callback itself; the log call happens after the clock is read.
  const startedAt = performance.now();
  const result = fn();
  const elapsedMs = performance.now() - startedAt;

  console.log(`${prefix}: ${elapsedMs.toFixed(3)}ms`);
  return result;
};
module.exports.traceSync = traceSync;
/**
 * Await `fn()`, log how long it took as `<prefix>: <ms>ms` (three decimals),
 * and resolve with whatever `fn` resolved to.
 *
 * @template T
 * @param {string} prefix text printed before the elapsed time
 * @param {() => Promise<T>} fn
 * @returns {Promise<T>}
 */
const traceAsync = async (prefix, fn) => {
  const startedAt = performance.now();
  const result = await fn();
  // Read the clock before logging so the log call is not part of the measurement.
  const elapsedMs = performance.now() - startedAt;

  console.log(`${prefix}: ${elapsedMs.toFixed(3)}ms`);
  return result;
};
module.exports.traceAsync = traceAsync;
/**
 * Run `fn` through `traceAsync`, labelled with the base name of `__filename`
 * (extension removed) in the form `⌛ [name]`.
 *
 * @template T
 * @param {string} __filename path of the calling script
 * @param {() => Promise<T>} fn
 * @returns {Promise<T>}
 */
module.exports.runner = async (__filename, fn) => {
  const extension = path.extname(__filename);
  const runnerName = path.basename(__filename, extension);
  return traceAsync(`⌛ [${runnerName}]`, fn);
};