From 83e94403e89cabd231b8898eda489f703169a858 Mon Sep 17 00:00:00 2001
From: SukkaW
Date: Thu, 31 Jul 2025 22:09:14 +0800
Subject: [PATCH] Perf: process publicsuffix in a worker thread

---
 Build/build-cdn-download-conf.ts | 111 ++++++++++++++++++++-----------
 1 file changed, 67 insertions(+), 44 deletions(-)

diff --git a/Build/build-cdn-download-conf.ts b/Build/build-cdn-download-conf.ts
index 92a87874..eb792ce0 100644
--- a/Build/build-cdn-download-conf.ts
+++ b/Build/build-cdn-download-conf.ts
@@ -1,57 +1,77 @@
 import path from 'node:path';
-import { fetchRemoteTextByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
-import { HostnameTrie } from './lib/trie';
+import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
 import { task } from './trace';
 import { SHARED_DESCRIPTION } from './constants/description';
 import { appendArrayInPlace } from 'foxts/append-array-in-place';
 import { SOURCE_DIR } from './constants/dir';
 import { DomainsetOutput } from './lib/rules/domainset';
 import { CRASHLYTICS_WHITELIST } from './constants/reject-data-source';
-import { appendSetElementsToArray } from 'foxts/append-set-elements-to-array';
-
-const getS3OSSDomainsPromise = (async (): Promise<Set<string>> => {
-  const trie = new HostnameTrie();
-
-  for await (const line of await fetchRemoteTextByLine('https://publicsuffix.org/list/public_suffix_list.dat', true)) {
-    trie.add(line);
-  }
-
-  /**
-   * Extract OSS domain from publicsuffix list
-   */
-  const S3OSSDomains = new Set<string>();
-
-  trie.find('.amazonaws.com').forEach((line: string) => {
-    if (
-      (line.startsWith('s3-') || line.startsWith('s3.'))
-      && !line.includes('cn-')
-    ) {
-      S3OSSDomains.add('.' + line);
-    }
-  });
-  trie.find('.scw.cloud').forEach((line: string) => {
-    if (
-      (line.startsWith('s3-') || line.startsWith('s3.'))
-      // && !line.includes('cn-')
-    ) {
-      S3OSSDomains.add('.' + line);
-    }
-  });
-  trie.find('sakurastorage.jp').forEach((line: string) => {
-    if (
-      (line.startsWith('s3-') || line.startsWith('s3.'))
-    ) {
-      S3OSSDomains.add('.' + line);
-    }
-  });
-
-  return S3OSSDomains;
-})();
+import Worktank from 'worktank';
 
 const cdnDomainsListPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/cdn.conf'));
 const downloadDomainSetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/download.conf'));
 const steamDomainSetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/game-download.conf'));
 
+const pool = new Worktank({
+  pool: {
+    name: 'extract-s3-from-publicssuffix',
+    size: 1 // The number of workers to keep in the pool, if more workers are needed they will be spawned up to this limit
+  },
+  worker: {
+    autoAbort: 10000,
+    autoTerminate: 20000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
+    autoInstantiate: true,
+    methods: {
+      // eslint-disable-next-line object-shorthand -- workertank
+      getS3OSSDomains: async function (importMetaUrl: string): Promise<string[]> {
+        // TODO: createRequire is a temporary workaround for https://github.com/nodejs/node/issues/51956
+        const { default: module } = await import('node:module');
+        const __require = module.createRequire(importMetaUrl);
+
+        const { HostnameTrie } = __require('./lib/trie') as typeof import('./lib/trie');
+        const { fetchRemoteTextByLine } = __require('./lib/fetch-text-by-line') as typeof import('./lib/fetch-text-by-line');
+
+        const trie = new HostnameTrie();
+
+        for await (const line of await fetchRemoteTextByLine('https://publicsuffix.org/list/public_suffix_list.dat', true)) {
+          trie.add(line);
+        }
+
+        /**
+         * Extract OSS domain from publicsuffix list
+         */
+        const S3OSSDomains: string[] = [];
+
+        trie.find('.amazonaws.com').forEach((line: string) => {
+          if (
+            (line.startsWith('s3-') || line.startsWith('s3.'))
+            && !line.includes('cn-')
+          ) {
+            S3OSSDomains.push('.' + line);
+          }
+        });
+        trie.find('.scw.cloud').forEach((line: string) => {
+          if (
+            (line.startsWith('s3-') || line.startsWith('s3.'))
+            // && !line.includes('cn-')
+          ) {
+            S3OSSDomains.push('.' + line);
+          }
+        });
+        trie.find('sakurastorage.jp').forEach((line: string) => {
+          if (
+            (line.startsWith('s3-') || line.startsWith('s3.'))
+          ) {
+            S3OSSDomains.push('.' + line);
+          }
+        });
+
+        return S3OSSDomains;
+      }
+    }
+  }
+});
+
 export const buildCdnDownloadConf = task(require.main === module, __filename)(async (span) => {
   const [
     S3OSSDomains,
@@ -59,14 +79,17 @@ export const buildCdnDownloadConf = task(require.main === module, __filename)(as
     downloadDomainSet,
     steamDomainSet
   ] = await Promise.all([
-    span.traceChildPromise('download public suffix list for s3', getS3OSSDomainsPromise),
+    span.traceChildAsync('download public suffix list for s3', () => pool.exec(
+      'getS3OSSDomains',
+      [import.meta.url]
+    ).finally(() => pool.terminate())),
     cdnDomainsListPromise,
     downloadDomainSetPromise,
     steamDomainSetPromise
  ]);
 
   // Move S3 domains to download domain set, since S3 files may be large
-  appendSetElementsToArray(downloadDomainSet, S3OSSDomains);
+  appendArrayInPlace(downloadDomainSet, S3OSSDomains);
   appendArrayInPlace(downloadDomainSet, steamDomainSet);
 
   // we have whitelisted the crashlytics domain, and we also want to put it in CDN policy
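
Note (not part of the patch): the sketch below is a minimal, self-contained illustration of the worker-pool pattern this patch adopts, using only the worktank surface the patch itself exercises (the constructor options, pool.exec(name, args) and pool.terminate()). The pool name and the countSuffixes method are illustrative placeholders, not code from this repository.

import Worktank from 'worktank';

// A one-worker pool, mirroring the configuration used in the patch above.
const pool = new Worktank({
  pool: {
    name: 'example-pool', // illustrative name, not from the patch
    size: 1
  },
  worker: {
    autoAbort: 10000,
    autoTerminate: 20000,
    autoInstantiate: true,
    methods: {
      // Runs inside the worker thread, so it must not close over outer-scope values;
      // everything it needs arrives through its (serializable) arguments.
      countSuffixes: async function (text: string): Promise<number> {
        return text.split('\n').filter((line) => line !== '' && !line.startsWith('//')).length;
      }
    }
  }
});

// exec(name, args) runs the named worker method off the main thread;
// terminate() in finally mirrors the one-shot usage in the patch.
pool.exec('countSuffixes', ['com\nnet\n// comment\n'])
  .then((count) => console.log(count)) // prints 2
  .finally(() => pool.terminate());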