From a4dfcaa66971e57bf6a92f6f94564c5b7ce66f35 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Mon, 13 Apr 2026 16:20:29 +0800 Subject: [PATCH] Chore: allow trace tool to run across workers/realms --- Build/build-reject-domainset.ts | 6 +-- Build/index.ts | 10 +++-- Build/lib/get-phishing-domains.ts | 27 ++++++------- Build/trace/index.ts | 65 ++++++++++++++++++++++++++++++- 4 files changed, 87 insertions(+), 21 deletions(-) diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts index 90ec340b..c2aba871 100644 --- a/Build/build-reject-domainset.ts +++ b/Build/build-reject-domainset.ts @@ -99,6 +99,7 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as // It is faster to add base than add others first then whitelist rejectDomainsetOutput.addFromRuleset(readLocalRejectRulesetPromise); rejectExtraDomainsetOutput.addFromRuleset(readLocalRejectRulesetPromise); + rejectPhisingDomainsetOutput.addFromRuleset(readLocalRejectRulesetPromise); rejectNonIpRulesetOutput.addFromRuleset(readLocalRejectRulesetPromise); @@ -130,7 +131,7 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as arrayPushNonNullish(promises, domainListsExtraDownloads.map(task => task(childSpan).then(appendArrayToRejectExtraOutput))); rejectPhisingDomainsetOutput.addFromDomainset( - span.traceChildPromise('get phishing domains', phishingWorker.getPhishingDomains()) + span.traceWorkerChild('get phishing domains', rawSpan => phishingWorker.getPhishingDomains(rawSpan)) ); arrayPushNonNullish( @@ -212,10 +213,9 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as for (let i = 0, len = arr.length; i < len; i++) { const line = arr[i]; if (line.startsWith('bogus-nxdomain=')) { - // bogus nxdomain needs to be blocked even after resolved rejectIPOutput.addAnyCIDR( line.slice(15).trim(), - false + false // bogus nxdomain needs to be blocked even after resolved ); } } diff --git a/Build/index.ts b/Build/index.ts index 8ebf34f1..02d56fa8 100644 --- a/Build/index.ts +++ b/Build/index.ts @@ -124,10 +124,12 @@ const buildFinishedLock = path.join(ROOT_DIR, '.BUILD_FINISHED'); }); printStats(traces); - await microsoftCdnWorker.end(); - await cdnDownloadWorker.end(); - await telegramCidrWorker.end(); - await mockAssetsWorker.end(); + await Promise.all([ + microsoftCdnWorker.end(), + cdnDownloadWorker.end(), + telegramCidrWorker.end(), + mockAssetsWorker.end() + ]); // Finish the build to avoid leaking timer/fetch ref await whyIsNodeRunning(); diff --git a/Build/lib/get-phishing-domains.ts b/Build/lib/get-phishing-domains.ts index 0c5b7ee9..14edde06 100644 --- a/Build/lib/get-phishing-domains.ts +++ b/Build/lib/get-phishing-domains.ts @@ -2,7 +2,8 @@ import picocolors from 'picocolors'; import { parse } from 'tldts-experimental'; import { appendArrayInPlaceCurried } from 'foxts/append-array-in-place'; -import { dummySpan } from '../trace'; +import { workerJob } from '../trace'; +import type { RawSpan, WorkerJobResult } from '../trace'; import type { TldTsParsed } from './normalize-domain'; import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt'; @@ -14,18 +15,18 @@ import { processDomainListsWithPreload } from './parse-filter/domainlists'; import process from 'node:process'; -export function getPhishingDomains(isDebug = false): Promise { - return dummySpan.traceChild('get phishing domains').traceAsyncFn(async (span) => span.traceChildAsync( - 'process phishing domain set', - async () => { - const downloads = [ - ...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry)), - ...PHISHING_HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry)) - ]; +export function getPhishingDomains(rawSpan?: RawSpan, isDebug = false): Promise> { + return workerJob(rawSpan, async (childSpan) => { + const downloads = [ + ...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry)), + ...PHISHING_HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry)) + ]; + const domainGroups = await Promise.all(downloads.map(task => task(childSpan))); + + return childSpan.traceChildSync('calculate and handling mass phishing domains', () => { const domainArr: string[] = []; - const domainGroups = await Promise.all(downloads.map(task => task(dummySpan))); domainGroups.forEach(appendArrayInPlaceCurried(domainArr)); const domainCountMap = new Map(); @@ -117,8 +118,8 @@ export function getPhishingDomains(isDebug = false): Promise { } return domainArr; - } - )); + }); + }); } function calcDomainAbuseScore(subdomain: string, fullDomain: string = subdomain) { @@ -167,5 +168,5 @@ function calcDomainAbuseScore(subdomain: string, fullDomain: string = subdomain) } if (!process.env.JEST_WORKER_ID && require.main === module) { - getPhishingDomains(true).catch(console.error); + getPhishingDomains(undefined, true).catch(console.error); } diff --git a/Build/trace/index.ts b/Build/trace/index.ts index b6d58fae..0525ecb4 100644 --- a/Build/trace/index.ts +++ b/Build/trace/index.ts @@ -24,6 +24,7 @@ export interface RawSpan { export interface Span { [spanTag]: true, + readonly rawSpan: RawSpan, readonly stop: (time?: number) => void, readonly traceChild: (name: string) => Span, readonly traceSyncFn: (fn: (span: Span) => T) => T, @@ -32,6 +33,7 @@ export interface Span { readonly traceChildSync: (name: string, fn: (span: Span) => T) => T, readonly traceChildAsync: (name: string, fn: (span: Span) => Promise) => Promise, readonly traceChildPromise: (name: string, promise: Promise) => Promise, + readonly traceWorkerChild: (name: string, factory: (rawSpan: RawSpan) => Promise>) => Promise, readonly traceResult: TraceResult } @@ -55,6 +57,7 @@ export function makeSpan(rawSpan: RawSpan): Span { const span: Span = { [spanTag]: true, + rawSpan, stop, traceChild, traceSyncFn(fn: (span: Span) => T) { @@ -75,7 +78,15 @@ export function makeSpan(rawSpan: RawSpan): Span { }, traceChildSync: (name: string, fn: (span: Span) => T): T => traceChild(name).traceSyncFn(fn), traceChildAsync: (name: string, fn: (span: Span) => T | Promise): Promise => traceChild(name).traceAsyncFn(fn), - traceChildPromise: (name: string, promise: Promise): Promise => traceChild(name).tracePromise(promise) + traceChildPromise: (name: string, promise: Promise): Promise => traceChild(name).tracePromise(promise), + + async traceWorkerChild(name: string, factory: (rawSpan: RawSpan) => Promise>): Promise { + const childSpan = traceChild(name); + const { result, traceResult, workerTimeOrigin } = await factory(childSpan.rawSpan); + mergeWorkerTrace(childSpan, traceResult, workerTimeOrigin); + childSpan.stop(); + return result; + } }; // eslint-disable-next-line sukka/no-redundant-variable -- self reference @@ -172,6 +183,58 @@ export async function whyIsNodeRunning() { // }; // }; +function adjustTraceTimestamps(trace: TraceResult, offset: number): TraceResult { + return { + name: trace.name, + start: trace.start + offset, + end: trace.end + offset, + children: trace.children.map(child => adjustTraceTimestamps(child, offset)) + }; +} + +function mergeWorkerTrace( + parentSpan: Span, + workerTraceResult: TraceResult, + workerTimeOrigin: number +): void { + const offset = workerTimeOrigin - performance.timeOrigin; + for (const child of workerTraceResult.children) { + parentSpan.traceResult.children.push(adjustTraceTimestamps(child, offset)); + } +} + +/** The envelope that a worker function returns so the main thread can recover both the result and the trace. */ +export interface WorkerJobResult { + result: T, + traceResult: TraceResult, + workerTimeOrigin: number +} + +/** + * Worker-side wrapper. Call this instead of manually constructing spans. + * + * - When `rawSpan` is provided (normal worker invocation from the main thread), + * it is wrapped with {@link makeSpan} so all child spans are attached to the + * caller's trace tree and can be recovered after the job finishes. + * - When `rawSpan` is `undefined` (standalone / CLI invocation), a fresh + * child span of {@link dummySpan} is used instead. + * + * The impl function receives a full {@link Span} and returns its result + * normally; the wrapper packages everything into a {@link WorkerJobResult}. + */ +export async function workerJob( + rawSpan: RawSpan | undefined, + impl: (span: Span) => Promise +): Promise> { + const span = rawSpan == null ? dummySpan.traceChild('worker-standalone') : makeSpan(rawSpan); + const result = await impl(span); + return { + result, + traceResult: span.traceResult, + workerTimeOrigin: performance.timeOrigin + }; +} + export function printTraceResult(traceResult: TraceResult) { printTree( traceResult,