Refactor: use jest-worker

This commit is contained in:
SukkaW
2026-03-31 22:23:43 +08:00
parent 09183a3cd1
commit 939fa0d2a0
13 changed files with 686 additions and 560 deletions

View File

@@ -1,219 +1,171 @@
import Worktank from 'worktank';
import picocolors from 'picocolors';
import { parse } from 'tldts-experimental';
import { appendArrayInPlaceCurried } from 'foxts/append-array-in-place';
import { dummySpan, printTraceResult } from '../trace';
import type { Span } from '../trace';
import { dummySpan } from '../trace';
import type { TldTsParsed } from './normalize-domain';
const pool = new Worktank({
pool: {
name: 'process-phishing-domains',
size: 1
},
worker: {
autoAbort: 20000, // The maximum number of milliseconds to wait for the result from the worker, if exceeded the worker is terminated and the execution promise rejects
autoInstantiate: true,
autoTerminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
env: {},
methods: {
// eslint-disable-next-line object-shorthand -- workertank
getPhishingDomains: async function (
importMetaUrl: string,
/** require.main === module */ isDebug = false
): Promise<string[]> {
// TODO: createRequire is a temporary workaround for https://github.com/nodejs/node/issues/51956
const { default: module } = await import('node:module');
const __require = module.createRequire(importMetaUrl);
import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt';
import { BLACK_TLD, WHITELIST_MAIN_DOMAINS, leathalKeywords, lowKeywords, sensitiveKeywords } from '../constants/phishing-score-source';
import { PHISHING_DOMAIN_LISTS_EXTRA, PHISHING_HOSTS_EXTRA } from '../constants/reject-data-source';
const picocolors = __require('picocolors') as typeof import('picocolors');
const tldts = __require('tldts-experimental') as typeof import('tldts-experimental');
import { processHostsWithPreload } from './parse-filter/hosts';
import { processDomainListsWithPreload } from './parse-filter/domainlists';
const { appendArrayInPlaceCurried } = __require('foxts/append-array-in-place') as typeof import('foxts/append-array-in-place');
import process from 'node:process';
const { loosTldOptWithPrivateDomains } = __require('../constants/loose-tldts-opt') as typeof import('../constants/loose-tldts-opt');
const { BLACK_TLD, WHITELIST_MAIN_DOMAINS, leathalKeywords, lowKeywords, sensitiveKeywords } = __require('../constants/phishing-score-source') as typeof import('../constants/phishing-score-source');
const { PHISHING_DOMAIN_LISTS_EXTRA, PHISHING_HOSTS_EXTRA } = __require('../constants/reject-data-source') as typeof import('../constants/reject-data-source');
const { dummySpan } = __require('../trace') as typeof import('../trace');
const NullPrototypeObject = __require('null-prototype-object') as typeof import('null-prototype-object');
const { processHostsWithPreload } = __require('./parse-filter/hosts') as typeof import('./parse-filter/hosts');
const { processDomainListsWithPreload } = __require('./parse-filter/domainlists') as typeof import('./parse-filter/domainlists');
const downloads = [
...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry)),
...PHISHING_HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry))
];
const domainArr: string[] = [];
const domainGroups = await Promise.all(downloads.map(task => task(dummySpan)));
domainGroups.forEach(appendArrayInPlaceCurried(domainArr));
// return domainArr;
const domainCountMap = new Map<string, number>();
const domainScoreMap: Record<string, number> = new NullPrototypeObject();
let line = '';
let tld: string | null = '';
let apexDomain: string | null = '';
let subdomain: string | null = '';
let parsed: TldTsParsed;
// const set = new Set<string>();
// let duplicateCount = 0;
for (let i = 0, len = domainArr.length; i < len; i++) {
line = domainArr[i];
// if (set.has(line)) {
// duplicateCount++;
// } else {
// set.add(line);
// }
parsed = tldts.parse(line, loosTldOptWithPrivateDomains);
if (parsed.isPrivate) {
continue;
}
tld = parsed.publicSuffix;
apexDomain = parsed.domain;
if (!tld) {
console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, tld });
continue;
}
if (!apexDomain) {
console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, apexDomain });
continue;
}
if (WHITELIST_MAIN_DOMAINS.has(apexDomain)) {
continue;
}
domainCountMap.set(
apexDomain,
domainCountMap.has(apexDomain)
? domainCountMap.get(apexDomain)! + 1
: 1
);
let score = 0;
if (apexDomain in domainScoreMap) {
score = domainScoreMap[apexDomain];
} else {
if (BLACK_TLD.has(tld)) {
score += 3;
} else if (tld.length > 4) {
score += 2;
} else if (tld.length > 5) {
score += 4;
}
if (apexDomain.length >= 18) {
score += 0.5;
}
}
subdomain = parsed.subdomain;
if (subdomain) {
score += calcDomainAbuseScore(subdomain, line);
}
domainScoreMap[apexDomain] = score;
}
domainCountMap.forEach((count, apexDomain) => {
const score = domainScoreMap[apexDomain];
if (
// !WHITELIST_MAIN_DOMAINS.has(apexDomain)
(score >= 24)
|| (score >= 16 && count >= 7)
|| (score >= 13 && count >= 11)
|| (score >= 5 && count >= 14)
|| (score >= 3 && count >= 21)
|| (score >= 1 && count >= 60)
) {
domainArr.push('.' + apexDomain);
}
});
if (isDebug) {
console.log({
v: 1,
score: domainScoreMap['com-ticketry.world'],
count: domainCountMap.get('com-ticketry.world'),
domainArrLen: domainArr.length
});
}
return domainArr;
function calcDomainAbuseScore(subdomain: string, fullDomain: string = subdomain) {
if (leathalKeywords(fullDomain)) {
return 100;
}
let weight = 0;
const hitLowKeywords = lowKeywords(fullDomain);
const sensitiveKeywordsHit = sensitiveKeywords(fullDomain);
if (sensitiveKeywordsHit) {
weight += 15;
if (hitLowKeywords) {
weight += 10;
}
} else if (hitLowKeywords) {
weight += 2;
}
const subdomainLength = subdomain.length;
if (subdomainLength > 6) {
weight += 0.015;
if (subdomainLength > 13) {
weight += 0.2;
if (subdomainLength > 20) {
weight += 1;
if (subdomainLength > 30) {
weight += 5;
if (subdomainLength > 40) {
weight += 10;
}
}
}
if (subdomain.indexOf('.', 1) > 1) {
weight += 1;
}
}
}
return weight;
}
}
}
}
});
export function getPhishingDomains(parentSpan: Span) {
return parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => span.traceChildAsync(
export function getPhishingDomains(isDebug = false): Promise<string[]> {
return dummySpan.traceChild('get phishing domains').traceAsyncFn(async (span) => span.traceChildAsync(
'process phishing domain set',
() => pool.exec(
'getPhishingDomains',
[__filename, require.main === module]
).finally(() => pool.terminate())
async () => {
const downloads = [
...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry)),
...PHISHING_HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry))
];
const domainArr: string[] = [];
const domainGroups = await Promise.all(downloads.map(task => task(dummySpan)));
domainGroups.forEach(appendArrayInPlaceCurried(domainArr));
const domainCountMap = new Map<string, number>();
const domainScoreMap: Record<string, number> = Object.create(null) as Record<string, number>;
let line: string;
let tld: string | null;
let apexDomain: string | null;
let subdomain: string | null;
let parsed: TldTsParsed;
for (let i = 0, len = domainArr.length; i < len; i++) {
line = domainArr[i];
parsed = parse(line, loosTldOptWithPrivateDomains);
if (parsed.isPrivate) {
continue;
}
tld = parsed.publicSuffix;
apexDomain = parsed.domain;
if (!tld) {
console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, tld });
continue;
}
if (!apexDomain) {
console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, apexDomain });
continue;
}
if (WHITELIST_MAIN_DOMAINS.has(apexDomain)) {
continue;
}
domainCountMap.set(
apexDomain,
domainCountMap.has(apexDomain)
? domainCountMap.get(apexDomain)! + 1
: 1
);
let score = 0;
if (apexDomain in domainScoreMap) {
score = domainScoreMap[apexDomain];
} else {
if (BLACK_TLD.has(tld)) {
score += 3;
} else if (tld.length > 4) {
score += 2;
} else if (tld.length > 5) {
score += 4;
}
if (apexDomain.length >= 18) {
score += 0.5;
}
}
subdomain = parsed.subdomain;
if (subdomain) {
score += calcDomainAbuseScore(subdomain, line);
}
domainScoreMap[apexDomain] = score;
}
domainCountMap.forEach((count, apexDomain) => {
const score = domainScoreMap[apexDomain];
if (
(score >= 24)
|| (score >= 16 && count >= 7)
|| (score >= 13 && count >= 11)
|| (score >= 5 && count >= 14)
|| (score >= 3 && count >= 21)
|| (score >= 1 && count >= 60)
) {
domainArr.push('.' + apexDomain);
}
});
if (isDebug) {
console.log({
v: 1,
score: domainScoreMap['com-ticketry.world'],
count: domainCountMap.get('com-ticketry.world'),
domainArrLen: domainArr.length
});
}
return domainArr;
}
));
}
if (require.main === module) {
getPhishingDomains(dummySpan)
.catch(console.error)
.finally(() => {
dummySpan.stop();
printTraceResult(dummySpan.traceResult);
});
function calcDomainAbuseScore(subdomain: string, fullDomain: string = subdomain) {
if (leathalKeywords(fullDomain)) {
return 100;
}
let weight = 0;
const hitLowKeywords = lowKeywords(fullDomain);
const sensitiveKeywordsHit = sensitiveKeywords(fullDomain);
if (sensitiveKeywordsHit) {
weight += 15;
if (hitLowKeywords) {
weight += 10;
}
} else if (hitLowKeywords) {
weight += 2;
}
const subdomainLength = subdomain.length;
if (subdomainLength > 6) {
weight += 0.015;
if (subdomainLength > 13) {
weight += 0.2;
if (subdomainLength > 20) {
weight += 1;
if (subdomainLength > 30) {
weight += 5;
if (subdomainLength > 40) {
weight += 10;
}
}
}
if (subdomain.indexOf('.', 1) > 1) {
weight += 1;
}
}
}
return weight;
}
if (!process.env.JEST_WORKER_ID && require.main === module) {
getPhishingDomains(true).catch(console.error);
}

View File

@@ -7,10 +7,6 @@ import { bigint2ip } from 'fast-cidr-tools';
import { base64ToUint8Array, concatUint8Arrays } from 'foxts/uint8array-utils';
import Worktank from 'worktank';
import { wait } from 'foxts/wait';
import { once } from 'foxts/once';
const mtptoto_public_rsa = `-----BEGIN RSA PUBLIC KEY-----
MIIBCgKCAQEAyr+18Rex2ohtVy8sroGP
BwXD3DOoKCSpjDqYoXgCqB7ioln4eDCFfOBUlfXUEvM/fnKCpF46VkAftlb4VuPD
@@ -112,160 +108,3 @@ export function getTelegramBackupIPFromBase64(base64: string) {
}
}));
}
const pool = new Worktank({
pool: {
name: 'get-telegram-backup-ips',
size: 1 // The number of workers to keep in the pool, if more workers are needed they will be spawned up to this limit
},
worker: {
autoAbort: 10000,
autoTerminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
autoInstantiate: true,
methods: {
// eslint-disable-next-line object-shorthand -- workertank
getTelegramBackupIPs: async function (__filename: string): Promise<{ timestamp: number, ipcidr: string[], ipcidr6: string[] }> {
// TODO: createRequire is a temporary workaround for https://github.com/nodejs/node/issues/51956
const { default: module } = await import('node:module');
const __require = module.createRequire(__filename);
const picocolors = __require('picocolors') as typeof import('picocolors');
const { '~fetch': fetch } = __require('./fetch-retry') as typeof import('./fetch-retry');
const dns = __require('node:dns/promises') as typeof import('node:dns/promises');
const { createReadlineInterfaceFromResponse } = __require('./fetch-text-by-line') as typeof import('./fetch-text-by-line');
const { getTelegramBackupIPFromBase64 } = __require('./get-telegram-backup-ip') as typeof import('./get-telegram-backup-ip');
const { fastIpVersion } = __require('foxts/fast-ip-version') as typeof import('foxts/fast-ip-version');
const { fastStringArrayJoin } = __require('foxts/fast-string-array-join') as typeof import('foxts/fast-string-array-join');
const resp = await fetch('https://core.telegram.org/resources/cidr.txt');
const lastModified = resp.headers.get('last-modified');
const date = lastModified ? new Date(lastModified) : new Date();
const ipcidr: string[] = [
// Unused secret Telegram backup CIDR, announced by AS62041
'95.161.64.0/20'
];
const ipcidr6: string[] = [];
for await (const cidr of createReadlineInterfaceFromResponse(resp, true)) {
const v = fastIpVersion(cidr);
if (v === 4) {
ipcidr.push(cidr);
} else if (v === 6) {
ipcidr6.push(cidr);
}
}
const backupIPs = new Set<string>();
// https://github.com/tdlib/td/blob/master/td/telegram/ConfigManager.cpp
const resolvers = ['8.8.8.8', '1.0.0.1'].map((ip) => {
const resolver = new dns.Resolver();
resolver.setServers([ip]);
return Object.assign(resolver, { server: ip });
});
// Backup IP Source 1 (DNS)
await Promise.all(resolvers.flatMap((resolver) => [
'apv3.stel.com', // prod
'tapv3.stel.com' // test
].map(async (domain) => {
try {
// tapv3.stel.com was for testing server
const resp = await resolver.resolveTxt(domain);
const strings = resp.map(r => fastStringArrayJoin(r, '')); // flatten
if (strings.length !== 2) {
throw new TypeError(`Unexpected TXT record count: ${strings.length}`);
}
const str = strings[0].length > strings[1].length
? strings[0] + strings[1]
: strings[1] + strings[0];
const ips = getTelegramBackupIPFromBase64(str);
ips.forEach(i => backupIPs.add(i.ip));
console.log('[telegram backup ip]', picocolors.green('DNS TXT'), { domain, ips, server: resolver.server });
} catch (e) {
console.error('[telegram backup ip]', picocolors.red('DNS TXT error'), { domain }, e);
}
})));
// Backup IP Source 2: Firebase Realtime Database (test server not supported)
try {
const text = await (await fetch('https://reserve-5a846.firebaseio.com/ipconfigv3.json')).json();
if (typeof text === 'string' && text.length === 344) {
const ips = getTelegramBackupIPFromBase64(text);
ips.forEach(i => backupIPs.add(i.ip));
console.log('[telegram backup ip]', picocolors.green('Firebase Realtime DB'), { ips });
}
} catch (e) {
console.error('[telegram backup ip]', picocolors.red('Firebase Realtime DB error'), e);
// ignore all errors
}
// Backup IP Source 3: Firebase Value Store (test server not supported)
try {
const json = await (await fetch('https://firestore.googleapis.com/v1/projects/reserve-5a846/databases/(default)/documents/ipconfig/v3', {
headers: {
Accept: '*/*',
Origin: undefined // Without this line, Google API will return "Bad request: Origin doesn't match Host for XD3.". Probably have something to do with sqlite cache store
}
})).json();
// const json = await resp.json();
if (
json && typeof json === 'object'
&& 'fields' in json && typeof json.fields === 'object' && json.fields
&& 'data' in json.fields && typeof json.fields.data === 'object' && json.fields.data
&& 'stringValue' in json.fields.data && typeof json.fields.data.stringValue === 'string' && json.fields.data.stringValue.length === 344
) {
const ips = getTelegramBackupIPFromBase64(json.fields.data.stringValue);
ips.forEach(i => backupIPs.add(i.ip));
console.log('[telegram backup ip]', picocolors.green('Firebase Value Store'), { ips });
} else {
console.error('[telegram backup ip]', picocolors.red('Firebase Value Store data format invalid'), { json });
}
} catch (e) {
console.error('[telegram backup ip]', picocolors.red('Firebase Value Store error'), e);
}
// Backup IP Source 4: Google App Engine
await Promise.all([
'https://dns-telegram.appspot.com',
'https://dns-telegram.appspot.com/test'
].map(async (url) => {
try {
const text = await (await fetch(url)).text();
if (text.length === 344) {
const ips = getTelegramBackupIPFromBase64(text);
ips.forEach(i => backupIPs.add(i.ip));
console.log('[telegram backup ip]', picocolors.green('Google App Engine'), { url, ips });
}
} catch (e) {
console.error('[telegram backup ip]', picocolors.red('Google App Engine error'), { url }, e);
}
}));
// tcdnb.azureedge.net no longer works
console.log('[telegram backup ip]', `Found ${backupIPs.size} backup IPs:`, backupIPs);
ipcidr.push(...Array.from(backupIPs).map(i => i + '/32'));
return { timestamp: date.getTime(), ipcidr, ipcidr6 };
}
}
}
});
export const getTelegramCIDRPromise = once(() => wait(0).then(() => pool.exec(
'getTelegramBackupIPs',
[__filename]
)).finally(() => pool.terminate()), false);

28
Build/lib/worker.ts Normal file
View File

@@ -0,0 +1,28 @@
import process from 'node:process';
import type { JestWorkerFarm } from 'jest-worker';
import { Worker as JestWorker } from 'jest-worker';
const sharedWorkerOptions = {
numWorkers: 1,
enableWorkerThreads: true,
forkOptions: {
env: {
...process.env,
NODE_OPTIONS: process.env.NODE_OPTIONS
}
}
} satisfies ConstructorParameters<typeof JestWorker>[1];
export function createWorker<T extends Record<string, unknown>>(workerPath: string) {
return <const K extends ReadonlyArray<keyof T & string>>(exposedMethods: K): JestWorkerFarm<Pick<T, K[number]>> => {
const worker = new JestWorker(workerPath, {
...sharedWorkerOptions,
exposedMethods
}) as JestWorkerFarm<Pick<T, K[number]>>;
worker.getStdout().pipe(process.stdout);
worker.getStderr().pipe(process.stderr);
return worker;
};
}