Refactor: use jest-worker

This commit is contained in:
SukkaW
2026-03-31 22:23:43 +08:00
parent 09183a3cd1
commit 939fa0d2a0
13 changed files with 686 additions and 560 deletions

View File

@@ -1,12 +1,12 @@
import path from 'node:path';
import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
import { readFileIntoProcessedArray, fetchRemoteTextByLine } from './lib/fetch-text-by-line';
import { task } from './trace';
import { SHARED_DESCRIPTION } from './constants/description';
import { appendArrayInPlace } from 'foxts/append-array-in-place';
import { SOURCE_DIR } from './constants/dir';
import { DomainsetOutput } from './lib/rules/domainset';
import { CRASHLYTICS_WHITELIST } from './constants/reject-data-source';
import Worktank from 'worktank';
import { HostnameTrie } from './lib/trie';
import { $$fetch } from './lib/fetch-retry';
import { fastUri } from 'fast-uri';
@@ -14,25 +14,17 @@ const cdnDomainSetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'do
const downloadDomainSetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/download.conf'));
const steamDomainSetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/game-download.conf'));
const pool = new Worktank({
pool: {
name: 'extract-s3-from-publicssuffix',
size: 1 // The number of workers to keep in the pool, if more workers are needed they will be spawned up to this limit
},
worker: {
autoAbort: 10000,
autoTerminate: 20000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
autoInstantiate: true,
methods: {
// eslint-disable-next-line object-shorthand -- workertank
getS3OSSDomains: async function (__filename: string): Promise<string[]> {
// TODO: createRequire is a temporary workaround for https://github.com/nodejs/node/issues/51956
const { default: module } = await import('node:module');
const __require = module.createRequire(__filename);
const { HostnameTrie } = __require('./lib/trie') as typeof import('./lib/trie');
const { fetchRemoteTextByLine } = __require('./lib/fetch-text-by-line') as typeof import('./lib/fetch-text-by-line');
export const buildCdnDownloadConf = task(require.main === module, __filename)(async (span) => {
const [
S3OSSDomains,
IPFSDomains,
cdnDomainsList,
downloadDomainSet,
steamDomainSet
] = await Promise.all([
span.traceChildAsync(
'download public suffix list for s3',
async () => {
const trie = new HostnameTrie();
for await (const line of await fetchRemoteTextByLine('https://publicsuffix.org/list/public_suffix_list.dat', true)) {
@@ -70,24 +62,6 @@ const pool = new Worktank({
return S3OSSDomains;
}
}
}
});
export const buildCdnDownloadConf = task(require.main === module, __filename)(async (span) => {
const [
S3OSSDomains,
IPFSDomains,
cdnDomainsList,
downloadDomainSet,
steamDomainSet
] = await Promise.all([
span.traceChildAsync(
'download public suffix list for s3',
() => pool.exec(
'getS3OSSDomains',
[__filename]
).finally(() => pool.terminate())
),
span.traceChildAsync(
'load public ipfs gateway list',

View File

@@ -1,70 +0,0 @@
import { task } from './trace';
import { SHARED_DESCRIPTION } from './constants/description';
import { RulesetOutput } from './lib/rules/ruleset';
import Worktank from 'worktank';
import { RULES } from './constants/microsoft-cdn';
const pool = new Worktank({
pool: {
name: 'get-microsoft-cdn',
size: 1 // The number of workers to keep in the pool, if more workers are needed they will be spawned up to this limit
},
worker: {
autoAbort: 10000,
autoTerminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
autoInstantiate: true,
methods: {
// eslint-disable-next-line object-shorthand -- workertank
getMicrosoftCdnRuleset: async function (__filename: string): Promise<[domains: string[], domainSuffixes: string[]]> {
// TODO: createRequire is a temporary workaround for https://github.com/nodejs/node/issues/51956
const { default: module } = await import('node:module');
const __require = module.createRequire(__filename);
const { HostnameSmolTrie } = __require('./lib/trie');
const { PROBE_DOMAINS, DOMAINS, DOMAIN_SUFFIXES, BLACKLIST } = __require('./constants/microsoft-cdn') as typeof import('./constants/microsoft-cdn');
const { fetchRemoteTextByLine } = __require('./lib/fetch-text-by-line') as typeof import('./lib/fetch-text-by-line');
const { appendArrayInPlace } = __require('foxts/append-array-in-place') as typeof import('foxts/append-array-in-place');
const { extractDomainsFromFelixDnsmasq } = __require('./lib/parse-dnsmasq') as typeof import('./lib/parse-dnsmasq');
const trie = new HostnameSmolTrie();
for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
const domain = extractDomainsFromFelixDnsmasq(line);
if (domain) {
trie.add(domain);
}
}
// remove blacklist domain from trie, to prevent them from being included in the later dump
BLACKLIST.forEach(black => trie.whitelist(black));
const domains: string[] = DOMAINS;
const domainSuffixes = appendArrayInPlace(PROBE_DOMAINS.flatMap(domain => trie.find(domain)), DOMAIN_SUFFIXES);
return [domains, domainSuffixes] as const;
}
}
}
});
const getMicrosoftCdnRulesetPromise = pool.exec(
'getMicrosoftCdnRuleset',
[__filename]
).finally(() => pool.terminate());
export const buildMicrosoftCdn = task(require.main === module, __filename)(async (span) => {
const [domains, domainSuffixes] = await span.traceChildPromise('get microsoft cdn domains', getMicrosoftCdnRulesetPromise);
return new RulesetOutput(span, 'microsoft_cdn', 'non_ip')
.withTitle('Sukka\'s Ruleset - Microsoft CDN')
.appendDescription(SHARED_DESCRIPTION)
.appendDescription(
'',
'This file contains Microsoft\'s domains using their China mainland CDN servers.'
)
.addFromRuleset(RULES)
.appendDataSource('https://github.com/felixonmars/dnsmasq-china-list')
.bulkAddDomain(domains)
.bulkAddDomainSuffix(domainSuffixes)
.write();
});

View File

@@ -0,0 +1,42 @@
import { task } from './trace';
import { SHARED_DESCRIPTION } from './constants/description';
import { RulesetOutput } from './lib/rules/ruleset';
import { RULES, PROBE_DOMAINS, DOMAINS, DOMAIN_SUFFIXES, BLACKLIST } from './constants/microsoft-cdn';
import { HostnameSmolTrie } from './lib/trie';
import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
import { appendArrayInPlace } from 'foxts/append-array-in-place';
import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
export const buildMicrosoftCdn = task(require.main === module, __filename)(async (span) => {
const [domains, domainSuffixes] = await span.traceChildAsync('get microsoft cdn domains', async () => {
const trie = new HostnameSmolTrie();
for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
const domain = extractDomainsFromFelixDnsmasq(line);
if (domain) {
trie.add(domain);
}
}
// remove blacklist domain from trie, to prevent them from being included in the later dump
BLACKLIST.forEach(black => trie.whitelist(black));
const domains: string[] = DOMAINS;
const domainSuffixes = appendArrayInPlace(PROBE_DOMAINS.flatMap(domain => trie.find(domain)), DOMAIN_SUFFIXES);
return [domains, domainSuffixes] as [string[], string[]];
});
return new RulesetOutput(span, 'microsoft_cdn', 'non_ip')
.withTitle('Sukka\'s Ruleset - Microsoft CDN')
.appendDescription(SHARED_DESCRIPTION)
.appendDescription(
'',
'This file contains Microsoft\'s domains using their China mainland CDN servers.'
)
.addFromRuleset(RULES)
.appendDataSource('https://github.com/felixonmars/dnsmasq-china-list')
.bulkAddDomain(domains)
.bulkAddDomainSuffix(domainSuffixes)
.write();
});

View File

@@ -18,7 +18,7 @@ import { addArrayElementsToSet } from 'foxts/add-array-elements-to-set';
import { OUTPUT_INTERNAL_DIR, SOURCE_DIR } from './constants/dir';
import { DomainsetOutput, AdGuardHomeOutput } from './lib/rules/domainset';
import { foundDebugDomain } from './lib/parse-filter/shared';
import { getPhishingDomains } from './lib/get-phishing-domains';
import { createWorker } from './lib/worker';
import type { MaybePromise } from './lib/misc';
import { RulesetOutput } from './lib/rules/ruleset';
import { fetchAssets } from './lib/fetch-assets';
@@ -39,6 +39,9 @@ const adguardFiltersExtraDownloads = ADGUARD_FILTERS_EXTRA.map(entry => processF
const adguardFiltersWhitelistsDownloads = ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRulesWithPreload(...entry));
export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => {
const phishingWorker = createWorker<typeof import('./lib/get-phishing-domains')>(
require.resolve('./lib/get-phishing-domains')
)(['getPhishingDomains']);
const rejectDomainsetOutput = new DomainsetOutput(span, 'reject')
.withTitle('Sukka\'s Ruleset - Reject Base')
.appendDescription(
@@ -126,7 +129,9 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
arrayPushNonNullish(promises, domainListsDownloads.map(task => task(childSpan).then(appendArrayToRejectOutput)));
arrayPushNonNullish(promises, domainListsExtraDownloads.map(task => task(childSpan).then(appendArrayToRejectExtraOutput)));
rejectPhisingDomainsetOutput.addFromDomainset(getPhishingDomains(childSpan));
rejectPhisingDomainsetOutput.addFromDomainset(
span.traceChildPromise('get phishing domains', phishingWorker.getPhishingDomains())
);
arrayPushNonNullish(
promises,
@@ -253,6 +258,7 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
rejectNonIpRulesetOutput.whitelistKeyword(keyword);
}
// Deduplicate reject_extra and reject_phishing from the base reject domainset
rejectDomainsetOutput.domainTrie.dump(arg => {
rejectExtraDomainsetOutput.whitelistDomain(arg);
rejectPhisingDomainsetOutput.whitelistDomain(arg);
@@ -295,4 +301,6 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
await myRejectOutputAdGuardHome
.addFromRuleset(readFileIntoProcessedArray(path.join(SOURCE_DIR, 'non_ip/my_reject.conf')))
.write();
await phishingWorker.end();
});

View File

@@ -2,10 +2,137 @@
import { task } from './trace';
import { SHARED_DESCRIPTION } from './constants/description';
import { RulesetOutput } from './lib/rules/ruleset';
import { getTelegramCIDRPromise } from './lib/get-telegram-backup-ip';
import { getTelegramBackupIPFromBase64 } from './lib/get-telegram-backup-ip';
import picocolors from 'picocolors';
import { $$fetch } from './lib/fetch-retry';
import dns from 'node:dns/promises';
import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line';
import { fastIpVersion } from 'foxts/fast-ip-version';
import { fastStringArrayJoin } from 'foxts/fast-string-array-join';
export const buildTelegramCIDR = task(require.main === module, __filename)(async (span) => {
const { timestamp, ipcidr, ipcidr6 } = await span.traceChildAsync('get telegram cidr', getTelegramCIDRPromise);
const { timestamp, ipcidr, ipcidr6 } = await span.traceChildAsync('get telegram cidr', async () => {
const resp = await $$fetch('https://core.telegram.org/resources/cidr.txt');
const lastModified = resp.headers.get('last-modified');
const date = lastModified ? new Date(lastModified) : new Date();
const ipcidr: string[] = [
// Unused secret Telegram backup CIDR, announced by AS62041
'95.161.64.0/20'
];
const ipcidr6: string[] = [];
for await (const cidr of createReadlineInterfaceFromResponse(resp, true)) {
const v = fastIpVersion(cidr);
if (v === 4) {
ipcidr.push(cidr);
} else if (v === 6) {
ipcidr6.push(cidr);
}
}
const backupIPs = new Set<string>();
// https://github.com/tdlib/td/blob/master/td/telegram/ConfigManager.cpp
const resolvers = ['8.8.8.8', '1.0.0.1'].map((ip) => {
const resolver = new dns.Resolver();
resolver.setServers([ip]);
return Object.assign(resolver, { server: ip });
});
// Backup IP Source 1 (DNS)
await Promise.all(resolvers.flatMap((resolver) => [
'apv3.stel.com', // prod
'tapv3.stel.com' // test
].map(async (domain) => {
try {
// tapv3.stel.com was for testing server
const resp = await resolver.resolveTxt(domain);
const strings = resp.map(r => fastStringArrayJoin(r, '')); // flatten
if (strings.length !== 2) {
throw new TypeError(`Unexpected TXT record count: ${strings.length}`);
}
const str = strings[0].length > strings[1].length
? strings[0] + strings[1]
: strings[1] + strings[0];
const ips = getTelegramBackupIPFromBase64(str);
ips.forEach(i => backupIPs.add(i.ip));
console.log('[telegram backup ip]', picocolors.green('DNS TXT'), { domain, ips, server: resolver.server });
} catch (e) {
console.error('[telegram backup ip]', picocolors.red('DNS TXT error'), { domain }, e);
}
})));
// Backup IP Source 2: Firebase Realtime Database (test server not supported)
try {
const text = await (await $$fetch('https://reserve-5a846.firebaseio.com/ipconfigv3.json')).json();
if (typeof text === 'string' && text.length === 344) {
const ips = getTelegramBackupIPFromBase64(text);
ips.forEach(i => backupIPs.add(i.ip));
console.log('[telegram backup ip]', picocolors.green('Firebase Realtime DB'), { ips });
}
} catch (e) {
console.error('[telegram backup ip]', picocolors.red('Firebase Realtime DB error'), e);
// ignore all errors
}
// Backup IP Source 3: Firebase Value Store (test server not supported)
try {
const json = await (await $$fetch('https://firestore.googleapis.com/v1/projects/reserve-5a846/databases/(default)/documents/ipconfig/v3', {
headers: {
Accept: '*/*',
Origin: undefined // Without this line, Google API will return "Bad request: Origin doesn't match Host for XD3.". Probably have something to do with sqlite cache store
}
})).json();
if (
json && typeof json === 'object'
&& 'fields' in json && typeof json.fields === 'object' && json.fields
&& 'data' in json.fields && typeof json.fields.data === 'object' && json.fields.data
&& 'stringValue' in json.fields.data && typeof json.fields.data.stringValue === 'string' && json.fields.data.stringValue.length === 344
) {
const ips = getTelegramBackupIPFromBase64(json.fields.data.stringValue);
ips.forEach(i => backupIPs.add(i.ip));
console.log('[telegram backup ip]', picocolors.green('Firebase Value Store'), { ips });
} else {
console.error('[telegram backup ip]', picocolors.red('Firebase Value Store data format invalid'), { json });
}
} catch (e) {
console.error('[telegram backup ip]', picocolors.red('Firebase Value Store error'), e);
}
// Backup IP Source 4: Google App Engine
await Promise.all([
'https://dns-telegram.appspot.com',
'https://dns-telegram.appspot.com/test'
].map(async (url) => {
try {
const text = await (await $$fetch(url)).text();
if (text.length === 344) {
const ips = getTelegramBackupIPFromBase64(text);
ips.forEach(i => backupIPs.add(i.ip));
console.log('[telegram backup ip]', picocolors.green('Google App Engine'), { url, ips });
}
} catch (e) {
console.error('[telegram backup ip]', picocolors.red('Google App Engine error'), { url }, e);
}
}));
// tcdnb.azureedge.net no longer works
console.log('[telegram backup ip]', `Found ${backupIPs.size} backup IPs:`, backupIPs);
ipcidr.push(...Array.from(backupIPs).map(i => i + '/32'));
return { timestamp: date.getTime(), ipcidr, ipcidr6 };
});
if (ipcidr.length + ipcidr6.length === 0) {
throw new Error('Failed to fetch data!');

View File

@@ -0,0 +1,159 @@
// @ts-check
import { task } from './trace';
import { SHARED_DESCRIPTION } from './constants/description';
import { RulesetOutput } from './lib/rules/ruleset';
import { getTelegramBackupIPFromBase64 } from './lib/get-telegram-backup-ip';
import picocolors from 'picocolors';
import { $$fetch } from './lib/fetch-retry';
import dns from 'node:dns/promises';
import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line';
import { fastIpVersion } from 'foxts/fast-ip-version';
import { fastStringArrayJoin } from 'foxts/fast-string-array-join';
export const buildTelegramCIDR = task(require.main === module, __filename)(async (span) => {
const { timestamp, ipcidr, ipcidr6 } = await span.traceChildAsync('get telegram cidr', async () => {
const resp = await $$fetch('https://core.telegram.org/resources/cidr.txt');
const lastModified = resp.headers.get('last-modified');
const date = lastModified ? new Date(lastModified) : new Date();
const ipcidr: string[] = [
// Unused secret Telegram backup CIDR, announced by AS62041
'95.161.64.0/20'
];
const ipcidr6: string[] = [];
for await (const cidr of createReadlineInterfaceFromResponse(resp, true)) {
const v = fastIpVersion(cidr);
if (v === 4) {
ipcidr.push(cidr);
} else if (v === 6) {
ipcidr6.push(cidr);
}
}
const backupIPs = new Set<string>();
// https://github.com/tdlib/td/blob/master/td/telegram/ConfigManager.cpp
const resolvers = ['8.8.8.8', '1.0.0.1'].map((ip) => {
const resolver = new dns.Resolver();
resolver.setServers([ip]);
return Object.assign(resolver, { server: ip });
});
// Backup IP Source 1 (DNS)
await Promise.all(resolvers.flatMap((resolver) => [
'apv3.stel.com', // prod
'tapv3.stel.com' // test
].map(async (domain) => {
try {
// tapv3.stel.com was for testing server
const resp = await resolver.resolveTxt(domain);
const strings = resp.map(r => fastStringArrayJoin(r, '')); // flatten
if (strings.length !== 2) {
throw new TypeError(`Unexpected TXT record count: ${strings.length}`);
}
const str = strings[0].length > strings[1].length
? strings[0] + strings[1]
: strings[1] + strings[0];
const ips = getTelegramBackupIPFromBase64(str);
ips.forEach(i => backupIPs.add(i.ip));
console.log('[telegram backup ip]', picocolors.green('DNS TXT'), { domain, ips, server: resolver.server });
} catch (e) {
console.error('[telegram backup ip]', picocolors.red('DNS TXT error'), { domain }, e);
}
})));
// Backup IP Source 2: Firebase Realtime Database (test server not supported)
try {
const text = await (await $$fetch('https://reserve-5a846.firebaseio.com/ipconfigv3.json')).json();
if (typeof text === 'string' && text.length === 344) {
const ips = getTelegramBackupIPFromBase64(text);
ips.forEach(i => backupIPs.add(i.ip));
console.log('[telegram backup ip]', picocolors.green('Firebase Realtime DB'), { ips });
}
} catch (e) {
console.error('[telegram backup ip]', picocolors.red('Firebase Realtime DB error'), e);
// ignore all errors
}
// Backup IP Source 3: Firebase Value Store (test server not supported)
try {
const json = await (await $$fetch('https://firestore.googleapis.com/v1/projects/reserve-5a846/databases/(default)/documents/ipconfig/v3', {
headers: {
Accept: '*/*',
Origin: undefined // Without this line, Google API will return "Bad request: Origin doesn't match Host for XD3.". Probably have something to do with sqlite cache store
}
})).json();
if (
json && typeof json === 'object'
&& 'fields' in json && typeof json.fields === 'object' && json.fields
&& 'data' in json.fields && typeof json.fields.data === 'object' && json.fields.data
&& 'stringValue' in json.fields.data && typeof json.fields.data.stringValue === 'string' && json.fields.data.stringValue.length === 344
) {
const ips = getTelegramBackupIPFromBase64(json.fields.data.stringValue);
ips.forEach(i => backupIPs.add(i.ip));
console.log('[telegram backup ip]', picocolors.green('Firebase Value Store'), { ips });
} else {
console.error('[telegram backup ip]', picocolors.red('Firebase Value Store data format invalid'), { json });
}
} catch (e) {
console.error('[telegram backup ip]', picocolors.red('Firebase Value Store error'), e);
}
// Backup IP Source 4: Google App Engine
await Promise.all([
'https://dns-telegram.appspot.com',
'https://dns-telegram.appspot.com/test'
].map(async (url) => {
try {
const text = await (await $$fetch(url)).text();
if (text.length === 344) {
const ips = getTelegramBackupIPFromBase64(text);
ips.forEach(i => backupIPs.add(i.ip));
console.log('[telegram backup ip]', picocolors.green('Google App Engine'), { url, ips });
}
} catch (e) {
console.error('[telegram backup ip]', picocolors.red('Google App Engine error'), { url }, e);
}
}));
// tcdnb.azureedge.net no longer works
console.log('[telegram backup ip]', `Found ${backupIPs.size} backup IPs:`, backupIPs);
ipcidr.push(...Array.from(backupIPs).map(i => i + '/32'));
return { timestamp: date.getTime(), ipcidr, ipcidr6 };
});
if (ipcidr.length + ipcidr6.length === 0) {
throw new Error('Failed to fetch data!');
}
const description = [
...SHARED_DESCRIPTION,
'Data from:',
' - https://core.telegram.org/resources/cidr.txt'
];
return new RulesetOutput(span, 'telegram', 'ip')
.withTitle('Sukka\'s Ruleset - Telegram IP CIDR')
.withDescription(description)
// .withDate(date) // With extra data source, we no longer use last-modified for file date
.appendDataSource(
'https://core.telegram.org/resources/cidr.txt (last updated: ' + new Date(timestamp).toISOString() + ')'
)
.bulkAddCIDR4NoResolve(ipcidr)
.bulkAddCIDR6NoResolve(ipcidr6)
.write();
});
export const ___ = '';

View File

@@ -6,9 +6,7 @@ import { downloadPreviousBuild } from './download-previous-build';
import { buildCommon } from './build-common';
import { buildRejectIPList } from './build-reject-ip-list';
import { buildAppleCdn } from './build-apple-cdn';
import { buildCdnDownloadConf } from './build-cdn-download-conf';
import { buildRejectDomainSet } from './build-reject-domainset';
import { buildTelegramCIDR } from './build-telegram-cidr';
import { buildChnCidr } from './build-chn-cidr';
import { buildSpeedtestDomainSet } from './build-speedtest-domainset';
import { buildDomesticRuleset } from './build-domestic-direct-lan-ruleset-dns-mapping-module';
@@ -18,11 +16,9 @@ import { buildStreamService } from './build-stream-service';
import { buildRedirectModule } from './build-sgmodule-redirect';
import { buildAlwaysRealIPModule } from './build-sgmodule-always-realip';
import { buildMicrosoftCdn } from './build-microsoft-cdn';
import { createWorker } from './lib/worker';
import { buildPublic } from './build-public';
import { downloadMockAssets } from './download-mock-assets';
import { buildCloudMounterRules } from './build-cloudmounter-rules';
import { printStats, printTraceResult, whyIsNodeRunning } from './trace';
@@ -71,6 +67,22 @@ const buildFinishedLock = path.join(ROOT_DIR, '.BUILD_FINISHED');
fs.unlinkSync(buildFinishedLock);
}
const microsoftCdnWorker = createWorker<typeof import('./build-microsoft-cdn.worker')>(
require.resolve('./build-microsoft-cdn.worker')
)(['buildMicrosoftCdn']);
const cdnDownloadWorker = createWorker<typeof import('./build-cdn-download-conf.worker')>(
require.resolve('./build-cdn-download-conf.worker')
)(['buildCdnDownloadConf']);
const telegramCidrWorker = createWorker<typeof import('./build-telegram-cidr.worker')>(
require.resolve('./build-telegram-cidr.worker')
)(['buildTelegramCIDR']);
const mockAssetsWorker = createWorker<typeof import('./download-mock-assets.worker')>(
require.resolve('./download-mock-assets.worker')
)(['downloadMockAssets']);
try {
// only enable why-is-node-running in GitHub Actions debug mode
if (isCI && process.env.RUNNER_DEBUG === '1') {
@@ -79,14 +91,14 @@ const buildFinishedLock = path.join(ROOT_DIR, '.BUILD_FINISHED');
const downloadPreviousBuildPromise = downloadPreviousBuild();
await Promise.all([
const traces: TraceResult[] = await Promise.all([
downloadPreviousBuildPromise,
downloadPreviousBuildPromise.then(() => buildCommon()),
downloadPreviousBuildPromise.then(() => buildRejectIPList()),
downloadPreviousBuildPromise.then(() => buildAppleCdn()),
downloadPreviousBuildPromise.then(() => buildCdnDownloadConf()),
downloadPreviousBuildPromise.then(() => cdnDownloadWorker.buildCdnDownloadConf()),
downloadPreviousBuildPromise.then(() => buildRejectDomainSet()),
downloadPreviousBuildPromise.then(() => buildTelegramCIDR()),
downloadPreviousBuildPromise.then(() => telegramCidrWorker.buildTelegramCIDR()),
downloadPreviousBuildPromise.then(() => buildChnCidr()),
downloadPreviousBuildPromise.then(() => buildSpeedtestDomainSet()),
downloadPreviousBuildPromise.then(() => buildDomesticRuleset()),
@@ -94,45 +106,29 @@ const buildFinishedLock = path.join(ROOT_DIR, '.BUILD_FINISHED');
downloadPreviousBuildPromise.then(() => buildRedirectModule()),
downloadPreviousBuildPromise.then(() => buildAlwaysRealIPModule()),
downloadPreviousBuildPromise.then(() => buildStreamService()),
downloadPreviousBuildPromise.then(() => buildMicrosoftCdn()),
downloadPreviousBuildPromise.then(() => microsoftCdnWorker.buildMicrosoftCdn()),
downloadPreviousBuildPromise.then(() => buildCloudMounterRules()),
downloadMockAssets()
mockAssetsWorker.downloadMockAssets()
]);
await buildDeprecateFiles();
await buildPublic();
traces.push(
await buildDeprecateFiles(),
await buildPublic()
);
// write a file to demonstrate that the build is finished
fs.writeFileSync(buildFinishedLock, 'BUILD_FINISHED\n');
const traces: TraceResult[] = [];
[
downloadPreviousBuild,
downloadMockAssets,
buildCommon,
buildRejectIPList,
buildAppleCdn,
buildCdnDownloadConf,
buildRejectDomainSet,
buildTelegramCIDR,
buildChnCidr,
buildSpeedtestDomainSet,
buildDomesticRuleset,
buildGlobalRuleset,
buildRedirectModule,
buildAlwaysRealIPModule,
buildStreamService,
buildMicrosoftCdn,
buildCloudMounterRules,
buildPublic,
buildDeprecateFiles
].forEach((fn) => {
const trace = fn.getInternalTraceResult();
printTraceResult(trace);
traces.push(trace);
traces.forEach((t) => {
printTraceResult(t);
});
printStats(traces);
await microsoftCdnWorker.end();
await cdnDownloadWorker.end();
await telegramCidrWorker.end();
await mockAssetsWorker.end();
// Finish the build to avoid leaking timer/fetch ref
await whyIsNodeRunning();
process.exit(0);

View File

@@ -1,219 +1,171 @@
import Worktank from 'worktank';
import picocolors from 'picocolors';
import { parse } from 'tldts-experimental';
import { appendArrayInPlaceCurried } from 'foxts/append-array-in-place';
import { dummySpan, printTraceResult } from '../trace';
import type { Span } from '../trace';
import { dummySpan } from '../trace';
import type { TldTsParsed } from './normalize-domain';
const pool = new Worktank({
pool: {
name: 'process-phishing-domains',
size: 1
},
worker: {
autoAbort: 20000, // The maximum number of milliseconds to wait for the result from the worker, if exceeded the worker is terminated and the execution promise rejects
autoInstantiate: true,
autoTerminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
env: {},
methods: {
// eslint-disable-next-line object-shorthand -- workertank
getPhishingDomains: async function (
importMetaUrl: string,
/** require.main === module */ isDebug = false
): Promise<string[]> {
// TODO: createRequire is a temporary workaround for https://github.com/nodejs/node/issues/51956
const { default: module } = await import('node:module');
const __require = module.createRequire(importMetaUrl);
import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt';
import { BLACK_TLD, WHITELIST_MAIN_DOMAINS, leathalKeywords, lowKeywords, sensitiveKeywords } from '../constants/phishing-score-source';
import { PHISHING_DOMAIN_LISTS_EXTRA, PHISHING_HOSTS_EXTRA } from '../constants/reject-data-source';
const picocolors = __require('picocolors') as typeof import('picocolors');
const tldts = __require('tldts-experimental') as typeof import('tldts-experimental');
import { processHostsWithPreload } from './parse-filter/hosts';
import { processDomainListsWithPreload } from './parse-filter/domainlists';
const { appendArrayInPlaceCurried } = __require('foxts/append-array-in-place') as typeof import('foxts/append-array-in-place');
import process from 'node:process';
const { loosTldOptWithPrivateDomains } = __require('../constants/loose-tldts-opt') as typeof import('../constants/loose-tldts-opt');
const { BLACK_TLD, WHITELIST_MAIN_DOMAINS, leathalKeywords, lowKeywords, sensitiveKeywords } = __require('../constants/phishing-score-source') as typeof import('../constants/phishing-score-source');
const { PHISHING_DOMAIN_LISTS_EXTRA, PHISHING_HOSTS_EXTRA } = __require('../constants/reject-data-source') as typeof import('../constants/reject-data-source');
const { dummySpan } = __require('../trace') as typeof import('../trace');
const NullPrototypeObject = __require('null-prototype-object') as typeof import('null-prototype-object');
const { processHostsWithPreload } = __require('./parse-filter/hosts') as typeof import('./parse-filter/hosts');
const { processDomainListsWithPreload } = __require('./parse-filter/domainlists') as typeof import('./parse-filter/domainlists');
const downloads = [
...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry)),
...PHISHING_HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry))
];
const domainArr: string[] = [];
const domainGroups = await Promise.all(downloads.map(task => task(dummySpan)));
domainGroups.forEach(appendArrayInPlaceCurried(domainArr));
// return domainArr;
const domainCountMap = new Map<string, number>();
const domainScoreMap: Record<string, number> = new NullPrototypeObject();
let line = '';
let tld: string | null = '';
let apexDomain: string | null = '';
let subdomain: string | null = '';
let parsed: TldTsParsed;
// const set = new Set<string>();
// let duplicateCount = 0;
for (let i = 0, len = domainArr.length; i < len; i++) {
line = domainArr[i];
// if (set.has(line)) {
// duplicateCount++;
// } else {
// set.add(line);
// }
parsed = tldts.parse(line, loosTldOptWithPrivateDomains);
if (parsed.isPrivate) {
continue;
}
tld = parsed.publicSuffix;
apexDomain = parsed.domain;
if (!tld) {
console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, tld });
continue;
}
if (!apexDomain) {
console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, apexDomain });
continue;
}
if (WHITELIST_MAIN_DOMAINS.has(apexDomain)) {
continue;
}
domainCountMap.set(
apexDomain,
domainCountMap.has(apexDomain)
? domainCountMap.get(apexDomain)! + 1
: 1
);
let score = 0;
if (apexDomain in domainScoreMap) {
score = domainScoreMap[apexDomain];
} else {
if (BLACK_TLD.has(tld)) {
score += 3;
} else if (tld.length > 4) {
score += 2;
} else if (tld.length > 5) {
score += 4;
}
if (apexDomain.length >= 18) {
score += 0.5;
}
}
subdomain = parsed.subdomain;
if (subdomain) {
score += calcDomainAbuseScore(subdomain, line);
}
domainScoreMap[apexDomain] = score;
}
domainCountMap.forEach((count, apexDomain) => {
const score = domainScoreMap[apexDomain];
if (
// !WHITELIST_MAIN_DOMAINS.has(apexDomain)
(score >= 24)
|| (score >= 16 && count >= 7)
|| (score >= 13 && count >= 11)
|| (score >= 5 && count >= 14)
|| (score >= 3 && count >= 21)
|| (score >= 1 && count >= 60)
) {
domainArr.push('.' + apexDomain);
}
});
if (isDebug) {
console.log({
v: 1,
score: domainScoreMap['com-ticketry.world'],
count: domainCountMap.get('com-ticketry.world'),
domainArrLen: domainArr.length
});
}
return domainArr;
function calcDomainAbuseScore(subdomain: string, fullDomain: string = subdomain) {
if (leathalKeywords(fullDomain)) {
return 100;
}
let weight = 0;
const hitLowKeywords = lowKeywords(fullDomain);
const sensitiveKeywordsHit = sensitiveKeywords(fullDomain);
if (sensitiveKeywordsHit) {
weight += 15;
if (hitLowKeywords) {
weight += 10;
}
} else if (hitLowKeywords) {
weight += 2;
}
const subdomainLength = subdomain.length;
if (subdomainLength > 6) {
weight += 0.015;
if (subdomainLength > 13) {
weight += 0.2;
if (subdomainLength > 20) {
weight += 1;
if (subdomainLength > 30) {
weight += 5;
if (subdomainLength > 40) {
weight += 10;
}
}
}
if (subdomain.indexOf('.', 1) > 1) {
weight += 1;
}
}
}
return weight;
}
}
}
}
});
export function getPhishingDomains(parentSpan: Span) {
return parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => span.traceChildAsync(
export function getPhishingDomains(isDebug = false): Promise<string[]> {
return dummySpan.traceChild('get phishing domains').traceAsyncFn(async (span) => span.traceChildAsync(
'process phishing domain set',
() => pool.exec(
'getPhishingDomains',
[__filename, require.main === module]
).finally(() => pool.terminate())
async () => {
const downloads = [
...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry)),
...PHISHING_HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry))
];
const domainArr: string[] = [];
const domainGroups = await Promise.all(downloads.map(task => task(dummySpan)));
domainGroups.forEach(appendArrayInPlaceCurried(domainArr));
const domainCountMap = new Map<string, number>();
const domainScoreMap: Record<string, number> = Object.create(null) as Record<string, number>;
let line: string;
let tld: string | null;
let apexDomain: string | null;
let subdomain: string | null;
let parsed: TldTsParsed;
for (let i = 0, len = domainArr.length; i < len; i++) {
line = domainArr[i];
parsed = parse(line, loosTldOptWithPrivateDomains);
if (parsed.isPrivate) {
continue;
}
tld = parsed.publicSuffix;
apexDomain = parsed.domain;
if (!tld) {
console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, tld });
continue;
}
if (!apexDomain) {
console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, apexDomain });
continue;
}
if (WHITELIST_MAIN_DOMAINS.has(apexDomain)) {
continue;
}
domainCountMap.set(
apexDomain,
domainCountMap.has(apexDomain)
? domainCountMap.get(apexDomain)! + 1
: 1
);
let score = 0;
if (apexDomain in domainScoreMap) {
score = domainScoreMap[apexDomain];
} else {
if (BLACK_TLD.has(tld)) {
score += 3;
} else if (tld.length > 4) {
score += 2;
} else if (tld.length > 5) {
score += 4;
}
if (apexDomain.length >= 18) {
score += 0.5;
}
}
subdomain = parsed.subdomain;
if (subdomain) {
score += calcDomainAbuseScore(subdomain, line);
}
domainScoreMap[apexDomain] = score;
}
domainCountMap.forEach((count, apexDomain) => {
const score = domainScoreMap[apexDomain];
if (
(score >= 24)
|| (score >= 16 && count >= 7)
|| (score >= 13 && count >= 11)
|| (score >= 5 && count >= 14)
|| (score >= 3 && count >= 21)
|| (score >= 1 && count >= 60)
) {
domainArr.push('.' + apexDomain);
}
});
if (isDebug) {
console.log({
v: 1,
score: domainScoreMap['com-ticketry.world'],
count: domainCountMap.get('com-ticketry.world'),
domainArrLen: domainArr.length
});
}
return domainArr;
}
));
}
if (require.main === module) {
getPhishingDomains(dummySpan)
.catch(console.error)
.finally(() => {
dummySpan.stop();
printTraceResult(dummySpan.traceResult);
});
function calcDomainAbuseScore(subdomain: string, fullDomain: string = subdomain) {
if (leathalKeywords(fullDomain)) {
return 100;
}
let weight = 0;
const hitLowKeywords = lowKeywords(fullDomain);
const sensitiveKeywordsHit = sensitiveKeywords(fullDomain);
if (sensitiveKeywordsHit) {
weight += 15;
if (hitLowKeywords) {
weight += 10;
}
} else if (hitLowKeywords) {
weight += 2;
}
const subdomainLength = subdomain.length;
if (subdomainLength > 6) {
weight += 0.015;
if (subdomainLength > 13) {
weight += 0.2;
if (subdomainLength > 20) {
weight += 1;
if (subdomainLength > 30) {
weight += 5;
if (subdomainLength > 40) {
weight += 10;
}
}
}
if (subdomain.indexOf('.', 1) > 1) {
weight += 1;
}
}
}
return weight;
}
if (!process.env.JEST_WORKER_ID && require.main === module) {
getPhishingDomains(true).catch(console.error);
}

View File

@@ -7,10 +7,6 @@ import { bigint2ip } from 'fast-cidr-tools';
import { base64ToUint8Array, concatUint8Arrays } from 'foxts/uint8array-utils';
import Worktank from 'worktank';
import { wait } from 'foxts/wait';
import { once } from 'foxts/once';
const mtptoto_public_rsa = `-----BEGIN RSA PUBLIC KEY-----
MIIBCgKCAQEAyr+18Rex2ohtVy8sroGP
BwXD3DOoKCSpjDqYoXgCqB7ioln4eDCFfOBUlfXUEvM/fnKCpF46VkAftlb4VuPD
@@ -112,160 +108,3 @@ export function getTelegramBackupIPFromBase64(base64: string) {
}
}));
}
const pool = new Worktank({
pool: {
name: 'get-telegram-backup-ips',
size: 1 // The number of workers to keep in the pool, if more workers are needed they will be spawned up to this limit
},
worker: {
autoAbort: 10000,
autoTerminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
autoInstantiate: true,
methods: {
// eslint-disable-next-line object-shorthand -- workertank
getTelegramBackupIPs: async function (__filename: string): Promise<{ timestamp: number, ipcidr: string[], ipcidr6: string[] }> {
// TODO: createRequire is a temporary workaround for https://github.com/nodejs/node/issues/51956
const { default: module } = await import('node:module');
const __require = module.createRequire(__filename);
const picocolors = __require('picocolors') as typeof import('picocolors');
const { '~fetch': fetch } = __require('./fetch-retry') as typeof import('./fetch-retry');
const dns = __require('node:dns/promises') as typeof import('node:dns/promises');
const { createReadlineInterfaceFromResponse } = __require('./fetch-text-by-line') as typeof import('./fetch-text-by-line');
const { getTelegramBackupIPFromBase64 } = __require('./get-telegram-backup-ip') as typeof import('./get-telegram-backup-ip');
const { fastIpVersion } = __require('foxts/fast-ip-version') as typeof import('foxts/fast-ip-version');
const { fastStringArrayJoin } = __require('foxts/fast-string-array-join') as typeof import('foxts/fast-string-array-join');
const resp = await fetch('https://core.telegram.org/resources/cidr.txt');
const lastModified = resp.headers.get('last-modified');
const date = lastModified ? new Date(lastModified) : new Date();
const ipcidr: string[] = [
// Unused secret Telegram backup CIDR, announced by AS62041
'95.161.64.0/20'
];
const ipcidr6: string[] = [];
for await (const cidr of createReadlineInterfaceFromResponse(resp, true)) {
const v = fastIpVersion(cidr);
if (v === 4) {
ipcidr.push(cidr);
} else if (v === 6) {
ipcidr6.push(cidr);
}
}
const backupIPs = new Set<string>();
// https://github.com/tdlib/td/blob/master/td/telegram/ConfigManager.cpp
const resolvers = ['8.8.8.8', '1.0.0.1'].map((ip) => {
const resolver = new dns.Resolver();
resolver.setServers([ip]);
return Object.assign(resolver, { server: ip });
});
// Backup IP Source 1 (DNS)
await Promise.all(resolvers.flatMap((resolver) => [
'apv3.stel.com', // prod
'tapv3.stel.com' // test
].map(async (domain) => {
try {
// tapv3.stel.com was for testing server
const resp = await resolver.resolveTxt(domain);
const strings = resp.map(r => fastStringArrayJoin(r, '')); // flatten
if (strings.length !== 2) {
throw new TypeError(`Unexpected TXT record count: ${strings.length}`);
}
const str = strings[0].length > strings[1].length
? strings[0] + strings[1]
: strings[1] + strings[0];
const ips = getTelegramBackupIPFromBase64(str);
ips.forEach(i => backupIPs.add(i.ip));
console.log('[telegram backup ip]', picocolors.green('DNS TXT'), { domain, ips, server: resolver.server });
} catch (e) {
console.error('[telegram backup ip]', picocolors.red('DNS TXT error'), { domain }, e);
}
})));
// Backup IP Source 2: Firebase Realtime Database (test server not supported)
try {
const text = await (await fetch('https://reserve-5a846.firebaseio.com/ipconfigv3.json')).json();
if (typeof text === 'string' && text.length === 344) {
const ips = getTelegramBackupIPFromBase64(text);
ips.forEach(i => backupIPs.add(i.ip));
console.log('[telegram backup ip]', picocolors.green('Firebase Realtime DB'), { ips });
}
} catch (e) {
console.error('[telegram backup ip]', picocolors.red('Firebase Realtime DB error'), e);
// ignore all errors
}
// Backup IP Source 3: Firebase Value Store (test server not supported)
try {
const json = await (await fetch('https://firestore.googleapis.com/v1/projects/reserve-5a846/databases/(default)/documents/ipconfig/v3', {
headers: {
Accept: '*/*',
Origin: undefined // Without this line, Google API will return "Bad request: Origin doesn't match Host for XD3.". Probably have something to do with sqlite cache store
}
})).json();
// const json = await resp.json();
if (
json && typeof json === 'object'
&& 'fields' in json && typeof json.fields === 'object' && json.fields
&& 'data' in json.fields && typeof json.fields.data === 'object' && json.fields.data
&& 'stringValue' in json.fields.data && typeof json.fields.data.stringValue === 'string' && json.fields.data.stringValue.length === 344
) {
const ips = getTelegramBackupIPFromBase64(json.fields.data.stringValue);
ips.forEach(i => backupIPs.add(i.ip));
console.log('[telegram backup ip]', picocolors.green('Firebase Value Store'), { ips });
} else {
console.error('[telegram backup ip]', picocolors.red('Firebase Value Store data format invalid'), { json });
}
} catch (e) {
console.error('[telegram backup ip]', picocolors.red('Firebase Value Store error'), e);
}
// Backup IP Source 4: Google App Engine
await Promise.all([
'https://dns-telegram.appspot.com',
'https://dns-telegram.appspot.com/test'
].map(async (url) => {
try {
const text = await (await fetch(url)).text();
if (text.length === 344) {
const ips = getTelegramBackupIPFromBase64(text);
ips.forEach(i => backupIPs.add(i.ip));
console.log('[telegram backup ip]', picocolors.green('Google App Engine'), { url, ips });
}
} catch (e) {
console.error('[telegram backup ip]', picocolors.red('Google App Engine error'), { url }, e);
}
}));
// tcdnb.azureedge.net no longer works
console.log('[telegram backup ip]', `Found ${backupIPs.size} backup IPs:`, backupIPs);
ipcidr.push(...Array.from(backupIPs).map(i => i + '/32'));
return { timestamp: date.getTime(), ipcidr, ipcidr6 };
}
}
}
});
export const getTelegramCIDRPromise = once(() => wait(0).then(() => pool.exec(
'getTelegramBackupIPs',
[__filename]
)).finally(() => pool.terminate()), false);

28
Build/lib/worker.ts Normal file
View File

@@ -0,0 +1,28 @@
import process from 'node:process';
import type { JestWorkerFarm } from 'jest-worker';
import { Worker as JestWorker } from 'jest-worker';
const sharedWorkerOptions = {
numWorkers: 1,
enableWorkerThreads: true,
forkOptions: {
env: {
...process.env,
NODE_OPTIONS: process.env.NODE_OPTIONS
}
}
} satisfies ConstructorParameters<typeof JestWorker>[1];
export function createWorker<T extends Record<string, unknown>>(workerPath: string) {
return <const K extends ReadonlyArray<keyof T & string>>(exposedMethods: K): JestWorkerFarm<Pick<T, K[number]>> => {
const worker = new JestWorker(workerPath, {
...sharedWorkerOptions,
exposedMethods
}) as JestWorkerFarm<Pick<T, K[number]>>;
worker.getStdout().pipe(process.stdout);
worker.getStderr().pipe(process.stderr);
return worker;
};
}