Housekeeping [skip ci]

This commit is contained in:
SukkaW 2025-07-27 21:02:35 +08:00
parent df7af76375
commit 3d1514f0d1
5 changed files with 501 additions and 472 deletions

View File

@ -5,41 +5,44 @@ import { RulesetOutput } from './lib/rules/ruleset';
import Worktank from 'worktank';
const pool = new Worktank({
name: 'build-internal-reverse-chn-cidr',
size: 1,
timeout: 10000, // The maximum number of milliseconds to wait for the result from the worker, if exceeded the worker is terminated and the execution promise rejects
warmup: true,
autoterminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
env: {},
methods: {
pool: {
name: 'build-internal-reverse-chn-cidr',
size: 1 // The number of workers to keep in the pool, if more workers are needed they will be spawned up to this limit
},
worker: {
autoAbort: 10000,
autoTerminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
autoInstantiate: true,
methods: {
// eslint-disable-next-line object-shorthand -- workertank
getMicrosoftCdnRuleset: async function (importMetaUrl: string): Promise<[domains: string[], domainSuffixes: string[]]> {
getMicrosoftCdnRuleset: async function (importMetaUrl: string): Promise<[domains: string[], domainSuffixes: string[]]> {
// TODO: createRequire is a temporary workaround for https://github.com/nodejs/node/issues/51956
const { default: module } = await import('node:module');
const __require = module.createRequire(importMetaUrl);
const { default: module } = await import('node:module');
const __require = module.createRequire(importMetaUrl);
const { HostnameSmolTrie } = __require('./lib/trie');
const { PROBE_DOMAINS, DOMAINS, DOMAIN_SUFFIXES, BLACKLIST } = __require('./constants/microsoft-cdn') as typeof import('./constants/microsoft-cdn');
const { fetchRemoteTextByLine } = __require('./lib/fetch-text-by-line') as typeof import('./lib/fetch-text-by-line');
const { appendArrayInPlace } = __require('foxts/append-array-in-place') as typeof import('foxts/append-array-in-place');
const { extractDomainsFromFelixDnsmasq } = __require('./lib/parse-dnsmasq') as typeof import('./lib/parse-dnsmasq');
const { HostnameSmolTrie } = __require('./lib/trie');
const { PROBE_DOMAINS, DOMAINS, DOMAIN_SUFFIXES, BLACKLIST } = __require('./constants/microsoft-cdn') as typeof import('./constants/microsoft-cdn');
const { fetchRemoteTextByLine } = __require('./lib/fetch-text-by-line') as typeof import('./lib/fetch-text-by-line');
const { appendArrayInPlace } = __require('foxts/append-array-in-place') as typeof import('foxts/append-array-in-place');
const { extractDomainsFromFelixDnsmasq } = __require('./lib/parse-dnsmasq') as typeof import('./lib/parse-dnsmasq');
const trie = new HostnameSmolTrie();
const trie = new HostnameSmolTrie();
for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
const domain = extractDomainsFromFelixDnsmasq(line);
if (domain) {
trie.add(domain);
for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
const domain = extractDomainsFromFelixDnsmasq(line);
if (domain) {
trie.add(domain);
}
}
// remove blacklist domain from trie, to prevent them from being included in the later dump
BLACKLIST.forEach(black => trie.whitelist(black));
const domains: string[] = DOMAINS;
const domainSuffixes = appendArrayInPlace(PROBE_DOMAINS.flatMap(domain => trie.find(domain)), DOMAIN_SUFFIXES);
return [domains, domainSuffixes] as const;
}
// remove blacklist domain from trie, to prevent them from being included in the later dump
BLACKLIST.forEach(black => trie.whitelist(black));
const domains: string[] = DOMAINS;
const domainSuffixes = appendArrayInPlace(PROBE_DOMAINS.flatMap(domain => trie.find(domain)), DOMAIN_SUFFIXES);
return [domains, domainSuffixes] as const;
}
}
});

View File

@ -2,55 +2,52 @@ import Worktank from 'worktank';
import os from 'node:os';
import process from 'node:process';
import type { Span } from '../trace';
import { availableParallelism } from 'foxts/available-parallelism';
const pool = new Worktank({
name: 'process-phishing-domains',
size: Math.max(
1,
(
'availableParallelism' in os
? os.availableParallelism()
: (os as typeof import('node:os')).cpus().length
) - 1
),
timeout: 10000, // The maximum number of milliseconds to wait for the result from the worker, if exceeded the worker is terminated and the execution promise rejects
warmup: true,
autoterminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
env: {},
methods: {
pool: {
name: 'process-phishing-domains',
size: (availableParallelism(os) - 1) || 1
},
worker: {
autoAbort: 10000, // The maximum number of milliseconds to wait for the result from the worker, if exceeded the worker is terminated and the execution promise rejects
autoTerminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
env: {},
methods: {
// eslint-disable-next-line object-shorthand -- workertank
compareAndWriteFile: async function (
linesA: string[], filePath: string,
importMetaUrl: string
): Promise<void> {
const { default: module } = await import('node:module');
const __require = module.createRequire(importMetaUrl);
compareAndWriteFile: async function (
linesA: string[], filePath: string,
importMetaUrl: string
): Promise<void> {
const { default: module } = await import('node:module');
const __require = module.createRequire(importMetaUrl);
const fs = __require('fs') as typeof import('fs');
const { readFileByLine } = __require('./fetch-text-by-line') as typeof import('./fetch-text-by-line');
const { fileEqual } = __require('./create-file') as typeof import('./create-file');
const path = __require('node:path') as typeof import('node:path');
const { fastStringArrayJoin } = __require('foxts/fast-string-array-join') as typeof import('foxts/fast-string-array-join');
const picocolors = __require('picocolors') as typeof import('picocolors');
const fs = __require('fs') as typeof import('fs');
const { readFileByLine } = __require('./fetch-text-by-line') as typeof import('./fetch-text-by-line');
const { fileEqual } = __require('./create-file') as typeof import('./create-file');
const path = __require('node:path') as typeof import('node:path');
const { fastStringArrayJoin } = __require('foxts/fast-string-array-join') as typeof import('foxts/fast-string-array-join');
const picocolors = __require('picocolors') as typeof import('picocolors');
let isEqual = false;
if (fs.existsSync(filePath)) {
isEqual = await fileEqual(linesA, readFileByLine(filePath));
} else {
console.log(`${filePath} does not exists, writing...`);
let isEqual = false;
if (fs.existsSync(filePath)) {
isEqual = await fileEqual(linesA, readFileByLine(filePath));
} else {
console.log(`${filePath} does not exists, writing...`);
// isEqual = false; // isEqual is false by default anyway
}
}
if (isEqual) {
console.log(picocolors.gray(picocolors.dim(`same content, bail out writing: ${filePath}`)));
return;
}
if (isEqual) {
console.log(picocolors.gray(picocolors.dim(`same content, bail out writing: ${filePath}`)));
return;
}
const dir = path.dirname(filePath);
if (!fs.existsSync(dir)) {
fs.mkdirSync(dir, { recursive: true });
const dir = path.dirname(filePath);
if (!fs.existsSync(dir)) {
fs.mkdirSync(dir, { recursive: true });
}
fs.writeFileSync(filePath, fastStringArrayJoin(linesA, '\n') + '\n', { encoding: 'utf-8' });
}
fs.writeFileSync(filePath, fastStringArrayJoin(linesA, '\n') + '\n', { encoding: 'utf-8' });
}
}
});

View File

@ -5,189 +5,193 @@ import type { Span } from '../trace';
import type { TldTsParsed } from './normalize-domain';
const pool = new Worktank({
name: 'process-phishing-domains',
size: 1,
timeout: 20000, // The maximum number of milliseconds to wait for the result from the worker, if exceeded the worker is terminated and the execution promise rejects
warmup: true,
autoterminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
env: {},
methods: {
// eslint-disable-next-line object-shorthand -- workertank
getPhishingDomains: async function (
importMetaUrl: string,
/** require.main === module */ isDebug = false
): Promise<string[]> {
pool: {
name: 'process-phishing-domains',
size: 1
},
worker: {
autoAbort: 20000, // The maximum number of milliseconds to wait for the result from the worker, if exceeded the worker is terminated and the execution promise rejects
autoInstantiate: true,
autoTerminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
env: {},
methods: {
// eslint-disable-next-line object-shorthand -- workertank
getPhishingDomains: async function (
importMetaUrl: string,
/** require.main === module */ isDebug = false
): Promise<string[]> {
// TODO: createRequire is a temporary workaround for https://github.com/nodejs/node/issues/51956
const { default: module } = await import('node:module');
const __require = module.createRequire(importMetaUrl);
const { default: module } = await import('node:module');
const __require = module.createRequire(importMetaUrl);
const picocolors = __require('picocolors') as typeof import('picocolors');
const tldts = __require('tldts-experimental') as typeof import('tldts-experimental');
const picocolors = __require('picocolors') as typeof import('picocolors');
const tldts = __require('tldts-experimental') as typeof import('tldts-experimental');
const { appendArrayInPlaceCurried } = __require('foxts/append-array-in-place') as typeof import('foxts/append-array-in-place');
const { appendArrayInPlaceCurried } = __require('foxts/append-array-in-place') as typeof import('foxts/append-array-in-place');
const { loosTldOptWithPrivateDomains } = __require('../constants/loose-tldts-opt') as typeof import('../constants/loose-tldts-opt');
const { BLACK_TLD, WHITELIST_MAIN_DOMAINS, leathalKeywords, lowKeywords, sensitiveKeywords } = __require('../constants/phishing-score-source') as typeof import('../constants/phishing-score-source');
const { PHISHING_DOMAIN_LISTS_EXTRA, PHISHING_HOSTS_EXTRA } = __require('../constants/reject-data-source') as typeof import('../constants/reject-data-source');
const { dummySpan } = __require('../trace') as typeof import('../trace');
const NullPrototypeObject = __require('null-prototype-object') as typeof import('null-prototype-object');
const { loosTldOptWithPrivateDomains } = __require('../constants/loose-tldts-opt') as typeof import('../constants/loose-tldts-opt');
const { BLACK_TLD, WHITELIST_MAIN_DOMAINS, leathalKeywords, lowKeywords, sensitiveKeywords } = __require('../constants/phishing-score-source') as typeof import('../constants/phishing-score-source');
const { PHISHING_DOMAIN_LISTS_EXTRA, PHISHING_HOSTS_EXTRA } = __require('../constants/reject-data-source') as typeof import('../constants/reject-data-source');
const { dummySpan } = __require('../trace') as typeof import('../trace');
const NullPrototypeObject = __require('null-prototype-object') as typeof import('null-prototype-object');
const { processHostsWithPreload } = __require('./parse-filter/hosts') as typeof import('./parse-filter/hosts');
const { processDomainListsWithPreload } = __require('./parse-filter/domainlists') as typeof import('./parse-filter/domainlists');
const { processHostsWithPreload } = __require('./parse-filter/hosts') as typeof import('./parse-filter/hosts');
const { processDomainListsWithPreload } = __require('./parse-filter/domainlists') as typeof import('./parse-filter/domainlists');
const downloads = [
...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry)),
...PHISHING_HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry))
];
const downloads = [
...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry)),
...PHISHING_HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry))
];
const domainArr: string[] = [];
const domainArr: string[] = [];
const domainGroups = await Promise.all(downloads.map(task => task(dummySpan)));
domainGroups.forEach(appendArrayInPlaceCurried(domainArr));
const domainGroups = await Promise.all(downloads.map(task => task(dummySpan)));
domainGroups.forEach(appendArrayInPlaceCurried(domainArr));
// return domainArr;
// return domainArr;
const domainCountMap = new Map<string, number>();
const domainScoreMap: Record<string, number> = new NullPrototypeObject();
const domainCountMap = new Map<string, number>();
const domainScoreMap: Record<string, number> = new NullPrototypeObject();
let line = '';
let tld: string | null = '';
let apexDomain: string | null = '';
let subdomain: string | null = '';
let parsed: TldTsParsed;
let line = '';
let tld: string | null = '';
let apexDomain: string | null = '';
let subdomain: string | null = '';
let parsed: TldTsParsed;
// const set = new Set<string>();
// let duplicateCount = 0;
// const set = new Set<string>();
// let duplicateCount = 0;
for (let i = 0, len = domainArr.length; i < len; i++) {
line = domainArr[i];
for (let i = 0, len = domainArr.length; i < len; i++) {
line = domainArr[i];
// if (set.has(line)) {
// duplicateCount++;
// } else {
// set.add(line);
// }
// if (set.has(line)) {
// duplicateCount++;
// } else {
// set.add(line);
// }
parsed = tldts.parse(line, loosTldOptWithPrivateDomains);
if (parsed.isPrivate) {
continue;
}
tld = parsed.publicSuffix;
apexDomain = parsed.domain;
if (!tld) {
console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, tld });
continue;
}
if (!apexDomain) {
console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, apexDomain });
continue;
}
if (WHITELIST_MAIN_DOMAINS.has(apexDomain)) {
continue;
}
domainCountMap.set(
apexDomain,
domainCountMap.has(apexDomain)
? domainCountMap.get(apexDomain)! + 1
: 1
);
let score = 0;
if (apexDomain in domainScoreMap) {
score = domainScoreMap[apexDomain];
} else {
if (BLACK_TLD.has(tld)) {
score += 3;
} else if (tld.length > 6) {
score += 2;
parsed = tldts.parse(line, loosTldOptWithPrivateDomains);
if (parsed.isPrivate) {
continue;
}
if (apexDomain.length >= 18) {
score += 0.5;
tld = parsed.publicSuffix;
apexDomain = parsed.domain;
if (!tld) {
console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, tld });
continue;
}
if (!apexDomain) {
console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, apexDomain });
continue;
}
if (WHITELIST_MAIN_DOMAINS.has(apexDomain)) {
continue;
}
domainCountMap.set(
apexDomain,
domainCountMap.has(apexDomain)
? domainCountMap.get(apexDomain)! + 1
: 1
);
let score = 0;
if (apexDomain in domainScoreMap) {
score = domainScoreMap[apexDomain];
} else {
if (BLACK_TLD.has(tld)) {
score += 3;
} else if (tld.length > 6) {
score += 2;
}
if (apexDomain.length >= 18) {
score += 0.5;
}
}
subdomain = parsed.subdomain;
if (subdomain) {
score += calcDomainAbuseScore(subdomain, line);
}
domainScoreMap[apexDomain] = score;
}
subdomain = parsed.subdomain;
if (subdomain) {
score += calcDomainAbuseScore(subdomain, line);
}
domainScoreMap[apexDomain] = score;
}
domainCountMap.forEach((count, apexDomain) => {
const score = domainScoreMap[apexDomain];
if (
// !WHITELIST_MAIN_DOMAINS.has(apexDomain)
(score >= 24)
|| (score >= 16 && count >= 7)
|| (score >= 13 && count >= 11)
|| (score >= 5 && count >= 14)
|| (score >= 3 && count >= 21)
|| (score >= 1 && count >= 60)
) {
domainArr.push('.' + apexDomain);
}
});
if (isDebug) {
console.log({
v: 1,
score: domainScoreMap['com-ticketry.world'],
count: domainCountMap.get('com-ticketry.world'),
domainArrLen: domainArr.length
domainCountMap.forEach((count, apexDomain) => {
const score = domainScoreMap[apexDomain];
if (
// !WHITELIST_MAIN_DOMAINS.has(apexDomain)
(score >= 24)
|| (score >= 16 && count >= 7)
|| (score >= 13 && count >= 11)
|| (score >= 5 && count >= 14)
|| (score >= 3 && count >= 21)
|| (score >= 1 && count >= 60)
) {
domainArr.push('.' + apexDomain);
}
});
}
return domainArr;
function calcDomainAbuseScore(subdomain: string, fullDomain: string = subdomain) {
if (leathalKeywords(fullDomain)) {
return 100;
if (isDebug) {
console.log({
v: 1,
score: domainScoreMap['com-ticketry.world'],
count: domainCountMap.get('com-ticketry.world'),
domainArrLen: domainArr.length
});
}
let weight = 0;
return domainArr;
const hitLowKeywords = lowKeywords(fullDomain);
const sensitiveKeywordsHit = sensitiveKeywords(fullDomain);
if (sensitiveKeywordsHit) {
weight += 15;
if (hitLowKeywords) {
weight += 10;
function calcDomainAbuseScore(subdomain: string, fullDomain: string = subdomain) {
if (leathalKeywords(fullDomain)) {
return 100;
}
} else if (hitLowKeywords) {
weight += 2;
}
const subdomainLength = subdomain.length;
let weight = 0;
if (subdomainLength > 6) {
weight += 0.015;
const hitLowKeywords = lowKeywords(fullDomain);
const sensitiveKeywordsHit = sensitiveKeywords(fullDomain);
if (subdomainLength > 13) {
weight += 0.2;
if (subdomainLength > 20) {
weight += 1;
if (subdomainLength > 30) {
weight += 5;
if (subdomainLength > 40) {
weight += 10;
if (sensitiveKeywordsHit) {
weight += 15;
if (hitLowKeywords) {
weight += 10;
}
} else if (hitLowKeywords) {
weight += 2;
}
const subdomainLength = subdomain.length;
if (subdomainLength > 6) {
weight += 0.015;
if (subdomainLength > 13) {
weight += 0.2;
if (subdomainLength > 20) {
weight += 1;
if (subdomainLength > 30) {
weight += 5;
if (subdomainLength > 40) {
weight += 10;
}
}
}
}
if (subdomain.indexOf('.', 1) > 1) {
weight += 1;
if (subdomain.indexOf('.', 1) > 1) {
weight += 1;
}
}
}
}
return weight;
return weight;
}
}
}
}

View File

@ -20,20 +20,20 @@
"author": "",
"license": "ISC",
"dependencies": {
"@ghostery/adblocker": "^2.11.2",
"@ghostery/adblocker": "^2.11.3",
"@henrygd/queue": "^1.0.7",
"@mitata/counters": "^0.0.8",
"async-retry": "^1.3.3",
"better-sqlite3": "^12.2.0",
"ci-info": "^4.3.0",
"cli-progress": "^3.12.0",
"csv-parse": "^5.6.0",
"csv-parse": "^6.1.0",
"dns2": "^2.1.0",
"fast-cidr-tools": "^0.3.2",
"fast-fifo": "^1.3.2",
"fast-uri": "^3.0.6",
"fdir": "^6.4.6",
"foxts": "^3.9.0",
"foxts": "^3.10.0",
"hash-wasm": "^4.12.0",
"json-stringify-pretty-compact": "3.0.0",
"null-prototype-object": "^1.2.0",
@ -46,7 +46,7 @@
"undici-cache-store-better-sqlite3": "^1.0.0",
"whoiser": "^1.18.0",
"why-is-node-running": "^3.2.2",
"worktank": "^2.7.3",
"worktank": "^3.0.2",
"xbits": "^0.2.0",
"yaml": "^2.8.0",
"yauzl-promise": "^4.0.0"
@ -54,21 +54,21 @@
"devDependencies": {
"@eslint-sukka/node": "^6.22.1",
"@swc-node/register": "^1.10.10",
"@swc/core": "^1.13.1",
"@swc/core": "^1.13.2",
"@types/async-retry": "^1.4.9",
"@types/better-sqlite3": "^7.6.13",
"@types/cli-progress": "^3.11.6",
"@types/dns2": "^2.0.9",
"@types/fast-fifo": "^1.3.0",
"@types/mocha": "^10.0.10",
"@types/node": "^24.0.15",
"@types/node": "^24.1.0",
"@types/punycode": "^2.1.4",
"@types/tar-fs": "^2.0.4",
"@types/yauzl-promise": "^4.0.1",
"eslint": "^9.31.0",
"eslint": "^9.32.0",
"eslint-config-sukka": "^6.22.1",
"eslint-formatter-sukka": "^6.22.1",
"expect": "^30.0.4",
"expect": "^30.0.5",
"mitata": "^1.0.34",
"mocha": "^11.7.1",
"tinyexec": "^1.0.1",

519
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff