Perf: attempt to make phishing hosts processing faster

This commit is contained in:
SukkaW 2025-03-27 23:16:25 +08:00
parent 61505bfa1a
commit eac8256e2e
3 changed files with 26 additions and 18 deletions

View File

@ -286,7 +286,11 @@ export const ADGUARD_FILTERS_EXTRA: AdGuardFilterSource[] = [
] ]
], ],
// no coin list adguard list is more maintained than its hosts // no coin list adguard list is more maintained than its hosts
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/nocoin.txt', [], true], [
'https://cdn.jsdelivr.net/gh/hoshsadiq/adblock-nocoin-list@master/nocoin.txt',
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/nocoin.txt'],
true
],
// AdGuard Annoyances filter // AdGuard Annoyances filter
[ [
'https://filters.adtidy.org/extension/ublock/filters/14_optimized.txt', 'https://filters.adtidy.org/extension/ublock/filters/14_optimized.txt',

View File

@ -37,7 +37,7 @@ const pool = new Worktank({
const { BLACK_TLD, WHITELIST_MAIN_DOMAINS, leathalKeywords, lowKeywords, sensitiveKeywords } = __require('../constants/phishing-score-source') as typeof import('../constants/phishing-score-source'); const { BLACK_TLD, WHITELIST_MAIN_DOMAINS, leathalKeywords, lowKeywords, sensitiveKeywords } = __require('../constants/phishing-score-source') as typeof import('../constants/phishing-score-source');
const domainCountMap = new Map<string, number>(); const domainCountMap = new Map<string, number>();
const domainScoreMap: Record<string, number> = {}; const domainScoreMap: Record<string, number> = Object.create(null);
let line = ''; let line = '';
let tld: string | null = ''; let tld: string | null = '';
@ -72,6 +72,9 @@ const pool = new Worktank({
console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, apexDomain }); console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, apexDomain });
continue; continue;
} }
if (WHITELIST_MAIN_DOMAINS.has(apexDomain)) {
continue;
}
domainCountMap.set( domainCountMap.set(
apexDomain, apexDomain,
@ -80,37 +83,38 @@ const pool = new Worktank({
: 1 : 1
); );
let score = apexDomain in domainScoreMap ? domainScoreMap[apexDomain] : 0;
if (!(apexDomain in domainScoreMap)) { if (!(apexDomain in domainScoreMap)) {
domainScoreMap[apexDomain] = 0;
if (BLACK_TLD.has(tld)) { if (BLACK_TLD.has(tld)) {
domainScoreMap[apexDomain] += 3; score += 3;
} else if (tld.length > 6) { } else if (tld.length > 6) {
domainScoreMap[apexDomain] += 2; score += 2;
} }
if (apexDomain.length >= 18) { if (apexDomain.length >= 18) {
domainScoreMap[apexDomain] += 0.5; score += 0.5;
} }
} }
subdomain = parsed.subdomain; subdomain = parsed.subdomain;
if ( if (subdomain) {
subdomain score += calcDomainAbuseScore(subdomain, line);
&& !WHITELIST_MAIN_DOMAINS.has(apexDomain)
) {
domainScoreMap[apexDomain] += calcDomainAbuseScore(subdomain, line);
} }
domainScoreMap[apexDomain] = score;
} }
domainCountMap.forEach((count, apexDomain) => { domainCountMap.forEach((count, apexDomain) => {
const score = domainScoreMap[apexDomain];
if ( if (
// !WHITELIST_MAIN_DOMAINS.has(apexDomain) // !WHITELIST_MAIN_DOMAINS.has(apexDomain)
(domainScoreMap[apexDomain] >= 24) (score >= 24)
|| (domainScoreMap[apexDomain] >= 16 && count >= 7) || (score >= 16 && count >= 7)
|| (domainScoreMap[apexDomain] >= 13 && count >= 11) || (score >= 13 && count >= 11)
|| (domainScoreMap[apexDomain] >= 5 && count >= 14) || (score >= 5 && count >= 14)
|| (domainScoreMap[apexDomain] >= 3 && count >= 21) || (score >= 3 && count >= 21)
|| (domainScoreMap[apexDomain] >= 1 && count >= 60) || (score >= 1 && count >= 60)
) { ) {
domainArr.push('.' + apexDomain); domainArr.push('.' + apexDomain);
} }

View File

@ -196,7 +196,7 @@ export function parse($line: string, result: [string, ParseType], includeThirdPa
return result; return result;
} }
const filter = NetworkFilter.parse(line); const filter = NetworkFilter.parse(line, false);
if (filter) { if (filter) {
if ( if (
// filter.isCosmeticFilter() // always false // filter.isCosmeticFilter() // always false