mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-13 01:30:37 +08:00
Perf: cache phishing hosts process
This commit is contained in:
parent
cb10d2d758
commit
deadf16ae8
@ -29,7 +29,8 @@ export interface CacheOptions<S = string> {
|
||||
|
||||
/**
 * Options accepted by `Cache#apply` when the cached value is stored as-is
 * (no serializer / deserializer pair).
 */
interface CacheApplyRawOption {
  /**
   * Lifetime of the entry. The cache adds this value to `Date.now()` when it
   * refreshes an entry, so it is expressed in milliseconds.
   * NOTE(review): the meaning of `null` is decided inside `apply`, outside the
   * lines visible here — confirm before relying on it.
   */
  ttl?: number | null,
  /** When truthy, `apply` skips the cache and returns the result of `fn()` directly. */
  temporaryBypass?: boolean,
  /**
   * When truthy, a cache hit also rewrites the entry's expiry to
   * "now + ttl", so entries that keep being read do not expire.
   */
  incrementTtlWhenHit?: boolean
}
|
||||
|
||||
interface CacheApplyNonRawOption<T, S> extends CacheApplyRawOption {
|
||||
@ -158,6 +159,10 @@ export class Cache<S = string> {
|
||||
return rv ? (rv.ttl > now ? CacheStatus.Hit : CacheStatus.Stale) : CacheStatus.Miss;
|
||||
}
|
||||
|
||||
/**
 * Rewrite the stored expiry of the row identified by `key`.
 *
 * The new expiry is the current time plus `ttl`, so `ttl` is a duration in
 * milliseconds. Only the `ttl` column is written by this statement.
 *
 * @param key cache key of the row to refresh
 * @param ttl duration, in milliseconds, to add to the current time
 */
private updateTtl(key: string, ttl: number): void {
  const refreshStatement = this.db.prepare(`UPDATE ${this.tableName} SET ttl = ? WHERE key = ?;`);
  const expiresAt = Date.now() + ttl;
  refreshStatement.run(expiresAt, key);
}
|
||||
|
||||
/**
 * Delete the row stored under `key` from the cache table.
 *
 * @param key cache key of the row to delete
 */
del(key: string): void {
  const deleteStatement = this.db.prepare(`DELETE FROM ${this.tableName} WHERE key = ?`);
  deleteStatement.run(key);
}
|
||||
@ -167,7 +172,7 @@ export class Cache<S = string> {
|
||||
fn: () => Promise<T>,
|
||||
opt: CacheApplyOption<T, S>
|
||||
): Promise<T> {
|
||||
const { ttl, temporaryBypass } = opt;
|
||||
const { ttl, temporaryBypass, incrementTtlWhenHit } = opt;
|
||||
|
||||
if (temporaryBypass) {
|
||||
return fn();
|
||||
@ -193,6 +198,10 @@ export class Cache<S = string> {
|
||||
|
||||
console.log(picocolors.green('[cache] hit'), picocolors.gray(key));
|
||||
|
||||
if (incrementTtlWhenHit) {
|
||||
this.updateTtl(key, ttl);
|
||||
}
|
||||
|
||||
const deserializer = 'deserializer' in opt ? opt.deserializer : identity;
|
||||
return deserializer(cached);
|
||||
}
|
||||
|
||||
@ -7,7 +7,10 @@ import { PHISHING_DOMAIN_LISTS_EXTRA } from '../constants/reject-data-source';
|
||||
import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt';
|
||||
import picocolors from 'picocolors';
|
||||
import createKeywordFilter from './aho-corasick';
|
||||
import { createCacheKey } from './cache-filesystem';
|
||||
import { createCacheKey, deserializeArray, fsFetchCache, serializeArray } from './cache-filesystem';
|
||||
import { fastStringArrayJoin } from './misc';
|
||||
|
||||
import { sha256 } from 'hash-wasm';
|
||||
|
||||
const BLACK_TLD = new Set([
|
||||
'accountant',
|
||||
@ -158,65 +161,82 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
||||
return domainArr;
|
||||
});
|
||||
|
||||
const domainCountMap: Record<string, number> = {};
|
||||
const domainScoreMap: Record<string, number> = {};
|
||||
return span.traceChildAsync(
|
||||
'process phishing domain set',
|
||||
() => processPhihsingDomains(domainArr)
|
||||
);
|
||||
});
|
||||
|
||||
span.traceChildSync('process phishing domain set', () => {
|
||||
for (let i = 0, len = domainArr.length; i < len; i++) {
|
||||
const line = domainArr[i];
|
||||
/**
 * Score the collected phishing hostnames per apex domain and append a
 * `.apexDomain` entry for every apex domain that crosses the abuse threshold.
 *
 * The result is memoized in `fsFetchCache`, keyed by the SHA-256 of the input
 * list joined with `|`, so an unchanged input skips the whole scoring pass.
 *
 * On a cache miss `domainArr` is extended in place and returned; on a cache
 * hit the deserialized cached array is returned and `domainArr` is left
 * untouched. Callers should therefore always use the returned array.
 *
 * @param domainArr hostnames gathered from the phishing domain lists
 * @returns the hostnames plus the appended `.apexDomain` entries
 */
async function processPhihsingDomains(domainArr: string[]): Promise<string[]> {
  const hash = await sha256(fastStringArrayJoin(domainArr, '|'));

  return fsFetchCache.apply(
    // `cacheKey` is declared elsewhere in this file (not visible in this hunk).
    cacheKey('processPhihsingDomains|' + hash),
    () => {
      const domainCountMap: Record<string, number> = {};
      const domainScoreMap: Record<string, number> = {};

      for (let i = 0, len = domainArr.length; i < len; i++) {
        const line = domainArr[i];

        const {
          publicSuffix: tld,
          domain: apexDomain,
          subdomain,
          isPrivate
        } = tldts.parse(line, loosTldOptWithPrivateDomains);

        // Hosts under a private suffix are never aggregated per apex domain.
        if (isPrivate) {
          continue;
        }

        if (!tld) {
          console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, tld });
          continue;
        }
        if (!apexDomain) {
          console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, apexDomain });
          continue;
        }

        domainCountMap[apexDomain] ||= 0;
        domainCountMap[apexDomain] += 1;

        // The TLD-based part of the score is applied once per apex domain.
        if (!(apexDomain in domainScoreMap)) {
          domainScoreMap[apexDomain] = 0;
          if (BLACK_TLD.has(tld)) {
            domainScoreMap[apexDomain] += 4;
          } else if (tld.length > 6) {
            domainScoreMap[apexDomain] += 2;
          }
        }

        // Every subdomain adds to the score unless the apex domain is whitelisted.
        if (
          subdomain
          && !WHITELIST_MAIN_DOMAINS.has(apexDomain)
        ) {
          domainScoreMap[apexDomain] += calcDomainAbuseScore(subdomain);
        }
      }

      for (const apexDomain in domainCountMap) {
        if (
          // !WHITELIST_MAIN_DOMAINS.has(apexDomain)
          domainScoreMap[apexDomain] >= 12
          || (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 4)
        ) {
          domainArr.push(`.${apexDomain}`);
        }
      }

      return Promise.resolve(domainArr);
    },
    {
      // The cache computes expiry as `Date.now() + ttl`, i.e. in milliseconds.
      // Two days must therefore be written in ms; `2 * 86400` would expire
      // the entry after roughly 173 seconds.
      ttl: 2 * 86400 * 1000,
      serializer: serializeArray,
      deserializer: deserializeArray,
      incrementTtlWhenHit: true
    }
  );
}
|
||||
|
||||
export function calcDomainAbuseScore(subdomain: string) {
|
||||
let weight = 0;
|
||||
|
||||
@ -30,6 +30,7 @@
|
||||
"fast-cidr-tools": "^0.2.5",
|
||||
"fdir": "^6.3.0",
|
||||
"foxact": "^0.2.38",
|
||||
"hash-wasm": "^4.11.0",
|
||||
"json-stringify-pretty-compact": "^3.0.0",
|
||||
"mnemonist": "^0.39.8",
|
||||
"picocolors": "^1.1.0",
|
||||
|
||||
8
pnpm-lock.yaml
generated
8
pnpm-lock.yaml
generated
@ -41,6 +41,9 @@ importers:
|
||||
foxact:
|
||||
specifier: ^0.2.38
|
||||
version: 0.2.38
|
||||
hash-wasm:
|
||||
specifier: ^4.11.0
|
||||
version: 4.11.0
|
||||
json-stringify-pretty-compact:
|
||||
specifier: ^3.0.0
|
||||
version: 3.0.0
|
||||
@ -996,6 +999,9 @@ packages:
|
||||
resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==}
|
||||
engines: {node: '>=8'}
|
||||
|
||||
hash-wasm@4.11.0:
|
||||
resolution: {integrity: sha512-HVusNXlVqHe0fzIzdQOGolnFN6mX/fqcrSAOcTBXdvzrXVHwTz11vXeKRmkR5gTuwVpvHZEIyKoePDvuAR+XwQ==}
|
||||
|
||||
hasown@2.0.2:
|
||||
resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==}
|
||||
engines: {node: '>= 0.4'}
|
||||
@ -2519,6 +2525,8 @@ snapshots:
|
||||
|
||||
has-flag@4.0.0: {}
|
||||
|
||||
hash-wasm@4.11.0: {}
|
||||
|
||||
hasown@2.0.2:
|
||||
dependencies:
|
||||
function-bind: 1.1.2
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user