Perf: cache phishing hosts process

This commit is contained in:
SukkaW 2024-09-13 22:07:29 +08:00
parent cb10d2d758
commit deadf16ae8
4 changed files with 89 additions and 51 deletions

View File

@ -29,7 +29,8 @@ export interface CacheOptions<S = string> {
interface CacheApplyRawOption { interface CacheApplyRawOption {
ttl?: number | null, ttl?: number | null,
temporaryBypass?: boolean temporaryBypass?: boolean,
incrementTtlWhenHit?: boolean
} }
interface CacheApplyNonRawOption<T, S> extends CacheApplyRawOption { interface CacheApplyNonRawOption<T, S> extends CacheApplyRawOption {
@ -158,6 +159,10 @@ export class Cache<S = string> {
return rv ? (rv.ttl > now ? CacheStatus.Hit : CacheStatus.Stale) : CacheStatus.Miss; return rv ? (rv.ttl > now ? CacheStatus.Hit : CacheStatus.Stale) : CacheStatus.Miss;
} }
/**
 * Push back the expiry of an existing cache row.
 *
 * Writes `Date.now() + ttl` into the `ttl` column of the row whose `key`
 * column equals `key`. Because the offset is added to `Date.now()`, `ttl`
 * is interpreted on the same scale (milliseconds).
 *
 * @param key - Value matched against the `key` column.
 * @param ttl - Offset added to the current timestamp to form the new expiry.
 */
private updateTtl(key: string, ttl: number): void {
  const refreshStatement = this.db.prepare(`UPDATE ${this.tableName} SET ttl = ? WHERE key = ?;`);
  const nextExpiry = Date.now() + ttl;
  refreshStatement.run(nextExpiry, key);
}
del(key: string): void { del(key: string): void {
this.db.prepare(`DELETE FROM ${this.tableName} WHERE key = ?`).run(key); this.db.prepare(`DELETE FROM ${this.tableName} WHERE key = ?`).run(key);
} }
@ -167,7 +172,7 @@ export class Cache<S = string> {
fn: () => Promise<T>, fn: () => Promise<T>,
opt: CacheApplyOption<T, S> opt: CacheApplyOption<T, S>
): Promise<T> { ): Promise<T> {
const { ttl, temporaryBypass } = opt; const { ttl, temporaryBypass, incrementTtlWhenHit } = opt;
if (temporaryBypass) { if (temporaryBypass) {
return fn(); return fn();
@ -193,6 +198,10 @@ export class Cache<S = string> {
console.log(picocolors.green('[cache] hit'), picocolors.gray(key)); console.log(picocolors.green('[cache] hit'), picocolors.gray(key));
if (incrementTtlWhenHit) {
this.updateTtl(key, ttl);
}
const deserializer = 'deserializer' in opt ? opt.deserializer : identity; const deserializer = 'deserializer' in opt ? opt.deserializer : identity;
return deserializer(cached); return deserializer(cached);
} }

View File

@ -7,7 +7,10 @@ import { PHISHING_DOMAIN_LISTS_EXTRA } from '../constants/reject-data-source';
import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt'; import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt';
import picocolors from 'picocolors'; import picocolors from 'picocolors';
import createKeywordFilter from './aho-corasick'; import createKeywordFilter from './aho-corasick';
import { createCacheKey } from './cache-filesystem'; import { createCacheKey, deserializeArray, fsFetchCache, serializeArray } from './cache-filesystem';
import { fastStringArrayJoin } from './misc';
import { sha256 } from 'hash-wasm';
const BLACK_TLD = new Set([ const BLACK_TLD = new Set([
'accountant', 'accountant',
@ -158,10 +161,20 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
return domainArr; return domainArr;
}); });
return span.traceChildAsync(
'process phishing domain set',
() => processPhihsingDomains(domainArr)
);
});
async function processPhihsingDomains(domainArr: string[]) {
const hash = await sha256(fastStringArrayJoin(domainArr, '|'));
return fsFetchCache.apply(
cacheKey('processPhihsingDomains|' + hash),
() => {
const domainCountMap: Record<string, number> = {}; const domainCountMap: Record<string, number> = {};
const domainScoreMap: Record<string, number> = {}; const domainScoreMap: Record<string, number> = {};
span.traceChildSync('process phishing domain set', () => {
for (let i = 0, len = domainArr.length; i < len; i++) { for (let i = 0, len = domainArr.length; i < len; i++) {
const line = domainArr[i]; const line = domainArr[i];
@ -203,7 +216,6 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
domainScoreMap[apexDomain] += calcDomainAbuseScore(subdomain); domainScoreMap[apexDomain] += calcDomainAbuseScore(subdomain);
} }
} }
});
for (const apexDomain in domainCountMap) { for (const apexDomain in domainCountMap) {
if ( if (
@ -215,8 +227,16 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
} }
} }
return domainArr; return Promise.resolve(domainArr);
}); },
{
ttl: 2 * 86400,
serializer: serializeArray,
deserializer: deserializeArray,
incrementTtlWhenHit: true
}
);
}
export function calcDomainAbuseScore(subdomain: string) { export function calcDomainAbuseScore(subdomain: string) {
let weight = 0; let weight = 0;

View File

@ -30,6 +30,7 @@
"fast-cidr-tools": "^0.2.5", "fast-cidr-tools": "^0.2.5",
"fdir": "^6.3.0", "fdir": "^6.3.0",
"foxact": "^0.2.38", "foxact": "^0.2.38",
"hash-wasm": "^4.11.0",
"json-stringify-pretty-compact": "^3.0.0", "json-stringify-pretty-compact": "^3.0.0",
"mnemonist": "^0.39.8", "mnemonist": "^0.39.8",
"picocolors": "^1.1.0", "picocolors": "^1.1.0",

8
pnpm-lock.yaml generated
View File

@ -41,6 +41,9 @@ importers:
foxact: foxact:
specifier: ^0.2.38 specifier: ^0.2.38
version: 0.2.38 version: 0.2.38
hash-wasm:
specifier: ^4.11.0
version: 4.11.0
json-stringify-pretty-compact: json-stringify-pretty-compact:
specifier: ^3.0.0 specifier: ^3.0.0
version: 3.0.0 version: 3.0.0
@ -996,6 +999,9 @@ packages:
resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==} resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==}
engines: {node: '>=8'} engines: {node: '>=8'}
hash-wasm@4.11.0:
resolution: {integrity: sha512-HVusNXlVqHe0fzIzdQOGolnFN6mX/fqcrSAOcTBXdvzrXVHwTz11vXeKRmkR5gTuwVpvHZEIyKoePDvuAR+XwQ==}
hasown@2.0.2: hasown@2.0.2:
resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==} resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==}
engines: {node: '>= 0.4'} engines: {node: '>= 0.4'}
@ -2519,6 +2525,8 @@ snapshots:
has-flag@4.0.0: {} has-flag@4.0.0: {}
hash-wasm@4.11.0: {}
hasown@2.0.2: hasown@2.0.2:
dependencies: dependencies:
function-bind: 1.1.2 function-bind: 1.1.2