diff --git a/Build/lib/cache-filesystem.ts b/Build/lib/cache-filesystem.ts index 26c9de72..c17dde31 100644 --- a/Build/lib/cache-filesystem.ts +++ b/Build/lib/cache-filesystem.ts @@ -29,7 +29,8 @@ export interface CacheOptions { interface CacheApplyRawOption { ttl?: number | null, - temporaryBypass?: boolean + temporaryBypass?: boolean, + incrementTtlWhenHit?: boolean } interface CacheApplyNonRawOption extends CacheApplyRawOption { @@ -158,6 +159,10 @@ export class Cache { return rv ? (rv.ttl > now ? CacheStatus.Hit : CacheStatus.Stale) : CacheStatus.Miss; } + private updateTtl(key: string, ttl: number): void { + this.db.prepare(`UPDATE ${this.tableName} SET ttl = ? WHERE key = ?;`).run(Date.now() + ttl, key); + } + del(key: string): void { this.db.prepare(`DELETE FROM ${this.tableName} WHERE key = ?`).run(key); } @@ -167,7 +172,7 @@ export class Cache { fn: () => Promise, opt: CacheApplyOption ): Promise { - const { ttl, temporaryBypass } = opt; + const { ttl, temporaryBypass, incrementTtlWhenHit } = opt; if (temporaryBypass) { return fn(); @@ -193,6 +198,10 @@ export class Cache { console.log(picocolors.green('[cache] hit'), picocolors.gray(key)); + if (incrementTtlWhenHit) { + this.updateTtl(key, ttl); + } + const deserializer = 'deserializer' in opt ? opt.deserializer : identity; return deserializer(cached); } diff --git a/Build/lib/get-phishing-domains.ts b/Build/lib/get-phishing-domains.ts index 789a0578..14c32b0d 100644 --- a/Build/lib/get-phishing-domains.ts +++ b/Build/lib/get-phishing-domains.ts @@ -7,7 +7,10 @@ import { PHISHING_DOMAIN_LISTS_EXTRA } from '../constants/reject-data-source'; import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt'; import picocolors from 'picocolors'; import createKeywordFilter from './aho-corasick'; -import { createCacheKey } from './cache-filesystem'; +import { createCacheKey, deserializeArray, fsFetchCache, serializeArray } from './cache-filesystem'; +import { fastStringArrayJoin } from './misc'; + +import { sha256 } from 'hash-wasm'; const BLACK_TLD = new Set([ 'accountant', @@ -158,65 +161,82 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g return domainArr; }); - const domainCountMap: Record = {}; - const domainScoreMap: Record = {}; + return span.traceChildAsync( + 'process phishing domain set', + () => processPhihsingDomains(domainArr) + ); +}); - span.traceChildSync('process phishing domain set', () => { - for (let i = 0, len = domainArr.length; i < len; i++) { - const line = domainArr[i]; +async function processPhihsingDomains(domainArr: string[]) { + const hash = await sha256(fastStringArrayJoin(domainArr, '|')); + return fsFetchCache.apply( + cacheKey('processPhihsingDomains|' + hash), + () => { + const domainCountMap: Record = {}; + const domainScoreMap: Record = {}; - const { - publicSuffix: tld, - domain: apexDomain, - subdomain, - isPrivate - } = tldts.parse(line, loosTldOptWithPrivateDomains); + for (let i = 0, len = domainArr.length; i < len; i++) { + const line = domainArr[i]; - if (isPrivate) { - continue; - } + const { + publicSuffix: tld, + domain: apexDomain, + subdomain, + isPrivate + } = tldts.parse(line, loosTldOptWithPrivateDomains); - if (!tld) { - console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, tld }); - continue; - } - if (!apexDomain) { - console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, apexDomain }); - continue; - } + if (isPrivate) { + continue; + } - domainCountMap[apexDomain] ||= 0; - domainCountMap[apexDomain] += 1; + if (!tld) { + console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, tld }); + continue; + } + if (!apexDomain) { + console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, apexDomain }); + continue; + } - if (!(apexDomain in domainScoreMap)) { - domainScoreMap[apexDomain] = 0; - if (BLACK_TLD.has(tld)) { - domainScoreMap[apexDomain] += 4; - } else if (tld.length > 6) { - domainScoreMap[apexDomain] += 2; + domainCountMap[apexDomain] ||= 0; + domainCountMap[apexDomain] += 1; + + if (!(apexDomain in domainScoreMap)) { + domainScoreMap[apexDomain] = 0; + if (BLACK_TLD.has(tld)) { + domainScoreMap[apexDomain] += 4; + } else if (tld.length > 6) { + domainScoreMap[apexDomain] += 2; + } + } + if ( + subdomain + && !WHITELIST_MAIN_DOMAINS.has(apexDomain) + ) { + domainScoreMap[apexDomain] += calcDomainAbuseScore(subdomain); } } - if ( - subdomain - && !WHITELIST_MAIN_DOMAINS.has(apexDomain) - ) { - domainScoreMap[apexDomain] += calcDomainAbuseScore(subdomain); + + for (const apexDomain in domainCountMap) { + if ( + // !WHITELIST_MAIN_DOMAINS.has(apexDomain) + domainScoreMap[apexDomain] >= 12 + || (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 4) + ) { + domainArr.push(`.${apexDomain}`); + } } - } - }); - for (const apexDomain in domainCountMap) { - if ( - // !WHITELIST_MAIN_DOMAINS.has(apexDomain) - domainScoreMap[apexDomain] >= 12 - || (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 4) - ) { - domainArr.push(`.${apexDomain}`); + return Promise.resolve(domainArr); + }, + { + ttl: 2 * 86400, + serializer: serializeArray, + deserializer: deserializeArray, + incrementTtlWhenHit: true } - } - - return domainArr; -}); + ); +} export function calcDomainAbuseScore(subdomain: string) { let weight = 0; diff --git a/package.json b/package.json index 7dc6b92b..1c1a6082 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,7 @@ "fast-cidr-tools": "^0.2.5", "fdir": "^6.3.0", "foxact": "^0.2.38", + "hash-wasm": "^4.11.0", "json-stringify-pretty-compact": "^3.0.0", "mnemonist": "^0.39.8", "picocolors": "^1.1.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index fb6ab94c..74b48439 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -41,6 +41,9 @@ importers: foxact: specifier: ^0.2.38 version: 0.2.38 + hash-wasm: + specifier: ^4.11.0 + version: 4.11.0 json-stringify-pretty-compact: specifier: ^3.0.0 version: 3.0.0 @@ -996,6 +999,9 @@ packages: resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==} engines: {node: '>=8'} + hash-wasm@4.11.0: + resolution: {integrity: sha512-HVusNXlVqHe0fzIzdQOGolnFN6mX/fqcrSAOcTBXdvzrXVHwTz11vXeKRmkR5gTuwVpvHZEIyKoePDvuAR+XwQ==} + hasown@2.0.2: resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==} engines: {node: '>= 0.4'} @@ -2519,6 +2525,8 @@ snapshots: has-flag@4.0.0: {} + hash-wasm@4.11.0: {} + hasown@2.0.2: dependencies: function-bind: 1.1.2