diff --git a/Build/lib/cache-filesystem.ts b/Build/lib/cache-filesystem.ts index 8bb5813a..26c9de72 100644 --- a/Build/lib/cache-filesystem.ts +++ b/Build/lib/cache-filesystem.ts @@ -217,5 +217,5 @@ export const deserializeArray = (str: string) => str.split(separator); export const createCacheKey = (filename: string) => { const fileHash = stringHash(fs.readFileSync(filename, 'utf-8')); - return (key: string) => key + '$' + fileHash; + return (key: string) => key + '$' + fileHash + '$'; }; diff --git a/Build/lib/get-phishing-domains.ts b/Build/lib/get-phishing-domains.ts index f071280a..a33ce865 100644 --- a/Build/lib/get-phishing-domains.ts +++ b/Build/lib/get-phishing-domains.ts @@ -7,6 +7,7 @@ import { PHISHING_DOMAIN_LISTS_EXTRA } from '../constants/reject-data-source'; import { looseTldtsOpt } from '../constants/loose-tldts-opt'; import picocolors from 'picocolors'; import createKeywordFilter from './aho-corasick'; +import { createCacheKey } from './cache-filesystem'; const BLACK_TLD = new Set([ 'accountant', @@ -130,11 +131,13 @@ const lowKeywords = createKeywordFilter([ '.www-' ]); +const cacheKey = createCacheKey(__filename); + export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => { const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => { const domainArr: string[] = []; - (await Promise.all(PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry)))) + (await Promise.all(PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry, cacheKey)))) .forEach(appendArrayInPlaceCurried(domainArr)); return domainArr; diff --git a/Build/lib/parse-filter.ts b/Build/lib/parse-filter.ts index d2964fa9..d680ef93 100644 --- a/Build/lib/parse-filter.ts +++ b/Build/lib/parse-filter.ts @@ -16,6 +16,8 @@ const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null let foundDebugDomain = false; const 
temporaryBypass = typeof DEBUG_DOMAIN_TO_FIND === 'string'; +const identity = <T>(x: T) => x; + const domainListLineCb = (l: string, set: string[], includeAllSubDomain: boolean, meta: string) => { let line = processLine(l); if (!line) return; @@ -44,9 +46,9 @@ const domainListLineCb = (l: string, set: string[], includeAllSubDomain: boolean const cacheKey = createCacheKey(__filename); -export function processDomainLists(span: Span, domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) { +export function processDomainLists(span: Span, domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null, extraCacheKey: (input: string) => string = identity) { return span.traceChild(`process domainlist: ${domainListsUrl}`).traceAsyncFn((childSpan) => fsFetchCache.apply( - cacheKey(domainListsUrl), + extraCacheKey(cacheKey(domainListsUrl)), async () => { const domainSets: string[] = [];