From 96cf0fc1f9995a07f177f40ff303fbdc1edc759d Mon Sep 17 00:00:00 2001 From: SukkaW Date: Sat, 18 Jan 2025 02:59:32 +0800 Subject: [PATCH] Perf: preload phishing data source --- Build/lib/get-phishing-domains.ts | 15 +++++++++------ Build/lib/parse-filter/domainlists.ts | 18 ++++++++++++++++++ Build/lib/parse-filter/hosts.ts | 20 ++++++++++++++++++++ 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/Build/lib/get-phishing-domains.ts b/Build/lib/get-phishing-domains.ts index f25b3764..99a073a9 100644 --- a/Build/lib/get-phishing-domains.ts +++ b/Build/lib/get-phishing-domains.ts @@ -1,5 +1,5 @@ -import { processHosts } from './parse-filter/hosts'; -import { processDomainLists } from './parse-filter/domainlists'; +import { processHostsWithPreload } from './parse-filter/hosts'; +import { processDomainListsWithPreload } from './parse-filter/domainlists'; import * as tldts from 'tldts-experimental'; @@ -207,15 +207,18 @@ const processPhihsingDomains = cache(function processPhihsingDomains(domainArr: temporaryBypass: !isCI || DEBUG_DOMAIN_TO_FIND !== null }); +const downloads = [ + ...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry)), + ...PHISHING_HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry)) +]; + export function getPhishingDomains(parentSpan: Span) { return parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => { const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => { const domainArr: string[] = []; - await Promise.all([ - ...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry)), - ...PHISHING_HOSTS_EXTRA.map(entry => processHosts(curSpan, ...entry)) - ]).then(domainGroups => domainGroups.forEach(appendArrayInPlaceCurried(domainArr))); + const domainGroups = await Promise.all(downloads.map(task => task(curSpan))); + domainGroups.forEach(appendArrayInPlaceCurried(domainArr)); return domainArr; }); diff --git a/Build/lib/parse-filter/domainlists.ts b/Build/lib/parse-filter/domainlists.ts index a0284079..428c73a0 100644 --- a/Build/lib/parse-filter/domainlists.ts +++ b/Build/lib/parse-filter/domainlists.ts @@ -48,3 +48,21 @@ export function processDomainLists( return domainSets; }); } + +export function processDomainListsWithPreload(domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false) { + const downloadPromise = fetchAssets(domainListsUrl, mirrors); + + return (span: Span) => span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => { + const text = await span.traceChildPromise('download', downloadPromise); + const domainSets: string[] = []; + const filterRules = text.split('\n'); + + span.traceChildSync('parse domain list', () => { + for (let i = 0, len = filterRules.length; i < len; i++) { + domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl); + } + }); + + return domainSets; + }); +} diff --git a/Build/lib/parse-filter/hosts.ts b/Build/lib/parse-filter/hosts.ts index 6df00c40..db4dd2ae 100644 --- a/Build/lib/parse-filter/hosts.ts +++ b/Build/lib/parse-filter/hosts.ts @@ -44,3 +44,23 @@ export function processHosts( return domainSets; }); } + +export function processHostsWithPreload(hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false) { + const downloadPromise = fetchAssets(hostsUrl, mirrors); + + return (span: Span) => span.traceChildAsync(`process hosts: ${hostsUrl}`, async (span) => { + const text = await span.traceChild('download').tracePromise(downloadPromise); + + const domainSets: string[] = []; + + const filterRules = text.split('\n'); + + span.traceChild('parse hosts').traceSyncFn(() => { + for (let i = 0, len = filterRules.length; i < len; i++) { + hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl); + } + }); + + return domainSets; + }); +}