Perf: preload phishing data source

This commit is contained in:
SukkaW 2025-01-18 02:59:32 +08:00
parent 42428e14de
commit 96cf0fc1f9
3 changed files with 47 additions and 6 deletions

View File

@ -1,5 +1,5 @@
import { processHosts } from './parse-filter/hosts';
import { processDomainLists } from './parse-filter/domainlists';
import { processHostsWithPreload } from './parse-filter/hosts';
import { processDomainListsWithPreload } from './parse-filter/domainlists';
import * as tldts from 'tldts-experimental';
@ -207,15 +207,18 @@ const processPhihsingDomains = cache(function processPhihsingDomains(domainArr:
temporaryBypass: !isCI || DEBUG_DOMAIN_TO_FIND !== null
});
/**
 * Preloaded phishing data sources.
 *
 * The `*WithPreload` helpers are called here, when this statement is evaluated,
 * rather than inside `getPhishingDomains`. Each entry is a task function that
 * is later called with a tracing span to obtain the parsed domains.
 * Domain lists come first, hosts files second.
 */
const downloads = [
  ...PHISHING_DOMAIN_LISTS_EXTRA.map((args) => processDomainListsWithPreload(...args)),
  ...PHISHING_HOSTS_EXTRA.map((args) => processHostsWithPreload(...args))
];
export function getPhishingDomains(parentSpan: Span) {
return parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => {
const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => {
const domainArr: string[] = [];
await Promise.all([
...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry)),
...PHISHING_HOSTS_EXTRA.map(entry => processHosts(curSpan, ...entry))
]).then(domainGroups => domainGroups.forEach(appendArrayInPlaceCurried(domainArr)));
const domainGroups = await Promise.all(downloads.map(task => task(curSpan)));
domainGroups.forEach(appendArrayInPlaceCurried(domainArr));
return domainArr;
});

View File

@ -48,3 +48,21 @@ export function processDomainLists(
return domainSets;
});
}
/**
 * Starts fetching a domain list immediately and returns a task that parses it later.
 *
 * `fetchAssets` is invoked as soon as this function is called, so the download can
 * overlap with whatever runs before the returned task is executed. The returned
 * function, once given a span, awaits the download, splits the text on `\n` and
 * passes every line to `domainListLineCb`, which fills the result array.
 *
 * @param domainListsUrl - URL of the domain list; also used in the span name and
 *   forwarded to `domainListLineCb`.
 * @param mirrors - Mirror list forwarded to `fetchAssets`, or `null`.
 * @param includeAllSubDomain - Forwarded unchanged to `domainListLineCb` for every line.
 * @returns A function that takes the parent span and resolves to the collected domains.
 */
export function processDomainListsWithPreload(domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false) {
  const downloadPromise = fetchAssets(domainListsUrl, mirrors);
  // The download can fail before the returned task is ever invoked. Without a
  // handler attached, that would surface as an unhandled rejection. Attaching a
  // no-op handler only marks the promise as handled; `downloadPromise` itself
  // still rejects, so the `await` below observes the original error.
  downloadPromise.catch(() => { /* reported when the task awaits the download */ });

  return (span: Span) => span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (childSpan) => {
    const text = await childSpan.traceChildPromise('download', downloadPromise);

    const domainSets: string[] = [];
    const filterRules = text.split('\n');

    childSpan.traceChildSync('parse domain list', () => {
      for (let i = 0, len = filterRules.length; i < len; i++) {
        domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl);
      }
    });

    return domainSets;
  });
}

View File

@ -44,3 +44,23 @@ export function processHosts(
return domainSets;
});
}
/**
 * Starts fetching a hosts file immediately and returns a task that parses it later.
 *
 * `fetchAssets` is invoked as soon as this function is called, so the download can
 * overlap with whatever runs before the returned task is executed. The returned
 * function, once given a span, awaits the download, splits the text on `\n` and
 * passes every line to `hostsLineCb`, which fills the result array.
 *
 * @param hostsUrl - URL of the hosts file; also used in the span name and
 *   forwarded to `hostsLineCb`.
 * @param mirrors - Mirror list forwarded to `fetchAssets`, or `null`.
 * @param includeAllSubDomain - Forwarded unchanged to `hostsLineCb` for every line.
 * @returns A function that takes the parent span and resolves to the collected domains.
 */
export function processHostsWithPreload(hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false) {
  const downloadPromise = fetchAssets(hostsUrl, mirrors);
  // The download can fail before the returned task is ever invoked. Without a
  // handler attached, that would surface as an unhandled rejection. Attaching a
  // no-op handler only marks the promise as handled; `downloadPromise` itself
  // still rejects, so the `await` below observes the original error.
  downloadPromise.catch(() => { /* reported when the task awaits the download */ });

  return (span: Span) => span.traceChildAsync(`process hosts: ${hostsUrl}`, async (childSpan) => {
    const text = await childSpan.traceChild('download').tracePromise(downloadPromise);

    const domainSets: string[] = [];
    const filterRules = text.split('\n');

    childSpan.traceChild('parse hosts').traceSyncFn(() => {
      for (let i = 0, len = filterRules.length; i < len; i++) {
        hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl);
      }
    });

    return domainSets;
  });
}