From 21a31e6c1f8741db106f4db8e0c5e96bb7dd634b Mon Sep 17 00:00:00 2001 From: SukkaW Date: Sun, 26 May 2024 17:27:11 +0800 Subject: [PATCH] Perf/Refactor: `processDomainList` now returns `string[]` --- Build/build-reject-domainset.ts | 14 +++++--------- Build/lib/get-phishing-domains.ts | 15 ++++++--------- Build/lib/parse-filter.ts | 10 +++++----- Build/lib/set-add-from-array.bench.ts | 26 ++++++++++++++++++++++++++ Build/lib/set-add-from-array.ts | 11 ++++++++--- 5 files changed, 50 insertions(+), 26 deletions(-) create mode 100644 Build/lib/set-add-from-array.bench.ts diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts index 25eb694c..4ba911c1 100644 --- a/Build/build-reject-domainset.ts +++ b/Build/build-reject-domainset.ts @@ -19,7 +19,7 @@ import { SHARED_DESCRIPTION } from './lib/constants'; import { getPhishingDomains } from './lib/get-phishing-domains'; import { add as SetAdd, subtract as SetSubstract } from 'mnemonist/set'; -import { setAddFromArray } from './lib/set-add-from-array'; +import { setAddFromArray, setAddFromArrayCurried } from './lib/set-add-from-array'; import { sort } from './lib/timsort'; export const buildRejectDomainSet = task(import.meta.path, async (span) => { @@ -38,7 +38,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { // Parse from remote hosts & domain lists ...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2], entry[3]).then(hosts => SetAdd(domainSets, hosts))), - ...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetAdd(domainSets, hosts))), + ...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(setAddFromArrayCurried(domainSets))), ...ADGUARD_FILTERS.map(input => ( typeof input === 'string' @@ -60,13 +60,9 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { setAddFromArray(filterRuleWhitelistDomainSets, white); setAddFromArray(filterRuleWhitelistDomainSets, black); }))), - getPhishingDomains(childSpan).then(([purePhishingDomains, fullPhishingDomainSet]) => { - SetAdd(domainSets, fullPhishingDomainSet); - setAddFromArray(domainSets, purePhishingDomains); - }), - childSpan.traceChildAsync('process reject_sukka.conf', async () => { - setAddFromArray(domainSets, await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf'))); - }) + getPhishingDomains(childSpan).then(setAddFromArrayCurried(domainSets)), + childSpan.traceChildAsync('process reject_sukka.conf', () => readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf')) + .then(setAddFromArrayCurried(domainSets))) ]); // eslint-disable-next-line sukka/no-single-return -- not single return return shouldStop; diff --git a/Build/lib/get-phishing-domains.ts b/Build/lib/get-phishing-domains.ts index 0cc1dd8c..0e47eef5 100644 --- a/Build/lib/get-phishing-domains.ts +++ b/Build/lib/get-phishing-domains.ts @@ -5,6 +5,7 @@ import { TTL } from './cache-filesystem'; import { add as SetAdd } from 'mnemonist/set'; import type { Span } from '../trace'; +import { appendArrayInPlace } from './append-array-in-place'; const BLACK_TLD = new Set([ 'accountant', @@ -92,13 +93,13 @@ const BLACK_TLD = new Set([ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => { const gorhill = await getGorhillPublicSuffixPromise(); - const domainSet = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => { + const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => { const [domainSet, domainSet2] = await Promise.all([ processDomainLists(curSpan, 'https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt', true, TTL.THREE_HOURS()), processDomainLists(curSpan, 'https://phishing.army/download/phishing_army_blocklist.txt', true, TTL.THREE_HOURS()) ]); - SetAdd(domainSet, domainSet2); + appendArrayInPlace(domainSet, domainSet2); return domainSet; }); @@ -106,8 +107,6 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g const domainCountMap: Record = {}; span.traceChildSync('process phishing domain set', () => { - const domainArr = Array.from(domainSet); - for (let i = 0, len = domainArr.length; i < len; i++) { const line = domainArr[i]; @@ -126,17 +125,15 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g } }); - const results = span.traceChildSync('get final phishing results', () => { - const res: string[] = []; + span.traceChildSync('get final phishing results', () => { for (const domain in domainCountMap) { if (domainCountMap[domain] >= 8) { - res.push(`.${domain}`); + domainArr.push(`.${domain}`); } } - return res; }); - return [results, domainSet] as const; + return domainArr; }); export function calcDomainAbuseScore(line: string) { diff --git a/Build/lib/parse-filter.ts b/Build/lib/parse-filter.ts index 6dce33c1..0591f53f 100644 --- a/Build/lib/parse-filter.ts +++ b/Build/lib/parse-filter.ts @@ -8,7 +8,7 @@ import type { PublicSuffixList } from '@gorhill/publicsuffixlist'; import picocolors from 'picocolors'; import { normalizeDomain } from './normalize-domain'; import { fetchAssets } from './fetch-assets'; -import { deserializeSet, fsFetchCache, serializeSet } from './cache-filesystem'; +import { deserializeArray, deserializeSet, fsFetchCache, serializeArray, serializeSet } from './cache-filesystem'; import type { Span } from '../trace'; import createKeywordFilter from './aho-corasick'; @@ -20,7 +20,7 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl return span.traceChild(`process domainlist: ${domainListsUrl}`).traceAsyncFn(() => fsFetchCache.apply( domainListsUrl, async () => { - const domainSets = new Set(); + const domainSets: string[] = []; for await (const line of await fetchRemoteTextByLine(domainListsUrl)) { let domainToAdd = processLine(line); @@ -33,7 +33,7 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl foundDebugDomain = true; } - domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd); + domainSets.push(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd); } return domainSets; @@ -41,8 +41,8 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl { ttl, temporaryBypass, - serializer: serializeSet, - deserializer: deserializeSet + serializer: serializeArray, + deserializer: deserializeArray } )); } diff --git a/Build/lib/set-add-from-array.bench.ts b/Build/lib/set-add-from-array.bench.ts new file mode 100644 index 00000000..2077bd6c --- /dev/null +++ b/Build/lib/set-add-from-array.bench.ts @@ -0,0 +1,26 @@ +import { fetchRemoteTextByLine } from './fetch-text-by-line'; +import { processLineFromReadline } from './process-line'; + +import { bench, group, run } from 'mitata'; + +(async () => { + const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt')); + + group('setAddFromArray', () => { + bench('run', () => { + const set = new Set(['1', '2', '1', '3', 'skk.moe']); + for (let i = 0, len = data.length; i < len; i++) { + set.add(data[i]); + } + }); + }); + group('setAddFromArray', () => { + bench('run', () => { + const set = new Set(['1', '2', '1', '3', 'skk.moe']); + // eslint-disable-next-line @typescript-eslint/unbound-method -- thisArg is passed + data.forEach(set.add, set); + }); + }); + + run(); +})(); diff --git a/Build/lib/set-add-from-array.ts b/Build/lib/set-add-from-array.ts index bf025f5b..c15b95e1 100644 --- a/Build/lib/set-add-from-array.ts +++ b/Build/lib/set-add-from-array.ts @@ -2,7 +2,12 @@ * In-place adding of elements from an array to a set. */ export function setAddFromArray(set: Set, arr: T[]): void { - for (let i = 0, len = arr.length; i < len; i++) { - set.add(arr[i]); - } + // for (let i = 0, len = arr.length; i < len; i++) { + // set.add(arr[i]); + // } + // eslint-disable-next-line @typescript-eslint/unbound-method -- thisArg is passed + arr.forEach(set.add, set); } + +// eslint-disable-next-line @typescript-eslint/unbound-method -- thisArg is passed +export const setAddFromArrayCurried = (set: Set) => (arr: T[]) => arr.forEach(set.add, set);