From e5d511d1054d98b08f7230ec0d8c270de1c6ffcb Mon Sep 17 00:00:00 2001 From: SukkaW Date: Thu, 2 May 2024 08:13:15 +0800 Subject: [PATCH] Perf: many changes - Hoist process hosts line callback - Reduce dp hosts file size - Reduce domain sort --- Build/build-reject-domainset.ts | 9 ++---- Build/build-reject-ip-list.ts | 4 ++- Build/build-speedtest-domainset.ts | 2 +- Build/lib/parse-filter.ts | 49 +++++++++++++++--------------- Build/lib/reject-data-source.ts | 3 +- Build/lib/stable-sort-domain.ts | 10 +++++- 6 files changed, 42 insertions(+), 35 deletions(-) diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts index 8af021d0..519e8d51 100644 --- a/Build/build-reject-domainset.ts +++ b/Build/build-reject-domainset.ts @@ -18,14 +18,9 @@ import { getPhishingDomains } from './lib/get-phishing-domains'; import * as SetHelpers from 'mnemonist/set'; import { setAddFromArray } from './lib/set-add-from-array'; -import type { PublicSuffixList } from '@gorhill/publicsuffixlist'; export const buildRejectDomainSet = task(import.meta.path, async (span) => { - const gorhillPromise = getGorhillPublicSuffixPromise(); - const gorhillPeeked = Bun.peek(gorhillPromise); - const gorhill: PublicSuffixList = gorhillPeeked === gorhillPromise - ? await gorhillPromise - : (gorhillPeeked as PublicSuffixList); + const gorhill = await getGorhillPublicSuffixPromise(); /** Whitelists */ const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST); @@ -126,7 +121,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { const kwfilter = createKeywordFilter(domainKeywordsSet); for (const domain of domainSets) { - // Remove keyword + // Remove keyword if (kwfilter(domain)) { domainSets.delete(domain); } diff --git a/Build/build-reject-ip-list.ts b/Build/build-reject-ip-list.ts index 527cbedd..e9959762 100644 --- a/Build/build-reject-ip-list.ts +++ b/Build/build-reject-ip-list.ts @@ -65,8 +65,10 @@ const getBotNetFilterIPsPromise = fsFetchCache.apply( } ); +const localRejectIPSourcesPromise = readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/ip/reject.conf')); + export const buildRejectIPList = task(import.meta.path, async (span) => { - const result: string[] = await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/ip/reject.conf')); + const result = await localRejectIPSourcesPromise; const bogusNxDomainIPs = await span.traceChildPromise('get bogus nxdomain ips', getBogusNxDomainIPsPromise); const botNetIPs = await span.traceChildPromise('get botnet ips', getBotNetFilterIPsPromise); diff --git a/Build/build-speedtest-domainset.ts b/Build/build-speedtest-domainset.ts index e40938bd..8656be13 100644 --- a/Build/build-speedtest-domainset.ts +++ b/Build/build-speedtest-domainset.ts @@ -232,7 +232,7 @@ export const buildSpeedtestDomainSet = task(import.meta.path, async (span) => { }); resolve(); - }, 1000 * 60 * 2); + }, 1000 * 60 * 1.5); Promise.all(Object.values(pMap)).then(() => { clearTimeout(timer); diff --git a/Build/lib/parse-filter.ts b/Build/lib/parse-filter.ts index e08befbd..6dce33c1 100644 --- a/Build/lib/parse-filter.ts +++ b/Build/lib/parse-filter.ts @@ -46,37 +46,38 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl } )); } + +const hostsLineCb = (l: string, set: Set, includeAllSubDomain: boolean, meta: string) => { + const line = processLine(l); + if (!line) { + return; + } + + const _domain = line.split(/\s/)[1]?.trim(); + if (!_domain) { + return; + } + const domain = normalizeDomain(_domain); + if (!domain) { + return; + } + if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) { + console.warn(picocolors.red(meta), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND))); + foundDebugDomain = true; + } + + set.add(includeAllSubDomain ? `.${domain}` : domain); +}; + export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) { const domainSets = new Set(); - const lineCb = (l: string) => { - const line = processLine(l); - if (!line) { - return; - } - - const _domain = line.split(/\s/)[1]?.trim(); - if (!_domain) { - return; - } - const domain = normalizeDomain(_domain); - if (!domain) { - return; - } - if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) { - console.warn(picocolors.red(hostsUrl), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND))); - foundDebugDomain = true; - } - - domainSets.add(includeAllSubDomain ? `.${domain}` : domain); - }; - return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn((childSpan) => fsFetchCache.apply( hostsUrl, async () => { if (mirrors == null || mirrors.length === 0) { for await (const l of await fetchRemoteTextByLine(hostsUrl)) { - lineCb(l); + hostsLineCb(l, domainSets, includeAllSubDomain, hostsUrl); } } else { const filterRules = await childSpan @@ -85,7 +86,7 @@ export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | n childSpan.traceChild('parse hosts').traceSyncFn(() => { for (let i = 0, len = filterRules.length; i < len; i++) { - lineCb(filterRules[i]); + hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl); } }); } diff --git a/Build/lib/reject-data-source.ts b/Build/lib/reject-data-source.ts index f47f0089..a8c0f947 100644 --- a/Build/lib/reject-data-source.ts +++ b/Build/lib/reject-data-source.ts @@ -9,7 +9,8 @@ export const HOSTS: HostsSource[] = [ true, TTL.THREE_HOURS() ], - ['https://someonewhocares.org/hosts/hosts', null, true, TTL.THREE_HOURS()], + // Dan Pollock's hosts file, 0.0.0.0 version is 30 KiB smaller + ['https://someonewhocares.org/hosts/zero/hosts', null, true, TTL.THREE_HOURS()], // no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', null, true, TTL.THREE_DAYS()], // have not been updated for more than a year, so we set a 14 days cache ttl diff --git a/Build/lib/stable-sort-domain.ts b/Build/lib/stable-sort-domain.ts index 42a9ce9a..012927e0 100644 --- a/Build/lib/stable-sort-domain.ts +++ b/Build/lib/stable-sort-domain.ts @@ -42,7 +42,15 @@ export const sortDomains = (inputs: string[], gorhill: PublicSuffixList) => { const sorter = (a: string, b: string) => { if (a === b) return 0; - return compare(domains.get(a)!, domains.get(b)!) || compare(a, b); + + const $a = domains.get(a)!; + const $b = domains.get(b)!; + + // avoid compare same thing twice + if (a === $a && b === $b) { + return compare(a, b); + } + return compare($a, $b) || compare(a, b); }; return inputs.sort(sorter);