Perf: many changes

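- Hoist the hosts-line processing callback out of processHosts
- Reduce the download size of Dan Pollock's (dp) hosts file by using the 0.0.0.0 variant
- Reduce redundant comparisons when sorting domains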
This commit is contained in:
SukkaW 2024-05-02 08:13:15 +08:00
parent 10bde9f1e8
commit e5d511d105
6 changed files with 42 additions and 35 deletions

View File

@ -18,14 +18,9 @@ import { getPhishingDomains } from './lib/get-phishing-domains';
import * as SetHelpers from 'mnemonist/set'; import * as SetHelpers from 'mnemonist/set';
import { setAddFromArray } from './lib/set-add-from-array'; import { setAddFromArray } from './lib/set-add-from-array';
import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
export const buildRejectDomainSet = task(import.meta.path, async (span) => { export const buildRejectDomainSet = task(import.meta.path, async (span) => {
const gorhillPromise = getGorhillPublicSuffixPromise(); const gorhill = await getGorhillPublicSuffixPromise();
const gorhillPeeked = Bun.peek(gorhillPromise);
const gorhill: PublicSuffixList = gorhillPeeked === gorhillPromise
? await gorhillPromise
: (gorhillPeeked as PublicSuffixList);
/** Whitelists */ /** Whitelists */
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST); const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
@ -126,7 +121,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
const kwfilter = createKeywordFilter(domainKeywordsSet); const kwfilter = createKeywordFilter(domainKeywordsSet);
for (const domain of domainSets) { for (const domain of domainSets) {
// Remove keyword // Remove keyword
if (kwfilter(domain)) { if (kwfilter(domain)) {
domainSets.delete(domain); domainSets.delete(domain);
} }

View File

@ -65,8 +65,10 @@ const getBotNetFilterIPsPromise = fsFetchCache.apply(
} }
); );
const localRejectIPSourcesPromise = readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/ip/reject.conf'));
export const buildRejectIPList = task(import.meta.path, async (span) => { export const buildRejectIPList = task(import.meta.path, async (span) => {
const result: string[] = await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/ip/reject.conf')); const result = await localRejectIPSourcesPromise;
const bogusNxDomainIPs = await span.traceChildPromise('get bogus nxdomain ips', getBogusNxDomainIPsPromise); const bogusNxDomainIPs = await span.traceChildPromise('get bogus nxdomain ips', getBogusNxDomainIPsPromise);
const botNetIPs = await span.traceChildPromise('get botnet ips', getBotNetFilterIPsPromise); const botNetIPs = await span.traceChildPromise('get botnet ips', getBotNetFilterIPsPromise);

View File

@ -232,7 +232,7 @@ export const buildSpeedtestDomainSet = task(import.meta.path, async (span) => {
}); });
resolve(); resolve();
}, 1000 * 60 * 2); }, 1000 * 60 * 1.5);
Promise.all(Object.values(pMap)).then(() => { Promise.all(Object.values(pMap)).then(() => {
clearTimeout(timer); clearTimeout(timer);

View File

@ -46,37 +46,38 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl
} }
)); ));
} }
/**
 * Handle a single line of a hosts file: extract the hostname field and add it to `set`.
 *
 * The line is first passed through `processLine`; a falsy result means the line is
 * skipped. The second whitespace-separated field is then taken as the hostname and
 * passed through `normalizeDomain`; a falsy result is skipped as well.
 *
 * @param l raw line as read from the hosts source
 * @param set collection that receives the resulting domain (mutated in place)
 * @param includeAllSubDomain when true the domain is stored with a leading `.`
 * @param meta label printed in the debug warning; callers pass the hosts URL
 */
const hostsLineCb = (l: string, set: Set<string>, includeAllSubDomain: boolean, meta: string) => {
  const line = processLine(l);
  if (!line) {
    return;
  }

  // hosts(5) allows any number of blanks and/or tabs between fields, so split on
  // whitespace runs. Splitting on a single whitespace character would produce an
  // empty second field for aligned entries such as "0.0.0.0    example.com" and
  // the entry would be silently dropped.
  const _domain = line.split(/\s+/)[1]?.trim();
  if (!_domain) {
    return;
  }

  const domain = normalizeDomain(_domain);
  if (!domain) {
    return;
  }

  // Debug aid: report where the domain being searched for was found.
  if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
    console.warn(picocolors.red(meta), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
    foundDebugDomain = true;
  }

  set.add(includeAllSubDomain ? `.${domain}` : domain);
};
export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) { export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) {
const domainSets = new Set<string>(); const domainSets = new Set<string>();
const lineCb = (l: string) => {
const line = processLine(l);
if (!line) {
return;
}
const _domain = line.split(/\s/)[1]?.trim();
if (!_domain) {
return;
}
const domain = normalizeDomain(_domain);
if (!domain) {
return;
}
if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
console.warn(picocolors.red(hostsUrl), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
foundDebugDomain = true;
}
domainSets.add(includeAllSubDomain ? `.${domain}` : domain);
};
return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn((childSpan) => fsFetchCache.apply( return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn((childSpan) => fsFetchCache.apply(
hostsUrl, hostsUrl,
async () => { async () => {
if (mirrors == null || mirrors.length === 0) { if (mirrors == null || mirrors.length === 0) {
for await (const l of await fetchRemoteTextByLine(hostsUrl)) { for await (const l of await fetchRemoteTextByLine(hostsUrl)) {
lineCb(l); hostsLineCb(l, domainSets, includeAllSubDomain, hostsUrl);
} }
} else { } else {
const filterRules = await childSpan const filterRules = await childSpan
@ -85,7 +86,7 @@ export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | n
childSpan.traceChild('parse hosts').traceSyncFn(() => { childSpan.traceChild('parse hosts').traceSyncFn(() => {
for (let i = 0, len = filterRules.length; i < len; i++) { for (let i = 0, len = filterRules.length; i < len; i++) {
lineCb(filterRules[i]); hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl);
} }
}); });
} }

View File

@ -9,7 +9,8 @@ export const HOSTS: HostsSource[] = [
true, true,
TTL.THREE_HOURS() TTL.THREE_HOURS()
], ],
['https://someonewhocares.org/hosts/hosts', null, true, TTL.THREE_HOURS()], // Dan Pollock's hosts file, 0.0.0.0 version is 30 KiB smaller
['https://someonewhocares.org/hosts/zero/hosts', null, true, TTL.THREE_HOURS()],
// no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl // no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', null, true, TTL.THREE_DAYS()], ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', null, true, TTL.THREE_DAYS()],
// have not been updated for more than a year, so we set a 14 days cache ttl // have not been updated for more than a year, so we set a 14 days cache ttl

View File

@ -42,7 +42,15 @@ export const sortDomains = (inputs: string[], gorhill: PublicSuffixList) => {
const sorter = (a: string, b: string) => { const sorter = (a: string, b: string) => {
if (a === b) return 0; if (a === b) return 0;
return compare(domains.get(a)!, domains.get(b)!) || compare(a, b);
const $a = domains.get(a)!;
const $b = domains.get(b)!;
// avoid compare same thing twice
if (a === $a && b === $b) {
return compare(a, b);
}
return compare($a, $b) || compare(a, b);
}; };
return inputs.sort(sorter); return inputs.sort(sorter);