From 07419a79428cf2358e4a0829a06df8a6e4e7762d Mon Sep 17 00:00:00 2001 From: SukkaW Date: Wed, 22 Jan 2025 10:52:03 +0800 Subject: [PATCH] Perf: faster `fetchAssets` (without string and manual split) --- Build/build-reject-ip-list.ts | 14 +++++--------- Build/lib/fetch-assets.ts | 16 ++++++++++++---- Build/lib/parse-filter/domainlists.ts | 16 ++++++---------- Build/lib/parse-filter/filters.ts | 4 +--- Build/lib/parse-filter/hosts.ts | 18 ++++-------------- 5 files changed, 28 insertions(+), 40 deletions(-) diff --git a/Build/build-reject-ip-list.ts b/Build/build-reject-ip-list.ts index 14b8ea17..9be65f33 100644 --- a/Build/build-reject-ip-list.ts +++ b/Build/build-reject-ip-list.ts @@ -4,7 +4,6 @@ import { createReadlineInterfaceFromResponse, readFileIntoProcessedArray } from import { task } from './trace'; import { SHARED_DESCRIPTION } from './constants/description'; import { isProbablyIpv4, isProbablyIpv6 } from 'foxts/is-probably-ip'; -import { processLine } from './lib/process-line'; import { RulesetOutput } from './lib/create-file'; import { SOURCE_DIR } from './constants/dir'; import { $$fetch } from './lib/fetch-retry'; @@ -37,14 +36,11 @@ const BOTNET_FILTER_MIRROR_URL = [ // https://curbengh.github.io/malware-filter/botnet-filter-dnscrypt-blocked-ips.txt ]; -const getBotNetFilterIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL).then(text => text.split('\n').reduce<[ipv4: string[], ipv6: string[]]>((acc, cur) => { - const ip = processLine(cur); - if (ip) { - if (isProbablyIpv4(ip)) { - acc[0].push(ip); - } else if (isProbablyIpv6(ip)) { - acc[1].push(ip); - } +const getBotNetFilterIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL, true).then(arr => arr.reduce<[ipv4: string[], ipv6: string[]]>((acc, ip) => { + if (isProbablyIpv4(ip)) { + acc[0].push(ip); + } else if (isProbablyIpv6(ip)) { + acc[1].push(ip); } return acc; }, [[], []])); diff --git a/Build/lib/fetch-assets.ts b/Build/lib/fetch-assets.ts index 295e293d..63310480 100644 --- a/Build/lib/fetch-assets.ts +++ b/Build/lib/fetch-assets.ts @@ -1,6 +1,9 @@ import picocolors from 'picocolors'; import { $$fetch, defaultRequestInit, ResponseError } from './fetch-retry'; import { waitWithAbort } from 'foxts/wait'; +import { nullthrow } from 'foxts/guard'; +import { TextLineStream } from './text-line-transform-stream'; +import { ProcessLineStream } from './process-line'; // eslint-disable-next-line sukka/unicorn/custom-error-definition -- typescript is better export class CustomAbortError extends Error { @@ -26,7 +29,7 @@ export class CustomNoETagFallbackError extends Error { } } -export async function fetchAssets(url: string, fallbackUrls: null | undefined | string[] | readonly string[]) { +export async function fetchAssets(url: string, fallbackUrls: null | undefined | string[] | readonly string[], processLine = false) { const controller = new AbortController(); const createFetchFallbackPromise = async (url: string, index: number) => { @@ -44,14 +47,19 @@ export async function fetchAssets(url: string, fallbackUrls: null | undefined | throw new CustomAbortError(); } const res = await $$fetch(url, { signal: controller.signal, ...defaultRequestInit }); - const text = await res.text(); - if (text.length < 2) { + let stream = nullthrow(res.body).pipeThrough(new TextDecoderStream()).pipeThrough(new TextLineStream()); + if (processLine) { + stream = stream.pipeThrough(new ProcessLineStream()); + } + const arr = await Array.fromAsync(stream); + + if (arr.length < 1) { throw new ResponseError(res, url, 'empty response w/o 304'); } controller.abort(); - return text; + return arr; }; if (!fallbackUrls || fallbackUrls.length === 0) { diff --git a/Build/lib/parse-filter/domainlists.ts b/Build/lib/parse-filter/domainlists.ts index 63df4107..834c6c1b 100644 --- a/Build/lib/parse-filter/domainlists.ts +++ b/Build/lib/parse-filter/domainlists.ts @@ -16,10 +16,7 @@ function domainListLineCb(l: string, set: string[], meta: string, normalizeDomai set.push(domain); } -function domainListLineCbIncludeAllSubdomain(l: string, set: string[], meta: string, normalizeDomain = fastNormalizeDomain) { - const line = processLine(l); - if (!line) return; - +function domainListLineCbIncludeAllSubdomain(line: string, set: string[], meta: string, normalizeDomain = fastNormalizeDomain) { const domain = normalizeDomain(line); if (!domain) return; @@ -36,12 +33,12 @@ export function processDomainLists( const lineCb = includeAllSubDomain ? domainListLineCbIncludeAllSubdomain : domainListLineCb; return span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => { - const text = await span.traceChildAsync('download', () => fetchAssets( + const filterRules = await span.traceChildAsync('download', () => fetchAssets( domainListsUrl, - mirrors + mirrors, + true )); const domainSets: string[] = []; - const filterRules = text.split('\n'); span.traceChildSync('parse domain list', () => { for (let i = 0, len = filterRules.length; i < len; i++) { @@ -59,13 +56,12 @@ export function processDomainListsWithPreload( ) { const domainNormalizer = wwwToApex ? fastNormalizeDomainIgnoreWww : fastNormalizeDomain; - const downloadPromise = fetchAssets(domainListsUrl, mirrors); + const downloadPromise = fetchAssets(domainListsUrl, mirrors, true); const lineCb = includeAllSubDomain ? domainListLineCbIncludeAllSubdomain : domainListLineCb; return (span: Span) => span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => { - const text = await span.traceChildPromise('download', downloadPromise); + const filterRules = await span.traceChildPromise('download', downloadPromise); const domainSets: string[] = []; - const filterRules = text.split('\n'); span.traceChildSync('parse domain list', () => { for (let i = 0, len = filterRules.length; i < len; i++) { diff --git a/Build/lib/parse-filter/filters.ts b/Build/lib/parse-filter/filters.ts index dfebf7a9..2e276154 100644 --- a/Build/lib/parse-filter/filters.ts +++ b/Build/lib/parse-filter/filters.ts @@ -28,7 +28,7 @@ export function processFilterRulesWithPreload( const downloadPromise = fetchAssets(filterRulesUrl, fallbackUrls); return (span: Span) => span.traceChildAsync>(`process filter rules: ${filterRulesUrl}`, async (span) => { - const text = await span.traceChildPromise('download', downloadPromise); + const filterRules = await span.traceChildPromise('download', downloadPromise); const whiteDomains = new Set(); const whiteDomainSuffixes = new Set(); @@ -82,8 +82,6 @@ export function processFilterRulesWithPreload( } }; - const filterRules = text.split('\n'); - span.traceChild('parse adguard filter').traceSyncFn(() => { for (let i = 0, len = filterRules.length; i < len; i++) { lineCb(filterRules[i]); diff --git a/Build/lib/parse-filter/hosts.ts b/Build/lib/parse-filter/hosts.ts index db4dd2ae..865c8f87 100644 --- a/Build/lib/parse-filter/hosts.ts +++ b/Build/lib/parse-filter/hosts.ts @@ -1,15 +1,9 @@ import type { Span } from '../../trace'; import { fetchAssets } from '../fetch-assets'; import { fastNormalizeDomain } from '../normalize-domain'; -import { processLine } from '../process-line'; import { onBlackFound } from './shared'; -function hostsLineCb(l: string, set: string[], includeAllSubDomain: boolean, meta: string) { - const line = processLine(l); - if (!line) { - return; - } - +function hostsLineCb(line: string, set: string[], includeAllSubDomain: boolean, meta: string) { const _domain = line.split(/\s/)[1]?.trim(); if (!_domain) { return; @@ -29,12 +23,10 @@ export function processHosts( hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false ) { return span.traceChildAsync(`process hosts: ${hostsUrl}`, async (span) => { - const text = await span.traceChild('download').traceAsyncFn(() => fetchAssets(hostsUrl, mirrors)); + const filterRules = await span.traceChild('download').traceAsyncFn(() => fetchAssets(hostsUrl, mirrors, true)); const domainSets: string[] = []; - const filterRules = text.split('\n'); - span.traceChild('parse hosts').traceSyncFn(() => { for (let i = 0, len = filterRules.length; i < len; i++) { hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl); @@ -46,15 +38,13 @@ export function processHosts( } export function processHostsWithPreload(hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false) { - const downloadPromise = fetchAssets(hostsUrl, mirrors); + const downloadPromise = fetchAssets(hostsUrl, mirrors, true); return (span: Span) => span.traceChildAsync(`process hosts: ${hostsUrl}`, async (span) => { - const text = await span.traceChild('download').tracePromise(downloadPromise); + const filterRules = await span.traceChild('download').tracePromise(downloadPromise); const domainSets: string[] = []; - const filterRules = text.split('\n'); - span.traceChild('parse hosts').traceSyncFn(() => { for (let i = 0, len = filterRules.length; i < len; i++) { hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl);