Perf: faster fetchAssets (without string and manual split)

SukkaW 2025-01-22 10:52:03 +08:00
parent d97a866352
commit 07419a7942
5 changed files with 28 additions and 40 deletions

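What changed: fetchAssets no longer resolves to one big string that every caller then splits with text.split('\n'). It now decodes the response body as a stream, splits it into lines with TextLineStream, optionally filters them through ProcessLineStream (controlled by the new processLine parameter), and returns the collected string[] via Array.fromAsync. Below is a minimal, self-contained sketch of that pattern, not the repo's actual code: it assumes a runtime with Web Streams, an async-iterable ReadableStream and Array.fromAsync (Node.js 22+), and the inline line splitter is only a simplified stand-in for the repo's TextLineStream helper.

// Simplified stand-in for the repo's TextLineStream: buffer decoded chunks, emit complete lines.
function createLineSplitStream(): TransformStream<string, string> {
  let buffer = '';
  return new TransformStream<string, string>({
    transform(chunk, controller) {
      buffer += chunk;
      const lines = buffer.split('\n');
      buffer = lines.pop() ?? ''; // keep the trailing partial line for the next chunk
      for (const line of lines) controller.enqueue(line);
    },
    flush(controller) {
      if (buffer) controller.enqueue(buffer); // emit the last line if the body didn't end with '\n'
    }
  });
}

// Old approach: buffer the whole body, then split manually.
//   const text = await res.text();
//   const lines = text.split('\n');
// New approach: split while the body is still streaming and collect the lines in one pass.
const res = await fetch('https://example.invalid/list.txt'); // placeholder URL
const lines: string[] = await Array.fromAsync(
  res.body!
    .pipeThrough(new TextDecoderStream())
    .pipeThrough(createLineSplitStream())
);

This avoids materialising the whole download as a single string before any line-level work starts, which is what the commit title's "without string and manual split" refers to.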

@@ -4,7 +4,6 @@ import { createReadlineInterfaceFromResponse, readFileIntoProcessedArray } from
 import { task } from './trace';
 import { SHARED_DESCRIPTION } from './constants/description';
 import { isProbablyIpv4, isProbablyIpv6 } from 'foxts/is-probably-ip';
-import { processLine } from './lib/process-line';
 import { RulesetOutput } from './lib/create-file';
 import { SOURCE_DIR } from './constants/dir';
 import { $$fetch } from './lib/fetch-retry';
@@ -37,15 +36,12 @@ const BOTNET_FILTER_MIRROR_URL = [
   // https://curbengh.github.io/malware-filter/botnet-filter-dnscrypt-blocked-ips.txt
 ];
 
-const getBotNetFilterIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL).then(text => text.split('\n').reduce<[ipv4: string[], ipv6: string[]]>((acc, cur) => {
-  const ip = processLine(cur);
-  if (ip) {
+const getBotNetFilterIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL, true).then(arr => arr.reduce<[ipv4: string[], ipv6: string[]]>((acc, ip) => {
   if (isProbablyIpv4(ip)) {
     acc[0].push(ip);
   } else if (isProbablyIpv6(ip)) {
     acc[1].push(ip);
   }
-  }
   return acc;
 }, [[], []]));


@@ -1,6 +1,9 @@
 import picocolors from 'picocolors';
 import { $$fetch, defaultRequestInit, ResponseError } from './fetch-retry';
 import { waitWithAbort } from 'foxts/wait';
+import { nullthrow } from 'foxts/guard';
+import { TextLineStream } from './text-line-transform-stream';
+import { ProcessLineStream } from './process-line';
 
 // eslint-disable-next-line sukka/unicorn/custom-error-definition -- typescript is better
 export class CustomAbortError extends Error {
@@ -26,7 +29,7 @@ export class CustomNoETagFallbackError extends Error {
   }
 }
 
-export async function fetchAssets(url: string, fallbackUrls: null | undefined | string[] | readonly string[]) {
+export async function fetchAssets(url: string, fallbackUrls: null | undefined | string[] | readonly string[], processLine = false) {
   const controller = new AbortController();
 
   const createFetchFallbackPromise = async (url: string, index: number) => {
@@ -44,14 +47,19 @@ export async function fetchAssets(url: string, fallbackUrls: null | undefined |
       throw new CustomAbortError();
     }
 
     const res = await $$fetch(url, { signal: controller.signal, ...defaultRequestInit });
-    const text = await res.text();
-    if (text.length < 2) {
+
+    let stream = nullthrow(res.body).pipeThrough(new TextDecoderStream()).pipeThrough(new TextLineStream());
+    if (processLine) {
+      stream = stream.pipeThrough(new ProcessLineStream());
+    }
+
+    const arr = await Array.fromAsync(stream);
+    if (arr.length < 1) {
       throw new ResponseError(res, url, 'empty response w/o 304');
     }
 
     controller.abort();
-    return text;
+    return arr;
   };
 
   if (!fallbackUrls || fallbackUrls.length === 0) {

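The new processLine flag is what lets callers such as processDomainLists and processHosts drop their per-line processLine() calls: when it is true, fetchAssets pipes the line stream through ProcessLineStream before collecting it, and when it is omitted (as in the adguard filter parser further below) the caller still gets a string[] of raw lines back. The diff does not show ProcessLineStream itself, so the following is only a rough sketch of what such a transform might look like, assuming processLine(line) returns the cleaned-up line or a falsy value for comments and blank lines:

// Hypothetical sketch of a ProcessLineStream-style transform; the real one lives in ./process-line
// and its exact filtering rules are not shown in this diff.
class ProcessLineStreamSketch extends TransformStream<string, string> {
  constructor(processLine: (line: string) => string | null) {
    super({
      transform(line, controller) {
        const processed = processLine(line);
        // Drop comments/blank lines, forward everything else already cleaned up.
        if (processed) controller.enqueue(processed);
      }
    });
  }
}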

@@ -16,10 +16,7 @@ function domainListLineCb(l: string, set: string[], meta: string, normalizeDomai
   set.push(domain);
 }
 
-function domainListLineCbIncludeAllSubdomain(l: string, set: string[], meta: string, normalizeDomain = fastNormalizeDomain) {
-  const line = processLine(l);
-  if (!line) return;
-
+function domainListLineCbIncludeAllSubdomain(line: string, set: string[], meta: string, normalizeDomain = fastNormalizeDomain) {
   const domain = normalizeDomain(line);
   if (!domain) return;
@@ -36,12 +33,12 @@ export function processDomainLists(
   const lineCb = includeAllSubDomain ? domainListLineCbIncludeAllSubdomain : domainListLineCb;
 
   return span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => {
-    const text = await span.traceChildAsync('download', () => fetchAssets(
+    const filterRules = await span.traceChildAsync('download', () => fetchAssets(
       domainListsUrl,
-      mirrors
+      mirrors,
+      true
     ));
 
     const domainSets: string[] = [];
-    const filterRules = text.split('\n');
 
     span.traceChildSync('parse domain list', () => {
       for (let i = 0, len = filterRules.length; i < len; i++) {
@@ -59,13 +56,12 @@ export function processDomainListsWithPreload(
 ) {
   const domainNormalizer = wwwToApex ? fastNormalizeDomainIgnoreWww : fastNormalizeDomain;
-  const downloadPromise = fetchAssets(domainListsUrl, mirrors);
+  const downloadPromise = fetchAssets(domainListsUrl, mirrors, true);
   const lineCb = includeAllSubDomain ? domainListLineCbIncludeAllSubdomain : domainListLineCb;
 
   return (span: Span) => span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => {
-    const text = await span.traceChildPromise('download', downloadPromise);
+    const filterRules = await span.traceChildPromise('download', downloadPromise);
 
     const domainSets: string[] = [];
-    const filterRules = text.split('\n');
 
     span.traceChildSync('parse domain list', () => {
       for (let i = 0, len = filterRules.length; i < len; i++) {


@@ -28,7 +28,7 @@ export function processFilterRulesWithPreload(
   const downloadPromise = fetchAssets(filterRulesUrl, fallbackUrls);
 
   return (span: Span) => span.traceChildAsync<Record<'whiteDomains' | 'whiteDomainSuffixes' | 'blackDomains' | 'blackDomainSuffixes', string[]>>(`process filter rules: ${filterRulesUrl}`, async (span) => {
-    const text = await span.traceChildPromise('download', downloadPromise);
+    const filterRules = await span.traceChildPromise('download', downloadPromise);
 
     const whiteDomains = new Set<string>();
     const whiteDomainSuffixes = new Set<string>();
@@ -82,8 +82,6 @@ export function processFilterRulesWithPreload(
       }
     };
 
-    const filterRules = text.split('\n');
-
     span.traceChild('parse adguard filter').traceSyncFn(() => {
       for (let i = 0, len = filterRules.length; i < len; i++) {
         lineCb(filterRules[i]);


@@ -1,15 +1,9 @@
 import type { Span } from '../../trace';
 import { fetchAssets } from '../fetch-assets';
 import { fastNormalizeDomain } from '../normalize-domain';
-import { processLine } from '../process-line';
 import { onBlackFound } from './shared';
 
-function hostsLineCb(l: string, set: string[], includeAllSubDomain: boolean, meta: string) {
-  const line = processLine(l);
-  if (!line) {
-    return;
-  }
-
+function hostsLineCb(line: string, set: string[], includeAllSubDomain: boolean, meta: string) {
   const _domain = line.split(/\s/)[1]?.trim();
   if (!_domain) {
     return;
@@ -29,12 +23,10 @@ export function processHosts(
   hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false
 ) {
   return span.traceChildAsync(`process hosts: ${hostsUrl}`, async (span) => {
-    const text = await span.traceChild('download').traceAsyncFn(() => fetchAssets(hostsUrl, mirrors));
+    const filterRules = await span.traceChild('download').traceAsyncFn(() => fetchAssets(hostsUrl, mirrors, true));
 
     const domainSets: string[] = [];
-    const filterRules = text.split('\n');
 
     span.traceChild('parse hosts').traceSyncFn(() => {
       for (let i = 0, len = filterRules.length; i < len; i++) {
         hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl);
@@ -46,15 +38,13 @@ export function processHosts(
 }
 
 export function processHostsWithPreload(hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false) {
-  const downloadPromise = fetchAssets(hostsUrl, mirrors);
+  const downloadPromise = fetchAssets(hostsUrl, mirrors, true);
 
   return (span: Span) => span.traceChildAsync(`process hosts: ${hostsUrl}`, async (span) => {
-    const text = await span.traceChild('download').tracePromise(downloadPromise);
+    const filterRules = await span.traceChild('download').tracePromise(downloadPromise);
 
     const domainSets: string[] = [];
-    const filterRules = text.split('\n');
 
     span.traceChild('parse hosts').traceSyncFn(() => {
       for (let i = 0, len = filterRules.length; i < len; i++) {
         hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl);