Perf: run filter TransformStream before parsing

This commit is contained in:
SukkaW
2025-07-01 00:46:04 +08:00
parent 9b17d81df1
commit 58aed6aa60
2 changed files with 100 additions and 36 deletions

View File

@@ -4,6 +4,7 @@ import { waitWithAbort } from 'foxts/wait';
import { nullthrow } from 'foxts/guard';
import { TextLineStream } from 'foxts/text-line-stream';
import { ProcessLineStream } from './process-line';
import { AdGuardFilterIgnoreUnsupportedLinesStream } from './parse-filter/filters';
// eslint-disable-next-line sukka/unicorn/custom-error-definition -- typescript is better
class CustomAbortError extends Error {
@@ -13,7 +14,10 @@ class CustomAbortError extends Error {
const reusedCustomAbortError = new CustomAbortError();
export async function fetchAssets(url: string, fallbackUrls: null | undefined | string[] | readonly string[], processLine = false, allowEmpty = false) {
export async function fetchAssets(
url: string, fallbackUrls: null | undefined | string[] | readonly string[],
processLine = false, allowEmpty = false, filterAdGuardUnsupportedLines = false
) {
const controller = new AbortController();
const createFetchFallbackPromise = async (url: string, index: number) => {
@@ -36,6 +40,9 @@ export async function fetchAssets(url: string, fallbackUrls: null | undefined |
if (processLine) {
stream = stream.pipeThrough(new ProcessLineStream());
}
if (filterAdGuardUnsupportedLines) {
stream = stream.pipeThrough(new AdGuardFilterIgnoreUnsupportedLinesStream());
}
const arr = await Array.fromAsync(stream);
if (arr.length < 1 && !allowEmpty) {

View File

@@ -29,7 +29,10 @@ export function processFilterRulesWithPreload(
fallbackUrls?: string[] | null,
includeThirdParty = false
) {
const downloadPromise = fetchAssets(filterRulesUrl, fallbackUrls);
const downloadPromise = fetchAssets(
filterRulesUrl, fallbackUrls,
true, false, true
);
return (span: Span) => span.traceChildAsync<
Record<
@@ -192,24 +195,31 @@ const kwfilter = createKeywordFilter([
'^popup'
]);
export function parse($line: string, result: [string, ParseType], includeThirdParty: boolean): [hostname: string, flag: ParseType] {
/**
 * A TransformStream acts as a filter on the response stream. By dropping unsupported
 * lines before Array.fromAsync collects the stream, we build a smaller array, which
 * saves memory and can improve performance.
 */
export class AdGuardFilterIgnoreUnsupportedLinesStream extends TransformStream<string, string> {
// private __buf = '';
constructor() {
super({
transform(line, controller) {
if (
// doesn't include
!$line.includes('.') // rule with out dot can not be a domain
!line.includes('.') // a rule without a dot cannot be a domain
// includes
|| kwfilter($line)
// note that this can only excludes $redirect but not $4-,redirect, so we still need to parse it
|| kwfilter(line)
// note that this can only exclude $redirect but not $3p,redirect, so we still need to parse it
// this is only an early bailout
) {
result[1] = ParseType.Null;
return result;
return;
}
const line = $line.trim();
line = line.trim();
if (line.length === 0) {
result[1] = ParseType.Null;
return result;
return;
}
const firstCharCode = line.charCodeAt(0);
@@ -225,15 +235,62 @@ export function parse($line: string, result: [string, ParseType], includeThirdPa
|| lastCharCode === 45 // 45 `-`, line.endsWith('-')
|| lastCharCode === 95 // 95 `_`, line.endsWith('_')
) {
result[1] = ParseType.Null;
return result;
return;
}
if ((line.includes('/') || line.includes(':')) && !line.includes('://')) {
result[1] = ParseType.Null;
return result;
return;
}
controller.enqueue(line);
}
});
}
}
export function parse(line: string, result: [string, ParseType], includeThirdParty: boolean): [hostname: string, flag: ParseType] {
// We have already done this in AdGuardFilterIgnoreUnsupportedLinesStream
// if (
// // doesn't include
// !$line.includes('.') // rule with out dot can not be a domain
// // includes
// || kwfilter($line)
// // note that this can only excludes $redirect but not $3p,redirect, so we still need to parse it
// // this is only an early bail out
// ) {
// result[1] = ParseType.Null;
// return result;
// }
// const line = $line.trim();
// if (line.length === 0) {
// result[1] = ParseType.Null;
// return result;
// }
const firstCharCode = line.charCodeAt(0);
// const lastCharCode = line.charCodeAt(line.length - 1);
// if (
// firstCharCode === 47 // 47 `/`
// // ends with
// // _160-600.
// // -detect-adblock.
// // _web-advert.
// || lastCharCode === 46 // 46 `.`, line.endsWith('.')
// || lastCharCode === 45 // 45 `-`, line.endsWith('-')
// || lastCharCode === 95 // 95 `_`, line.endsWith('_')
// ) {
// result[1] = ParseType.Null;
// return result;
// }
// if ((line.includes('/') || line.includes(':')) && !line.includes('://')) {
// return;
// }
const filter = NetworkFilter.parse(line, false);
if (filter) {
if (