diff --git a/Build/lib/fetch-assets.ts b/Build/lib/fetch-assets.ts index 61b9c88f..bb9e7c06 100644 --- a/Build/lib/fetch-assets.ts +++ b/Build/lib/fetch-assets.ts @@ -4,6 +4,7 @@ import { waitWithAbort } from 'foxts/wait'; import { nullthrow } from 'foxts/guard'; import { TextLineStream } from 'foxts/text-line-stream'; import { ProcessLineStream } from './process-line'; +import { AdGuardFilterIgnoreUnsupportedLinesStream } from './parse-filter/filters'; // eslint-disable-next-line sukka/unicorn/custom-error-definition -- typescript is better class CustomAbortError extends Error { @@ -13,7 +14,10 @@ class CustomAbortError extends Error { const reusedCustomAbortError = new CustomAbortError(); -export async function fetchAssets(url: string, fallbackUrls: null | undefined | string[] | readonly string[], processLine = false, allowEmpty = false) { +export async function fetchAssets( + url: string, fallbackUrls: null | undefined | string[] | readonly string[], + processLine = false, allowEmpty = false, filterAdGuardUnsupportedLines = false +) { const controller = new AbortController(); const createFetchFallbackPromise = async (url: string, index: number) => { @@ -36,6 +40,9 @@ export async function fetchAssets(url: string, fallbackUrls: null | undefined | if (processLine) { stream = stream.pipeThrough(new ProcessLineStream()); } + if (filterAdGuardUnsupportedLines) { + stream = stream.pipeThrough(new AdGuardFilterIgnoreUnsupportedLinesStream()); + } const arr = await Array.fromAsync(stream); if (arr.length < 1 && !allowEmpty) { diff --git a/Build/lib/parse-filter/filters.ts b/Build/lib/parse-filter/filters.ts index f743b1bc..6aa706b1 100644 --- a/Build/lib/parse-filter/filters.ts +++ b/Build/lib/parse-filter/filters.ts @@ -29,7 +29,10 @@ export function processFilterRulesWithPreload( fallbackUrls?: string[] | null, includeThirdParty = false ) { - const downloadPromise = fetchAssets(filterRulesUrl, fallbackUrls); + const downloadPromise = fetchAssets( + filterRulesUrl, 
fallbackUrls, + true, false, true + ); return (span: Span) => span.traceChildAsync< Record< @@ -192,47 +195,101 @@ const kwfilter = createKeywordFilter([ '^popup' ]); -export function parse($line: string, result: [string, ParseType], includeThirdParty: boolean): [hostname: string, flag: ParseType] { - if ( - // doesn't include - !$line.includes('.') // rule with out dot can not be a domain - // includes - || kwfilter($line) - // note that this can only excludes $redirect but not $3p,redirect, so we still need to parse it - // this is only an early bail out - ) { - result[1] = ParseType.Null; - return result; - } +/** + * The idea is that, TransformStream works kinda like a filter running on response. If we + * can filter lines before Array.fromAsync, we can create a smaller array, this saves memory + * and could improve performance. + */ +export class AdGuardFilterIgnoreUnsupportedLinesStream extends TransformStream { + // private __buf = ''; + constructor() { + super({ + transform(line, controller) { + if ( + // doesn't include + !line.includes('.') // rule with out dot can not be a domain + // includes + || kwfilter(line) + // note that this can only excludes $redirect but not $3p,redirect, so we still need to parse it + // this is only an early bail out + ) { + return; + } - const line = $line.trim(); + line = line.trim(); - if (line.length === 0) { - result[1] = ParseType.Null; - return result; + if (line.length === 0) { + return; + } + + const firstCharCode = line.charCodeAt(0); + const lastCharCode = line.charCodeAt(line.length - 1); + + if ( + firstCharCode === 47 // 47 `/` + // ends with + // _160-600. + // -detect-adblock. + // _web-advert. 
+ || lastCharCode === 46 // 46 `.`, line.endsWith('.') + || lastCharCode === 45 // 45 `-`, line.endsWith('-') + || lastCharCode === 95 // 95 `_`, line.endsWith('_') + ) { + return; + } + + if ((line.includes('/') || line.includes(':')) && !line.includes('://')) { + return; + } + + controller.enqueue(line); + } + }); } +} + +export function parse(line: string, result: [string, ParseType], includeThirdParty: boolean): [hostname: string, flag: ParseType] { + // We have already done this in AdGuardFilterIgnoreUnsupportedLinesStream + + // if ( + // // doesn't include + // !$line.includes('.') // rule with out dot can not be a domain + // // includes + // || kwfilter($line) + // // note that this can only excludes $redirect but not $3p,redirect, so we still need to parse it + // // this is only an early bail out + // ) { + // result[1] = ParseType.Null; + // return result; + // } + + // const line = $line.trim(); + + // if (line.length === 0) { + // result[1] = ParseType.Null; + // return result; + // } const firstCharCode = line.charCodeAt(0); - const lastCharCode = line.charCodeAt(line.length - 1); + // const lastCharCode = line.charCodeAt(line.length - 1); - if ( - firstCharCode === 47 // 47 `/` - // ends with - // _160-600. - // -detect-adblock. - // _web-advert. - || lastCharCode === 46 // 46 `.`, line.endsWith('.') - || lastCharCode === 45 // 45 `-`, line.endsWith('-') - || lastCharCode === 95 // 95 `_`, line.endsWith('_') - ) { - result[1] = ParseType.Null; - return result; - } + // if ( + // firstCharCode === 47 // 47 `/` + // // ends with + // // _160-600. + // // -detect-adblock. + // // _web-advert. 
+ // || lastCharCode === 46 // 46 `.`, line.endsWith('.') + // || lastCharCode === 45 // 45 `-`, line.endsWith('-') + // || lastCharCode === 95 // 95 `_`, line.endsWith('_') + // ) { + // result[1] = ParseType.Null; + // return result; + // } - if ((line.includes('/') || line.includes(':')) && !line.includes('://')) { - result[1] = ParseType.Null; - return result; - } + // if ((line.includes('/') || line.includes(':')) && !line.includes('://')) { + // return; + // } const filter = NetworkFilter.parse(line, false); if (filter) {