Perf: fast bail out filter parsing

This commit is contained in:
SukkaW
2024-02-03 21:36:30 +08:00
parent 067d19f488
commit 43562f62d4

View File

@@ -10,6 +10,7 @@ import { normalizeDomain } from './normalize-domain';
import { fetchAssets } from './fetch-assets'; import { fetchAssets } from './fetch-assets';
import { deserializeSet, fsFetchCache, serializeSet } from './cache-filesystem'; import { deserializeSet, fsFetchCache, serializeSet } from './cache-filesystem';
import type { Span } from '../trace'; import type { Span } from '../trace';
import createKeywordFilter from './aho-corasick';
const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null
let foundDebugDomain = false; let foundDebugDomain = false;
@@ -241,25 +242,37 @@ export async function processFilterRules(
}; };
} }
const R_KNOWN_NOT_NETWORK_FILTER_PATTERN = /[#%&=~]/; // const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder|\$cname)/;
const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder|\$cname)/;
// CNAME exception filters cannot be parsed by NetworkFilter // CNAME exception filters cannot be parsed by NetworkFilter
// Surge / Clash can't handle CNAME either, so we just ignore them // Surge / Clash can't handle CNAME either, so we just ignore them
// Keyword matcher consulted by parse() in its early bail-out check: when
// kwfilter($line) is truthy, parse() returns null without further work.
// NOTE(review): createKeywordFilter is imported from ./aho-corasick; presumably
// the returned function reports whether the input contains ANY of the keywords
// listed below — confirm against that module.
const kwfilter = createKeywordFilter([
// single characters that mark a line as something other than a plain domain rule
'!',
'?',
'*',
'[',
'(',
']',
')',
',',
'#',
'%',
'&',
'=',
'~',
// special modifiers; lines carrying any of these are rejected as well
'$popup',
'$removeparam',
'$popunder',
'$cname'
]);
function parse($line: string, gorhill: PublicSuffixList): null | [hostname: string, flag: ParseType] { function parse($line: string, gorhill: PublicSuffixList): null | [hostname: string, flag: ParseType] {
if ( if (
// doesn't include // doesn't include
!$line.includes('.') // rule with out dot can not be a domain !$line.includes('.') // rule with out dot can not be a domain
// includes // includes
|| $line.includes('!') || kwfilter($line)
|| $line.includes('?')
|| $line.includes('*')
|| $line.includes('[')
|| $line.includes('(')
|| $line.includes(']')
|| $line.includes(')')
|| $line.includes(',')
|| R_KNOWN_NOT_NETWORK_FILTER_PATTERN.test($line)
) { ) {
return null; return null;
} }
@@ -281,8 +294,6 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
|| lastCharCode === 46 // 46 `.`, line.endsWith('.') || lastCharCode === 46 // 46 `.`, line.endsWith('.')
|| lastCharCode === 45 // 45 `-`, line.endsWith('-') || lastCharCode === 45 // 45 `-`, line.endsWith('-')
|| lastCharCode === 95 // 95 `_`, line.endsWith('_') || lastCharCode === 95 // 95 `_`, line.endsWith('_')
// special modifier
|| R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2.test(line)
// || line.includes('$popup') // || line.includes('$popup')
// || line.includes('$removeparam') // || line.includes('$removeparam')
// || line.includes('$popunder') // || line.includes('$popunder')