mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 09:10:35 +08:00
Perf: faster adguard filter syntax parsing
This commit is contained in:
parent
f58c10e34c
commit
05ccd9fa50
@ -12,3 +12,8 @@ export const loosTldOptWithPrivateDomains: Parameters<typeof tldts.getSubdomain>
|
||||
...looseTldtsOpt,
|
||||
allowPrivateDomains: true
|
||||
};
|
||||
|
||||
export const normalizeTldtsOpt: Parameters<typeof tldts.getSubdomain>[1] = {
|
||||
allowPrivateDomains: true
|
||||
// detectIp: true
|
||||
};
|
||||
|
||||
@ -1,26 +1,32 @@
|
||||
// https://github.com/remusao/tldts/issues/2121
|
||||
// import tldts from 'tldts-experimental';
|
||||
import tldts from 'tldts';
|
||||
export const normalizeDomain = (domain: string) => {
|
||||
if (!domain) return null;
|
||||
import { normalizeTldtsOpt } from '../constants/loose-tldts-opt';
|
||||
|
||||
type TldTsParsed = ReturnType<typeof tldts.parse>;
|
||||
|
||||
export const normalizeDomain = (domain: string, parsed: TldTsParsed | null = null) => {
|
||||
if (domain.length === 0) return null;
|
||||
|
||||
parsed ??= tldts.parse(domain, normalizeTldtsOpt);
|
||||
|
||||
const parsed = tldts.parse(domain, { allowPrivateDomains: true, allowIcannDomains: true, detectIp: true });
|
||||
if (parsed.isIp) return null;
|
||||
if (!parsed.hostname) return null;
|
||||
|
||||
let h = parsed.hostname;
|
||||
if (h === null) return null;
|
||||
// Private invalid domain (things like .tor, .dn42, etc)
|
||||
if (!parsed.isIcann && !parsed.isPrivate) return null;
|
||||
|
||||
let h = parsed.hostname;
|
||||
|
||||
let sliceStart: number | undefined;
|
||||
let sliceEnd: number | undefined;
|
||||
let sliceStart = 0;
|
||||
let sliceEnd = 0;
|
||||
|
||||
if (h[0] === '.') sliceStart = 1;
|
||||
if (h.endsWith('.')) sliceEnd = -1;
|
||||
// eslint-disable-next-line sukka/string/prefer-string-starts-ends-with -- performance
|
||||
if (h[h.length - 1] === '.') sliceEnd = -1;
|
||||
|
||||
if (sliceStart !== undefined || sliceEnd !== undefined) {
|
||||
if (sliceStart !== 0 || sliceEnd !== 0) {
|
||||
h = h.slice(sliceStart, sliceEnd);
|
||||
}
|
||||
|
||||
return h || null;
|
||||
return h.length > 0 ? h : null;
|
||||
};
|
||||
|
||||
@ -1,12 +1,20 @@
|
||||
import { describe, it } from 'mocha';
|
||||
|
||||
import { processFilterRules } from './parse-filter';
|
||||
import { parse, processFilterRules, type ParseType } from './parse-filter';
|
||||
import { createCacheKey } from './cache-filesystem';
|
||||
import { createSpan } from '../trace';
|
||||
|
||||
const cacheKey = createCacheKey(__filename);
|
||||
|
||||
describe('processFilterRules', () => {
|
||||
describe('parse', () => {
|
||||
const MUTABLE_PARSE_LINE_RESULT: [string, ParseType] = ['', 1000];
|
||||
|
||||
it('||top.mail.ru^$badfilter', () => {
|
||||
console.log(parse('||top.mail.ru^$badfilter', MUTABLE_PARSE_LINE_RESULT));
|
||||
});
|
||||
});
|
||||
|
||||
describe.skip('processFilterRules', () => {
|
||||
it('https://filters.adtidy.org/extension/ublock/filters/18_optimized.txt', () => {
|
||||
console.log(processFilterRules(
|
||||
createSpan('noop'),
|
||||
|
||||
@ -143,6 +143,8 @@ const enum ParseType {
|
||||
Null = 1000
|
||||
}
|
||||
|
||||
export { type ParseType };
|
||||
|
||||
export async function processFilterRules(
|
||||
parentSpan: Span,
|
||||
filterRulesUrl: string,
|
||||
@ -289,10 +291,12 @@ const kwfilter = createKeywordFilter([
|
||||
'$popup',
|
||||
'$removeparam',
|
||||
'$popunder',
|
||||
'$cname'
|
||||
'$cname',
|
||||
// some bad syntax
|
||||
'^popup'
|
||||
]);
|
||||
|
||||
function parse($line: string, result: [string, ParseType]): [hostname: string, flag: ParseType] {
|
||||
export function parse($line: string, result: [string, ParseType]): [hostname: string, flag: ParseType] {
|
||||
if (
|
||||
// doesn't include
|
||||
!$line.includes('.') // rule with out dot can not be a domain
|
||||
@ -685,6 +689,7 @@ function parse($line: string, result: [string, ParseType]): [hostname: string, f
|
||||
*/
|
||||
let sliceStart = 0;
|
||||
let sliceEnd: number | undefined;
|
||||
|
||||
if (lineStartsWithSingleDot) {
|
||||
sliceStart = 1;
|
||||
}
|
||||
@ -696,28 +701,17 @@ function parse($line: string, result: [string, ParseType]): [hostname: string, f
|
||||
line.endsWith('$document')
|
||||
) {
|
||||
sliceEnd = -9;
|
||||
} else if (line.endsWith('$badfilter')) {
|
||||
sliceEnd = -10;
|
||||
}
|
||||
const sliced = (sliceStart !== 0 || sliceEnd !== undefined) ? line.slice(sliceStart, sliceEnd) : line;
|
||||
const suffix = tldts.getPublicSuffix(sliced, looseTldtsOpt);
|
||||
/**
|
||||
* Fast exclude definitely not domain-like resource
|
||||
*
|
||||
* `.gatracking.js`, suffix is `js`,
|
||||
* `.ads.css`, suffix is `css`,
|
||||
* `-cpm-ads.$badfilter`, suffix is `$badfilter`,
|
||||
* `portal.librus.pl$$advertisement-module`, suffix is `pl$$advertisement-module`
|
||||
*/
|
||||
if (!suffix) {
|
||||
// This exclude domain-like resource like `.gatracking.js`, `.beacon.min.js` and `.cookielaw.js`
|
||||
result[1] = ParseType.Null;
|
||||
return result;
|
||||
}
|
||||
|
||||
const tryNormalizeDomain = normalizeDomain(sliced);
|
||||
if (tryNormalizeDomain === sliced) {
|
||||
// the entire rule is domain
|
||||
result[0] = sliced;
|
||||
result[1] = ParseType.BlackIncludeSubdomain;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user