mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-13 01:30:37 +08:00
Perf: faster adguard filter syntax parsing
This commit is contained in:
parent
f58c10e34c
commit
05ccd9fa50
@ -12,3 +12,8 @@ export const loosTldOptWithPrivateDomains: Parameters<typeof tldts.getSubdomain>
|
|||||||
...looseTldtsOpt,
|
...looseTldtsOpt,
|
||||||
allowPrivateDomains: true
|
allowPrivateDomains: true
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export const normalizeTldtsOpt: Parameters<typeof tldts.getSubdomain>[1] = {
|
||||||
|
allowPrivateDomains: true
|
||||||
|
// detectIp: true
|
||||||
|
};
|
||||||
|
|||||||
@ -1,26 +1,32 @@
|
|||||||
// https://github.com/remusao/tldts/issues/2121
|
// https://github.com/remusao/tldts/issues/2121
|
||||||
// import tldts from 'tldts-experimental';
|
// import tldts from 'tldts-experimental';
|
||||||
import tldts from 'tldts';
|
import tldts from 'tldts';
|
||||||
export const normalizeDomain = (domain: string) => {
|
import { normalizeTldtsOpt } from '../constants/loose-tldts-opt';
|
||||||
if (!domain) return null;
|
|
||||||
|
type TldTsParsed = ReturnType<typeof tldts.parse>;
|
||||||
|
|
||||||
|
export const normalizeDomain = (domain: string, parsed: TldTsParsed | null = null) => {
|
||||||
|
if (domain.length === 0) return null;
|
||||||
|
|
||||||
|
parsed ??= tldts.parse(domain, normalizeTldtsOpt);
|
||||||
|
|
||||||
const parsed = tldts.parse(domain, { allowPrivateDomains: true, allowIcannDomains: true, detectIp: true });
|
|
||||||
if (parsed.isIp) return null;
|
if (parsed.isIp) return null;
|
||||||
if (!parsed.hostname) return null;
|
|
||||||
|
let h = parsed.hostname;
|
||||||
|
if (h === null) return null;
|
||||||
// Private invalid domain (things like .tor, .dn42, etc)
|
// Private invalid domain (things like .tor, .dn42, etc)
|
||||||
if (!parsed.isIcann && !parsed.isPrivate) return null;
|
if (!parsed.isIcann && !parsed.isPrivate) return null;
|
||||||
|
|
||||||
let h = parsed.hostname;
|
let sliceStart = 0;
|
||||||
|
let sliceEnd = 0;
|
||||||
let sliceStart: number | undefined;
|
|
||||||
let sliceEnd: number | undefined;
|
|
||||||
|
|
||||||
if (h[0] === '.') sliceStart = 1;
|
if (h[0] === '.') sliceStart = 1;
|
||||||
if (h.endsWith('.')) sliceEnd = -1;
|
// eslint-disable-next-line sukka/string/prefer-string-starts-ends-with -- performance
|
||||||
|
if (h[h.length - 1] === '.') sliceEnd = -1;
|
||||||
|
|
||||||
if (sliceStart !== undefined || sliceEnd !== undefined) {
|
if (sliceStart !== 0 || sliceEnd !== 0) {
|
||||||
h = h.slice(sliceStart, sliceEnd);
|
h = h.slice(sliceStart, sliceEnd);
|
||||||
}
|
}
|
||||||
|
|
||||||
return h || null;
|
return h.length > 0 ? h : null;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -1,12 +1,20 @@
|
|||||||
import { describe, it } from 'mocha';
|
import { describe, it } from 'mocha';
|
||||||
|
|
||||||
import { processFilterRules } from './parse-filter';
|
import { parse, processFilterRules, type ParseType } from './parse-filter';
|
||||||
import { createCacheKey } from './cache-filesystem';
|
import { createCacheKey } from './cache-filesystem';
|
||||||
import { createSpan } from '../trace';
|
import { createSpan } from '../trace';
|
||||||
|
|
||||||
const cacheKey = createCacheKey(__filename);
|
const cacheKey = createCacheKey(__filename);
|
||||||
|
|
||||||
describe('processFilterRules', () => {
|
describe('parse', () => {
|
||||||
|
const MUTABLE_PARSE_LINE_RESULT: [string, ParseType] = ['', 1000];
|
||||||
|
|
||||||
|
it('||top.mail.ru^$badfilter', () => {
|
||||||
|
console.log(parse('||top.mail.ru^$badfilter', MUTABLE_PARSE_LINE_RESULT));
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe.skip('processFilterRules', () => {
|
||||||
it('https://filters.adtidy.org/extension/ublock/filters/18_optimized.txt', () => {
|
it('https://filters.adtidy.org/extension/ublock/filters/18_optimized.txt', () => {
|
||||||
console.log(processFilterRules(
|
console.log(processFilterRules(
|
||||||
createSpan('noop'),
|
createSpan('noop'),
|
||||||
|
|||||||
@ -143,6 +143,8 @@ const enum ParseType {
|
|||||||
Null = 1000
|
Null = 1000
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export { type ParseType };
|
||||||
|
|
||||||
export async function processFilterRules(
|
export async function processFilterRules(
|
||||||
parentSpan: Span,
|
parentSpan: Span,
|
||||||
filterRulesUrl: string,
|
filterRulesUrl: string,
|
||||||
@ -289,10 +291,12 @@ const kwfilter = createKeywordFilter([
|
|||||||
'$popup',
|
'$popup',
|
||||||
'$removeparam',
|
'$removeparam',
|
||||||
'$popunder',
|
'$popunder',
|
||||||
'$cname'
|
'$cname',
|
||||||
|
// some bad syntax
|
||||||
|
'^popup'
|
||||||
]);
|
]);
|
||||||
|
|
||||||
function parse($line: string, result: [string, ParseType]): [hostname: string, flag: ParseType] {
|
export function parse($line: string, result: [string, ParseType]): [hostname: string, flag: ParseType] {
|
||||||
if (
|
if (
|
||||||
// doesn't include
|
// doesn't include
|
||||||
!$line.includes('.') // rule with out dot can not be a domain
|
!$line.includes('.') // rule with out dot can not be a domain
|
||||||
@ -685,6 +689,7 @@ function parse($line: string, result: [string, ParseType]): [hostname: string, f
|
|||||||
*/
|
*/
|
||||||
let sliceStart = 0;
|
let sliceStart = 0;
|
||||||
let sliceEnd: number | undefined;
|
let sliceEnd: number | undefined;
|
||||||
|
|
||||||
if (lineStartsWithSingleDot) {
|
if (lineStartsWithSingleDot) {
|
||||||
sliceStart = 1;
|
sliceStart = 1;
|
||||||
}
|
}
|
||||||
@ -696,28 +701,17 @@ function parse($line: string, result: [string, ParseType]): [hostname: string, f
|
|||||||
line.endsWith('$document')
|
line.endsWith('$document')
|
||||||
) {
|
) {
|
||||||
sliceEnd = -9;
|
sliceEnd = -9;
|
||||||
|
} else if (line.endsWith('$badfilter')) {
|
||||||
|
sliceEnd = -10;
|
||||||
}
|
}
|
||||||
const sliced = (sliceStart !== 0 || sliceEnd !== undefined) ? line.slice(sliceStart, sliceEnd) : line;
|
const sliced = (sliceStart !== 0 || sliceEnd !== undefined) ? line.slice(sliceStart, sliceEnd) : line;
|
||||||
const suffix = tldts.getPublicSuffix(sliced, looseTldtsOpt);
|
|
||||||
/**
|
|
||||||
* Fast exclude definitely not domain-like resource
|
|
||||||
*
|
|
||||||
* `.gatracking.js`, suffix is `js`,
|
|
||||||
* `.ads.css`, suffix is `css`,
|
|
||||||
* `-cpm-ads.$badfilter`, suffix is `$badfilter`,
|
|
||||||
* `portal.librus.pl$$advertisement-module`, suffix is `pl$$advertisement-module`
|
|
||||||
*/
|
|
||||||
if (!suffix) {
|
|
||||||
// This exclude domain-like resource like `.gatracking.js`, `.beacon.min.js` and `.cookielaw.js`
|
|
||||||
result[1] = ParseType.Null;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
const tryNormalizeDomain = normalizeDomain(sliced);
|
const tryNormalizeDomain = normalizeDomain(sliced);
|
||||||
if (tryNormalizeDomain === sliced) {
|
if (tryNormalizeDomain === sliced) {
|
||||||
// the entire rule is domain
|
// the entire rule is domain
|
||||||
result[0] = sliced;
|
result[0] = sliced;
|
||||||
result[1] = ParseType.BlackIncludeSubdomain;
|
result[1] = ParseType.BlackIncludeSubdomain;
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user