mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 09:10:35 +08:00
Perf: make reject parsing faster & more robust
This commit is contained in:
parent
e2f14d93b8
commit
91df00f7f3
@ -5,7 +5,7 @@ import { processLine } from './process-line';
|
|||||||
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
||||||
import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
|
import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
|
||||||
|
|
||||||
import { traceAsync } from './trace-runner';
|
import { traceAsync, traceSync } from './trace-runner';
|
||||||
import picocolors from 'picocolors';
|
import picocolors from 'picocolors';
|
||||||
import { normalizeDomain } from './normalize-domain';
|
import { normalizeDomain } from './normalize-domain';
|
||||||
import { fetchAssets } from './fetch-assets';
|
import { fetchAssets } from './fetch-assets';
|
||||||
@ -156,9 +156,13 @@ export async function processFilterRules(
|
|||||||
() => fetchAssets(filterRulesUrl, fallbackUrls),
|
() => fetchAssets(filterRulesUrl, fallbackUrls),
|
||||||
picocolors.gray
|
picocolors.gray
|
||||||
)).split('\n');
|
)).split('\n');
|
||||||
|
|
||||||
|
const key = picocolors.gray(`- parse adguard filter ${filterRulesUrl}`);
|
||||||
|
console.time(key);
|
||||||
for (let i = 0, len = filterRules.length; i < len; i++) {
|
for (let i = 0, len = filterRules.length; i < len; i++) {
|
||||||
lineCb(filterRules[i]);
|
lineCb(filterRules[i]);
|
||||||
}
|
}
|
||||||
|
console.timeEnd(key);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -305,17 +309,19 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** @example line.endsWith('^') */
|
/** @example line.endsWith('^') */
|
||||||
const linedEndsWithCaret = lastCharCode === 94; // lastChar === '^';
|
const lineEndsWithCaret = lastCharCode === 94; // lastChar === '^';
|
||||||
/** @example line.endsWith('^|') */
|
/** @example line.endsWith('^|') */
|
||||||
const lineEndsWithCaretVerticalBar = (lastCharCode === 124 /** lastChar === '|' */) && line[len - 2] === '^';
|
const lineEndsWithCaretVerticalBar = (lastCharCode === 124 /** lastChar === '|' */) && line[len - 2] === '^';
|
||||||
/** @example line.endsWith('^') || line.endsWith('^|') */
|
/** @example line.endsWith('^') || line.endsWith('^|') */
|
||||||
const lineEndsWithCaretOrCaretVerticalBar = linedEndsWithCaret || lineEndsWithCaretVerticalBar;
|
const lineEndsWithCaretOrCaretVerticalBar = lineEndsWithCaret || lineEndsWithCaretVerticalBar;
|
||||||
|
|
||||||
// whitelist (exception)
|
// whitelist (exception)
|
||||||
if (
|
if (
|
||||||
firstCharCode === 64 // 64 `@`
|
firstCharCode === 64 // 64 `@`
|
||||||
&& line[1] === '@'
|
&& line[1] === '@'
|
||||||
) {
|
) {
|
||||||
|
let whiteIncludeAllSubDomain = true;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Some "malformed" regex-based filters can not be parsed by NetworkFilter
|
* Some "malformed" regex-based filters can not be parsed by NetworkFilter
|
||||||
* "$genericblock`" is also not supported by NetworkFilter, see:
|
* "$genericblock`" is also not supported by NetworkFilter, see:
|
||||||
@ -331,22 +337,27 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
|
|||||||
let sliceStart = 0;
|
let sliceStart = 0;
|
||||||
let sliceEnd: number | undefined;
|
let sliceEnd: number | undefined;
|
||||||
|
|
||||||
// line.startsWith('@@|') || line.startsWith('@@.')
|
if (line[2] === '|') { // line.startsWith('@@|')
|
||||||
if (line[2] === '|' || line[2] === '.') {
|
|
||||||
sliceStart = 3;
|
sliceStart = 3;
|
||||||
// line.startsWith('@@||')
|
whiteIncludeAllSubDomain = false;
|
||||||
if (line[3] === '|') {
|
|
||||||
sliceStart = 4;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
if (line[3] === '|') { // line.startsWith('@@||')
|
||||||
* line.startsWith('@@://')
|
sliceStart = 4;
|
||||||
*
|
whiteIncludeAllSubDomain = true;
|
||||||
* `@@://googleadservices.com^|`
|
}
|
||||||
* `@@://www.googleadservices.com^|`
|
} else if (line[2] === '.') { // line.startsWith('@@.')
|
||||||
*/
|
sliceStart = 3;
|
||||||
if (line[2] === ':' && line[3] === '/' && line[4] === '/') {
|
whiteIncludeAllSubDomain = true;
|
||||||
|
} else if (
|
||||||
|
/**
|
||||||
|
* line.startsWith('@@://')
|
||||||
|
*
|
||||||
|
* `@@://googleadservices.com^|`
|
||||||
|
* `@@://www.googleadservices.com^|`
|
||||||
|
*/
|
||||||
|
line[2] === ':' && line[3] === '/' && line[4] === '/'
|
||||||
|
) {
|
||||||
|
whiteIncludeAllSubDomain = false;
|
||||||
sliceStart = 5;
|
sliceStart = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -368,7 +379,7 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
|
|||||||
const sliced = line.slice(sliceStart, sliceEnd);
|
const sliced = line.slice(sliceStart, sliceEnd);
|
||||||
const domain = normalizeDomain(sliced);
|
const domain = normalizeDomain(sliced);
|
||||||
if (domain) {
|
if (domain) {
|
||||||
return [domain, ParseType.WhiteIncludeSubdomain];
|
return [domain, whiteIncludeAllSubDomain ? ParseType.WhiteIncludeSubdomain : ParseType.WhiteAbsolute];
|
||||||
}
|
}
|
||||||
return [
|
return [
|
||||||
`[parse-filter E0001] (white) invalid domain: ${JSON.stringify({
|
`[parse-filter E0001] (white) invalid domain: ${JSON.stringify({
|
||||||
@ -386,40 +397,39 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
|
|||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (firstCharCode === 124) { // 124 `|`
|
if (
|
||||||
if (lineEndsWithCaretOrCaretVerticalBar) {
|
// 124 `|`
|
||||||
/**
|
// line.startsWith('|')
|
||||||
* Some malformed filters can not be parsed by NetworkFilter:
|
firstCharCode === 124
|
||||||
*
|
&& lineEndsWithCaretOrCaretVerticalBar
|
||||||
* `||smetrics.teambeachbody.com^.com^`
|
) {
|
||||||
* `||solutions.|pages.indigovision.com^`
|
/**
|
||||||
* `||vystar..0rg@client.iebetanialaargentina.edu.co^`
|
* Some malformed filters can not be parsed by NetworkFilter:
|
||||||
* `app-uat.latrobehealth.com.au^predirect.snapdeal.com`
|
*
|
||||||
*/
|
* `||smetrics.teambeachbody.com^.com^`
|
||||||
|
* `||solutions.|pages.indigovision.com^`
|
||||||
|
* `||vystar..0rg@client.iebetanialaargentina.edu.co^`
|
||||||
|
* `app-uat.latrobehealth.com.au^predirect.snapdeal.com`
|
||||||
|
*/
|
||||||
|
|
||||||
const includeAllSubDomain = line[1] === '|';
|
const includeAllSubDomain = line[1] === '|';
|
||||||
|
|
||||||
const sliceStart = includeAllSubDomain ? 2 : 1;
|
const sliceStart = includeAllSubDomain ? 2 : 1;
|
||||||
const sliceEnd = lastCharCode === 94 // lastChar === '^'
|
const sliceEnd = lineEndsWithCaret
|
||||||
? -1
|
? -1
|
||||||
: (lineEndsWithCaretVerticalBar
|
: (lineEndsWithCaretVerticalBar ? -2 : undefined);
|
||||||
? -2
|
|
||||||
: undefined);
|
|
||||||
|
|
||||||
const _domain = line
|
const sliced = line.slice(sliceStart, sliceEnd); // we already make sure line startsWith "|"
|
||||||
.slice(sliceStart, sliceEnd) // we already make sure line startsWith "|"
|
|
||||||
.trim();
|
|
||||||
|
|
||||||
const domain = normalizeDomain(_domain);
|
const domain = normalizeDomain(sliced);
|
||||||
if (domain) {
|
if (domain) {
|
||||||
return [domain, includeAllSubDomain ? ParseType.BlackIncludeSubdomain : ParseType.BlackAbsolute];
|
return [domain, includeAllSubDomain ? ParseType.BlackIncludeSubdomain : ParseType.BlackAbsolute];
|
||||||
}
|
|
||||||
|
|
||||||
return [
|
|
||||||
`[parse-filter E0002] (black) invalid domain: ${_domain}`,
|
|
||||||
ParseType.ErrorMessage
|
|
||||||
];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return [
|
||||||
|
`[parse-filter E0002] (black) invalid domain: ${sliced}`,
|
||||||
|
ParseType.ErrorMessage
|
||||||
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
const lineStartsWithSingleDot = firstCharCode === 46; // 46 `.`
|
const lineStartsWithSingleDot = firstCharCode === 46; // 46 `.`
|
||||||
@ -432,75 +442,78 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
|
|||||||
* `.m.bookben.com^`
|
* `.m.bookben.com^`
|
||||||
* `.wap.x4399.com^`
|
* `.wap.x4399.com^`
|
||||||
*/
|
*/
|
||||||
const _domain = line.slice(
|
const sliced = line.slice(
|
||||||
1, // remove prefix dot
|
1, // remove prefix dot
|
||||||
linedEndsWithCaret // replaceAll('^', '')
|
lineEndsWithCaret // replaceAll('^', '')
|
||||||
? -1
|
? -1
|
||||||
: (lineEndsWithCaretVerticalBar ? -2 : 0) // replace('^|', '')
|
: (lineEndsWithCaretVerticalBar ? -2 : undefined) // replace('^|', '')
|
||||||
);
|
);
|
||||||
|
|
||||||
const suffix = gorhill.getPublicSuffix(_domain);
|
const suffix = gorhill.getPublicSuffix(sliced);
|
||||||
if (!gorhill.suffixInPSL(suffix)) {
|
if (!gorhill.suffixInPSL(suffix)) {
|
||||||
// This excludes domain-like resources such as `1.1.4.514.js`
|
// This excludes domain-like resources such as `1.1.4.514.js`
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const domain = normalizeDomain(_domain);
|
const domain = normalizeDomain(sliced);
|
||||||
if (domain) {
|
if (domain) {
|
||||||
return [domain, ParseType.BlackIncludeSubdomain];
|
return [domain, ParseType.BlackIncludeSubdomain];
|
||||||
}
|
}
|
||||||
|
|
||||||
return [
|
return [
|
||||||
`[paparse-filter E0003] (black) invalid domain: ${_domain}`,
|
`[paparse-filter E0003] (black) invalid domain: ${sliced}`,
|
||||||
ParseType.ErrorMessage
|
ParseType.ErrorMessage
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* `|http://x.o2.pl^`
|
* `|http://x.o2.pl^`
|
||||||
* `://mine.torrent.pw^`
|
* `://mine.torrent.pw^`
|
||||||
* `://say.ac^`
|
* `://say.ac^`
|
||||||
*/
|
*/
|
||||||
if (
|
if (lineEndsWithCaretOrCaretVerticalBar) {
|
||||||
(
|
let sliceStart = 0;
|
||||||
line.startsWith('://')
|
let sliceEnd;
|
||||||
|| line.startsWith('http://')
|
if (lineEndsWithCaret) { // line.endsWith('^')
|
||||||
|| line.startsWith('https://')
|
sliceEnd = -1;
|
||||||
|| line.startsWith('|http://')
|
} else if (lineEndsWithCaretVerticalBar) { // line.endsWith('^|')
|
||||||
|| line.startsWith('|https://')
|
sliceEnd = -2;
|
||||||
)
|
}
|
||||||
&& lineEndsWithCaretOrCaretVerticalBar
|
if (line.startsWith('://')) {
|
||||||
) {
|
sliceStart = 3;
|
||||||
const _domain = line
|
} else if (line.startsWith('http://')) {
|
||||||
.replace('|https://', '')
|
sliceStart = 7;
|
||||||
.replace('https://', '')
|
} else if (line.startsWith('https://')) {
|
||||||
.replace('|http://', '')
|
sliceStart = 8;
|
||||||
.replace('http://', '')
|
} else if (line.startsWith('|http://')) {
|
||||||
.replace('://', '')
|
sliceStart = 8;
|
||||||
.replace('^|', '')
|
} else if (line.startsWith('|https://')) {
|
||||||
.replaceAll('^', '')
|
sliceStart = 9;
|
||||||
.trim();
|
|
||||||
|
|
||||||
const domain = normalizeDomain(_domain);
|
|
||||||
if (domain) {
|
|
||||||
return [domain, ParseType.BlackAbsolute];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return [
|
if (sliceStart !== 0 || sliceEnd !== undefined) {
|
||||||
`[parse-filter E0004] (black) invalid domain: ${_domain}`,
|
const sliced = line.slice(sliceStart, sliceEnd);
|
||||||
ParseType.ErrorMessage
|
const domain = normalizeDomain(sliced);
|
||||||
];
|
if (domain) {
|
||||||
|
return [domain, ParseType.BlackIncludeSubdomain];
|
||||||
|
}
|
||||||
|
return [
|
||||||
|
`[parse-filter E0004] (black) invalid domain: ${JSON.stringify({
|
||||||
|
line, sliced, sliceStart, sliceEnd
|
||||||
|
})}`,
|
||||||
|
ParseType.ErrorMessage
|
||||||
|
];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* `_vmind.qqvideo.tc.qq.com^`
|
* `_vmind.qqvideo.tc.qq.com^`
|
||||||
* `arketing.indianadunes.com^`
|
* `arketing.indianadunes.com^`
|
||||||
* `charlestownwyllie.oaklawnnonantum.com^`
|
* `charlestownwyllie.oaklawnnonantum.com^`
|
||||||
* `-telemetry.officeapps.live.com^`
|
* `-telemetry.officeapps.live.com^`
|
||||||
* `-tracker.biliapi.net`
|
* `-tracker.biliapi.net`
|
||||||
* `-logging.nextmedia.com`
|
* `-logging.nextmedia.com`
|
||||||
* `_social_tracking.js^`
|
* `_social_tracking.js^`
|
||||||
*/
|
*/
|
||||||
if (
|
if (
|
||||||
firstCharCode !== 124 // 124 `|`
|
firstCharCode !== 124 // 124 `|`
|
||||||
&& lastCharCode === 94 // 94 `^`
|
&& lastCharCode === 94 // 94 `^`
|
||||||
@ -524,43 +537,62 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
|
|||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Possibly that entire rule is domain
|
||||||
|
|
||||||
|
/**
|
||||||
|
* lineStartsWithSingleDot:
|
||||||
|
*
|
||||||
|
* `.cookielaw.js`
|
||||||
|
* `.content_tracking.js`
|
||||||
|
* `.ads.css`
|
||||||
|
*
|
||||||
|
* else:
|
||||||
|
*
|
||||||
|
* `_prebid.js`
|
||||||
|
* `t.yesware.com`
|
||||||
|
* `ubmcmm.baidustatic.com`
|
||||||
|
* `://www.smfg-card.$document`
|
||||||
|
* `portal.librus.pl$$advertisement-module`
|
||||||
|
* `@@-ds.metric.gstatic.com^|`
|
||||||
|
* `://gom.ge/cookie.js`
|
||||||
|
* `://accout-update-smba.jp.$document`
|
||||||
|
* `_200x250.png`
|
||||||
|
* `@@://www.liquidweb.com/kb/wp-content/themes/lw-kb-theme/images/ads/vps-sidebar.jpg`
|
||||||
|
*/
|
||||||
|
let sliceStart = 0;
|
||||||
|
let sliceEnd: number | undefined;
|
||||||
if (lineStartsWithSingleDot) {
|
if (lineStartsWithSingleDot) {
|
||||||
/**
|
sliceStart = 1;
|
||||||
* `.cookielaw.js`
|
}
|
||||||
* `.content_tracking.js`
|
if (line.endsWith('^$all')) { // This salvage line `thepiratebay3.com^$all`
|
||||||
* `.ads.css`
|
sliceEnd = -5;
|
||||||
*/
|
} else if (
|
||||||
const _domain = line.slice(1);
|
// Try to salvage lines like `://account.smba.$document`
|
||||||
|
// For this specific line, it will fail anyway though.
|
||||||
|
line.endsWith('$document')
|
||||||
|
) {
|
||||||
|
sliceEnd = -9;
|
||||||
|
}
|
||||||
|
const sliced = (sliceStart !== 0 || sliceEnd !== undefined) ? line.slice(sliceStart, sliceEnd) : line;
|
||||||
|
const suffix = gorhill.getPublicSuffix(sliced);
|
||||||
|
/**
|
||||||
|
* Fast exclude definitely not domain-like resource
|
||||||
|
*
|
||||||
|
* `.gatracking.js`, suffix is `js`,
|
||||||
|
* `.ads.css`, suffix is `css`,
|
||||||
|
* `-cpm-ads.$badfilter`, suffix is `$badfilter`,
|
||||||
|
* `portal.librus.pl$$advertisement-module`, suffix is `pl$$advertisement-module`
|
||||||
|
*/
|
||||||
|
if (!suffix || !gorhill.suffixInPSL(suffix)) {
|
||||||
|
// This excludes domain-like resources such as `.gatracking.js`, `.beacon.min.js` and `.cookielaw.js`
|
||||||
|
console.log({ line, suffix });
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
const suffix = gorhill.getPublicSuffix(_domain);
|
const tryNormalizeDomain = normalizeDomain(sliced);
|
||||||
if (!suffix || !gorhill.suffixInPSL(suffix)) {
|
if (tryNormalizeDomain === sliced) {
|
||||||
// This excludes domain-like resources such as `.gatracking.js`, `.beacon.min.js` and `.cookielaw.js`
|
// the entire rule is domain
|
||||||
return null;
|
return [sliced, ParseType.BlackIncludeSubdomain];
|
||||||
}
|
|
||||||
|
|
||||||
const tryNormalizeDomain = normalizeDomain(_domain);
|
|
||||||
if (tryNormalizeDomain === _domain) {
|
|
||||||
// the entire rule is domain
|
|
||||||
return [line, ParseType.BlackIncludeSubdomain];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/**
|
|
||||||
* `_prebid.js`
|
|
||||||
* `t.yesware.com`
|
|
||||||
* `ubmcmm.baidustatic.com`
|
|
||||||
* `://www.smfg-card.$document`
|
|
||||||
* `portal.librus.pl$$advertisement-module`
|
|
||||||
* `@@-ds.metric.gstatic.com^|`
|
|
||||||
* `://gom.ge/cookie.js`
|
|
||||||
* `://accout-update-smba.jp.$document`
|
|
||||||
* `_200x250.png`
|
|
||||||
* `@@://www.liquidweb.com/kb/wp-content/themes/lw-kb-theme/images/ads/vps-sidebar.jpg`
|
|
||||||
*/
|
|
||||||
const tryNormalizeDomain = normalizeDomain(line);
|
|
||||||
if (tryNormalizeDomain === line) {
|
|
||||||
// the entire rule is domain
|
|
||||||
return [line, ParseType.BlackIncludeSubdomain];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return [
|
return [
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user