mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 17:20:35 +08:00
Refactor/Perf: rewrite how rules are salvaged
This commit is contained in:
parent
ff6db02b99
commit
2643903b24
@ -300,7 +300,7 @@ export function parse($line: string, result: [string, ParseType], allowThirdPart
|
||||
return result;
|
||||
}
|
||||
|
||||
let line = $line.trim();
|
||||
const line = $line.trim();
|
||||
|
||||
if (line.length === 0) {
|
||||
result[1] = ParseType.Null;
|
||||
@ -308,11 +308,14 @@ export function parse($line: string, result: [string, ParseType], allowThirdPart
|
||||
}
|
||||
|
||||
const firstCharCode = line.charCodeAt(0);
|
||||
let lastCharCode = line.charCodeAt(line.length - 1);
|
||||
const lastCharCode = line.charCodeAt(line.length - 1);
|
||||
|
||||
if (
|
||||
firstCharCode === 47 // 47 `/`
|
||||
// ends with
|
||||
// _160-600.
|
||||
// -detect-adblock.
|
||||
// _web-advert.
|
||||
|| lastCharCode === 46 // 46 `.`, line.endsWith('.')
|
||||
|| lastCharCode === 45 // 45 `-`, line.endsWith('-')
|
||||
|| lastCharCode === 95 // 95 `_`, line.endsWith('_')
|
||||
@ -405,46 +408,29 @@ export function parse($line: string, result: [string, ParseType], allowThirdPart
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* From now on, we are mostly facing non-standard domain rules (some are regex like)
|
||||
*
|
||||
* We can still salvage some of them by removing modifiers
|
||||
*/
|
||||
|
||||
let sliceStart = 0;
|
||||
let sliceEnd = 0;
|
||||
|
||||
// After NetworkFilter.parse, it means the line can not be parsed by cliqz NetworkFilter
|
||||
// We now need to "salvage" the line as much as possible
|
||||
|
||||
/*
|
||||
* From now on, we are mostly facing non-standard domain rules (some are regex like)
|
||||
* We first skip third-party and frame rules, as Surge / Clash can't handle them
|
||||
*
|
||||
* `.sharecounter.$third-party`
|
||||
* `.bbelements.com^$third-party`
|
||||
* `://o0e.ru^$third-party`
|
||||
* `.1.1.1.l80.js^$third-party`
|
||||
*/
|
||||
if (line.includes('$third-party')) {
|
||||
if (!allowThirdParty) {
|
||||
result[1] = ParseType.Null;
|
||||
return result;
|
||||
}
|
||||
let white = false;
|
||||
let includeAllSubDomain = false;
|
||||
|
||||
line = line
|
||||
.replace('$third-party,', '$')
|
||||
.replace('$third-party', '');
|
||||
}
|
||||
|
||||
lastCharCode = line.charCodeAt(line.length - 1);
|
||||
|
||||
/** @example line.endsWith('^') */
|
||||
const lineEndsWithCaret = lastCharCode === 94; // lastChar === '^';
|
||||
/** @example line.endsWith('|') */
|
||||
const lineEndsWithVerticalBar = lastCharCode === 124; // lastChar === '|';
|
||||
/** @example line.endsWith('^|') */
|
||||
const lineEndsWithCaretVerticalBar = lineEndsWithVerticalBar && line[line.length - 2] === '^';
|
||||
/** @example line.endsWith('^') || line.endsWith('^|') */
|
||||
const lineEndsWithCaretOrCaretVerticalBar = lineEndsWithCaret || lineEndsWithCaretVerticalBar;
|
||||
|
||||
// whitelist (exception)
|
||||
if (
|
||||
firstCharCode === 64 // 64 `@`
|
||||
&& line[1] === '@'
|
||||
&& line.charCodeAt(1) === 64 // 64 `@`
|
||||
) {
|
||||
let whiteIncludeAllSubDomain = true;
|
||||
sliceStart += 2;
|
||||
white = true;
|
||||
includeAllSubDomain = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Some "malformed" regex-based filters can not be parsed by NetworkFilter
|
||||
@ -458,38 +444,55 @@ export function parse($line: string, result: [string, ParseType], allowThirdPart
|
||||
* `@@||ad.alimama.com^$genericblock`
|
||||
*/
|
||||
|
||||
let sliceStart = 0;
|
||||
let sliceEnd: number | undefined;
|
||||
switch (line.charCodeAt(sliceStart)) {
|
||||
case 124: /** | */
|
||||
// line.startsWith('@@|') || line.startsWith('|')
|
||||
sliceStart += 1;
|
||||
includeAllSubDomain = false;
|
||||
|
||||
switch (line[2]) {
|
||||
case '|':
|
||||
// line.startsWith('@@|')
|
||||
sliceStart = 3;
|
||||
whiteIncludeAllSubDomain = false;
|
||||
|
||||
if (line[3] === '|') { // line.startsWith('@@||')
|
||||
sliceStart = 4;
|
||||
whiteIncludeAllSubDomain = true;
|
||||
if (line[sliceStart] === '|') { // line.startsWith('@@||') || line.startsWith('||')
|
||||
sliceStart += 1;
|
||||
includeAllSubDomain = true;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case '.': { // line.startsWith('@@.')
|
||||
sliceStart = 3;
|
||||
whiteIncludeAllSubDomain = true;
|
||||
break;
|
||||
}
|
||||
|
||||
case ':': {
|
||||
case 46: { /** | */ // line.startsWith('@@.') || line.startsWith('.')
|
||||
/**
|
||||
* `.ay.delivery^`
|
||||
* `.m.bookben.com^`
|
||||
* `.wap.x4399.com^`
|
||||
*/
|
||||
sliceStart += 1;
|
||||
includeAllSubDomain = true;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (line.charCodeAt(sliceStart)) {
|
||||
case 58: { /** : */
|
||||
/**
|
||||
* line.startsWith('@@://')
|
||||
*
|
||||
* `@@://googleadservices.com^|`
|
||||
* `@@://www.googleadservices.com^|`
|
||||
* `://mine.torrent.pw^`
|
||||
* `://say.ac^`
|
||||
*/
|
||||
if (line[3] === '/' && line[4] === '/') {
|
||||
whiteIncludeAllSubDomain = false;
|
||||
sliceStart = 5;
|
||||
if (line[sliceStart + 1] === '/' && line[sliceStart + 2] === '/') {
|
||||
includeAllSubDomain = false;
|
||||
sliceStart += 3;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case 104: { /** h */
|
||||
/** |http://x.o2.pl^ */
|
||||
if (line.startsWith('http://', sliceStart)) {
|
||||
sliceStart += 7;
|
||||
} else if (line.startsWith('https://', sliceStart)) {
|
||||
sliceStart += 8;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -498,170 +501,37 @@ export function parse($line: string, result: [string, ParseType], allowThirdPart
|
||||
break;
|
||||
}
|
||||
|
||||
if (lineEndsWithCaret) {
|
||||
sliceEnd = -1;
|
||||
} else if (lineEndsWithVerticalBar) {
|
||||
// It is possible that a whitelist filter ends with '|' without '^|'
|
||||
// @@|www.auslogics.com|
|
||||
sliceEnd = lineEndsWithCaretVerticalBar ? -2 : -1;
|
||||
} else if (line.endsWith('$genericblock')) {
|
||||
sliceEnd = -13;
|
||||
if (line[line.length - 14] === '^') { // line.endsWith('^$genericblock')
|
||||
sliceEnd = -14;
|
||||
}
|
||||
} else if (line.endsWith('$document')) {
|
||||
sliceEnd = -9;
|
||||
if (line[line.length - 10] === '^') { // line.endsWith('^$document')
|
||||
sliceEnd = -10;
|
||||
}
|
||||
const indexOfDollar = line.indexOf('$', sliceStart);
|
||||
if (indexOfDollar > -1) {
|
||||
sliceEnd = indexOfDollar - line.length;
|
||||
}
|
||||
|
||||
if (sliceStart !== 0 || sliceEnd !== undefined) {
|
||||
const sliced = line.slice(sliceStart, sliceEnd);
|
||||
const domain = normalizeDomain(sliced);
|
||||
if (domain) {
|
||||
result[0] = domain;
|
||||
result[1] = whiteIncludeAllSubDomain ? ParseType.WhiteIncludeSubdomain : ParseType.WhiteAbsolute;
|
||||
return result;
|
||||
}
|
||||
|
||||
result[0] = `[parse-filter E0001] (white) invalid domain: ${JSON.stringify({
|
||||
line, sliced, sliceStart, sliceEnd, domain
|
||||
})}`;
|
||||
result[1] = ParseType.ErrorMessage;
|
||||
return result;
|
||||
}
|
||||
|
||||
result[0] = `[parse-filter E0006] (white) failed to parse: ${JSON.stringify({
|
||||
line, sliceStart, sliceEnd
|
||||
})}`;
|
||||
result[1] = ParseType.ErrorMessage;
|
||||
return result;
|
||||
}
|
||||
|
||||
if (
|
||||
// 124 `|`
|
||||
// line.startsWith('|')
|
||||
firstCharCode === 124
|
||||
&& lineEndsWithCaretOrCaretVerticalBar
|
||||
) {
|
||||
/**
|
||||
* Some malformed filters can not be parsed by NetworkFilter:
|
||||
/*
|
||||
* We skip third-party and frame rules, as Surge / Clash can't handle them
|
||||
*
|
||||
* `||smetrics.teambeachbody.com^.com^`
|
||||
* `||solutions.|pages.indigovision.com^`
|
||||
* `||vystar..0rg@client.iebetanialaargentina.edu.co^`
|
||||
* `app-uat.latrobehealth.com.au^predirect.snapdeal.com`
|
||||
* `.sharecounter.$third-party`
|
||||
* `.bbelements.com^$third-party`
|
||||
* `://o0e.ru^$third-party`
|
||||
* `.1.1.1.l80.js^$third-party`
|
||||
*/
|
||||
|
||||
const includeAllSubDomain = line[1] === '|';
|
||||
|
||||
const sliceStart = includeAllSubDomain ? 2 : 1;
|
||||
const sliceEnd = lineEndsWithCaret
|
||||
? -1
|
||||
: (lineEndsWithCaretVerticalBar ? -2 : undefined);
|
||||
|
||||
const sliced = line.slice(sliceStart, sliceEnd); // we already make sure line startsWith "|"
|
||||
|
||||
const domain = normalizeDomain(sliced);
|
||||
if (domain) {
|
||||
result[0] = domain;
|
||||
result[1] = includeAllSubDomain ? ParseType.BlackIncludeSubdomain : ParseType.BlackAbsolute;
|
||||
return result;
|
||||
}
|
||||
|
||||
result[0] = `[parse-filter E0002] (black) invalid domain: ${sliced}`;
|
||||
result[1] = ParseType.ErrorMessage;
|
||||
return result;
|
||||
}
|
||||
|
||||
// if (line.endsWith('$image')) {
|
||||
// /**
|
||||
// * Some $image filters are not NetworkFilter:
|
||||
// *
|
||||
// * `app.site123.com$image`
|
||||
// * `t.signaux$image`
|
||||
// * `track.customer.io$image`
|
||||
// */
|
||||
// }
|
||||
const lineStartsWithSingleDot = firstCharCode === 46; // 46 `.`
|
||||
|
||||
if (
|
||||
lineStartsWithSingleDot
|
||||
&& lineEndsWithCaretOrCaretVerticalBar
|
||||
!allowThirdParty
|
||||
&& (
|
||||
line.includes('third-party', indexOfDollar + 1)
|
||||
|| line.includes('3p', indexOfDollar + 1)
|
||||
)
|
||||
) {
|
||||
/**
|
||||
* `.ay.delivery^`
|
||||
* `.m.bookben.com^`
|
||||
* `.wap.x4399.com^`
|
||||
*/
|
||||
const sliced = line.slice(
|
||||
1, // remove prefix dot
|
||||
lineEndsWithCaret // replaceAll('^', '')
|
||||
? -1
|
||||
: (lineEndsWithCaretVerticalBar ? -2 : undefined) // replace('^|', '')
|
||||
);
|
||||
|
||||
const suffix = tldts.getPublicSuffix(sliced, looseTldtsOpt);
|
||||
if (!suffix) {
|
||||
// This excludes domain-like resources like `1.1.4.514.js`
|
||||
result[1] = ParseType.Null;
|
||||
return result;
|
||||
}
|
||||
|
||||
const domain = normalizeDomain(sliced);
|
||||
if (domain) {
|
||||
result[0] = domain;
|
||||
result[1] = ParseType.BlackIncludeSubdomain;
|
||||
return result;
|
||||
if (line.includes('badfilter', indexOfDollar + 1)) {
|
||||
white = true;
|
||||
}
|
||||
if (line.includes('all', indexOfDollar + 1)) {
|
||||
includeAllSubDomain = true;
|
||||
}
|
||||
|
||||
result[0] = `[parse-filter E0003] (black) invalid domain: ${JSON.stringify({ sliced, domain })}`;
|
||||
result[1] = ParseType.ErrorMessage;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* `|http://x.o2.pl^`
|
||||
* `://mine.torrent.pw^`
|
||||
* `://say.ac^`
|
||||
*/
|
||||
if (lineEndsWithCaretOrCaretVerticalBar) {
|
||||
let sliceStart = 0;
|
||||
let sliceEnd;
|
||||
if (lineEndsWithCaret) { // line.endsWith('^')
|
||||
sliceEnd = -1;
|
||||
} else if (lineEndsWithCaretVerticalBar) { // line.endsWith('^|')
|
||||
sliceEnd = -2;
|
||||
}
|
||||
if (line.startsWith('://')) {
|
||||
sliceStart = 3;
|
||||
} else if (line.startsWith('http://')) {
|
||||
sliceStart = 7;
|
||||
} else if (line.startsWith('https://')) {
|
||||
sliceStart = 8;
|
||||
} else if (line.startsWith('|http://')) {
|
||||
sliceStart = 8;
|
||||
} else if (line.startsWith('|https://')) {
|
||||
sliceStart = 9;
|
||||
}
|
||||
|
||||
if (sliceStart !== 0 || sliceEnd !== undefined) {
|
||||
const sliced = line.slice(sliceStart, sliceEnd);
|
||||
const domain = normalizeDomain(sliced);
|
||||
if (domain) {
|
||||
result[0] = domain;
|
||||
result[1] = ParseType.BlackIncludeSubdomain;
|
||||
return result;
|
||||
}
|
||||
|
||||
result[0] = `[parse-filter E0004] (black) invalid domain: ${JSON.stringify({
|
||||
line, sliced, sliceStart, sliceEnd, domain
|
||||
})}`;
|
||||
result[1] = ParseType.ErrorMessage;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* `_vmind.qqvideo.tc.qq.com^`
|
||||
* `arketing.indianadunes.com^`
|
||||
@ -671,103 +541,54 @@ export function parse($line: string, result: [string, ParseType], allowThirdPart
|
||||
* `-logging.nextmedia.com`
|
||||
* `_social_tracking.js^`
|
||||
*/
|
||||
if (
|
||||
firstCharCode !== 124 // 124 `|`
|
||||
&& lastCharCode === 94 // 94 `^`
|
||||
) {
|
||||
const _domain = line.slice(0, -1);
|
||||
if (line.charCodeAt(line.length + sliceEnd - 1) === 94) { // 94 `^`
|
||||
/** line.endsWith('^') */
|
||||
sliceEnd -= 1;
|
||||
} else if (line.charCodeAt(line.length + sliceEnd - 1) === 124) { // 124 `|`
|
||||
/** line.endsWith('|') */
|
||||
sliceEnd -= 1;
|
||||
|
||||
const suffix = tldts.getPublicSuffix(_domain, looseTldtsOpt);
|
||||
if (line.charCodeAt(line.length + sliceEnd - 1) === 94) { // 94 `^`
|
||||
/** line.endsWith('^|') */
|
||||
sliceEnd -= 1;
|
||||
}
|
||||
} else if (line.charCodeAt(line.length + sliceEnd - 1) === 46) { // 46 `.`
|
||||
/** line.endsWith('.') */
|
||||
sliceEnd -= 1;
|
||||
}
|
||||
|
||||
const sliced = (sliceStart > 0 || sliceEnd < 0) ? line.slice(sliceStart, sliceEnd === 0 ? undefined : sliceEnd) : line;
|
||||
if (sliced.charCodeAt(0) === 45 /* - */) {
|
||||
// line.startsWith('-') is not a valid domain
|
||||
result[1] = ParseType.ErrorMessage;
|
||||
result[0] = `[parse-filter E0001] (${white ? 'white' : 'black'}) invalid domain: ${JSON.stringify({
|
||||
line, sliced, sliceStart, sliceEnd
|
||||
})}`;
|
||||
return result;
|
||||
}
|
||||
|
||||
const suffix = tldts.getPublicSuffix(sliced, looseTldtsOpt);
|
||||
if (!suffix) {
|
||||
// This excludes domain-like resources like `_social_tracking.js^`
|
||||
result[1] = ParseType.Null;
|
||||
return result;
|
||||
}
|
||||
|
||||
const domain = normalizeDomain(_domain);
|
||||
const domain = normalizeDomain(sliced);
|
||||
if (domain) {
|
||||
result[0] = domain;
|
||||
result[1] = ParseType.BlackAbsolute;
|
||||
|
||||
if (white) {
|
||||
result[1] = includeAllSubDomain ? ParseType.WhiteIncludeSubdomain : ParseType.WhiteAbsolute;
|
||||
} else {
|
||||
result[1] = includeAllSubDomain ? ParseType.BlackIncludeSubdomain : ParseType.BlackAbsolute;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
result[0] = `[parse-filter E0005] (black) invalid domain: ${_domain}`;
|
||||
result[1] = ParseType.ErrorMessage;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Possibly that entire rule is domain
|
||||
|
||||
/**
|
||||
* lineStartsWithSingleDot:
|
||||
*
|
||||
* `.cookielaw.js`
|
||||
* `.content_tracking.js`
|
||||
* `.ads.css`
|
||||
*
|
||||
* else:
|
||||
*
|
||||
* `_prebid.js`
|
||||
* `t.yesware.com`
|
||||
* `ubmcmm.baidustatic.com`
|
||||
* `://www.smfg-card.$document`
|
||||
* `portal.librus.pl$$advertisement-module`
|
||||
* `@@-ds.metric.gstatic.com^|`
|
||||
* `://gom.ge/cookie.js`
|
||||
* `://accout-update-smba.jp.$document`
|
||||
* `_200x250.png`
|
||||
* `@@://www.liquidweb.com/kb/wp-content/themes/lw-kb-theme/images/ads/vps-sidebar.jpg`
|
||||
*/
|
||||
let sliceStart = 0;
|
||||
let sliceEnd = line.length;
|
||||
let isWhieList = false;
|
||||
|
||||
if (lineStartsWithSingleDot) {
|
||||
// .usercentrics.eu^
|
||||
sliceStart = 1;
|
||||
} else if (firstCharCode === 58 /** : */ && line.startsWith('://')) {
|
||||
// ://backcb.one^$all
|
||||
sliceStart = 3;
|
||||
}
|
||||
|
||||
if (line.endsWith('$all')) {
|
||||
sliceEnd -= 4;
|
||||
} else if (line.endsWith('$document')) {
|
||||
sliceEnd -= 9;
|
||||
} else if (line.endsWith('$badfilter')) {
|
||||
isWhieList = true;
|
||||
sliceEnd -= 10;
|
||||
}
|
||||
|
||||
const charBeforeModifier = line.charCodeAt(sliceEnd - 1);
|
||||
if (
|
||||
charBeforeModifier === 94 /** ^$all, ^$document, etc. */
|
||||
|| charBeforeModifier === 46 /** .$all */
|
||||
) {
|
||||
sliceEnd -= 1;
|
||||
}
|
||||
|
||||
const sliced = (sliceStart !== 0 || sliceEnd !== line.length) ? line.slice(sliceStart, sliceEnd) : line;
|
||||
|
||||
const tryNormalizeDomain = normalizeDomain(sliced);
|
||||
if (tryNormalizeDomain === sliced) {
|
||||
// the entire rule is domain
|
||||
result[0] = sliced;
|
||||
result[1] = isWhieList
|
||||
? ParseType.WhiteIncludeSubdomain
|
||||
: ParseType.BlackIncludeSubdomain;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
console.log({
|
||||
line,
|
||||
lineEndsWithCaret,
|
||||
lineEndsWithCaretOrCaretVerticalBar,
|
||||
lineEndsWithCaretVerticalBar
|
||||
});
|
||||
|
||||
result[0] = `[parse-filter ${tryNormalizeDomain === null ? 'E0010' : 'E0011'}] can not parse: ${JSON.stringify({ line, tryNormalizeDomain, sliced, sliceStart, sliceEnd })}`;
|
||||
result[0] = `[parse-filter E0010] (${white ? 'white' : 'black'}) invalid domain: ${JSON.stringify({
|
||||
line, domain, suffix, sliced, sliceStart, sliceEnd
|
||||
})}`;
|
||||
result[1] = ParseType.ErrorMessage;
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -67,6 +67,14 @@ DOMAIN-KEYWORD,-attr.appsflyersdk.com
|
||||
DOMAIN-KEYWORD,-s2s.sensic.net
|
||||
DOMAIN-KEYWORD,-rtb.gravite.net
|
||||
|
||||
# >> Migrate from EasyPrivacy
|
||||
DOMAIN-KEYWORD,analytics-cdn.
|
||||
DOMAIN-KEYWORD,backstory.ebay.
|
||||
DOMAIN-KEYWORD,click.rum.
|
||||
DOMAIN-KEYWORD,cmpworker.
|
||||
DOMAIN-KEYWORD,insights-collector.
|
||||
DOMAIN-KEYWORD,track.opentable.
|
||||
|
||||
DOMAIN-WILDCARD,f-log*.grammarly.io
|
||||
DOMAIN-WILDCARD,*.ad.*.prod.hosts.ooklaserver.net
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user