mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Refactor: rework reject hosts parser
This commit is contained in:
parent
d80ea1ecd8
commit
065eeff91f
@ -20,6 +20,9 @@ const warnOnce = (url, isWhite, ...message) => {
|
||||
console.warn(url, isWhite ? '(white)' : '(black)', ...message);
|
||||
};
|
||||
|
||||
/**
|
||||
* @param {string} domain
|
||||
*/
|
||||
const normalizeDomain = (domain) => {
|
||||
if (!domain) return null;
|
||||
|
||||
@ -28,7 +31,10 @@ const normalizeDomain = (domain) => {
|
||||
|
||||
if (parsed.isIcann || parsed.isPrivate) {
|
||||
const h = parsed.hostname;
|
||||
return h?.[0] === '.' ? h.slice(1) : h;
|
||||
|
||||
if (h === null) return null;
|
||||
|
||||
return h[0] === '.' ? h.slice(1) : h;
|
||||
}
|
||||
|
||||
return null;
|
||||
@ -259,6 +265,7 @@ function parse($line, gorhill) {
|
||||
|
||||
const line = $line.trim();
|
||||
|
||||
/** @example line.length */
|
||||
const len = line.length;
|
||||
if (len === 0) {
|
||||
return null;
|
||||
@ -268,15 +275,13 @@ function parse($line, gorhill) {
|
||||
const lastChar = line[len - 1];
|
||||
|
||||
if (
|
||||
len === 0
|
||||
|| firstChar === '/'
|
||||
firstChar === '/'
|
||||
// ends with
|
||||
|| lastChar === '.' // || line.endsWith('.')
|
||||
|| lastChar === '-' // || line.endsWith('-')
|
||||
|| lastChar === '_' // || line.endsWith('_')
|
||||
// special modifier
|
||||
|| R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2.test(line)
|
||||
|| ((line.includes('/') || line.includes(':')) && !line.includes('://'))
|
||||
// || line.includes('$popup')
|
||||
// || line.includes('$removeparam')
|
||||
// || line.includes('$popunder')
|
||||
@ -284,6 +289,10 @@ function parse($line, gorhill) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if ((line.includes('/') || line.includes(':')) && !line.includes('://')) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const filter = NetworkFilter.parse(line);
|
||||
if (filter) {
|
||||
if (
|
||||
@ -352,9 +361,11 @@ function parse($line, gorhill) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/** @example line.endsWith('^') */
|
||||
const linedEndsWithCaret = lastChar === '^';
|
||||
/** @example line.endsWith('^|') */
|
||||
const lineEndsWithCaretVerticalBar = lastChar === '|' && line[len - 2] === '^';
|
||||
|
||||
/** @example line.endsWith('^') || line.endsWith('^|') */
|
||||
const lineEndsWithCaretOrCaretVerticalBar = linedEndsWithCaret || lineEndsWithCaretVerticalBar;
|
||||
|
||||
// whitelist (exception)
|
||||
@ -379,8 +390,19 @@ function parse($line, gorhill) {
|
||||
* `@@|adsterra.com^|`
|
||||
*/
|
||||
if (
|
||||
// (line.startsWith('@@|') || line.startsWith('@@.'))
|
||||
(line[2] === '|' || line[2] === '.')
|
||||
(
|
||||
// line.startsWith('@@|')
|
||||
line[2] === '|'
|
||||
// line.startsWith('@@.')
|
||||
|| line[2] === '.'
|
||||
/**
|
||||
* line.startsWith('@@://')
|
||||
*
|
||||
* `@@://googleadservices.com^|`
|
||||
* `@@://www.googleadservices.com^|`
|
||||
*/
|
||||
|| (line[2] === ':' && line[3] === '/' && line[4] === '/')
|
||||
)
|
||||
&& (
|
||||
lineEndsWithCaretOrCaretVerticalBar
|
||||
|| line.endsWith('$genericblock')
|
||||
@ -389,6 +411,7 @@ function parse($line, gorhill) {
|
||||
) {
|
||||
const _domain = line
|
||||
.replace('@@||', '')
|
||||
.replace('@@://', '')
|
||||
.replace('@@|', '')
|
||||
.replace('@@.', '')
|
||||
.replace('^|', '')
|
||||
@ -409,37 +432,41 @@ function parse($line, gorhill) {
|
||||
}
|
||||
}
|
||||
|
||||
if (firstChar === '|' && (lineEndsWithCaretOrCaretVerticalBar || line.endsWith('$cname'))) {
|
||||
/**
|
||||
* Some malformed filters can not be parsed by NetworkFilter:
|
||||
*
|
||||
* `||smetrics.teambeachbody.com^.com^`
|
||||
* `||solutions.|pages.indigovision.com^`
|
||||
* `||vystar..0rg@client.iebetanialaargentina.edu.co^`
|
||||
*/
|
||||
if (firstChar === '|') {
|
||||
const lineEndswithCname = line.endsWith('$cname');
|
||||
|
||||
const includeAllSubDomain = line[1] === '|';
|
||||
if (lineEndsWithCaretOrCaretVerticalBar || lineEndswithCname) {
|
||||
/**
|
||||
* Some malformed filters can not be parsed by NetworkFilter:
|
||||
*
|
||||
* `||smetrics.teambeachbody.com^.com^`
|
||||
* `||solutions.|pages.indigovision.com^`
|
||||
* `||vystar..0rg@client.iebetanialaargentina.edu.co^`
|
||||
* `app-uat.latrobehealth.com.au^predirect.snapdeal.com`
|
||||
*/
|
||||
|
||||
const sliceStart = includeAllSubDomain ? 2 : 1;
|
||||
const sliceEnd = lastChar === '^'
|
||||
? -1
|
||||
: lineEndsWithCaretOrCaretVerticalBar
|
||||
? -2
|
||||
// eslint-disable-next-line sukka/unicorn/no-nested-ternary -- speed
|
||||
: (line.endsWith('$cname') ? -6 : 0);
|
||||
const includeAllSubDomain = line[1] === '|';
|
||||
|
||||
const _domain = line
|
||||
// .replace('||', '')
|
||||
.slice(sliceStart, sliceEnd) // we already make sure line startsWith ||
|
||||
.trim();
|
||||
const sliceStart = includeAllSubDomain ? 2 : 1;
|
||||
const sliceEnd = lastChar === '^'
|
||||
? -1
|
||||
: lineEndsWithCaretOrCaretVerticalBar
|
||||
? -2
|
||||
// eslint-disable-next-line sukka/unicorn/no-nested-ternary -- speed
|
||||
: (lineEndswithCname ? -6 : 0);
|
||||
|
||||
const domain = normalizeDomain(_domain);
|
||||
if (domain) {
|
||||
return [domain, includeAllSubDomain ? 2 : 1];
|
||||
const _domain = line
|
||||
.slice(sliceStart, sliceEnd) // we already make sure line startsWith "|"
|
||||
.trim();
|
||||
|
||||
const domain = normalizeDomain(_domain);
|
||||
if (domain) {
|
||||
return [domain, includeAllSubDomain ? 2 : 1];
|
||||
}
|
||||
console.warn(' * [parse-filter E0002] (black) invalid domain:', _domain);
|
||||
|
||||
return null;
|
||||
}
|
||||
console.warn(' * [parse-filter E0002] (black) invalid domain:', _domain);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
const lineStartsWithSingleDot = firstChar === '.';
|
||||
@ -452,16 +479,12 @@ function parse($line, gorhill) {
|
||||
* `.m.bookben.com^`
|
||||
* `.wap.x4399.com^`
|
||||
*/
|
||||
const _domain = line
|
||||
.slice(
|
||||
1,
|
||||
linedEndsWithCaret
|
||||
? -1
|
||||
: (lineEndsWithCaretVerticalBar ? -2 : 0)
|
||||
) // remove prefix dot
|
||||
.replace('^|', '')
|
||||
.replaceAll('^', '')
|
||||
.trim();
|
||||
const _domain = line.slice(
|
||||
1, // remove prefix dot
|
||||
linedEndsWithCaret // replaceAll('^', '')
|
||||
? -1
|
||||
: (lineEndsWithCaretVerticalBar ? -2 : 0) // replace('^|', '')
|
||||
);
|
||||
|
||||
const suffix = gorhill.getPublicSuffix(_domain);
|
||||
if (!gorhill.suffixInPSL(suffix)) {
|
||||
@ -479,10 +502,10 @@ function parse($line, gorhill) {
|
||||
}
|
||||
|
||||
/**
|
||||
* `|http://x.o2.pl^`
|
||||
* `://mine.torrent.pw^`
|
||||
* `://say.ac^`
|
||||
*/
|
||||
* `|http://x.o2.pl^`
|
||||
* `://mine.torrent.pw^`
|
||||
* `://say.ac^`
|
||||
*/
|
||||
if (
|
||||
(
|
||||
line.startsWith('://')
|
||||
@ -513,13 +536,14 @@ function parse($line, gorhill) {
|
||||
}
|
||||
|
||||
/**
|
||||
* `_vmind.qqvideo.tc.qq.com^`
|
||||
* `arketing.indianadunes.com^`
|
||||
* `charlestownwyllie.oaklawnnonantum.com^`
|
||||
* `-telemetry.officeapps.live.com^`
|
||||
* `-tracker.biliapi.net`
|
||||
* `_social_tracking.js^`
|
||||
*/
|
||||
* `_vmind.qqvideo.tc.qq.com^`
|
||||
* `arketing.indianadunes.com^`
|
||||
* `charlestownwyllie.oaklawnnonantum.com^`
|
||||
* `-telemetry.officeapps.live.com^`
|
||||
* `-tracker.biliapi.net`
|
||||
* `-logging.nextmedia.com`
|
||||
* `_social_tracking.js^`
|
||||
*/
|
||||
if (firstChar !== '|' && lastChar === '^') {
|
||||
const _domain = line.slice(0, -1);
|
||||
|
||||
@ -538,35 +562,48 @@ function parse($line, gorhill) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* `.3.n.2.2.l30.js`
|
||||
* `_prebid.js`
|
||||
* `t.yesware.com`
|
||||
* `ubmcmm.baidustatic.com`
|
||||
* `portal.librus.pl$$advertisement-module`
|
||||
* `@@-ds.metric.gstatic.com^|`
|
||||
* `://gom.ge/cookie.js`
|
||||
* `://accout-update-smba.jp.$document`
|
||||
* `@@://googleadservices.com^|`
|
||||
*/
|
||||
const tryNormalizeDomain = normalizeDomain(line);
|
||||
if (tryNormalizeDomain) {
|
||||
if (lineStartsWithSingleDot) {
|
||||
/**
|
||||
* `.cookielaw.js`
|
||||
* `.content_tracking.js`
|
||||
* `.ads.css`
|
||||
*/
|
||||
const _domain = line.slice(1);
|
||||
|
||||
const suffix = gorhill.getPublicSuffix(_domain);
|
||||
if (!suffix || !gorhill.suffixInPSL(suffix)) {
|
||||
// This exclude domain-like resource like `.gatracking.js`, `.beacon.min.js` and `.cookielaw.js`
|
||||
return null;
|
||||
}
|
||||
|
||||
const tryNormalizeDomain = normalizeDomain(_domain);
|
||||
if (tryNormalizeDomain === _domain) {
|
||||
// the entire rule is domain
|
||||
return [line, 2];
|
||||
}
|
||||
} else {
|
||||
/**
|
||||
* `_prebid.js`
|
||||
* `t.yesware.com`
|
||||
* `ubmcmm.baidustatic.com`
|
||||
* `://www.smfg-card.$document`
|
||||
* `portal.librus.pl$$advertisement-module`
|
||||
* `@@-ds.metric.gstatic.com^|`
|
||||
* `://gom.ge/cookie.js`
|
||||
* `://accout-update-smba.jp.$document`
|
||||
* `_200x250.png`
|
||||
* `@@://www.liquidweb.com/kb/wp-content/themes/lw-kb-theme/images/ads/vps-sidebar.jpg`
|
||||
*/
|
||||
const tryNormalizeDomain = normalizeDomain(line);
|
||||
if (tryNormalizeDomain === line) {
|
||||
// the entire rule is domain
|
||||
return [line, 2];
|
||||
}
|
||||
if (lineStartsWithSingleDot && tryNormalizeDomain === line.slice(1)) {
|
||||
// dot prefixed line has stripped
|
||||
return [line, 2];
|
||||
}
|
||||
}
|
||||
|
||||
if (!line.endsWith('.js') && !line.endsWith('.css')) {
|
||||
console.warn(' * [parse-filter E0010] can not parse:', line);
|
||||
}
|
||||
console.warn(' * [parse-filter E0010] can not parse:', line);
|
||||
|
||||
return null;
|
||||
/* eslint-enable no-nested-ternary */
|
||||
}
|
||||
|
||||
module.exports.processDomainLists = processDomainLists;
|
||||
|
||||
@ -27,7 +27,9 @@ const ADGUARD_FILTERS = /** @type {const} */([
|
||||
[
|
||||
'https://secure.fanboy.co.nz/easyprivacy.txt',
|
||||
'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easyprivacy.txt',
|
||||
'https://easylist-downloads.adblockplus.org/easyprivacy.txt'
|
||||
'https://easylist-downloads.adblockplus.org/easyprivacy.txt',
|
||||
'https://ublockorigin.github.io/uAssets/thirdparties/easyprivacy.txt',
|
||||
'https://ublockorigin.pages.dev/thirdparties/easyprivacy.txt'
|
||||
]
|
||||
],
|
||||
// AdGuard DNS Filter
|
||||
@ -45,70 +47,42 @@ const ADGUARD_FILTERS = /** @type {const} */([
|
||||
'https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers.txt',
|
||||
// uBlock Origin Filter List
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/filters.txt',
|
||||
'https://ublockorigin.github.io/uAssets/filters/filters.min.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/filters.txt',
|
||||
'https://ublockorigin.pages.dev/filters/filters.txt'
|
||||
]
|
||||
],
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/filters-2020.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2020.txt',
|
||||
'https://ublockorigin.pages.dev/filters/filters-2020.txt'
|
||||
]
|
||||
],
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/filters-2021.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2021.txt',
|
||||
'https://ublockorigin.pages.dev/filters/filters-2021.txt'
|
||||
]
|
||||
],
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/filters-2022.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2022.txt',
|
||||
'https://ublockorigin.pages.dev/filters/filters-2022.txt'
|
||||
]
|
||||
],
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/filters-2023.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2023.txt',
|
||||
'https://ublockorigin.pages.dev/filters/filters-2023.txt'
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/filters.min.txt',
|
||||
'https://ublockorigin.pages.dev/filters/filters.min.txt'
|
||||
]
|
||||
],
|
||||
// uBlock Origin Badware Risk List
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/badware.txt',
|
||||
'https://ublockorigin.github.io/uAssets/filters/badware.min.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/badware.txt',
|
||||
'https://ublockorigin.pages.dev/filters/badware.txt'
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/badware.min.txt',
|
||||
'https://ublockorigin.pages.dev/filters/badware.min.txt'
|
||||
]
|
||||
],
|
||||
// uBlock Origin Privacy List
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/privacy.txt',
|
||||
'https://ublockorigin.github.io/uAssets/filters/privacy.min.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.txt',
|
||||
'https://ublockorigin.pages.dev/filters/privacy.txt'
|
||||
]
|
||||
],
|
||||
// uBlock Origin Resource Abuse
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/resource-abuse.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/resource-abuse.txt',
|
||||
'https://ublockorigin.pages.dev/filters/resource-abuse.txt'
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.min.txt',
|
||||
'https://ublockorigin.pages.dev/filters/privacy.min.txt'
|
||||
]
|
||||
],
|
||||
// uBlock Origin Resource Abuse: merged in uBlock Origin Privacy List
|
||||
// [
|
||||
// 'https://ublockorigin.github.io/uAssets/filters/resource-abuse.txt',
|
||||
// [
|
||||
// 'https://ublockorigin.github.io/uAssetsCDN/filters/resource-abuse.txt',
|
||||
// 'https://ublockorigin.pages.dev/filters/resource-abuse.txt'
|
||||
// ]
|
||||
// ],
|
||||
// uBlock Origin Unbreak
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/unbreak.txt',
|
||||
'https://ublockorigin.github.io/uAssets/filters/unbreak.min.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.txt',
|
||||
'https://ublockorigin.pages.dev/filters/unbreak.txt'
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.min.txt',
|
||||
'https://ublockorigin.pages.dev/filters/unbreak.min.txt'
|
||||
]
|
||||
],
|
||||
// AdGuard Base Filter
|
||||
@ -171,9 +145,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
|
||||
// BarbBlock
|
||||
'https://paulgb.github.io/BarbBlock/blacklists/ublock-origin.txt',
|
||||
// Brave First Party & First Party CNAME
|
||||
'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty.txt',
|
||||
'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty-cname.txt',
|
||||
'https://raw.githubusercontent.com/brave/adblock-lists/master/coin-miners.txt'
|
||||
'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty.txt'
|
||||
]);
|
||||
|
||||
const PREDEFINED_WHITELIST = [
|
||||
|
||||
@ -340,6 +340,8 @@ th.bing.com
|
||||
images.ecomm.microsoft.com
|
||||
.yammerusercontent.com
|
||||
.assets-yammer.com
|
||||
.edgestorewebcdn.azureedge.net
|
||||
img-s-msn-com.akamaized.net
|
||||
# Microsoft Cookie Conscent
|
||||
wcpstatic.microsoft.com
|
||||
# Xbox
|
||||
|
||||
@ -519,6 +519,11 @@ www.kuguopush.com
|
||||
.adcast.deviantart.com
|
||||
.inside.bitcomet.com
|
||||
.cdnads.com
|
||||
.stats.esomniture.com
|
||||
.adalyser.com
|
||||
.tradedoubler.com
|
||||
.xiti.com
|
||||
.cjt1.net
|
||||
|
||||
.youxiaoad.com
|
||||
.iteye.com
|
||||
@ -1749,7 +1754,6 @@ ntp.msn.cn
|
||||
assets.msn.cn
|
||||
api.msn.com
|
||||
browser.events.data.msn.com
|
||||
img-s-msn-com.akamaized.net
|
||||
|
||||
# >> OPPO
|
||||
adsfs.oppomobile.com
|
||||
@ -1820,6 +1824,7 @@ adserve2.tom.com
|
||||
.discovery.tom.com
|
||||
|
||||
# brightdata (luminati) SDK
|
||||
.l-err.biz
|
||||
.lum-sdk.io
|
||||
.luminatinet.com
|
||||
.luminati.io
|
||||
@ -1829,3 +1834,4 @@ adserve2.tom.com
|
||||
.hola.org
|
||||
.h-vpn.org
|
||||
.holashop.org
|
||||
.svd-cdn.com
|
||||
|
||||
@ -83,7 +83,7 @@ DOMAIN-SUFFIX,tw1.ru
|
||||
# >> General
|
||||
|
||||
DOMAIN-KEYWORD,track.tiara
|
||||
DOMAIN-KEYWORD,adservice
|
||||
# DOMAIN-KEYWORD,adservice # conflict with @@://www.googleadservices.com^|
|
||||
DOMAIN-KEYWORD,umeng
|
||||
DOMAIN-KEYWORD,adsby
|
||||
DOMAIN-KEYWORD,adsdk
|
||||
@ -108,6 +108,8 @@ DOMAIN-KEYWORD,_vmind.qqvideo.tc.qq.com
|
||||
DOMAIN-KEYWORD,-logging.nextmedia.com
|
||||
DOMAIN-KEYWORD,-spiky.clevertap-prod.com
|
||||
DOMAIN-KEYWORD,.engage.3m.
|
||||
# -telemetry.officeapps.live.com.mcas.ms
|
||||
# -telemetry.officeapps.live.com
|
||||
DOMAIN-KEYWORD,telemetry.officeapps.live.com
|
||||
DOMAIN-KEYWORD,-launches.appsflyersdk.com
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user