mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Perf: speed up infra
This commit is contained in:
parent
c2022ce61d
commit
23c9a963aa
@ -15,17 +15,17 @@ const EXCLUDE_CIDRS = [
|
||||
runner(__filename, async () => {
|
||||
const { exclude: excludeCidrs } = await import('cidr-tools-wasm');
|
||||
|
||||
/** @type {Set<string>} */
|
||||
const cidr = new Set();
|
||||
/** @type {string[]} */
|
||||
const cidr = [];
|
||||
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')) {
|
||||
const l = processLine(line);
|
||||
if (l) {
|
||||
cidr.add(l);
|
||||
cidr.push(l);
|
||||
}
|
||||
}
|
||||
|
||||
console.log('Before Merge:', cidr.size);
|
||||
const filteredCidr = excludeCidrs(Array.from(cidr), EXCLUDE_CIDRS, true);
|
||||
console.log('Before Merge:', cidr.length);
|
||||
const filteredCidr = excludeCidrs(cidr, EXCLUDE_CIDRS, true);
|
||||
console.log('After Merge:', filteredCidr.length);
|
||||
|
||||
const description = [
|
||||
|
||||
@ -55,8 +55,7 @@ runner(__filename, async () => {
|
||||
`${domain} = server:${dns}`,
|
||||
`*.${domain} = server:${dns}`
|
||||
])
|
||||
),
|
||||
''
|
||||
)
|
||||
],
|
||||
path.resolve(__dirname, '../Modules/sukka_local_dns_mapping.sgmodule')
|
||||
)
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
// @ts-check
|
||||
const fs = require('fs');
|
||||
const fse = require('fs-extra');
|
||||
const path = require('path');
|
||||
const { isDomainLoose } = require('./lib/is-domain-loose');
|
||||
@ -8,6 +7,7 @@ const { processLine } = require('./lib/process-line');
|
||||
const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
|
||||
const domainSorter = require('./lib/stable-sort-domain');
|
||||
const { runner } = require('./lib/trace-runner');
|
||||
const { compareAndWriteFile } = require('./lib/create-file');
|
||||
|
||||
/**
|
||||
* @param {string} string
|
||||
@ -77,12 +77,11 @@ runner(__filename, async () => {
|
||||
fse.ensureDir(path.resolve(__dirname, '../List/internal'))
|
||||
]);
|
||||
|
||||
await fs.promises.writeFile(
|
||||
path.resolve(__dirname, '../List/internal/cdn.txt'),
|
||||
await compareAndWriteFile(
|
||||
[
|
||||
...Array.from(set).sort(domainSorter).map(i => `SUFFIX,${i}`),
|
||||
...Array.from(keywords).sort().map(i => `REGEX,${i}`),
|
||||
''
|
||||
].join('\n')
|
||||
...Array.from(keywords).sort().map(i => `REGEX,${i}`)
|
||||
],
|
||||
path.resolve(__dirname, '../List/internal/cdn.txt')
|
||||
);
|
||||
});
|
||||
|
||||
@ -27,18 +27,18 @@ const RESERVED_IPV4_CIDR = [
|
||||
runner(__filename, async () => {
|
||||
const { exclude } = await import('cidr-tools-wasm');
|
||||
|
||||
/** @type {Set<string>} */
|
||||
const cidr = new Set();
|
||||
/** @type {string[]} */
|
||||
const cidr = [];
|
||||
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')) {
|
||||
const l = processLine(line);
|
||||
if (l) {
|
||||
cidr.add(l);
|
||||
cidr.push(l);
|
||||
}
|
||||
}
|
||||
|
||||
const reversedCidr = exclude(
|
||||
['0.0.0.0/0'],
|
||||
RESERVED_IPV4_CIDR.concat(Array.from(cidr)),
|
||||
RESERVED_IPV4_CIDR.concat(cidr),
|
||||
true
|
||||
);
|
||||
|
||||
|
||||
@ -95,7 +95,7 @@ const PRESET_MITM_HOSTNAMES = [
|
||||
}));
|
||||
|
||||
let mitmDomains = new Set(PRESET_MITM_HOSTNAMES); // Special case for parsed failed
|
||||
const parsedFailures = new Set();
|
||||
const parsedFailures = [];
|
||||
|
||||
const dedupedUrlRegexPaths = [...new Set(urlRegexPaths)];
|
||||
|
||||
|
||||
@ -62,9 +62,7 @@ const BLACK_TLD = new Set([
|
||||
|
||||
runner(__filename, async () => {
|
||||
const domainSet = Array.from(
|
||||
(
|
||||
await processFilterRules('https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt')
|
||||
).black
|
||||
(await processFilterRules('https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt')).black
|
||||
);
|
||||
const domainCountMap = {};
|
||||
|
||||
|
||||
@ -50,13 +50,9 @@ const domainSuffixSet = new Set();
|
||||
const { white, black, foundDebugDomain } = i;
|
||||
if (foundDebugDomain) {
|
||||
shouldStop = true;
|
||||
// we should not break here, as we want to see full matches from all data source
|
||||
}
|
||||
white.forEach(i => {
|
||||
// if (PREDEFINED_ENFORCED_BACKLIST.some(j => i.endsWith(j))) {
|
||||
// return;
|
||||
// }
|
||||
filterRuleWhitelistDomainSets.add(i);
|
||||
});
|
||||
white.forEach(i => filterRuleWhitelistDomainSets.add(i));
|
||||
black.forEach(i => domainSets.add(i));
|
||||
} else {
|
||||
process.exitCode = 1;
|
||||
@ -71,15 +67,9 @@ const domainSuffixSet = new Set();
|
||||
if (i) {
|
||||
const { white, black } = i;
|
||||
white.forEach(i => {
|
||||
// if (PREDEFINED_ENFORCED_BACKLIST.some(j => i.endsWith(j))) {
|
||||
// return;
|
||||
// }
|
||||
filterRuleWhitelistDomainSets.add(i);
|
||||
});
|
||||
black.forEach(i => {
|
||||
// if (PREDEFINED_ENFORCED_BACKLIST.some(j => i.endsWith(j))) {
|
||||
// return;
|
||||
// }
|
||||
filterRuleWhitelistDomainSets.add(i);
|
||||
});
|
||||
} else {
|
||||
@ -89,7 +79,8 @@ const domainSuffixSet = new Set();
|
||||
})))
|
||||
]);
|
||||
|
||||
const trie0 = Trie.from(Array.from(filterRuleWhitelistDomainSets));
|
||||
// remove pre-defined enforced blacklist from whitelist
|
||||
const trie0 = Trie.from(filterRuleWhitelistDomainSets);
|
||||
PREDEFINED_ENFORCED_BACKLIST.forEach(enforcedBlack => {
|
||||
trie0.find(enforcedBlack).forEach(found => filterRuleWhitelistDomainSets.delete(found));
|
||||
});
|
||||
@ -140,7 +131,7 @@ const domainSuffixSet = new Set();
|
||||
|
||||
const kwfilter = createKeywordFilter(Array.from(domainKeywordsSet));
|
||||
|
||||
const trie1 = Trie.from(Array.from(domainSets));
|
||||
const trie1 = Trie.from(domainSets);
|
||||
domainSuffixSet.forEach(suffix => {
|
||||
trie1.find(suffix, true).forEach(f => domainSets.delete(f));
|
||||
});
|
||||
@ -149,7 +140,7 @@ const domainSuffixSet = new Set();
|
||||
});
|
||||
|
||||
// Build whitelist trie, to handle case like removing `g.msn.com` due to white `.g.msn.com` (`@@||g.msn.com`)
|
||||
const trieWhite = Trie.from(Array.from(filterRuleWhitelistDomainSets));
|
||||
const trieWhite = Trie.from(filterRuleWhitelistDomainSets);
|
||||
for (const domain of domainSets) {
|
||||
if (domain[0] === '.') {
|
||||
if (trieWhite.contains(domain)) {
|
||||
|
||||
@ -49,8 +49,8 @@ runner(__filename, async () => {
|
||||
* @param {string} sourcePath
|
||||
*/
|
||||
const processFile = async (sourcePath) => {
|
||||
/** @type {Set<string>} */
|
||||
const lines = new Set();
|
||||
/** @type {string[]} */
|
||||
const lines = [];
|
||||
|
||||
let title = '';
|
||||
/** @type {string[]} */
|
||||
@ -73,7 +73,7 @@ const processFile = async (sourcePath) => {
|
||||
|
||||
const l = processLine(line);
|
||||
if (l) {
|
||||
lines.add(l);
|
||||
lines.push(l);
|
||||
}
|
||||
}
|
||||
|
||||
@ -89,7 +89,7 @@ async function transformDomainset(sourcePath, relativePath) {
|
||||
if (!res) return;
|
||||
const [title, descriptions, lines] = res;
|
||||
|
||||
const deduped = domainDeduper(Array.from(lines));
|
||||
const deduped = domainDeduper(lines);
|
||||
const description = [
|
||||
'License: AGPL 3.0',
|
||||
'Homepage: https://ruleset.skk.moe',
|
||||
@ -121,7 +121,7 @@ async function transformDomainset(sourcePath, relativePath) {
|
||||
async function transformRuleset(sourcePath, relativePath) {
|
||||
const res = await processFile(sourcePath);
|
||||
if (!res) return;
|
||||
const [title, descriptions, set] = res;
|
||||
const [title, descriptions, lines] = res;
|
||||
|
||||
const description = [
|
||||
'License: AGPL 3.0',
|
||||
@ -138,7 +138,7 @@ async function transformRuleset(sourcePath, relativePath) {
|
||||
title,
|
||||
description,
|
||||
new Date(),
|
||||
Array.from(set),
|
||||
lines,
|
||||
'ruleset',
|
||||
path.resolve(outputSurgeDir, relativePath),
|
||||
path.resolve(outputClashDir, `${relativePath.slice(0, -path.extname(relativePath).length)}.txt`)
|
||||
|
||||
@ -36,6 +36,7 @@ runner(__filename, async () => {
|
||||
|
||||
if (!allFileExists) {
|
||||
console.log(`File not exists: ${line}`);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// @ts-check
|
||||
const { promises: fsPromises } = require('fs');
|
||||
const fs = require('fs');
|
||||
const fse = require('fs-extra');
|
||||
const { readFileByLine } = require('./fetch-remote-text-by-line');
|
||||
const { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } = require('./clash');
|
||||
@ -28,18 +28,35 @@ async function compareAndWriteFile(linesA, filePath) {
|
||||
}
|
||||
}
|
||||
|
||||
if (!isEqual || index !== linesA.length - 1) {
|
||||
await fsPromises.writeFile(
|
||||
filePath,
|
||||
linesA.join('\n'),
|
||||
{ encoding: 'utf-8' }
|
||||
);
|
||||
if (!isEqual || index !== linesA.length) {
|
||||
const stream = fs.createWriteStream(filePath, { encoding: 'utf-8' });
|
||||
|
||||
for (let i = 0, len = linesA.length; i < len; i++) {
|
||||
// eslint-disable-next-line no-await-in-loop -- backpressure
|
||||
await writeToStream(stream, linesA[i]);
|
||||
// eslint-disable-next-line no-await-in-loop -- backpressure
|
||||
await writeToStream(stream, '\n');
|
||||
}
|
||||
stream.end();
|
||||
} else {
|
||||
console.log(`Same Content, bail out writing: ${filePath}`);
|
||||
}
|
||||
}
|
||||
module.exports.compareAndWriteFile = compareAndWriteFile;
|
||||
|
||||
/**
 * Write one chunk to a writable stream and wait until the stream is ready
 * to accept more data.
 *
 * When `stream.write()` returns `false`, the returned promise settles on
 * whichever of these happens first:
 *  - `'drain'` is emitted: the promise resolves;
 *  - `'error'` is emitted: the promise rejects with that error, so a failure
 *    while waiting is reported to the caller instead of leaving the promise
 *    pending.
 *
 * When `stream.write()` returns `true`, the promise resolves without
 * attaching any listener.
 *
 * @param {import('fs').WriteStream} stream
 * @param {string} data
 * @returns {Promise<void>}
 */
async function writeToStream(stream, data) {
  if (stream.write(data)) {
    return;
  }

  await /** @type {Promise<void>} */(new Promise((resolve, reject) => {
    const onDrain = () => {
      // Do not leave the error listener behind once the wait is over
      stream.off('error', onError);
      resolve();
    };
    const onError = (err) => {
      stream.off('drain', onDrain);
      reject(err);
    };

    stream.once('drain', onDrain);
    stream.once('error', onError);
  }));
}
|
||||
|
||||
/**
|
||||
* @param {string} title
|
||||
* @param {string[]} description
|
||||
@ -56,8 +73,7 @@ const withBannerArray = (title, description, date, content) => {
|
||||
...description.map(line => (line ? `# ${line}` : '#')),
|
||||
'########################################',
|
||||
...content,
|
||||
'################# END ###################',
|
||||
''
|
||||
'################# END ###################'
|
||||
];
|
||||
};
|
||||
module.exports.withBannerArray = withBannerArray;
|
||||
|
||||
@ -4,6 +4,7 @@ const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-te
|
||||
const { NetworkFilter } = require('@cliqz/adblocker');
|
||||
const { normalizeDomain } = require('./is-domain-loose');
|
||||
const { processLine } = require('./process-line');
|
||||
const { performance } = require('perf_hooks');
|
||||
|
||||
const DEBUG_DOMAIN_TO_FIND = null; // example.com | null
|
||||
let foundDebugDomain = false;
|
||||
@ -98,17 +99,17 @@ const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder)
|
||||
/**
|
||||
* @param {string | URL} filterRulesUrl
|
||||
* @param {readonly (string | URL)[] | undefined} [fallbackUrls]
|
||||
* @returns {Promise<{ white: Set<string>, black: Set<string>, foundDebugDomain: boolean, parseFailed: boolean }>}
|
||||
* @returns {Promise<{ white: Set<string>, black: Set<string>, foundDebugDomain: boolean }>}
|
||||
*/
|
||||
async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdParties = false) {
|
||||
console.time(` - processFilterRules: ${filterRulesUrl}`);
|
||||
const runStart = performance.now();
|
||||
|
||||
/** @type Set<string> */
|
||||
const whitelistDomainSets = new Set();
|
||||
/** @type Set<string> */
|
||||
const blacklistDomainSets = new Set();
|
||||
|
||||
const addToBlackList = (domainToBeAddedToBlack, isSubDomain) => {
|
||||
const __addToBlackList = (domainToBeAddedToBlack, isSubDomain) => {
|
||||
if (DEBUG_DOMAIN_TO_FIND && domainToBeAddedToBlack.includes(DEBUG_DOMAIN_TO_FIND)) {
|
||||
warnOnce(filterRulesUrl.toString(), false, DEBUG_DOMAIN_TO_FIND);
|
||||
foundDebugDomain = true;
|
||||
@ -120,289 +121,341 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart
|
||||
blacklistDomainSets.add(domainToBeAddedToBlack);
|
||||
}
|
||||
};
|
||||
const addToWhiteList = (domainToBeAddedToWhite) => {
|
||||
if (DEBUG_DOMAIN_TO_FIND && domainToBeAddedToWhite.includes(DEBUG_DOMAIN_TO_FIND)) {
|
||||
warnOnce(filterRulesUrl.toString(), true, DEBUG_DOMAIN_TO_FIND);
|
||||
foundDebugDomain = true;
|
||||
}
|
||||
const addToBlackList = DEBUG_DOMAIN_TO_FIND == null
|
||||
? __addToBlackList
|
||||
: (domainToBeAddedToBlack, isSubDomain) => {
|
||||
if (DEBUG_DOMAIN_TO_FIND && domainToBeAddedToBlack.includes(DEBUG_DOMAIN_TO_FIND)) {
|
||||
warnOnce(filterRulesUrl.toString(), false, DEBUG_DOMAIN_TO_FIND);
|
||||
foundDebugDomain = true;
|
||||
}
|
||||
__addToBlackList(domainToBeAddedToBlack, isSubDomain);
|
||||
};
|
||||
|
||||
const __addToWhiteList = (domainToBeAddedToWhite) => {
|
||||
whitelistDomainSets.add(domainToBeAddedToWhite);
|
||||
};
|
||||
const addToWhiteList = DEBUG_DOMAIN_TO_FIND == null
|
||||
? __addToWhiteList
|
||||
: (domainToBeAddedToWhite) => {
|
||||
if (DEBUG_DOMAIN_TO_FIND && domainToBeAddedToWhite.includes(DEBUG_DOMAIN_TO_FIND)) {
|
||||
warnOnce(filterRulesUrl.toString(), true, DEBUG_DOMAIN_TO_FIND);
|
||||
foundDebugDomain = true;
|
||||
}
|
||||
__addToWhiteList(domainToBeAddedToWhite);
|
||||
};
|
||||
|
||||
let filterRules;
|
||||
try {
|
||||
const controller = new AbortController();
|
||||
const signal = controller.signal;
|
||||
let downloadTime = 0;
|
||||
|
||||
/** @type string[] */
|
||||
filterRules = (
|
||||
await Promise.any(
|
||||
[filterRulesUrl, ...(fallbackUrls || [])].map(
|
||||
url => fetchWithRetry(url, { signal })
|
||||
.then(r => r.text())
|
||||
.then(text => {
|
||||
controller.abort();
|
||||
return text;
|
||||
})
|
||||
const lineCb = (line) => {
|
||||
const result = parse(line, includeThirdParties);
|
||||
if (result) {
|
||||
const flag = result[1];
|
||||
const hostname = result[0];
|
||||
switch (flag) {
|
||||
case 0:
|
||||
addToWhiteList(hostname);
|
||||
break;
|
||||
case 1:
|
||||
addToBlackList(hostname, false);
|
||||
break;
|
||||
case 2:
|
||||
addToBlackList(hostname, true);
|
||||
break;
|
||||
default:
|
||||
throw new Error(`Unknown flag: ${flag}`);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if (!fallbackUrls || fallbackUrls.length === 0) {
|
||||
const downloadStart = performance.now();
|
||||
for await (const line of await fetchRemoteTextAndCreateReadlineInterface(filterRulesUrl)) {
|
||||
lineCb(line.trim());
|
||||
}
|
||||
downloadTime = performance.now() - downloadStart;
|
||||
} else {
|
||||
let filterRules;
|
||||
|
||||
const downloadStart = performance.now();
|
||||
try {
|
||||
const controller = new AbortController();
|
||||
const signal = controller.signal;
|
||||
|
||||
/** @type string[] */
|
||||
filterRules = (
|
||||
await Promise.any(
|
||||
[filterRulesUrl, ...(fallbackUrls || [])].map(
|
||||
url => fetchWithRetry(url, { signal })
|
||||
.then(r => r.text())
|
||||
.then(text => {
|
||||
controller.abort();
|
||||
return text;
|
||||
})
|
||||
)
|
||||
)
|
||||
)
|
||||
).split('\n').map(line => line.trim());
|
||||
} catch (e) {
|
||||
console.log(`Download Rule for [${filterRulesUrl}] failed`);
|
||||
throw e;
|
||||
}
|
||||
|
||||
let hasParseFailed = false;
|
||||
|
||||
for (let i = 0, len = filterRules.length; i < len; i++) {
|
||||
const line = filterRules[i].trim();
|
||||
|
||||
if (
|
||||
line === ''
|
||||
|| line[0] === '/'
|
||||
|| R_KNOWN_NOT_NETWORK_FILTER_PATTERN.test(line)
|
||||
// doesn't include
|
||||
|| !line.includes('.') // rule with out dot can not be a domain
|
||||
// includes
|
||||
// || line.includes('#')
|
||||
|| line.includes('!')
|
||||
|| line.includes('?')
|
||||
|| line.includes('*')
|
||||
// || line.includes('=')
|
||||
|| line.includes('[')
|
||||
|| line.includes('(')
|
||||
|| line.includes(']')
|
||||
|| line.includes(')')
|
||||
|| line.includes(',')
|
||||
// || line.includes('~')
|
||||
// || line.includes('&')
|
||||
// || line.includes('%')
|
||||
// ends with
|
||||
|| line.endsWith('.')
|
||||
|| line.endsWith('-')
|
||||
|| line.endsWith('_')
|
||||
// special modifier
|
||||
|| R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2.test(line)
|
||||
|| ((line.includes('/') || line.includes(':')) && !line.includes('://'))
|
||||
// || line.includes('$popup')
|
||||
// || line.includes('$removeparam')
|
||||
// || line.includes('$popunder')
|
||||
) {
|
||||
continue;
|
||||
).split('\n').map(line => line.trim());
|
||||
} catch (e) {
|
||||
console.log(`Download Rule for [${filterRulesUrl}] failed`);
|
||||
throw e;
|
||||
}
|
||||
downloadTime = performance.now() - downloadStart;
|
||||
|
||||
const filter = NetworkFilter.parse(line);
|
||||
if (filter) {
|
||||
if (
|
||||
filter.isElemHide()
|
||||
|| filter.isGenericHide()
|
||||
|| filter.isSpecificHide()
|
||||
|| filter.isRedirect()
|
||||
|| filter.isRedirectRule()
|
||||
|| filter.hasDomains()
|
||||
|| filter.isCSP() // must not be csp rule
|
||||
|| (!filter.fromAny() && !filter.fromDocument())
|
||||
) {
|
||||
// not supported type
|
||||
continue;
|
||||
}
|
||||
|
||||
if (
|
||||
filter.hasHostname() // must have
|
||||
&& filter.isPlain()
|
||||
&& (!filter.isRegex())
|
||||
&& (!filter.isFullRegex())
|
||||
) {
|
||||
const hostname = normalizeDomain(filter.getHostname());
|
||||
if (hostname) {
|
||||
if (filter.isException() || filter.isBadFilter()) {
|
||||
addToWhiteList(hostname);
|
||||
continue;
|
||||
}
|
||||
if (filter.firstParty() === filter.thirdParty()) {
|
||||
addToBlackList(hostname, true);
|
||||
continue;
|
||||
}
|
||||
if (filter.thirdParty()) {
|
||||
if (includeThirdParties) {
|
||||
addToBlackList(hostname, true);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (filter.firstParty()) {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (line.includes('$third-party') || line.includes('$frame')) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const lineEndsWithCaret = line.endsWith('^');
|
||||
const lineEndsWithCaretVerticalBar = line.endsWith('^|');
|
||||
|
||||
if (line[0] === '@' && line[1] === '@') {
|
||||
if (line.endsWith('$cname')) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (
|
||||
// (line.startsWith('@@|') || line.startsWith('@@.'))
|
||||
(
|
||||
line[2] === '|'
|
||||
|| line[2] === '.'
|
||||
)
|
||||
&& (
|
||||
lineEndsWithCaret
|
||||
|| lineEndsWithCaretVerticalBar
|
||||
|| line.endsWith('$genericblock')
|
||||
|| line.endsWith('$document')
|
||||
)
|
||||
) {
|
||||
const _domain = line
|
||||
.replace('@@||', '')
|
||||
.replace('@@|', '')
|
||||
.replace('@@.', '')
|
||||
.replace('^|', '')
|
||||
.replace('^$genericblock', '')
|
||||
.replace('$genericblock', '')
|
||||
.replace('^$document', '')
|
||||
.replace('$document', '')
|
||||
.replaceAll('^', '')
|
||||
.trim();
|
||||
|
||||
const domain = normalizeDomain(_domain);
|
||||
if (domain) {
|
||||
addToWhiteList(domain);
|
||||
} else {
|
||||
console.warn(' * [parse-filter E0001] (black) invalid domain:', _domain);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
line.startsWith('||')
|
||||
&& (
|
||||
lineEndsWithCaret
|
||||
|| lineEndsWithCaretVerticalBar
|
||||
|| line.endsWith('$cname')
|
||||
)
|
||||
) {
|
||||
const _domain = line
|
||||
.replace('||', '')
|
||||
.replace('^|', '')
|
||||
.replace('$cname', '')
|
||||
.replaceAll('^', '')
|
||||
.trim();
|
||||
|
||||
const domain = normalizeDomain(_domain);
|
||||
if (domain) {
|
||||
addToBlackList(domain, true);
|
||||
} else {
|
||||
console.warn(' * [parse-filter E0002] (black) invalid domain:', _domain);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
const lineStartsWithSingleDot = line.startsWith('.');
|
||||
if (
|
||||
lineStartsWithSingleDot
|
||||
&& (
|
||||
lineEndsWithCaret
|
||||
|| lineEndsWithCaretVerticalBar
|
||||
)
|
||||
) {
|
||||
const _domain = line
|
||||
.replace('^|', '')
|
||||
.replaceAll('^', '')
|
||||
.slice(1)
|
||||
.trim();
|
||||
|
||||
const domain = normalizeDomain(_domain);
|
||||
if (domain) {
|
||||
addToBlackList(domain, true);
|
||||
} else {
|
||||
console.warn(' * [parse-filter E0003] (black) invalid domain:', _domain);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (
|
||||
(
|
||||
line.startsWith('://')
|
||||
|| line.startsWith('http://')
|
||||
|| line.startsWith('https://')
|
||||
|| line.startsWith('|http://')
|
||||
|| line.startsWith('|https://')
|
||||
)
|
||||
&& (
|
||||
lineEndsWithCaret
|
||||
|| lineEndsWithCaretVerticalBar
|
||||
)
|
||||
) {
|
||||
const _domain = line
|
||||
.replace('|https://', '')
|
||||
.replace('https://', '')
|
||||
.replace('|http://', '')
|
||||
.replace('http://', '')
|
||||
.replace('://', '')
|
||||
.replace('^|', '')
|
||||
.replaceAll('^', '')
|
||||
.trim();
|
||||
|
||||
const domain = normalizeDomain(_domain);
|
||||
if (domain) {
|
||||
addToBlackList(domain, false);
|
||||
} else {
|
||||
console.warn(' * [parse-filter E0004] (black) invalid domain:', _domain);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (line[0] !== '|' && lineEndsWithCaret) {
|
||||
const _domain = line.slice(0, -1);
|
||||
const domain = normalizeDomain(_domain);
|
||||
if (domain) {
|
||||
addToBlackList(domain, false);
|
||||
} else {
|
||||
console.warn(' * [parse-filter E0005] (black) invalid domain:', _domain);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
const tryNormalizeDomain = normalizeDomain(lineStartsWithSingleDot ? line.slice(1) : line);
|
||||
if (
|
||||
tryNormalizeDomain
|
||||
&& (
|
||||
lineStartsWithSingleDot
|
||||
? tryNormalizeDomain.length === line.length - 1
|
||||
: tryNormalizeDomain === line
|
||||
)
|
||||
) {
|
||||
addToBlackList(line, true);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (
|
||||
!line.endsWith('.js')
|
||||
) {
|
||||
hasParseFailed = true;
|
||||
console.warn(' * [parse-filter E0010] can not parse:', line);
|
||||
for (let i = 0, len = filterRules.length; i < len; i++) {
|
||||
const line = filterRules[i].trim();
|
||||
lineCb(line);
|
||||
}
|
||||
}
|
||||
|
||||
console.timeEnd(` - processFilterRules: ${filterRulesUrl}`);
|
||||
console.log(` ┬ processFilterRules (${filterRulesUrl}): ${(performance.now() - runStart).toFixed(3)}ms`);
|
||||
console.log(` └── download time: ${downloadTime.toFixed(3)}ms`);
|
||||
|
||||
return {
|
||||
white: whitelistDomainSets,
|
||||
black: blacklistDomainSets,
|
||||
foundDebugDomain,
|
||||
parseFailed: hasParseFailed
|
||||
foundDebugDomain
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Classify a single line of a filter list.
 *
 * The line is first screened with cheap string checks, then handed to
 * `NetworkFilter.parse`. If that does not yield a decision, a series of
 * hand-written pattern checks (`@@||…^`, `||…^`, `.…^`, `http(s)://…^`,
 * `…^`, bare domain) is tried in order.
 *
 * A warning is printed with `console.warn` when a line matches one of the
 * hand-written patterns but the extracted text is rejected by
 * `normalizeDomain` (E0001 - E0005), or when nothing matched at all and the
 * line does not end with `.js` (E0010).
 *
 * @param {string} $line
 * @param {boolean} includeThirdParties - when true, rules that `NetworkFilter` reports as third-party only are returned as black (flag 2) instead of being dropped
 * @returns {null | [string, 0 | 1 | 2]} - 0 white, 1 black absolute, 2 black include subdomain
 */
function parse($line, includeThirdParties) {
  const line = $line.trim();

  if (
    line === ''
    || line[0] === '/'
    || R_KNOWN_NOT_NETWORK_FILTER_PATTERN.test(line)
    // a rule without a dot can not be a domain
    || !line.includes('.')
    // lines containing any of these characters are skipped
    // ('#', '=', '~', '&' and '%' are deliberately not part of this check)
    || line.includes('!')
    || line.includes('?')
    || line.includes('*')
    || line.includes('[')
    || line.includes('(')
    || line.includes(']')
    || line.includes(')')
    || line.includes(',')
    // ends with
    || line.endsWith('.')
    || line.endsWith('-')
    || line.endsWith('_')
    // special modifier
    || R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2.test(line)
    // a slash or colon is only accepted as part of a "://" scheme separator
    || ((line.includes('/') || line.includes(':')) && !line.includes('://'))
  ) {
    return null;
  }

  const filter = NetworkFilter.parse(line);
  if (filter) {
    if (
      filter.isElemHide()
      || filter.isGenericHide()
      || filter.isSpecificHide()
      || filter.isRedirect()
      || filter.isRedirectRule()
      || filter.hasDomains()
      || filter.isCSP() // must not be csp rule
      || (!filter.fromAny() && !filter.fromDocument())
    ) {
      // not supported type
      return null;
    }

    if (
      filter.hasHostname() // must have
      && filter.isPlain()
      && (!filter.isRegex())
      && (!filter.isFullRegex())
    ) {
      const hostname = normalizeDomain(filter.getHostname());
      if (hostname) {
        if (filter.isException() || filter.isBadFilter()) {
          return [hostname, 0];
        }
        // first-party and third-party flags agree: the rule is not party-specific
        if (filter.firstParty() === filter.thirdParty()) {
          return [hostname, 2];
        }
        if (filter.thirdParty()) {
          if (includeThirdParties) {
            return [hostname, 2];
          }
          return null;
        }
        if (filter.firstParty()) {
          return null;
        }
      } else {
        return null;
      }
    }
  }

  // Fallback: hand-written pattern matching on the raw line

  if (line.includes('$third-party') || line.includes('$frame')) {
    return null;
  }

  const lineEndsWithCaret = line.endsWith('^');
  const lineEndsWithCaretVerticalBar = line.endsWith('^|');

  // Exception rule ("@@...") -> whitelist
  if (line[0] === '@' && line[1] === '@') {
    if (line.endsWith('$cname')) {
      return null;
    }

    if (
      // equivalent to line.startsWith('@@|') || line.startsWith('@@.')
      (
        line[2] === '|'
        || line[2] === '.'
      )
      && (
        lineEndsWithCaret
        || lineEndsWithCaretVerticalBar
        || line.endsWith('$genericblock')
        || line.endsWith('$document')
      )
    ) {
      const _domain = line
        .replace('@@||', '')
        .replace('@@|', '')
        .replace('@@.', '')
        .replace('^|', '')
        .replace('^$genericblock', '')
        .replace('$genericblock', '')
        .replace('^$document', '')
        .replace('$document', '')
        .replaceAll('^', '')
        .trim();

      const domain = normalizeDomain(_domain);
      if (domain) {
        return [domain, 0];
      }
      // This branch handles whitelist entries, so the message says "(white)"
      console.warn(' * [parse-filter E0001] (white) invalid domain:', _domain);

      return null;
    }
  }

  // "||example.com^" style rule -> black, including subdomains
  if (
    line.startsWith('||')
    && (
      lineEndsWithCaret
      || lineEndsWithCaretVerticalBar
      || line.endsWith('$cname')
    )
  ) {
    const _domain = line
      .replace('||', '')
      .replace('^|', '')
      .replace('$cname', '')
      .replaceAll('^', '')
      .trim();

    const domain = normalizeDomain(_domain);
    if (domain) {
      return [domain, 2];
    }
    console.warn(' * [parse-filter E0002] (black) invalid domain:', _domain);

    return null;
  }

  // ".example.com^" style rule -> black, including subdomains
  const lineStartsWithSingleDot = line.startsWith('.');
  if (
    lineStartsWithSingleDot
    && (
      lineEndsWithCaret
      || lineEndsWithCaretVerticalBar
    )
  ) {
    const _domain = line
      .replace('^|', '')
      .replaceAll('^', '')
      .slice(1)
      .trim();

    const domain = normalizeDomain(_domain);
    if (domain) {
      return [domain, 2];
    }
    console.warn(' * [parse-filter E0003] (black) invalid domain:', _domain);

    return null;
  }

  // "http(s)://example.com^" style rule -> black, this exact domain only
  if (
    (
      line.startsWith('://')
      || line.startsWith('http://')
      || line.startsWith('https://')
      || line.startsWith('|http://')
      || line.startsWith('|https://')
    )
    && (
      lineEndsWithCaret
      || lineEndsWithCaretVerticalBar
    )
  ) {
    const _domain = line
      .replace('|https://', '')
      .replace('https://', '')
      .replace('|http://', '')
      .replace('http://', '')
      .replace('://', '')
      .replace('^|', '')
      .replaceAll('^', '')
      .trim();

    const domain = normalizeDomain(_domain);
    if (domain) {
      return [domain, 1];
    }
    console.warn(' * [parse-filter E0004] (black) invalid domain:', _domain);

    return null;
  }

  // "example.com^" style rule -> black, this exact domain only
  if (line[0] !== '|' && lineEndsWithCaret) {
    const _domain = line.slice(0, -1);
    const domain = normalizeDomain(_domain);
    if (domain) {
      return [domain, 1];
    }
    console.warn(' * [parse-filter E0005] (black) invalid domain:', _domain);

    return null;
  }

  // Bare domain (optionally with a leading dot): accepted only when
  // normalization leaves the text unchanged. The line is returned as
  // written, including a leading dot if it has one.
  const tryNormalizeDomain = normalizeDomain(lineStartsWithSingleDot ? line.slice(1) : line);
  if (
    tryNormalizeDomain
    && (
      lineStartsWithSingleDot
        ? tryNormalizeDomain.length === line.length - 1
        : tryNormalizeDomain === line
    )
  ) {
    return [line, 2];
  }

  if (!line.endsWith('.js')) {
    console.warn(' * [parse-filter E0010] can not parse:', line);
  }

  return null;
}
|
||||
|
||||
module.exports.processDomainLists = processDomainLists;
|
||||
module.exports.processHosts = processHosts;
|
||||
module.exports.processFilterRules = processFilterRules;
|
||||
|
||||
11
Build/lib/stable-sort-domain.test.js
Normal file
11
Build/lib/stable-sort-domain.test.js
Normal file
@ -0,0 +1,11 @@
|
||||
const domainSorter = require('./stable-sort-domain');
const chai = require('chai');
const { describe, it } = require('mocha');

// Enable the `should` assertion style used below
chai.should();

describe('stable-sort-domain', () => {
  it('.ks.cn, .tag.unclaimedproperty.ks.gov', () => {
    const left = '.ks.cn';
    const right = '.tag.unclaimedproperty.ks.gov';

    // The comparator is expected to return -1 for this pair
    const order = domainSorter(left, right);

    order.should.eql(-1);
  });
});
|
||||
@ -278,7 +278,7 @@ class Trie {
|
||||
* Static .from function taking an arbitrary iterable & converting it into
|
||||
* a trie.
|
||||
*
|
||||
* @param {string[]} iterable - Target iterable.
|
||||
* @param {string[] | Set<string>} iterable - Target iterable.
|
||||
* @return {Trie}
|
||||
*/
|
||||
static from = iterable => {
|
||||
|
||||
@ -2,6 +2,7 @@ require('chai').should();
|
||||
|
||||
const Trie = require('./trie');
|
||||
const assert = require('assert');
|
||||
const { describe, it } = require('mocha');
|
||||
|
||||
describe('Trie', () => {
|
||||
it('should be possible to add items to a Trie.', () => {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user