mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Perf: make reject list build faster
This commit is contained in:
parent
b659bff079
commit
4d0a5260ca
@ -2,12 +2,9 @@
|
|||||||
const { promises: fsPromises } = require('fs');
|
const { promises: fsPromises } = require('fs');
|
||||||
const fse = require('fs-extra');
|
const fse = require('fs-extra');
|
||||||
const { resolve: pathResolve } = require('path');
|
const { resolve: pathResolve } = require('path');
|
||||||
const Piscina = require('piscina');
|
|
||||||
const { processHosts, processFilterRules, preprocessFullDomainSetBeforeUsedAsWorkerData } = require('./lib/parse-filter');
|
const { processHosts, processFilterRules, preprocessFullDomainSetBeforeUsedAsWorkerData } = require('./lib/parse-filter');
|
||||||
const cpuCount = require('os').cpus().length;
|
|
||||||
const { isCI } = require('ci-info');
|
|
||||||
const threads = isCI ? cpuCount : cpuCount / 2;
|
|
||||||
const { getDomain } = require('tldts');
|
const { getDomain } = require('tldts');
|
||||||
|
const Trie = require('./lib/trie');
|
||||||
|
|
||||||
const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source');
|
const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source');
|
||||||
const { withBannerArray } = require('./lib/with-banner');
|
const { withBannerArray } = require('./lib/with-banner');
|
||||||
@ -30,15 +27,14 @@ const domainSuffixSet = new Set();
|
|||||||
console.time('* Download and process Hosts');
|
console.time('* Download and process Hosts');
|
||||||
|
|
||||||
// Parse from remote hosts & domain lists
|
// Parse from remote hosts & domain lists
|
||||||
(await Promise.all(
|
(await Promise.all(HOSTS.map(entry => processHosts(entry[0], entry[1]))))
|
||||||
HOSTS.map(entry => processHosts(entry[0], entry[1]))
|
.forEach(hosts => {
|
||||||
)).forEach(hosts => {
|
hosts.forEach(host => {
|
||||||
hosts.forEach(host => {
|
if (host) {
|
||||||
if (host) {
|
domainSets.add(host);
|
||||||
domainSets.add(host);
|
}
|
||||||
}
|
});
|
||||||
});
|
});
|
||||||
});
|
|
||||||
|
|
||||||
console.timeEnd('* Download and process Hosts');
|
console.timeEnd('* Download and process Hosts');
|
||||||
|
|
||||||
@ -167,8 +163,31 @@ const domainSuffixSet = new Set();
|
|||||||
console.log(`Start deduping from black keywords/suffixes! (${previousSize})`);
|
console.log(`Start deduping from black keywords/suffixes! (${previousSize})`);
|
||||||
console.time(`* Dedupe from black keywords/suffixes`);
|
console.time(`* Dedupe from black keywords/suffixes`);
|
||||||
|
|
||||||
|
const trie1 = Trie.from(Array.from(domainSets));
|
||||||
|
domainSuffixSet.forEach(suffix => {
|
||||||
|
trie1.find(suffix, true).forEach(f => domainSets.delete(f));
|
||||||
|
});
|
||||||
|
filterRuleWhitelistDomainSets.forEach(suffix => {
|
||||||
|
trie1.find(suffix, true).forEach(f => domainSets.delete(f));
|
||||||
|
});
|
||||||
|
|
||||||
|
// Build whitelist trie, to handle case like removing `g.msn.com` due to white `.g.msn.com` (`@@||g.msn.com`)
|
||||||
|
const trieWhite = Trie.from(Array.from(filterRuleWhitelistDomainSets));
|
||||||
for (const domain of domainSets) {
|
for (const domain of domainSets) {
|
||||||
if (isMatchKeyword(domain) || isMatchSuffix(domain) || isInWhiteList(domain)) {
|
if (domain[0] !== '.' && trieWhite.has(`.${domain}`)) {
|
||||||
|
domainSets.delete(domain);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (domain[0] === '.') {
|
||||||
|
const found = trieWhite.find(domain);
|
||||||
|
if (found.length > 0) {
|
||||||
|
domainSets.delete(domain);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove keyword
|
||||||
|
if (isMatchKeyword(domain)) {
|
||||||
domainSets.delete(domain);
|
domainSets.delete(domain);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -183,44 +202,28 @@ const domainSuffixSet = new Set();
|
|||||||
const START_TIME = Date.now();
|
const START_TIME = Date.now();
|
||||||
|
|
||||||
const domainSetsArray = Array.from(domainSets);
|
const domainSetsArray = Array.from(domainSets);
|
||||||
const workerData = preprocessFullDomainSetBeforeUsedAsWorkerData(domainSetsArray);
|
const trie2 = Trie.from(domainSetsArray);
|
||||||
|
const fullsetDomainStartsWithADot = preprocessFullDomainSetBeforeUsedAsWorkerData(domainSetsArray);
|
||||||
|
console.log(fullsetDomainStartsWithADot.length);
|
||||||
|
|
||||||
const piscina = new Piscina({
|
for (let j = 0, len = fullsetDomainStartsWithADot.length; j < len; j++) {
|
||||||
filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js'),
|
const domainStartsWithADotAndFromFullSet = fullsetDomainStartsWithADot[j];
|
||||||
workerData,
|
const found = trie2.find(domainStartsWithADotAndFromFullSet, false);
|
||||||
idleTimeout: 50,
|
if (found.length) {
|
||||||
minThreads: threads,
|
found.forEach(f => {
|
||||||
maxThreads: threads
|
domainSets.delete(f);
|
||||||
});
|
})
|
||||||
|
}
|
||||||
|
|
||||||
console.log(workerData.length);
|
const a = domainStartsWithADotAndFromFullSet.slice(1);
|
||||||
|
if (trie2.has(a)) {
|
||||||
console.log(`Launching ${threads} threads...`);
|
domainSets.delete(a);
|
||||||
|
}
|
||||||
const tasksArray = domainSetsArray.reduce((result, element, index) => {
|
}
|
||||||
const chunk = index % threads;
|
|
||||||
result[chunk] ??= [];
|
|
||||||
|
|
||||||
result[chunk].push(element);
|
|
||||||
return result;
|
|
||||||
}, /** @type {string[][]} */([]));
|
|
||||||
|
|
||||||
(await Promise.all(
|
|
||||||
tasksArray.map(chunk => piscina.run({ chunk }))
|
|
||||||
)).forEach((result, taskIndex) => {
|
|
||||||
const chunk = tasksArray[taskIndex];
|
|
||||||
for (let i = 0, len = result.length; i < len; i++) {
|
|
||||||
if (result[i]) {
|
|
||||||
domainSets.delete(chunk[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log(`* Dedupe from covered subdomain - ${(Date.now() - START_TIME) / 1000}s`);
|
console.log(`* Dedupe from covered subdomain - ${(Date.now() - START_TIME) / 1000}s`);
|
||||||
console.log(`Deduped ${previousSize - domainSets.size} rules!`);
|
console.log(`Deduped ${previousSize - domainSets.size} rules!`);
|
||||||
|
|
||||||
await piscina.destroy();
|
|
||||||
|
|
||||||
console.time('* Write reject.conf');
|
console.time('* Write reject.conf');
|
||||||
|
|
||||||
const sorter = (a, b) => {
|
const sorter = (a, b) => {
|
||||||
@ -264,9 +267,6 @@ const domainSuffixSet = new Set();
|
|||||||
console.timeEnd('* Write reject.conf');
|
console.timeEnd('* Write reject.conf');
|
||||||
|
|
||||||
console.timeEnd('Total Time - build-reject-domain-set');
|
console.timeEnd('Total Time - build-reject-domain-set');
|
||||||
if (piscina.queueSize === 0) {
|
|
||||||
process.exit(0);
|
|
||||||
}
|
|
||||||
})();
|
})();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -281,34 +281,3 @@ function isMatchKeyword(domain) {
|
|||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @param {string} domain
|
|
||||||
*/
|
|
||||||
function isMatchSuffix(domain) {
|
|
||||||
for (const suffix of domainSuffixSet) {
|
|
||||||
if (domain.endsWith(suffix)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param {string} domain
|
|
||||||
*/
|
|
||||||
function isInWhiteList(domain) {
|
|
||||||
for (const white of filterRuleWhitelistDomainSets) {
|
|
||||||
if (domain === white || domain.endsWith(white)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
if (white.endsWith(domain)) {
|
|
||||||
// If a whole domain is in blacklist but a subdomain is in whitelist
|
|
||||||
// We have no choice but to remove the whole domain from blacklist
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|||||||
@ -397,7 +397,7 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart
|
|||||||
*/
|
*/
|
||||||
function preprocessFullDomainSetBeforeUsedAsWorkerData(data) {
|
function preprocessFullDomainSetBeforeUsedAsWorkerData(data) {
|
||||||
return data
|
return data
|
||||||
.filter(domain => domain.charCodeAt(0) === 46)
|
.filter(domain => domain[0] === '.')
|
||||||
.sort((a, b) => a.length - b.length);
|
.sort((a, b) => a.length - b.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -129,7 +129,7 @@ class Trie {
|
|||||||
/**
|
/**
|
||||||
* Method used to delete a prefix from the trie.
|
* Method used to delete a prefix from the trie.
|
||||||
*
|
*
|
||||||
* @param {string|array} suffix - Prefix to delete.
|
* @param {string} suffix - Prefix to delete.
|
||||||
* @return {boolean}
|
* @return {boolean}
|
||||||
*/
|
*/
|
||||||
delete(suffix) {
|
delete(suffix) {
|
||||||
@ -198,66 +198,45 @@ class Trie {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Method returning an iterator over the trie's prefixes.
|
* @return {string[]}
|
||||||
*
|
|
||||||
* @param {string|array} [prefix] - Optional starting prefix.
|
|
||||||
* @return {Iterator}
|
|
||||||
*/
|
*/
|
||||||
// prefixes(prefix) {
|
dump() {
|
||||||
// let node = this.root;
|
let node = this.root;
|
||||||
// const nodeStack = [];
|
const nodeStack = [];
|
||||||
// const prefixStack = [];
|
const prefixStack = [];
|
||||||
// let token;
|
// Resolving initial prefix
|
||||||
// let i;
|
const prefix = '';
|
||||||
// let l;
|
|
||||||
|
|
||||||
// const isString = this.mode === 'string';
|
nodeStack.push(node);
|
||||||
|
prefixStack.push(prefix);
|
||||||
|
|
||||||
// // Resolving initial prefix
|
/** @type {string[]} */
|
||||||
// if (prefix) {
|
const results = [];
|
||||||
// for (i = 0, l = prefix.length; i < l; i++) {
|
|
||||||
// token = prefix[i];
|
|
||||||
// node = node[token];
|
|
||||||
|
|
||||||
// // If the prefix does not exist, we return an empty iterator
|
let currentNode;
|
||||||
// if (typeof node === 'undefined')
|
let currentPrefix;
|
||||||
// return Iterator.empty();
|
let hasValue = false;
|
||||||
// }
|
let k;
|
||||||
// }
|
|
||||||
// else {
|
|
||||||
// prefix = isString ? '' : [];
|
|
||||||
// }
|
|
||||||
|
|
||||||
// nodeStack.push(node);
|
while (nodeStack.length) {
|
||||||
// prefixStack.push(prefix);
|
currentNode = nodeStack.pop();
|
||||||
|
currentPrefix = prefixStack.pop();
|
||||||
|
|
||||||
// return new Iterator(() => {
|
for (k in currentNode) {
|
||||||
// let currentNode;
|
if (k === SENTINEL) {
|
||||||
// let currentPrefix;
|
hasValue = true;
|
||||||
// let hasValue = false;
|
continue;
|
||||||
// let k;
|
}
|
||||||
|
|
||||||
// while (nodeStack.length) {
|
nodeStack.push(currentNode[k]);
|
||||||
// currentNode = nodeStack.pop();
|
prefixStack.push(k + currentPrefix);
|
||||||
// currentPrefix = prefixStack.pop();
|
}
|
||||||
|
|
||||||
// for (k in currentNode) {
|
if (hasValue) results.push(currentPrefix);
|
||||||
// if (k === SENTINEL) {
|
}
|
||||||
// hasValue = true;
|
|
||||||
// continue;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// nodeStack.push(currentNode[k]);
|
return results;
|
||||||
// prefixStack.push(isString ? currentPrefix + k : currentPrefix.concat(k));
|
}
|
||||||
// }
|
|
||||||
|
|
||||||
// if (hasValue)
|
|
||||||
// return { done: false, value: currentPrefix };
|
|
||||||
// }
|
|
||||||
|
|
||||||
// return { done: true };
|
|
||||||
// });
|
|
||||||
// }
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convenience known methods.
|
* Convenience known methods.
|
||||||
|
|||||||
@ -1,45 +0,0 @@
|
|||||||
// @ts-check
|
|
||||||
const Piscina = require('piscina');
|
|
||||||
const Trie = require('../lib/trie');
|
|
||||||
// const { isCI } = require('ci-info');
|
|
||||||
/** @type {string[]} */
|
|
||||||
const fullsetDomainStartsWithADot = Piscina.workerData;
|
|
||||||
const totalLen = fullsetDomainStartsWithADot.length;
|
|
||||||
|
|
||||||
const DOT = '.';
|
|
||||||
|
|
||||||
// const log = isCI ? () => { } : console.log.bind(console);
|
|
||||||
/**
|
|
||||||
* @param {{ chunk: string[] }} param0
|
|
||||||
*/
|
|
||||||
module.exports = ({ chunk }) => {
|
|
||||||
const chunkLength = chunk.length;
|
|
||||||
const outputToBeRemoved = new Int8Array(chunkLength);
|
|
||||||
|
|
||||||
const trie = Trie.from(chunk);
|
|
||||||
|
|
||||||
for (let j = 0; j < totalLen; j++) {
|
|
||||||
const domainStartsWithADotAndFromFullSet = fullsetDomainStartsWithADot[j];
|
|
||||||
|
|
||||||
const found = trie.find(domainStartsWithADotAndFromFullSet, false)
|
|
||||||
|
|
||||||
if (found.length) {
|
|
||||||
found.forEach(f => {
|
|
||||||
const index = chunk.indexOf(f);
|
|
||||||
if (index !== -1) {
|
|
||||||
outputToBeRemoved[index] = 1;
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
const a = domainStartsWithADotAndFromFullSet.slice(1);
|
|
||||||
if (trie.has(a)) {
|
|
||||||
const index = chunk.indexOf(a);
|
|
||||||
if (index !== -1) {
|
|
||||||
outputToBeRemoved[index] = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return Piscina.move(outputToBeRemoved);
|
|
||||||
};
|
|
||||||
Loading…
x
Reference in New Issue
Block a user