mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 09:10:35 +08:00
Perf: improve reject set dedupe performance
This commit is contained in:
parent
69c196c8a5
commit
39f3dacf6e
@ -148,24 +148,56 @@ const threads = require('os').cpus().length - 1;
|
|||||||
// Dedupe domainSets
|
// Dedupe domainSets
|
||||||
console.log(`Start deduping! (${previousSize})`);
|
console.log(`Start deduping! (${previousSize})`);
|
||||||
|
|
||||||
|
// First pass: collect every domain that overlaps a keyword, ends with a
// listed suffix, or overlaps a whitelist entry. Removal happens in a
// separate pass below, after the scan over domainSets has finished.
const toBeRemoved = new Set();

scan: for (const domain of domainSets) {
  // Keyword check: substring match in either direction.
  for (const keyword of domainKeywordsSet) {
    if (domain.includes(keyword) || keyword.includes(domain)) {
      toBeRemoved.add(domain);
      continue scan;
    }
  }

  // Suffix check: only reached when no keyword matched.
  for (const suffix of domainSuffixSet) {
    if (domain.endsWith(suffix)) {
      toBeRemoved.add(domain);
      continue scan;
    }
  }

  // Whitelist check: only reached when neither of the above matched.
  for (const white of filterRuleWhitelistDomainSets) {
    if (domain.includes(white) || white.includes(domain)) {
      toBeRemoved.add(domain);
      continue scan;
    }
  }
}

// Second pass: drop everything collected above.
for (const removed of toBeRemoved) {
  domainSets.delete(removed);
}
|
||||||
|
|
||||||
|
// Dedupe domainSets
|
||||||
|
console.log(`Deduped ${previousSize - domainSets.size} from black keywords and suffixes!`);
|
||||||
|
|
||||||
|
previousSize = domainSets.size;
|
||||||
|
// Dedupe domainSets
|
||||||
|
console.log(`Start deduping! (${previousSize})`);
|
||||||
|
|
||||||
const piscina = new Piscina({
|
const piscina = new Piscina({
|
||||||
filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js'),
|
filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js'),
|
||||||
workerData: [...domainSets]
|
workerData: [...domainSets]
|
||||||
});
|
});
|
||||||
|
|
||||||
// Start both worker tasks together, wait for both results, then delete
// every domain each task reported from domainSets.
for (const reported of await Promise.all([
  piscina.run(
    { keywords: domainKeywordsSet, suffixes: domainSuffixSet },
    { name: 'dedupeKeywords' }
  ),
  piscina.run(
    { whiteList: filterRuleWhitelistDomainSets },
    { name: 'whitelisted' }
  )
])) {
  for (const domain of reported) {
    domainSets.delete(domain);
  }
}
|
|
||||||
|
|
||||||
(await Promise.all(
|
(await Promise.all(
|
||||||
Array.from(domainSets)
|
Array.from(domainSets)
|
||||||
.reduce((result, element, index) => {
|
.reduce((result, element, index) => {
|
||||||
|
|||||||
@ -43,39 +43,3 @@ exports.dedupe = ({ chunk }) => {
|
|||||||
|
|
||||||
return outputToBeRemoved;
|
return outputToBeRemoved;
|
||||||
};
|
};
|
||||||
|
|
||||||
exports.whitelisted = ({ whiteList }) => {
|
|
||||||
const outputToBeRemoved = new Set();
|
|
||||||
|
|
||||||
for (const domain of workerData) {
|
|
||||||
for (const white of whiteList) {
|
|
||||||
if (domain.includes(white) || white.includes(domain)) {
|
|
||||||
outputToBeRemoved.add(domain);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return outputToBeRemoved;
|
|
||||||
};
|
|
||||||
|
|
||||||
exports.dedupeKeywords = ({ keywords, suffixes }) => {
|
|
||||||
const outputToBeRemoved = new Set();
|
|
||||||
|
|
||||||
for (const domain of workerData) {
|
|
||||||
for (const keyword of keywords) {
|
|
||||||
if (domain.includes(keyword) || keyword.includes(domain)) {
|
|
||||||
outputToBeRemoved.add(domain);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (const suffix of suffixes) {
|
|
||||||
if (domain.endsWith(suffix)) {
|
|
||||||
outputToBeRemoved.add(domain);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return outputToBeRemoved;
|
|
||||||
}
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user