From 39f3dacf6e0a1459f4f46ccbd1677148a85ab07c Mon Sep 17 00:00:00 2001 From: SukkaW Date: Wed, 31 Aug 2022 02:25:21 +0800 Subject: [PATCH] Perf: improve reject set dedupe performance --- Build/build-reject-domainset.js | 58 ++++++++++++++----- Build/worker/build-reject-domainset-worker.js | 36 ------------ 2 files changed, 45 insertions(+), 49 deletions(-) diff --git a/Build/build-reject-domainset.js b/Build/build-reject-domainset.js index 86a887c5..404720ff 100644 --- a/Build/build-reject-domainset.js +++ b/Build/build-reject-domainset.js @@ -148,24 +148,56 @@ const threads = require('os').cpus().length - 1; // Dedupe domainSets console.log(`Start deduping! (${previousSize})`); + const toBeRemoved = new Set(); + for (const domain of domainSets) { + let isTobeRemoved = false; + + for (const keyword of domainKeywordsSet) { + if (domain.includes(keyword) || keyword.includes(domain)) { + isTobeRemoved = true; + break; + } + } + + if (!isTobeRemoved) { + for (const suffix of domainSuffixSet) { + if (domain.endsWith(suffix)) { + isTobeRemoved = true; + break; + } + } + } + + if (!isTobeRemoved) { + for (const white of filterRuleWhitelistDomainSets) { + if (domain.includes(white) || white.includes(domain)) { + isTobeRemoved = true; + break; + } + } + } + + if (isTobeRemoved) { + toBeRemoved.add(domain); + } + } + + toBeRemoved.forEach((removed) => { + domainSets.delete(removed) + }); + + // Dedupe domainSets + console.log(`Deduped ${previousSize - domainSets.size} from black keywords and suffixes!`); + + previousSize = domainSets.size; + // Dedupe domainSets + console.log(`Start deduping! (${previousSize})`); + const piscina = new Piscina({ filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js'), workerData: [...domainSets] }); - (await Promise.all([ - piscina.run( - { keywords: domainKeywordsSet, suffixes: domainSuffixSet }, - { name: 'dedupeKeywords' } - ), - piscina.run( - { whiteList: filterRuleWhitelistDomainSets }, - { name: 'whitelisted' } - ) - ])).forEach(set => { - set.forEach(i => domainSets.delete(i)); - }); - (await Promise.all( Array.from(domainSets) .reduce((result, element, index) => { diff --git a/Build/worker/build-reject-domainset-worker.js b/Build/worker/build-reject-domainset-worker.js index e4c511b1..cf14286f 100644 --- a/Build/worker/build-reject-domainset-worker.js +++ b/Build/worker/build-reject-domainset-worker.js @@ -43,39 +43,3 @@ exports.dedupe = ({ chunk }) => { return outputToBeRemoved; }; - -exports.whitelisted = ({ whiteList }) => { - const outputToBeRemoved = new Set(); - - for (const domain of workerData) { - for (const white of whiteList) { - if (domain.includes(white) || white.includes(domain)) { - outputToBeRemoved.add(domain); - break; - } - } - } - - return outputToBeRemoved; -}; - -exports.dedupeKeywords = ({ keywords, suffixes }) => { - const outputToBeRemoved = new Set(); - - for (const domain of workerData) { - for (const keyword of keywords) { - if (domain.includes(keyword) || keyword.includes(domain)) { - outputToBeRemoved.add(domain); - break; - } - } - for (const suffix of suffixes) { - if (domain.endsWith(suffix)) { - outputToBeRemoved.add(domain); - break; - } - } - } - - return outputToBeRemoved; -}