Perf: improve reject set dedupe performance

This commit is contained in:
SukkaW 2022-08-22 12:57:58 +08:00
parent f47fc943b1
commit a08bc60052
2 changed files with 11 additions and 7 deletions

View File

@ -150,7 +150,7 @@ const threads = require('os').cpus().length - 1;
const piscina = new Piscina({
filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js'),
workerData: domainSets
workerData: [...domainSets]
});
(await Promise.all([

View File

@ -1,21 +1,25 @@
const { workerData } = require('piscina');
const len = workerData.length;
exports.dedupe = ({ chunk }) => {
const outputToBeRemoved = new Set();
for (let i = 0, l = chunk.length; i < l; i++) {
const domainFromInput = chunk[i];
for (const domainFromFullSet of workerData) {
if (outputToBeRemoved.has(domainFromFullSet)) continue;
for (let j = 0; j < len; j++) {
const domainFromFullSet = workerData[j];
if (domainFromFullSet === domainFromInput) continue;
if (domainFromFullSet.charAt(0) !== '.') continue;
if (domainFromFullSet.charCodeAt(0) !== 46) continue;
// domainFromFullSet is now known to start with a "."
if (domainFromInput.charAt(0) !== '.') {
if (domainFromInput.charCodeAt(0) !== 46) {
let shouldBeRemoved = true;
for (let j = 0, l2 = domainFromInput.length; j < l2; j++) {
if (domainFromFullSet.charAt(j + 1) !== domainFromInput.charAt(j)) {
for (let k = 0, l2 = domainFromInput.length; k < l2; k++) {
if (domainFromFullSet.charCodeAt(k + 1) !== domainFromInput.charCodeAt(k)) {
shouldBeRemoved = false;
break;
}