Perf: improve reject set dedupe worker performance

SukkaW
2022-09-05 14:13:07 +08:00
parent 587505c88c
commit 61035df4a8
2 changed files with 27 additions and 11 deletions


@@ -198,6 +198,17 @@ const threads = require('os').cpus().length - 1;
     workerData: [...domainSets]
   });
   console.log(`Launching ${threads} threads...`)
+  const tasksArray = Array.from(domainSets)
+    .reduce((result, element, index) => {
+      const chunk = index % threads;
+      result[chunk] ??= [];
+      result[chunk].push(element);
+      return result;
+    }, []);
   (await Promise.all(
     Array.from(domainSets)
       .reduce((result, element, index) => {
@@ -208,11 +219,15 @@ const threads = require('os').cpus().length - 1;
         return result;
       }, [])
       .map(chunk => piscina.run(
-        { chunk },
-        { name: 'dedupe' }
+        { chunk }
       ))
-  )).forEach(set => {
-    set.forEach(i => domainSets.delete(i));
+  )).forEach((result, taskIndex) => {
+    const chunk = tasksArray[taskIndex];
+    result.forEach((value, index) => {
+      if (value === 1) {
+        domainSets.delete(chunk[index])
+      }
+    })
   });
   console.log(`Deduped ${previousSize - domainSets.size} rules!`);
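
The mapping between worker results and input chunks above relies on two properties: `index % threads` deals the domains out round-robin into roughly equal chunks, and `Promise.all` preserves input order, so the result at `taskIndex` lines up with `tasksArray[taskIndex]`. Below is a minimal standalone sketch of that pattern, with a hypothetical `items` list and a synchronous stub standing in for `piscina.run()`; it is not the repository's code.

// Sketch only: `items`, `fakeDedupe`, and the match rule are made up for illustration.
const threads = Math.max(require('os').cpus().length - 1, 1);
const items = ['a.example.com', 'b.example.com', 'c.example.com', 'd.example.com'];

// Round-robin chunking: element `index` goes into chunk `index % threads`.
const tasksArray = items.reduce((result, element, index) => {
  const chunk = index % threads;
  result[chunk] ??= [];
  result[chunk].push(element);
  return result;
}, []);

// Stand-in for a worker: returns a 1/0 flag per entry of its chunk.
const fakeDedupe = (chunk) => chunk.map((domain) => (domain === 'b.example.com' ? 1 : 0));

Promise.all(tasksArray.map((chunk) => Promise.resolve(fakeDedupe(chunk)))).then((results) => {
  const kept = new Set(items);
  // Promise.all keeps order, so results[taskIndex] describes tasksArray[taskIndex].
  results.forEach((flags, taskIndex) => {
    const chunk = tasksArray[taskIndex];
    flags.forEach((value, index) => {
      if (value === 1) kept.delete(chunk[index]);
    });
  });
  console.log(kept); // Set without 'b.example.com'
});

Keeping `tasksArray` around is what lets the main thread translate each flag index back to a concrete domain without the worker ever sending strings back.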


@@ -1,11 +1,12 @@
-const { workerData } = require('piscina');
+const { workerData, move } = require('piscina');
 const len = workerData.length;
-exports.dedupe = ({ chunk }) => {
-  const outputToBeRemoved = new Set();
+module.exports = ({ chunk }) => {
+  const chunkLength = chunk.length;
+  const outputToBeRemoved = new Int32Array(chunkLength);
-  for (let i = 0, l = chunk.length; i < l; i++) {
+  for (let i = 0; i < chunkLength; i++) {
     const domainFromInput = chunk[i];
     for (let j = 0; j < len; j++) {
@@ -26,7 +27,7 @@ exports.dedupe = ({ chunk }) => {
       }
       if (shouldBeRemoved) {
-        outputToBeRemoved.add(domainFromInput);
+        outputToBeRemoved[i] = 1;
         break;
       }
     }
@@ -34,12 +35,12 @@ exports.dedupe = ({ chunk }) => {
       if (domainFromInput.length >= domainFromFullSet.length) {
         if (domainFromInput.endsWith(domainFromFullSet)) {
-          outputToBeRemoved.add(domainFromInput);
+          outputToBeRemoved[i] = 1;
           break;
         }
       }
     }
   }
-  return outputToBeRemoved;
+  return move(outputToBeRemoved);
 };
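
Instead of accumulating matched domains in a `Set` and structured-cloning it back, the worker now sets a 1/0 flag per chunk index and hands the `Int32Array` to Piscina's `move()`, which transfers the underlying `ArrayBuffer` to the main thread rather than copying it. Below is a minimal sketch of that worker/pool pairing outside this repository, with hypothetical file names and a trivial match rule, assuming Piscina is installed.

// worker.js (hypothetical file name)
const { move } = require('piscina');

module.exports = ({ chunk }) => {
  const flags = new Int32Array(chunk.length);
  for (let i = 0; i < chunk.length; i++) {
    // Trivial stand-in for the real rule checks.
    if (chunk[i].endsWith('.example.com')) flags[i] = 1;
  }
  // move() marks the typed array so its buffer is transferred, not cloned.
  return move(flags);
};

// main.js (hypothetical file name)
const path = require('path');
const Piscina = require('piscina');

const piscina = new Piscina({ filename: path.resolve(__dirname, 'worker.js') });

piscina.run({ chunk: ['a.example.com', 'b.example.org'] }).then((flags) => {
  console.log(Array.from(flags)); // [1, 0]
});

The trade-off mirrors the commit: numeric flags plus a transferred buffer avoid re-serializing matched strings, since the main thread already holds them in its own chunk arrays.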