Perf: improve reject set dedupe worker performance

This commit is contained in:
SukkaW
2022-09-12 23:27:15 +08:00
parent 020eb76a81
commit 7a7a97e4d0

View File

@@ -1,25 +1,32 @@
const { workerData, move } = require('piscina'); const { workerData, move } = require('piscina');
const len = workerData.length; const len = workerData.length;
// Pre-check whether each fullset domain starts with a "."
// This avoids calling charCodeAt repeatedly
const fullsetDomainStartsWithADot = workerData.map(domain => domain.charCodeAt(0) === 46);
module.exports = ({ chunk }) => { module.exports = ({ chunk }) => {
const chunkLength = chunk.length; const chunkLength = chunk.length;
const outputToBeRemoved = new Int32Array(chunkLength); const outputToBeRemoved = new Int8Array(chunkLength);
for (let i = 0; i < chunkLength; i++) { for (let i = 0; i < chunkLength; i++) {
const domainFromInput = chunk[i]; const domainFromInput = chunk[i];
for (let j = 0; j < len; j++) { for (let j = 0; j < len; j++) {
// Skip any domainFromFullSet that does not start with a "."
if (!fullsetDomainStartsWithADot[j]) continue;
// domainFromFullSet is now known to start with a "."
const domainFromFullSet = workerData[j]; const domainFromFullSet = workerData[j];
if (domainFromFullSet === domainFromInput) continue; if (domainFromFullSet === domainFromInput) continue;
if (domainFromFullSet.charCodeAt(0) !== 46) continue;
// domainFromFullSet is now startsWith a "." const domainFromInputLen = domainFromInput.length;
if (domainFromInput.charCodeAt(0) !== 46) { if (domainFromInput.charCodeAt(0) !== 46) {
let shouldBeRemoved = true; let shouldBeRemoved = true;
for (let k = 0, l2 = domainFromInput.length; k < l2; k++) { for (let k = 0; k < domainFromInputLen; k++) {
if (domainFromFullSet.charCodeAt(k + 1) !== domainFromInput.charCodeAt(k)) { if (domainFromFullSet.charCodeAt(k + 1) !== domainFromInput.charCodeAt(k)) {
shouldBeRemoved = false; shouldBeRemoved = false;
break; break;
@@ -33,7 +40,7 @@ module.exports = ({ chunk }) => {
} }
// domainFromInput is now startsWith a "." // domainFromInput is now startsWith a "."
if (domainFromInput.length >= domainFromFullSet.length) { if (domainFromInputLen >= domainFromFullSet.length) {
if (domainFromInput.endsWith(domainFromFullSet)) { if (domainFromInput.endsWith(domainFromFullSet)) {
outputToBeRemoved[i] = 1; outputToBeRemoved[i] = 1;
break; break;