Chore: even faster reject domainset builder

This commit is contained in:
SukkaW 2022-07-05 00:39:30 +08:00
parent 6c7317108b
commit 7ae9170b4d
2 changed files with 14 additions and 13 deletions

View File

@ -21,17 +21,18 @@ const threads = require('os').cpus().length - 1;
}); });
}); });
const hostsSize = domainSets.size; let previousSize = domainSets.size;
console.log(`Import ${hostsSize} rules from hosts files!`); console.log(`Import ${previousSize} rules from hosts files!`);
await fsPromises.readFile(pathResolve(__dirname, '../List/domainset/reject_sukka.conf'), { encoding: 'utf-8' }).then(data => { await fsPromises.readFile(pathResolve(__dirname, '../List/domainset/reject_sukka.conf'), { encoding: 'utf-8' }).then(data => {
data.split('\n').forEach(line => { data.split('\n').forEach(line => {
const trimmed = line.trim();
if ( if (
line.startsWith('#') line.startsWith('#')
|| line.startsWith(' ') || line.startsWith(' ')
|| line.startsWith('\r') || line.startsWith('\r')
|| line.startsWith('\n') || line.startsWith('\n')
|| line.trim() === '' || trimmed === ''
) { ) {
return; return;
} }
@ -39,12 +40,12 @@ const threads = require('os').cpus().length - 1;
/* if (domainSets.has(line) || domainSets.has(`.${line}`)) { /* if (domainSets.has(line) || domainSets.has(`.${line}`)) {
console.warn(`|${line}| is already in the list!`); console.warn(`|${line}| is already in the list!`);
} */ } */
domainSets.add(line.trim()); domainSets.add(trimmed);
}); });
}); });
const sukkaSize = domainSets.size - hostsSize; previousSize = domainSets.size - previousSize;
console.log(`Import ${sukkaSize} rules from reject_sukka.conf!`); console.log(`Import ${previousSize} rules from reject_sukka.conf!`);
// Parse from AdGuard Filters // Parse from AdGuard Filters
/** @type Set<string> */ /** @type Set<string> */
@ -120,8 +121,8 @@ const threads = require('os').cpus().length - 1;
black.forEach(i => domainSets.add(i)); black.forEach(i => domainSets.add(i));
}); });
const adguardSize = domainSets.size - hostsSize - sukkaSize; previousSize = domainSets.size - previousSize;
console.log(`Import ${adguardSize} rules from adguard filters!`); console.log(`Import ${previousSize} rules from adguard filters!`);
// Read DOMAIN Keyword // Read DOMAIN Keyword
const domainKeywordsSet = new Set(); const domainKeywordsSet = new Set();
@ -140,9 +141,9 @@ const threads = require('os').cpus().length - 1;
console.log(`Import ${domainKeywordsSet.size} black keywords and ${domainSuffixSet.size} black suffixes!`); console.log(`Import ${domainKeywordsSet.size} black keywords and ${domainSuffixSet.size} black suffixes!`);
const beforeDeduping = domainSets.size; previousSize = domainSets.size;
// Dedupe domainSets // Dedupe domainSets
console.log(`Start deduping! (${beforeDeduping})`); console.log(`Start deduping! (${previousSize})`);
const piscina = new Piscina({ const piscina = new Piscina({
filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js'), filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js'),
@ -179,7 +180,7 @@ const threads = require('os').cpus().length - 1;
set.forEach(i => domainSets.delete(i)); set.forEach(i => domainSets.delete(i));
}); });
console.log(`Deduped ${beforeDeduping - domainSets.size} rules!`); console.log(`Deduped ${previousSize - domainSets.size} rules!`);
return fsPromises.writeFile( return fsPromises.writeFile(
pathResolve(__dirname, '../List/domainset/reject.conf'), pathResolve(__dirname, '../List/domainset/reject.conf'),

View File

@ -10,8 +10,8 @@ exports.dedupe = ({ chunk }) => {
if (domainFromFullSet.charAt(0) !== '.') continue; if (domainFromFullSet.charAt(0) !== '.') continue;
if ( if (
// `.${domainFromInput}` === domainFromFullSet (domainFromInput.charAt(0) !== '.' && `.${domainFromInput}` === domainFromFullSet)
domainFromInput.endsWith(domainFromFullSet) || domainFromInput.endsWith(domainFromFullSet)
) { ) {
outputToBeRemoved.add(domainFromInput); outputToBeRemoved.add(domainFromInput);
break; break;