Chore: reject domainset update

This commit is contained in:
SukkaW 2022-10-08 17:50:06 +08:00
parent 3b489ddb2e
commit c91d22b2d3
3 changed files with 32 additions and 16 deletions

View File

@ -1,7 +1,7 @@
const { promises: fsPromises } = require('fs');
const { resolve: pathResolve } = require('path');
const Piscina = require('piscina');
const { processHosts, processFilterRules } = require('./lib/parse-filter');
const { processHosts, processFilterRules, preprocessFullDomainSetBeforeUsedAsWorkerData } = require('./lib/parse-filter');
const cpuCount = require('os').cpus().length;
const { isCI } = require('ci-info');
const threads = isCI ? cpuCount : cpuCount / 2;
@ -68,6 +68,7 @@ const threads = isCI ? cpuCount : cpuCount / 2;
'ip6-allrouters',
'ip6-allhosts',
'mcastprefix',
'skk.moe',
'analytics.google.com',
'msa.cdn.mediaset.net', // Added manually using DOMAIN-KEYWORDS
'cloud.answerhub.com',
@ -145,6 +146,8 @@ const threads = isCI ? cpuCount : cpuCount / 2;
'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt',
// PiHoleBlocklist
'https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/SmartTV-AGH.txt',
// Spam404
'https://raw.githubusercontent.com/Spam404/lists/master/adblock-list.txt'
].map(input => {
if (typeof input === 'string') {
return processFilterRules(input);
@ -233,7 +236,7 @@ const threads = isCI ? cpuCount : cpuCount / 2;
const piscina = new Piscina({
filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js'),
workerData: [...domainSets],
workerData: preprocessFullDomainSetBeforeUsedAsWorkerData([...domainSets]),
idleTimeout: 50,
minThreads: threads,
maxThreads: threads

View File

@ -125,6 +125,7 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) {
|| line.includes('!')
|| line.includes('*')
|| line.includes('/')
|| line.includes('[')
|| line.includes('$') && !lineStartsWithDoubleVerticalBar
|| line === ''
|| isIP(line) !== 0
@ -214,6 +215,28 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) {
};
}
/**
 * Narrow the full domain set down to the entries the dedupe worker needs.
 *
 * An entry is kept only when its first character is "." (char code 46) and
 * canExcludeFromDedupe() does not flag it.
 *
 * @param {string[]} data - the full domain set
 * @returns {string[]} a new array holding the kept entries, in their original order
 */
function preprocessFullDomainSetBeforeUsedAsWorkerData (data) {
  const LEADING_DOT = 46; // char code of "."

  const shouldKeep = (entry) => {
    if (entry.charCodeAt(0) !== LEADING_DOT) {
      return false;
    }
    return !canExcludeFromDedupe(entry);
  };

  return data.filter(shouldKeep);
}
/**
 * Decide whether a domain can be left out of the dedupe pass.
 *
 * duckdns.org domains will not overlap with each other, so they don't need
 * dedupe. A domain qualifies only when it starts with a dot, is exactly
 * 23 characters long, and ends with ".duckdns.org".
 *
 * @param {string} domain
 * @returns {boolean} true when the domain can be excluded from dedupe
 */
function canExcludeFromDedupe (domain) {
  // must start with a dot (char code 46)
  if (domain.charCodeAt(0) !== 46) {
    return false;
  }
  if (domain.length !== 23) {
    return false;
  }
  return domain.endsWith('.duckdns.org');
}
module.exports.processDomainLists = processDomainLists;
module.exports.processHosts = processHosts;
module.exports.processFilterRules = processFilterRules;
module.exports.preprocessFullDomainSetBeforeUsedAsWorkerData = preprocessFullDomainSetBeforeUsedAsWorkerData;
module.exports.canExcludeFromDedupe = canExcludeFromDedupe;

View File

@ -1,12 +1,10 @@
const Piscina = require('piscina');
// Pre-check whether each fullset domain starts with a "."
// This avoids calling charCodeAt repeatedly
const { canExcludeFromDedupe } = require('../lib/parse-filter')
// workerData is an array of string. Sort it by length, short first:
const fullsetDomainStartsWithADot = Piscina.workerData.filter(domain => (
domain.charCodeAt(0) === 46
&& !canExcludeFromDedupe(domain)
));
// workerData is an array of strings, sorted by length, short first
const fullsetDomainStartsWithADot = Piscina.workerData
const totalLen = fullsetDomainStartsWithADot.length;
module.exports.dedupe = ({ chunk }) => {
@ -60,13 +58,5 @@ module.exports.dedupe = ({ chunk }) => {
}
}
return outputToBeRemoved;
return Piscina.move(outputToBeRemoved);
};
/**
 * Decide whether a domain can be left out of the dedupe pass.
 *
 * duckdns.org domains will not overlap with each other, so they don't need
 * dedupe. A domain qualifies when it is exactly 23 characters long and ends
 * with ".duckdns.org".
 *
 * @param {string} domain
 * @returns {boolean} true when the domain can be excluded from dedupe
 */
function canExcludeFromDedupe (domain) {
  const hasExpectedLength = domain.length === 23;
  return hasExpectedLength && domain.endsWith('.duckdns.org');
}