Chore: reject domainset update

This commit is contained in:
SukkaW 2022-10-08 17:50:06 +08:00
parent 3b489ddb2e
commit c91d22b2d3
3 changed files with 32 additions and 16 deletions

View File

@ -1,7 +1,7 @@
const { promises: fsPromises } = require('fs'); const { promises: fsPromises } = require('fs');
const { resolve: pathResolve } = require('path'); const { resolve: pathResolve } = require('path');
const Piscina = require('piscina'); const Piscina = require('piscina');
const { processHosts, processFilterRules } = require('./lib/parse-filter'); const { processHosts, processFilterRules, preprocessFullDomainSetBeforeUsedAsWorkerData } = require('./lib/parse-filter');
const cpuCount = require('os').cpus().length; const cpuCount = require('os').cpus().length;
const { isCI } = require('ci-info'); const { isCI } = require('ci-info');
const threads = isCI ? cpuCount : cpuCount / 2; const threads = isCI ? cpuCount : cpuCount / 2;
@ -68,6 +68,7 @@ const threads = isCI ? cpuCount : cpuCount / 2;
'ip6-allrouters', 'ip6-allrouters',
'ip6-allhosts', 'ip6-allhosts',
'mcastprefix', 'mcastprefix',
'skk.moe',
'analytics.google.com', 'analytics.google.com',
'msa.cdn.mediaset.net', // Added manually using DOMAIN-KEYWORDS 'msa.cdn.mediaset.net', // Added manually using DOMAIN-KEYWORDS
'cloud.answerhub.com', 'cloud.answerhub.com',
@ -145,6 +146,8 @@ const threads = isCI ? cpuCount : cpuCount / 2;
'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt', 'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt',
// PiHoleBlocklist // PiHoleBlocklist
'https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/SmartTV-AGH.txt', 'https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/SmartTV-AGH.txt',
// Spam404
'https://raw.githubusercontent.com/Spam404/lists/master/adblock-list.txt'
].map(input => { ].map(input => {
if (typeof input === 'string') { if (typeof input === 'string') {
return processFilterRules(input); return processFilterRules(input);
@ -233,7 +236,7 @@ const threads = isCI ? cpuCount : cpuCount / 2;
const piscina = new Piscina({ const piscina = new Piscina({
filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js'), filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js'),
workerData: [...domainSets], workerData: preprocessFullDomainSetBeforeUsedAsWorkerData([...domainSets]),
idleTimeout: 50, idleTimeout: 50,
minThreads: threads, minThreads: threads,
maxThreads: threads maxThreads: threads

View File

@ -125,6 +125,7 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) {
|| line.includes('!') || line.includes('!')
|| line.includes('*') || line.includes('*')
|| line.includes('/') || line.includes('/')
|| line.includes('[')
|| line.includes('$') && !lineStartsWithDoubleVerticalBar || line.includes('$') && !lineStartsWithDoubleVerticalBar
|| line === '' || line === ''
|| isIP(line) !== 0 || isIP(line) !== 0
@ -214,6 +215,28 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) {
}; };
} }
/**
 * Narrow the full domain set down to the entries that take part in dedupe.
 *
 * An entry is kept only when its first character is "." and
 * `canExcludeFromDedupe` does not flag it.
 *
 * @param {string[]} data - every collected domain
 * @returns {string[]} a new array holding only the kept entries, in input order
 */
function preprocessFullDomainSetBeforeUsedAsWorkerData (data) {
  // Char code of ".".
  const DOT_CHAR_CODE = 46;

  const isDedupeCandidate = (entry) => {
    // Entries without a leading dot are dropped before the exclusion helper runs.
    if (entry.charCodeAt(0) !== DOT_CHAR_CODE) {
      return false;
    }
    return !canExcludeFromDedupe(entry);
  };

  return data.filter(isDedupeCandidate);
}
/**
 * Tell whether a domain can be left out of dedupe.
 *
 * duckdns.org domains will not overlap and don't need dedupe. A domain
 * matches only when it starts with a dot, is exactly 23 characters long
 * and ends with ".duckdns.org".
 *
 * @param {string} domain - the domain entry to inspect
 * @returns {boolean} true when the entry can skip dedupe
 */
function canExcludeFromDedupe (domain) {
  // 46 is the char code of ".".
  const hasLeadingDot = domain.charCodeAt(0) === 46;

  return hasLeadingDot
    && domain.length === 23
    && domain.endsWith('.duckdns.org');
}
module.exports.processDomainLists = processDomainLists; module.exports.processDomainLists = processDomainLists;
module.exports.processHosts = processHosts; module.exports.processHosts = processHosts;
module.exports.processFilterRules = processFilterRules; module.exports.processFilterRules = processFilterRules;
module.exports.preprocessFullDomainSetBeforeUsedAsWorkerData = preprocessFullDomainSetBeforeUsedAsWorkerData;
module.exports.canExcludeFromDedupe = canExcludeFromDedupe;

View File

@ -1,12 +1,10 @@
const Piscina = require('piscina'); const Piscina = require('piscina');
// Pre-check whether each fullset domain starts with a "." // Pre-check whether each fullset domain starts with a "."
// This avoids calling charCodeAt repeatedly // This avoids calling charCodeAt repeatedly
const { canExcludeFromDedupe } = require('../lib/parse-filter')
// workerData is an array of string. Sort it by length, short first: // workerData is an array of string, sorted by length, short first
const fullsetDomainStartsWithADot = Piscina.workerData.filter(domain => ( const fullsetDomainStartsWithADot = Piscina.workerData
domain.charCodeAt(0) === 46
&& !canExcludeFromDedupe(domain)
));
const totalLen = fullsetDomainStartsWithADot.length; const totalLen = fullsetDomainStartsWithADot.length;
module.exports.dedupe = ({ chunk }) => { module.exports.dedupe = ({ chunk }) => {
@ -60,13 +58,5 @@ module.exports.dedupe = ({ chunk }) => {
} }
} }
return outputToBeRemoved; return Piscina.move(outputToBeRemoved);
}; };
/**
 * Tell whether a domain can be left out of dedupe.
 *
 * duckdns.org domains will not overlap and don't need dedupe. A domain
 * matches when it is exactly 23 characters long and ends with
 * ".duckdns.org". The first character is not inspected here.
 *
 * @param {string} domain - the domain entry to inspect
 * @returns {boolean} true when the entry can skip dedupe
 */
function canExcludeFromDedupe (domain) {
  return domain.length === 23 && domain.endsWith('.duckdns.org');
}