diff --git a/Build/build-reject-domainset.js b/Build/build-reject-domainset.js index 7ed9d8f1..02a1007f 100644 --- a/Build/build-reject-domainset.js +++ b/Build/build-reject-domainset.js @@ -1,7 +1,7 @@ const { promises: fsPromises } = require('fs'); const { resolve: pathResolve } = require('path'); const Piscina = require('piscina'); -const { processHosts, processFilterRules } = require('./lib/parse-filter'); +const { processHosts, processFilterRules, preprocessFullDomainSetBeforeUsedAsWorkerData } = require('./lib/parse-filter'); const cpuCount = require('os').cpus().length; const { isCI } = require('ci-info'); const threads = isCI ? cpuCount : cpuCount / 2; @@ -68,6 +68,7 @@ const threads = isCI ? cpuCount : cpuCount / 2; 'ip6-allrouters', 'ip6-allhosts', 'mcastprefix', + 'skk.moe', 'analytics.google.com', 'msa.cdn.mediaset.net', // Added manually using DOMAIN-KEYWORDS 'cloud.answerhub.com', @@ -145,6 +146,8 @@ const threads = isCI ? cpuCount : cpuCount / 2; 'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt', // PiHoleBlocklist 'https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/SmartTV-AGH.txt', + // Spam404 + 'https://raw.githubusercontent.com/Spam404/lists/master/adblock-list.txt' ].map(input => { if (typeof input === 'string') { return processFilterRules(input); @@ -233,7 +236,7 @@ const threads = isCI ? 
cpuCount : cpuCount / 2;
 
   const piscina = new Piscina({
     filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js'),
-    workerData: [...domainSets],
+    workerData: preprocessFullDomainSetBeforeUsedAsWorkerData([...domainSets]),
     idleTimeout: 50,
     minThreads: threads,
     maxThreads: threads
diff --git a/Build/lib/parse-filter.js b/Build/lib/parse-filter.js
index ab10ca71..16ba23d3 100644
--- a/Build/lib/parse-filter.js
+++ b/Build/lib/parse-filter.js
@@ -125,6 +125,7 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) {
           || line.includes('!')
           || line.includes('*')
           || line.includes('/')
+          || line.includes('[')
           || line.includes('$') && !lineStartsWithDoubleVerticalBar
           || line === ''
           || isIP(line) !== 0
@@ -214,6 +215,48 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) {
   };
 }
 
+/**
+ * Reduce the full domain set to the entries the dedupe workers compare against.
+ *
+ * Keeps only entries that begin with "." and are not exempt from dedupe, then
+ * orders them by length, shortest first: build-reject-domainset-worker.js
+ * documents its workerData as "sorted by length, short first".
+ *
+ * @param {string[]} data full list of domain-set entries
+ * @returns {string[]} a new array; `data` itself is not modified
+ */
+function preprocessFullDomainSetBeforeUsedAsWorkerData (data) {
+  return data
+    .filter(domain => (
+      domain.charCodeAt(0) === 46 // 46 is the char code of "."
+      && !canExcludeFromDedupe(domain)
+    ))
+    // filter() already returned a fresh array, so the in-place sort cannot touch `data`
+    .sort((a, b) => a.length - b.length);
+}
+
+/**
+ * Tell whether an entry can be left out of dedupe.
+ *
+ * duckdns.org domain will not overlap and doesn't need dedupe. An entry qualifies
+ * only when it starts with ".", is exactly 23 characters long and ends with
+ * ".duckdns.org".
+ * NOTE(review): the reason for the exact length of 23 is not visible here - confirm.
+ *
+ * @param {string} domain a single domain-set entry
+ * @returns {boolean} true when the entry needs no dedupe
+ */
+function canExcludeFromDedupe (domain) {
+  return (
+    // starts with a dot
+    domain.charCodeAt(0) === 46
+    && domain.length === 23
+    && domain.endsWith('.duckdns.org')
+  );
+}
+
 module.exports.processDomainLists = processDomainLists;
 module.exports.processHosts = processHosts;
 module.exports.processFilterRules = processFilterRules;
+module.exports.preprocessFullDomainSetBeforeUsedAsWorkerData = preprocessFullDomainSetBeforeUsedAsWorkerData;
+module.exports.canExcludeFromDedupe = canExcludeFromDedupe;
diff --git a/Build/worker/build-reject-domainset-worker.js b/Build/worker/build-reject-domainset-worker.js
index 7779b14e..82942a16 100644
--- a/Build/worker/build-reject-domainset-worker.js
+++ b/Build/worker/build-reject-domainset-worker.js
@@ -1,12 +1,10 @@
 const Piscina = require('piscina');
 // pre check if fullset domain is starts with a "."
// This avoid calling chatCodeAt repeatedly +const { canExcludeFromDedupe } = require('../lib/parse-filter') -// workerData is an array of string. Sort it by length, short first: -const fullsetDomainStartsWithADot = Piscina.workerData.filter(domain => ( - domain.charCodeAt(0) === 46 - && !canExcludeFromDedupe(domain) -)); +// workerData is an array of string, sorted by length, short first +const fullsetDomainStartsWithADot = Piscina.workerData const totalLen = fullsetDomainStartsWithADot.length; module.exports.dedupe = ({ chunk }) => { @@ -60,13 +58,5 @@ module.exports.dedupe = ({ chunk }) => { } } - return outputToBeRemoved; + return Piscina.move(outputToBeRemoved); }; - -// duckdns.org domain will not overlap and doesn't need dedupe -function canExcludeFromDedupe (domain) { - if (domain.length === 23 && domain.endsWith('.duckdns.org')) { - return true; - } - return false; -}