From 4565b15e170f26f1c6d5ee51a562c0e68abc7564 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Mon, 7 Nov 2022 18:53:32 +0800 Subject: [PATCH] Perf: faster is-domain-loose check --- Build/build-anti-bogus-domain.js | 15 ++++++++-- Build/build-reject-domainset.js | 5 +++- Build/lib/is-domain-loose.js | 20 +++++++++++-- Build/lib/parse-filter.js | 51 ++++++++++++++++++++------------ 4 files changed, 66 insertions(+), 25 deletions(-) diff --git a/Build/build-anti-bogus-domain.js b/Build/build-anti-bogus-domain.js index abea36c7..3c00c87b 100644 --- a/Build/build-anti-bogus-domain.js +++ b/Build/build-anti-bogus-domain.js @@ -1,7 +1,7 @@ const { fetchWithRetry } = require('./lib/fetch-retry'); const fs = require('fs'); const path = require('path'); -const { isIP } = require('net'); +const { isIPv4, isIPv6 } = require('net'); (async () => { console.time('Total Time - build-anti-bogus-domain'); @@ -15,7 +15,8 @@ const { isIP } = require('net'); return null }) - .filter(ip => typeof ip === 'string' && isIP(ip) !== 0); + .filter(ip => typeof ip === 'string'); + console.timeEnd('* Download bogus-nxdomain-list') const filePath = path.resolve(__dirname, '../Source/ip/reject.conf'); @@ -23,7 +24,15 @@ const { isIP } = require('net'); const content = (await fs.promises.readFile(filePath, 'utf-8')) .replace( '# --- [Anti Bogus Domain Replace Me] ---', - res.map(ip => `IP-CIDR,${ip}/32,no-resolve`).join('\n') + res.map(ip => { + if (isIPv4(ip)) { + return `IP-CIDR,${ip}/32,no-resolve` + } + if (isIPv6(ip)) { + return `IP-CIDR6,${ip}/128,no-resolve` + } + return '' + }).join('\n') ); await fs.promises.writeFile(resultPath, content, 'utf-8'); diff --git a/Build/build-reject-domainset.js b/Build/build-reject-domainset.js index be995445..0c806fa8 100644 --- a/Build/build-reject-domainset.js +++ b/Build/build-reject-domainset.js @@ -45,7 +45,10 @@ const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST); if (Array.isArray(input) && input.length === 2) { return processFilterRules(input[0], input[1]); } - }))).forEach(({ white, black }) => { + }))).forEach(({ white, black, foundDebugDomain }) => { + if (foundDebugDomain) { + process.exit(1); + }; white.forEach(i => filterRuleWhitelistDomainSets.add(i)); black.forEach(i => domainSets.add(i)); }); diff --git a/Build/lib/is-domain-loose.js b/Build/lib/is-domain-loose.js index 381fd09d..8ee21582 100644 --- a/Build/lib/is-domain-loose.js +++ b/Build/lib/is-domain-loose.js @@ -1,3 +1,19 @@ -const rDomain = /^(((?!\-))(xn\-\-)?[a-z0-9\-_]{0,61}[a-z0-9]{1,1}\.)*(xn\-\-)?([a-z0-9\-]{1,61}|[a-z0-9\-]{1,30})\.[a-z]{2,}$/m; +const { parse } = require('tldts'); -module.exports.isDomainLoose = (domain) => rDomain.test(domain); +module.exports.isDomainLoose = (domain) => { + const { isIcann, isPrivate, isIp } = parse(domain, { allowPrivateDomains: true }); + return !!(!isIp && (isIcann || isPrivate)); +}; + +module.exports.normalizeDomain = (domain) => { + const { isIcann, isPrivate, hostname, isIp } = parse(domain, { allowPrivateDomains: true }); + if (isIp) { + return null; + } + + if (isIcann || isPrivate) { + return hostname; + }; + + return null; +} diff --git a/Build/lib/parse-filter.js b/Build/lib/parse-filter.js index c5fa00b9..ccd6b174 100644 --- a/Build/lib/parse-filter.js +++ b/Build/lib/parse-filter.js @@ -1,8 +1,8 @@ -const { isIP } = require('net'); const { fetchWithRetry } = require('./fetch-retry'); -const { isDomainLoose } = require('./is-domain-loose'); +const { normalizeDomain } = require('./is-domain-loose'); const DEBUG_DOMAIN_TO_FIND = null; // example.com | null +let foundDebugDomain = false; const warnOnceUrl = new Set(); const warnOnce = (url, isWhite, ...message) => { @@ -42,6 +42,7 @@ async function processDomainLists (domainListsUrl) { if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) { warnOnce(domainListsUrl.toString(), false, DEBUG_DOMAIN_TO_FIND); + foundDebugDomain = true; } domainSets.add(domainToAdd); @@ -73,13 +74,15 @@ async function processHosts (hostsUrl, includeAllSubDomain = false) { return; } const [, ...domains] = line.split(' '); - const domain = domains.join(' ').trim(); + const _domain = domains.join(' ').trim(); - if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) { + if (DEBUG_DOMAIN_TO_FIND && _domain.includes(DEBUG_DOMAIN_TO_FIND)) { warnOnce(hostsUrl.toString(), false, DEBUG_DOMAIN_TO_FIND); + foundDebugDomain = true; } - if (isDomainLoose(domain)) { + const domain = normalizeDomain(_domain); + if (domain) { if (includeAllSubDomain) { domainSets.add(`.${domain}`); } else { @@ -96,7 +99,7 @@ async function processHosts (hostsUrl, includeAllSubDomain = false) { /** * @param {string | URL} filterRulesUrl * @param {(string | URL)[] | undefined} fallbackUrls - * @returns {Promise<{ white: Set, black: Set }>} + * @returns {Promise<{ white: Set, black: Set, foundDebugDomain: boolean }>} */ async function processFilterRules (filterRulesUrl, fallbackUrls) { console.time(` - processFilterRules: ${filterRulesUrl}`); @@ -135,8 +138,6 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) { || line.includes('[') || line.includes('(') || line.includes('$') && !lineStartsWithDoubleVerticalBar - || line === '' - || isIP(line) !== 0 ) { continue; } @@ -145,15 +146,17 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) { const lineEndsWithCaretVerticalBar = line.endsWith('^|'); if (lineStartsWithDoubleVerticalBar && line.endsWith('^$badfilter')) { - const domain = line.replace('||', '').replace('^$badfilter', '').trim(); - if (isDomainLoose(domain)) { + const _domain = line.replace('||', '').replace('^$badfilter', '').trim(); + const domain = normalizeDomain(_domain); + if (domain) { if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) { warnOnce(filterRulesUrl.toString(), true, DEBUG_DOMAIN_TO_FIND); + foundDebugDomain = true; } whitelistDomainSets.add(domain); } else { - console.warn(' * [parse-filter white] ' + domain + ' is not a valid domain'); + console.warn(' * [parse-filter white] ' + _domain + ' is not a valid domain'); } } else if (line.startsWith('@@||') && ( @@ -163,21 +166,25 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) { || line.endsWith('^$1p') ) ) { - const domain = line + const _domain = line .replaceAll('@@||', '') .replaceAll('^$badfilter', '') .replaceAll('^$1p', '') .replaceAll('^|', '') .replaceAll('^', '') .trim(); - if (isDomainLoose(domain)) { + + const domain = normalizeDomain(_domain); + + if (domain) { if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) { warnOnce(filterRulesUrl.toString(), true, DEBUG_DOMAIN_TO_FIND); + foundDebugDomain = true; } whitelistDomainSets.add(domain); } else { - console.warn(' * [parse-filter white] ' + domain + ' is not a valid domain'); + console.warn(' * [parse-filter white] ' + _domain + ' is not a valid domain'); } } else if ( lineStartsWithDoubleVerticalBar @@ -187,16 +194,19 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) { || line.endsWith('^$all') ) ) { - const domain = line + const _domain = line .replaceAll('||', '') .replaceAll('^|', '') .replaceAll('^$all', '') .replaceAll('^', '') .trim(); - if (isDomainLoose(domain)) { + const domain = normalizeDomain(_domain); + + if (domain) { if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) { warnOnce(filterRulesUrl.toString(), false, DEBUG_DOMAIN_TO_FIND); + foundDebugDomain = true; } blacklistDomainSets.add(`.${domain}`); @@ -208,10 +218,12 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) { || lineEndsWithCaretVerticalBar ) ) { - const domain = `${line.replaceAll('://', '').replaceAll('^|', '').replaceAll('^', '')}`.trim(); - if (isDomainLoose(domain)) { + const _domain = `${line.replaceAll('://', '').replaceAll('^|', '').replaceAll('^', '')}`.trim(); + const domain = normalizeDomain(_domain); + if (domain) { if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) { warnOnce(filterRulesUrl.toString(), false, DEBUG_DOMAIN_TO_FIND); + foundDebugDomain = true; } blacklistDomainSets.add(domain); @@ -223,7 +235,8 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) { return { white: whitelistDomainSets, - black: blacklistDomainSets + black: blacklistDomainSets, + foundDebugDomain }; }