From a0a772d2e1c13fb9b5c55e9f5262e2993b775ce5 Mon Sep 17 00:00:00 2001
From: SukkaW
Date: Sun, 10 Dec 2023 23:55:05 +0800
Subject: [PATCH] Chore: simplify build infra

---
 Build/build-anti-bogus-domain.ts  | 28 ++++++--------
 Build/build-cdn-conf.ts           |  6 +--
 Build/build-common.ts             | 32 ++++++++--------
 Build/build-internal-cdn-rules.ts |  6 +--
 Build/build-reject-domainset.ts   | 62 ++++++++++++------------------
 Build/build-telegram-cidr.ts      |  8 ++--
 Build/lib/create-file.ts          |  7 ++--
 Build/lib/fetch-retry.ts          |  3 +-
 Build/lib/is-fast-ip.ts           | 44 ++++++++++++++++++++++
 Build/lib/parse-filter.ts         | 53 +++++++++++++-------------
 Source/ip/reject.conf             |  1 -
 Source/non_ip/cdn.conf            |  1 -
 12 files changed, 137 insertions(+), 114 deletions(-)

diff --git a/Build/build-anti-bogus-domain.ts b/Build/build-anti-bogus-domain.ts
index 59886bde..4af72b63 100644
--- a/Build/build-anti-bogus-domain.ts
+++ b/Build/build-anti-bogus-domain.ts
@@ -1,21 +1,20 @@
 // @ts-check
 import path from 'path';
-import { isIPv4, isIPv6 } from 'net';
 import { createRuleset } from './lib/create-file';
 import { fetchRemoteTextAndReadByLine, readFileByLine } from './lib/fetch-text-by-line';
 import { processLine } from './lib/process-line';
 import { task } from './lib/trace-runner';
 import { SHARED_DESCRIPTION } from './lib/constants';
+import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
 const getBogusNxDomainIPs = async () => {
-  /** @type {string[]} */
-  const result = [];
+  const result: string[] = [];
   for await (const line of await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')) {
-    if (line.startsWith('bogus-nxdomain=')) {
+    if (line && line.startsWith('bogus-nxdomain=')) {
       const ip = line.slice(15).trim();
-      if (isIPv4(ip)) {
+      if (isProbablyIpv4(ip)) {
         result.push(`IP-CIDR,${ip}/32,no-resolve`);
-      } else if (isIPv6(ip)) {
+      } else if (isProbablyIpv6(ip)) {
         result.push(`IP-CIDR6,${ip}/128,no-resolve`);
       }
     }
@@ -26,21 +25,16 @@ const getBogusNxDomainIPs = async () => {
 export const buildAntiBogusDomain = task(import.meta.path, async () => {
   const bogusIpPromise = getBogusNxDomainIPs();
 
-  /** @type {string[]} */
-  const result = [];
+  const result: string[] = [];
   for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/ip/reject.conf'))) {
-    if (line === '# --- [Anti Bogus Domain Replace Me] ---') {
-      // bogus ip is less than 200, no need to worry about "Maximum call stack size exceeded"
-      result.push(...(await bogusIpPromise));
-      continue;
-    } else {
-      const l = processLine(line);
-      if (l) {
-        result.push(l);
-      }
+    const l = processLine(line);
+    if (l) {
+      result.push(l);
     }
   }
 
+  result.push(...(await bogusIpPromise));
+
   const description = [
     ...SHARED_DESCRIPTION,
     '',
diff --git a/Build/build-cdn-conf.ts b/Build/build-cdn-conf.ts
index 6c35521a..b1a986fa 100644
--- a/Build/build-cdn-conf.ts
+++ b/Build/build-cdn-conf.ts
@@ -63,16 +63,14 @@ const buildCdnConf = task(import.meta.path, async () => {
   const getS3OSSDomainsPromise: Promise<Set<string>> = getS3OSSDomains();
 
   for await (const l of readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/cdn.conf'))) {
-    if (l === '# --- [AWS S3 Replace Me] ---') {
-      (await getS3OSSDomainsPromise).forEach((domain: string) => { cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`); });
-      continue;
-    }
     const line = processLine(l);
     if (line) {
       cdnDomainsList.push(line);
     }
   }
 
+  (await getS3OSSDomainsPromise).forEach((domain: string) => { cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`); });
+
   const description: string[] = [
     ...SHARED_DESCRIPTION,
     '',
diff --git a/Build/build-common.ts b/Build/build-common.ts
index 4cc6a5f2..c5ed033d 100644
--- a/Build/build-common.ts
+++ b/Build/build-common.ts
@@ -22,23 +22,25 @@ export const buildCommon = task(import.meta.path, async () => {
 
   const pw = new PathScurry(sourceDir);
   for await (const entry of pw) {
-    if (entry.isFile()) {
-      if (path.extname(entry.name) === '.js') {
-        continue;
-      }
+    if (!entry.isFile()) {
+      continue;
+    }
 
-      const relativePath = entry.relative();
-      if (relativePath.startsWith('domainset/')) {
-        promises.push(transformDomainset(entry.fullpath(), relativePath));
-        continue;
-      }
-      if (
-        relativePath.startsWith('ip/')
+    if (path.extname(entry.name) === '.js') {
+      continue;
+    }
+
+    const relativePath = entry.relative();
+    if (relativePath.startsWith('domainset/')) {
+      promises.push(transformDomainset(entry.fullpath(), relativePath));
+      continue;
+    }
+    if (
+      relativePath.startsWith('ip/')
       || relativePath.startsWith('non_ip/')
-      ) {
-        promises.push(transformRuleset(entry.fullpath(), relativePath));
-        continue;
-      }
+    ) {
+      promises.push(transformRuleset(entry.fullpath(), relativePath));
+      continue;
     }
   }
 
diff --git a/Build/build-internal-cdn-rules.ts b/Build/build-internal-cdn-rules.ts
index 739609bc..8ca8cb48 100644
--- a/Build/build-internal-cdn-rules.ts
+++ b/Build/build-internal-cdn-rules.ts
@@ -15,9 +15,6 @@ export const buildInternalCDNDomains = task(import.meta.path, async () => {
   const set = new Set();
   const keywords = new Set();
 
-  const gorhill = await getGorhillPublicSuffixPromise();
-  const domainSorter = createDomainSorter(gorhill);
-
   const addApexDomain = (input: string) => {
     // We are including the private domains themselves
     const d = tldts.getDomain(input, { allowPrivateDomains: false });
@@ -61,7 +58,8 @@ export const buildInternalCDNDomains = task(import.meta.path, async () => {
     }
   };
 
-  await Promise.all([
+  const [domainSorter] = await Promise.all([
+    getGorhillPublicSuffixPromise().then(createDomainSorter),
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf')),
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf')),
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global_plus.conf')),
diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts
index ba586838..94a08dd3 100644
--- a/Build/build-reject-domainset.ts
+++ b/Build/build-reject-domainset.ts
@@ -18,15 +18,12 @@ import * as tldts from 'tldts';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { getPhishingDomains } from './lib/get-phishing-domains';
 
-/** Whitelists */
-const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
-/** @type {Set} Dedupe domains inclued by DOMAIN-KEYWORD */
-const domainKeywordsSet = new Set();
-/** @type {Set} Dedupe domains included by DOMAIN-SUFFIX */
-const domainSuffixSet = new Set();
-
 export const buildRejectDomainSet = task(import.meta.path, async () => {
-  /** @type Set */
+  /** Whitelists */
+  const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
+  const domainKeywordsSet = new Set();
+  const domainSuffixSet = new Set();
+
   const domainSets = new Set();
 
   // Parse from AdGuard Filters
@@ -38,9 +35,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
     // Parse from remote hosts & domain lists
     ...HOSTS.map(entry => processHosts(entry[0], entry[1]).then(hosts => {
       hosts.forEach(host => {
-        if (host) {
-          domainSets.add(host);
-        }
+        domainSets.add(host);
       });
     })),
     ...ADGUARD_FILTERS.map(input => {
@@ -61,12 +56,8 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
       'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exceptions.txt',
       'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exclusions.txt'
     ].map(input => processFilterRules(input).then(({ white, black }) => {
-      white.forEach(i => {
-        filterRuleWhitelistDomainSets.add(i);
-      });
-      black.forEach(i => {
-        filterRuleWhitelistDomainSets.add(i);
-      });
+      white.forEach(i => filterRuleWhitelistDomainSets.add(i));
+      black.forEach(i => filterRuleWhitelistDomainSets.add(i));
     }))),
     getPhishingDomains().then(([purePhishingDomains, fullDomainSet]) => {
       fullDomainSet.forEach(host => {
@@ -74,10 +65,16 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
           domainSets.add(host);
         }
       });
-      purePhishingDomains.forEach(suffix => {
-        domainSets.add(`.${suffix}`);
-      });
-    })
+      purePhishingDomains.forEach(suffix => domainSets.add(`.${suffix}`));
+    }),
+    (async () => {
+      for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf'))) {
+        const l = processLine(line);
+        if (l) {
+          domainSets.add(l);
+        }
+      }
+    })()
   ]);
 
   // remove pre-defined enforced blacklist from whitelist
@@ -94,17 +91,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
   }
 
   let previousSize = domainSets.size;
-  console.log(`Import ${previousSize} rules from Hosts / AdBlock Filter Rules!`);
-
-  for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf'))) {
-    const l = processLine(line);
-    if (l) {
-      domainSets.add(l);
-    }
-  }
-
-  previousSize = domainSets.size - previousSize;
-  console.log(`Import ${previousSize} rules from reject_sukka.conf!`);
+  console.log(`Import ${previousSize} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
 
   for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/reject.conf'))) {
     const [type, keyword] = line.split(',');
@@ -150,11 +137,11 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
         domainSets.delete(domain);
       }
     }
+
+    console.log(`Deduped ${previousSize} - ${domainSets.size} = ${previousSize - domainSets.size} from black keywords and suffixes!`);
   });
-
-  console.log(`Deduped ${previousSize} - ${domainSets.size} = ${previousSize - domainSets.size} from black keywords and suffixes!`);
-
   previousSize = domainSets.size;
+
   // Dedupe domainSets
   const dudupedDominArray = traceSync('* Dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
   console.log(`Deduped ${previousSize - dudupedDominArray.length} rules!`);
@@ -180,9 +167,6 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
     })
   );
 
-  const domainSorter = createDomainSorter(gorhill);
-  const domainset = traceSync('* Sort reject domainset', () => dudupedDominArray.sort(domainSorter));
-
   const description = [
     ...SHARED_DESCRIPTION,
     '',
@@ -198,7 +182,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
     'Sukka\'s Ruleset - Reject Base',
     description,
     new Date(),
-    domainset,
+    traceSync('* Sort reject domainset', () => dudupedDominArray.sort(createDomainSorter(gorhill))),
     'domainset',
     path.resolve(import.meta.dir, '../List/domainset/reject.conf'),
     path.resolve(import.meta.dir, '../Clash/domainset/reject.txt')
diff --git a/Build/build-telegram-cidr.ts b/Build/build-telegram-cidr.ts
index d1539ad8..84b795d5 100644
--- a/Build/build-telegram-cidr.ts
+++ b/Build/build-telegram-cidr.ts
@@ -2,14 +2,14 @@ import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
 import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line';
 import path from 'path';
-import { isIPv4, isIPv6 } from 'net';
+import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
 import { processLine } from './lib/process-line';
 import { createRuleset } from './lib/create-file';
 import { task } from './lib/trace-runner';
 import { SHARED_DESCRIPTION } from './lib/constants';
 
 export const buildTelegramCIDR = task(import.meta.path, async () => {
-  const resp = await fetchWithRetry('https://core.telegram.org/resources/cidr.txt', defaultRequestInit);
+  const resp = await fetchWithRetry('https://core.telegram.org/resources/cidr.txt', defaultRequestInit) as Response;
   const lastModified = resp.headers.get('last-modified');
   const date = lastModified ? new Date(lastModified) : new Date();
 
@@ -20,10 +20,10 @@ export const buildTelegramCIDR = task(import.meta.path, async () => {
     if (!cidr) continue;
 
     const [subnet] = cidr.split('/');
-    if (isIPv4(subnet)) {
+    if (isProbablyIpv4(subnet)) {
      results.push(`IP-CIDR,${cidr},no-resolve`);
     }
-    if (isIPv6(subnet)) {
+    if (isProbablyIpv6(subnet)) {
       results.push(`IP-CIDR6,${cidr},no-resolve`);
     }
   }
diff --git a/Build/lib/create-file.ts b/Build/lib/create-file.ts
index 604cd011..c294ca53 100644
--- a/Build/lib/create-file.ts
+++ b/Build/lib/create-file.ts
@@ -2,6 +2,7 @@ import { readFileByLine } from './fetch-text-by-line';
 import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash';
 import { traceAsync } from './trace-runner';
+import picocolors from 'picocolors';
 
 export async function compareAndWriteFile(linesA: string[], filePath: string) {
   let isEqual = true;
@@ -45,11 +46,11 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
   }
 
   if (isEqual) {
-    console.log(`Same Content, bail out writing: ${filePath}`);
+    console.log(picocolors.gray(`Same Content, bail out writing: ${filePath}`));
     return;
   }
 
-  await traceAsync(`Writing ${filePath}`, async () => {
+  await traceAsync(picocolors.gray(`Writing ${filePath}`), async () => {
     if (linesALen < 10000) {
       return Bun.write(file, `${linesA.join('\n')}\n`);
     }
@@ -63,7 +64,7 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
 
     await writer.flush();
     return writer.end();
-  });
+  }, picocolors.gray);
 }
 
 export const withBannerArray = (title: string, description: string[], date: Date, content: string[]) => {
diff --git a/Build/lib/fetch-retry.ts b/Build/lib/fetch-retry.ts
index 969e14e3..bc042d09 100644
--- a/Build/lib/fetch-retry.ts
+++ b/Build/lib/fetch-retry.ts
@@ -1,4 +1,5 @@
 import retry from 'async-retry';
+import picocolors from 'picocolors';
 
 // retry settings
 const MIN_TIMEOUT = 10;
@@ -86,7 +87,7 @@ function createFetchRetry($fetch: typeof fetch): typeof fetch {
             err.name === 'AbortError'
             || ('digest' in err && err.digest === 'AbortError')
           ) {
-            console.log('[fetch abort]', url.toString());
+            console.log(picocolors.gray('[fetch abort]'), picocolors.gray(url.toString()));
             return bail(err);
           }
         }
diff --git a/Build/lib/is-fast-ip.ts b/Build/lib/is-fast-ip.ts
index 8b15aba1..a2abc4b1 100644
--- a/Build/lib/is-fast-ip.ts
+++ b/Build/lib/is-fast-ip.ts
@@ -31,3 +31,47 @@ export function isProbablyIpv4(hostname: string): boolean {
     && /* '.' */ hostname.charCodeAt(hostname.length - 1) !== 46 /* '.' */
   );
 }
+
+export function isProbablyIpv6(hostname: string): boolean {
+  if (hostname.length < 3) {
+    return false;
+  }
+
+  let start = hostname[0] === '[' ? 1 : 0;
+  let end = hostname.length;
+
+  if (hostname[end - 1] === ']') {
+    end -= 1;
+  }
+
+  // We only consider the maximum size of a normal IPV6. Note that this will
+  // fail on so-called "IPv4 mapped IPv6 addresses" but this is a corner-case
+  // and a proper validation library should be used for these.
+  if (end - start > 39) {
+    return false;
+  }
+
+  /* eslint-disable sukka/no-single-return -- here it goes */
+  let hasColon = false;
+
+  for (; start < end; start += 1) {
+    const code = hostname.charCodeAt(start);
+
+    if (code === 58 /* ':' */) {
+      hasColon = true;
+    } else if (
+      !(
+        (
+          (code >= 48 && code <= 57) // 0-9
+          || (code >= 97 && code <= 102) // a-f
+          || (code >= 65 && code <= 70) // A-F
+        )
+      )
+    ) {
+      return false;
+    }
+  }
+
+  return hasColon;
+  /* eslint-enable sukka/no-single-return -- here it goes */
+}
diff --git a/Build/lib/parse-filter.ts b/Build/lib/parse-filter.ts
index f8951d70..7e6a4739 100644
--- a/Build/lib/parse-filter.ts
+++ b/Build/lib/parse-filter.ts
@@ -51,11 +51,7 @@ export async function processDomainLists(domainListsUrl: string, includeAllSubDomain = false
       foundDebugDomain = true;
     }
 
-    if (includeAllSubDomain) {
-      domainSets.add(`.${domainToAdd}`);
-    } else {
-      domainSets.add(domainToAdd);
-    }
+    domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
   }
 
   return domainSets;
@@ -90,6 +86,8 @@ export async function processHosts(hostsUrl: string, includeAllSubDomain = false
       }
     }
 
+    console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size));
+
     return domainSets;
   });
 }
@@ -159,7 +157,7 @@ export async function processFilterRules(
         warningMessages.push(hostname);
         break;
       default:
-        throw new Error(`Unknown flag: ${flag as any}`);
+        break;
     }
   };
 
@@ -187,6 +185,13 @@ export async function processFilterRules(
     );
   });
 
+  console.log(
+    picocolors.gray('[process filter]'),
+    picocolors.gray(filterRulesUrl),
+    picocolors.gray(`white: ${whitelistDomainSets.size}`),
+    picocolors.gray(`black: ${blacklistDomainSets.size}`)
+  );
+
   return {
     white: whitelistDomainSets,
     black: blacklistDomainSets,
@@ -569,25 +574,23 @@ class CustomAbortError extends Error {
   public readonly digest = 'AbortError';
 }
 
-function sleepWithAbort(ms: number, signal: AbortSignal) {
-  return new Promise<void>((resolve, reject) => {
-    signal.throwIfAborted();
-    signal.addEventListener('abort', stop);
-    Bun.sleep(ms).then(done).catch(doReject);
+const sleepWithAbort = (ms: number, signal: AbortSignal) => new Promise<void>((resolve, reject) => {
+  signal.throwIfAborted();
+  signal.addEventListener('abort', stop);
+  Bun.sleep(ms).then(done).catch(doReject);
 
-    function done() {
-      signal.removeEventListener('abort', stop);
-      resolve();
-    }
-    function stop(this: AbortSignal) {
-      reject(this.reason);
-    }
-    function doReject(reason: unknown) {
-      signal.removeEventListener('abort', stop);
-      reject(reason);
-    }
-  });
-}
+  function done() {
+    signal.removeEventListener('abort', stop);
+    resolve();
+  }
+  function stop(this: AbortSignal) {
+    reject(this.reason);
+  }
+  function doReject(reason: unknown) {
+    signal.removeEventListener('abort', stop);
+    reject(reason);
+  }
+});
 
 async function fetchAssets(url: string, fallbackUrls: string[] | readonly string[]) {
   const controller = new AbortController();
@@ -602,7 +605,7 @@ async function fetchAssets(url: string, fallbackUrls: string[] | readonly string[]
   const createFetchFallbackPromise = async (url: string, index: number) => {
     // Most assets can be downloaded within 250ms. To avoid wasting bandwidth, we will wait for 350ms before downloading from the fallback URL.
     try {
-      await sleepWithAbort(200 + (index + 1) * 10, controller.signal);
+      await sleepWithAbort(300 + (index + 1) * 20, controller.signal);
     } catch {
       console.log(picocolors.gray('[fetch cancelled early]'), picocolors.gray(url));
       throw new CustomAbortError();
     }
diff --git a/Source/ip/reject.conf b/Source/ip/reject.conf
index a15f5505..ec9f84b9 100644
--- a/Source/ip/reject.conf
+++ b/Source/ip/reject.conf
@@ -58,4 +58,3 @@ IP-CIDR,222.73.156.235/32,no-resolve
 
 # --- Anti-Bogus Domain ---
 # https://github.com/felixonmars/dnsmasq-china-list/blob/master/bogus-nxdomain.china.conf
-# --- [Anti Bogus Domain Replace Me] ---
diff --git a/Source/non_ip/cdn.conf b/Source/non_ip/cdn.conf
index 3a652842..219cc7ea 100644
--- a/Source/non_ip/cdn.conf
+++ b/Source/non_ip/cdn.conf
@@ -108,4 +108,3 @@ DOMAIN-SUFFIX,cos.eu-frankfurt.myqcloud.com
 DOMAIN-SUFFIX,ks3-cn-hk-1.ksyuncs.com
 DOMAIN-SUFFIX,ks3-rus.ksyuncs.com
 DOMAIN-SUFFIX,ks3-sgp.ksyuncs.com
-# --- [AWS S3 Replace Me] ---
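
Usage sketch (not part of the commit above) for the isProbablyIpv4 / isProbablyIpv6 helpers this patch swaps in for net.isIPv4 / net.isIPv6: a minimal TypeScript example that assumes only the Build/lib/is-fast-ip.ts exports shown in the diff. The /32 and /128 prefixes and the no-resolve flag mirror build-anti-bogus-domain.ts; ipToRule itself is a hypothetical helper, not something the repo defines.

// usage-sketch.ts -- hypothetical, for illustration only
import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';

// Map a bare IP string to a Surge rule line, the same way
// build-anti-bogus-domain.ts and build-telegram-cidr.ts consume these checks.
const ipToRule = (ip: string): string | null => {
  if (isProbablyIpv4(ip)) return `IP-CIDR,${ip}/32,no-resolve`;
  if (isProbablyIpv6(ip)) return `IP-CIDR6,${ip}/128,no-resolve`;
  return null; // not an IP literal -- caller decides what to do
};

console.log(ipToRule('119.29.29.29'));         // IP-CIDR,119.29.29.29/32,no-resolve
console.log(ipToRule('2001:4860:4860::8888')); // IP-CIDR6,2001:4860:4860::8888/128,no-resolve
console.log(ipToRule('example.com'));          // null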