From 4808ed8d27e2880c3baa8c4f5fc4d5321087868d Mon Sep 17 00:00:00 2001
From: SukkaW
Date: Sat, 21 Sep 2024 03:36:44 +0800
Subject: [PATCH] Refactor: adapt new output

---
 ...c-direct-lan-ruleset-dns-mapping-module.ts |  53 ++---
 Build/build-microsoft-cdn.ts                  |  20 +-
 Build/build-reject-domainset.ts               | 184 +++++-------------
 Build/build-reject-ip-list.ts                 |  84 ++++----
 Build/build-stream-service.ts                 |  44 ++---
 Build/build-telegram-cidr.ts                  |  18 +-
 Build/lib/create-file-new.ts                  |  70 ++++++-
 7 files changed, 204 insertions(+), 269 deletions(-)

diff --git a/Build/build-domestic-direct-lan-ruleset-dns-mapping-module.ts b/Build/build-domestic-direct-lan-ruleset-dns-mapping-module.ts
index 192c45e2..85e8e1fe 100644
--- a/Build/build-domestic-direct-lan-ruleset-dns-mapping-module.ts
+++ b/Build/build-domestic-direct-lan-ruleset-dns-mapping-module.ts
@@ -3,14 +3,15 @@ import path from 'node:path';
 import { DOMESTICS } from '../Source/non_ip/domestic';
 import { DIRECTS, LANS } from '../Source/non_ip/direct';
 import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
-import { compareAndWriteFile, createRuleset } from './lib/create-file';
+import { compareAndWriteFile } from './lib/create-file';
 import { task } from './trace';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { createMemoizedPromise } from './lib/memo-promise';
 import * as yaml from 'yaml';
 import { appendArrayInPlace } from './lib/append-array-in-place';
-import { output, writeFile } from './lib/misc';
+import { writeFile } from './lib/misc';
 import { OUTPUT_INTERNAL_DIR, OUTPUT_MODULES_DIR, SOURCE_DIR } from './constants/dir';
+import { RulesetOutput } from './lib/create-file-new';
 
 export const getDomesticAndDirectDomainsRulesetPromise = createMemoizedPromise(async () => {
   const domestics = await readFileIntoProcessedArray(path.join(SOURCE_DIR, 'non_ip/domestic.conf'));
@@ -38,45 +39,33 @@ export const buildDomesticRuleset = task(require.main === module, __filename)(as
   appendArrayInPlace(dataset, Object.entries(LANS));
 
   return Promise.all([
-    createRuleset(
-      span,
-      'Sukka\'s Ruleset - Domestic Domains',
-      [
+    new RulesetOutput(span, 'domestic', 'non_ip')
+      .withTitle('Sukka\'s Ruleset - Domestic Domains')
+      .withDescription([
         ...SHARED_DESCRIPTION,
         '',
         'This file contains known addresses that are avaliable in the Mainland China.'
-      ],
-      new Date(),
-      res[0],
-      'ruleset',
-      output('domestic', 'non_ip')
-    ),
-    createRuleset(
-      span,
-      'Sukka\'s Ruleset - Direct Rules',
-      [
+      ])
+      .addFromRuleset(res[0])
+      .write(),
+    new RulesetOutput(span, 'direct', 'non_ip')
+      .withTitle('Sukka\'s Ruleset - Direct Rules')
+      .withDescription([
         ...SHARED_DESCRIPTION,
         '',
         'This file contains domains and process that should not be proxied.'
-      ],
-      new Date(),
-      res[1],
-      'ruleset',
-      output('direct', 'non_ip')
-    ),
-    createRuleset(
-      span,
-      'Sukka\'s Ruleset - LAN',
-      [
+      ])
+      .addFromRuleset(res[1])
+      .write(),
+    new RulesetOutput(span, 'lan', 'non_ip')
+      .withTitle('Sukka\'s Ruleset - LAN')
+      .withDescription([
         ...SHARED_DESCRIPTION,
         '',
         'This file includes rules for LAN DOMAIN and reserved TLDs.'
-      ],
-      new Date(),
-      res[2],
-      'ruleset',
-      output('lan', 'non_ip')
-    ),
+      ])
+      .addFromRuleset(res[2])
+      .write(),
     compareAndWriteFile(
       span,
       [
diff --git a/Build/build-microsoft-cdn.ts b/Build/build-microsoft-cdn.ts
index 1052b453..d5a0e4d4 100644
--- a/Build/build-microsoft-cdn.ts
+++ b/Build/build-microsoft-cdn.ts
@@ -1,12 +1,10 @@
 import { task } from './trace';
-import { createRuleset } from './lib/create-file';
 import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
 import { createTrie } from './lib/trie';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { createMemoizedPromise } from './lib/memo-promise';
 import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
-import { sortDomains } from './lib/stable-sort-domain';
-import { output } from './lib/misc';
+import { RulesetOutput } from './lib/create-file-new';
 
 const PROBE_DOMAINS = ['.microsoft.com', '.windows.net', '.windows.com', '.windowsupdate.com', '.windowssearch.com', '.office.net'];
 
@@ -39,7 +37,7 @@ export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
 
   const trie2 = createTrie(foundMicrosoftCdnDomains, true);
   BLACKLIST.forEach(trie2.whitelist);
-  return sortDomains(trie2.dump())
+  return trie2.dump()
     .map(d => `DOMAIN-SUFFIX,${d}`)
     .concat(WHITELIST);
 });
@@ -56,13 +54,9 @@ export const buildMicrosoftCdn = task(require.main === module, __filename)(async
 
   const res: string[] = await span.traceChildPromise('get microsoft cdn domains', getMicrosoftCdnRulesetPromise());
 
-  return createRuleset(
-    span,
-    'Sukka\'s Ruleset - Microsoft CDN',
-    description,
-    new Date(),
-    res,
-    'ruleset',
-    output('microsoft_cdn', 'non_ip')
-  );
+  return new RulesetOutput(span, 'microsoft_cdn', 'non_ip')
+    .withTitle('Sukka\'s Ruleset - Microsoft CDN')
+    .withDescription(description)
+    .addFromRuleset(res)
+    .write();
 });
diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts
index e1a8aea5..40934bfc 100644
--- a/Build/build-reject-domainset.ts
+++ b/Build/build-reject-domainset.ts
@@ -3,13 +3,10 @@ import path from 'node:path';
 import process from 'node:process';
 
 import { processHosts, processFilterRules, processDomainLists } from './lib/parse-filter';
-import { createTrie } from './lib/trie';
 import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA } from './constants/reject-data-source';
-import { createRuleset, compareAndWriteFile } from './lib/create-file';
-import createKeywordFilter from './lib/aho-corasick';
+import { compareAndWriteFile } from './lib/create-file';
 import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
-import { buildParseDomainMap, sortDomains } from './lib/stable-sort-domain';
 import { task } from './trace';
 // tldts-experimental is way faster than tldts, but very little bit inaccurate
 // (since it is hashes based). But the result is still deterministic, which is
@@ -17,23 +14,47 @@ import { task } from './trace';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { getPhishingDomains } from './lib/get-phishing-domains';
-import { setAddFromArray, setAddFromArrayCurried } from './lib/set-add-from-array';
-import { output } from './lib/misc';
+import { setAddFromArray } from './lib/set-add-from-array';
 import { appendArrayInPlace } from './lib/append-array-in-place';
 import { OUTPUT_INTERNAL_DIR, SOURCE_DIR } from './constants/dir';
+import { DomainsetOutput } from './lib/create-file-new';
 
 const getRejectSukkaConfPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/reject_sukka.conf'));
 
 export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => {
+  const rejectOutput = new DomainsetOutput(span, 'reject')
+    .withTitle('Sukka\'s Ruleset - Reject Base')
+    .withDescription([
+      ...SHARED_DESCRIPTION,
+      '',
+      'The domainset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining',
+      '',
+      'Build from:',
+      ...HOSTS.map(host => ` - ${host[0]}`),
+      ...DOMAIN_LISTS.map(domainList => ` - ${domainList[0]}`),
+      ...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
+    ]);
+
+  const rejectExtraOutput = new DomainsetOutput(span, 'reject_extra')
+    .withTitle('Sukka\'s Ruleset - Reject Extra')
+    .withDescription([
+      ...SHARED_DESCRIPTION,
+      '',
+      'The domainset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining',
+      '',
+      'Build from:',
+      ...HOSTS_EXTRA.map(host => ` - ${host[0]}`),
+      ...DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`),
+      ...ADGUARD_FILTERS_EXTRA.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`),
+      ...PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`)
+    ]);
+
+  const appendArrayToRejectOutput = rejectOutput.addFromDomainset.bind(rejectOutput);
+  const appendArrayToRejectExtraOutput = rejectExtraOutput.addFromDomainset.bind(rejectExtraOutput);
+
   /** Whitelists */
   const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
 
-  const domainSets = new Set<string>();
-  const appendArrayToDomainSets = setAddFromArrayCurried(domainSets);
-
-  const domainSetsExtra = new Set<string>();
-  const appendArrayToDomainSetsExtra = setAddFromArrayCurried(domainSetsExtra);
-
   // Parse from AdGuard Filters
   const shouldStop = await span
     .traceChild('download and process hosts / adblock filter rules')
@@ -42,11 +63,11 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
       let shouldStop = false;
       await Promise.all([
         // Parse from remote hosts & domain lists
-        HOSTS.map(entry => processHosts(childSpan, ...entry).then(appendArrayToDomainSets)),
-        HOSTS_EXTRA.map(entry => processHosts(childSpan, ...entry).then(appendArrayToDomainSetsExtra)),
+        HOSTS.map(entry => processHosts(childSpan, ...entry).then(appendArrayToRejectOutput)),
+        HOSTS_EXTRA.map(entry => processHosts(childSpan, ...entry).then(appendArrayToRejectExtraOutput)),
 
-        DOMAIN_LISTS.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToDomainSets)),
-        DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToDomainSetsExtra)),
+        DOMAIN_LISTS.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToRejectOutput)),
+        DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToRejectExtraOutput)),
 
         ADGUARD_FILTERS.map(
           entry => processFilterRules(childSpan, ...entry)
@@ -57,7 +78,7 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
             // we should not break here, as we want to see full matches from all data source
           }
           setAddFromArray(filterRuleWhitelistDomainSets, white);
-          setAddFromArray(domainSets, black);
+          appendArrayToRejectOutput(black);
         })
         ),
         ADGUARD_FILTERS_EXTRA.map(
@@ -69,7 +90,7 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
            // we should not break here, as we want to see full matches from all data source
           }
           setAddFromArray(filterRuleWhitelistDomainSets, white);
-          setAddFromArray(domainSetsExtra, black);
+          appendArrayToRejectExtraOutput(black);
         })
         ),
 
@@ -82,8 +103,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
            setAddFromArray(filterRuleWhitelistDomainSets, black);
           })
         )),
-        getPhishingDomains(childSpan).then(appendArrayToDomainSetsExtra),
-        getRejectSukkaConfPromise.then(appendArrayToDomainSets)
+        getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput),
+        getRejectSukkaConfPromise.then(appendArrayToRejectOutput)
       ].flat());
       // eslint-disable-next-line sukka/no-single-return -- not single return
       return shouldStop;
@@ -93,72 +114,23 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
     process.exit(1);
   }
 
-  console.log(`Import ${domainSets.size} + ${domainSetsExtra.size} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
-  // Dedupe domainSets
-  const domainKeywordsSet = await span.traceChildAsync('collect black keywords/suffixes', async () => {
+  await span.traceChildAsync('collect black keywords/suffixes', async () => {
     /** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */
-    const domainKeywordsSet = new Set<string>();
-
     for await (const line of readFileByLine(path.resolve(__dirname, '../Source/non_ip/reject.conf'))) {
       const [type, value] = line.split(',');
       if (type === 'DOMAIN-KEYWORD') {
-        domainKeywordsSet.add(value);
+        rejectOutput.addDomainKeyword(value); // Add for later deduplication
+        rejectExtraOutput.addDomainKeyword(value); // Add for later deduplication
       } else if (type === 'DOMAIN-SUFFIX') {
-        domainSets.add('.' + value); // Add to domainSets for later deduplication
+        rejectOutput.addDomainSuffix(value); // Add for later deduplication
      }
    }
-
-    return domainKeywordsSet;
   });
 
-  const [baseTrie, extraTrie] = span.traceChildSync('create smol trie while deduping black keywords', (childSpan) => {
-    const baseTrie = createTrie(null, true);
-    const extraTrie = createTrie(null, true);
-
-    const kwfilter = createKeywordFilter(domainKeywordsSet);
-
-    childSpan.traceChildSync('add items to trie (extra)', () => {
-      for (const domain of domainSetsExtra) {
-        // exclude keyword when creating trie
-        if (!kwfilter(domain)) {
-          extraTrie.add(domain);
-        }
-      }
-    });
-
-    childSpan.traceChildSync('add items to trie (base) + dedupe extra trie', () => {
-      for (const domain of domainSets) {
-        // exclude keyword when creating trie
-        if (!kwfilter(domain)) {
-          baseTrie.add(domain);
-          extraTrie.whitelist(domain);
-        }
-      }
-    });
-
-    return [baseTrie, extraTrie] as const;
-  });
-
-  span.traceChildSync('dedupe from white suffixes (base)', () => filterRuleWhitelistDomainSets.forEach(baseTrie.whitelist));
-  span.traceChildSync('dedupe from white suffixes and base (extra)', () => {
-    filterRuleWhitelistDomainSets.forEach(extraTrie.whitelist);
-  });
-
-  // Dedupe domainSets
-  const dedupedDominArray = span.traceChildSync('dedupe from covered subdomain (base)', () => baseTrie.dump());
-  const dudupedDominArrayExtra = span.traceChildSync('dedupe from covered subdomain (extra)', () => extraTrie.dump());
-
-  console.log(`Final size ${dedupedDominArray.length} + ${dudupedDominArrayExtra.length}`);
-
-  const {
-    domainMap: domainArrayMainDomainMap,
-    subdomainMap: domainArraySubdomainMap
-  } = span.traceChildSync(
-    'build map for stat and sort',
-    () => buildParseDomainMap(dedupedDominArray.concat(dudupedDominArrayExtra))
-  );
+  rejectOutput.calcDomainMap();
+  rejectExtraOutput.calcDomainMap();
 
   // Create reject stats
   const rejectDomainsStats: string[] = span
@@ -166,50 +138,15 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
     .traceSyncFn(() => {
       const results = [];
       results.push('=== base ===');
-      appendArrayInPlace(results, getStatMap(dedupedDominArray, domainArrayMainDomainMap));
+      appendArrayInPlace(results, rejectOutput.getStatMap());
       results.push('=== extra ===');
-      appendArrayInPlace(results, getStatMap(dudupedDominArrayExtra, domainArrayMainDomainMap));
+      appendArrayInPlace(results, rejectExtraOutput.getStatMap());
       return results;
     });
 
   return Promise.all([
-    createRuleset(
-      span,
-      'Sukka\'s Ruleset - Reject Base',
-      [
-        ...SHARED_DESCRIPTION,
-        '',
-        'The domainset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining',
-        '',
-        'Build from:',
-        ...HOSTS.map(host => ` - ${host[0]}`),
-        ...DOMAIN_LISTS.map(domainList => ` - ${domainList[0]}`),
-        ...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
-      ],
-      new Date(),
-      span.traceChildSync('sort reject domainset (base)', () => sortDomains(dedupedDominArray, domainArrayMainDomainMap, domainArraySubdomainMap)),
-      'domainset',
-      output('reject', 'domainset')
-    ),
-    createRuleset(
-      span,
-      'Sukka\'s Ruleset - Reject Extra',
-      [
-        ...SHARED_DESCRIPTION,
-        '',
-        'The domainset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining',
-        '',
-        'Build from:',
-        ...HOSTS_EXTRA.map(host => ` - ${host[0]}`),
-        ...DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`),
-        ...ADGUARD_FILTERS_EXTRA.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`),
-        ...PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`)
-      ],
-      new Date(),
-      span.traceChildSync('sort reject domainset (extra)', () => sortDomains(dudupedDominArrayExtra, domainArrayMainDomainMap, domainArraySubdomainMap)),
-      'domainset',
-      output('reject_extra', 'domainset')
-    ),
+    rejectOutput.write(),
+    rejectExtraOutput.write(),
     compareAndWriteFile(
       span,
       rejectDomainsStats,
     )
   ]);
 });
-
-function getStatMap(domains: string[], domainArrayMainDomainMap: Map<string, string>): string[] {
-  return Array.from(
-    (
-      domains.reduce<Map<string, number>>((acc, cur) => {
-        const suffix = domainArrayMainDomainMap.get(cur);
-        if (suffix) {
-          acc.set(suffix, (acc.get(suffix) ?? 0) + 1);
-        }
-        return acc;
-      }, new Map())
-    ).entries()
-  )
-    .filter(a => a[1] > 9)
-    .sort(
-      (a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0])
-    )
-    .map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`);
-};
diff --git a/Build/build-reject-ip-list.ts b/Build/build-reject-ip-list.ts
index b1446aaf..2daa080e 100644
--- a/Build/build-reject-ip-list.ts
+++ b/Build/build-reject-ip-list.ts
@@ -1,40 +1,40 @@
 // @ts-check
 import path from 'node:path';
-import { createRuleset } from './lib/create-file';
 import { fetchRemoteTextByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
 import { task } from './trace';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
-import { TTL, deserializeArray, fsFetchCache, serializeArray, createCacheKey } from './lib/cache-filesystem';
+import { TTL, fsFetchCache, createCacheKey } from './lib/cache-filesystem';
 import { fetchAssets } from './lib/fetch-assets';
 import { processLine } from './lib/process-line';
-import { appendArrayInPlace } from './lib/append-array-in-place';
-import { output } from './lib/misc';
+import { RulesetOutput } from './lib/create-file-new';
+import { SOURCE_DIR } from './constants/dir';
 
 const cacheKey = createCacheKey(__filename);
 
 const BOGUS_NXDOMAIN_URL = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf';
 
-const getBogusNxDomainIPsPromise = fsFetchCache.apply(
+const getBogusNxDomainIPsPromise = fsFetchCache.apply<[ipv4: string[], ipv6: string[]]>(
   cacheKey(BOGUS_NXDOMAIN_URL),
   async () => {
-    const result: string[] = [];
+    const ipv4: string[] = [];
+    const ipv6: string[] = [];
     for await (const line of await fetchRemoteTextByLine(BOGUS_NXDOMAIN_URL)) {
       if (line.startsWith('bogus-nxdomain=')) {
         const ip = line.slice(15).trim();
         if (isProbablyIpv4(ip)) {
-          result.push(`IP-CIDR,${ip}/32,no-resolve`);
+          ipv4.push(ip);
         } else if (isProbablyIpv6(ip)) {
-          result.push(`IP-CIDR6,${ip}/128,no-resolve`);
+          ipv6.push(ip);
         }
       }
     }
-    return result;
+    return [ipv4, ipv6] as const;
   },
   {
     ttl: TTL.ONE_WEEK(),
-    serializer: serializeArray,
-    deserializer: deserializeArray
+    serializer: JSON.stringify,
+    deserializer: JSON.parse
   }
 );
 
@@ -45,62 +45,50 @@ const BOTNET_FILTER_MIRROR_URL = [
   'https://malware-filter.pages.dev/botnet-filter-dnscrypt-blocked-ips.txt'
 ];
 
-const getBotNetFilterIPsPromise = fsFetchCache.apply(
+const getBotNetFilterIPsPromise = fsFetchCache.apply<[ipv4: string[], ipv6: string[]]>(
   cacheKey(BOTNET_FILTER_URL),
   async () => {
     const text = await fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL);
-    return text.split('\n').reduce((acc, cur) => {
+    return text.split('\n').reduce<[ipv4: string[], ipv6: string[]]>((acc, cur) => {
       const ip = processLine(cur);
       if (ip) {
        if (isProbablyIpv4(ip)) {
-          acc.push(`IP-CIDR,${ip}/32,no-resolve`);
+          acc[0].push(ip);
        } else if (isProbablyIpv6(ip)) {
-          acc.push(`IP-CIDR6,${ip}/128,no-resolve`);
+          acc[1].push(ip);
        }
      }
      return acc;
-    }, []);
+    }, [[], []]);
   },
   {
     ttl: TTL.TWLVE_HOURS(),
-    serializer: serializeArray,
-    deserializer: deserializeArray
+    serializer: JSON.stringify,
+    deserializer: JSON.parse
   }
 );
 
-const localRejectIPSourcesPromise = readFileIntoProcessedArray(path.resolve(__dirname, '../Source/ip/reject.conf'));
-
 export const buildRejectIPList = task(require.main === module, __filename)(async (span) => {
-  const result = await localRejectIPSourcesPromise;
-
-  const results = await Promise.all([
+  const [bogusNxDomainIPs, botNetIPs] = await Promise.all([
     span.traceChildPromise('get bogus nxdomain ips', getBogusNxDomainIPsPromise),
     span.traceChildPromise('get botnet ips', getBotNetFilterIPsPromise)
   ]);
 
-  const bogusNxDomainIPs = results[0];
-  const botNetIPs = results[1];
-
-  appendArrayInPlace(result, bogusNxDomainIPs);
-  appendArrayInPlace(result, botNetIPs);
-
-  const description = [
-    ...SHARED_DESCRIPTION,
-    '',
-    'This file contains known addresses that are hijacking NXDOMAIN results returned by DNS servers, and botnet controller IPs.',
-    '',
-    'Data from:',
-    ' - https://github.com/felixonmars/dnsmasq-china-list',
-    ' - https://github.com/curbengh/botnet-filter'
-  ];
-
-  return createRuleset(
-    span,
-    'Sukka\'s Ruleset - Anti Bogus Domain',
-    description,
-    new Date(),
-    result,
-    'ruleset',
-    output('reject', 'ip')
-  );
+  return new RulesetOutput(span, 'reject', 'ip')
+    .withTitle('Sukka\'s Ruleset - Anti Bogus Domain')
+    .withDescription([
+      ...SHARED_DESCRIPTION,
+      '',
+      'This file contains known addresses that are hijacking NXDOMAIN results returned by DNS servers, and botnet controller IPs.',
+      '',
+      'Data from:',
+      ' - https://github.com/felixonmars/dnsmasq-china-list',
+      ' - https://github.com/curbengh/botnet-filter'
+    ])
+    .addFromRuleset(await readFileIntoProcessedArray(path.resolve(SOURCE_DIR, 'ip/reject.conf')))
+    .bulkAddCIDR4NoResolve(bogusNxDomainIPs[0])
+    .bulkAddCIDR6NoResolve(bogusNxDomainIPs[1])
+    .bulkAddCIDR4NoResolve(botNetIPs[0])
+    .bulkAddCIDR6NoResolve(botNetIPs[1])
+    .write();
 });
diff --git a/Build/build-stream-service.ts b/Build/build-stream-service.ts
index 7d4806e6..66ebd72c 100644
--- a/Build/build-stream-service.ts
+++ b/Build/build-stream-service.ts
@@ -2,49 +2,33 @@ import type { Span } from './trace';
 import { task } from './trace';
-import { createRuleset } from './lib/create-file';
-
 import { ALL, NORTH_AMERICA, EU, HK, TW, JP, KR } from '../Source/stream';
 import { SHARED_DESCRIPTION } from './lib/constants';
-import { output } from './lib/misc';
+import { RulesetOutput } from './lib/create-file-new';
 
 export const createRulesetForStreamService = (span: Span, fileId: string, title: string, streamServices: Array) => {
   return span.traceChildAsync(fileId, async (childSpan) => Promise.all([
     // Domains
-    createRuleset(
-      childSpan,
-      `Sukka's Ruleset - Stream Services: ${title}`,
-      [
+    new RulesetOutput(childSpan, fileId, 'non_ip')
+      .withTitle(`Sukka's Ruleset - Stream Services: ${title}`)
+      .withDescription([
        ...SHARED_DESCRIPTION,
        '',
        ...streamServices.map((i) => `- ${i.name}`)
-      ],
-      new Date(),
-      streamServices.flatMap((i) => i.rules),
-      'ruleset',
-      output(fileId, 'non_ip')
-    ),
+      ])
+      .addFromRuleset(streamServices.flatMap((i) => i.rules))
+      .write(),
     // IP
-    createRuleset(
-      childSpan,
-      `Sukka's Ruleset - Stream Services' IPs: ${title}`,
-      [
+    new RulesetOutput(childSpan, fileId, 'ip')
+      .withTitle(`Sukka's Ruleset - Stream Services IPs: ${title}`)
+      .withDescription([
        ...SHARED_DESCRIPTION,
        '',
        ...streamServices.map((i) => `- ${i.name}`)
-      ],
-      new Date(),
-      streamServices.flatMap((i) => (
-        i.ip
-          ? [
-            ...i.ip.v4.map((ip) => `IP-CIDR,${ip},no-resolve`),
-            ...i.ip.v6.map((ip) => `IP-CIDR6,${ip},no-resolve`)
-          ]
-          : []
-      )),
-      'ruleset',
-      output(fileId, 'ip')
-    )
+      ])
+      .bulkAddCIDR4NoResolve(streamServices.flatMap(i => i.ip?.v4 ?? []))
+      .bulkAddCIDR6NoResolve(streamServices.flatMap(i => i.ip?.v6 ?? []))
+      .write()
   ]));
 };
diff --git a/Build/build-telegram-cidr.ts b/Build/build-telegram-cidr.ts
index 592f3443..164071a2 100644
--- a/Build/build-telegram-cidr.ts
+++ b/Build/build-telegram-cidr.ts
@@ -3,11 +3,10 @@ import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
 import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line';
 import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
 import { processLine } from './lib/process-line';
-import { createRuleset } from './lib/create-file';
 import { task } from './trace';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { createMemoizedPromise } from './lib/memo-promise';
-import { output } from './lib/misc';
+import { RulesetOutput } from './lib/create-file-new';
 
 export const getTelegramCIDRPromise = createMemoizedPromise(async () => {
   const resp = await fetchWithRetry('https://core.telegram.org/resources/cidr.txt', defaultRequestInit);
@@ -45,13 +44,10 @@ export const buildTelegramCIDR = task(require.main === module, __filename)(async
     ' - https://core.telegram.org/resources/cidr.txt'
   ];
 
-  return createRuleset(
-    span,
-    'Sukka\'s Ruleset - Telegram IP CIDR',
-    description,
-    date,
-    results,
-    'ruleset',
-    output('telegram', 'ip')
-  );
+  return new RulesetOutput(span, 'telegram', 'ip')
+    .withTitle('Sukka\'s Ruleset - Telegram IP CIDR')
+    .withDescription(description)
+    .withDate(date)
+    .addFromRuleset(results)
+    .write();
 });
diff --git a/Build/lib/create-file-new.ts b/Build/lib/create-file-new.ts
index fe437e31..552464c0 100644
--- a/Build/lib/create-file-new.ts
+++ b/Build/lib/create-file-new.ts
@@ -4,12 +4,14 @@ import type { Span } from '../trace';
 import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash';
 import { compareAndWriteFile, defaultSortTypeOrder, sortTypeOrder, withBannerArray } from './create-file';
 import { ipCidrListToSingbox, surgeDomainsetToSingbox, surgeRulesetToSingbox } from './singbox';
-import { sortDomains } from './stable-sort-domain';
+import { buildParseDomainMap, sortDomains } from './stable-sort-domain';
 import { createTrie } from './trie';
 import { invariant } from 'foxact/invariant';
 import { OUTPUT_CLASH_DIR, OUTPUT_SINGBOX_DIR, OUTPUT_SURGE_DIR } from '../constants/dir';
 import stringify from 'json-stringify-pretty-compact';
 import { appendArrayInPlace } from './append-array-in-place';
+import { nullthrow } from 'foxact/nullthrow';
+import createKeywordFilter from './aho-corasick';
 
 abstract class RuleOutput {
   protected domainTrie = createTrie(null, true);
@@ -146,6 +148,13 @@ abstract class RuleOutput {
     return this;
   }
 
+  bulkAddCIDR4NoResolve(cidr: string[]) {
+    for (let i = 0, len = cidr.length; i < len; i++) {
+      this.ipcidrNoResolve.add(cidr[i]);
+    }
+    return this;
+  }
+
   bulkAddCIDR6(cidr: string[]) {
     for (let i = 0, len = cidr.length; i < len; i++) {
       this.ipcidr6.add(cidr[i]);
@@ -153,19 +162,54 @@ abstract class RuleOutput {
     return this;
   }
 
+  bulkAddCIDR6NoResolve(cidr: string[]) {
+    for (let i = 0, len = cidr.length; i < len; i++) {
+      this.ipcidr6NoResolve.add(cidr[i]);
+    }
+    return this;
+  }
+
   abstract write(): Promise<void>;
 }
 
 export class DomainsetOutput extends RuleOutput {
   protected type = 'domainset' as const;
 
+  private $dumped: string[] | null = null;
+
+  get dumped() {
+    if (!this.$dumped) {
+      const kwfilter = createKeywordFilter(this.domainKeywords);
+
+      const dumped = this.domainTrie.dump();
+      const set = new Set(dumped);
+      for (let i = 0, len = dumped.length; i < len; i++) {
+        const domain = dumped[i];
+        if (kwfilter(domain)) {
+          set.delete(domain);
+        }
+      }
+
+      this.$dumped = Array.from(set);
+    }
+    return this.$dumped;
+  }
+
+  calcDomainMap() {
+    if (!this.apexDomainMap || !this.subDomainMap) {
+      const { domainMap, subdomainMap } = buildParseDomainMap(this.dumped);
+      this.apexDomainMap = domainMap;
+      this.subDomainMap = subdomainMap;
+    }
+  }
+
   async write() {
     await this.pendingPromise;
 
     invariant(this.title, 'Missing title');
     invariant(this.description, 'Missing description');
 
-    const sorted = sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap);
+    const sorted = sortDomains(this.dumped, this.apexDomainMap, this.subDomainMap);
     sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe');
 
     const surge = sorted;
@@ -201,6 +245,28 @@ export class DomainsetOutput extends RuleOutput {
       )
     ]);
   }
+
+  getStatMap() {
+    invariant(this.dumped, 'Non dumped yet');
+    invariant(this.apexDomainMap, 'Missing apex domain map');
+
+    return Array.from(
+      (
+        nullthrow(this.dumped, 'Non dumped yet').reduce<Map<string, number>>((acc, cur) => {
+          const suffix = this.apexDomainMap!.get(cur);
+          if (suffix) {
+            acc.set(suffix, (acc.get(suffix) ?? 0) + 1);
+          }
+          return acc;
+        }, new Map())
+      ).entries()
+    )
+      .filter(a => a[1] > 9)
+      .sort(
+        (a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0])
+      )
+      .map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`);
+  }
 }
 
 export class IPListOutput extends RuleOutput {
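
Note (not part of the commit): below is a minimal usage sketch of the builder-style output API that this patch migrates the build scripts to, pieced together from the calls visible in the diff above (RulesetOutput, withTitle, withDescription, addFromRuleset, bulkAddCIDR4NoResolve, bulkAddCIDR6NoResolve, write). The file name, function name and parameters are illustrative assumptions, not code from the repository.

// Hypothetical Build/build-example.ts, for illustration only.
import type { Span } from './trace';
import { SHARED_DESCRIPTION } from './lib/constants';
import { RulesetOutput } from './lib/create-file-new';

export function buildExampleRuleset(span: Span, rules: string[], cidr4: string[], cidr6: string[]) {
  // Each chained call mutates the output object and returns `this`,
  // so the whole pipeline reads top to bottom and ends with one write().
  return new RulesetOutput(span, 'example', 'ip')                 // output id + namespace, mirroring new RulesetOutput(span, 'reject', 'ip')
    .withTitle('Sukka\'s Ruleset - Example')                      // banner title
    .withDescription([...SHARED_DESCRIPTION, '', 'Example only.'])
    .addFromRuleset(rules)                                        // already-formatted rule lines, e.g. 'DOMAIN-SUFFIX,example.com'
    .bulkAddCIDR4NoResolve(cidr4)                                 // IPv4 addresses/CIDRs collected into the no-resolve bucket
    .bulkAddCIDR6NoResolve(cidr6)                                 // IPv6 addresses/CIDRs collected into the no-resolve bucket
    .write();                                                     // sorts, adds the banner and writes the generated artifacts (per create-file-new.ts)
}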