From de1f817eec20531962dc8aab14b1d352c1c8f947 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Mon, 21 Jul 2025 15:43:55 +0800 Subject: [PATCH] Chore: universal way to attach upstream data source --- Build/build-reject-domainset.ts | 72 +++++++++++++++++-------------- Build/lib/get-phishing-domains.ts | 2 +- Build/lib/parse-filter/filters.ts | 5 ++- Build/lib/rules/base.ts | 33 ++++++++++++-- 4 files changed, 74 insertions(+), 38 deletions(-) diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts index d9468639..c10583d3 100644 --- a/Build/build-reject-domainset.ts +++ b/Build/build-reject-domainset.ts @@ -39,45 +39,35 @@ const adguardFiltersExtraDownloads = ADGUARD_FILTERS_EXTRA.map(entry => processF const adguardFiltersWhitelistsDownloads = ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRulesWithPreload(...entry)); export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => { - const rejectBaseDescription = [ - ...SHARED_DESCRIPTION, - '', - 'The domainset supports AD blocking, tracking protection, privacy protection, anti-mining', - '', - 'Build from:', - ...HOSTS.map(host => ` - ${host[0]}`), - ...DOMAIN_LISTS.map(domainList => ` - ${domainList[0]}`), - ...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`) - ]; - const rejectDomainsetOutput = new DomainsetOutput(span, 'reject') .withTitle('Sukka\'s Ruleset - Reject Base') - .withDescription(rejectBaseDescription); + .withDescription([ + ...SHARED_DESCRIPTION, + '', + 'The domainset supports AD blocking, tracking protection, privacy protection, anti-mining' + ]) + .appendDataSource(HOSTS.map(host => host[0])) + .appendDataSource(DOMAIN_LISTS.map(domainList => domainList[0])); const rejectExtraDomainsetOutput = new DomainsetOutput(span, 'reject_extra') .withTitle('Sukka\'s Ruleset - Reject Extra') .withDescription([ ...SHARED_DESCRIPTION, '', - 'The domainset supports AD blocking, tracking protection, privacy protection, anti-mining', - '', - 'Build from:', - ...HOSTS_EXTRA.map(host => ` - ${host[0]}`), - ...DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`), - ...ADGUARD_FILTERS_EXTRA.map(filter => ` - ${filter[0]}`) - ]); + 'The domainset supports AD blocking, tracking protection, privacy protection, anti-mining' + ]) + .appendDataSource(HOSTS_EXTRA.map(host => host[0])) + .appendDataSource(DOMAIN_LISTS_EXTRA.map(domainList => domainList[0])); const rejectPhisingDomainsetOutput = new DomainsetOutput(span, 'reject_phishing') .withTitle('Sukka\'s Ruleset - Reject Phishing') .withDescription([ ...SHARED_DESCRIPTION, '', - 'The domainset is specifically designed for anti-phishing', - '', - 'Build from:', - ...PHISHING_HOSTS_EXTRA.map(host => ` - ${host[0]}`), - ...PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`) - ]); + 'The domainset is specifically designed for anti-phishing' + ]) + .appendDataSource(PHISHING_HOSTS_EXTRA.map(host => host[0])) + .appendDataSource(PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => domainList[0])); const rejectNonIpRulesetOutput = new RulesetOutput(span, 'reject', 'non_ip') .withTitle('Sukka\'s Ruleset - Reject Non-IP') @@ -94,13 +84,10 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as .withDescription([ ...SHARED_DESCRIPTION, '', - 'This file contains known addresses that are hijacking NXDOMAIN results returned by DNS servers, and botnet controller IPs.', - '', - 'Data from:', - ' - https://github.com/felixonmars/dnsmasq-china-list', - ' - https://github.com/curbengh/botnet-filter', - ' - And other sources mentioned in /domainset/reject file' + 'This file contains known addresses that are hijacking NXDOMAIN results returned by DNS servers, and botnet controller IPs.' ]) + .appendDataSource('https://github.com/felixonmars/dnsmasq-china-list') + .appendDataSource('https://github.com/curbengh/botnet-filter') .bulkAddIPASN(AUGUST_ASN) .bulkAddIPASN(HUIZE_ASN); @@ -143,7 +130,13 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as rejectPhisingDomainsetOutput.addFromDomainset(getPhishingDomains(childSpan)), adguardFiltersDownloads.map( - task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes, blackIPs, blackWildcard, whiteKeyword, blackKeyword }) => { + task => task(childSpan).then(({ + filterRulesUrl, + whiteDomains, whiteDomainSuffixes, + blackDomains, blackDomainSuffixes, + blackIPs, blackWildcard, + whiteKeyword, blackKeyword + }) => { addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains); addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix); @@ -154,13 +147,22 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as rejectDomainsetOutput.bulkAddDomainKeyword(blackKeyword); + rejectDomainsetOutput.appendDataSource(filterRulesUrl); + rejectNonIpRulesetOutput.bulkAddDomainWildcard(blackWildcard); + rejectNonIpRulesetOutput.appendDataSource(filterRulesUrl); rejectIPOutput.bulkAddAnyCIDR(blackIPs, false); + rejectIPOutput.appendDataSource(filterRulesUrl); }) ), adguardFiltersExtraDownloads.map( - task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes, blackIPs, blackWildcard, whiteKeyword, blackKeyword }) => { + task => task(childSpan).then(({ + filterRulesUrl, + whiteDomains, whiteDomainSuffixes, + blackDomains, blackDomainSuffixes, + blackIPs, blackWildcard, whiteKeyword, blackKeyword + }) => { addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains); addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix); addArrayElementsToSet(filterRuleWhiteKeywords, whiteKeyword); @@ -170,9 +172,13 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as rejectExtraDomainsetOutput.bulkAddDomainKeyword(blackKeyword); + rejectExtraDomainsetOutput.appendDataSource(filterRulesUrl); + rejectIPOutput.bulkAddAnyCIDR(blackIPs, false); + rejectIPOutput.appendDataSource(filterRulesUrl); rejectNonIpRulesetOutput.bulkAddDomainWildcard(blackWildcard); + rejectNonIpRulesetOutput.appendDataSource(filterRulesUrl); }) ), adguardFiltersWhitelistsDownloads.map( diff --git a/Build/lib/get-phishing-domains.ts b/Build/lib/get-phishing-domains.ts index 531ea44e..387e9528 100644 --- a/Build/lib/get-phishing-domains.ts +++ b/Build/lib/get-phishing-domains.ts @@ -7,7 +7,7 @@ import type { TldTsParsed } from './normalize-domain'; const pool = new Worktank({ name: 'process-phishing-domains', size: 1, - timeout: 10000, // The maximum number of milliseconds to wait for the result from the worker, if exceeded the worker is terminated and the execution promise rejects + timeout: 20000, // The maximum number of milliseconds to wait for the result from the worker, if exceeded the worker is terminated and the execution promise rejects warmup: true, autoterminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed env: {}, diff --git a/Build/lib/parse-filter/filters.ts b/Build/lib/parse-filter/filters.ts index 6aa706b1..6d82c3c0 100644 --- a/Build/lib/parse-filter/filters.ts +++ b/Build/lib/parse-filter/filters.ts @@ -45,7 +45,9 @@ export function processFilterRulesWithPreload( | 'whiteKeyword' | 'blackKeyword', string[] - > + > & { + filterRulesUrl: string + } >(`process filter rules: ${filterRulesUrl}`, async (span) => { const filterRules = await span.traceChildPromise('download', downloadPromise); @@ -140,6 +142,7 @@ export function processFilterRulesWithPreload( ); return { + filterRulesUrl, whiteDomains: Array.from(whiteDomains), whiteDomainSuffixes: Array.from(whiteDomainSuffixes), blackDomains: Array.from(blackDomains), diff --git a/Build/lib/rules/base.ts b/Build/lib/rules/base.ts index b9687ad9..516c0461 100644 --- a/Build/lib/rules/base.ts +++ b/Build/lib/rules/base.ts @@ -2,6 +2,7 @@ import type { Span } from '../../trace'; import { HostnameSmolTrie } from '../trie'; import { not, nullthrow } from 'foxts/guard'; import { fastIpVersion } from 'foxts/fast-ip-version'; +import { addArrayElementsToSet } from 'foxts/add-array-elements-to-set'; import type { MaybePromise } from '../misc'; import type { BaseWriteStrategy } from '../writing-strategy/base'; import { merge as mergeCidr } from 'fast-cidr-tools'; @@ -16,6 +17,8 @@ import { SurgeMitmSgmodule } from '../writing-strategy/surge'; export class FileOutput { protected strategies: BaseWriteStrategy[] = []; + protected dataSource = new Set(); + public domainTrie = new HostnameSmolTrie(null); public wildcardTrie: HostnameSmolTrie = new HostnameSmolTrie(null); @@ -77,9 +80,9 @@ export class FileOutput { this.strategies.push(strategy); } - protected description: string[] | readonly string[] | null = null; + protected description: string[] | null = null; withDescription(description: string[] | readonly string[]) { - this.description = description; + this.description = description as string[]; return this; } @@ -314,6 +317,19 @@ export class FileOutput { return this; } + /** + * Add data source information. This will be rendered inside description + */ + appendDataSource(source: string | string[]) { + if (typeof source === 'string') { + this.dataSource.add(source); + } else { + addArrayElementsToSet(this.dataSource, source); + } + + return this; + } + async done() { await this.pendingPromise; this.pendingPromise = null; @@ -503,15 +519,26 @@ export class FileOutput { return childSpan.traceChildAsync('output to disk', (childSpan) => { const promises: Array | void> = []; + const descriptions = nullthrow(this.description, 'Missing description'); + + if (this.dataSource.size) { + descriptions.push( + '', + 'This file contains data from:' + ); + appendArrayInPlace(descriptions, Array.from(this.dataSource).sort().map((source) => ` - ${source}`)); + } + for (let i = 0, len = this.strategies.length; i < len; i++) { const strategy = this.strategies[i]; const basename = (strategy.overwriteFilename || this.id) + '.' + strategy.fileExtension; + promises.push( childSpan.traceChildAsync('write ' + strategy.name, (childSpan) => Promise.resolve(strategy.output( childSpan, nullthrow(this.title, 'Missing title'), - nullthrow(this.description, 'Missing description'), + descriptions, this.date, path.join( strategy.outputDir,