Chore: universal way to attach upstream data source

This commit is contained in:
SukkaW 2025-07-21 15:43:55 +08:00
parent aa0a63602f
commit de1f817eec
4 changed files with 74 additions and 38 deletions

View File

@ -39,45 +39,35 @@ const adguardFiltersExtraDownloads = ADGUARD_FILTERS_EXTRA.map(entry => processF
const adguardFiltersWhitelistsDownloads = ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRulesWithPreload(...entry)); const adguardFiltersWhitelistsDownloads = ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRulesWithPreload(...entry));
export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => { export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => {
const rejectBaseDescription = [
...SHARED_DESCRIPTION,
'',
'The domainset supports AD blocking, tracking protection, privacy protection, anti-mining',
'',
'Build from:',
...HOSTS.map(host => ` - ${host[0]}`),
...DOMAIN_LISTS.map(domainList => ` - ${domainList[0]}`),
...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
];
const rejectDomainsetOutput = new DomainsetOutput(span, 'reject') const rejectDomainsetOutput = new DomainsetOutput(span, 'reject')
.withTitle('Sukka\'s Ruleset - Reject Base') .withTitle('Sukka\'s Ruleset - Reject Base')
.withDescription(rejectBaseDescription); .withDescription([
...SHARED_DESCRIPTION,
'',
'The domainset supports AD blocking, tracking protection, privacy protection, anti-mining'
])
.appendDataSource(HOSTS.map(host => host[0]))
.appendDataSource(DOMAIN_LISTS.map(domainList => domainList[0]));
const rejectExtraDomainsetOutput = new DomainsetOutput(span, 'reject_extra') const rejectExtraDomainsetOutput = new DomainsetOutput(span, 'reject_extra')
.withTitle('Sukka\'s Ruleset - Reject Extra') .withTitle('Sukka\'s Ruleset - Reject Extra')
.withDescription([ .withDescription([
...SHARED_DESCRIPTION, ...SHARED_DESCRIPTION,
'', '',
'The domainset supports AD blocking, tracking protection, privacy protection, anti-mining', 'The domainset supports AD blocking, tracking protection, privacy protection, anti-mining'
'', ])
'Build from:', .appendDataSource(HOSTS_EXTRA.map(host => host[0]))
...HOSTS_EXTRA.map(host => ` - ${host[0]}`), .appendDataSource(DOMAIN_LISTS_EXTRA.map(domainList => domainList[0]));
...DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`),
...ADGUARD_FILTERS_EXTRA.map(filter => ` - ${filter[0]}`)
]);
const rejectPhisingDomainsetOutput = new DomainsetOutput(span, 'reject_phishing') const rejectPhisingDomainsetOutput = new DomainsetOutput(span, 'reject_phishing')
.withTitle('Sukka\'s Ruleset - Reject Phishing') .withTitle('Sukka\'s Ruleset - Reject Phishing')
.withDescription([ .withDescription([
...SHARED_DESCRIPTION, ...SHARED_DESCRIPTION,
'', '',
'The domainset is specifically designed for anti-phishing', 'The domainset is specifically designed for anti-phishing'
'', ])
'Build from:', .appendDataSource(PHISHING_HOSTS_EXTRA.map(host => host[0]))
...PHISHING_HOSTS_EXTRA.map(host => ` - ${host[0]}`), .appendDataSource(PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => domainList[0]));
...PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`)
]);
const rejectNonIpRulesetOutput = new RulesetOutput(span, 'reject', 'non_ip') const rejectNonIpRulesetOutput = new RulesetOutput(span, 'reject', 'non_ip')
.withTitle('Sukka\'s Ruleset - Reject Non-IP') .withTitle('Sukka\'s Ruleset - Reject Non-IP')
@ -94,13 +84,10 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
.withDescription([ .withDescription([
...SHARED_DESCRIPTION, ...SHARED_DESCRIPTION,
'', '',
'This file contains known addresses that are hijacking NXDOMAIN results returned by DNS servers, and botnet controller IPs.', 'This file contains known addresses that are hijacking NXDOMAIN results returned by DNS servers, and botnet controller IPs.'
'',
'Data from:',
' - https://github.com/felixonmars/dnsmasq-china-list',
' - https://github.com/curbengh/botnet-filter',
' - And other sources mentioned in /domainset/reject file'
]) ])
.appendDataSource('https://github.com/felixonmars/dnsmasq-china-list')
.appendDataSource('https://github.com/curbengh/botnet-filter')
.bulkAddIPASN(AUGUST_ASN) .bulkAddIPASN(AUGUST_ASN)
.bulkAddIPASN(HUIZE_ASN); .bulkAddIPASN(HUIZE_ASN);
@ -143,7 +130,13 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
rejectPhisingDomainsetOutput.addFromDomainset(getPhishingDomains(childSpan)), rejectPhisingDomainsetOutput.addFromDomainset(getPhishingDomains(childSpan)),
adguardFiltersDownloads.map( adguardFiltersDownloads.map(
task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes, blackIPs, blackWildcard, whiteKeyword, blackKeyword }) => { task => task(childSpan).then(({
filterRulesUrl,
whiteDomains, whiteDomainSuffixes,
blackDomains, blackDomainSuffixes,
blackIPs, blackWildcard,
whiteKeyword, blackKeyword
}) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains); addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains);
addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix); addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix);
@ -154,13 +147,22 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
rejectDomainsetOutput.bulkAddDomainKeyword(blackKeyword); rejectDomainsetOutput.bulkAddDomainKeyword(blackKeyword);
rejectDomainsetOutput.appendDataSource(filterRulesUrl);
rejectNonIpRulesetOutput.bulkAddDomainWildcard(blackWildcard); rejectNonIpRulesetOutput.bulkAddDomainWildcard(blackWildcard);
rejectNonIpRulesetOutput.appendDataSource(filterRulesUrl);
rejectIPOutput.bulkAddAnyCIDR(blackIPs, false); rejectIPOutput.bulkAddAnyCIDR(blackIPs, false);
rejectIPOutput.appendDataSource(filterRulesUrl);
}) })
), ),
adguardFiltersExtraDownloads.map( adguardFiltersExtraDownloads.map(
task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes, blackIPs, blackWildcard, whiteKeyword, blackKeyword }) => { task => task(childSpan).then(({
filterRulesUrl,
whiteDomains, whiteDomainSuffixes,
blackDomains, blackDomainSuffixes,
blackIPs, blackWildcard, whiteKeyword, blackKeyword
}) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains); addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains);
addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix); addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix);
addArrayElementsToSet(filterRuleWhiteKeywords, whiteKeyword); addArrayElementsToSet(filterRuleWhiteKeywords, whiteKeyword);
@ -170,9 +172,13 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
rejectExtraDomainsetOutput.bulkAddDomainKeyword(blackKeyword); rejectExtraDomainsetOutput.bulkAddDomainKeyword(blackKeyword);
rejectExtraDomainsetOutput.appendDataSource(filterRulesUrl);
rejectIPOutput.bulkAddAnyCIDR(blackIPs, false); rejectIPOutput.bulkAddAnyCIDR(blackIPs, false);
rejectIPOutput.appendDataSource(filterRulesUrl);
rejectNonIpRulesetOutput.bulkAddDomainWildcard(blackWildcard); rejectNonIpRulesetOutput.bulkAddDomainWildcard(blackWildcard);
rejectNonIpRulesetOutput.appendDataSource(filterRulesUrl);
}) })
), ),
adguardFiltersWhitelistsDownloads.map( adguardFiltersWhitelistsDownloads.map(

View File

@ -7,7 +7,7 @@ import type { TldTsParsed } from './normalize-domain';
const pool = new Worktank({ const pool = new Worktank({
name: 'process-phishing-domains', name: 'process-phishing-domains',
size: 1, size: 1,
timeout: 10000, // The maximum number of milliseconds to wait for the result from the worker, if exceeded the worker is terminated and the execution promise rejects timeout: 20000, // The maximum number of milliseconds to wait for the result from the worker, if exceeded the worker is terminated and the execution promise rejects
warmup: true, warmup: true,
autoterminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed autoterminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
env: {}, env: {},

View File

@ -45,7 +45,9 @@ export function processFilterRulesWithPreload(
| 'whiteKeyword' | 'whiteKeyword'
| 'blackKeyword', | 'blackKeyword',
string[] string[]
> > & {
filterRulesUrl: string
}
>(`process filter rules: ${filterRulesUrl}`, async (span) => { >(`process filter rules: ${filterRulesUrl}`, async (span) => {
const filterRules = await span.traceChildPromise('download', downloadPromise); const filterRules = await span.traceChildPromise('download', downloadPromise);
@ -140,6 +142,7 @@ export function processFilterRulesWithPreload(
); );
return { return {
filterRulesUrl,
whiteDomains: Array.from(whiteDomains), whiteDomains: Array.from(whiteDomains),
whiteDomainSuffixes: Array.from(whiteDomainSuffixes), whiteDomainSuffixes: Array.from(whiteDomainSuffixes),
blackDomains: Array.from(blackDomains), blackDomains: Array.from(blackDomains),

View File

@ -2,6 +2,7 @@ import type { Span } from '../../trace';
import { HostnameSmolTrie } from '../trie'; import { HostnameSmolTrie } from '../trie';
import { not, nullthrow } from 'foxts/guard'; import { not, nullthrow } from 'foxts/guard';
import { fastIpVersion } from 'foxts/fast-ip-version'; import { fastIpVersion } from 'foxts/fast-ip-version';
import { addArrayElementsToSet } from 'foxts/add-array-elements-to-set';
import type { MaybePromise } from '../misc'; import type { MaybePromise } from '../misc';
import type { BaseWriteStrategy } from '../writing-strategy/base'; import type { BaseWriteStrategy } from '../writing-strategy/base';
import { merge as mergeCidr } from 'fast-cidr-tools'; import { merge as mergeCidr } from 'fast-cidr-tools';
@ -16,6 +17,8 @@ import { SurgeMitmSgmodule } from '../writing-strategy/surge';
export class FileOutput { export class FileOutput {
protected strategies: BaseWriteStrategy[] = []; protected strategies: BaseWriteStrategy[] = [];
protected dataSource = new Set<string>();
public domainTrie = new HostnameSmolTrie(null); public domainTrie = new HostnameSmolTrie(null);
public wildcardTrie: HostnameSmolTrie = new HostnameSmolTrie(null); public wildcardTrie: HostnameSmolTrie = new HostnameSmolTrie(null);
@ -77,9 +80,9 @@ export class FileOutput {
this.strategies.push(strategy); this.strategies.push(strategy);
} }
protected description: string[] | readonly string[] | null = null; protected description: string[] | null = null;
withDescription(description: string[] | readonly string[]) { withDescription(description: string[] | readonly string[]) {
this.description = description; this.description = description as string[];
return this; return this;
} }
@ -314,6 +317,19 @@ export class FileOutput {
return this; return this;
} }
/**
 * Record one or more upstream data sources for this output.
 *
 * Sources are collected in a Set, so a source appended more than once is
 * stored only once. The collected sources are rendered inside the description.
 *
 * @param source - a single source string, or an array of source strings
 * @returns this instance, so calls can be chained
 */
appendDataSource(source: string | string[]) {
  // Array input: add every element in bulk, then bail out early.
  if (typeof source !== 'string') {
    addArrayElementsToSet(this.dataSource, source);
    return this;
  }

  this.dataSource.add(source);
  return this;
}
async done() { async done() {
await this.pendingPromise; await this.pendingPromise;
this.pendingPromise = null; this.pendingPromise = null;
@ -503,15 +519,26 @@ export class FileOutput {
return childSpan.traceChildAsync('output to disk', (childSpan) => { return childSpan.traceChildAsync('output to disk', (childSpan) => {
const promises: Array<Promise<void> | void> = []; const promises: Array<Promise<void> | void> = [];
// Build the description lines that will be handed to every write strategy.
//
// Work on a copy: `this.description` is the very array that was passed to
// `withDescription()` (it is stored as-is, not cloned). Pushing onto it directly
// would mutate the caller's array (which may be a shared/readonly constant) and
// would append the data source section again every time this code runs.
// NOTE(review): assumes `nullthrow` returns its argument unchanged when non-null.
const descriptions = nullthrow(this.description, 'Missing description').slice();

if (this.dataSource.size) {
  descriptions.push(
    '',
    'This file contains data from:'
  );
  // Sort so the rendered source list is stable regardless of the order in which
  // sources were appended.
  appendArrayInPlace(descriptions, Array.from(this.dataSource).sort().map((source) => ` - ${source}`));
}
for (let i = 0, len = this.strategies.length; i < len; i++) { for (let i = 0, len = this.strategies.length; i < len; i++) {
const strategy = this.strategies[i]; const strategy = this.strategies[i];
const basename = (strategy.overwriteFilename || this.id) + '.' + strategy.fileExtension; const basename = (strategy.overwriteFilename || this.id) + '.' + strategy.fileExtension;
promises.push( promises.push(
childSpan.traceChildAsync('write ' + strategy.name, (childSpan) => Promise.resolve(strategy.output( childSpan.traceChildAsync('write ' + strategy.name, (childSpan) => Promise.resolve(strategy.output(
childSpan, childSpan,
nullthrow(this.title, 'Missing title'), nullthrow(this.title, 'Missing title'),
nullthrow(this.description, 'Missing description'), descriptions,
this.date, this.date,
path.join( path.join(
strategy.outputDir, strategy.outputDir,