Chore: universal way to attach upstream data source

This commit is contained in:
SukkaW 2025-07-21 15:43:55 +08:00
parent aa0a63602f
commit de1f817eec
4 changed files with 74 additions and 38 deletions

View File

@ -39,45 +39,35 @@ const adguardFiltersExtraDownloads = ADGUARD_FILTERS_EXTRA.map(entry => processF
const adguardFiltersWhitelistsDownloads = ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRulesWithPreload(...entry));
export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => {
const rejectBaseDescription = [
...SHARED_DESCRIPTION,
'',
'The domainset supports AD blocking, tracking protection, privacy protection, anti-mining',
'',
'Build from:',
...HOSTS.map(host => ` - ${host[0]}`),
...DOMAIN_LISTS.map(domainList => ` - ${domainList[0]}`),
...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
];
const rejectDomainsetOutput = new DomainsetOutput(span, 'reject')
.withTitle('Sukka\'s Ruleset - Reject Base')
.withDescription(rejectBaseDescription);
.withDescription([
...SHARED_DESCRIPTION,
'',
'The domainset supports AD blocking, tracking protection, privacy protection, anti-mining'
])
.appendDataSource(HOSTS.map(host => host[0]))
.appendDataSource(DOMAIN_LISTS.map(domainList => domainList[0]));
const rejectExtraDomainsetOutput = new DomainsetOutput(span, 'reject_extra')
.withTitle('Sukka\'s Ruleset - Reject Extra')
.withDescription([
...SHARED_DESCRIPTION,
'',
'The domainset supports AD blocking, tracking protection, privacy protection, anti-mining',
'',
'Build from:',
...HOSTS_EXTRA.map(host => ` - ${host[0]}`),
...DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`),
...ADGUARD_FILTERS_EXTRA.map(filter => ` - ${filter[0]}`)
]);
'The domainset supports AD blocking, tracking protection, privacy protection, anti-mining'
])
.appendDataSource(HOSTS_EXTRA.map(host => host[0]))
.appendDataSource(DOMAIN_LISTS_EXTRA.map(domainList => domainList[0]));
const rejectPhisingDomainsetOutput = new DomainsetOutput(span, 'reject_phishing')
.withTitle('Sukka\'s Ruleset - Reject Phishing')
.withDescription([
...SHARED_DESCRIPTION,
'',
'The domainset is specifically designed for anti-phishing',
'',
'Build from:',
...PHISHING_HOSTS_EXTRA.map(host => ` - ${host[0]}`),
...PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`)
]);
'The domainset is specifically designed for anti-phishing'
])
.appendDataSource(PHISHING_HOSTS_EXTRA.map(host => host[0]))
.appendDataSource(PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => domainList[0]));
const rejectNonIpRulesetOutput = new RulesetOutput(span, 'reject', 'non_ip')
.withTitle('Sukka\'s Ruleset - Reject Non-IP')
@ -94,13 +84,10 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
.withDescription([
...SHARED_DESCRIPTION,
'',
'This file contains known addresses that are hijacking NXDOMAIN results returned by DNS servers, and botnet controller IPs.',
'',
'Data from:',
' - https://github.com/felixonmars/dnsmasq-china-list',
' - https://github.com/curbengh/botnet-filter',
' - And other sources mentioned in /domainset/reject file'
'This file contains known addresses that are hijacking NXDOMAIN results returned by DNS servers, and botnet controller IPs.'
])
.appendDataSource('https://github.com/felixonmars/dnsmasq-china-list')
.appendDataSource('https://github.com/curbengh/botnet-filter')
.bulkAddIPASN(AUGUST_ASN)
.bulkAddIPASN(HUIZE_ASN);
@ -143,7 +130,13 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
rejectPhisingDomainsetOutput.addFromDomainset(getPhishingDomains(childSpan)),
adguardFiltersDownloads.map(
task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes, blackIPs, blackWildcard, whiteKeyword, blackKeyword }) => {
task => task(childSpan).then(({
filterRulesUrl,
whiteDomains, whiteDomainSuffixes,
blackDomains, blackDomainSuffixes,
blackIPs, blackWildcard,
whiteKeyword, blackKeyword
}) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains);
addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix);
@ -154,13 +147,22 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
rejectDomainsetOutput.bulkAddDomainKeyword(blackKeyword);
rejectDomainsetOutput.appendDataSource(filterRulesUrl);
rejectNonIpRulesetOutput.bulkAddDomainWildcard(blackWildcard);
rejectNonIpRulesetOutput.appendDataSource(filterRulesUrl);
rejectIPOutput.bulkAddAnyCIDR(blackIPs, false);
rejectIPOutput.appendDataSource(filterRulesUrl);
})
),
adguardFiltersExtraDownloads.map(
task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes, blackIPs, blackWildcard, whiteKeyword, blackKeyword }) => {
task => task(childSpan).then(({
filterRulesUrl,
whiteDomains, whiteDomainSuffixes,
blackDomains, blackDomainSuffixes,
blackIPs, blackWildcard, whiteKeyword, blackKeyword
}) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains);
addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix);
addArrayElementsToSet(filterRuleWhiteKeywords, whiteKeyword);
@ -170,9 +172,13 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
rejectExtraDomainsetOutput.bulkAddDomainKeyword(blackKeyword);
rejectExtraDomainsetOutput.appendDataSource(filterRulesUrl);
rejectIPOutput.bulkAddAnyCIDR(blackIPs, false);
rejectIPOutput.appendDataSource(filterRulesUrl);
rejectNonIpRulesetOutput.bulkAddDomainWildcard(blackWildcard);
rejectNonIpRulesetOutput.appendDataSource(filterRulesUrl);
})
),
adguardFiltersWhitelistsDownloads.map(

View File

@ -7,7 +7,7 @@ import type { TldTsParsed } from './normalize-domain';
const pool = new Worktank({
name: 'process-phishing-domains',
size: 1,
timeout: 10000, // The maximum number of milliseconds to wait for the result from the worker, if exceeded the worker is terminated and the execution promise rejects
timeout: 20000, // The maximum number of milliseconds to wait for the result from the worker, if exceeded the worker is terminated and the execution promise rejects
warmup: true,
autoterminate: 30000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
env: {},

View File

@ -45,7 +45,9 @@ export function processFilterRulesWithPreload(
| 'whiteKeyword'
| 'blackKeyword',
string[]
>
> & {
filterRulesUrl: string
}
>(`process filter rules: ${filterRulesUrl}`, async (span) => {
const filterRules = await span.traceChildPromise('download', downloadPromise);
@ -140,6 +142,7 @@ export function processFilterRulesWithPreload(
);
return {
filterRulesUrl,
whiteDomains: Array.from(whiteDomains),
whiteDomainSuffixes: Array.from(whiteDomainSuffixes),
blackDomains: Array.from(blackDomains),

View File

@ -2,6 +2,7 @@ import type { Span } from '../../trace';
import { HostnameSmolTrie } from '../trie';
import { not, nullthrow } from 'foxts/guard';
import { fastIpVersion } from 'foxts/fast-ip-version';
import { addArrayElementsToSet } from 'foxts/add-array-elements-to-set';
import type { MaybePromise } from '../misc';
import type { BaseWriteStrategy } from '../writing-strategy/base';
import { merge as mergeCidr } from 'fast-cidr-tools';
@ -16,6 +17,8 @@ import { SurgeMitmSgmodule } from '../writing-strategy/surge';
export class FileOutput {
protected strategies: BaseWriteStrategy[] = [];
protected dataSource = new Set<string>();
public domainTrie = new HostnameSmolTrie(null);
public wildcardTrie: HostnameSmolTrie = new HostnameSmolTrie(null);
@ -77,9 +80,9 @@ export class FileOutput {
this.strategies.push(strategy);
}
protected description: string[] | readonly string[] | null = null;
protected description: string[] | null = null;
/**
 * Set the description lines for this output.
 *
 * The lines are copied into a fresh array instead of being stored by
 * reference: the description is typed as possibly readonly, and the stored
 * array is appended to in place when the output is written, which must
 * never leak back into the caller's (potentially shared) array.
 *
 * @param description the description lines
 * @returns this instance, for chaining
 */
withDescription(description: string[] | readonly string[]) {
  this.description = [...description];
  return this;
}
@ -314,6 +317,19 @@ export class FileOutput {
return this;
}
/**
 * Register one or more upstream data sources for this output.
 *
 * Sources are collected in a Set, so registering the same source more than
 * once keeps a single entry. The collected sources are rendered inside the
 * description.
 *
 * @param source a single source, or an array of sources
 * @returns this instance, for chaining
 */
appendDataSource(source: string | string[]) {
  if (typeof source !== 'string') {
    // Array form: add every element to the set in one go
    addArrayElementsToSet(this.dataSource, source);
    return this;
  }

  this.dataSource.add(source);
  return this;
}
async done() {
await this.pendingPromise;
this.pendingPromise = null;
@ -503,15 +519,26 @@ export class FileOutput {
return childSpan.traceChildAsync('output to disk', (childSpan) => {
const promises: Array<Promise<void> | void> = [];
const descriptions = nullthrow(this.description, 'Missing description');
if (this.dataSource.size) {
descriptions.push(
'',
'This file contains data from:'
);
appendArrayInPlace(descriptions, Array.from(this.dataSource).sort().map((source) => ` - ${source}`));
}
for (let i = 0, len = this.strategies.length; i < len; i++) {
const strategy = this.strategies[i];
const basename = (strategy.overwriteFilename || this.id) + '.' + strategy.fileExtension;
promises.push(
childSpan.traceChildAsync('write ' + strategy.name, (childSpan) => Promise.resolve(strategy.output(
childSpan,
nullthrow(this.title, 'Missing title'),
nullthrow(this.description, 'Missing description'),
descriptions,
this.date,
path.join(
strategy.outputDir,