Refactor: adapt new output

This commit is contained in:
SukkaW 2024-09-21 03:36:44 +08:00
parent eb2023b9aa
commit 4808ed8d27
7 changed files with 204 additions and 269 deletions

View File

@ -3,14 +3,15 @@ import path from 'node:path';
import { DOMESTICS } from '../Source/non_ip/domestic'; import { DOMESTICS } from '../Source/non_ip/domestic';
import { DIRECTS, LANS } from '../Source/non_ip/direct'; import { DIRECTS, LANS } from '../Source/non_ip/direct';
import { readFileIntoProcessedArray } from './lib/fetch-text-by-line'; import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
import { compareAndWriteFile, createRuleset } from './lib/create-file'; import { compareAndWriteFile } from './lib/create-file';
import { task } from './trace'; import { task } from './trace';
import { SHARED_DESCRIPTION } from './lib/constants'; import { SHARED_DESCRIPTION } from './lib/constants';
import { createMemoizedPromise } from './lib/memo-promise'; import { createMemoizedPromise } from './lib/memo-promise';
import * as yaml from 'yaml'; import * as yaml from 'yaml';
import { appendArrayInPlace } from './lib/append-array-in-place'; import { appendArrayInPlace } from './lib/append-array-in-place';
import { output, writeFile } from './lib/misc'; import { writeFile } from './lib/misc';
import { OUTPUT_INTERNAL_DIR, OUTPUT_MODULES_DIR, SOURCE_DIR } from './constants/dir'; import { OUTPUT_INTERNAL_DIR, OUTPUT_MODULES_DIR, SOURCE_DIR } from './constants/dir';
import { RulesetOutput } from './lib/create-file-new';
export const getDomesticAndDirectDomainsRulesetPromise = createMemoizedPromise(async () => { export const getDomesticAndDirectDomainsRulesetPromise = createMemoizedPromise(async () => {
const domestics = await readFileIntoProcessedArray(path.join(SOURCE_DIR, 'non_ip/domestic.conf')); const domestics = await readFileIntoProcessedArray(path.join(SOURCE_DIR, 'non_ip/domestic.conf'));
@ -38,45 +39,33 @@ export const buildDomesticRuleset = task(require.main === module, __filename)(as
appendArrayInPlace(dataset, Object.entries(LANS)); appendArrayInPlace(dataset, Object.entries(LANS));
return Promise.all([ return Promise.all([
createRuleset( new RulesetOutput(span, 'domestic', 'non_ip')
span, .withTitle('Sukka\'s Ruleset - Domestic Domains')
'Sukka\'s Ruleset - Domestic Domains', .withDescription([
[
...SHARED_DESCRIPTION, ...SHARED_DESCRIPTION,
'', '',
'This file contains known addresses that are avaliable in the Mainland China.' 'This file contains known addresses that are avaliable in the Mainland China.'
], ])
new Date(), .addFromRuleset(res[0])
res[0], .write(),
'ruleset', new RulesetOutput(span, 'direct', 'non_ip')
output('domestic', 'non_ip') .withTitle('Sukka\'s Ruleset - Direct Rules')
), .withDescription([
createRuleset(
span,
'Sukka\'s Ruleset - Direct Rules',
[
...SHARED_DESCRIPTION, ...SHARED_DESCRIPTION,
'', '',
'This file contains domains and process that should not be proxied.' 'This file contains domains and process that should not be proxied.'
], ])
new Date(), .addFromRuleset(res[1])
res[1], .write(),
'ruleset', new RulesetOutput(span, 'lan', 'non_ip')
output('direct', 'non_ip') .withTitle('Sukka\'s Ruleset - LAN')
), .withDescription([
createRuleset(
span,
'Sukka\'s Ruleset - LAN',
[
...SHARED_DESCRIPTION, ...SHARED_DESCRIPTION,
'', '',
'This file includes rules for LAN DOMAIN and reserved TLDs.' 'This file includes rules for LAN DOMAIN and reserved TLDs.'
], ])
new Date(), .addFromRuleset(res[2])
res[2], .write(),
'ruleset',
output('lan', 'non_ip')
),
compareAndWriteFile( compareAndWriteFile(
span, span,
[ [

View File

@ -1,12 +1,10 @@
import { task } from './trace'; import { task } from './trace';
import { createRuleset } from './lib/create-file';
import { fetchRemoteTextByLine } from './lib/fetch-text-by-line'; import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
import { createTrie } from './lib/trie'; import { createTrie } from './lib/trie';
import { SHARED_DESCRIPTION } from './lib/constants'; import { SHARED_DESCRIPTION } from './lib/constants';
import { createMemoizedPromise } from './lib/memo-promise'; import { createMemoizedPromise } from './lib/memo-promise';
import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq'; import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
import { sortDomains } from './lib/stable-sort-domain'; import { RulesetOutput } from './lib/create-file-new';
import { output } from './lib/misc';
const PROBE_DOMAINS = ['.microsoft.com', '.windows.net', '.windows.com', '.windowsupdate.com', '.windowssearch.com', '.office.net']; const PROBE_DOMAINS = ['.microsoft.com', '.windows.net', '.windows.com', '.windowsupdate.com', '.windowssearch.com', '.office.net'];
@ -39,7 +37,7 @@ export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
const trie2 = createTrie(foundMicrosoftCdnDomains, true); const trie2 = createTrie(foundMicrosoftCdnDomains, true);
BLACKLIST.forEach(trie2.whitelist); BLACKLIST.forEach(trie2.whitelist);
return sortDomains(trie2.dump()) return trie2.dump()
.map(d => `DOMAIN-SUFFIX,${d}`) .map(d => `DOMAIN-SUFFIX,${d}`)
.concat(WHITELIST); .concat(WHITELIST);
}); });
@ -56,13 +54,9 @@ export const buildMicrosoftCdn = task(require.main === module, __filename)(async
const res: string[] = await span.traceChildPromise('get microsoft cdn domains', getMicrosoftCdnRulesetPromise()); const res: string[] = await span.traceChildPromise('get microsoft cdn domains', getMicrosoftCdnRulesetPromise());
return createRuleset( return new RulesetOutput(span, 'microsoft_cdn', 'non_ip')
span, .withTitle('Sukka\'s Ruleset - Microsoft CDN')
'Sukka\'s Ruleset - Microsoft CDN', .withDescription(description)
description, .addFromRuleset(res)
new Date(), .write();
res,
'ruleset',
output('microsoft_cdn', 'non_ip')
);
}); });

View File

@ -3,13 +3,10 @@ import path from 'node:path';
import process from 'node:process'; import process from 'node:process';
import { processHosts, processFilterRules, processDomainLists } from './lib/parse-filter'; import { processHosts, processFilterRules, processDomainLists } from './lib/parse-filter';
import { createTrie } from './lib/trie';
import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA } from './constants/reject-data-source'; import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA } from './constants/reject-data-source';
import { createRuleset, compareAndWriteFile } from './lib/create-file'; import { compareAndWriteFile } from './lib/create-file';
import createKeywordFilter from './lib/aho-corasick';
import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line'; import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
import { buildParseDomainMap, sortDomains } from './lib/stable-sort-domain';
import { task } from './trace'; import { task } from './trace';
// tldts-experimental is way faster than tldts, but very little bit inaccurate // tldts-experimental is way faster than tldts, but very little bit inaccurate
// (since it is hashes based). But the result is still deterministic, which is // (since it is hashes based). But the result is still deterministic, which is
@ -17,23 +14,47 @@ import { task } from './trace';
import { SHARED_DESCRIPTION } from './lib/constants'; import { SHARED_DESCRIPTION } from './lib/constants';
import { getPhishingDomains } from './lib/get-phishing-domains'; import { getPhishingDomains } from './lib/get-phishing-domains';
import { setAddFromArray, setAddFromArrayCurried } from './lib/set-add-from-array'; import { setAddFromArray } from './lib/set-add-from-array';
import { output } from './lib/misc';
import { appendArrayInPlace } from './lib/append-array-in-place'; import { appendArrayInPlace } from './lib/append-array-in-place';
import { OUTPUT_INTERNAL_DIR, SOURCE_DIR } from './constants/dir'; import { OUTPUT_INTERNAL_DIR, SOURCE_DIR } from './constants/dir';
import { DomainsetOutput } from './lib/create-file-new';
const getRejectSukkaConfPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/reject_sukka.conf')); const getRejectSukkaConfPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/reject_sukka.conf'));
export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => { export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => {
const rejectOutput = new DomainsetOutput(span, 'reject')
.withTitle('Sukka\'s Ruleset - Reject Base')
.withDescription([
...SHARED_DESCRIPTION,
'',
'The domainset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining',
'',
'Build from:',
...HOSTS.map(host => ` - ${host[0]}`),
...DOMAIN_LISTS.map(domainList => ` - ${domainList[0]}`),
...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
]);
const rejectExtraOutput = new DomainsetOutput(span, 'reject_extra')
.withTitle('Sukka\'s Ruleset - Reject Extra')
.withDescription([
...SHARED_DESCRIPTION,
'',
'The domainset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining',
'',
'Build from:',
...HOSTS_EXTRA.map(host => ` - ${host[0]}`),
...DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`),
...ADGUARD_FILTERS_EXTRA.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`),
...PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`)
]);
const appendArrayToRejectOutput = rejectOutput.addFromDomainset.bind(rejectOutput);
const appendArrayToRejectExtraOutput = rejectExtraOutput.addFromDomainset.bind(rejectExtraOutput);
/** Whitelists */ /** Whitelists */
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST); const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
const domainSets = new Set<string>();
const appendArrayToDomainSets = setAddFromArrayCurried(domainSets);
const domainSetsExtra = new Set<string>();
const appendArrayToDomainSetsExtra = setAddFromArrayCurried(domainSetsExtra);
// Parse from AdGuard Filters // Parse from AdGuard Filters
const shouldStop = await span const shouldStop = await span
.traceChild('download and process hosts / adblock filter rules') .traceChild('download and process hosts / adblock filter rules')
@ -42,11 +63,11 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
let shouldStop = false; let shouldStop = false;
await Promise.all([ await Promise.all([
// Parse from remote hosts & domain lists // Parse from remote hosts & domain lists
HOSTS.map(entry => processHosts(childSpan, ...entry).then(appendArrayToDomainSets)), HOSTS.map(entry => processHosts(childSpan, ...entry).then(appendArrayToRejectOutput)),
HOSTS_EXTRA.map(entry => processHosts(childSpan, ...entry).then(appendArrayToDomainSetsExtra)), HOSTS_EXTRA.map(entry => processHosts(childSpan, ...entry).then(appendArrayToRejectExtraOutput)),
DOMAIN_LISTS.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToDomainSets)), DOMAIN_LISTS.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToRejectOutput)),
DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToDomainSetsExtra)), DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToRejectExtraOutput)),
ADGUARD_FILTERS.map( ADGUARD_FILTERS.map(
entry => processFilterRules(childSpan, ...entry) entry => processFilterRules(childSpan, ...entry)
@ -57,7 +78,7 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
// we should not break here, as we want to see full matches from all data source // we should not break here, as we want to see full matches from all data source
} }
setAddFromArray(filterRuleWhitelistDomainSets, white); setAddFromArray(filterRuleWhitelistDomainSets, white);
setAddFromArray(domainSets, black); appendArrayToRejectOutput(black);
}) })
), ),
ADGUARD_FILTERS_EXTRA.map( ADGUARD_FILTERS_EXTRA.map(
@ -69,7 +90,7 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
// we should not break here, as we want to see full matches from all data source // we should not break here, as we want to see full matches from all data source
} }
setAddFromArray(filterRuleWhitelistDomainSets, white); setAddFromArray(filterRuleWhitelistDomainSets, white);
setAddFromArray(domainSetsExtra, black); appendArrayToRejectExtraOutput(black);
}) })
), ),
@ -82,8 +103,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
setAddFromArray(filterRuleWhitelistDomainSets, black); setAddFromArray(filterRuleWhitelistDomainSets, black);
}) })
)), )),
getPhishingDomains(childSpan).then(appendArrayToDomainSetsExtra), getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput),
getRejectSukkaConfPromise.then(appendArrayToDomainSets) getRejectSukkaConfPromise.then(appendArrayToRejectOutput)
].flat()); ].flat());
// eslint-disable-next-line sukka/no-single-return -- not single return // eslint-disable-next-line sukka/no-single-return -- not single return
return shouldStop; return shouldStop;
@ -93,72 +114,23 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
process.exit(1); process.exit(1);
} }
console.log(`Import ${domainSets.size} + ${domainSetsExtra.size} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
// Dedupe domainSets // Dedupe domainSets
const domainKeywordsSet = await span.traceChildAsync('collect black keywords/suffixes', async () => { await span.traceChildAsync('collect black keywords/suffixes', async () => {
/** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */ /** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */
const domainKeywordsSet = new Set<string>();
for await (const line of readFileByLine(path.resolve(__dirname, '../Source/non_ip/reject.conf'))) { for await (const line of readFileByLine(path.resolve(__dirname, '../Source/non_ip/reject.conf'))) {
const [type, value] = line.split(','); const [type, value] = line.split(',');
if (type === 'DOMAIN-KEYWORD') { if (type === 'DOMAIN-KEYWORD') {
domainKeywordsSet.add(value); rejectOutput.addDomainKeyword(value); // Add for later deduplication
rejectExtraOutput.addDomainKeyword(value); // Add for later deduplication
} else if (type === 'DOMAIN-SUFFIX') { } else if (type === 'DOMAIN-SUFFIX') {
domainSets.add('.' + value); // Add to domainSets for later deduplication rejectOutput.addDomainSuffix(value); // Add for later deduplication
}
}
return domainKeywordsSet;
});
const [baseTrie, extraTrie] = span.traceChildSync('create smol trie while deduping black keywords', (childSpan) => {
const baseTrie = createTrie(null, true);
const extraTrie = createTrie(null, true);
const kwfilter = createKeywordFilter(domainKeywordsSet);
childSpan.traceChildSync('add items to trie (extra)', () => {
for (const domain of domainSetsExtra) {
// exclude keyword when creating trie
if (!kwfilter(domain)) {
extraTrie.add(domain);
} }
} }
}); });
childSpan.traceChildSync('add items to trie (base) + dedupe extra trie', () => { rejectOutput.calcDomainMap();
for (const domain of domainSets) { rejectExtraOutput.calcDomainMap();
// exclude keyword when creating trie
if (!kwfilter(domain)) {
baseTrie.add(domain);
extraTrie.whitelist(domain);
}
}
});
return [baseTrie, extraTrie] as const;
});
span.traceChildSync('dedupe from white suffixes (base)', () => filterRuleWhitelistDomainSets.forEach(baseTrie.whitelist));
span.traceChildSync('dedupe from white suffixes and base (extra)', () => {
filterRuleWhitelistDomainSets.forEach(extraTrie.whitelist);
});
// Dedupe domainSets
const dedupedDominArray = span.traceChildSync('dedupe from covered subdomain (base)', () => baseTrie.dump());
const dudupedDominArrayExtra = span.traceChildSync('dedupe from covered subdomain (extra)', () => extraTrie.dump());
console.log(`Final size ${dedupedDominArray.length} + ${dudupedDominArrayExtra.length}`);
const {
domainMap: domainArrayMainDomainMap,
subdomainMap: domainArraySubdomainMap
} = span.traceChildSync(
'build map for stat and sort',
() => buildParseDomainMap(dedupedDominArray.concat(dudupedDominArrayExtra))
);
// Create reject stats // Create reject stats
const rejectDomainsStats: string[] = span const rejectDomainsStats: string[] = span
@ -166,50 +138,15 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
.traceSyncFn(() => { .traceSyncFn(() => {
const results = []; const results = [];
results.push('=== base ==='); results.push('=== base ===');
appendArrayInPlace(results, getStatMap(dedupedDominArray, domainArrayMainDomainMap)); appendArrayInPlace(results, rejectOutput.getStatMap());
results.push('=== extra ==='); results.push('=== extra ===');
appendArrayInPlace(results, getStatMap(dudupedDominArrayExtra, domainArrayMainDomainMap)); appendArrayInPlace(results, rejectExtraOutput.getStatMap());
return results; return results;
}); });
return Promise.all([ return Promise.all([
createRuleset( rejectOutput.write(),
span, rejectExtraOutput.write(),
'Sukka\'s Ruleset - Reject Base',
[
...SHARED_DESCRIPTION,
'',
'The domainset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining',
'',
'Build from:',
...HOSTS.map(host => ` - ${host[0]}`),
...DOMAIN_LISTS.map(domainList => ` - ${domainList[0]}`),
...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
],
new Date(),
span.traceChildSync('sort reject domainset (base)', () => sortDomains(dedupedDominArray, domainArrayMainDomainMap, domainArraySubdomainMap)),
'domainset',
output('reject', 'domainset')
),
createRuleset(
span,
'Sukka\'s Ruleset - Reject Extra',
[
...SHARED_DESCRIPTION,
'',
'The domainset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining',
'',
'Build from:',
...HOSTS_EXTRA.map(host => ` - ${host[0]}`),
...DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`),
...ADGUARD_FILTERS_EXTRA.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`),
...PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`)
],
new Date(),
span.traceChildSync('sort reject domainset (extra)', () => sortDomains(dudupedDominArrayExtra, domainArrayMainDomainMap, domainArraySubdomainMap)),
'domainset',
output('reject_extra', 'domainset')
),
compareAndWriteFile( compareAndWriteFile(
span, span,
rejectDomainsStats, rejectDomainsStats,
@ -217,22 +154,3 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
) )
]); ]);
}); });
/**
 * Builds the statistics lines that report how many domains fall under each
 * apex domain.
 *
 * Every entry of `domains` is looked up in `domainArrayMainDomainMap`; entries
 * with no (or an empty) mapping are ignored. Only apex domains counted more
 * than 9 times are reported, ordered by descending count and then by
 * `localeCompare` on the apex domain.
 *
 * @param domains - the domains to count
 * @param domainArrayMainDomainMap - maps a domain to the apex domain it is counted under
 * @returns one line per reported apex domain: the apex domain, space padding
 *   up to column 100, then the count
 */
function getStatMap(domains: string[], domainArrayMainDomainMap: Map<string, string>): string[] {
  const counts = new Map<string, number>();

  for (const domain of domains) {
    const apex = domainArrayMainDomainMap.get(domain);
    if (apex) {
      counts.set(apex, (counts.get(apex) ?? 0) + 1);
    }
  }

  return Array.from(counts.entries())
    .filter(([, count]) => count > 9)
    .sort((a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0]))
    .map(([domain, count]) => {
      // String#repeat throws a RangeError for a negative count, which the
      // unclamped `100 - domain.length` produces for apex domains longer than
      // 100 characters. Clamp to one space so the line is still emitted and
      // the count stays separated from the domain.
      const padding = ' '.repeat(Math.max(1, 100 - domain.length));
      return `${domain}${padding}${count}`;
    });
}

View File

@ -1,40 +1,40 @@
// @ts-check // @ts-check
import path from 'node:path'; import path from 'node:path';
import { createRuleset } from './lib/create-file';
import { fetchRemoteTextByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line'; import { fetchRemoteTextByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
import { task } from './trace'; import { task } from './trace';
import { SHARED_DESCRIPTION } from './lib/constants'; import { SHARED_DESCRIPTION } from './lib/constants';
import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip'; import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
import { TTL, deserializeArray, fsFetchCache, serializeArray, createCacheKey } from './lib/cache-filesystem'; import { TTL, fsFetchCache, createCacheKey } from './lib/cache-filesystem';
import { fetchAssets } from './lib/fetch-assets'; import { fetchAssets } from './lib/fetch-assets';
import { processLine } from './lib/process-line'; import { processLine } from './lib/process-line';
import { appendArrayInPlace } from './lib/append-array-in-place'; import { RulesetOutput } from './lib/create-file-new';
import { output } from './lib/misc'; import { SOURCE_DIR } from './constants/dir';
const cacheKey = createCacheKey(__filename); const cacheKey = createCacheKey(__filename);
const BOGUS_NXDOMAIN_URL = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf'; const BOGUS_NXDOMAIN_URL = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf';
const getBogusNxDomainIPsPromise = fsFetchCache.apply( const getBogusNxDomainIPsPromise = fsFetchCache.apply<[ipv4: string[], ipv6: string[]]>(
cacheKey(BOGUS_NXDOMAIN_URL), cacheKey(BOGUS_NXDOMAIN_URL),
async () => { async () => {
const result: string[] = []; const ipv4: string[] = [];
const ipv6: string[] = [];
for await (const line of await fetchRemoteTextByLine(BOGUS_NXDOMAIN_URL)) { for await (const line of await fetchRemoteTextByLine(BOGUS_NXDOMAIN_URL)) {
if (line.startsWith('bogus-nxdomain=')) { if (line.startsWith('bogus-nxdomain=')) {
const ip = line.slice(15).trim(); const ip = line.slice(15).trim();
if (isProbablyIpv4(ip)) { if (isProbablyIpv4(ip)) {
result.push(`IP-CIDR,${ip}/32,no-resolve`); ipv4.push(ip);
} else if (isProbablyIpv6(ip)) { } else if (isProbablyIpv6(ip)) {
result.push(`IP-CIDR6,${ip}/128,no-resolve`); ipv6.push(ip);
} }
} }
} }
return result; return [ipv4, ipv6] as const;
}, },
{ {
ttl: TTL.ONE_WEEK(), ttl: TTL.ONE_WEEK(),
serializer: serializeArray, serializer: JSON.stringify,
deserializer: deserializeArray deserializer: JSON.parse
} }
); );
@ -45,46 +45,38 @@ const BOTNET_FILTER_MIRROR_URL = [
'https://malware-filter.pages.dev/botnet-filter-dnscrypt-blocked-ips.txt' 'https://malware-filter.pages.dev/botnet-filter-dnscrypt-blocked-ips.txt'
]; ];
const getBotNetFilterIPsPromise = fsFetchCache.apply( const getBotNetFilterIPsPromise = fsFetchCache.apply<[ipv4: string[], ipv6: string[]]>(
cacheKey(BOTNET_FILTER_URL), cacheKey(BOTNET_FILTER_URL),
async () => { async () => {
const text = await fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL); const text = await fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL);
return text.split('\n').reduce<string[]>((acc, cur) => { return text.split('\n').reduce<[ipv4: string[], ipv6: string[]]>((acc, cur) => {
const ip = processLine(cur); const ip = processLine(cur);
if (ip) { if (ip) {
if (isProbablyIpv4(ip)) { if (isProbablyIpv4(ip)) {
acc.push(`IP-CIDR,${ip}/32,no-resolve`); acc[0].push(ip);
} else if (isProbablyIpv6(ip)) { } else if (isProbablyIpv6(ip)) {
acc.push(`IP-CIDR6,${ip}/128,no-resolve`); acc[1].push(ip);
} }
} }
return acc; return acc;
}, []); }, [[], []]);
}, },
{ {
ttl: TTL.TWLVE_HOURS(), ttl: TTL.TWLVE_HOURS(),
serializer: serializeArray, serializer: JSON.stringify,
deserializer: deserializeArray deserializer: JSON.parse
} }
); );
const localRejectIPSourcesPromise = readFileIntoProcessedArray(path.resolve(__dirname, '../Source/ip/reject.conf'));
export const buildRejectIPList = task(require.main === module, __filename)(async (span) => { export const buildRejectIPList = task(require.main === module, __filename)(async (span) => {
const result = await localRejectIPSourcesPromise; const [bogusNxDomainIPs, botNetIPs] = await Promise.all([
const results = await Promise.all([
span.traceChildPromise('get bogus nxdomain ips', getBogusNxDomainIPsPromise), span.traceChildPromise('get bogus nxdomain ips', getBogusNxDomainIPsPromise),
span.traceChildPromise('get botnet ips', getBotNetFilterIPsPromise) span.traceChildPromise('get botnet ips', getBotNetFilterIPsPromise)
]); ]);
const bogusNxDomainIPs = results[0]; return new RulesetOutput(span, 'reject', 'ip')
const botNetIPs = results[1]; .withTitle('Sukka\'s Ruleset - Anti Bogus Domain')
.withDescription([
appendArrayInPlace(result, bogusNxDomainIPs);
appendArrayInPlace(result, botNetIPs);
const description = [
...SHARED_DESCRIPTION, ...SHARED_DESCRIPTION,
'', '',
'This file contains known addresses that are hijacking NXDOMAIN results returned by DNS servers, and botnet controller IPs.', 'This file contains known addresses that are hijacking NXDOMAIN results returned by DNS servers, and botnet controller IPs.',
@ -92,15 +84,11 @@ export const buildRejectIPList = task(require.main === module, __filename)(async
'Data from:', 'Data from:',
' - https://github.com/felixonmars/dnsmasq-china-list', ' - https://github.com/felixonmars/dnsmasq-china-list',
' - https://github.com/curbengh/botnet-filter' ' - https://github.com/curbengh/botnet-filter'
]; ])
.addFromRuleset(await readFileIntoProcessedArray(path.resolve(SOURCE_DIR, 'ip/reject.conf')))
return createRuleset( .bulkAddCIDR4NoResolve(bogusNxDomainIPs[0])
span, .bulkAddCIDR6NoResolve(bogusNxDomainIPs[1])
'Sukka\'s Ruleset - Anti Bogus Domain', .bulkAddCIDR4NoResolve(botNetIPs[0])
description, .bulkAddCIDR6NoResolve(botNetIPs[1])
new Date(), .write();
result,
'ruleset',
output('reject', 'ip')
);
}); });

View File

@ -2,49 +2,33 @@
import type { Span } from './trace'; import type { Span } from './trace';
import { task } from './trace'; import { task } from './trace';
import { createRuleset } from './lib/create-file';
import { ALL, NORTH_AMERICA, EU, HK, TW, JP, KR } from '../Source/stream'; import { ALL, NORTH_AMERICA, EU, HK, TW, JP, KR } from '../Source/stream';
import { SHARED_DESCRIPTION } from './lib/constants'; import { SHARED_DESCRIPTION } from './lib/constants';
import { output } from './lib/misc'; import { RulesetOutput } from './lib/create-file-new';
export const createRulesetForStreamService = (span: Span, fileId: string, title: string, streamServices: Array<import('../Source/stream').StreamService>) => { export const createRulesetForStreamService = (span: Span, fileId: string, title: string, streamServices: Array<import('../Source/stream').StreamService>) => {
return span.traceChildAsync(fileId, async (childSpan) => Promise.all([ return span.traceChildAsync(fileId, async (childSpan) => Promise.all([
// Domains // Domains
createRuleset( new RulesetOutput(childSpan, fileId, 'non_ip')
childSpan, .withTitle(`Sukka's Ruleset - Stream Services: ${title}`)
`Sukka's Ruleset - Stream Services: ${title}`, .withDescription([
[
...SHARED_DESCRIPTION, ...SHARED_DESCRIPTION,
'', '',
...streamServices.map((i) => `- ${i.name}`) ...streamServices.map((i) => `- ${i.name}`)
], ])
new Date(), .addFromRuleset(streamServices.flatMap((i) => i.rules))
streamServices.flatMap((i) => i.rules), .write(),
'ruleset',
output(fileId, 'non_ip')
),
// IP // IP
createRuleset( new RulesetOutput(childSpan, fileId, 'ip')
childSpan, .withTitle(`Sukka's Ruleset - Stream Services IPs: ${title}`)
`Sukka's Ruleset - Stream Services' IPs: ${title}`, .withDescription([
[
...SHARED_DESCRIPTION, ...SHARED_DESCRIPTION,
'', '',
...streamServices.map((i) => `- ${i.name}`) ...streamServices.map((i) => `- ${i.name}`)
], ])
new Date(), .bulkAddCIDR4NoResolve(streamServices.flatMap(i => i.ip?.v4 ?? []))
streamServices.flatMap((i) => ( .bulkAddCIDR6NoResolve(streamServices.flatMap(i => i.ip?.v6 ?? []))
i.ip .write()
? [
...i.ip.v4.map((ip) => `IP-CIDR,${ip},no-resolve`),
...i.ip.v6.map((ip) => `IP-CIDR6,${ip},no-resolve`)
]
: []
)),
'ruleset',
output(fileId, 'ip')
)
])); ]));
}; };

View File

@ -3,11 +3,10 @@ import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line'; import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line';
import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip'; import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
import { processLine } from './lib/process-line'; import { processLine } from './lib/process-line';
import { createRuleset } from './lib/create-file';
import { task } from './trace'; import { task } from './trace';
import { SHARED_DESCRIPTION } from './lib/constants'; import { SHARED_DESCRIPTION } from './lib/constants';
import { createMemoizedPromise } from './lib/memo-promise'; import { createMemoizedPromise } from './lib/memo-promise';
import { output } from './lib/misc'; import { RulesetOutput } from './lib/create-file-new';
export const getTelegramCIDRPromise = createMemoizedPromise(async () => { export const getTelegramCIDRPromise = createMemoizedPromise(async () => {
const resp = await fetchWithRetry('https://core.telegram.org/resources/cidr.txt', defaultRequestInit); const resp = await fetchWithRetry('https://core.telegram.org/resources/cidr.txt', defaultRequestInit);
@ -45,13 +44,10 @@ export const buildTelegramCIDR = task(require.main === module, __filename)(async
' - https://core.telegram.org/resources/cidr.txt' ' - https://core.telegram.org/resources/cidr.txt'
]; ];
return createRuleset( return new RulesetOutput(span, 'telegram', 'ip')
span, .withTitle('Sukka\'s Ruleset - Telegram IP CIDR')
'Sukka\'s Ruleset - Telegram IP CIDR', .withDescription(description)
description, .withDate(date)
date, .addFromRuleset(results)
results, .write();
'ruleset',
output('telegram', 'ip')
);
}); });

View File

@ -4,12 +4,14 @@ import type { Span } from '../trace';
import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash'; import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash';
import { compareAndWriteFile, defaultSortTypeOrder, sortTypeOrder, withBannerArray } from './create-file'; import { compareAndWriteFile, defaultSortTypeOrder, sortTypeOrder, withBannerArray } from './create-file';
import { ipCidrListToSingbox, surgeDomainsetToSingbox, surgeRulesetToSingbox } from './singbox'; import { ipCidrListToSingbox, surgeDomainsetToSingbox, surgeRulesetToSingbox } from './singbox';
import { sortDomains } from './stable-sort-domain'; import { buildParseDomainMap, sortDomains } from './stable-sort-domain';
import { createTrie } from './trie'; import { createTrie } from './trie';
import { invariant } from 'foxact/invariant'; import { invariant } from 'foxact/invariant';
import { OUTPUT_CLASH_DIR, OUTPUT_SINGBOX_DIR, OUTPUT_SURGE_DIR } from '../constants/dir'; import { OUTPUT_CLASH_DIR, OUTPUT_SINGBOX_DIR, OUTPUT_SURGE_DIR } from '../constants/dir';
import stringify from 'json-stringify-pretty-compact'; import stringify from 'json-stringify-pretty-compact';
import { appendArrayInPlace } from './append-array-in-place'; import { appendArrayInPlace } from './append-array-in-place';
import { nullthrow } from 'foxact/nullthrow';
import createKeywordFilter from './aho-corasick';
abstract class RuleOutput { abstract class RuleOutput {
protected domainTrie = createTrie<unknown>(null, true); protected domainTrie = createTrie<unknown>(null, true);
@ -146,6 +148,13 @@ abstract class RuleOutput {
return this; return this;
} }
/**
 * Adds IPv4 entries to the no-resolve IPv4 CIDR collection
 * (`this.ipcidrNoResolve`).
 *
 * Accepts both CIDR strings (`1.2.3.0/24`) and bare addresses (`1.2.3.4`).
 * A bare address is stored as a single-host `/32` CIDR, so callers that
 * collect plain IPs do not silently lose the prefix length.
 * NOTE(review): assumes the writers serialize this collection as CIDR
 * rules — confirm against `write()`.
 *
 * @param cidr - IPv4 CIDRs or bare IPv4 addresses
 * @returns this output, for chaining
 */
bulkAddCIDR4NoResolve(cidr: string[]) {
  for (let i = 0, len = cidr.length; i < len; i++) {
    const entry = cidr[i];
    // No slash means no prefix length was given: treat it as a single host.
    this.ipcidrNoResolve.add(entry.includes('/') ? entry : `${entry}/32`);
  }
  return this;
}
bulkAddCIDR6(cidr: string[]) { bulkAddCIDR6(cidr: string[]) {
for (let i = 0, len = cidr.length; i < len; i++) { for (let i = 0, len = cidr.length; i < len; i++) {
this.ipcidr6.add(cidr[i]); this.ipcidr6.add(cidr[i]);
@ -153,19 +162,54 @@ abstract class RuleOutput {
return this; return this;
} }
/**
 * Adds IPv6 entries to the no-resolve IPv6 CIDR collection
 * (`this.ipcidr6NoResolve`).
 *
 * Accepts both CIDR strings (`2001:db8::/32`) and bare addresses
 * (`2001:db8::1`). A bare address is stored as a single-host `/128` CIDR, so
 * callers that collect plain IPs do not silently lose the prefix length.
 * NOTE(review): assumes the writers serialize this collection as CIDR
 * rules — confirm against `write()`.
 *
 * @param cidr - IPv6 CIDRs or bare IPv6 addresses
 * @returns this output, for chaining
 */
bulkAddCIDR6NoResolve(cidr: string[]) {
  for (let i = 0, len = cidr.length; i < len; i++) {
    const entry = cidr[i];
    // No slash means no prefix length was given: treat it as a single host.
    this.ipcidr6NoResolve.add(entry.includes('/') ? entry : `${entry}/128`);
  }
  return this;
}
abstract write(): Promise<void>; abstract write(): Promise<void>;
} }
export class DomainsetOutput extends RuleOutput { export class DomainsetOutput extends RuleOutput {
protected type = 'domainset' as const; protected type = 'domainset' as const;
private $dumped: string[] | null = null;
/**
 * The domains dumped from `domainTrie`, minus every domain matched by the
 * keyword filter built from `domainKeywords`.
 *
 * The list is computed on first access and cached in `$dumped`; later
 * accesses return the cached array and do not see domains or keywords added
 * afterwards.
 */
get dumped() {
  if (this.$dumped) {
    return this.$dumped;
  }

  const matchesKeyword = createKeywordFilter(this.domainKeywords);
  const remaining = new Set<string>();

  for (const domain of this.domainTrie.dump()) {
    // Domains already covered by a keyword are left out of the result.
    if (!matchesKeyword(domain)) {
      remaining.add(domain);
    }
  }

  this.$dumped = Array.from(remaining);
  return this.$dumped;
}
/**
 * Populates `apexDomainMap` and `subDomainMap` from the dumped domain list.
 *
 * Does nothing when both maps are already set; otherwise both are
 * (re)computed with `buildParseDomainMap(this.dumped)`.
 */
calcDomainMap() {
  if (this.apexDomainMap && this.subDomainMap) {
    return;
  }

  const maps = buildParseDomainMap(this.dumped);
  this.apexDomainMap = maps.domainMap;
  this.subDomainMap = maps.subdomainMap;
}
async write() { async write() {
await this.pendingPromise; await this.pendingPromise;
invariant(this.title, 'Missing title'); invariant(this.title, 'Missing title');
invariant(this.description, 'Missing description'); invariant(this.description, 'Missing description');
const sorted = sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap); const sorted = sortDomains(this.dumped, this.apexDomainMap, this.subDomainMap);
sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'); sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe');
const surge = sorted; const surge = sorted;
@ -201,6 +245,28 @@ export class DomainsetOutput extends RuleOutput {
) )
]); ]);
} }
/**
 * Builds the statistics lines that report how many dumped domains fall under
 * each apex domain.
 *
 * Requires `apexDomainMap` to be set (see `calcDomainMap()`); throws through
 * `invariant` otherwise. Only apex domains counted more than 9 times are
 * reported, ordered by descending count and then by `localeCompare` on the
 * apex domain.
 *
 * @returns one line per reported apex domain: the apex domain, space padding
 *   up to column 100, then the count
 */
getStatMap() {
  // The getter always yields an array (computing and caching it on first
  // access), so it needs no null check of its own.
  const dumped = this.dumped;

  invariant(this.apexDomainMap, 'Missing apex domain map');
  // Capture in a const so the narrowed type holds inside the loop/callbacks
  // without a non-null assertion.
  const apexDomainMap = this.apexDomainMap;

  const counts = new Map<string, number>();
  for (const domain of dumped) {
    const apex = apexDomainMap.get(domain);
    if (apex) {
      counts.set(apex, (counts.get(apex) ?? 0) + 1);
    }
  }

  return Array.from(counts.entries())
    .filter(([, count]) => count > 9)
    .sort((a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0]))
    .map(([domain, count]) => {
      // String#repeat throws a RangeError for a negative count, which the
      // unclamped `100 - domain.length` produces for apex domains longer than
      // 100 characters. Clamp to one space so the line is still emitted and
      // the count stays separated from the domain.
      const padding = ' '.repeat(Math.max(1, 100 - domain.length));
      return `${domain}${padding}${count}`;
    });
}
} }
export class IPListOutput extends RuleOutput { export class IPListOutput extends RuleOutput {