Refactor: adapt new output

This commit is contained in:
SukkaW 2024-09-21 03:36:44 +08:00
parent eb2023b9aa
commit 4808ed8d27
7 changed files with 204 additions and 269 deletions

View File

@ -3,14 +3,15 @@ import path from 'node:path';
import { DOMESTICS } from '../Source/non_ip/domestic';
import { DIRECTS, LANS } from '../Source/non_ip/direct';
import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
import { compareAndWriteFile, createRuleset } from './lib/create-file';
import { compareAndWriteFile } from './lib/create-file';
import { task } from './trace';
import { SHARED_DESCRIPTION } from './lib/constants';
import { createMemoizedPromise } from './lib/memo-promise';
import * as yaml from 'yaml';
import { appendArrayInPlace } from './lib/append-array-in-place';
import { output, writeFile } from './lib/misc';
import { writeFile } from './lib/misc';
import { OUTPUT_INTERNAL_DIR, OUTPUT_MODULES_DIR, SOURCE_DIR } from './constants/dir';
import { RulesetOutput } from './lib/create-file-new';
export const getDomesticAndDirectDomainsRulesetPromise = createMemoizedPromise(async () => {
const domestics = await readFileIntoProcessedArray(path.join(SOURCE_DIR, 'non_ip/domestic.conf'));
@ -38,45 +39,33 @@ export const buildDomesticRuleset = task(require.main === module, __filename)(as
appendArrayInPlace(dataset, Object.entries(LANS));
return Promise.all([
createRuleset(
span,
'Sukka\'s Ruleset - Domestic Domains',
[
new RulesetOutput(span, 'domestic', 'non_ip')
.withTitle('Sukka\'s Ruleset - Domestic Domains')
.withDescription([
...SHARED_DESCRIPTION,
'',
'This file contains known addresses that are avaliable in the Mainland China.'
],
new Date(),
res[0],
'ruleset',
output('domestic', 'non_ip')
),
createRuleset(
span,
'Sukka\'s Ruleset - Direct Rules',
[
])
.addFromRuleset(res[0])
.write(),
new RulesetOutput(span, 'direct', 'non_ip')
.withTitle('Sukka\'s Ruleset - Direct Rules')
.withDescription([
...SHARED_DESCRIPTION,
'',
'This file contains domains and process that should not be proxied.'
],
new Date(),
res[1],
'ruleset',
output('direct', 'non_ip')
),
createRuleset(
span,
'Sukka\'s Ruleset - LAN',
[
])
.addFromRuleset(res[1])
.write(),
new RulesetOutput(span, 'lan', 'non_ip')
.withTitle('Sukka\'s Ruleset - LAN')
.withDescription([
...SHARED_DESCRIPTION,
'',
'This file includes rules for LAN DOMAIN and reserved TLDs.'
],
new Date(),
res[2],
'ruleset',
output('lan', 'non_ip')
),
])
.addFromRuleset(res[2])
.write(),
compareAndWriteFile(
span,
[

View File

@ -1,12 +1,10 @@
import { task } from './trace';
import { createRuleset } from './lib/create-file';
import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
import { createTrie } from './lib/trie';
import { SHARED_DESCRIPTION } from './lib/constants';
import { createMemoizedPromise } from './lib/memo-promise';
import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
import { sortDomains } from './lib/stable-sort-domain';
import { output } from './lib/misc';
import { RulesetOutput } from './lib/create-file-new';
const PROBE_DOMAINS = ['.microsoft.com', '.windows.net', '.windows.com', '.windowsupdate.com', '.windowssearch.com', '.office.net'];
@ -39,7 +37,7 @@ export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
const trie2 = createTrie(foundMicrosoftCdnDomains, true);
BLACKLIST.forEach(trie2.whitelist);
return sortDomains(trie2.dump())
return trie2.dump()
.map(d => `DOMAIN-SUFFIX,${d}`)
.concat(WHITELIST);
});
@ -56,13 +54,9 @@ export const buildMicrosoftCdn = task(require.main === module, __filename)(async
const res: string[] = await span.traceChildPromise('get microsoft cdn domains', getMicrosoftCdnRulesetPromise());
return createRuleset(
span,
'Sukka\'s Ruleset - Microsoft CDN',
description,
new Date(),
res,
'ruleset',
output('microsoft_cdn', 'non_ip')
);
return new RulesetOutput(span, 'microsoft_cdn', 'non_ip')
.withTitle('Sukka\'s Ruleset - Microsoft CDN')
.withDescription(description)
.addFromRuleset(res)
.write();
});

View File

@ -3,13 +3,10 @@ import path from 'node:path';
import process from 'node:process';
import { processHosts, processFilterRules, processDomainLists } from './lib/parse-filter';
import { createTrie } from './lib/trie';
import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA } from './constants/reject-data-source';
import { createRuleset, compareAndWriteFile } from './lib/create-file';
import createKeywordFilter from './lib/aho-corasick';
import { compareAndWriteFile } from './lib/create-file';
import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
import { buildParseDomainMap, sortDomains } from './lib/stable-sort-domain';
import { task } from './trace';
// tldts-experimental is much faster than tldts, at the cost of being slightly less accurate
// (since it is hash-based). But the result is still deterministic, which is
@ -17,23 +14,47 @@ import { task } from './trace';
import { SHARED_DESCRIPTION } from './lib/constants';
import { getPhishingDomains } from './lib/get-phishing-domains';
import { setAddFromArray, setAddFromArrayCurried } from './lib/set-add-from-array';
import { output } from './lib/misc';
import { setAddFromArray } from './lib/set-add-from-array';
import { appendArrayInPlace } from './lib/append-array-in-place';
import { OUTPUT_INTERNAL_DIR, SOURCE_DIR } from './constants/dir';
import { DomainsetOutput } from './lib/create-file-new';
const getRejectSukkaConfPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/reject_sukka.conf'));
export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => {
const rejectOutput = new DomainsetOutput(span, 'reject')
.withTitle('Sukka\'s Ruleset - Reject Base')
.withDescription([
...SHARED_DESCRIPTION,
'',
'The domainset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining',
'',
'Build from:',
...HOSTS.map(host => ` - ${host[0]}`),
...DOMAIN_LISTS.map(domainList => ` - ${domainList[0]}`),
...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
]);
const rejectExtraOutput = new DomainsetOutput(span, 'reject_extra')
.withTitle('Sukka\'s Ruleset - Reject Extra')
.withDescription([
...SHARED_DESCRIPTION,
'',
'The domainset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining',
'',
'Build from:',
...HOSTS_EXTRA.map(host => ` - ${host[0]}`),
...DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`),
...ADGUARD_FILTERS_EXTRA.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`),
...PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`)
]);
const appendArrayToRejectOutput = rejectOutput.addFromDomainset.bind(rejectOutput);
const appendArrayToRejectExtraOutput = rejectExtraOutput.addFromDomainset.bind(rejectExtraOutput);
/** Whitelists */
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
const domainSets = new Set<string>();
const appendArrayToDomainSets = setAddFromArrayCurried(domainSets);
const domainSetsExtra = new Set<string>();
const appendArrayToDomainSetsExtra = setAddFromArrayCurried(domainSetsExtra);
// Parse from AdGuard Filters
const shouldStop = await span
.traceChild('download and process hosts / adblock filter rules')
@ -42,11 +63,11 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
let shouldStop = false;
await Promise.all([
// Parse from remote hosts & domain lists
HOSTS.map(entry => processHosts(childSpan, ...entry).then(appendArrayToDomainSets)),
HOSTS_EXTRA.map(entry => processHosts(childSpan, ...entry).then(appendArrayToDomainSetsExtra)),
HOSTS.map(entry => processHosts(childSpan, ...entry).then(appendArrayToRejectOutput)),
HOSTS_EXTRA.map(entry => processHosts(childSpan, ...entry).then(appendArrayToRejectExtraOutput)),
DOMAIN_LISTS.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToDomainSets)),
DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToDomainSetsExtra)),
DOMAIN_LISTS.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToRejectOutput)),
DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToRejectExtraOutput)),
ADGUARD_FILTERS.map(
entry => processFilterRules(childSpan, ...entry)
@ -57,7 +78,7 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
// we should not break here, as we want to see full matches from all data source
}
setAddFromArray(filterRuleWhitelistDomainSets, white);
setAddFromArray(domainSets, black);
appendArrayToRejectOutput(black);
})
),
ADGUARD_FILTERS_EXTRA.map(
@ -69,7 +90,7 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
// we should not break here, as we want to see full matches from all data source
}
setAddFromArray(filterRuleWhitelistDomainSets, white);
setAddFromArray(domainSetsExtra, black);
appendArrayToRejectExtraOutput(black);
})
),
@ -82,8 +103,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
setAddFromArray(filterRuleWhitelistDomainSets, black);
})
)),
getPhishingDomains(childSpan).then(appendArrayToDomainSetsExtra),
getRejectSukkaConfPromise.then(appendArrayToDomainSets)
getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput),
getRejectSukkaConfPromise.then(appendArrayToRejectOutput)
].flat());
// eslint-disable-next-line sukka/no-single-return -- not single return
return shouldStop;
@ -93,72 +114,23 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
process.exit(1);
}
console.log(`Import ${domainSets.size} + ${domainSetsExtra.size} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
// Dedupe domainSets
const domainKeywordsSet = await span.traceChildAsync('collect black keywords/suffixes', async () => {
await span.traceChildAsync('collect black keywords/suffixes', async () => {
/** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */
const domainKeywordsSet = new Set<string>();
for await (const line of readFileByLine(path.resolve(__dirname, '../Source/non_ip/reject.conf'))) {
const [type, value] = line.split(',');
if (type === 'DOMAIN-KEYWORD') {
domainKeywordsSet.add(value);
rejectOutput.addDomainKeyword(value); // Add for later deduplication
rejectExtraOutput.addDomainKeyword(value); // Add for later deduplication
} else if (type === 'DOMAIN-SUFFIX') {
domainSets.add('.' + value); // Add to domainSets for later deduplication
rejectOutput.addDomainSuffix(value); // Add for later deduplication
}
}
return domainKeywordsSet;
});
const [baseTrie, extraTrie] = span.traceChildSync('create smol trie while deduping black keywords', (childSpan) => {
const baseTrie = createTrie(null, true);
const extraTrie = createTrie(null, true);
const kwfilter = createKeywordFilter(domainKeywordsSet);
childSpan.traceChildSync('add items to trie (extra)', () => {
for (const domain of domainSetsExtra) {
// exclude keyword when creating trie
if (!kwfilter(domain)) {
extraTrie.add(domain);
}
}
});
childSpan.traceChildSync('add items to trie (base) + dedupe extra trie', () => {
for (const domain of domainSets) {
// exclude keyword when creating trie
if (!kwfilter(domain)) {
baseTrie.add(domain);
extraTrie.whitelist(domain);
}
}
});
return [baseTrie, extraTrie] as const;
});
span.traceChildSync('dedupe from white suffixes (base)', () => filterRuleWhitelistDomainSets.forEach(baseTrie.whitelist));
span.traceChildSync('dedupe from white suffixes and base (extra)', () => {
filterRuleWhitelistDomainSets.forEach(extraTrie.whitelist);
});
// Dedupe domainSets
const dedupedDominArray = span.traceChildSync('dedupe from covered subdomain (base)', () => baseTrie.dump());
const dudupedDominArrayExtra = span.traceChildSync('dedupe from covered subdomain (extra)', () => extraTrie.dump());
console.log(`Final size ${dedupedDominArray.length} + ${dudupedDominArrayExtra.length}`);
const {
domainMap: domainArrayMainDomainMap,
subdomainMap: domainArraySubdomainMap
} = span.traceChildSync(
'build map for stat and sort',
() => buildParseDomainMap(dedupedDominArray.concat(dudupedDominArrayExtra))
);
rejectOutput.calcDomainMap();
rejectExtraOutput.calcDomainMap();
// Create reject stats
const rejectDomainsStats: string[] = span
@ -166,50 +138,15 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
.traceSyncFn(() => {
const results = [];
results.push('=== base ===');
appendArrayInPlace(results, getStatMap(dedupedDominArray, domainArrayMainDomainMap));
appendArrayInPlace(results, rejectOutput.getStatMap());
results.push('=== extra ===');
appendArrayInPlace(results, getStatMap(dudupedDominArrayExtra, domainArrayMainDomainMap));
appendArrayInPlace(results, rejectExtraOutput.getStatMap());
return results;
});
return Promise.all([
createRuleset(
span,
'Sukka\'s Ruleset - Reject Base',
[
...SHARED_DESCRIPTION,
'',
'The domainset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining',
'',
'Build from:',
...HOSTS.map(host => ` - ${host[0]}`),
...DOMAIN_LISTS.map(domainList => ` - ${domainList[0]}`),
...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
],
new Date(),
span.traceChildSync('sort reject domainset (base)', () => sortDomains(dedupedDominArray, domainArrayMainDomainMap, domainArraySubdomainMap)),
'domainset',
output('reject', 'domainset')
),
createRuleset(
span,
'Sukka\'s Ruleset - Reject Extra',
[
...SHARED_DESCRIPTION,
'',
'The domainset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining',
'',
'Build from:',
...HOSTS_EXTRA.map(host => ` - ${host[0]}`),
...DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`),
...ADGUARD_FILTERS_EXTRA.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`),
...PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`)
],
new Date(),
span.traceChildSync('sort reject domainset (extra)', () => sortDomains(dudupedDominArrayExtra, domainArrayMainDomainMap, domainArraySubdomainMap)),
'domainset',
output('reject_extra', 'domainset')
),
rejectOutput.write(),
rejectExtraOutput.write(),
compareAndWriteFile(
span,
rejectDomainsStats,
@ -217,22 +154,3 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
)
]);
});
/**
 * Builds the lines of a per-suffix statistics report.
 *
 * Each entry of `domains` is looked up in `domainArrayMainDomainMap`; entries
 * without a (truthy) mapping are ignored. The mapped values are tallied, and
 * only values seen more than 9 times are reported.
 *
 * @param domains - The domains to tally.
 * @param domainArrayMainDomainMap - Lookup from a domain to the value it is grouped under.
 * @returns One line per reported value, ordered by count (descending) and then
 * by `localeCompare` on the value. Each line is the value right-padded with
 * spaces to 100 columns, immediately followed by its count.
 */
function getStatMap(domains: string[], domainArrayMainDomainMap: Map<string, string>): string[] {
  const counts = new Map<string, number>();

  for (const domain of domains) {
    const suffix = domainArrayMainDomainMap.get(domain);
    if (suffix) {
      counts.set(suffix, (counts.get(suffix) ?? 0) + 1);
    }
  }

  return Array.from(counts.entries())
    .filter(([, count]) => count > 9)
    .sort((a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0]))
    // `padEnd` instead of `' '.repeat(100 - length)`: `repeat` throws a RangeError
    // on a negative count, i.e. for any value longer than 100 characters. Such
    // values are now emitted unpadded instead of aborting the whole report.
    .map(([domain, count]) => `${domain.padEnd(100)}${count}`);
}

View File

@ -1,40 +1,40 @@
// @ts-check
import path from 'node:path';
import { createRuleset } from './lib/create-file';
import { fetchRemoteTextByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
import { task } from './trace';
import { SHARED_DESCRIPTION } from './lib/constants';
import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
import { TTL, deserializeArray, fsFetchCache, serializeArray, createCacheKey } from './lib/cache-filesystem';
import { TTL, fsFetchCache, createCacheKey } from './lib/cache-filesystem';
import { fetchAssets } from './lib/fetch-assets';
import { processLine } from './lib/process-line';
import { appendArrayInPlace } from './lib/append-array-in-place';
import { output } from './lib/misc';
import { RulesetOutput } from './lib/create-file-new';
import { SOURCE_DIR } from './constants/dir';
const cacheKey = createCacheKey(__filename);
const BOGUS_NXDOMAIN_URL = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf';
const getBogusNxDomainIPsPromise = fsFetchCache.apply(
const getBogusNxDomainIPsPromise = fsFetchCache.apply<[ipv4: string[], ipv6: string[]]>(
cacheKey(BOGUS_NXDOMAIN_URL),
async () => {
const result: string[] = [];
const ipv4: string[] = [];
const ipv6: string[] = [];
for await (const line of await fetchRemoteTextByLine(BOGUS_NXDOMAIN_URL)) {
if (line.startsWith('bogus-nxdomain=')) {
const ip = line.slice(15).trim();
if (isProbablyIpv4(ip)) {
result.push(`IP-CIDR,${ip}/32,no-resolve`);
ipv4.push(ip);
} else if (isProbablyIpv6(ip)) {
result.push(`IP-CIDR6,${ip}/128,no-resolve`);
ipv6.push(ip);
}
}
}
return result;
return [ipv4, ipv6] as const;
},
{
ttl: TTL.ONE_WEEK(),
serializer: serializeArray,
deserializer: deserializeArray
serializer: JSON.stringify,
deserializer: JSON.parse
}
);
@ -45,62 +45,50 @@ const BOTNET_FILTER_MIRROR_URL = [
'https://malware-filter.pages.dev/botnet-filter-dnscrypt-blocked-ips.txt'
];
const getBotNetFilterIPsPromise = fsFetchCache.apply(
const getBotNetFilterIPsPromise = fsFetchCache.apply<[ipv4: string[], ipv6: string[]]>(
cacheKey(BOTNET_FILTER_URL),
async () => {
const text = await fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL);
return text.split('\n').reduce<string[]>((acc, cur) => {
return text.split('\n').reduce<[ipv4: string[], ipv6: string[]]>((acc, cur) => {
const ip = processLine(cur);
if (ip) {
if (isProbablyIpv4(ip)) {
acc.push(`IP-CIDR,${ip}/32,no-resolve`);
acc[0].push(ip);
} else if (isProbablyIpv6(ip)) {
acc.push(`IP-CIDR6,${ip}/128,no-resolve`);
acc[1].push(ip);
}
}
return acc;
}, []);
}, [[], []]);
},
{
ttl: TTL.TWLVE_HOURS(),
serializer: serializeArray,
deserializer: deserializeArray
serializer: JSON.stringify,
deserializer: JSON.parse
}
);
const localRejectIPSourcesPromise = readFileIntoProcessedArray(path.resolve(__dirname, '../Source/ip/reject.conf'));
export const buildRejectIPList = task(require.main === module, __filename)(async (span) => {
const result = await localRejectIPSourcesPromise;
const results = await Promise.all([
const [bogusNxDomainIPs, botNetIPs] = await Promise.all([
span.traceChildPromise('get bogus nxdomain ips', getBogusNxDomainIPsPromise),
span.traceChildPromise('get botnet ips', getBotNetFilterIPsPromise)
]);
const bogusNxDomainIPs = results[0];
const botNetIPs = results[1];
appendArrayInPlace(result, bogusNxDomainIPs);
appendArrayInPlace(result, botNetIPs);
const description = [
...SHARED_DESCRIPTION,
'',
'This file contains known addresses that are hijacking NXDOMAIN results returned by DNS servers, and botnet controller IPs.',
'',
'Data from:',
' - https://github.com/felixonmars/dnsmasq-china-list',
' - https://github.com/curbengh/botnet-filter'
];
return createRuleset(
span,
'Sukka\'s Ruleset - Anti Bogus Domain',
description,
new Date(),
result,
'ruleset',
output('reject', 'ip')
);
return new RulesetOutput(span, 'reject', 'ip')
.withTitle('Sukka\'s Ruleset - Anti Bogus Domain')
.withDescription([
...SHARED_DESCRIPTION,
'',
'This file contains known addresses that are hijacking NXDOMAIN results returned by DNS servers, and botnet controller IPs.',
'',
'Data from:',
' - https://github.com/felixonmars/dnsmasq-china-list',
' - https://github.com/curbengh/botnet-filter'
])
.addFromRuleset(await readFileIntoProcessedArray(path.resolve(SOURCE_DIR, 'ip/reject.conf')))
.bulkAddCIDR4NoResolve(bogusNxDomainIPs[0])
.bulkAddCIDR6NoResolve(bogusNxDomainIPs[1])
.bulkAddCIDR4NoResolve(botNetIPs[0])
.bulkAddCIDR6NoResolve(botNetIPs[1])
.write();
});

View File

@ -2,49 +2,33 @@
import type { Span } from './trace';
import { task } from './trace';
import { createRuleset } from './lib/create-file';
import { ALL, NORTH_AMERICA, EU, HK, TW, JP, KR } from '../Source/stream';
import { SHARED_DESCRIPTION } from './lib/constants';
import { output } from './lib/misc';
import { RulesetOutput } from './lib/create-file-new';
export const createRulesetForStreamService = (span: Span, fileId: string, title: string, streamServices: Array<import('../Source/stream').StreamService>) => {
return span.traceChildAsync(fileId, async (childSpan) => Promise.all([
// Domains
createRuleset(
childSpan,
`Sukka's Ruleset - Stream Services: ${title}`,
[
new RulesetOutput(childSpan, fileId, 'non_ip')
.withTitle(`Sukka's Ruleset - Stream Services: ${title}`)
.withDescription([
...SHARED_DESCRIPTION,
'',
...streamServices.map((i) => `- ${i.name}`)
],
new Date(),
streamServices.flatMap((i) => i.rules),
'ruleset',
output(fileId, 'non_ip')
),
])
.addFromRuleset(streamServices.flatMap((i) => i.rules))
.write(),
// IP
createRuleset(
childSpan,
`Sukka's Ruleset - Stream Services' IPs: ${title}`,
[
new RulesetOutput(childSpan, fileId, 'ip')
.withTitle(`Sukka's Ruleset - Stream Services IPs: ${title}`)
.withDescription([
...SHARED_DESCRIPTION,
'',
...streamServices.map((i) => `- ${i.name}`)
],
new Date(),
streamServices.flatMap((i) => (
i.ip
? [
...i.ip.v4.map((ip) => `IP-CIDR,${ip},no-resolve`),
...i.ip.v6.map((ip) => `IP-CIDR6,${ip},no-resolve`)
]
: []
)),
'ruleset',
output(fileId, 'ip')
)
])
.bulkAddCIDR4NoResolve(streamServices.flatMap(i => i.ip?.v4 ?? []))
.bulkAddCIDR6NoResolve(streamServices.flatMap(i => i.ip?.v6 ?? []))
.write()
]));
};

View File

@ -3,11 +3,10 @@ import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line';
import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
import { processLine } from './lib/process-line';
import { createRuleset } from './lib/create-file';
import { task } from './trace';
import { SHARED_DESCRIPTION } from './lib/constants';
import { createMemoizedPromise } from './lib/memo-promise';
import { output } from './lib/misc';
import { RulesetOutput } from './lib/create-file-new';
export const getTelegramCIDRPromise = createMemoizedPromise(async () => {
const resp = await fetchWithRetry('https://core.telegram.org/resources/cidr.txt', defaultRequestInit);
@ -45,13 +44,10 @@ export const buildTelegramCIDR = task(require.main === module, __filename)(async
' - https://core.telegram.org/resources/cidr.txt'
];
return createRuleset(
span,
'Sukka\'s Ruleset - Telegram IP CIDR',
description,
date,
results,
'ruleset',
output('telegram', 'ip')
);
return new RulesetOutput(span, 'telegram', 'ip')
.withTitle('Sukka\'s Ruleset - Telegram IP CIDR')
.withDescription(description)
.withDate(date)
.addFromRuleset(results)
.write();
});

View File

@ -4,12 +4,14 @@ import type { Span } from '../trace';
import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash';
import { compareAndWriteFile, defaultSortTypeOrder, sortTypeOrder, withBannerArray } from './create-file';
import { ipCidrListToSingbox, surgeDomainsetToSingbox, surgeRulesetToSingbox } from './singbox';
import { sortDomains } from './stable-sort-domain';
import { buildParseDomainMap, sortDomains } from './stable-sort-domain';
import { createTrie } from './trie';
import { invariant } from 'foxact/invariant';
import { OUTPUT_CLASH_DIR, OUTPUT_SINGBOX_DIR, OUTPUT_SURGE_DIR } from '../constants/dir';
import stringify from 'json-stringify-pretty-compact';
import { appendArrayInPlace } from './append-array-in-place';
import { nullthrow } from 'foxact/nullthrow';
import createKeywordFilter from './aho-corasick';
abstract class RuleOutput {
protected domainTrie = createTrie<unknown>(null, true);
@ -146,6 +148,13 @@ abstract class RuleOutput {
return this;
}
/**
 * Adds every element of `cidr` to `this.ipcidrNoResolve`.
 *
 * @param cidr - Entries to add, in order.
 * @returns `this`, so calls can be chained.
 */
bulkAddCIDR4NoResolve(cidr: string[]) {
  // The length is read once up front, exactly as many elements as the array
  // held on entry are visited.
  const total = cidr.length;
  let index = 0;
  while (index < total) {
    this.ipcidrNoResolve.add(cidr[index]);
    index += 1;
  }
  return this;
}
bulkAddCIDR6(cidr: string[]) {
for (let i = 0, len = cidr.length; i < len; i++) {
this.ipcidr6.add(cidr[i]);
@ -153,19 +162,54 @@ abstract class RuleOutput {
return this;
}
/**
 * Adds every element of `cidr` to `this.ipcidr6NoResolve`.
 *
 * @param cidr - Entries to add, in order.
 * @returns `this`, so calls can be chained.
 */
bulkAddCIDR6NoResolve(cidr: string[]) {
  // The length is read once up front, exactly as many elements as the array
  // held on entry are visited.
  const total = cidr.length;
  let index = 0;
  while (index < total) {
    this.ipcidr6NoResolve.add(cidr[index]);
    index += 1;
  }
  return this;
}
abstract write(): Promise<void>;
}
export class DomainsetOutput extends RuleOutput {
protected type = 'domainset' as const;
private $dumped: string[] | null = null;
/**
 * The dump of `this.domainTrie` with every entry matched by the keyword
 * filter (built from `this.domainKeywords`) removed.
 *
 * The list is computed on first access and cached in `this.$dumped`; later
 * accesses return the cached array.
 */
get dumped() {
  if (this.$dumped) {
    return this.$dumped;
  }

  const matchesKeyword = createKeywordFilter(this.domainKeywords);
  const kept = new Set<string>();
  for (const domain of this.domainTrie.dump()) {
    if (!matchesKeyword(domain)) {
      kept.add(domain);
    }
  }

  const result = Array.from(kept);
  this.$dumped = result;
  return result;
}
/**
 * Populates `this.apexDomainMap` and `this.subDomainMap` from
 * `buildParseDomainMap(this.dumped)`.
 *
 * Does nothing when both maps are already set.
 */
calcDomainMap() {
  if (this.apexDomainMap && this.subDomainMap) {
    return;
  }

  const parsed = buildParseDomainMap(this.dumped);
  this.apexDomainMap = parsed.domainMap;
  this.subDomainMap = parsed.subdomainMap;
}
async write() {
await this.pendingPromise;
invariant(this.title, 'Missing title');
invariant(this.description, 'Missing description');
const sorted = sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap);
const sorted = sortDomains(this.dumped, this.apexDomainMap, this.subDomainMap);
sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe');
const surge = sorted;
@ -201,6 +245,28 @@ export class DomainsetOutput extends RuleOutput {
)
]);
}
/**
 * Builds the lines of a per-apex-domain statistics report for this output.
 *
 * Each entry of `this.dumped` is looked up in `this.apexDomainMap`; entries
 * without a (truthy) mapping are ignored. The mapped values are tallied, and
 * only values seen more than 9 times are reported.
 *
 * @returns One line per reported value, ordered by count (descending) and then
 * by `localeCompare` on the value. Each line is the value right-padded with
 * spaces to 100 columns, immediately followed by its count.
 * @throws If `this.apexDomainMap` has not been set (see the `invariant` call).
 */
getStatMap() {
  // Read `dumped` first, as before, so it is computed even if the invariant
  // below fails. The getter always yields an array, so no null check is needed.
  const dumped = this.dumped;

  invariant(this.apexDomainMap, 'Missing apex domain map');
  // Capture in a local so the narrowed type is usable without a `!` assertion.
  const apexDomainMap = this.apexDomainMap;

  const counts = new Map<string, number>();
  for (const domain of dumped) {
    const suffix = apexDomainMap.get(domain);
    if (suffix) {
      counts.set(suffix, (counts.get(suffix) ?? 0) + 1);
    }
  }

  return Array.from(counts.entries())
    .filter(([, count]) => count > 9)
    .sort((a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0]))
    // `padEnd` instead of `' '.repeat(100 - length)`: `repeat` throws a RangeError
    // on a negative count, i.e. for any value longer than 100 characters. Such
    // values are now emitted unpadded instead of aborting the whole report.
    .map(([domain, count]) => `${domain.padEnd(100)}${count}`);
}
}
export class IPListOutput extends RuleOutput {