import path from 'node:path';
import type { Span } from '../trace';
import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash';
import { ipCidrListToSingbox, surgeDomainsetToSingbox, surgeRulesetToSingbox } from './singbox';
import { buildParseDomainMap, sortDomains } from './stable-sort-domain';
import { createTrie } from './trie';
import { invariant } from 'foxact/invariant';
import { OUTPUT_CLASH_DIR, OUTPUT_SINGBOX_DIR, OUTPUT_SURGE_DIR } from '../constants/dir';
import stringify from 'json-stringify-pretty-compact';
import { appendArrayInPlace } from './append-array-in-place';
import { nullthrow } from 'foxact/nullthrow';
import createKeywordFilter from './aho-corasick';
import picocolors from 'picocolors';
import fs from 'node:fs';
import { finished } from 'node:stream/promises';
import { appendArrayFromSet, fastStringArrayJoin, writeFile } from './misc';
import { readFileByLine } from './fetch-text-by-line';
import { asyncWriteToStream } from './async-write-to-stream';

/** Key of the fallback weight used for rule types that have no explicit entry in `sortTypeOrder`. */
const defaultSortTypeOrder = Symbol('defaultSortTypeOrder');

/**
 * Ordering weight per rule type. `RulesetOutput.write()` sorts the rules it does
 * not handle natively ("other rules") by this weight, ascending.
 */
const sortTypeOrder: Record<string | typeof defaultSortTypeOrder, number> = {
  DOMAIN: 1,
  'DOMAIN-SUFFIX': 2,
  'DOMAIN-KEYWORD': 10,
  // experimental domain wildcard support
  'DOMAIN-WILDCARD': 20,
  'DOMAIN-REGEX': 21,
  'USER-AGENT': 30,
  'PROCESS-NAME': 40,
  [defaultSortTypeOrder]: 50, // default sort order for unknown type
  'URL-REGEX': 100,
  AND: 300,
  OR: 300,
  GEOIP: 400,
  'IP-CIDR': 400,
  'IP-CIDR6': 400
};

/**
 * Base class that accumulates rules (domains, keywords, CIDRs, ...) through a
 * chainable API and leaves serialization to the concrete `write()` implementation.
 *
 * Sources added through `addFromDomainset()` / `addFromRuleset()` are consumed
 * asynchronously; they are chained on `pendingPromise`, which every `write()`
 * implementation in this file awaits before reading the collected data.
 */
abstract class RuleOutput {
  protected domainTrie = createTrie(null, true);
  protected domainKeywords = new Set<string>();
  protected domainWildcard = new Set<string>();
  protected userAgent = new Set<string>();
  protected processName = new Set<string>();
  protected processPath = new Set<string>();
  protected urlRegex = new Set<string>();
  protected ipcidr = new Set<string>();
  protected ipcidrNoResolve = new Set<string>();
  protected ipasn = new Set<string>();
  protected ipasnNoResolve = new Set<string>();
  protected ipcidr6 = new Set<string>();
  protected ipcidr6NoResolve = new Set<string>();
  protected geoip = new Set<string>();
  // NOTE(review): "groip" looks like a typo of "geoip"; the name is kept because
  // subclasses outside this file could reference the protected member.
  protected groipNoResolve = new Set<string>();

  // TODO: add sourceIpcidr
  // TODO: add sourcePort
  // TODO: add port

  /** Rules without a dedicated bucket: the raw line plus its weight from `sortTypeOrder`. */
  protected otherRules: Array<[raw: string, orderWeight: number]> = [];

  /** Output sub-directory name, joined between the output root and the file name. */
  protected abstract type: 'domainset' | 'non_ip' | 'ip';

  /** Tail of the chain of asynchronous source ingestions. */
  protected pendingPromise = Promise.resolve();

  /** Pretty-prints `json` and splits the result into individual lines. */
  static jsonToLines = (json: unknown): string[] => stringify(json).split('\n');

  /**
   * Converts a wildcard domain into an anchored regular-expression source string:
   * `.` becomes an escaped dot, `*` a lazy run of host characters and `?` a single
   * host character. Every other character is copied verbatim (not escaped).
   */
  static domainWildCardToRegex = (domain: string) => {
    let result = '^';
    for (let i = 0, len = domain.length; i < len; i++) {
      switch (domain[i]) {
        case '.':
          result += String.raw`\.`;
          break;
        case '*':
          result += '[a-zA-Z0-9-_.]*?';
          break;
        case '?':
          result += '[a-zA-Z0-9-_.]';
          break;
        default:
          result += domain[i];
      }
    }
    result += '$';
    return result;
  };

  constructor(
    protected readonly span: Span,
    protected readonly id: string
  ) {}

  protected title: string | null = null;
  /** Sets the banner title. Required before `write()`. */
  withTitle(title: string) {
    this.title = title;
    return this;
  }

  protected description: string[] | readonly string[] | null = null;
  /** Sets the banner description lines. Required before `write()`. */
  withDescription(description: string[] | readonly string[]) {
    this.description = description;
    return this;
  }

  protected date = new Date();
  /** Overrides the "Last Updated" date of the banner (defaults to construction time). */
  withDate(date: Date) {
    this.date = date;
    return this;
  }

  protected apexDomainMap: Map<string, string> | null = null;
  protected subDomainMap: Map<string, string> | null = null;
  /** Supplies pre-computed domain maps that are handed to `sortDomains()`. */
  withDomainMap(apexDomainMap: Map<string, string>, subDomainMap: Map<string, string>) {
    this.apexDomainMap = apexDomainMap;
    this.subDomainMap = subDomainMap;
    return this;
  }

  /** Adds an exact-match domain. */
  addDomain(domain: string) {
    this.domainTrie.add(domain);
    return this;
  }

  /** Adds a suffix-match domain; stored with a leading `.` (added when missing). */
  addDomainSuffix(domain: string) {
    this.domainTrie.add(domain[0] === '.' ? domain : '.' + domain);
    return this;
  }

  /** Adds every entry of `domains` as a suffix-match domain. */
  bulkAddDomainSuffix(domains: string[]) {
    for (let i = 0, len = domains.length; i < len; i++) {
      this.addDomainSuffix(domains[i]);
    }
    return this;
  }

  /** Adds a domain keyword. */
  addDomainKeyword(keyword: string) {
    this.domainKeywords.add(keyword);
    return this;
  }

  /** Consumes domainset lines: a leading `.` marks a suffix, anything else an exact domain. */
  private async addFromDomainsetPromise(source: AsyncIterable<string> | Iterable<string> | string[]) {
    for await (const line of source) {
      if (line[0] === '.') {
        this.addDomainSuffix(line);
      } else {
        this.addDomain(line);
      }
    }
  }

  /** Queues a domainset source for ingestion; it is consumed before `write()` proceeds. */
  addFromDomainset(source: AsyncIterable<string> | Iterable<string> | string[]) {
    this.pendingPromise = this.pendingPromise.then(() => this.addFromDomainsetPromise(source));
    return this;
  }

  /**
   * Consumes `TYPE,VALUE[,ARG]` ruleset lines and files each one into its bucket.
   * Types without a dedicated bucket are kept verbatim in `otherRules`.
   */
  private async addFromRulesetPromise(source: AsyncIterable<string> | Iterable<string>) {
    for await (const line of source) {
      const splitted = line.split(',');
      const type = splitted[0];
      const value = splitted[1];
      const arg = splitted[2];

      switch (type) {
        case 'DOMAIN':
          this.addDomain(value);
          break;
        case 'DOMAIN-SUFFIX':
          this.addDomainSuffix(value);
          break;
        case 'DOMAIN-KEYWORD':
          this.addDomainKeyword(value);
          break;
        case 'DOMAIN-WILDCARD':
          this.domainWildcard.add(value);
          break;
        case 'USER-AGENT':
          this.userAgent.add(value);
          break;
        case 'PROCESS-NAME':
          // A value containing a path separator is tracked as a process path
          if (value.includes('/') || value.includes('\\')) {
            this.processPath.add(value);
          } else {
            this.processName.add(value);
          }
          break;
        case 'URL-REGEX': {
          // The pattern itself may contain commas, so re-join everything after the type
          const [, ...rest] = splitted;
          this.urlRegex.add(rest.join(','));
          break;
        }
        case 'IP-CIDR':
          (arg === 'no-resolve' ? this.ipcidrNoResolve : this.ipcidr).add(value);
          break;
        case 'IP-CIDR6':
          (arg === 'no-resolve' ? this.ipcidr6NoResolve : this.ipcidr6).add(value);
          break;
        case 'IP-ASN':
          (arg === 'no-resolve' ? this.ipasnNoResolve : this.ipasn).add(value);
          break;
        case 'GEOIP':
          (arg === 'no-resolve' ? this.groipNoResolve : this.geoip).add(value);
          break;
        default:
          this.otherRules.push([
            line,
            type in sortTypeOrder ? sortTypeOrder[type] : sortTypeOrder[defaultSortTypeOrder]
          ]);
          break;
      }
    }
  }

  /** Queues a ruleset source for ingestion; it is consumed before `write()` proceeds. */
  addFromRuleset(source: AsyncIterable<string> | Iterable<string>) {
    this.pendingPromise = this.pendingPromise.then(() => this.addFromRulesetPromise(source));
    return this;
  }

  /** Adds IPv4 CIDRs to the resolving bucket. */
  bulkAddCIDR4(cidr: string[]) {
    for (let i = 0, len = cidr.length; i < len; i++) {
      this.ipcidr.add(cidr[i]);
    }
    return this;
  }

  /** Adds IPv4 CIDRs to the `no-resolve` bucket. */
  bulkAddCIDR4NoResolve(cidr: string[]) {
    for (let i = 0, len = cidr.length; i < len; i++) {
      this.ipcidrNoResolve.add(cidr[i]);
    }
    return this;
  }

  /** Adds IPv6 CIDRs to the resolving bucket. */
  bulkAddCIDR6(cidr: string[]) {
    for (let i = 0, len = cidr.length; i < len; i++) {
      this.ipcidr6.add(cidr[i]);
    }
    return this;
  }

  /** Adds IPv6 CIDRs to the `no-resolve` bucket. */
  bulkAddCIDR6NoResolve(cidr: string[]) {
    for (let i = 0, len = cidr.length; i < len; i++) {
      this.ipcidr6NoResolve.add(cidr[i]);
    }
    return this;
  }

  /** Serializes the collected rules to the Surge, Clash and sing-box output directories. */
  abstract write(): Promise<void>;
}

/** Emits a plain domain list (one domain per line) for Surge / Clash plus a sing-box JSON file. */
export class DomainsetOutput extends RuleOutput {
  protected type = 'domainset' as const;

  private $dumped: string[] | null = null;

  /**
   * Domains dumped from the trie, minus those the keyword filter reports as matching.
   * Computed on first access and cached, so domains added afterwards are not reflected.
   */
  get dumped() {
    if (!this.$dumped) {
      const kwfilter = createKeywordFilter(this.domainKeywords);

      const results: string[] = [];

      const dumped = this.domainTrie.dump();

      for (let i = 0, len = dumped.length; i < len; i++) {
        const domain = dumped[i];
        if (!kwfilter(domain)) {
          results.push(domain);
        }
      }

      this.$dumped = results;
    }
    return this.$dumped;
  }

  /** Builds the apex / subdomain maps from `dumped` unless both were already provided. */
  calcDomainMap() {
    if (!this.apexDomainMap || !this.subDomainMap) {
      const { domainMap, subdomainMap } = buildParseDomainMap(this.dumped);
      this.apexDomainMap = domainMap;
      this.subDomainMap = subdomainMap;
    }
  }

  /**
   * Waits for pending sources, then writes the sorted domain list in all three formats.
   * Throws (via `invariant`) when title or description has not been set.
   */
  async write() {
    await this.pendingPromise;

    invariant(this.title, 'Missing title');
    invariant(this.description, 'Missing description');

    const sorted = sortDomains(this.dumped, this.apexDomainMap, this.subDomainMap);
    sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe');

    const surge = sorted;
    const clash = surgeDomainsetToClashDomainset(sorted);
    // TODO: Implement singbox directly using data
    const singbox = RuleOutput.jsonToLines(surgeDomainsetToSingbox(sorted));

    await Promise.all([
      compareAndWriteFile(
        this.span,
        withBannerArray(this.title, this.description, this.date, surge),
        path.join(OUTPUT_SURGE_DIR, this.type, this.id + '.conf')
      ),
      compareAndWriteFile(
        this.span,
        withBannerArray(this.title, this.description, this.date, clash),
        path.join(OUTPUT_CLASH_DIR, this.type, this.id + '.txt')
      ),
      compareAndWriteFile(
        this.span,
        singbox,
        path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json')
      )
    ]);
  }

  /**
   * Counts dumped domains per value of `apexDomainMap` and returns one
   * `<domain><padding><count>` line for every entry with at least 10 domains,
   * ordered by count (descending), then by name.
   *
   * Throws (via `invariant`) when the apex domain map is missing.
   */
  getStatMap() {
    invariant(this.dumped, 'Non dumped yet');
    // Narrow through a local so the reducer callback below needs no non-null assertion
    const apexDomainMap = this.apexDomainMap;
    invariant(apexDomainMap, 'Missing apex domain map');

    const counts = nullthrow(this.dumped, 'Non dumped yet').reduce<Map<string, number>>((acc, cur) => {
      const suffix = apexDomainMap.get(cur);
      if (suffix) {
        acc.set(suffix, (acc.get(suffix) ?? 0) + 1);
      }
      return acc;
    }, new Map<string, number>());

    return Array.from(counts.entries())
      .filter(a => a[1] > 9)
      .sort((a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0]))
      .map(([domain, count]) => {
        // Pad the name to a 100-character column; names longer than that get a
        // single separating space instead of a negative repeat count.
        const label = domain.length <= 100 ? domain.padEnd(100) : `${domain} `;
        return `${label}${count}`;
      });
  }
}

/**
 * Emits an IP list. Only the resolving IPv4 / IPv6 CIDR buckets are written.
 * With `clashUseRule` (default) the Clash file carries the same `IP-CIDR,...`
 * rule lines as Surge; otherwise it carries the bare CIDRs.
 */
export class IPListOutput extends RuleOutput {
  protected type = 'ip' as const;

  constructor(span: Span, id: string, private readonly clashUseRule = true) {
    super(span, id);
  }

  /**
   * Waits for pending sources, then writes the CIDR list in all three formats.
   * Throws (via `invariant`) when title or description has not been set.
   */
  async write() {
    await this.pendingPromise;

    invariant(this.title, 'Missing title');
    invariant(this.description, 'Missing description');

    const sorted4 = Array.from(this.ipcidr);
    const sorted6 = Array.from(this.ipcidr6);

    // Bare CIDRs, IPv4 first
    const merged: string[] = [];
    appendArrayInPlace(merged, sorted4);
    appendArrayInPlace(merged, sorted6);

    const surge = sorted4.map(i => `IP-CIDR,${i}`);
    appendArrayInPlace(surge, sorted6.map(i => `IP-CIDR6,${i}`));
    surge.push('DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe');

    const clash = this.clashUseRule ? surge : merged;
    // TODO: Implement singbox directly using data
    const singbox = RuleOutput.jsonToLines(ipCidrListToSingbox(merged));

    await Promise.all([
      compareAndWriteFile(
        this.span,
        withBannerArray(this.title, this.description, this.date, surge),
        path.join(OUTPUT_SURGE_DIR, this.type, this.id + '.conf')
      ),
      compareAndWriteFile(
        this.span,
        withBannerArray(this.title, this.description, this.date, clash),
        path.join(OUTPUT_CLASH_DIR, this.type, this.id + '.txt')
      ),
      compareAndWriteFile(
        this.span,
        singbox,
        path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json')
      )
    ]);
  }
}

/** Emits a classical `TYPE,VALUE[,ARG]` ruleset for Surge / Clash plus a sing-box JSON file. */
export class RulesetOutput extends RuleOutput {
  constructor(span: Span, id: string, protected type: 'non_ip' | 'ip') {
    super(span, id);
  }

  /**
   * Waits for pending sources, then writes every collected rule exactly once, in
   * this order: marker rule, domains, keywords, wildcards, user agents, process
   * names / paths, other rules (by weight), URL regexes, `no-resolve` IP rules,
   * resolving IP rules.
   *
   * Throws (via `invariant`) when title or description has not been set.
   */
  async write() {
    await this.pendingPromise;

    invariant(this.title, 'Missing title');
    invariant(this.description, 'Missing description');

    const results: string[] = [
      'DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'
    ];

    const kwfilter = createKeywordFilter(this.domainKeywords);

    for (const domain of sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap)) {
      // Skip domains the keyword filter reports as matching
      if (kwfilter(domain)) {
        continue;
      }
      if (domain[0] === '.') {
        results.push(`DOMAIN-SUFFIX,${domain.slice(1)}`);
      } else {
        results.push(`DOMAIN,${domain}`);
      }
    }

    appendArrayFromSet(results, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`);
    appendArrayFromSet(results, this.domainWildcard, i => `DOMAIN-WILDCARD,${i}`);

    appendArrayFromSet(results, this.userAgent, i => `USER-AGENT,${i}`);

    appendArrayFromSet(results, this.processName, i => `PROCESS-NAME,${i}`);
    appendArrayFromSet(results, this.processPath, i => `PROCESS-NAME,${i}`);

    // Note: sorts `otherRules` in place
    for (const [rule] of this.otherRules.sort((a, b) => a[1] - b[1])) {
      results.push(rule);
    }

    appendArrayFromSet(results, this.urlRegex, i => `URL-REGEX,${i}`);

    appendArrayFromSet(results, this.ipcidrNoResolve, i => `IP-CIDR,${i},no-resolve`);
    appendArrayFromSet(results, this.ipcidr6NoResolve, i => `IP-CIDR6,${i},no-resolve`);
    appendArrayFromSet(results, this.ipasnNoResolve, i => `IP-ASN,${i},no-resolve`);
    appendArrayFromSet(results, this.groipNoResolve, i => `GEOIP,${i},no-resolve`);

    appendArrayFromSet(results, this.ipcidr, i => `IP-CIDR,${i}`);
    appendArrayFromSet(results, this.ipcidr6, i => `IP-CIDR6,${i}`);
    appendArrayFromSet(results, this.ipasn, i => `IP-ASN,${i}`);
    // GEOIP entries are appended here and above only; a second pass over the
    // same two sets used to duplicate every GEOIP rule in the output.
    appendArrayFromSet(results, this.geoip, i => `GEOIP,${i}`);

    const surge = results;
    const clash = surgeRulesetToClashClassicalTextRuleset(results);
    // TODO: Implement singbox directly using data
    const singbox = RuleOutput.jsonToLines(surgeRulesetToSingbox(results));

    await Promise.all([
      compareAndWriteFile(
        this.span,
        withBannerArray(this.title, this.description, this.date, surge),
        path.join(OUTPUT_SURGE_DIR, this.type, this.id + '.conf')
      ),
      compareAndWriteFile(
        this.span,
        withBannerArray(this.title, this.description, this.date, clash),
        path.join(OUTPUT_CLASH_DIR, this.type, this.id + '.txt')
      ),
      compareAndWriteFile(
        this.span,
        singbox,
        path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json')
      )
    ]);
  }
}

/**
 * Wraps `content` in a `#`-commented banner (title, ISO timestamp, entry count,
 * description) followed by an EOF marker line. Empty description lines become a bare `#`.
 */
function withBannerArray(
  title: string,
  description: string[] | readonly string[],
  date: Date,
  content: string[]
): string[] {
  return [
    '#########################################',
    `# ${title}`,
    `# Last Updated: ${date.toISOString()}`,
    `# Size: ${content.length}`,
    ...description.map(line => (line ? `# ${line}` : '#')),
    '#########################################',
    ...content,
    '################## EOF ##################'
  ];
}

/**
 * Compares the lines about to be written (`linesA`) with the lines of an existing
 * file (`source`), ignoring differences inside comment lines so that a changed
 * banner (e.g. its timestamp) alone does not count as a difference.
 *
 * @returns `true` when the contents are considered equal; always `false` for an empty `linesA`.
 */
export const fileEqual = async (linesA: string[], source: AsyncIterable<string>): Promise<boolean> => {
  if (linesA.length === 0) {
    return false;
  }

  let index = -1;
  for await (const lineB of source) {
    index++;

    if (index > linesA.length - 1) {
      // A single empty line right after the last expected line is tolerated
      if (index === linesA.length && lineB === '') {
        return true;
      }
      // The file becomes smaller
      return false;
    }

    const lineA = linesA[index];

    // Both lines are `#` comments: contents are not compared
    if (lineA[0] === '#' && lineB[0] === '#') {
      continue;
    }
    // Both lines start with `//` and carry a `#` at index 3: contents are not compared
    if (
      lineA[0] === '/'
      && lineA[1] === '/'
      && lineB[0] === '/'
      && lineB[1] === '/'
      && lineA[3] === '#'
      && lineB[3] === '#'
    ) {
      continue;
    }

    if (lineA !== lineB) {
      return false;
    }
  }

  if (index < linesA.length - 1) {
    // The file becomes larger
    return false;
  }

  return true;
};

/**
 * Writes `linesA` to `filePath` unless the existing file already has equal content
 * according to `fileEqual`. The write runs inside a child span of `span` and only
 * resolves once the data has been handed off and, for streamed writes, the stream
 * has finished.
 */
export async function compareAndWriteFile(span: Span, linesA: string[], filePath: string) {
  const linesALen = linesA.length;

  let isEqual: boolean;
  if (fs.existsSync(filePath)) {
    isEqual = await fileEqual(linesA, readFileByLine(filePath));
  } else {
    console.log(`${filePath} does not exists, writing...`);
    isEqual = false;
  }

  if (isEqual) {
    console.log(picocolors.gray(picocolors.dim(`same content, bail out writing: ${filePath}`)));
    return;
  }

  await span.traceChildAsync(`writing ${filePath}`, async () => {
    // The default highwater mark is normally 16384,
    // So we make sure direct write to file if the content is
    // most likely less than 500 lines
    if (linesALen < 500) {
      return writeFile(filePath, fastStringArrayJoin(linesA, '\n') + '\n');
    }

    const writeStream = fs.createWriteStream(filePath);
    for (let i = 0; i < linesALen; i++) {
      const p = asyncWriteToStream(writeStream, linesA[i] + '\n');
      // eslint-disable-next-line no-await-in-loop -- stream high water mark
      if (p) await p;
    }
    // Extra trailing newline; `fileEqual` tolerates the resulting empty last line
    await asyncWriteToStream(writeStream, '\n');

    writeStream.end();
    // Wait until everything is flushed so callers never observe a partially
    // written file; a stream error surfaces here as a rejection.
    await finished(writeStream);
  });
}