From 7c372b3b8c15bad53e3be21860b663d07ff9d875 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Sat, 21 Sep 2024 18:20:24 +0800 Subject: [PATCH] Refactor: adjust output --- Build/lib/clash.ts | 4 - Build/lib/create-file.ts | 591 +---------------------------------- Build/lib/misc.ts | 28 +- Build/lib/rules/base.ts | 338 ++++++++++++++++++++ Build/lib/rules/domainset.ts | 95 ++++++ Build/lib/rules/ip.ts | 53 ++++ Build/lib/rules/ruleset.ts | 112 +++++++ Build/lib/singbox.ts | 68 +--- 8 files changed, 627 insertions(+), 662 deletions(-) create mode 100644 Build/lib/rules/base.ts create mode 100644 Build/lib/rules/domainset.ts create mode 100644 Build/lib/rules/ip.ts create mode 100644 Build/lib/rules/ruleset.ts diff --git a/Build/lib/clash.ts b/Build/lib/clash.ts index d38ccb2f..0951375a 100644 --- a/Build/lib/clash.ts +++ b/Build/lib/clash.ts @@ -64,10 +64,6 @@ export const surgeRulesetToClashClassicalTextRuleset = (rules: string[] | Set { - return domainset.map(i => (i[0] === '.' ? `+${i}` : i)); -}; - export const surgeDomainsetToClashRuleset = (domainset: string[]) => { return domainset.map(i => (i[0] === '.' ? 
`DOMAIN-SUFFIX,${i.slice(1)}` : `DOMAIN,${i}`)); }; diff --git a/Build/lib/create-file.ts b/Build/lib/create-file.ts index 0f1fee9c..a42c92c9 100644 --- a/Build/lib/create-file.ts +++ b/Build/lib/create-file.ts @@ -1,587 +1,4 @@ -import path from 'node:path'; - -import type { Span } from '../trace'; -import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash'; -import { ipCidrListToSingbox, surgeDomainsetToSingbox, surgeRulesetToSingbox } from './singbox'; -import { buildParseDomainMap, sortDomains } from './stable-sort-domain'; -import { createTrie } from './trie'; -import { invariant } from 'foxact/invariant'; -import { OUTPUT_CLASH_DIR, OUTPUT_SINGBOX_DIR, OUTPUT_SURGE_DIR } from '../constants/dir'; -import stringify from 'json-stringify-pretty-compact'; -import { appendArrayInPlace } from './append-array-in-place'; -import { nullthrow } from 'foxact/nullthrow'; -import createKeywordFilter from './aho-corasick'; -import picocolors from 'picocolors'; -import fs from 'node:fs'; -import { appendArrayFromSet, fastStringArrayJoin, writeFile } from './misc'; -import { readFileByLine } from './fetch-text-by-line'; -import { asyncWriteToStream } from './async-write-to-stream'; - -const defaultSortTypeOrder = Symbol('defaultSortTypeOrder'); -const sortTypeOrder: Record = { - DOMAIN: 1, - 'DOMAIN-SUFFIX': 2, - 'DOMAIN-KEYWORD': 10, - // experimental domain wildcard support - 'DOMAIN-WILDCARD': 20, - 'DOMAIN-REGEX': 21, - 'USER-AGENT': 30, - 'PROCESS-NAME': 40, - [defaultSortTypeOrder]: 50, // default sort order for unknown type - 'URL-REGEX': 100, - AND: 300, - OR: 300, - GEOIP: 400, - 'IP-CIDR': 400, - 'IP-CIDR6': 400 -}; - -abstract class RuleOutput { - protected domainTrie = createTrie(null, true); - protected domainKeywords = new Set(); - protected domainWildcard = new Set(); - protected userAgent = new Set(); - protected processName = new Set(); - protected processPath = new Set(); - protected urlRegex = new Set(); - protected 
ipcidr = new Set(); - protected ipcidrNoResolve = new Set(); - protected ipasn = new Set(); - protected ipasnNoResolve = new Set(); - protected ipcidr6 = new Set(); - protected ipcidr6NoResolve = new Set(); - protected geoip = new Set(); - protected groipNoResolve = new Set(); - // TODO: add sourceIpcidr - // TODO: add sourcePort - // TODO: add port - - protected otherRules: Array<[raw: string, orderWeight: number]> = []; - protected abstract type: 'domainset' | 'non_ip' | 'ip'; - - protected pendingPromise = Promise.resolve(); - - static jsonToLines = (json: unknown): string[] => stringify(json).split('\n'); - - static domainWildCardToRegex = (domain: string) => { - let result = '^'; - for (let i = 0, len = domain.length; i < len; i++) { - switch (domain[i]) { - case '.': - result += String.raw`\.`; - break; - case '*': - result += '[a-zA-Z0-9-_.]*?'; - break; - case '?': - result += '[a-zA-Z0-9-_.]'; - break; - default: - result += domain[i]; - } - } - result += '$'; - return result; - }; - - constructor( - protected readonly span: Span, - protected readonly id: string - ) {} - - protected title: string | null = null; - withTitle(title: string) { - this.title = title; - return this; - } - - protected description: string[] | readonly string[] | null = null; - withDescription(description: string[] | readonly string[]) { - this.description = description; - return this; - } - - protected date = new Date(); - withDate(date: Date) { - this.date = date; - return this; - } - - protected apexDomainMap: Map | null = null; - protected subDomainMap: Map | null = null; - withDomainMap(apexDomainMap: Map, subDomainMap: Map) { - this.apexDomainMap = apexDomainMap; - this.subDomainMap = subDomainMap; - return this; - } - - addDomain(domain: string) { - this.domainTrie.add(domain); - return this; - } - - addDomainSuffix(domain: string) { - this.domainTrie.add(domain[0] === '.' ? domain : '.' 
+ domain); - return this; - } - - bulkAddDomainSuffix(domains: string[]) { - for (let i = 0, len = domains.length; i < len; i++) { - this.addDomainSuffix(domains[i]); - } - return this; - } - - addDomainKeyword(keyword: string) { - this.domainKeywords.add(keyword); - return this; - } - - private async addFromDomainsetPromise(source: AsyncIterable | Iterable | string[]) { - for await (const line of source) { - if (line[0] === '.') { - this.addDomainSuffix(line); - } else { - this.addDomain(line); - } - } - } - - addFromDomainset(source: AsyncIterable | Iterable | string[]) { - this.pendingPromise = this.pendingPromise.then(() => this.addFromDomainsetPromise(source)); - return this; - } - - private async addFromRulesetPromise(source: AsyncIterable | Iterable) { - for await (const line of source) { - const splitted = line.split(','); - const type = splitted[0]; - const value = splitted[1]; - const arg = splitted[2]; - - switch (type) { - case 'DOMAIN': - this.addDomain(value); - break; - case 'DOMAIN-SUFFIX': - this.addDomainSuffix(value); - break; - case 'DOMAIN-KEYWORD': - this.addDomainKeyword(value); - break; - case 'DOMAIN-WILDCARD': - this.domainWildcard.add(value); - break; - case 'USER-AGENT': - this.userAgent.add(value); - break; - case 'PROCESS-NAME': - if (value.includes('/') || value.includes('\\')) { - this.processPath.add(value); - } else { - this.processName.add(value); - } - break; - case 'URL-REGEX': { - const [, ...rest] = splitted; - this.urlRegex.add(rest.join(',')); - break; - } - case 'IP-CIDR': - (arg === 'no-resolve' ? this.ipcidrNoResolve : this.ipcidr).add(value); - break; - case 'IP-CIDR6': - (arg === 'no-resolve' ? this.ipcidr6NoResolve : this.ipcidr6).add(value); - break; - case 'IP-ASN': - (arg === 'no-resolve' ? this.ipasnNoResolve : this.ipasn).add(value); - break; - case 'GEOIP': - (arg === 'no-resolve' ? this.groipNoResolve : this.geoip).add(value); - break; - default: - this.otherRules.push([line, type in sortTypeOrder ? 
sortTypeOrder[type] : sortTypeOrder[defaultSortTypeOrder]]); - break; - } - } - } - - addFromRuleset(source: AsyncIterable | Iterable) { - this.pendingPromise = this.pendingPromise.then(() => this.addFromRulesetPromise(source)); - return this; - } - - bulkAddCIDR4(cidr: string[]) { - for (let i = 0, len = cidr.length; i < len; i++) { - this.ipcidr.add(cidr[i]); - } - return this; - } - - bulkAddCIDR4NoResolve(cidr: string[]) { - for (let i = 0, len = cidr.length; i < len; i++) { - this.ipcidrNoResolve.add(cidr[i]); - } - return this; - } - - bulkAddCIDR6(cidr: string[]) { - for (let i = 0, len = cidr.length; i < len; i++) { - this.ipcidr6.add(cidr[i]); - } - return this; - } - - bulkAddCIDR6NoResolve(cidr: string[]) { - for (let i = 0, len = cidr.length; i < len; i++) { - this.ipcidr6NoResolve.add(cidr[i]); - } - return this; - } - - abstract write(): Promise; -} - -export class DomainsetOutput extends RuleOutput { - protected type = 'domainset' as const; - - private $dumped: string[] | null = null; - - get dumped() { - if (!this.$dumped) { - const kwfilter = createKeywordFilter(this.domainKeywords); - - const results: string[] = []; - - const dumped = this.domainTrie.dump(); - - for (let i = 0, len = dumped.length; i < len; i++) { - const domain = dumped[i]; - if (!kwfilter(domain)) { - results.push(domain); - } - } - - this.$dumped = results; - } - return this.$dumped; - } - - calcDomainMap() { - if (!this.apexDomainMap || !this.subDomainMap) { - const { domainMap, subdomainMap } = buildParseDomainMap(this.dumped); - this.apexDomainMap = domainMap; - this.subDomainMap = subdomainMap; - } - } - - async write() { - await this.pendingPromise; - - invariant(this.title, 'Missing title'); - invariant(this.description, 'Missing description'); - - const sorted = sortDomains(this.dumped, this.apexDomainMap, this.subDomainMap); - sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'); - - const surge = sorted; - const clash = surgeDomainsetToClashDomainset(sorted); 
- // TODO: Implement singbox directly using data - const singbox = RuleOutput.jsonToLines(surgeDomainsetToSingbox(sorted)); - - await Promise.all([ - compareAndWriteFile( - this.span, - withBannerArray( - this.title, - this.description, - this.date, - surge - ), - path.join(OUTPUT_SURGE_DIR, this.type, this.id + '.conf') - ), - compareAndWriteFile( - this.span, - withBannerArray( - this.title, - this.description, - this.date, - clash - ), - path.join(OUTPUT_CLASH_DIR, this.type, this.id + '.txt') - ), - compareAndWriteFile( - this.span, - singbox, - path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json') - ) - ]); - } - - getStatMap() { - invariant(this.dumped, 'Non dumped yet'); - invariant(this.apexDomainMap, 'Missing apex domain map'); - - return Array.from( - ( - nullthrow(this.dumped, 'Non dumped yet').reduce>((acc, cur) => { - const suffix = this.apexDomainMap!.get(cur); - if (suffix) { - acc.set(suffix, (acc.get(suffix) ?? 0) + 1); - } - return acc; - }, new Map()) - ).entries() - ) - .filter(a => a[1] > 9) - .sort( - (a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0]) - ) - .map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`); - } -} - -export class IPListOutput extends RuleOutput { - protected type = 'ip' as const; - - constructor(span: Span, id: string, private readonly clashUseRule = true) { - super(span, id); - } - - async write() { - await this.pendingPromise; - - invariant(this.title, 'Missing title'); - invariant(this.description, 'Missing description'); - - const sorted4 = Array.from(this.ipcidr); - const sorted6 = Array.from(this.ipcidr6); - const merged = appendArrayInPlace(appendArrayInPlace([], sorted4), sorted6); - - const surge = sorted4.map(i => `IP-CIDR,${i}`); - appendArrayInPlace(surge, sorted6.map(i => `IP-CIDR6,${i}`)); - surge.push('DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'); - - const clash = this.clashUseRule ? 
surge : merged; - // TODO: Implement singbox directly using data - const singbox = RuleOutput.jsonToLines(ipCidrListToSingbox(merged)); - - await Promise.all([ - compareAndWriteFile( - this.span, - withBannerArray( - this.title, - this.description, - this.date, - surge - ), - path.join(OUTPUT_SURGE_DIR, this.type, this.id + '.conf') - ), - compareAndWriteFile( - this.span, - withBannerArray( - this.title, - this.description, - this.date, - clash - ), - path.join(OUTPUT_CLASH_DIR, this.type, this.id + '.txt') - ), - compareAndWriteFile( - this.span, - singbox, - path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json') - ) - ]); - } -} - -export class RulesetOutput extends RuleOutput { - constructor(span: Span, id: string, protected type: 'non_ip' | 'ip') { - super(span, id); - } - - async write() { - await this.pendingPromise; - - invariant(this.title, 'Missing title'); - invariant(this.description, 'Missing description'); - - const results: string[] = [ - 'DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe' - ]; - - const kwfilter = createKeywordFilter(this.domainKeywords); - - for (const domain of sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap)) { - if (kwfilter(domain)) { - continue; - } - if (domain[0] === '.') { - results.push(`DOMAIN-SUFFIX,${domain.slice(1)}`); - } else { - results.push(`DOMAIN,${domain}`); - } - } - - appendArrayFromSet(results, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`); - appendArrayFromSet(results, this.domainWildcard, i => `DOMAIN-WILDCARD,${i}`); - - appendArrayFromSet(results, this.userAgent, i => `USER-AGENT,${i}`); - - appendArrayFromSet(results, this.processName, i => `PROCESS-NAME,${i}`); - appendArrayFromSet(results, this.processPath, i => `PROCESS-NAME,${i}`); - - for (const [rule] of this.otherRules.sort((a, b) => a[1] - b[1])) { - results.push(rule); - } - - appendArrayFromSet(results, this.urlRegex, i => `URL-REGEX,${i}`); - - appendArrayFromSet(results, this.ipcidrNoResolve, i => 
`IP-CIDR,${i},no-resolve`); - appendArrayFromSet(results, this.ipcidr6NoResolve, i => `IP-CIDR6,${i},no-resolve`); - appendArrayFromSet(results, this.ipasnNoResolve, i => `IP-ASN,${i},no-resolve`); - appendArrayFromSet(results, this.groipNoResolve, i => `GEOIP,${i},no-resolve`); - - appendArrayFromSet(results, this.ipcidr, i => `IP-CIDR,${i}`); - appendArrayFromSet(results, this.ipcidr6, i => `IP-CIDR6,${i}`); - appendArrayFromSet(results, this.ipasn, i => `IP-ASN,${i}`); - appendArrayFromSet(results, this.geoip, i => `GEOIP,${i}`); - - for (const geoip of this.geoip) { - results.push(`GEOIP,${geoip}`); - } - for (const geoip of this.groipNoResolve) { - results.push(`GEOIP,${geoip},no-resolve`); - } - - const surge = results; - const clash = surgeRulesetToClashClassicalTextRuleset(results); - // TODO: Implement singbox directly using data - const singbox = RuleOutput.jsonToLines(surgeRulesetToSingbox(results)); - - await Promise.all([ - compareAndWriteFile( - this.span, - withBannerArray( - this.title, - this.description, - this.date, - surge - ), - path.join(OUTPUT_SURGE_DIR, this.type, this.id + '.conf') - ), - compareAndWriteFile( - this.span, - withBannerArray( - this.title, - this.description, - this.date, - clash - ), - path.join(OUTPUT_CLASH_DIR, this.type, this.id + '.txt') - ), - compareAndWriteFile( - this.span, - singbox, - path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json') - ) - ]); - } -} - -function withBannerArray(title: string, description: string[] | readonly string[], date: Date, content: string[]) { - return [ - '#########################################', - `# ${title}`, - `# Last Updated: ${date.toISOString()}`, - `# Size: ${content.length}`, - ...description.map(line => (line ? 
`# ${line}` : '#')), - '#########################################', - ...content, - '################## EOF ##################' - ]; -}; - -export const fileEqual = async (linesA: string[], source: AsyncIterable): Promise => { - if (linesA.length === 0) { - return false; - } - - let index = -1; - for await (const lineB of source) { - index++; - - if (index > linesA.length - 1) { - if (index === linesA.length && lineB === '') { - return true; - } - // The file becomes smaller - return false; - } - - const lineA = linesA[index]; - - if (lineA[0] === '#' && lineB[0] === '#') { - continue; - } - if ( - lineA[0] === '/' - && lineA[1] === '/' - && lineB[0] === '/' - && lineB[1] === '/' - && lineA[3] === '#' - && lineB[3] === '#' - ) { - continue; - } - - if (lineA !== lineB) { - return false; - } - } - - if (index < linesA.length - 1) { - // The file becomes larger - return false; - } - - return true; -}; - -export async function compareAndWriteFile(span: Span, linesA: string[], filePath: string) { - let isEqual = true; - const linesALen = linesA.length; - - if (fs.existsSync(filePath)) { - isEqual = await fileEqual(linesA, readFileByLine(filePath)); - } else { - console.log(`${filePath} does not exists, writing...`); - isEqual = false; - } - - if (isEqual) { - console.log(picocolors.gray(picocolors.dim(`same content, bail out writing: ${filePath}`))); - return; - } - - await span.traceChildAsync(`writing ${filePath}`, async () => { - // The default highwater mark is normally 16384, - // So we make sure direct write to file if the content is - // most likely less than 500 lines - if (linesALen < 500) { - return writeFile(filePath, fastStringArrayJoin(linesA, '\n') + '\n'); - } - - const writeStream = fs.createWriteStream(filePath); - for (let i = 0; i < linesALen; i++) { - const p = asyncWriteToStream(writeStream, linesA[i] + '\n'); - // eslint-disable-next-line no-await-in-loop -- stream high water mark - if (p) await p; - } - - await asyncWriteToStream(writeStream, 
'\n'); - - writeStream.end(); - }); -} +export { DomainsetOutput } from './rules/domainset'; +export { IPListOutput } from './rules/ip'; +export { RulesetOutput } from './rules/ruleset'; +export { fileEqual, compareAndWriteFile } from './rules/base'; diff --git a/Build/lib/misc.ts b/Build/lib/misc.ts index 13ceaa8c..be5bfa8c 100644 --- a/Build/lib/misc.ts +++ b/Build/lib/misc.ts @@ -63,13 +63,18 @@ export const domainWildCardToRegex = (domain: string) => { export const identity = (x: T): T => x; -export const appendArrayFromSet = (dest: T[], source: Set, transformer: (item: T) => T = identity) => { - const iterator = source.values(); - let step: IteratorResult; +export const appendArrayFromSet = (dest: T[], source: Set | Array>, transformer: (item: T) => T = identity) => { + const casted = Array.isArray(source) ? source : [source]; + for (let i = 0, len = casted.length; i < len; i++) { + const iterator = casted[i].values(); + let step: IteratorResult; - while ((step = iterator.next(), !step.done)) { - dest.push(transformer(step.value)); + while ((step = iterator.next(), !step.done)) { + dest.push(transformer(step.value)); + } } + + return dest; }; export const output = (id: string, type: 'non_ip' | 'ip' | 'domainset') => { @@ -79,3 +84,16 @@ export const output = (id: string, type: 'non_ip' | 'ip' | 'domainset') => { path.join(OUTPUT_SINGBOX_DIR, type, id + '.json') ] as const; }; + +export function withBannerArray(title: string, description: string[] | readonly string[], date: Date, content: string[]) { + return [ + '#########################################', + `# ${title}`, + `# Last Updated: ${date.toISOString()}`, + `# Size: ${content.length}`, + ...description.map(line => (line ? 
`# ${line}` : '#')), + '#########################################', + ...content, + '################## EOF ##################' + ]; +}; diff --git a/Build/lib/rules/base.ts b/Build/lib/rules/base.ts new file mode 100644 index 00000000..f8a98c98 --- /dev/null +++ b/Build/lib/rules/base.ts @@ -0,0 +1,338 @@ +import { OUTPUT_CLASH_DIR, OUTPUT_SINGBOX_DIR, OUTPUT_SURGE_DIR } from '../../constants/dir'; +import type { Span } from '../../trace'; +import { createTrie } from '../trie'; +import stringify from 'json-stringify-pretty-compact'; +import path from 'node:path'; +import { withBannerArray } from '../misc'; +import { invariant } from 'foxact/invariant'; +import picocolors from 'picocolors'; +import fs from 'node:fs'; +import { fastStringArrayJoin, writeFile } from '../misc'; +import { readFileByLine } from '../fetch-text-by-line'; +import { asyncWriteToStream } from '../async-write-to-stream'; + +export abstract class RuleOutput { + protected domainTrie = createTrie(null, true); + protected domainKeywords = new Set(); + protected domainWildcard = new Set(); + protected userAgent = new Set(); + protected processName = new Set(); + protected processPath = new Set(); + protected urlRegex = new Set(); + protected ipcidr = new Set(); + protected ipcidrNoResolve = new Set(); + protected ipasn = new Set(); + protected ipasnNoResolve = new Set(); + protected ipcidr6 = new Set(); + protected ipcidr6NoResolve = new Set(); + protected geoip = new Set(); + protected groipNoResolve = new Set(); + // TODO: add sourceIpcidr + // TODO: add sourcePort + // TODO: add port + + protected otherRules: string[] = []; + protected abstract type: 'domainset' | 'non_ip' | 'ip'; + + protected pendingPromise = Promise.resolve(); + + static jsonToLines = (json: unknown): string[] => stringify(json).split('\n'); + + static domainWildCardToRegex = (domain: string) => { + let result = '^'; + for (let i = 0, len = domain.length; i < len; i++) { + switch (domain[i]) { + case '.': + result += 
String.raw`\.`; + break; + case '*': + result += '[a-zA-Z0-9-_.]*?'; + break; + case '?': + result += '[a-zA-Z0-9-_.]'; + break; + default: + result += domain[i]; + } + } + result += '$'; + return result; + }; + + constructor( + protected readonly span: Span, + protected readonly id: string + ) {} + + protected title: string | null = null; + withTitle(title: string) { + this.title = title; + return this; + } + + protected description: string[] | readonly string[] | null = null; + withDescription(description: string[] | readonly string[]) { + this.description = description; + return this; + } + + protected date = new Date(); + withDate(date: Date) { + this.date = date; + return this; + } + + protected apexDomainMap: Map | null = null; + protected subDomainMap: Map | null = null; + withDomainMap(apexDomainMap: Map, subDomainMap: Map) { + this.apexDomainMap = apexDomainMap; + this.subDomainMap = subDomainMap; + return this; + } + + addDomain(domain: string) { + this.domainTrie.add(domain); + return this; + } + + addDomainSuffix(domain: string) { + this.domainTrie.add(domain[0] === '.' ? domain : '.' 
+ domain); + return this; + } + + bulkAddDomainSuffix(domains: string[]) { + for (let i = 0, len = domains.length; i < len; i++) { + this.addDomainSuffix(domains[i]); + } + return this; + } + + addDomainKeyword(keyword: string) { + this.domainKeywords.add(keyword); + return this; + } + + private async addFromDomainsetPromise(source: AsyncIterable | Iterable | string[]) { + for await (const line of source) { + if (line[0] === '.') { + this.addDomainSuffix(line); + } else { + this.addDomain(line); + } + } + } + + addFromDomainset(source: AsyncIterable | Iterable | string[]) { + this.pendingPromise = this.pendingPromise.then(() => this.addFromDomainsetPromise(source)); + return this; + } + + private async addFromRulesetPromise(source: AsyncIterable | Iterable) { + for await (const line of source) { + const splitted = line.split(','); + const type = splitted[0]; + const value = splitted[1]; + const arg = splitted[2]; + + switch (type) { + case 'DOMAIN': + this.addDomain(value); + break; + case 'DOMAIN-SUFFIX': + this.addDomainSuffix(value); + break; + case 'DOMAIN-KEYWORD': + this.addDomainKeyword(value); + break; + case 'DOMAIN-WILDCARD': + this.domainWildcard.add(value); + break; + case 'USER-AGENT': + this.userAgent.add(value); + break; + case 'PROCESS-NAME': + if (value.includes('/') || value.includes('\\')) { + this.processPath.add(value); + } else { + this.processName.add(value); + } + break; + case 'URL-REGEX': { + const [, ...rest] = splitted; + this.urlRegex.add(rest.join(',')); + break; + } + case 'IP-CIDR': + (arg === 'no-resolve' ? this.ipcidrNoResolve : this.ipcidr).add(value); + break; + case 'IP-CIDR6': + (arg === 'no-resolve' ? this.ipcidr6NoResolve : this.ipcidr6).add(value); + break; + case 'IP-ASN': + (arg === 'no-resolve' ? this.ipasnNoResolve : this.ipasn).add(value); + break; + case 'GEOIP': + (arg === 'no-resolve' ? 
this.groipNoResolve : this.geoip).add(value); + break; + default: + this.otherRules.push(line); + break; + } + } + } + + addFromRuleset(source: AsyncIterable | Iterable) { + this.pendingPromise = this.pendingPromise.then(() => this.addFromRulesetPromise(source)); + return this; + } + + bulkAddCIDR4(cidr: string[]) { + for (let i = 0, len = cidr.length; i < len; i++) { + this.ipcidr.add(cidr[i]); + } + return this; + } + + bulkAddCIDR4NoResolve(cidr: string[]) { + for (let i = 0, len = cidr.length; i < len; i++) { + this.ipcidrNoResolve.add(cidr[i]); + } + return this; + } + + bulkAddCIDR6(cidr: string[]) { + for (let i = 0, len = cidr.length; i < len; i++) { + this.ipcidr6.add(cidr[i]); + } + return this; + } + + bulkAddCIDR6NoResolve(cidr: string[]) { + for (let i = 0, len = cidr.length; i < len; i++) { + this.ipcidr6NoResolve.add(cidr[i]); + } + return this; + } + + abstract surge(): string[]; + abstract clash(): string[]; + abstract singbox(): string[]; + + async write(): Promise { + await this.pendingPromise; + + invariant(this.title, 'Missing title'); + invariant(this.description, 'Missing description'); + + await Promise.all([ + compareAndWriteFile( + this.span, + withBannerArray( + this.title, + this.description, + this.date, + this.surge() + ), + path.join(OUTPUT_SURGE_DIR, this.type, this.id + '.conf') + ), + compareAndWriteFile( + this.span, + withBannerArray( + this.title, + this.description, + this.date, + this.clash() + ), + path.join(OUTPUT_CLASH_DIR, this.type, this.id + '.txt') + ), + compareAndWriteFile( + this.span, + this.singbox(), + path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json') + ) + ]); + } +} + +export const fileEqual = async (linesA: string[], source: AsyncIterable): Promise => { + if (linesA.length === 0) { + return false; + } + + let index = -1; + for await (const lineB of source) { + index++; + + if (index > linesA.length - 1) { + if (index === linesA.length && lineB === '') { + return true; + } + // The file becomes smaller 
+ return false; + } + + const lineA = linesA[index]; + + if (lineA[0] === '#' && lineB[0] === '#') { + continue; + } + if ( + lineA[0] === '/' + && lineA[1] === '/' + && lineB[0] === '/' + && lineB[1] === '/' + && lineA[3] === '#' + && lineB[3] === '#' + ) { + continue; + } + + if (lineA !== lineB) { + return false; + } + } + + if (index < linesA.length - 1) { + // The file becomes larger + return false; + } + + return true; +}; + +export async function compareAndWriteFile(span: Span, linesA: string[], filePath: string) { + let isEqual = true; + const linesALen = linesA.length; + + if (fs.existsSync(filePath)) { + isEqual = await fileEqual(linesA, readFileByLine(filePath)); + } else { + console.log(`${filePath} does not exists, writing...`); + isEqual = false; + } + + if (isEqual) { + console.log(picocolors.gray(picocolors.dim(`same content, bail out writing: ${filePath}`))); + return; + } + + await span.traceChildAsync(`writing ${filePath}`, async () => { + // The default highwater mark is normally 16384, + // So we make sure direct write to file if the content is + // most likely less than 500 lines + if (linesALen < 500) { + return writeFile(filePath, fastStringArrayJoin(linesA, '\n') + '\n'); + } + + const writeStream = fs.createWriteStream(filePath); + for (let i = 0; i < linesALen; i++) { + const p = asyncWriteToStream(writeStream, linesA[i] + '\n'); + // eslint-disable-next-line no-await-in-loop -- stream high water mark + if (p) await p; + } + + await asyncWriteToStream(writeStream, '\n'); + + writeStream.end(); + }); +} diff --git a/Build/lib/rules/domainset.ts b/Build/lib/rules/domainset.ts new file mode 100644 index 00000000..4be35f22 --- /dev/null +++ b/Build/lib/rules/domainset.ts @@ -0,0 +1,95 @@ +import { invariant } from 'foxact/invariant'; +import createKeywordFilter from '../aho-corasick'; +import { buildParseDomainMap, sortDomains } from '../stable-sort-domain'; +import { RuleOutput } from './base'; +import type { SingboxSourceFormat } from 
'../singbox'; +import { nullthrow } from 'foxact/nullthrow'; + +export class DomainsetOutput extends RuleOutput { + protected type = 'domainset' as const; + + private $sorted: string[] | null = null; + + get sorted() { + if (!this.$sorted) { + const kwfilter = createKeywordFilter(this.domainKeywords); + + const results: string[] = []; + + const dumped = this.domainTrie.dump(); + + for (let i = 0, len = dumped.length; i < len; i++) { + const domain = dumped[i]; + if (!kwfilter(domain)) { + results.push(domain); + } + } + + const sorted = sortDomains(results, this.apexDomainMap, this.subDomainMap); + sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'); + + this.$sorted = sorted; + } + return this.$sorted; + } + + calcDomainMap() { + if (!this.apexDomainMap || !this.subDomainMap) { + const { domainMap, subdomainMap } = buildParseDomainMap(this.sorted); + this.apexDomainMap = domainMap; + this.subDomainMap = subdomainMap; + } + } + + surge(): string[] { + return this.sorted; + } + + clash(): string[] { + return this.sorted.map(i => (i[0] === '.' ? `+${i}` : i)); + } + + singbox(): string[] { + const domains: string[] = []; + const domainSuffixes: string[] = []; + + for (let i = 0, len = this.sorted.length; i < len; i++) { + const domain = this.sorted[i]; + if (domain[0] === '.') { + domainSuffixes.push(domain.slice(1)); + } else { + domains.push(domain); + } + } + + return RuleOutput.jsonToLines({ + version: 2, + rules: [{ + domain: domains, + domain_suffix: domainSuffixes + }] + } satisfies SingboxSourceFormat); + } + + getStatMap() { + invariant(this.sorted, 'Non dumped yet'); + invariant(this.apexDomainMap, 'Missing apex domain map'); + + return Array.from( + ( + nullthrow(this.sorted, 'Non dumped yet').reduce>((acc, cur) => { + const suffix = this.apexDomainMap!.get(cur); + if (suffix) { + acc.set(suffix, (acc.get(suffix) ?? 
0) + 1); + } + return acc; + }, new Map()) + ).entries() + ) + .filter(a => a[1] > 9) + .sort( + (a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0]) + ) + .map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`); + } +} diff --git a/Build/lib/rules/ip.ts b/Build/lib/rules/ip.ts new file mode 100644 index 00000000..582286f2 --- /dev/null +++ b/Build/lib/rules/ip.ts @@ -0,0 +1,53 @@ +import type { Span } from '../../trace'; +import { appendArrayFromSet } from '../misc'; +import type { SingboxSourceFormat } from '../singbox'; +import { RuleOutput } from './base'; + +export class IPListOutput extends RuleOutput { + protected type = 'ip' as const; + + constructor(span: Span, id: string, private readonly clashUseRule = true) { + super(span, id); + } + + private $merged: string[] | null = null; + get merged() { + if (!this.$merged) { + this.$merged = appendArrayFromSet(appendArrayFromSet([], this.ipcidr), this.ipcidr6); + } + return this.$merged; + } + + private $surge: string[] | null = null; + + surge(): string[] { + if (!this.$surge) { + const results: string[] = ['DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe']; + + appendArrayFromSet(results, this.ipcidr, i => `IP-CIDR,${i}`); + appendArrayFromSet(results, this.ipcidr6, i => `IP-CIDR6,${i}`); + + this.$surge = results; + } + return this.$surge; + } + + clash(): string[] { + if (this.clashUseRule) { + return this.surge(); + } + + return this.merged; + } + + singbox(): string[] { + const singbox: SingboxSourceFormat = { + version: 2, + rules: [{ + domain: ['this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'], + ip_cidr: this.merged + }] + }; + return RuleOutput.jsonToLines(singbox); + } +} diff --git a/Build/lib/rules/ruleset.ts b/Build/lib/rules/ruleset.ts new file mode 100644 index 00000000..7c4e6e75 --- /dev/null +++ b/Build/lib/rules/ruleset.ts @@ -0,0 +1,112 @@ +import type { Span } from '../../trace'; +import createKeywordFilter from '../aho-corasick'; +import { appendArrayInPlace } 
/**
 * Rule output for a mixed ruleset (domains, keywords, wildcards, process
 * matchers, IP rules, ...), written as Surge, Clash, and sing-box rules.
 */
export class RulesetOutput extends RuleOutput {
  /**
   * @param span tracing span, forwarded to `RuleOutput`
   * @param id output identifier, forwarded to `RuleOutput`
   * @param type which kind of ruleset this instance represents
   */
  constructor(span: Span, id: string, protected type: 'non_ip' | 'ip') {
    super(span, id);
  }

  /** Cache for the `computed` getter; `null` until first access. */
  private $computed: [domain: string[], domainSuffix: string[], sortedDomainRules: string[]] | null = null;

  /**
   * Splits the sorted trie dump into three parallel views, computed once:
   * exact domains, suffix domains (leading dot removed), and the matching
   * `DOMAIN,` / `DOMAIN-SUFFIX,` rule lines in sorted order. Entries for which
   * the keyword filter returns true are left out of all three.
   */
  private get computed() {
    if (this.$computed) {
      return this.$computed;
    }

    const matchesKeyword = createKeywordFilter(this.domainKeywords);

    const exactDomains: string[] = [];
    const suffixDomains: string[] = [];
    const ruleLines: string[] = [];

    const ordered = sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap);
    for (const entry of ordered) {
      if (matchesKeyword(entry)) {
        continue;
      }
      if (entry[0] !== '.') {
        exactDomains.push(entry);
        ruleLines.push(`DOMAIN,${entry}`);
        continue;
      }
      const bare = entry.slice(1);
      suffixDomains.push(bare);
      ruleLines.push(`DOMAIN-SUFFIX,${bare}`);
    }

    this.$computed = [exactDomains, suffixDomains, ruleLines];
    return this.$computed;
  }

  /**
   * Appends the IP rule tail shared by `surge()` and `clash()`: first the
   * `no-resolve` variants, then the plain ones, each in the order
   * CIDR, CIDR6, ASN, GEOIP.
   */
  private appendIpRuleLines(lines: string[]): void {
    appendArrayFromSet(lines, this.ipcidrNoResolve, i => `IP-CIDR,${i},no-resolve`);
    appendArrayFromSet(lines, this.ipcidr6NoResolve, i => `IP-CIDR6,${i},no-resolve`);
    appendArrayFromSet(lines, this.ipasnNoResolve, i => `IP-ASN,${i},no-resolve`);
    appendArrayFromSet(lines, this.groipNoResolve, i => `GEOIP,${i},no-resolve`);

    appendArrayFromSet(lines, this.ipcidr, i => `IP-CIDR,${i}`);
    appendArrayFromSet(lines, this.ipcidr6, i => `IP-CIDR6,${i}`);
    appendArrayFromSet(lines, this.ipasn, i => `IP-ASN,${i}`);
    appendArrayFromSet(lines, this.geoip, i => `GEOIP,${i}`);
  }

  /**
   * Surge output. Order: marker rule, domain rules, keywords, wildcards,
   * user agents, process matchers, `otherRules`, URL regexes, IP rules.
   * A fresh array is built on every call.
   */
  surge(): string[] {
    const lines: string[] = ['DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'];
    const [, , domainRuleLines] = this.computed;
    appendArrayInPlace(lines, domainRuleLines);

    appendArrayFromSet(lines, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`);
    appendArrayFromSet(lines, this.domainWildcard, i => `DOMAIN-WILDCARD,${i}`);

    appendArrayFromSet(lines, this.userAgent, i => `USER-AGENT,${i}`);

    // Both process names and process paths are written as PROCESS-NAME here.
    appendArrayFromSet(lines, this.processName, i => `PROCESS-NAME,${i}`);
    appendArrayFromSet(lines, this.processPath, i => `PROCESS-NAME,${i}`);

    appendArrayInPlace(lines, this.otherRules);

    appendArrayFromSet(lines, this.urlRegex, i => `URL-REGEX,${i}`);

    this.appendIpRuleLines(lines);

    return lines;
  }

  /**
   * Clash output. Same layout as `surge()` except that wildcards become
   * `DOMAIN-REGEX` rules, process paths use `PROCESS-PATH`, and user agents,
   * `otherRules` and URL regexes are not emitted.
   */
  clash(): string[] {
    const lines: string[] = ['DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'];
    const [, , domainRuleLines] = this.computed;
    appendArrayInPlace(lines, domainRuleLines);

    appendArrayFromSet(lines, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`);
    appendArrayFromSet(lines, this.domainWildcard, i => `DOMAIN-REGEX,${RuleOutput.domainWildCardToRegex(i)}`);

    appendArrayFromSet(lines, this.processName, i => `PROCESS-NAME,${i}`);
    appendArrayFromSet(lines, this.processPath, i => `PROCESS-PATH,${i}`);

    // `otherRules` is intentionally not written for Clash.

    this.appendIpRuleLines(lines);

    return lines;
  }

  /**
   * sing-box output: one rule object holding the marker domain plus exact
   * domains, suffixes, keywords, wildcard regexes, all four CIDR sets merged
   * into `ip_cidr`, and the process matchers.
   *
   * @returns the JSON document as produced by `RuleOutput.jsonToLines`
   */
  singbox(): string[] {
    const [exactDomains, suffixDomains] = this.computed;

    const payload: SingboxSourceFormat = {
      version: 2,
      rules: [{
        domain: ['this_ruleset_is_made_by_sukkaw.ruleset.skk.moe', ...exactDomains],
        domain_suffix: suffixDomains,
        domain_keyword: Array.from(this.domainKeywords),
        domain_regex: Array.from(this.domainWildcard).map(RuleOutput.domainWildCardToRegex),
        ip_cidr: appendArrayFromSet([], [this.ipcidr, this.ipcidrNoResolve, this.ipcidr6, this.ipcidr6NoResolve]),
        process_name: Array.from(this.processName),
        process_path: Array.from(this.processPath)
      }]
    };

    return RuleOutput.jsonToLines(payload);
  }
}
if (r) { - const [k, v] = r; - acc[k] ||= []; - (acc[k] as any).push(v); - } - } - } else { - console.log(picocolors.yellow(`[sing-box] unknown rule type: ${type}`), cur); - } - return acc; - }, {}); - - return { - version: 2, - rules: [rule] - }; -}; - -export const surgeDomainsetToSingbox = (domainset: string[]) => { - const rule = domainset.reduce((acc, cur) => { - if (cur[0] === '.') { - acc.domain_suffix.push(cur.slice(1)); - } else { - acc.domain.push(cur); - } - return acc; - }, { domain: [] as string[], domain_suffix: [] as string[] } satisfies SingboxHeadlessRule); - - return { - version: 2, - rules: [rule] - }; -}; - -export const ipCidrListToSingbox = (ipCidrList: string[]): SingboxSourceFormat => { - return { - version: 2, - rules: [{ - ip_cidr: ipCidrList - }] - }; -};