From 9eec31be10170c224997104088f1c9cdfdd44fd7 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Sat, 21 Sep 2024 04:22:08 +0800 Subject: [PATCH] Refactor: merge new output --- Build/build-apple-cdn.ts | 2 +- Build/build-cdn-download-conf.ts | 2 +- Build/build-chn-cidr.ts | 2 +- Build/build-cloudmounter-rules.ts | 2 +- Build/build-common.ts | 2 +- ...c-direct-lan-ruleset-dns-mapping-module.ts | 2 +- Build/build-microsoft-cdn.ts | 2 +- Build/build-reject-domainset.ts | 2 +- Build/build-reject-ip-list.ts | 2 +- Build/build-speedtest-domainset.ts | 2 +- Build/build-stream-service.ts | 2 +- Build/build-telegram-cidr.ts | 2 +- Build/lib/create-file-new.ts | 409 ------------ Build/lib/create-file.ts | 616 +++++++++++++----- Source/domainset/cdn.conf | 10 + 15 files changed, 461 insertions(+), 598 deletions(-) delete mode 100644 Build/lib/create-file-new.ts diff --git a/Build/build-apple-cdn.ts b/Build/build-apple-cdn.ts index 3e33fa3e..856b13dc 100644 --- a/Build/build-apple-cdn.ts +++ b/Build/build-apple-cdn.ts @@ -3,7 +3,7 @@ import { task } from './trace'; import { SHARED_DESCRIPTION } from './lib/constants'; import { createMemoizedPromise } from './lib/memo-promise'; import { TTL, deserializeArray, fsFetchCache, serializeArray, createCacheKey } from './lib/cache-filesystem'; -import { DomainsetOutput } from './lib/create-file-new'; +import { DomainsetOutput } from './lib/create-file'; const cacheKey = createCacheKey(__filename); diff --git a/Build/build-cdn-download-conf.ts b/Build/build-cdn-download-conf.ts index 45a59be7..4ff864ed 100644 --- a/Build/build-cdn-download-conf.ts +++ b/Build/build-cdn-download-conf.ts @@ -7,7 +7,7 @@ import { getPublicSuffixListTextPromise } from './lib/download-publicsuffixlist' import { appendArrayInPlace } from './lib/append-array-in-place'; import { SOURCE_DIR } from './constants/dir'; import { processLine } from './lib/process-line'; -import { DomainsetOutput } from './lib/create-file-new'; +import { DomainsetOutput } from './lib/create-file'; const getS3OSSDomainsPromise = (async (): Promise => { const trie = createTrie( diff --git a/Build/build-chn-cidr.ts b/Build/build-chn-cidr.ts index 1e39cd0b..a49761df 100644 --- a/Build/build-chn-cidr.ts +++ b/Build/build-chn-cidr.ts @@ -6,7 +6,7 @@ import { exclude } from 'fast-cidr-tools'; import { createMemoizedPromise } from './lib/memo-promise'; import { CN_CIDR_NOT_INCLUDED_IN_CHNROUTE, NON_CN_CIDR_INCLUDED_IN_CHNROUTE } from './constants/cidr'; import { appendArrayInPlace } from './lib/append-array-in-place'; -import { IPListOutput } from './lib/create-file-new'; +import { IPListOutput } from './lib/create-file'; export const getChnCidrPromise = createMemoizedPromise(async () => { const cidr4 = await processLineFromReadline(await fetchRemoteTextByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')); diff --git a/Build/build-cloudmounter-rules.ts b/Build/build-cloudmounter-rules.ts index a79be03e..c6dc047a 100644 --- a/Build/build-cloudmounter-rules.ts +++ b/Build/build-cloudmounter-rules.ts @@ -1,7 +1,7 @@ import { DOMAINS, PROCESS_NAMES } from '../Source/non_ip/cloudmounter'; import { SHARED_DESCRIPTION } from './lib/constants'; import { task } from './trace'; -import { RulesetOutput } from './lib/create-file-new'; +import { RulesetOutput } from './lib/create-file'; export const buildCloudMounterRules = task(require.main === module, __filename)(async (span) => { // AND,((SRC-IP,192.168.1.110), (DOMAIN, example.com)) diff --git a/Build/build-common.ts b/Build/build-common.ts index 4c97225b..e267a5f7 100644 --- a/Build/build-common.ts +++ b/Build/build-common.ts @@ -9,7 +9,7 @@ import { SHARED_DESCRIPTION } from './lib/constants'; import { fdir as Fdir } from 'fdir'; import { appendArrayInPlace } from './lib/append-array-in-place'; import { SOURCE_DIR } from './constants/dir'; -import { DomainsetOutput, RulesetOutput } from './lib/create-file-new'; +import { DomainsetOutput, RulesetOutput } from './lib/create-file'; const MAGIC_COMMAND_SKIP = '# $ custom_build_script'; const MAGIC_COMMAND_TITLE = '# $ meta_title '; diff --git a/Build/build-domestic-direct-lan-ruleset-dns-mapping-module.ts b/Build/build-domestic-direct-lan-ruleset-dns-mapping-module.ts index 85e8e1fe..b5276541 100644 --- a/Build/build-domestic-direct-lan-ruleset-dns-mapping-module.ts +++ b/Build/build-domestic-direct-lan-ruleset-dns-mapping-module.ts @@ -11,7 +11,7 @@ import * as yaml from 'yaml'; import { appendArrayInPlace } from './lib/append-array-in-place'; import { writeFile } from './lib/misc'; import { OUTPUT_INTERNAL_DIR, OUTPUT_MODULES_DIR, SOURCE_DIR } from './constants/dir'; -import { RulesetOutput } from './lib/create-file-new'; +import { RulesetOutput } from './lib/create-file'; export const getDomesticAndDirectDomainsRulesetPromise = createMemoizedPromise(async () => { const domestics = await readFileIntoProcessedArray(path.join(SOURCE_DIR, 'non_ip/domestic.conf')); diff --git a/Build/build-microsoft-cdn.ts b/Build/build-microsoft-cdn.ts index d5a0e4d4..e8731d17 100644 --- a/Build/build-microsoft-cdn.ts +++ b/Build/build-microsoft-cdn.ts @@ -4,7 +4,7 @@ import { createTrie } from './lib/trie'; import { SHARED_DESCRIPTION } from './lib/constants'; import { createMemoizedPromise } from './lib/memo-promise'; import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq'; -import { RulesetOutput } from './lib/create-file-new'; +import { RulesetOutput } from './lib/create-file'; const PROBE_DOMAINS = ['.microsoft.com', '.windows.net', '.windows.com', '.windowsupdate.com', '.windowssearch.com', '.office.net']; diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts index 40934bfc..d8814d3b 100644 --- a/Build/build-reject-domainset.ts +++ b/Build/build-reject-domainset.ts @@ -17,7 +17,7 @@ import { getPhishingDomains } from './lib/get-phishing-domains'; import { setAddFromArray } from './lib/set-add-from-array'; import { appendArrayInPlace } from './lib/append-array-in-place'; import { OUTPUT_INTERNAL_DIR, SOURCE_DIR } from './constants/dir'; -import { DomainsetOutput } from './lib/create-file-new'; +import { DomainsetOutput } from './lib/create-file'; const getRejectSukkaConfPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/reject_sukka.conf')); diff --git a/Build/build-reject-ip-list.ts b/Build/build-reject-ip-list.ts index 2daa080e..e05ef242 100644 --- a/Build/build-reject-ip-list.ts +++ b/Build/build-reject-ip-list.ts @@ -7,7 +7,7 @@ import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip'; import { TTL, fsFetchCache, createCacheKey } from './lib/cache-filesystem'; import { fetchAssets } from './lib/fetch-assets'; import { processLine } from './lib/process-line'; -import { RulesetOutput } from './lib/create-file-new'; +import { RulesetOutput } from './lib/create-file'; import { SOURCE_DIR } from './constants/dir'; const cacheKey = createCacheKey(__filename); diff --git a/Build/build-speedtest-domainset.ts b/Build/build-speedtest-domainset.ts index fd6013c4..9729fe6a 100644 --- a/Build/build-speedtest-domainset.ts +++ b/Build/build-speedtest-domainset.ts @@ -8,7 +8,7 @@ import { SHARED_DESCRIPTION } from './lib/constants'; import { readFileIntoProcessedArray } from './lib/fetch-text-by-line'; import { TTL, deserializeArray, fsFetchCache, serializeArray, createCacheKey } from './lib/cache-filesystem'; -import { DomainsetOutput } from './lib/create-file-new'; +import { DomainsetOutput } from './lib/create-file'; import { OUTPUT_SURGE_DIR } from './constants/dir'; const KEYWORDS = [ diff --git a/Build/build-stream-service.ts b/Build/build-stream-service.ts index 66ebd72c..6c451eed 100644 --- a/Build/build-stream-service.ts +++ b/Build/build-stream-service.ts @@ -4,7 +4,7 @@ import { task } from './trace'; import { ALL, NORTH_AMERICA, EU, HK, TW, JP, KR } from '../Source/stream'; import { SHARED_DESCRIPTION } from './lib/constants'; -import { RulesetOutput } from './lib/create-file-new'; +import { RulesetOutput } from './lib/create-file'; export const createRulesetForStreamService = (span: Span, fileId: string, title: string, streamServices: Array) => { return span.traceChildAsync(fileId, async (childSpan) => Promise.all([ diff --git a/Build/build-telegram-cidr.ts b/Build/build-telegram-cidr.ts index 164071a2..62a51608 100644 --- a/Build/build-telegram-cidr.ts +++ b/Build/build-telegram-cidr.ts @@ -6,7 +6,7 @@ import { processLine } from './lib/process-line'; import { task } from './trace'; import { SHARED_DESCRIPTION } from './lib/constants'; import { createMemoizedPromise } from './lib/memo-promise'; -import { RulesetOutput } from './lib/create-file-new'; +import { RulesetOutput } from './lib/create-file'; export const getTelegramCIDRPromise = createMemoizedPromise(async () => { const resp = await fetchWithRetry('https://core.telegram.org/resources/cidr.txt', defaultRequestInit); diff --git a/Build/lib/create-file-new.ts b/Build/lib/create-file-new.ts deleted file mode 100644 index 368bc8a8..00000000 --- a/Build/lib/create-file-new.ts +++ /dev/null @@ -1,409 +0,0 @@ -import path from 'node:path'; - -import type { Span } from '../trace'; -import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash'; -import { compareAndWriteFile, defaultSortTypeOrder, sortTypeOrder, withBannerArray } from './create-file'; -import { ipCidrListToSingbox, surgeDomainsetToSingbox, surgeRulesetToSingbox } from './singbox'; -import { buildParseDomainMap, sortDomains } from './stable-sort-domain'; -import { createTrie } from './trie'; -import { invariant } from 'foxact/invariant'; -import { OUTPUT_CLASH_DIR, OUTPUT_SINGBOX_DIR, OUTPUT_SURGE_DIR } from '../constants/dir'; -import stringify from 'json-stringify-pretty-compact'; -import { appendArrayInPlace } from './append-array-in-place'; -import { nullthrow } from 'foxact/nullthrow'; -import createKeywordFilter from './aho-corasick'; - -abstract class RuleOutput { - protected domainTrie = createTrie(null, true); - protected domainKeywords = new Set(); - protected domainWildcard = new Set(); - protected ipcidr = new Set(); - protected ipcidrNoResolve = new Set(); - protected ipcidr6 = new Set(); - protected ipcidr6NoResolve = new Set(); - protected otherRules: Array<[raw: string, orderWeight: number]> = []; - protected abstract type: 'domainset' | 'non_ip' | 'ip'; - - protected pendingPromise = Promise.resolve(); - - static jsonToLines(this: void, json: unknown): string[] { - return stringify(json).split('\n'); - } - - constructor( - protected readonly span: Span, - protected readonly id: string - ) {} - - protected title: string | null = null; - withTitle(title: string) { - this.title = title; - return this; - } - - protected description: string[] | readonly string[] | null = null; - withDescription(description: string[] | readonly string[]) { - this.description = description; - return this; - } - - protected date = new Date(); - withDate(date: Date) { - this.date = date; - return this; - } - - protected apexDomainMap: Map | null = null; - protected subDomainMap: Map | null = null; - withDomainMap(apexDomainMap: Map, subDomainMap: Map) { - this.apexDomainMap = apexDomainMap; - this.subDomainMap = subDomainMap; - return this; - } - - addDomain(domain: string) { - this.domainTrie.add(domain); - return this; - } - - addDomainSuffix(domain: string) { - this.domainTrie.add(domain[0] === '.' ? domain : '.' + domain); - return this; - } - - bulkAddDomainSuffix(domains: string[]) { - for (let i = 0, len = domains.length; i < len; i++) { - this.addDomainSuffix(domains[i]); - } - return this; - } - - addDomainKeyword(keyword: string) { - this.domainKeywords.add(keyword); - return this; - } - - addDomainWildcard(wildcard: string) { - this.domainWildcard.add(wildcard); - return this; - } - - private async addFromDomainsetPromise(source: AsyncIterable | Iterable | string[]) { - for await (const line of source) { - if (line[0] === '.') { - this.addDomainSuffix(line); - } else { - this.addDomain(line); - } - } - } - - addFromDomainset(source: AsyncIterable | Iterable | string[]) { - this.pendingPromise = this.pendingPromise.then(() => this.addFromDomainsetPromise(source)); - return this; - } - - private async addFromRulesetPromise(source: AsyncIterable | Iterable) { - for await (const line of source) { - const splitted = line.split(','); - const type = splitted[0]; - const value = splitted[1]; - const arg = splitted[2]; - - switch (type) { - case 'DOMAIN': - this.addDomain(value); - break; - case 'DOMAIN-SUFFIX': - this.addDomainSuffix(value); - break; - case 'DOMAIN-KEYWORD': - this.addDomainKeyword(value); - break; - case 'DOMAIN-WILDCARD': - this.addDomainWildcard(value); - break; - case 'IP-CIDR': - (arg === 'no-resolve' ? this.ipcidrNoResolve : this.ipcidr).add(value); - break; - case 'IP-CIDR6': - (arg === 'no-resolve' ? this.ipcidr6NoResolve : this.ipcidr6).add(value); - break; - default: - this.otherRules.push([line, type in sortTypeOrder ? sortTypeOrder[type] : sortTypeOrder[defaultSortTypeOrder]]); - break; - } - } - } - - addFromRuleset(source: AsyncIterable | Iterable) { - this.pendingPromise = this.pendingPromise.then(() => this.addFromRulesetPromise(source)); - return this; - } - - bulkAddCIDR4(cidr: string[]) { - for (let i = 0, len = cidr.length; i < len; i++) { - this.ipcidr.add(cidr[i]); - } - return this; - } - - bulkAddCIDR4NoResolve(cidr: string[]) { - for (let i = 0, len = cidr.length; i < len; i++) { - this.ipcidrNoResolve.add(cidr[i]); - } - return this; - } - - bulkAddCIDR6(cidr: string[]) { - for (let i = 0, len = cidr.length; i < len; i++) { - this.ipcidr6.add(cidr[i]); - } - return this; - } - - bulkAddCIDR6NoResolve(cidr: string[]) { - for (let i = 0, len = cidr.length; i < len; i++) { - this.ipcidr6NoResolve.add(cidr[i]); - } - return this; - } - - abstract write(): Promise; -} - -export class DomainsetOutput extends RuleOutput { - protected type = 'domainset' as const; - - private $dumped: string[] | null = null; - - get dumped() { - if (!this.$dumped) { - const kwfilter = createKeywordFilter(this.domainKeywords); - - const results: string[] = []; - - const dumped = this.domainTrie.dump(); - - for (let i = 0, len = dumped.length; i < len; i++) { - const domain = dumped[i]; - if (!kwfilter(domain)) { - results.push(domain); - } - } - - this.$dumped = results; - } - return this.$dumped; - } - - calcDomainMap() { - if (!this.apexDomainMap || !this.subDomainMap) { - const { domainMap, subdomainMap } = buildParseDomainMap(this.dumped); - this.apexDomainMap = domainMap; - this.subDomainMap = subdomainMap; - } - } - - async write() { - await this.pendingPromise; - - invariant(this.title, 'Missing title'); - invariant(this.description, 'Missing description'); - - const sorted = sortDomains(this.dumped, this.apexDomainMap, this.subDomainMap); - sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'); - - const surge = sorted; - const clash = surgeDomainsetToClashDomainset(sorted); - // TODO: Implement singbox directly using data - const singbox = RuleOutput.jsonToLines(surgeDomainsetToSingbox(sorted)); - - await Promise.all([ - compareAndWriteFile( - this.span, - withBannerArray( - this.title, - this.description, - this.date, - surge - ), - path.join(OUTPUT_SURGE_DIR, this.type, this.id + '.conf') - ), - compareAndWriteFile( - this.span, - withBannerArray( - this.title, - this.description, - this.date, - clash - ), - path.join(OUTPUT_CLASH_DIR, this.type, this.id + '.txt') - ), - compareAndWriteFile( - this.span, - singbox, - path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json') - ) - ]); - } - - getStatMap() { - invariant(this.dumped, 'Non dumped yet'); - invariant(this.apexDomainMap, 'Missing apex domain map'); - - return Array.from( - ( - nullthrow(this.dumped, 'Non dumped yet').reduce>((acc, cur) => { - const suffix = this.apexDomainMap!.get(cur); - if (suffix) { - acc.set(suffix, (acc.get(suffix) ?? 0) + 1); - } - return acc; - }, new Map()) - ).entries() - ) - .filter(a => a[1] > 9) - .sort( - (a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0]) - ) - .map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`); - } -} - -export class IPListOutput extends RuleOutput { - protected type = 'ip' as const; - - constructor(span: Span, id: string, private readonly clashUseRule = true) { - super(span, id); - } - - async write() { - await this.pendingPromise; - - invariant(this.title, 'Missing title'); - invariant(this.description, 'Missing description'); - - const sorted4 = Array.from(this.ipcidr); - const sorted6 = Array.from(this.ipcidr6); - const merged = appendArrayInPlace(appendArrayInPlace([], sorted4), sorted6); - - const surge = sorted4.map(i => `IP-CIDR,${i}`); - appendArrayInPlace(surge, sorted6.map(i => `IP-CIDR6,${i}`)); - surge.push('DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'); - - const clash = this.clashUseRule ? surge : merged; - // TODO: Implement singbox directly using data - const singbox = RuleOutput.jsonToLines(ipCidrListToSingbox(merged)); - - await Promise.all([ - compareAndWriteFile( - this.span, - withBannerArray( - this.title, - this.description, - this.date, - surge - ), - path.join(OUTPUT_SURGE_DIR, this.type, this.id + '.conf') - ), - compareAndWriteFile( - this.span, - withBannerArray( - this.title, - this.description, - this.date, - clash - ), - path.join(OUTPUT_CLASH_DIR, this.type, this.id + '.txt') - ), - compareAndWriteFile( - this.span, - singbox, - path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json') - ) - ]); - } -} - -export class RulesetOutput extends RuleOutput { - constructor(span: Span, id: string, protected type: 'non_ip' | 'ip') { - super(span, id); - } - - async write() { - await this.pendingPromise; - - invariant(this.title, 'Missing title'); - invariant(this.description, 'Missing description'); - - const results: string[] = [ - 'DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe' - ]; - - const kwfilter = createKeywordFilter(this.domainKeywords); - - const sortedDomains = sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap); - for (let i = 0, len = sortedDomains.length; i < len; i++) { - const domain = sortedDomains[i]; - if (kwfilter(domain)) { - continue; - } - if (domain[0] === '.') { - results.push(`DOMAIN-SUFFIX,${domain.slice(1)}`); - } else { - results.push(`DOMAIN,${domain}`); - } - } - - for (const keyword of this.domainKeywords) { - results.push(`DOMAIN-KEYWORD,${keyword}`); - } - for (const wildcard of this.domainWildcard) { - results.push(`DOMAIN-WILDCARD,${wildcard}`); - } - - const sortedRules = this.otherRules.sort((a, b) => a[1] - b[1]); - for (let i = 0, len = sortedRules.length; i < len; i++) { - results.push(sortedRules[i][0]); - } - - this.ipcidr.forEach(cidr => results.push(`IP-CIDR,${cidr}`)); - this.ipcidrNoResolve.forEach(cidr => results.push(`IP-CIDR,${cidr},no-resolve`)); - this.ipcidr6.forEach(cidr => results.push(`IP-CIDR6,${cidr}`)); - this.ipcidr6NoResolve.forEach(cidr => results.push(`IP-CIDR6,${cidr},no-resolve`)); - - const surge = results; - const clash = surgeRulesetToClashClassicalTextRuleset(results); - // TODO: Implement singbox directly using data - const singbox = RuleOutput.jsonToLines(surgeRulesetToSingbox(results)); - - await Promise.all([ - compareAndWriteFile( - this.span, - withBannerArray( - this.title, - this.description, - this.date, - surge - ), - path.join(OUTPUT_SURGE_DIR, this.type, this.id + '.conf') - ), - compareAndWriteFile( - this.span, - withBannerArray( - this.title, - this.description, - this.date, - clash - ), - path.join(OUTPUT_CLASH_DIR, this.type, this.id + '.txt') - ), - compareAndWriteFile( - this.span, - singbox, - path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json') - ) - ]); - } -} diff --git a/Build/lib/create-file.ts b/Build/lib/create-file.ts index 8be60c97..3458ebce 100644 --- a/Build/lib/create-file.ts +++ b/Build/lib/create-file.ts @@ -1,17 +1,448 @@ -// @ts-check -import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash'; -import picocolors from 'picocolors'; -import type { Span } from '../trace'; import path from 'node:path'; + +import type { Span } from '../trace'; +import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash'; +import { ipCidrListToSingbox, surgeDomainsetToSingbox, surgeRulesetToSingbox } from './singbox'; +import { buildParseDomainMap, sortDomains } from './stable-sort-domain'; +import { createTrie } from './trie'; +import { invariant } from 'foxact/invariant'; +import { OUTPUT_CLASH_DIR, OUTPUT_SINGBOX_DIR, OUTPUT_SURGE_DIR } from '../constants/dir'; +import stringify from 'json-stringify-pretty-compact'; +import { appendArrayInPlace } from './append-array-in-place'; +import { nullthrow } from 'foxact/nullthrow'; +import createKeywordFilter from './aho-corasick'; +import picocolors from 'picocolors'; import fs from 'node:fs'; import { fastStringArrayJoin, writeFile } from './misc'; import { readFileByLine } from './fetch-text-by-line'; -import stringify from 'json-stringify-pretty-compact'; -import { surgeDomainsetToSingbox, surgeRulesetToSingbox } from './singbox'; -import { createTrie } from './trie'; -import { pack, unpackFirst, unpackSecond } from './bitwise'; import { asyncWriteToStream } from './async-write-to-stream'; +const defaultSortTypeOrder = Symbol('defaultSortTypeOrder'); +const sortTypeOrder: Record = { + DOMAIN: 1, + 'DOMAIN-SUFFIX': 2, + 'DOMAIN-KEYWORD': 10, + // experimental domain wildcard support + 'DOMAIN-WILDCARD': 20, + 'DOMAIN-REGEX': 21, + 'USER-AGENT': 30, + 'PROCESS-NAME': 40, + [defaultSortTypeOrder]: 50, // default sort order for unknown type + 'URL-REGEX': 100, + AND: 300, + OR: 300, + 'IP-CIDR': 400, + 'IP-CIDR6': 400 +}; + +abstract class RuleOutput { + protected domainTrie = createTrie(null, true); + protected domainKeywords = new Set(); + protected domainWildcard = new Set(); + protected ipcidr = new Set(); + protected ipcidrNoResolve = new Set(); + protected ipcidr6 = new Set(); + protected ipcidr6NoResolve = new Set(); + protected otherRules: Array<[raw: string, orderWeight: number]> = []; + protected abstract type: 'domainset' | 'non_ip' | 'ip'; + + protected pendingPromise = Promise.resolve(); + + static jsonToLines(this: void, json: unknown): string[] { + return stringify(json).split('\n'); + } + + constructor( + protected readonly span: Span, + protected readonly id: string + ) {} + + protected title: string | null = null; + withTitle(title: string) { + this.title = title; + return this; + } + + protected description: string[] | readonly string[] | null = null; + withDescription(description: string[] | readonly string[]) { + this.description = description; + return this; + } + + protected date = new Date(); + withDate(date: Date) { + this.date = date; + return this; + } + + protected apexDomainMap: Map | null = null; + protected subDomainMap: Map | null = null; + withDomainMap(apexDomainMap: Map, subDomainMap: Map) { + this.apexDomainMap = apexDomainMap; + this.subDomainMap = subDomainMap; + return this; + } + + addDomain(domain: string) { + this.domainTrie.add(domain); + return this; + } + + addDomainSuffix(domain: string) { + this.domainTrie.add(domain[0] === '.' ? domain : '.' + domain); + return this; + } + + bulkAddDomainSuffix(domains: string[]) { + for (let i = 0, len = domains.length; i < len; i++) { + this.addDomainSuffix(domains[i]); + } + return this; + } + + addDomainKeyword(keyword: string) { + this.domainKeywords.add(keyword); + return this; + } + + addDomainWildcard(wildcard: string) { + this.domainWildcard.add(wildcard); + return this; + } + + private async addFromDomainsetPromise(source: AsyncIterable | Iterable | string[]) { + for await (const line of source) { + if (line[0] === '.') { + this.addDomainSuffix(line); + } else { + this.addDomain(line); + } + } + } + + addFromDomainset(source: AsyncIterable | Iterable | string[]) { + this.pendingPromise = this.pendingPromise.then(() => this.addFromDomainsetPromise(source)); + return this; + } + + private async addFromRulesetPromise(source: AsyncIterable | Iterable) { + for await (const line of source) { + const splitted = line.split(','); + const type = splitted[0]; + const value = splitted[1]; + const arg = splitted[2]; + + switch (type) { + case 'DOMAIN': + this.addDomain(value); + break; + case 'DOMAIN-SUFFIX': + this.addDomainSuffix(value); + break; + case 'DOMAIN-KEYWORD': + this.addDomainKeyword(value); + break; + case 'DOMAIN-WILDCARD': + this.addDomainWildcard(value); + break; + case 'IP-CIDR': + (arg === 'no-resolve' ? this.ipcidrNoResolve : this.ipcidr).add(value); + break; + case 'IP-CIDR6': + (arg === 'no-resolve' ? this.ipcidr6NoResolve : this.ipcidr6).add(value); + break; + default: + this.otherRules.push([line, type in sortTypeOrder ? sortTypeOrder[type] : sortTypeOrder[defaultSortTypeOrder]]); + break; + } + } + } + + addFromRuleset(source: AsyncIterable | Iterable) { + this.pendingPromise = this.pendingPromise.then(() => this.addFromRulesetPromise(source)); + return this; + } + + bulkAddCIDR4(cidr: string[]) { + for (let i = 0, len = cidr.length; i < len; i++) { + this.ipcidr.add(cidr[i]); + } + return this; + } + + bulkAddCIDR4NoResolve(cidr: string[]) { + for (let i = 0, len = cidr.length; i < len; i++) { + this.ipcidrNoResolve.add(cidr[i]); + } + return this; + } + + bulkAddCIDR6(cidr: string[]) { + for (let i = 0, len = cidr.length; i < len; i++) { + this.ipcidr6.add(cidr[i]); + } + return this; + } + + bulkAddCIDR6NoResolve(cidr: string[]) { + for (let i = 0, len = cidr.length; i < len; i++) { + this.ipcidr6NoResolve.add(cidr[i]); + } + return this; + } + + abstract write(): Promise; +} + +export class DomainsetOutput extends RuleOutput { + protected type = 'domainset' as const; + + private $dumped: string[] | null = null; + + get dumped() { + if (!this.$dumped) { + const kwfilter = createKeywordFilter(this.domainKeywords); + + const results: string[] = []; + + const dumped = this.domainTrie.dump(); + + for (let i = 0, len = dumped.length; i < len; i++) { + const domain = dumped[i]; + if (!kwfilter(domain)) { + results.push(domain); + } + } + + this.$dumped = results; + } + return this.$dumped; + } + + calcDomainMap() { + if (!this.apexDomainMap || !this.subDomainMap) { + const { domainMap, subdomainMap } = buildParseDomainMap(this.dumped); + this.apexDomainMap = domainMap; + this.subDomainMap = subdomainMap; + } + } + + async write() { + await this.pendingPromise; + + invariant(this.title, 'Missing title'); + invariant(this.description, 'Missing description'); + + const sorted = sortDomains(this.dumped, this.apexDomainMap, this.subDomainMap); + sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'); + + const surge = sorted; + const clash = surgeDomainsetToClashDomainset(sorted); + // TODO: Implement singbox directly using data + const singbox = RuleOutput.jsonToLines(surgeDomainsetToSingbox(sorted)); + + await Promise.all([ + compareAndWriteFile( + this.span, + withBannerArray( + this.title, + this.description, + this.date, + surge + ), + path.join(OUTPUT_SURGE_DIR, this.type, this.id + '.conf') + ), + compareAndWriteFile( + this.span, + withBannerArray( + this.title, + this.description, + this.date, + clash + ), + path.join(OUTPUT_CLASH_DIR, this.type, this.id + '.txt') + ), + compareAndWriteFile( + this.span, + singbox, + path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json') + ) + ]); + } + + getStatMap() { + invariant(this.dumped, 'Non dumped yet'); + invariant(this.apexDomainMap, 'Missing apex domain map'); + + return Array.from( + ( + nullthrow(this.dumped, 'Non dumped yet').reduce>((acc, cur) => { + const suffix = this.apexDomainMap!.get(cur); + if (suffix) { + acc.set(suffix, (acc.get(suffix) ?? 0) + 1); + } + return acc; + }, new Map()) + ).entries() + ) + .filter(a => a[1] > 9) + .sort( + (a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0]) + ) + .map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`); + } +} + +export class IPListOutput extends RuleOutput { + protected type = 'ip' as const; + + constructor(span: Span, id: string, private readonly clashUseRule = true) { + super(span, id); + } + + async write() { + await this.pendingPromise; + + invariant(this.title, 'Missing title'); + invariant(this.description, 'Missing description'); + + const sorted4 = Array.from(this.ipcidr); + const sorted6 = Array.from(this.ipcidr6); + const merged = appendArrayInPlace(appendArrayInPlace([], sorted4), sorted6); + + const surge = sorted4.map(i => `IP-CIDR,${i}`); + appendArrayInPlace(surge, sorted6.map(i => `IP-CIDR6,${i}`)); + surge.push('DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'); + + const clash = this.clashUseRule ? surge : merged; + // TODO: Implement singbox directly using data + const singbox = RuleOutput.jsonToLines(ipCidrListToSingbox(merged)); + + await Promise.all([ + compareAndWriteFile( + this.span, + withBannerArray( + this.title, + this.description, + this.date, + surge + ), + path.join(OUTPUT_SURGE_DIR, this.type, this.id + '.conf') + ), + compareAndWriteFile( + this.span, + withBannerArray( + this.title, + this.description, + this.date, + clash + ), + path.join(OUTPUT_CLASH_DIR, this.type, this.id + '.txt') + ), + compareAndWriteFile( + this.span, + singbox, + path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json') + ) + ]); + } +} + +export class RulesetOutput extends RuleOutput { + constructor(span: Span, id: string, protected type: 'non_ip' | 'ip') { + super(span, id); + } + + async write() { + await this.pendingPromise; + + invariant(this.title, 'Missing title'); + invariant(this.description, 'Missing description'); + + const results: string[] = [ + 'DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe' + ]; + + const kwfilter = createKeywordFilter(this.domainKeywords); + + const sortedDomains = sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap); + for (let i = 0, len = sortedDomains.length; i < len; i++) { + const domain = sortedDomains[i]; + if (kwfilter(domain)) { + continue; + } + if (domain[0] === '.') { + results.push(`DOMAIN-SUFFIX,${domain.slice(1)}`); + } else { + results.push(`DOMAIN,${domain}`); + } + } + + for (const keyword of this.domainKeywords) { + results.push(`DOMAIN-KEYWORD,${keyword}`); + } + for (const wildcard of this.domainWildcard) { + results.push(`DOMAIN-WILDCARD,${wildcard}`); + } + + const sortedRules = this.otherRules.sort((a, b) => a[1] - b[1]); + for (let i = 0, len = sortedRules.length; i < len; i++) { + results.push(sortedRules[i][0]); + } + + this.ipcidr.forEach(cidr => results.push(`IP-CIDR,${cidr}`)); + this.ipcidrNoResolve.forEach(cidr => results.push(`IP-CIDR,${cidr},no-resolve`)); + this.ipcidr6.forEach(cidr => results.push(`IP-CIDR6,${cidr}`)); + this.ipcidr6NoResolve.forEach(cidr => results.push(`IP-CIDR6,${cidr},no-resolve`)); + + const surge = results; + const clash = surgeRulesetToClashClassicalTextRuleset(results); + // TODO: Implement singbox directly using data + const singbox = RuleOutput.jsonToLines(surgeRulesetToSingbox(results)); + + await Promise.all([ + compareAndWriteFile( + this.span, + withBannerArray( + this.title, + this.description, + this.date, + surge + ), + path.join(OUTPUT_SURGE_DIR, this.type, this.id + '.conf') + ), + compareAndWriteFile( + this.span, + withBannerArray( + this.title, + this.description, + this.date, + clash + ), + path.join(OUTPUT_CLASH_DIR, this.type, this.id + '.txt') + ), + compareAndWriteFile( + this.span, + singbox, + path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json') + ) + ]); + } +} + +function withBannerArray(title: string, description: string[] | readonly string[], date: Date, content: string[]) { + return [ + '#########################################', + `# ${title}`, + `# Last Updated: ${date.toISOString()}`, + `# Size: ${content.length}`, + ...description.map(line => (line ? `# ${line}` : '#')), + '#########################################', + ...content, + '################## EOF ##################' + ]; +}; + export const fileEqual = async (linesA: string[], source: AsyncIterable): Promise => { if (linesA.length === 0) { return false; @@ -94,172 +525,3 @@ export async function compareAndWriteFile(span: Span, linesA: string[], filePath writeStream.end(); }); } - -export const withBannerArray = (title: string, description: string[] | readonly string[], date: Date, content: string[]) => { - return [ - '#########################################', - `# ${title}`, - `# Last Updated: ${date.toISOString()}`, - `# Size: ${content.length}`, - ...description.map(line => (line ? `# ${line}` : '#')), - '#########################################', - ...content, - '################## EOF ##################' - ]; -}; - -export const defaultSortTypeOrder = Symbol('defaultSortTypeOrder'); -export const sortTypeOrder: Record = { - DOMAIN: 1, - 'DOMAIN-SUFFIX': 2, - 'DOMAIN-KEYWORD': 10, - // experimental domain wildcard support - 'DOMAIN-WILDCARD': 20, - 'DOMAIN-REGEX': 21, - 'USER-AGENT': 30, - 'PROCESS-NAME': 40, - [defaultSortTypeOrder]: 50, // default sort order for unknown type - 'URL-REGEX': 100, - AND: 300, - OR: 300, - 'IP-CIDR': 400, - 'IP-CIDR6': 400 -}; - -const flagDomain = 1 << 2; -const flagDomainSuffix = 1 << 3; - -// dedupe and sort based on rule type -const processRuleSet = (ruleSet: string[]) => { - const trie = createTrie(null, true); - - /** Packed Array<[valueIndex: number, weight: number]> */ - const sortMap: number[] = []; - for (let i = 0, len = ruleSet.length; i < len; i++) { - const line = ruleSet[i]; - const [type, value] = line.split(','); - - let extraWeight = 0; - - switch (type) { - case 'DOMAIN': - trie.add(value, pack(i, flagDomain)); - break; - case 'DOMAIN-SUFFIX': - trie.add('.' + value, pack(i, flagDomainSuffix)); - break; - case 'URL-REGEX': - if (value.includes('.+') || value.includes('.*')) { - extraWeight += 10; - } - if (value.includes('|')) { - extraWeight += 1; - } - sortMap.push(pack(i, sortTypeOrder[type] + extraWeight)); - break; - case null: - sortMap.push(pack(i, 10)); - break; - default: - if (type in sortTypeOrder) { - sortMap.push(pack(i, sortTypeOrder[type])); - } else { - sortMap.push(pack(i, sortTypeOrder[defaultSortTypeOrder])); - } - } - } - - const dumped = trie.dumpMeta(); - - for (let i = 0, len = dumped.length; i < len; i++) { - const originalIndex = unpackFirst(dumped[i]); - const flag = unpackSecond(dumped[i]); - - const type = flag === flagDomain ? 'DOMAIN' : 'DOMAIN-SUFFIX'; - - sortMap.push(pack(originalIndex, sortTypeOrder[type])); - } - - return sortMap - .sort((a, b) => unpackSecond(a) - unpackSecond(b)) - .map(c => ruleSet[unpackFirst(c)]); -}; - -const MARK = 'this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'; - -export const createRuleset = ( - parentSpan: Span, - title: string, description: string[] | readonly string[], date: Date, content: string[], - type: 'ruleset' | 'domainset', - [surgePath, clashPath, singBoxPath, _clashMrsPath]: readonly [ - surgePath: string, - clashPath: string, - singBoxPath: string, - _clashMrsPath?: string - ] -) => parentSpan.traceChildAsync( - `create ruleset: ${path.basename(surgePath, path.extname(surgePath))}`, - async (childSpan) => { - const surgeContent = childSpan.traceChildSync('process surge ruleset', () => { - let _surgeContent; - switch (type) { - case 'domainset': - _surgeContent = [MARK, ...content]; - break; - case 'ruleset': - _surgeContent = [`DOMAIN,${MARK}`, ...processRuleSet(content)]; - break; - default: - throw new TypeError(`Unknown type: ${type}`); - } - - return withBannerArray(title, description, date, _surgeContent); - }); - - const clashContent = childSpan.traceChildSync('convert incoming ruleset to clash', () => { - let _clashContent; - switch (type) { - case 'domainset': - _clashContent = [MARK, ...surgeDomainsetToClashDomainset(content)]; - break; - case 'ruleset': - _clashContent = [`DOMAIN,${MARK}`, ...surgeRulesetToClashClassicalTextRuleset(processRuleSet(content))]; - break; - default: - throw new TypeError(`Unknown type: ${type}`); - } - return withBannerArray(title, description, date, _clashContent); - }); - const singboxContent = childSpan.traceChildSync('convert incoming ruleset to singbox', () => { - let _singBoxContent; - switch (type) { - case 'domainset': - _singBoxContent = surgeDomainsetToSingbox([MARK, ...processRuleSet(content)]); - break; - case 'ruleset': - _singBoxContent = surgeRulesetToSingbox([`DOMAIN,${MARK}`, ...processRuleSet(content)]); - break; - default: - throw new TypeError(`Unknown type: ${type}`); - } - return stringify(_singBoxContent).split('\n'); - }); - - await Promise.all([ - compareAndWriteFile(childSpan, surgeContent, surgePath), - compareAndWriteFile(childSpan, clashContent, clashPath), - compareAndWriteFile(childSpan, singboxContent, singBoxPath) - ]); - - // if (clashMrsPath) { - // if (type === 'domainset') { - // await childSpan.traceChildAsync('clash meta mrs domain ' + clashMrsPath, async () => { - // await fs.promises.mkdir(path.dirname(clashMrsPath), { recursive: true }); - // await convertClashMetaMrs( - // 'domain', 'text', clashPath, clashMrsPath - // ); - // }); - // } - // } - } -); diff --git a/Source/domainset/cdn.conf b/Source/domainset/cdn.conf index 4fe4c2ea..3a533b9c 100644 --- a/Source/domainset/cdn.conf +++ b/Source/domainset/cdn.conf @@ -832,6 +832,7 @@ telemetry.nextjs.org telemetry.vercel.com stats.setapp.com .app-analytics-services.com +.telemetry.services.yofi.ai # influxdata .cloud.influxdata.com .cloud1.influxdata.com @@ -1001,6 +1002,7 @@ ak.sail-horizon.com micro.rubiconproject.com static.iris.informa.com .localizecdn.com +js.verygoodvault.com loader.mantis-intelligence.com mantisloader.mantis-awx.com @@ -1081,6 +1083,8 @@ images.prismic.io .website-files.com .a.storyblok.com images.contentful.com +.payloadcms.app +cms.payloadcms.com # >> Weebly cdn2.editmysite.com @@ -2881,6 +2885,10 @@ rawmarkdown.cloudflareaccess.org static.global.visioncritical.com assets.visioncritical.net s1.divhunt.com +dh-site.b-cdn.net +divhunt-site.b-cdn.net +divhunt.b-cdn.net +global.divhunt.com .smushcdn.com .wpmucdn.com cdn.permutive.com @@ -2921,3 +2929,5 @@ eu-images.contentstack.com images.contentstack.io ui.contentstack.com .static.chaosgroup.com +static.moonpay.com +static.launchdarkly.com