From 1783cccf7f5a4ad82fe8ff89caa13e665d8d4536 Mon Sep 17 00:00:00 2001
From: SukkaW
Date: Sat, 21 Sep 2024 16:24:13 +0800
Subject: [PATCH] Refactor: add more parsing

---
Review notes (not part of the commit message):
- create-file.ts: the misspelled field `groipNoResolve` is now `geoipNoResolve`.
- create-file.ts: removed the trailing `for` loops that pushed every GEOIP rule
  a second time after `appendArrayFromSet` had already emitted it; the last
  hunk header and the diffstat are adjusted to match.
- Type arguments (`<T>`, `<string>`) lost in extraction are restored where the
  code does not type-check without them.
- Indentation and blank-line placement are reconstructed from the hunk counts;
  verify against the target tree before applying.
- `index` blob hashes are carried over unchanged and no longer describe the
  edited post-image of create-file.ts.

 Build/lib/cache-filesystem.ts |   8 +--
 Build/lib/clash.ts            |   3 +-
 Build/lib/create-file.ts      | 104 +++++++++++++++++++++++++---------
 Build/lib/misc.ts             |  11 ++++
 Build/lib/parse-filter.ts     |   3 +-
 5 files changed, 90 insertions(+), 39 deletions(-)

diff --git a/Build/lib/cache-filesystem.ts b/Build/lib/cache-filesystem.ts
index c17dde31..b5c6df2a 100644
--- a/Build/lib/cache-filesystem.ts
+++ b/Build/lib/cache-filesystem.ts
@@ -4,13 +4,11 @@ import os from 'node:os';
 import path from 'node:path';
 import { mkdirSync } from 'node:fs';
 import picocolors from 'picocolors';
-import { fastStringArrayJoin } from './misc';
+import { fastStringArrayJoin, identity } from './misc';
 import { performance } from 'node:perf_hooks';
 import fs from 'node:fs';
 import { stringHash } from './string-hash';
 
-const identity = (x: any) => x;
-
 const enum CacheStatus {
   Hit = 'hit',
   Stale = 'stale',
@@ -186,7 +184,7 @@ export class Cache {
     if (cached == null) {
       console.log(picocolors.yellow('[cache] miss'), picocolors.gray(key), picocolors.gray(`ttl: ${TTL.humanReadable(ttl)}`));
 
-      const serializer = 'serializer' in opt ? opt.serializer : identity;
+      const serializer = 'serializer' in opt ? opt.serializer : identity as any;
 
       const promise = fn();
 
@@ -202,7 +200,7 @@ export class Cache {
       this.updateTtl(key, ttl);
     }
 
-    const deserializer = 'deserializer' in opt ? opt.deserializer : identity;
+    const deserializer = 'deserializer' in opt ? opt.deserializer : identity as any;
     return deserializer(cached);
   }
 
diff --git a/Build/lib/clash.ts b/Build/lib/clash.ts
index a7fc74de..d38ccb2f 100644
--- a/Build/lib/clash.ts
+++ b/Build/lib/clash.ts
@@ -1,8 +1,7 @@
 import picocolors from 'picocolors';
-import { domainWildCardToRegex } from './misc';
+import { domainWildCardToRegex, identity } from './misc';
 import { isProbablyIpv4, isProbablyIpv6 } from './is-fast-ip';
 
-const identity = <T>(x: T): T => x;
 const unsupported = Symbol('unsupported');
 
 // https://dreamacro.github.io/clash/configuration/rules.html
diff --git a/Build/lib/create-file.ts b/Build/lib/create-file.ts
index f229b6cc..0f1fee9c 100644
--- a/Build/lib/create-file.ts
+++ b/Build/lib/create-file.ts
@@ -13,7 +13,7 @@ import { nullthrow } from 'foxact/nullthrow';
 import createKeywordFilter from './aho-corasick';
 import picocolors from 'picocolors';
 import fs from 'node:fs';
-import { fastStringArrayJoin, writeFile } from './misc';
+import { appendArrayFromSet, fastStringArrayJoin, writeFile } from './misc';
 import { readFileByLine } from './fetch-text-by-line';
 import { asyncWriteToStream } from './async-write-to-stream';
 
@@ -40,26 +40,49 @@ abstract class RuleOutput {
   protected domainTrie = createTrie(null, true);
   protected domainKeywords = new Set<string>();
   protected domainWildcard = new Set<string>();
+  protected userAgent = new Set<string>();
+  protected processName = new Set<string>();
+  protected processPath = new Set<string>();
+  protected urlRegex = new Set<string>();
   protected ipcidr = new Set<string>();
   protected ipcidrNoResolve = new Set<string>();
+  protected ipasn = new Set<string>();
+  protected ipasnNoResolve = new Set<string>();
   protected ipcidr6 = new Set<string>();
   protected ipcidr6NoResolve = new Set<string>();
+  protected geoip = new Set<string>();
+  protected geoipNoResolve = new Set<string>();
   // TODO: add sourceIpcidr
   // TODO: add sourcePort
   // TODO: add port
-  // TODO: processName
-  // TODO: processPath
-  // TODO: userAgent
-  // TODO: urlRegex
 
   protected otherRules: Array<[raw: string, orderWeight: number]> = [];
   protected abstract type: 'domainset' | 'non_ip' | 'ip';
 
   protected pendingPromise = Promise.resolve();
 
-  static jsonToLines(this: void, json: unknown): string[] {
-    return stringify(json).split('\n');
-  }
+  static jsonToLines = (json: unknown): string[] => stringify(json).split('\n');
+
+  static domainWildCardToRegex = (domain: string) => {
+    let result = '^';
+    for (let i = 0, len = domain.length; i < len; i++) {
+      switch (domain[i]) {
+        case '.':
+          result += String.raw`\.`;
+          break;
+        case '*':
+          result += '[a-zA-Z0-9-_.]*?';
+          break;
+        case '?':
+          result += '[a-zA-Z0-9-_.]';
+          break;
+        default:
+          result += domain[i];
+      }
+    }
+    result += '$';
+    return result;
+  };
 
   constructor(
     protected readonly span: Span,
@@ -114,11 +137,6 @@ abstract class RuleOutput {
     return this;
   }
 
-  addDomainWildcard(wildcard: string) {
-    this.domainWildcard.add(wildcard);
-    return this;
-  }
-
   private async addFromDomainsetPromise(source: AsyncIterable<string> | Iterable<string> | string[]) {
     for await (const line of source) {
       if (line[0] === '.') {
@@ -152,14 +170,35 @@ abstract class RuleOutput {
           this.addDomainKeyword(value);
           break;
         case 'DOMAIN-WILDCARD':
-          this.addDomainWildcard(value);
+          this.domainWildcard.add(value);
           break;
+        case 'USER-AGENT':
+          this.userAgent.add(value);
+          break;
+        case 'PROCESS-NAME':
+          if (value.includes('/') || value.includes('\\')) {
+            this.processPath.add(value);
+          } else {
+            this.processName.add(value);
+          }
+          break;
+        case 'URL-REGEX': {
+          const [, ...rest] = splitted;
+          this.urlRegex.add(rest.join(','));
+          break;
+        }
         case 'IP-CIDR':
           (arg === 'no-resolve' ? this.ipcidrNoResolve : this.ipcidr).add(value);
           break;
         case 'IP-CIDR6':
           (arg === 'no-resolve' ? this.ipcidr6NoResolve : this.ipcidr6).add(value);
           break;
+        case 'IP-ASN':
+          (arg === 'no-resolve' ? this.ipasnNoResolve : this.ipasn).add(value);
+          break;
+        case 'GEOIP':
+          (arg === 'no-resolve' ? this.geoipNoResolve : this.geoip).add(value);
+          break;
         default:
           this.otherRules.push([line, type in sortTypeOrder ? sortTypeOrder[type] : sortTypeOrder[defaultSortTypeOrder]]);
           break;
@@ -374,9 +413,7 @@ export class RulesetOutput extends RuleOutput {
 
     const kwfilter = createKeywordFilter(this.domainKeywords);
 
-    const sortedDomains = sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap);
-    for (let i = 0, len = sortedDomains.length; i < len; i++) {
-      const domain = sortedDomains[i];
+    for (const domain of sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap)) {
       if (kwfilter(domain)) {
         continue;
       }
@@ -387,22 +424,29 @@ export class RulesetOutput extends RuleOutput {
       }
     }
 
-    for (const keyword of this.domainKeywords) {
-      results.push(`DOMAIN-KEYWORD,${keyword}`);
-    }
-    for (const wildcard of this.domainWildcard) {
-      results.push(`DOMAIN-WILDCARD,${wildcard}`);
+    appendArrayFromSet(results, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`);
+    appendArrayFromSet(results, this.domainWildcard, i => `DOMAIN-WILDCARD,${i}`);
+
+    appendArrayFromSet(results, this.userAgent, i => `USER-AGENT,${i}`);
+
+    appendArrayFromSet(results, this.processName, i => `PROCESS-NAME,${i}`);
+    appendArrayFromSet(results, this.processPath, i => `PROCESS-NAME,${i}`);
+
+    for (const [rule] of this.otherRules.sort((a, b) => a[1] - b[1])) {
+      results.push(rule);
     }
 
-    const sortedRules = this.otherRules.sort((a, b) => a[1] - b[1]);
-    for (let i = 0, len = sortedRules.length; i < len; i++) {
-      results.push(sortedRules[i][0]);
-    }
+    appendArrayFromSet(results, this.urlRegex, i => `URL-REGEX,${i}`);
 
-    this.ipcidr.forEach(cidr => results.push(`IP-CIDR,${cidr}`));
-    this.ipcidrNoResolve.forEach(cidr => results.push(`IP-CIDR,${cidr},no-resolve`));
-    this.ipcidr6.forEach(cidr => results.push(`IP-CIDR6,${cidr}`));
-    this.ipcidr6NoResolve.forEach(cidr => results.push(`IP-CIDR6,${cidr},no-resolve`));
+    appendArrayFromSet(results, this.ipcidrNoResolve, i => `IP-CIDR,${i},no-resolve`);
+    appendArrayFromSet(results, this.ipcidr6NoResolve, i => `IP-CIDR6,${i},no-resolve`);
+    appendArrayFromSet(results, this.ipasnNoResolve, i => `IP-ASN,${i},no-resolve`);
+    appendArrayFromSet(results, this.geoipNoResolve, i => `GEOIP,${i},no-resolve`);
+
+    appendArrayFromSet(results, this.ipcidr, i => `IP-CIDR,${i}`);
+    appendArrayFromSet(results, this.ipcidr6, i => `IP-CIDR6,${i}`);
+    appendArrayFromSet(results, this.ipasn, i => `IP-ASN,${i}`);
+    appendArrayFromSet(results, this.geoip, i => `GEOIP,${i}`);
 
     const surge = results;
     const clash = surgeRulesetToClashClassicalTextRuleset(results);
diff --git a/Build/lib/misc.ts b/Build/lib/misc.ts
index 723d9e1d..13ceaa8c 100644
--- a/Build/lib/misc.ts
+++ b/Build/lib/misc.ts
@@ -61,6 +61,17 @@ export const domainWildCardToRegex = (domain: string) => {
   return result;
 };
 
+export const identity = <T>(x: T): T => x;
+
+export const appendArrayFromSet = <T>(dest: T[], source: Set<T>, transformer: (item: T) => T = identity) => {
+  const iterator = source.values();
+  let step: IteratorResult<T>;
+
+  while ((step = iterator.next(), !step.done)) {
+    dest.push(transformer(step.value));
+  }
+};
+
 export const output = (id: string, type: 'non_ip' | 'ip' | 'domainset') => {
   return [
     path.join(OUTPUT_SURGE_DIR, type, id + '.conf'),
diff --git a/Build/lib/parse-filter.ts b/Build/lib/parse-filter.ts
index 61c2868b..f0f2e613 100644
--- a/Build/lib/parse-filter.ts
+++ b/Build/lib/parse-filter.ts
@@ -11,13 +11,12 @@ import { deserializeArray, fsFetchCache, serializeArray, createCacheKey } from '
 import type { Span } from '../trace';
 import createKeywordFilter from './aho-corasick';
 import { looseTldtsOpt } from '../constants/loose-tldts-opt';
+import { identity } from './misc';
 
 const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null
 let foundDebugDomain = false;
 const temporaryBypass = typeof DEBUG_DOMAIN_TO_FIND === 'string';
 
-const identity = <T>(x: T) => x;
-
 const domainListLineCb = (l: string, set: string[], includeAllSubDomain: boolean, meta: string) => {
   let line = processLine(l);
   if (!line) return;