Refactor: add more parsing

This commit is contained in:
SukkaW 2024-09-21 16:24:13 +08:00
parent 6425b6096e
commit 1783cccf7f
5 changed files with 97 additions and 39 deletions

View File

@ -4,13 +4,11 @@ import os from 'node:os';
import path from 'node:path'; import path from 'node:path';
import { mkdirSync } from 'node:fs'; import { mkdirSync } from 'node:fs';
import picocolors from 'picocolors'; import picocolors from 'picocolors';
import { fastStringArrayJoin } from './misc'; import { fastStringArrayJoin, identity } from './misc';
import { performance } from 'node:perf_hooks'; import { performance } from 'node:perf_hooks';
import fs from 'node:fs'; import fs from 'node:fs';
import { stringHash } from './string-hash'; import { stringHash } from './string-hash';
const identity = (x: any) => x;
const enum CacheStatus { const enum CacheStatus {
Hit = 'hit', Hit = 'hit',
Stale = 'stale', Stale = 'stale',
@ -186,7 +184,7 @@ export class Cache<S = string> {
if (cached == null) { if (cached == null) {
console.log(picocolors.yellow('[cache] miss'), picocolors.gray(key), picocolors.gray(`ttl: ${TTL.humanReadable(ttl)}`)); console.log(picocolors.yellow('[cache] miss'), picocolors.gray(key), picocolors.gray(`ttl: ${TTL.humanReadable(ttl)}`));
const serializer = 'serializer' in opt ? opt.serializer : identity; const serializer = 'serializer' in opt ? opt.serializer : identity as any;
const promise = fn(); const promise = fn();
@ -202,7 +200,7 @@ export class Cache<S = string> {
this.updateTtl(key, ttl); this.updateTtl(key, ttl);
} }
const deserializer = 'deserializer' in opt ? opt.deserializer : identity; const deserializer = 'deserializer' in opt ? opt.deserializer : identity as any;
return deserializer(cached); return deserializer(cached);
} }

View File

@ -1,8 +1,7 @@
import picocolors from 'picocolors'; import picocolors from 'picocolors';
import { domainWildCardToRegex } from './misc'; import { domainWildCardToRegex, identity } from './misc';
import { isProbablyIpv4, isProbablyIpv6 } from './is-fast-ip'; import { isProbablyIpv4, isProbablyIpv6 } from './is-fast-ip';
const identity = <T>(x: T): T => x;
const unsupported = Symbol('unsupported'); const unsupported = Symbol('unsupported');
// https://dreamacro.github.io/clash/configuration/rules.html // https://dreamacro.github.io/clash/configuration/rules.html

View File

@ -13,7 +13,7 @@ import { nullthrow } from 'foxact/nullthrow';
import createKeywordFilter from './aho-corasick'; import createKeywordFilter from './aho-corasick';
import picocolors from 'picocolors'; import picocolors from 'picocolors';
import fs from 'node:fs'; import fs from 'node:fs';
import { fastStringArrayJoin, writeFile } from './misc'; import { appendArrayFromSet, fastStringArrayJoin, writeFile } from './misc';
import { readFileByLine } from './fetch-text-by-line'; import { readFileByLine } from './fetch-text-by-line';
import { asyncWriteToStream } from './async-write-to-stream'; import { asyncWriteToStream } from './async-write-to-stream';
@ -40,26 +40,49 @@ abstract class RuleOutput {
protected domainTrie = createTrie<unknown>(null, true); protected domainTrie = createTrie<unknown>(null, true);
protected domainKeywords = new Set<string>(); protected domainKeywords = new Set<string>();
protected domainWildcard = new Set<string>(); protected domainWildcard = new Set<string>();
protected userAgent = new Set<string>();
protected processName = new Set<string>();
protected processPath = new Set<string>();
protected urlRegex = new Set<string>();
protected ipcidr = new Set<string>(); protected ipcidr = new Set<string>();
protected ipcidrNoResolve = new Set<string>(); protected ipcidrNoResolve = new Set<string>();
protected ipasn = new Set<string>();
protected ipasnNoResolve = new Set<string>();
protected ipcidr6 = new Set<string>(); protected ipcidr6 = new Set<string>();
protected ipcidr6NoResolve = new Set<string>(); protected ipcidr6NoResolve = new Set<string>();
protected geoip = new Set<string>();
protected groipNoResolve = new Set<string>();
// TODO: add sourceIpcidr // TODO: add sourceIpcidr
// TODO: add sourcePort // TODO: add sourcePort
// TODO: add port // TODO: add port
// TODO: processName
// TODO: processPath
// TODO: userAgent
// TODO: urlRegex
protected otherRules: Array<[raw: string, orderWeight: number]> = []; protected otherRules: Array<[raw: string, orderWeight: number]> = [];
protected abstract type: 'domainset' | 'non_ip' | 'ip'; protected abstract type: 'domainset' | 'non_ip' | 'ip';
protected pendingPromise = Promise.resolve(); protected pendingPromise = Promise.resolve();
static jsonToLines(this: void, json: unknown): string[] { static jsonToLines = (json: unknown): string[] => stringify(json).split('\n');
return stringify(json).split('\n');
} static domainWildCardToRegex = (domain: string) => {
let result = '^';
for (let i = 0, len = domain.length; i < len; i++) {
switch (domain[i]) {
case '.':
result += String.raw`\.`;
break;
case '*':
result += '[a-zA-Z0-9-_.]*?';
break;
case '?':
result += '[a-zA-Z0-9-_.]';
break;
default:
result += domain[i];
}
}
result += '$';
return result;
};
constructor( constructor(
protected readonly span: Span, protected readonly span: Span,
@ -114,11 +137,6 @@ abstract class RuleOutput {
return this; return this;
} }
addDomainWildcard(wildcard: string) {
this.domainWildcard.add(wildcard);
return this;
}
private async addFromDomainsetPromise(source: AsyncIterable<string> | Iterable<string> | string[]) { private async addFromDomainsetPromise(source: AsyncIterable<string> | Iterable<string> | string[]) {
for await (const line of source) { for await (const line of source) {
if (line[0] === '.') { if (line[0] === '.') {
@ -152,14 +170,35 @@ abstract class RuleOutput {
this.addDomainKeyword(value); this.addDomainKeyword(value);
break; break;
case 'DOMAIN-WILDCARD': case 'DOMAIN-WILDCARD':
this.addDomainWildcard(value); this.domainWildcard.add(value);
break; break;
case 'USER-AGENT':
this.userAgent.add(value);
break;
case 'PROCESS-NAME':
if (value.includes('/') || value.includes('\\')) {
this.processPath.add(value);
} else {
this.processName.add(value);
}
break;
case 'URL-REGEX': {
const [, ...rest] = splitted;
this.urlRegex.add(rest.join(','));
break;
}
case 'IP-CIDR': case 'IP-CIDR':
(arg === 'no-resolve' ? this.ipcidrNoResolve : this.ipcidr).add(value); (arg === 'no-resolve' ? this.ipcidrNoResolve : this.ipcidr).add(value);
break; break;
case 'IP-CIDR6': case 'IP-CIDR6':
(arg === 'no-resolve' ? this.ipcidr6NoResolve : this.ipcidr6).add(value); (arg === 'no-resolve' ? this.ipcidr6NoResolve : this.ipcidr6).add(value);
break; break;
case 'IP-ASN':
(arg === 'no-resolve' ? this.ipasnNoResolve : this.ipasn).add(value);
break;
case 'GEOIP':
(arg === 'no-resolve' ? this.groipNoResolve : this.geoip).add(value);
break;
default: default:
this.otherRules.push([line, type in sortTypeOrder ? sortTypeOrder[type] : sortTypeOrder[defaultSortTypeOrder]]); this.otherRules.push([line, type in sortTypeOrder ? sortTypeOrder[type] : sortTypeOrder[defaultSortTypeOrder]]);
break; break;
@ -374,9 +413,7 @@ export class RulesetOutput extends RuleOutput {
const kwfilter = createKeywordFilter(this.domainKeywords); const kwfilter = createKeywordFilter(this.domainKeywords);
const sortedDomains = sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap); for (const domain of sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap)) {
for (let i = 0, len = sortedDomains.length; i < len; i++) {
const domain = sortedDomains[i];
if (kwfilter(domain)) { if (kwfilter(domain)) {
continue; continue;
} }
@ -387,22 +424,36 @@ export class RulesetOutput extends RuleOutput {
} }
} }
for (const keyword of this.domainKeywords) { appendArrayFromSet(results, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`);
results.push(`DOMAIN-KEYWORD,${keyword}`); appendArrayFromSet(results, this.domainWildcard, i => `DOMAIN-WILDCARD,${i}`);
}
for (const wildcard of this.domainWildcard) { appendArrayFromSet(results, this.userAgent, i => `USER-AGENT,${i}`);
results.push(`DOMAIN-WILDCARD,${wildcard}`);
appendArrayFromSet(results, this.processName, i => `PROCESS-NAME,${i}`);
appendArrayFromSet(results, this.processPath, i => `PROCESS-NAME,${i}`);
for (const [rule] of this.otherRules.sort((a, b) => a[1] - b[1])) {
results.push(rule);
} }
const sortedRules = this.otherRules.sort((a, b) => a[1] - b[1]); appendArrayFromSet(results, this.urlRegex, i => `URL-REGEX,${i}`);
for (let i = 0, len = sortedRules.length; i < len; i++) {
results.push(sortedRules[i][0]);
}
this.ipcidr.forEach(cidr => results.push(`IP-CIDR,${cidr}`)); appendArrayFromSet(results, this.ipcidrNoResolve, i => `IP-CIDR,${i},no-resolve`);
this.ipcidrNoResolve.forEach(cidr => results.push(`IP-CIDR,${cidr},no-resolve`)); appendArrayFromSet(results, this.ipcidr6NoResolve, i => `IP-CIDR6,${i},no-resolve`);
this.ipcidr6.forEach(cidr => results.push(`IP-CIDR6,${cidr}`)); appendArrayFromSet(results, this.ipasnNoResolve, i => `IP-ASN,${i},no-resolve`);
this.ipcidr6NoResolve.forEach(cidr => results.push(`IP-CIDR6,${cidr},no-resolve`)); appendArrayFromSet(results, this.groipNoResolve, i => `GEOIP,${i},no-resolve`);
// Rules that allow DNS resolution are emitted last. Each set is appended
// exactly once: `results` is a plain array, so iterating `geoip` or
// `groipNoResolve` a second time would write every GEOIP rule twice
// (the no-resolve GEOIP entries are already appended earlier in this sequence).
appendArrayFromSet(results, this.ipcidr, i => `IP-CIDR,${i}`);
appendArrayFromSet(results, this.ipcidr6, i => `IP-CIDR6,${i}`);
appendArrayFromSet(results, this.ipasn, i => `IP-ASN,${i}`);
appendArrayFromSet(results, this.geoip, i => `GEOIP,${i}`);
const surge = results; const surge = results;
const clash = surgeRulesetToClashClassicalTextRuleset(results); const clash = surgeRulesetToClashClassicalTextRuleset(results);

View File

@ -61,6 +61,17 @@ export const domainWildCardToRegex = (domain: string) => {
return result; return result;
}; };
/** Returns its argument unchanged; serves as the default no-op transformer. */
export const identity = <T>(x: T): T => x;

/**
 * Appends every element of `source` to the end of `dest`, in the set's
 * iteration order, passing each element through `transformer` first.
 *
 * @param dest - Array that is mutated in place; existing elements are kept.
 * @param source - Set whose elements are appended.
 * @param transformer - Mapping applied to each element before it is pushed;
 *   defaults to `identity`, i.e. elements are appended as-is.
 */
export const appendArrayFromSet = <T>(dest: T[], source: Set<T>, transformer: (item: T) => T = identity) => {
  for (const item of source) {
    dest.push(transformer(item));
  }
};
export const output = (id: string, type: 'non_ip' | 'ip' | 'domainset') => { export const output = (id: string, type: 'non_ip' | 'ip' | 'domainset') => {
return [ return [
path.join(OUTPUT_SURGE_DIR, type, id + '.conf'), path.join(OUTPUT_SURGE_DIR, type, id + '.conf'),

View File

@ -11,13 +11,12 @@ import { deserializeArray, fsFetchCache, serializeArray, createCacheKey } from '
import type { Span } from '../trace'; import type { Span } from '../trace';
import createKeywordFilter from './aho-corasick'; import createKeywordFilter from './aho-corasick';
import { looseTldtsOpt } from '../constants/loose-tldts-opt'; import { looseTldtsOpt } from '../constants/loose-tldts-opt';
import { identity } from './misc';
const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null
let foundDebugDomain = false; let foundDebugDomain = false;
const temporaryBypass = typeof DEBUG_DOMAIN_TO_FIND === 'string'; const temporaryBypass = typeof DEBUG_DOMAIN_TO_FIND === 'string';
const identity = <T>(x: T) => x;
const domainListLineCb = (l: string, set: string[], includeAllSubDomain: boolean, meta: string) => { const domainListLineCb = (l: string, set: string[], includeAllSubDomain: boolean, meta: string) => {
let line = processLine(l); let line = processLine(l);
if (!line) return; if (!line) return;