Refactor: new MITM hostname sgmodule codegen

This commit is contained in:
SukkaW
2024-09-25 21:14:56 +08:00
parent d3a015ff1b
commit 5577f3b14b
7 changed files with 138 additions and 59 deletions

View File

@@ -14,6 +14,7 @@ import { DomainsetOutput, RulesetOutput } from './lib/create-file';
// Line prefixes ("magic commands") looked for in the source files being processed.
// NOTE(review): only the handling of MAGIC_COMMAND_SGMODULE_MITM_HOSTNAMES is visible in
// this view; the meanings of the other three are inferred from their names — confirm.
const MAGIC_COMMAND_SKIP = '# $ custom_build_script';
const MAGIC_COMMAND_TITLE = '# $ meta_title ';
const MAGIC_COMMAND_DESCRIPTION = '# $ meta_description ';
// The trimmed remainder of a line starting with this prefix is used as the output
// pathname of the MITM sgmodule generated for that file.
const MAGIC_COMMAND_SGMODULE_MITM_HOSTNAMES = '# $ sgmodule_mitm_hostnames ';
const domainsetSrcFolder = 'domainset' + path.sep;
@@ -73,6 +74,7 @@ const processFile = (span: Span, sourcePath: string) => {
let title = '';
const descriptions: string[] = [];
let sgmodulePathname: string | null = null;
try {
for await (const line of readFileByLine(sourcePath)) {
@@ -90,6 +92,11 @@ const processFile = (span: Span, sourcePath: string) => {
continue;
}
if (line.startsWith(MAGIC_COMMAND_SGMODULE_MITM_HOSTNAMES)) {
sgmodulePathname = line.slice(MAGIC_COMMAND_SGMODULE_MITM_HOSTNAMES.length).trim();
continue;
}
const l = processLine(line);
if (l) {
lines.push(l);
@@ -100,7 +107,7 @@ const processFile = (span: Span, sourcePath: string) => {
console.trace(e);
}
return [title, descriptions, lines] as const;
return [title, descriptions, lines, sgmodulePathname] as const;
});
};
@@ -148,7 +155,7 @@ async function transformRuleset(parentSpan: Span, sourcePath: string, relativePa
throw new TypeError(`Invalid type: ${type}`);
}
const [title, descriptions, lines] = res;
const [title, descriptions, lines, sgmodulePathname] = res;
let description: string[];
if (descriptions.length) {
@@ -162,6 +169,7 @@ async function transformRuleset(parentSpan: Span, sourcePath: string, relativePa
return new RulesetOutput(span, id, type)
.withTitle(title)
.withDescription(description)
.withMitmSgmodulePath(sgmodulePathname)
.addFromRuleset(lines)
.write();
});

View File

@@ -7,30 +7,6 @@ import { getHostname } from 'tldts';
import { OUTPUT_SURGE_DIR } from './constants/dir';
// Hard-coded hostname patterns; some entries use a leading `*` wildcard.
// Entries that are commented out are disabled but kept for reference.
// NOTE(review): presumably these are always included in the generated MITM hostname
// list — the consumer of this constant is not visible here, confirm before relying on it.
const PRESET_MITM_HOSTNAMES = [
// '*baidu.com',
'*.ydstatic.com',
// '*snssdk.com',
// '*musical.com',
// '*musical.ly',
// '*snssdk.ly',
'api.zhihu.com',
'www.zhihu.com',
'api.chelaile.net.cn',
'atrace.chelaile.net.cn',
'*.meituan.net',
'ctrl.playcvn.com',
'ctrl.playcvn.net',
'ctrl.zmzapi.com',
'ctrl.zmzapi.net',
'api.zhuishushenqi.com',
'b.zhuishushenqi.com',
'ggic.cmvideo.cn',
'ggic2.cmvideo.cn',
'mrobot.pcauto.com.cn',
'mrobot.pconline.com.cn',
'home.umetrip.com',
'discardrp.umetrip.com',
'startup.umetrip.com',
'dsp-x.jd.com',
'bdsp-x.jd.com'
];

View File

@@ -1,4 +1,4 @@
import { OUTPUT_CLASH_DIR, OUTPUT_SINGBOX_DIR, OUTPUT_SURGE_DIR } from '../../constants/dir';
import { OUTPUT_CLASH_DIR, OUTPUT_MODULES_DIR, OUTPUT_SINGBOX_DIR, OUTPUT_SURGE_DIR } from '../../constants/dir';
import type { Span } from '../../trace';
import { createTrie } from '../trie';
import stringify from 'json-stringify-pretty-compact';
@@ -256,7 +256,7 @@ export abstract class RuleOutput<TPreprocessed = unknown> {
invariant(this.title, 'Missing title');
invariant(this.description, 'Missing description');
await Promise.all([
const promises = [
compareAndWriteFile(
this.span,
withBannerArray(
@@ -282,12 +282,37 @@ export abstract class RuleOutput<TPreprocessed = unknown> {
this.singbox(),
path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json')
)
]);
];
if (this.mitmSgmodule) {
const sgmodule = this.mitmSgmodule();
const sgMOdulePath = this.mitmSgmodulePath ?? path.join(this.type, this.id + '.sgmodule');
if (sgmodule) {
promises.push(
compareAndWriteFile(
this.span,
sgmodule,
path.join(OUTPUT_MODULES_DIR, sgMOdulePath)
)
);
}
}
await Promise.all(promises);
}
abstract surge(): string[];
abstract clash(): string[];
abstract singbox(): string[];
/**
 * Output path of the MITM sgmodule, relative to the modules output directory.
 * Stays `null` until a non-empty path is supplied via `withMitmSgmodulePath`.
 */
protected mitmSgmodulePath: string | null = null;

/**
 * Fluent setter for the MITM sgmodule output path.
 *
 * @param path - Relative path of the sgmodule file; `null` or an empty string
 *   leaves the currently stored value untouched.
 * @returns This instance, so calls can be chained.
 */
withMitmSgmodulePath(path: string | null) {
  if (!path) {
    return this;
  }

  this.mitmSgmodulePath = path;
  return this;
}
/**
 * Optional hook producing the lines of a MITM sgmodule file.
 *
 * When a subclass defines it, the writer code in this class calls it and writes the
 * returned lines under the modules output directory; a `null` result means no
 * sgmodule file is written.
 */
abstract mitmSgmodule?(): string[] | null;
}
export const fileEqual = async (linesA: string[], source: AsyncIterable<string>): Promise<boolean> => {

View File

@@ -6,6 +6,9 @@ import { appendArrayFromSet } from '../misc';
import type { SingboxSourceFormat } from '../singbox';
import { sortDomains } from '../stable-sort-domain';
import { RuleOutput } from './base';
import picocolors from 'picocolors';
import { normalizeDomain } from '../normalize-domain';
import { isProbablyIpv4 } from '../is-fast-ip';
type Preprocessed = [domain: string[], domainSuffix: string[], sortedDomainRules: string[]];
@@ -131,4 +134,97 @@ export class RulesetOutput extends RuleOutput<Preprocessed> {
return RuleOutput.jsonToLines(singbox);
}
/**
 * Build the lines of a Surge `.sgmodule` that appends MITM hostnames derived from
 * this ruleset's URL regex rules.
 *
 * For every URL regex, the part before the first `/` is taken as a hostname pattern,
 * rewritten from regex syntax into hostlist wildcards (`*` / `?`), and every
 * parenthesised alternation such as `(a|b|)` is expanded into one candidate per
 * alternative. Candidates accepted by `normalizeDomain` (with the wildcards replaced
 * by placeholder letters) are emitted; the rest are reported on stderr unless they
 * look like an IPv4 address.
 *
 * @returns The module file lines, or `null` when there are no URL regex rules or no
 *   sgmodule path has been configured for this output.
 */
mitmSgmodule(): string[] | null {
  if (this.urlRegex.size === 0 || this.mitmSgmodulePath === null) {
    return null;
  }

  const HTTPS_OPTIONAL_PREFIX = '^https?://';
  const HTTPS_PREFIX = '^https://';

  // One parenthesised alternation, e.g. `(a|b|)`. The whole inner text is captured
  // and split on `|` afterwards: a repeated capture group would only retain its
  // last iteration and silently drop the leading alternatives.
  const alternationRegex = /\(((?:[^()|]+\|)+[^()|]*)\)/g;

  const urlRegexResults: Array<{ origin: string, processed: string[] }> = [];
  const parsedFailures: Array<[original: string, processed: string]> = [];
  const parsed: Array<[original: string, domain: string]> = [];

  for (let urlRegex of this.urlRegex) {
    // Plain-HTTP-only rules never need MITM
    if (urlRegex.startsWith('http://') || urlRegex.startsWith('^http://')) {
      continue;
    }

    if (urlRegex.startsWith(HTTPS_OPTIONAL_PREFIX)) {
      urlRegex = urlRegex.slice(HTTPS_OPTIONAL_PREFIX.length);
    }
    if (urlRegex.startsWith(HTTPS_PREFIX)) {
      urlRegex = urlRegex.slice(HTTPS_PREFIX.length);
    }

    const potentialHostname = urlRegex.split('/')[0]
      // pre process regex
      .replaceAll(String.raw`\.`, '.')
      .replaceAll('.+', '*')
      .replaceAll(/([a-z])\?/g, '($1|)')
      // convert regex to surge hostlist syntax
      .replaceAll('([a-z])', '?')
      .replaceAll(String.raw`\d`, '?')
      .replaceAll(/\*+/g, '*');

    // Expand every alternation group into the cartesian product of its options.
    // Groups are substituted left to right, so each `replace` call (first occurrence
    // only) consumes exactly the group currently being expanded.
    let processed: string[] = [potentialHostname];
    for (const match of potentialHostname.matchAll(alternationRegex)) {
      const fullMatch = match[0];
      const options = (match[1] ?? '').split('|');
      processed = processed.flatMap(
        // A replacer function keeps `$` sequences in an option from being
        // interpreted as special replacement patterns.
        combination => options.map(option => combination.replace(fullMatch, () => option))
      );
    }

    urlRegexResults.push({
      origin: potentialHostname,
      processed
    });
  }

  for (const result of urlRegexResults) {
    for (const candidate of result.processed) {
      // Wildcards are swapped for ordinary letters so the candidate can be validated
      // as if it were a concrete domain.
      const probe = candidate.replaceAll('*', 'a').replaceAll('?', 'b');
      if (normalizeDomain(probe)) {
        parsed.push([result.origin, candidate]);
      } else if (!isProbablyIpv4(candidate)) {
        parsedFailures.push([result.origin, candidate]);
      }
    }
  }

  // Only report when something actually failed to parse
  if (parsedFailures.length > 0) {
    console.error(picocolors.bold('Parsed Failed'));
    console.table(parsedFailures);
  }

  const hostnames = Array.from(new Set(parsed.map(([, domain]) => domain)));

  return [
    '#!name=[Sukka] Surge Reject MITM',
    '#!desc=为 URL Regex 规则组启用 MITM',
    '',
    '[MITM]',
    'hostname = %APPEND% ' + hostnames.join(', ')
  ];
}
}