From 3ca9122a842d7cd15d13894892d5bfcd8c13942d Mon Sep 17 00:00:00 2001 From: SukkaW Date: Mon, 23 Sep 2024 15:15:07 +0800 Subject: [PATCH] Improve ruleset build process --- Build/build-reject-domainset.ts | 7 +++-- Build/lib/get-phishing-domains.ts | 10 +++++-- Build/lib/rules/base.ts | 27 +++++++++++++---- Build/lib/rules/domainset.ts | 49 ++++++++++++++---------------- Build/lib/rules/ip.ts | 36 +++++++++++----------- Build/lib/rules/ruleset.ts | 50 +++++++++++++++---------------- 6 files changed, 96 insertions(+), 83 deletions(-) diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts index fa46f80b..f8b0220c 100644 --- a/Build/build-reject-domainset.ts +++ b/Build/build-reject-domainset.ts @@ -122,7 +122,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as rejectOutput.addDomainKeyword(value); // Add for later deduplication rejectExtraOutput.addDomainKeyword(value); // Add for later deduplication } else if (type === 'DOMAIN-SUFFIX') { - rejectOutput.addDomainSuffix(value); // Add for later deduplication + rejectOutput.whitelistDomain('.' + value); // Add for later deduplication + rejectExtraOutput.whitelistDomain('.' + value); // Add for later deduplication } } }); @@ -139,8 +140,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as rejectExtraOutput.whitelistDomain(domain); } - for (const domain of rejectOutput.sorted) { - rejectExtraOutput.whitelistDomain(domain); + for (let i = 0, len = rejectOutput.$preprocessed.length; i < len; i++) { + rejectOutput.whitelistDomain(rejectOutput.$preprocessed[i]); } }); diff --git a/Build/lib/get-phishing-domains.ts b/Build/lib/get-phishing-domains.ts index e71c046d..b6b49ee4 100644 --- a/Build/lib/get-phishing-domains.ts +++ b/Build/lib/get-phishing-domains.ts @@ -122,6 +122,7 @@ const sensitiveKeywords = createKeywordFilter([ 'fb-com', 'facebook.', 'facebook-', + 'facebook-com', '.facebook', '-facebook', 'coinbase', @@ -139,7 +140,9 @@ const sensitiveKeywords = createKeywordFilter([ 'booking.com-', 'booking-eu', 'vinted-cz', - 'inpost-pl' + 'inpost-pl', + 'login.microsoft', + 'login-microsoft' ]); const lowKeywords = createKeywordFilter([ '-co-jp', @@ -147,7 +150,8 @@ const lowKeywords = createKeywordFilter([ 'customer-', '.www-', 'instagram', - 'microsoft' + 'microsoft', + 'passwordreset' ]); const cacheKey = createCacheKey(__filename); @@ -224,7 +228,7 @@ async function processPhihsingDomains(domainArr: string[]) { domainScoreMap[apexDomain] >= 12 || (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 4) ) { - domainArr.push(`.${apexDomain}`); + domainArr.push('.' + apexDomain); } } diff --git a/Build/lib/rules/base.ts b/Build/lib/rules/base.ts index 9be186ad..07b4d797 100644 --- a/Build/lib/rules/base.ts +++ b/Build/lib/rules/base.ts @@ -11,7 +11,7 @@ import { fastStringArrayJoin, writeFile } from '../misc'; import { readFileByLine } from '../fetch-text-by-line'; import { asyncWriteToStream } from '../async-write-to-stream'; -export abstract class RuleOutput { +export abstract class RuleOutput { protected domainTrie = createTrie(null, true); protected domainKeywords = new Set(); protected domainWildcard = new Set(); @@ -64,10 +64,14 @@ export abstract class RuleOutput { return result; }; + protected span: Span; + constructor( - protected readonly span: Span, + span: Span, protected readonly id: string - ) {} + ) { + this.span = span.traceChild('RuleOutput'); + } protected title: string | null = null; withTitle(title: string) { @@ -234,14 +238,21 @@ export abstract class RuleOutput { return this; } - abstract surge(): string[]; - abstract clash(): string[]; - abstract singbox(): string[]; + protected abstract preprocess(): NonNullable; done() { return this.pendingPromise; } + private $$preprocessed: TPreprocessed | null = null; + + get $preprocessed() { + if (this.$$preprocessed === null) { + this.$$preprocessed = this.span.traceChildSync('RuleOutput#preprocess: ' + this.id, () => this.preprocess()); + } + return this.$$preprocessed; + } + async write(): Promise { await this.done(); @@ -276,6 +287,10 @@ export abstract class RuleOutput { ) ]); } + + abstract surge(): string[]; + abstract clash(): string[]; + abstract singbox(): string[]; } export const fileEqual = async (linesA: string[], source: AsyncIterable): Promise => { diff --git a/Build/lib/rules/domainset.ts b/Build/lib/rules/domainset.ts index 4cf1a215..1aabdc88 100644 --- a/Build/lib/rules/domainset.ts +++ b/Build/lib/rules/domainset.ts @@ -5,56 +5,53 @@ import { RuleOutput } from './base'; import type { SingboxSourceFormat } from '../singbox'; import { nullthrow } from 'foxact/nullthrow'; -export class DomainsetOutput extends RuleOutput { +type Preprocessed = string[]; + +export class DomainsetOutput extends RuleOutput { protected type = 'domainset' as const; - private $sorted: string[] | null = null; + preprocess() { + const kwfilter = createKeywordFilter(this.domainKeywords); - get sorted() { - if (!this.$sorted) { - const kwfilter = createKeywordFilter(this.domainKeywords); + const results: string[] = []; - const results: string[] = []; + const dumped = this.domainTrie.dump(); - const dumped = this.domainTrie.dump(); - - for (let i = 0, len = dumped.length; i < len; i++) { - const domain = dumped[i]; - if (!kwfilter(domain)) { - results.push(domain); - } + for (let i = 0, len = dumped.length; i < len; i++) { + const domain = dumped[i]; + if (!kwfilter(domain)) { + results.push(domain); } - - const sorted = sortDomains(results, this.apexDomainMap, this.subDomainMap); - sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'); - - this.$sorted = sorted; } - return this.$sorted; + + const sorted = sortDomains(results, this.apexDomainMap, this.subDomainMap); + sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'); + + return sorted; } calcDomainMap() { if (!this.apexDomainMap || !this.subDomainMap) { - const { domainMap, subdomainMap } = buildParseDomainMap(this.sorted); + const { domainMap, subdomainMap } = buildParseDomainMap(this.$preprocessed); this.apexDomainMap = domainMap; this.subDomainMap = subdomainMap; } } surge(): string[] { - return this.sorted; + return this.$preprocessed; } clash(): string[] { - return this.sorted.map(i => (i[0] === '.' ? `+${i}` : i)); + return this.$preprocessed.map(i => (i[0] === '.' ? `+${i}` : i)); } singbox(): string[] { const domains: string[] = []; const domainSuffixes: string[] = []; - for (let i = 0, len = this.sorted.length; i < len; i++) { - const domain = this.sorted[i]; + for (let i = 0, len = this.$preprocessed.length; i < len; i++) { + const domain = this.$preprocessed[i]; if (domain[0] === '.') { domainSuffixes.push(domain.slice(1)); } else { @@ -72,11 +69,11 @@ export class DomainsetOutput extends RuleOutput { } getStatMap() { - invariant(this.sorted, 'Non dumped yet'); + invariant(this.$preprocessed, 'Non dumped yet'); invariant(this.apexDomainMap, 'Missing apex domain map'); return Array.from( - nullthrow(this.sorted, 'Non dumped yet') + nullthrow(this.$preprocessed, 'Non dumped yet') .reduce>((acc, cur) => { const suffix = this.apexDomainMap!.get(cur); if (suffix) { diff --git a/Build/lib/rules/ip.ts b/Build/lib/rules/ip.ts index 979eebce..b375ef0d 100644 --- a/Build/lib/rules/ip.ts +++ b/Build/lib/rules/ip.ts @@ -6,30 +6,28 @@ import { RuleOutput } from './base'; import { merge } from 'fast-cidr-tools'; -export class IPListOutput extends RuleOutput { +type Preprocessed = string[]; + +export class IPListOutput extends RuleOutput { protected type = 'ip' as const; constructor(span: Span, id: string, private readonly clashUseRule = true) { super(span, id); } - private $merged: string[] | null = null; - get merged() { - if (!this.$merged) { - const results: string[] = []; - appendArrayInPlace( - results, - merge( - appendArrayInPlace(Array.from(this.ipcidrNoResolve), Array.from(this.ipcidr)), - true - ) - ); - appendArrayFromSet(results, this.ipcidr6NoResolve); - appendArrayFromSet(results, this.ipcidr6); + protected preprocess() { + const results: string[] = []; + appendArrayInPlace( + results, + merge( + appendArrayInPlace(Array.from(this.ipcidrNoResolve), Array.from(this.ipcidr)), + true + ) + ); + appendArrayFromSet(results, this.ipcidr6NoResolve); + appendArrayFromSet(results, this.ipcidr6); - this.$merged = results; - } - return this.$merged; + return results; } private $surge: string[] | null = null; @@ -59,7 +57,7 @@ export class IPListOutput extends RuleOutput { return this.surge(); } - return this.merged; + return this.$preprocessed; } singbox(): string[] { @@ -67,7 +65,7 @@ export class IPListOutput extends RuleOutput { version: 2, rules: [{ domain: ['this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'], - ip_cidr: this.merged + ip_cidr: this.$preprocessed }] }; return RuleOutput.jsonToLines(singbox); diff --git a/Build/lib/rules/ruleset.ts b/Build/lib/rules/ruleset.ts index a7870eab..fb1fc1d8 100644 --- a/Build/lib/rules/ruleset.ts +++ b/Build/lib/rules/ruleset.ts @@ -7,41 +7,39 @@ import type { SingboxSourceFormat } from '../singbox'; import { sortDomains } from '../stable-sort-domain'; import { RuleOutput } from './base'; -export class RulesetOutput extends RuleOutput { +type Preprocessed = [domain: string[], domainSuffix: string[], sortedDomainRules: string[]]; + +export class RulesetOutput extends RuleOutput { constructor(span: Span, id: string, protected type: 'non_ip' | 'ip') { super(span, id); } - private $computed: [domain: string[], domainSuffix: string[], sortedDomainRules: string[]] | null = null; - private computed() { - if (!this.$computed) { - const kwfilter = createKeywordFilter(this.domainKeywords); + protected preprocess() { + const kwfilter = createKeywordFilter(this.domainKeywords); - const domains: string[] = []; - const domainSuffixes: string[] = []; - const sortedDomainRules: string[] = []; + const domains: string[] = []; + const domainSuffixes: string[] = []; + const sortedDomainRules: string[] = []; - for (const domain of sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap)) { - if (kwfilter(domain)) { - continue; - } - if (domain[0] === '.') { - domainSuffixes.push(domain.slice(1)); - sortedDomainRules.push(`DOMAIN-SUFFIX,${domain.slice(1)}`); - } else { - domains.push(domain); - sortedDomainRules.push(`DOMAIN,${domain}`); - } + for (const domain of sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap)) { + if (kwfilter(domain)) { + continue; + } + if (domain[0] === '.') { + domainSuffixes.push(domain.slice(1)); + sortedDomainRules.push(`DOMAIN-SUFFIX,${domain.slice(1)}`); + } else { + domains.push(domain); + sortedDomainRules.push(`DOMAIN,${domain}`); } - - this.$computed = [domains, domainSuffixes, sortedDomainRules]; } - return this.$computed; + + return [domains, domainSuffixes, sortedDomainRules] satisfies Preprocessed; } surge(): string[] { const results: string[] = ['DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe']; - appendArrayInPlace(results, this.computed()[2]); + appendArrayInPlace(results, this.$preprocessed[2]); appendArrayFromSet(results, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`); appendArrayFromSet(results, this.domainWildcard, i => `DOMAIN-WILDCARD,${i}`); @@ -77,7 +75,7 @@ export class RulesetOutput extends RuleOutput { clash(): string[] { const results: string[] = ['DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe']; - appendArrayInPlace(results, this.computed()[2]); + appendArrayInPlace(results, this.$preprocessed[2]); appendArrayFromSet(results, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`); appendArrayFromSet(results, this.domainWildcard, i => `DOMAIN-REGEX,${RuleOutput.domainWildCardToRegex(i)}`); @@ -121,8 +119,8 @@ export class RulesetOutput extends RuleOutput { const singbox: SingboxSourceFormat = { version: 2, rules: [{ - domain: appendArrayInPlace(['this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'], this.computed()[0]), - domain_suffix: this.computed()[1], + domain: appendArrayInPlace(['this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'], this.$preprocessed[0]), + domain_suffix: this.$preprocessed[1], domain_keyword: Array.from(this.domainKeywords), domain_regex: Array.from(this.domainWildcard).map(RuleOutput.domainWildCardToRegex), ip_cidr,