Improve ruleset build process

This commit is contained in:
SukkaW 2024-09-23 15:15:07 +08:00
parent dd264dd95a
commit 3ca9122a84
6 changed files with 96 additions and 83 deletions

View File

@ -122,7 +122,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
rejectOutput.addDomainKeyword(value); // Add for later deduplication rejectOutput.addDomainKeyword(value); // Add for later deduplication
rejectExtraOutput.addDomainKeyword(value); // Add for later deduplication rejectExtraOutput.addDomainKeyword(value); // Add for later deduplication
} else if (type === 'DOMAIN-SUFFIX') { } else if (type === 'DOMAIN-SUFFIX') {
rejectOutput.addDomainSuffix(value); // Add for later deduplication rejectOutput.whitelistDomain('.' + value); // Add for later deduplication
rejectExtraOutput.whitelistDomain('.' + value); // Add for later deduplication
} }
} }
}); });
@ -139,8 +140,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
rejectExtraOutput.whitelistDomain(domain); rejectExtraOutput.whitelistDomain(domain);
} }
for (const domain of rejectOutput.sorted) { for (let i = 0, len = rejectOutput.$preprocessed.length; i < len; i++) {
rejectExtraOutput.whitelistDomain(domain); rejectOutput.whitelistDomain(rejectOutput.$preprocessed[i]);
} }
}); });

View File

@ -122,6 +122,7 @@ const sensitiveKeywords = createKeywordFilter([
'fb-com', 'fb-com',
'facebook.', 'facebook.',
'facebook-', 'facebook-',
'facebook-com',
'.facebook', '.facebook',
'-facebook', '-facebook',
'coinbase', 'coinbase',
@ -139,7 +140,9 @@ const sensitiveKeywords = createKeywordFilter([
'booking.com-', 'booking.com-',
'booking-eu', 'booking-eu',
'vinted-cz', 'vinted-cz',
'inpost-pl' 'inpost-pl',
'login.microsoft',
'login-microsoft'
]); ]);
const lowKeywords = createKeywordFilter([ const lowKeywords = createKeywordFilter([
'-co-jp', '-co-jp',
@ -147,7 +150,8 @@ const lowKeywords = createKeywordFilter([
'customer-', 'customer-',
'.www-', '.www-',
'instagram', 'instagram',
'microsoft' 'microsoft',
'passwordreset'
]); ]);
const cacheKey = createCacheKey(__filename); const cacheKey = createCacheKey(__filename);
@ -224,7 +228,7 @@ async function processPhihsingDomains(domainArr: string[]) {
domainScoreMap[apexDomain] >= 12 domainScoreMap[apexDomain] >= 12
|| (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 4) || (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 4)
) { ) {
domainArr.push(`.${apexDomain}`); domainArr.push('.' + apexDomain);
} }
} }

View File

@ -11,7 +11,7 @@ import { fastStringArrayJoin, writeFile } from '../misc';
import { readFileByLine } from '../fetch-text-by-line'; import { readFileByLine } from '../fetch-text-by-line';
import { asyncWriteToStream } from '../async-write-to-stream'; import { asyncWriteToStream } from '../async-write-to-stream';
export abstract class RuleOutput { export abstract class RuleOutput<TPreprocessed = unknown> {
protected domainTrie = createTrie<unknown>(null, true); protected domainTrie = createTrie<unknown>(null, true);
protected domainKeywords = new Set<string>(); protected domainKeywords = new Set<string>();
protected domainWildcard = new Set<string>(); protected domainWildcard = new Set<string>();
@ -64,10 +64,14 @@ export abstract class RuleOutput {
return result; return result;
}; };
protected span: Span;
constructor( constructor(
protected readonly span: Span, span: Span,
protected readonly id: string protected readonly id: string
) {} ) {
this.span = span.traceChild('RuleOutput');
}
protected title: string | null = null; protected title: string | null = null;
withTitle(title: string) { withTitle(title: string) {
@ -234,14 +238,21 @@ export abstract class RuleOutput {
return this; return this;
} }
abstract surge(): string[]; protected abstract preprocess(): NonNullable<TPreprocessed>;
abstract clash(): string[];
abstract singbox(): string[];
done() { done() {
return this.pendingPromise; return this.pendingPromise;
} }
private $$preprocessed: TPreprocessed | null = null;
get $preprocessed() {
if (this.$$preprocessed === null) {
this.$$preprocessed = this.span.traceChildSync('RuleOutput#preprocess: ' + this.id, () => this.preprocess());
}
return this.$$preprocessed;
}
async write(): Promise<void> { async write(): Promise<void> {
await this.done(); await this.done();
@ -276,6 +287,10 @@ export abstract class RuleOutput {
) )
]); ]);
} }
abstract surge(): string[];
abstract clash(): string[];
abstract singbox(): string[];
} }
export const fileEqual = async (linesA: string[], source: AsyncIterable<string>): Promise<boolean> => { export const fileEqual = async (linesA: string[], source: AsyncIterable<string>): Promise<boolean> => {

View File

@ -5,13 +5,12 @@ import { RuleOutput } from './base';
import type { SingboxSourceFormat } from '../singbox'; import type { SingboxSourceFormat } from '../singbox';
import { nullthrow } from 'foxact/nullthrow'; import { nullthrow } from 'foxact/nullthrow';
export class DomainsetOutput extends RuleOutput { type Preprocessed = string[];
export class DomainsetOutput extends RuleOutput<Preprocessed> {
protected type = 'domainset' as const; protected type = 'domainset' as const;
private $sorted: string[] | null = null; preprocess() {
get sorted() {
if (!this.$sorted) {
const kwfilter = createKeywordFilter(this.domainKeywords); const kwfilter = createKeywordFilter(this.domainKeywords);
const results: string[] = []; const results: string[] = [];
@ -28,33 +27,31 @@ export class DomainsetOutput extends RuleOutput {
const sorted = sortDomains(results, this.apexDomainMap, this.subDomainMap); const sorted = sortDomains(results, this.apexDomainMap, this.subDomainMap);
sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'); sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe');
this.$sorted = sorted; return sorted;
}
return this.$sorted;
} }
calcDomainMap() { calcDomainMap() {
if (!this.apexDomainMap || !this.subDomainMap) { if (!this.apexDomainMap || !this.subDomainMap) {
const { domainMap, subdomainMap } = buildParseDomainMap(this.sorted); const { domainMap, subdomainMap } = buildParseDomainMap(this.$preprocessed);
this.apexDomainMap = domainMap; this.apexDomainMap = domainMap;
this.subDomainMap = subdomainMap; this.subDomainMap = subdomainMap;
} }
} }
surge(): string[] { surge(): string[] {
return this.sorted; return this.$preprocessed;
} }
clash(): string[] { clash(): string[] {
return this.sorted.map(i => (i[0] === '.' ? `+${i}` : i)); return this.$preprocessed.map(i => (i[0] === '.' ? `+${i}` : i));
} }
singbox(): string[] { singbox(): string[] {
const domains: string[] = []; const domains: string[] = [];
const domainSuffixes: string[] = []; const domainSuffixes: string[] = [];
for (let i = 0, len = this.sorted.length; i < len; i++) { for (let i = 0, len = this.$preprocessed.length; i < len; i++) {
const domain = this.sorted[i]; const domain = this.$preprocessed[i];
if (domain[0] === '.') { if (domain[0] === '.') {
domainSuffixes.push(domain.slice(1)); domainSuffixes.push(domain.slice(1));
} else { } else {
@ -72,11 +69,11 @@ export class DomainsetOutput extends RuleOutput {
} }
getStatMap() { getStatMap() {
invariant(this.sorted, 'Non dumped yet'); invariant(this.$preprocessed, 'Non dumped yet');
invariant(this.apexDomainMap, 'Missing apex domain map'); invariant(this.apexDomainMap, 'Missing apex domain map');
return Array.from( return Array.from(
nullthrow(this.sorted, 'Non dumped yet') nullthrow(this.$preprocessed, 'Non dumped yet')
.reduce<Map<string, number>>((acc, cur) => { .reduce<Map<string, number>>((acc, cur) => {
const suffix = this.apexDomainMap!.get(cur); const suffix = this.apexDomainMap!.get(cur);
if (suffix) { if (suffix) {

View File

@ -6,16 +6,16 @@ import { RuleOutput } from './base';
import { merge } from 'fast-cidr-tools'; import { merge } from 'fast-cidr-tools';
export class IPListOutput extends RuleOutput { type Preprocessed = string[];
export class IPListOutput extends RuleOutput<Preprocessed> {
protected type = 'ip' as const; protected type = 'ip' as const;
constructor(span: Span, id: string, private readonly clashUseRule = true) { constructor(span: Span, id: string, private readonly clashUseRule = true) {
super(span, id); super(span, id);
} }
private $merged: string[] | null = null; protected preprocess() {
get merged() {
if (!this.$merged) {
const results: string[] = []; const results: string[] = [];
appendArrayInPlace( appendArrayInPlace(
results, results,
@ -27,9 +27,7 @@ export class IPListOutput extends RuleOutput {
appendArrayFromSet(results, this.ipcidr6NoResolve); appendArrayFromSet(results, this.ipcidr6NoResolve);
appendArrayFromSet(results, this.ipcidr6); appendArrayFromSet(results, this.ipcidr6);
this.$merged = results; return results;
}
return this.$merged;
} }
private $surge: string[] | null = null; private $surge: string[] | null = null;
@ -59,7 +57,7 @@ export class IPListOutput extends RuleOutput {
return this.surge(); return this.surge();
} }
return this.merged; return this.$preprocessed;
} }
singbox(): string[] { singbox(): string[] {
@ -67,7 +65,7 @@ export class IPListOutput extends RuleOutput {
version: 2, version: 2,
rules: [{ rules: [{
domain: ['this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'], domain: ['this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'],
ip_cidr: this.merged ip_cidr: this.$preprocessed
}] }]
}; };
return RuleOutput.jsonToLines(singbox); return RuleOutput.jsonToLines(singbox);

View File

@ -7,14 +7,14 @@ import type { SingboxSourceFormat } from '../singbox';
import { sortDomains } from '../stable-sort-domain'; import { sortDomains } from '../stable-sort-domain';
import { RuleOutput } from './base'; import { RuleOutput } from './base';
export class RulesetOutput extends RuleOutput { type Preprocessed = [domain: string[], domainSuffix: string[], sortedDomainRules: string[]];
export class RulesetOutput extends RuleOutput<Preprocessed> {
constructor(span: Span, id: string, protected type: 'non_ip' | 'ip') { constructor(span: Span, id: string, protected type: 'non_ip' | 'ip') {
super(span, id); super(span, id);
} }
private $computed: [domain: string[], domainSuffix: string[], sortedDomainRules: string[]] | null = null; protected preprocess() {
private computed() {
if (!this.$computed) {
const kwfilter = createKeywordFilter(this.domainKeywords); const kwfilter = createKeywordFilter(this.domainKeywords);
const domains: string[] = []; const domains: string[] = [];
@ -34,14 +34,12 @@ export class RulesetOutput extends RuleOutput {
} }
} }
this.$computed = [domains, domainSuffixes, sortedDomainRules]; return [domains, domainSuffixes, sortedDomainRules] satisfies Preprocessed;
}
return this.$computed;
} }
surge(): string[] { surge(): string[] {
const results: string[] = ['DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe']; const results: string[] = ['DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'];
appendArrayInPlace(results, this.computed()[2]); appendArrayInPlace(results, this.$preprocessed[2]);
appendArrayFromSet(results, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`); appendArrayFromSet(results, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`);
appendArrayFromSet(results, this.domainWildcard, i => `DOMAIN-WILDCARD,${i}`); appendArrayFromSet(results, this.domainWildcard, i => `DOMAIN-WILDCARD,${i}`);
@ -77,7 +75,7 @@ export class RulesetOutput extends RuleOutput {
clash(): string[] { clash(): string[] {
const results: string[] = ['DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe']; const results: string[] = ['DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'];
appendArrayInPlace(results, this.computed()[2]); appendArrayInPlace(results, this.$preprocessed[2]);
appendArrayFromSet(results, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`); appendArrayFromSet(results, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`);
appendArrayFromSet(results, this.domainWildcard, i => `DOMAIN-REGEX,${RuleOutput.domainWildCardToRegex(i)}`); appendArrayFromSet(results, this.domainWildcard, i => `DOMAIN-REGEX,${RuleOutput.domainWildCardToRegex(i)}`);
@ -121,8 +119,8 @@ export class RulesetOutput extends RuleOutput {
const singbox: SingboxSourceFormat = { const singbox: SingboxSourceFormat = {
version: 2, version: 2,
rules: [{ rules: [{
domain: appendArrayInPlace(['this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'], this.computed()[0]), domain: appendArrayInPlace(['this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'], this.$preprocessed[0]),
domain_suffix: this.computed()[1], domain_suffix: this.$preprocessed[1],
domain_keyword: Array.from(this.domainKeywords), domain_keyword: Array.from(this.domainKeywords),
domain_regex: Array.from(this.domainWildcard).map(RuleOutput.domainWildCardToRegex), domain_regex: Array.from(this.domainWildcard).map(RuleOutput.domainWildCardToRegex),
ip_cidr, ip_cidr,