Improve ruleset build process

This commit is contained in:
SukkaW 2024-09-23 15:15:07 +08:00
parent dd264dd95a
commit 3ca9122a84
6 changed files with 96 additions and 83 deletions

View File

@ -122,7 +122,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
rejectOutput.addDomainKeyword(value); // Add for later deduplication
rejectExtraOutput.addDomainKeyword(value); // Add for later deduplication
} else if (type === 'DOMAIN-SUFFIX') {
rejectOutput.addDomainSuffix(value); // Add for later deduplication
rejectOutput.whitelistDomain('.' + value); // Add for later deduplication
rejectExtraOutput.whitelistDomain('.' + value); // Add for later deduplication
}
}
});
@ -139,8 +140,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
rejectExtraOutput.whitelistDomain(domain);
}
for (const domain of rejectOutput.sorted) {
rejectExtraOutput.whitelistDomain(domain);
// NOTE(review): this loop calls whitelistDomain on rejectOutput, passing rejectOutput's own
// $preprocessed entries, whereas the adjacent whitelistDomain calls in this hunk are made on
// rejectExtraOutput. Presumably rejectExtraOutput is the intended receiver here — confirm.
for (let i = 0, len = rejectOutput.$preprocessed.length; i < len; i++) {
rejectOutput.whitelistDomain(rejectOutput.$preprocessed[i]);
}
});

View File

@ -122,6 +122,7 @@ const sensitiveKeywords = createKeywordFilter([
'fb-com',
'facebook.',
'facebook-',
'facebook-com',
'.facebook',
'-facebook',
'coinbase',
@ -139,7 +140,9 @@ const sensitiveKeywords = createKeywordFilter([
'booking.com-',
'booking-eu',
'vinted-cz',
'inpost-pl'
'inpost-pl',
'login.microsoft',
'login-microsoft'
]);
const lowKeywords = createKeywordFilter([
'-co-jp',
@ -147,7 +150,8 @@ const lowKeywords = createKeywordFilter([
'customer-',
'.www-',
'instagram',
'microsoft'
'microsoft',
'passwordreset'
]);
const cacheKey = createCacheKey(__filename);
@ -224,7 +228,7 @@ async function processPhihsingDomains(domainArr: string[]) {
domainScoreMap[apexDomain] >= 12
|| (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 4)
) {
domainArr.push(`.${apexDomain}`);
domainArr.push('.' + apexDomain);
}
}

View File

@ -11,7 +11,7 @@ import { fastStringArrayJoin, writeFile } from '../misc';
import { readFileByLine } from '../fetch-text-by-line';
import { asyncWriteToStream } from '../async-write-to-stream';
export abstract class RuleOutput {
export abstract class RuleOutput<TPreprocessed = unknown> {
protected domainTrie = createTrie<unknown>(null, true);
protected domainKeywords = new Set<string>();
protected domainWildcard = new Set<string>();
@ -64,10 +64,14 @@ export abstract class RuleOutput {
return result;
};
protected span: Span;
constructor(
protected readonly span: Span,
span: Span,
protected readonly id: string
) {}
) {
this.span = span.traceChild('RuleOutput');
}
protected title: string | null = null;
withTitle(title: string) {
@ -234,14 +238,21 @@ export abstract class RuleOutput {
return this;
}
abstract surge(): string[];
abstract clash(): string[];
abstract singbox(): string[];
protected abstract preprocess(): NonNullable<TPreprocessed>;
/**
 * Returns `this.pendingPromise`.
 *
 * `write()` awaits this before doing anything else.
 */
done() {
return this.pendingPromise;
}
/**
 * Backing store for the `$preprocessed` getter. Starts as `null` and is assigned by the
 * getter on first read. `null` works as the "not computed yet" marker because
 * `preprocess()` is declared to return `NonNullable<TPreprocessed>`.
 */
private $$preprocessed: TPreprocessed | null = null;
/**
 * Memoized result of `preprocess()`.
 *
 * The first read invokes `preprocess()` through `this.span.traceChildSync`, labelled
 * `'RuleOutput#preprocess: ' + this.id`, and stores the returned value; subsequent reads
 * return the stored value without calling `preprocess()` again.
 *
 * NOTE(review): nothing visible here resets the stored value, so changes made to the
 * output after the first read would presumably not be reflected — confirm against callers.
 */
get $preprocessed() {
// Only compute when no value has been stored yet.
if (this.$$preprocessed === null) {
this.$$preprocessed = this.span.traceChildSync('RuleOutput#preprocess: ' + this.id, () => this.preprocess());
}
return this.$$preprocessed;
}
async write(): Promise<void> {
await this.done();
@ -276,6 +287,10 @@ export abstract class RuleOutput {
)
]);
}
abstract surge(): string[];
abstract clash(): string[];
abstract singbox(): string[];
}
export const fileEqual = async (linesA: string[], source: AsyncIterable<string>): Promise<boolean> => {

View File

@ -5,56 +5,53 @@ import { RuleOutput } from './base';
import type { SingboxSourceFormat } from '../singbox';
import { nullthrow } from 'foxact/nullthrow';
export class DomainsetOutput extends RuleOutput {
type Preprocessed = string[];
export class DomainsetOutput extends RuleOutput<Preprocessed> {
protected type = 'domainset' as const;
private $sorted: string[] | null = null;
preprocess() {
const kwfilter = createKeywordFilter(this.domainKeywords);
get sorted() {
if (!this.$sorted) {
const kwfilter = createKeywordFilter(this.domainKeywords);
const results: string[] = [];
const results: string[] = [];
const dumped = this.domainTrie.dump();
const dumped = this.domainTrie.dump();
for (let i = 0, len = dumped.length; i < len; i++) {
const domain = dumped[i];
if (!kwfilter(domain)) {
results.push(domain);
}
for (let i = 0, len = dumped.length; i < len; i++) {
const domain = dumped[i];
if (!kwfilter(domain)) {
results.push(domain);
}
const sorted = sortDomains(results, this.apexDomainMap, this.subDomainMap);
sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe');
this.$sorted = sorted;
}
return this.$sorted;
const sorted = sortDomains(results, this.apexDomainMap, this.subDomainMap);
sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe');
return sorted;
}
calcDomainMap() {
if (!this.apexDomainMap || !this.subDomainMap) {
const { domainMap, subdomainMap } = buildParseDomainMap(this.sorted);
const { domainMap, subdomainMap } = buildParseDomainMap(this.$preprocessed);
this.apexDomainMap = domainMap;
this.subDomainMap = subdomainMap;
}
}
surge(): string[] {
return this.sorted;
return this.$preprocessed;
}
clash(): string[] {
return this.sorted.map(i => (i[0] === '.' ? `+${i}` : i));
return this.$preprocessed.map(i => (i[0] === '.' ? `+${i}` : i));
}
singbox(): string[] {
const domains: string[] = [];
const domainSuffixes: string[] = [];
for (let i = 0, len = this.sorted.length; i < len; i++) {
const domain = this.sorted[i];
for (let i = 0, len = this.$preprocessed.length; i < len; i++) {
const domain = this.$preprocessed[i];
if (domain[0] === '.') {
domainSuffixes.push(domain.slice(1));
} else {
@ -72,11 +69,11 @@ export class DomainsetOutput extends RuleOutput {
}
getStatMap() {
invariant(this.sorted, 'Non dumped yet');
invariant(this.$preprocessed, 'Non dumped yet');
invariant(this.apexDomainMap, 'Missing apex domain map');
return Array.from(
nullthrow(this.sorted, 'Non dumped yet')
nullthrow(this.$preprocessed, 'Non dumped yet')
.reduce<Map<string, number>>((acc, cur) => {
const suffix = this.apexDomainMap!.get(cur);
if (suffix) {

View File

@ -6,30 +6,28 @@ import { RuleOutput } from './base';
import { merge } from 'fast-cidr-tools';
export class IPListOutput extends RuleOutput {
type Preprocessed = string[];
export class IPListOutput extends RuleOutput<Preprocessed> {
protected type = 'ip' as const;
constructor(span: Span, id: string, private readonly clashUseRule = true) {
super(span, id);
}
private $merged: string[] | null = null;
get merged() {
if (!this.$merged) {
const results: string[] = [];
appendArrayInPlace(
results,
merge(
appendArrayInPlace(Array.from(this.ipcidrNoResolve), Array.from(this.ipcidr)),
true
)
);
appendArrayFromSet(results, this.ipcidr6NoResolve);
appendArrayFromSet(results, this.ipcidr6);
protected preprocess() {
const results: string[] = [];
appendArrayInPlace(
results,
merge(
appendArrayInPlace(Array.from(this.ipcidrNoResolve), Array.from(this.ipcidr)),
true
)
);
appendArrayFromSet(results, this.ipcidr6NoResolve);
appendArrayFromSet(results, this.ipcidr6);
this.$merged = results;
}
return this.$merged;
return results;
}
private $surge: string[] | null = null;
@ -59,7 +57,7 @@ export class IPListOutput extends RuleOutput {
return this.surge();
}
return this.merged;
return this.$preprocessed;
}
singbox(): string[] {
@ -67,7 +65,7 @@ export class IPListOutput extends RuleOutput {
version: 2,
rules: [{
domain: ['this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'],
ip_cidr: this.merged
ip_cidr: this.$preprocessed
}]
};
return RuleOutput.jsonToLines(singbox);

View File

@ -7,41 +7,39 @@ import type { SingboxSourceFormat } from '../singbox';
import { sortDomains } from '../stable-sort-domain';
import { RuleOutput } from './base';
export class RulesetOutput extends RuleOutput {
type Preprocessed = [domain: string[], domainSuffix: string[], sortedDomainRules: string[]];
export class RulesetOutput extends RuleOutput<Preprocessed> {
constructor(span: Span, id: string, protected type: 'non_ip' | 'ip') {
super(span, id);
}
private $computed: [domain: string[], domainSuffix: string[], sortedDomainRules: string[]] | null = null;
private computed() {
if (!this.$computed) {
const kwfilter = createKeywordFilter(this.domainKeywords);
protected preprocess() {
const kwfilter = createKeywordFilter(this.domainKeywords);
const domains: string[] = [];
const domainSuffixes: string[] = [];
const sortedDomainRules: string[] = [];
const domains: string[] = [];
const domainSuffixes: string[] = [];
const sortedDomainRules: string[] = [];
for (const domain of sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap)) {
if (kwfilter(domain)) {
continue;
}
if (domain[0] === '.') {
domainSuffixes.push(domain.slice(1));
sortedDomainRules.push(`DOMAIN-SUFFIX,${domain.slice(1)}`);
} else {
domains.push(domain);
sortedDomainRules.push(`DOMAIN,${domain}`);
}
for (const domain of sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap)) {
if (kwfilter(domain)) {
continue;
}
if (domain[0] === '.') {
domainSuffixes.push(domain.slice(1));
sortedDomainRules.push(`DOMAIN-SUFFIX,${domain.slice(1)}`);
} else {
domains.push(domain);
sortedDomainRules.push(`DOMAIN,${domain}`);
}
this.$computed = [domains, domainSuffixes, sortedDomainRules];
}
return this.$computed;
return [domains, domainSuffixes, sortedDomainRules] satisfies Preprocessed;
}
surge(): string[] {
const results: string[] = ['DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'];
appendArrayInPlace(results, this.computed()[2]);
appendArrayInPlace(results, this.$preprocessed[2]);
appendArrayFromSet(results, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`);
appendArrayFromSet(results, this.domainWildcard, i => `DOMAIN-WILDCARD,${i}`);
@ -77,7 +75,7 @@ export class RulesetOutput extends RuleOutput {
clash(): string[] {
const results: string[] = ['DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'];
appendArrayInPlace(results, this.computed()[2]);
appendArrayInPlace(results, this.$preprocessed[2]);
appendArrayFromSet(results, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`);
appendArrayFromSet(results, this.domainWildcard, i => `DOMAIN-REGEX,${RuleOutput.domainWildCardToRegex(i)}`);
@ -121,8 +119,8 @@ export class RulesetOutput extends RuleOutput {
const singbox: SingboxSourceFormat = {
version: 2,
rules: [{
domain: appendArrayInPlace(['this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'], this.computed()[0]),
domain_suffix: this.computed()[1],
domain: appendArrayInPlace(['this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'], this.$preprocessed[0]),
domain_suffix: this.$preprocessed[1],
domain_keyword: Array.from(this.domainKeywords),
domain_regex: Array.from(this.domainWildcard).map(RuleOutput.domainWildCardToRegex),
ip_cidr,