mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Chore: dedupe and sort other rulesets
This commit is contained in:
parent
d4ee25e75a
commit
90079b9987
@ -5,7 +5,7 @@ import { createTrie } from './lib/trie';
|
||||
import { task } from './trace';
|
||||
import { SHARED_DESCRIPTION } from './lib/constants';
|
||||
import { getPublicSuffixListTextPromise } from './lib/download-publicsuffixlist';
|
||||
import { domainDeduper } from './lib/domain-deduper';
|
||||
import { domainsetDeduper } from './lib/domain-deduper';
|
||||
import { appendArrayInPlace } from './lib/append-array-in-place';
|
||||
import { sortDomains } from './lib/stable-sort-domain';
|
||||
import { output } from './lib/misc';
|
||||
@ -76,7 +76,7 @@ export const buildCdnDownloadConf = task(require.main === module, __filename)(as
|
||||
'This file contains object storage and static assets CDN domains.'
|
||||
],
|
||||
new Date(),
|
||||
sortDomains(domainDeduper(cdnDomainsList)),
|
||||
sortDomains(domainsetDeduper(cdnDomainsList)),
|
||||
'domainset',
|
||||
output('cdn', 'domainset')
|
||||
),
|
||||
@ -89,7 +89,7 @@ export const buildCdnDownloadConf = task(require.main === module, __filename)(as
|
||||
'This file contains domains for software updating & large file hosting.'
|
||||
],
|
||||
new Date(),
|
||||
sortDomains(domainDeduper(downloadDomainSet)),
|
||||
sortDomains(domainsetDeduper(downloadDomainSet)),
|
||||
'domainset',
|
||||
output('download', 'domainset')
|
||||
)
|
||||
|
||||
@ -4,7 +4,7 @@ import * as path from 'node:path';
|
||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { processLine } from './lib/process-line';
|
||||
import { createRuleset } from './lib/create-file';
|
||||
import { domainDeduper } from './lib/domain-deduper';
|
||||
import { domainsetDeduper } from './lib/domain-deduper';
|
||||
import type { Span } from './trace';
|
||||
import { task } from './trace';
|
||||
import { SHARED_DESCRIPTION } from './lib/constants';
|
||||
@ -116,7 +116,7 @@ function transformDomainset(parentSpan: Span, sourcePath: string, relativePath:
|
||||
const clashFileBasename = relativePath.slice(0, -path.extname(relativePath).length);
|
||||
|
||||
const [title, descriptions, lines] = res;
|
||||
const deduped = domainDeduper(lines);
|
||||
const deduped = domainsetDeduper(lines);
|
||||
|
||||
let description: string[];
|
||||
if (descriptions.length) {
|
||||
|
||||
@ -7,7 +7,7 @@ import { createTrie } from './lib/trie';
|
||||
|
||||
import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA } from './constants/reject-data-source';
|
||||
import { createRuleset, compareAndWriteFile } from './lib/create-file';
|
||||
import { domainDeduper } from './lib/domain-deduper';
|
||||
import { domainsetDeduper } from './lib/domain-deduper';
|
||||
import createKeywordFilter from './lib/aho-corasick';
|
||||
import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
|
||||
import { buildParseDomainMap, sortDomains } from './lib/stable-sort-domain';
|
||||
@ -149,8 +149,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
|
||||
});
|
||||
|
||||
// Dedupe domainSets
|
||||
const dudupedDominArray = span.traceChildSync('dedupe from covered subdomain (base)', () => domainDeduper(baseTrie));
|
||||
const dudupedDominArrayExtra = span.traceChildSync('dedupe from covered subdomain (extra)', () => domainDeduper(extraTrie));
|
||||
const dudupedDominArray = span.traceChildSync('dedupe from covered subdomain (base)', () => domainsetDeduper(baseTrie));
|
||||
const dudupedDominArrayExtra = span.traceChildSync('dedupe from covered subdomain (extra)', () => domainsetDeduper(extraTrie));
|
||||
|
||||
console.log(`Final size ${dudupedDominArray.length} + ${dudupedDominArrayExtra.length}`);
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { domainDeduper } from './lib/domain-deduper';
|
||||
import { domainsetDeduper } from './lib/domain-deduper';
|
||||
import path from 'node:path';
|
||||
import { createRuleset } from './lib/create-file';
|
||||
import { sortDomains } from './lib/stable-sort-domain';
|
||||
@ -235,7 +235,7 @@ export const buildSpeedtestDomainSet = task(require.main === module, __filename)
|
||||
}
|
||||
}))));
|
||||
|
||||
const deduped = span.traceChildSync('sort result', () => sortDomains(domainDeduper(domainTrie)));
|
||||
const deduped = span.traceChildSync('sort result', () => sortDomains(domainsetDeduper(domainTrie)));
|
||||
|
||||
const description = [
|
||||
...SHARED_DESCRIPTION,
|
||||
|
||||
9
Build/lib/bitwise.ts
Normal file
9
Build/lib/bitwise.ts
Normal file
@ -0,0 +1,9 @@
|
||||
/**
 * Packs two 16-bit integers into one 32-bit integer.
 *
 * `a` is stored in the upper 16 bits and `b` in the lower 16 bits. Only the
 * low 16 bits of each argument are kept: `a` is truncated by the shift itself,
 * and `b` is masked explicitly so that an out-of-range (or negative) `b` can
 * never overwrite the bits that belong to `a`.
 *
 * The result is a signed 32-bit integer, so it is negative whenever bit 15 of
 * `a` is set (e.g. `pack(0xFFFF, 0xFFFF) === -1`).
 *
 * @param a value for the upper half; expected to be within 0..0xFFFF
 * @param b value for the lower half; expected to be within 0..0xFFFF
 * @returns the packed signed 32-bit integer
 */
export const pack = (a: number, b: number): number => {
  return (a << 16) | (b & 0xFFFF);
};
|
||||
|
||||
/**
 * Splits one 32-bit integer back into two 16-bit integers.
 *
 * @param value the 32-bit integer to split
 * @returns a tuple of the upper 16 bits and the lower 16 bits, each as an
 * unsigned value within 0..0xFFFF
 */
export const unpack = (value: number): [a: number, b: number] => {
  // The unsigned shift drops the sign extension, leaving just bits 16..31.
  const upper = value >>> 16;
  const lower = value & 0xFFFF;
  return [upper, lower];
};
|
||||
@ -8,6 +8,8 @@ import { fastStringArrayJoin, writeFile } from './misc';
|
||||
import { readFileByLine } from './fetch-text-by-line';
|
||||
import stringify from 'json-stringify-pretty-compact';
|
||||
import { ipCidrListToSingbox, surgeDomainsetToSingbox, surgeRulesetToSingbox } from './singbox';
|
||||
import { createTrie } from './trie';
|
||||
import { pack, unpack } from './bitwise';
|
||||
|
||||
export async function compareAndWriteFile(span: Span, linesA: string[], filePath: string) {
|
||||
let isEqual = true;
|
||||
@ -92,17 +94,6 @@ const withBannerArray = (title: string, description: string[] | readonly string[
|
||||
];
|
||||
};
|
||||
|
||||
const collectType = (rule: string) => {
|
||||
let buf = '';
|
||||
for (let i = 0, len = rule.length; i < len; i++) {
|
||||
if (rule[i] === ',') {
|
||||
return buf;
|
||||
}
|
||||
buf += rule[i];
|
||||
}
|
||||
return null;
|
||||
};
|
||||
|
||||
const defaultSortTypeOrder = Symbol('defaultSortTypeOrder');
|
||||
const sortTypeOrder: Record<string | typeof defaultSortTypeOrder, number> = {
|
||||
DOMAIN: 1,
|
||||
@ -120,33 +111,62 @@ const sortTypeOrder: Record<string | typeof defaultSortTypeOrder, number> = {
|
||||
'IP-CIDR': 400,
|
||||
'IP-CIDR6': 400
|
||||
};
|
||||
// sort DOMAIN-SUFFIX and DOMAIN first, then DOMAIN-KEYWORD, then IP-CIDR and IP-CIDR6 if any
|
||||
export const sortRuleSet = (ruleSet: string[]) => {
|
||||
return ruleSet.map((rule) => {
|
||||
const type = collectType(rule);
|
||||
if (!type) {
|
||||
return [10, rule] as const;
|
||||
}
|
||||
if (!(type in sortTypeOrder)) {
|
||||
return [sortTypeOrder[defaultSortTypeOrder], rule] as const;
|
||||
}
|
||||
if (type === 'URL-REGEX') {
|
||||
let extraWeight = 0;
|
||||
if (rule.includes('.+') || rule.includes('.*')) {
|
||||
extraWeight += 10;
|
||||
}
|
||||
if (rule.includes('|')) {
|
||||
extraWeight += 1;
|
||||
}
|
||||
|
||||
return [
|
||||
sortTypeOrder[type] + extraWeight,
|
||||
rule
|
||||
] as const;
|
||||
const flagDomain = 1 << 2;
|
||||
const flagDomainSuffix = 1 << 3;
|
||||
|
||||
/**
 * Dedupes and sorts a ruleset based on rule type.
 *
 * - `DOMAIN` and `DOMAIN-SUFFIX` rules are added to a trie created with
 *   `smolTree` enabled (`DOMAIN-SUFFIX` values are prefixed with `.`). Only the
 *   entries the trie dumps back are kept, each mapped to the original rule line
 *   through the index packed into its meta.
 * - Every other line is kept as-is.
 * - Kept lines are ordered by ascending weight: `sortTypeOrder` for known
 *   types, the `defaultSortTypeOrder` entry for unknown types, and `10` for
 *   lines from which no rule type can be read.
 *
 * @param ruleSet rule lines in `TYPE,value[,extra]` form; not mutated
 * @returns a new array holding the surviving rule lines in sorted order
 */
const processRuleSet = (ruleSet: string[]) => {
  const trie = createTrie<number>(null, true);

  // Each entry is [index into ruleSet, sort weight]
  const sortMap: Array<[value: number, weight: number]> = [];

  for (let i = 0, len = ruleSet.length; i < len; i++) {
    const line = ruleSet[i];
    const typeEnd = line.indexOf(',');

    // No rule type can be read from this line (no comma, or nothing before it).
    // String.prototype.split never yields null, so this has to be checked up front.
    if (typeEnd < 1) {
      sortMap.push([i, 10]);
      continue;
    }

    const type = line.slice(0, typeEnd);
    // Everything after the rule type, including any further comma-separated fields
    const rest = line.slice(typeEnd + 1);

    switch (type) {
      case 'DOMAIN':
      case 'DOMAIN-SUFFIX': {
        // Only the field right after the rule type is the domain itself
        const valueEnd = rest.indexOf(',');
        const value = valueEnd === -1 ? rest : rest.slice(0, valueEnd);

        // NOTE(review): pack() is documented as packing 16-bit integers, so this
        // presumably relies on a ruleset having fewer than 65536 lines — confirm.
        if (type === 'DOMAIN') {
          trie.add(value, pack(i, flagDomain));
        } else {
          trie.add('.' + value, pack(i, flagDomainSuffix));
        }
        break;
      }
      case 'URL-REGEX': {
        // Inspect the whole remainder of the line: the pattern itself may contain commas
        let extraWeight = 0;
        if (rest.includes('.+') || rest.includes('.*')) {
          extraWeight += 10;
        }
        if (rest.includes('|')) {
          extraWeight += 1;
        }
        sortMap.push([i, sortTypeOrder[type] + extraWeight]);
        break;
      }
      default:
        // Own-property check, so inherited keys such as "constructor" are not
        // mistaken for known rule types
        if (Object.prototype.hasOwnProperty.call(sortTypeOrder, type)) {
          sortMap.push([i, sortTypeOrder[type]]);
        } else {
          sortMap.push([i, sortTypeOrder[defaultSortTypeOrder]]);
        }
    }
  }

  const dumped = trie.dumpWithMeta();
  for (let i = 0, len = dumped.length; i < len; i++) {
    const [originalIndex, flag] = unpack(dumped[i][1]);
    const type = flag === flagDomain ? 'DOMAIN' : 'DOMAIN-SUFFIX';

    sortMap.push([originalIndex, sortTypeOrder[type]]);
  }

  return sortMap
    .sort((a, b) => a[1] - b[1])
    .map(c => ruleSet[c[0]]);
};
|
||||
|
||||
const MARK = 'this_ruleset_is_made_by_sukkaw.ruleset.skk.moe';
|
||||
@ -162,7 +182,7 @@ export const createRuleset = (
|
||||
_clashMrsPath?: string
|
||||
]
|
||||
) => parentSpan.traceChild(`create ruleset: ${path.basename(surgePath, path.extname(surgePath))}`).traceAsyncFn(async (childSpan) => {
|
||||
content = sortRuleSet(content);
|
||||
content = processRuleSet(content);
|
||||
const surgeContent = childSpan.traceChildSync('process surge ruleset', () => {
|
||||
let _surgeContent;
|
||||
switch (type) {
|
||||
|
||||
@ -1,8 +1,6 @@
|
||||
import { createTrie, type Trie } from './trie';
|
||||
|
||||
export function domainDeduper(inputDomains: string[] | Trie, toArray?: true): string[];
|
||||
export function domainDeduper(inputDomains: string[] | Trie, toArray: false): Set<string>;
|
||||
export function domainDeduper(inputDomains: string[] | Trie, toArray = true): string[] | Set<string> {
|
||||
export function domainsetDeduper(inputDomains: string[] | Trie): string[] {
|
||||
let trie: Trie;
|
||||
if (Array.isArray(inputDomains)) {
|
||||
trie = createTrie(inputDomains, true);
|
||||
@ -12,28 +10,5 @@ export function domainDeduper(inputDomains: string[] | Trie, toArray = true): st
|
||||
throw new Error('Invalid trie');
|
||||
}
|
||||
|
||||
const dumped = trie.dump();
|
||||
if (toArray) {
|
||||
return dumped;
|
||||
}
|
||||
return new Set(dumped);
|
||||
|
||||
// const trie = createTrie(inputDomains, true);
|
||||
// const sets = new Set(inputDomains);
|
||||
|
||||
// for (let i = 0, len1 = inputDomains.length; i < len1; i++) {
|
||||
// const d = inputDomains[i];
|
||||
// if (d[0] !== '.') {
|
||||
// continue;
|
||||
// }
|
||||
|
||||
// trie.substractSetInPlaceFromFound(d, sets);
|
||||
// sets.delete(d.slice(1));
|
||||
// }
|
||||
|
||||
// if (toArray) {
|
||||
// return Array.from(sets);
|
||||
// }
|
||||
|
||||
// return sets;
|
||||
return trie.dump();
|
||||
}
|
||||
|
||||
@ -7,10 +7,11 @@ import { inspect } from 'node:util';
|
||||
|
||||
const noop = () => { /** noop */ };
|
||||
|
||||
type TrieNode = [
|
||||
type TrieNode<Meta = any> = [
|
||||
boolean, /** sentinel */
|
||||
TrieNode | null, /** parent */
|
||||
Map<string, TrieNode> /** children */
|
||||
Map<string, TrieNode>, /** children */
|
||||
Meta /** meta */
|
||||
];
|
||||
|
||||
const deepTrieNodeToJSON = (node: TrieNode) => {
|
||||
@ -18,14 +19,17 @@ const deepTrieNodeToJSON = (node: TrieNode) => {
|
||||
if (node[0]) {
|
||||
obj['[start]'] = node[0];
|
||||
}
|
||||
if (node[3] !== undefined) {
|
||||
obj['[meta]'] = node[3];
|
||||
}
|
||||
node[2].forEach((value, key) => {
|
||||
obj[key] = deepTrieNodeToJSON(value);
|
||||
});
|
||||
return obj;
|
||||
};
|
||||
|
||||
const createNode = (parent: TrieNode | null = null): TrieNode => {
|
||||
return [false, parent, new Map<string, TrieNode>()] as TrieNode;
|
||||
const createNode = <Meta = any>(parent: TrieNode | null = null, meta: Meta | null = null): TrieNode => {
|
||||
return [false, parent, new Map<string, TrieNode>(), meta] as TrieNode<Meta>;
|
||||
};
|
||||
|
||||
export const hostnameToTokens = (hostname: string): string[] => {
|
||||
@ -72,16 +76,16 @@ const walkHostnameTokens = (hostname: string, onToken: (token: string) => boolea
|
||||
return false;
|
||||
};
|
||||
|
||||
export const createTrie = (from?: string[] | Set<string> | null, smolTree = false) => {
|
||||
export const createTrie = <Meta = any>(from?: string[] | Set<string> | null, smolTree = false) => {
|
||||
let size = 0;
|
||||
const root: TrieNode = createNode();
|
||||
const root: TrieNode<Meta> = createNode();
|
||||
|
||||
/**
|
||||
* Method used to add the given suffix to the trie.
|
||||
*/
|
||||
const add = smolTree
|
||||
? (suffix: string): void => {
|
||||
let node: TrieNode = root;
|
||||
? (suffix: string, meta?: Meta): void => {
|
||||
let node: TrieNode<Meta> = root;
|
||||
|
||||
const onToken = (token: string) => {
|
||||
if (node[2].has(token)) {
|
||||
@ -98,6 +102,7 @@ export const createTrie = (from?: string[] | Set<string> | null, smolTree = fals
|
||||
node = newNode;
|
||||
}
|
||||
|
||||
node[3] = meta!;
|
||||
return false;
|
||||
};
|
||||
|
||||
@ -128,8 +133,8 @@ export const createTrie = (from?: string[] | Set<string> | null, smolTree = fals
|
||||
|
||||
node[0] = true;
|
||||
}
|
||||
: (suffix: string): void => {
|
||||
let node: TrieNode = root;
|
||||
: (suffix: string, meta?: Meta): void => {
|
||||
let node: TrieNode<Meta> = root;
|
||||
|
||||
const onToken = (token: string) => {
|
||||
if (node[2].has(token)) {
|
||||
@ -140,6 +145,7 @@ export const createTrie = (from?: string[] | Set<string> | null, smolTree = fals
|
||||
node = newNode;
|
||||
}
|
||||
|
||||
node[3] = meta!;
|
||||
return false;
|
||||
};
|
||||
|
||||
@ -221,15 +227,15 @@ export const createTrie = (from?: string[] | Set<string> | null, smolTree = fals
|
||||
};
|
||||
|
||||
const walk = (
|
||||
onMatches: (suffix: string[]) => void,
|
||||
onMatches: (suffix: string[], meta: Meta) => void,
|
||||
initialNode = root,
|
||||
initialSuffix: string[] = []
|
||||
) => {
|
||||
const nodeStack: TrieNode[] = [initialNode];
|
||||
const nodeStack: Array<TrieNode<Meta>> = [initialNode];
|
||||
// Resolving initial string (begin the start of the stack)
|
||||
const suffixStack: string[][] = [initialSuffix];
|
||||
|
||||
let node: TrieNode = root;
|
||||
let node: TrieNode<Meta> = root;
|
||||
|
||||
do {
|
||||
node = nodeStack.pop()!;
|
||||
@ -244,7 +250,7 @@ export const createTrie = (from?: string[] | Set<string> | null, smolTree = fals
|
||||
|
||||
// If the node is a sentinel, we push the suffix to the results
|
||||
if (node[0]) {
|
||||
onMatches(suffix);
|
||||
onMatches(suffix, node[3]);
|
||||
}
|
||||
} while (nodeStack.length);
|
||||
};
|
||||
@ -383,6 +389,16 @@ export const createTrie = (from?: string[] | Set<string> | null, smolTree = fals
|
||||
return results;
|
||||
};
|
||||
|
||||
/**
 * Walks the trie and collects, for every match reported by `walk`, the suffix
 * tokens joined into a single string together with the meta passed alongside.
 */
const dumpWithMeta = (): Array<[string, Meta]> => {
  const entries: Array<[string, Meta]> = [];

  const collect = (tokens: string[], meta: Meta): void => {
    entries.push([fastStringArrayJoin(tokens, ''), meta]);
  };
  walk(collect);

  return entries;
};
|
||||
|
||||
const whitelist = (suffix: string) => {
|
||||
if (!smolTree) {
|
||||
throw new Error('whitelist method is only available in smolTree mode.');
|
||||
@ -428,7 +444,7 @@ export const createTrie = (from?: string[] | Set<string> | null, smolTree = fals
|
||||
add(from[i]);
|
||||
}
|
||||
} else if (from) {
|
||||
from.forEach(add);
|
||||
from.forEach((value) => add(value));
|
||||
}
|
||||
|
||||
return {
|
||||
@ -440,6 +456,7 @@ export const createTrie = (from?: string[] | Set<string> | null, smolTree = fals
|
||||
delete: remove,
|
||||
has,
|
||||
dump,
|
||||
dumpWithMeta,
|
||||
get size() {
|
||||
if (smolTree) {
|
||||
throw new Error('A Trie with smolTree enabled cannot have correct size!');
|
||||
@ -460,5 +477,3 @@ export const createTrie = (from?: string[] | Set<string> | null, smolTree = fals
|
||||
};
|
||||
|
||||
export type Trie = ReturnType<typeof createTrie>;
|
||||
|
||||
export default createTrie;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user