mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Perf: re-use trie when building reject domainsets
This commit is contained in:
parent
02bff12245
commit
5b725192e3
@ -23,7 +23,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
||||
/** Whitelists */
|
||||
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
|
||||
|
||||
let domainSets = new Set<string>();
|
||||
const domainSets = new Set<string>();
|
||||
|
||||
// Parse from AdGuard Filters
|
||||
const shouldStop = await span
|
||||
@ -73,11 +73,10 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
let previousSize = domainSets.size;
|
||||
console.log(`Import ${previousSize} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
|
||||
console.log(`Import ${domainSets.size} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
|
||||
|
||||
// Dedupe domainSets
|
||||
await span.traceChildAsync('dedupe from black keywords', async (childSpan) => {
|
||||
await span.traceChildAsync('dedupe from black keywords/suffixes', async (childSpan) => {
|
||||
/** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */
|
||||
const domainKeywordsSet = new Set<string>();
|
||||
|
||||
@ -96,16 +95,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
||||
// Remove as many domains as possible from domainSets before creating trie
|
||||
SetSubstract(domainSets, filterRuleWhitelistDomainSets);
|
||||
|
||||
domainSets = new Set(childSpan.traceChildSync('dedupe from white suffixes', () => {
|
||||
const trie = createTrie(domainSets, true, true);
|
||||
|
||||
filterRuleWhitelistDomainSets.forEach(suffix => {
|
||||
trie.whitelist(suffix);
|
||||
});
|
||||
|
||||
return trie.dump();
|
||||
}));
|
||||
|
||||
// Perform kwfilter to remove as many domains as possible from domainSets before creating trie
|
||||
childSpan.traceChildSync('dedupe from black keywords', () => {
|
||||
const kwfilter = createKeywordFilter(domainKeywordsSet);
|
||||
|
||||
@ -116,15 +106,18 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
console.log(`Deduped ${previousSize} - ${domainSets.size} = ${previousSize - domainSets.size} from black keywords and suffixes!`);
|
||||
});
|
||||
previousSize = domainSets.size;
|
||||
|
||||
const trie = createTrie(domainSets, true, true);
|
||||
span.traceChildSync('dedupe from white suffixes', () => {
|
||||
filterRuleWhitelistDomainSets.forEach(suffix => {
|
||||
trie.whitelist(suffix);
|
||||
});
|
||||
});
|
||||
|
||||
// Dedupe domainSets
|
||||
const dudupedDominArray = span.traceChildSync('dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
|
||||
const dudupedDominArray = span.traceChildSync('dedupe from covered subdomain', () => domainDeduper(trie));
|
||||
|
||||
console.log(`Deduped ${previousSize - dudupedDominArray.length} rules from covered subdomain!`);
|
||||
console.log(`Final size ${dudupedDominArray.length}`);
|
||||
|
||||
// Create reject stats
|
||||
|
||||
@ -1,9 +1,17 @@
|
||||
import { createTrie } from './trie';
|
||||
import { createTrie, type Trie } from './trie';
|
||||
|
||||
export function domainDeduper(inputDomains: string[] | Trie, toArray?: true): string[];
|
||||
export function domainDeduper(inputDomains: string[] | Trie, toArray: false): Set<string>;
|
||||
export function domainDeduper(inputDomains: string[] | Trie, toArray = true): string[] | Set<string> {
|
||||
let trie: Trie;
|
||||
if (Array.isArray(inputDomains)) {
|
||||
trie = createTrie(inputDomains, true, true);
|
||||
} else if (!inputDomains.hostnameMode || !inputDomains.smolTree) {
|
||||
throw new Error('Invalid trie');
|
||||
} else {
|
||||
trie = inputDomains;
|
||||
}
|
||||
|
||||
export function domainDeduper(inputDomains: string[], toArray?: true): string[];
|
||||
export function domainDeduper(inputDomains: string[], toArray: false): Set<string>;
|
||||
export function domainDeduper(inputDomains: string[], toArray = true): string[] | Set<string> {
|
||||
const trie = createTrie(inputDomains, true, true);
|
||||
const dumped = trie.dump();
|
||||
if (toArray) {
|
||||
return dumped;
|
||||
|
||||
@ -448,8 +448,13 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
return root;
|
||||
},
|
||||
whitelist,
|
||||
[Bun.inspect.custom]: () => JSON.stringify(deepTrieNodeToJSON(root), null, 2)
|
||||
[Bun.inspect.custom]: () => JSON.stringify(deepTrieNodeToJSON(root), null, 2),
|
||||
|
||||
hostnameMode,
|
||||
smolTree
|
||||
};
|
||||
};
|
||||
|
||||
export type Trie = ReturnType<typeof createTrie>;
|
||||
|
||||
export default createTrie;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user