Perf: re-use trie when building reject domainsets

This commit is contained in:
SukkaW 2024-05-26 01:20:45 +08:00
parent 02bff12245
commit 5b725192e3
3 changed files with 31 additions and 25 deletions

View File

@ -23,7 +23,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
/** Whitelists */
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
let domainSets = new Set<string>();
const domainSets = new Set<string>();
// Parse from AdGuard Filters
const shouldStop = await span
@ -73,11 +73,10 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
process.exit(1);
}
let previousSize = domainSets.size;
console.log(`Import ${previousSize} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
console.log(`Import ${domainSets.size} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
// Dedupe domainSets
await span.traceChildAsync('dedupe from black keywords', async (childSpan) => {
await span.traceChildAsync('dedupe from black keywords/suffixes', async (childSpan) => {
/** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */
const domainKeywordsSet = new Set<string>();
@ -96,16 +95,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
// Remove as many domains as possible from domainSets before creating trie
SetSubstract(domainSets, filterRuleWhitelistDomainSets);
domainSets = new Set(childSpan.traceChildSync('dedupe from white suffixes', () => {
const trie = createTrie(domainSets, true, true);
filterRuleWhitelistDomainSets.forEach(suffix => {
trie.whitelist(suffix);
});
return trie.dump();
}));
// Perform kwfilter to remove as many domains as possible from domainSets before creating trie
childSpan.traceChildSync('dedupe from black keywords', () => {
const kwfilter = createKeywordFilter(domainKeywordsSet);
@ -116,15 +106,18 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
}
}
});
console.log(`Deduped ${previousSize} - ${domainSets.size} = ${previousSize - domainSets.size} from black keywords and suffixes!`);
});
previousSize = domainSets.size;
const trie = createTrie(domainSets, true, true);
span.traceChildSync('dedupe from white suffixes', () => {
filterRuleWhitelistDomainSets.forEach(suffix => {
trie.whitelist(suffix);
});
});
// Dedupe domainSets
const dudupedDominArray = span.traceChildSync('dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
const dudupedDominArray = span.traceChildSync('dedupe from covered subdomain', () => domainDeduper(trie));
console.log(`Deduped ${previousSize - dudupedDominArray.length} rules from covered subdomain!`);
console.log(`Final size ${dudupedDominArray.length}`);
// Create reject stats

View File

@ -1,9 +1,17 @@
import { createTrie } from './trie';
import { createTrie, type Trie } from './trie';
export function domainDeduper(inputDomains: string[] | Trie, toArray?: true): string[];
export function domainDeduper(inputDomains: string[] | Trie, toArray: false): Set<string>;
export function domainDeduper(inputDomains: string[] | Trie, toArray = true): string[] | Set<string> {
let trie: Trie;
if (Array.isArray(inputDomains)) {
trie = createTrie(inputDomains, true, true);
} else if (!inputDomains.hostnameMode || !inputDomains.smolTree) {
throw new Error('Invalid trie');
} else {
trie = inputDomains;
}
export function domainDeduper(inputDomains: string[], toArray?: true): string[];
export function domainDeduper(inputDomains: string[], toArray: false): Set<string>;
export function domainDeduper(inputDomains: string[], toArray = true): string[] | Set<string> {
const trie = createTrie(inputDomains, true, true);
const dumped = trie.dump();
if (toArray) {
return dumped;

View File

@ -448,8 +448,13 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
return root;
},
whitelist,
[Bun.inspect.custom]: () => JSON.stringify(deepTrieNodeToJSON(root), null, 2)
[Bun.inspect.custom]: () => JSON.stringify(deepTrieNodeToJSON(root), null, 2),
hostnameMode,
smolTree
};
};
export type Trie = ReturnType<typeof createTrie>;
export default createTrie;