diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts index 4e85ea44..e713cabe 100644 --- a/Build/build-reject-domainset.ts +++ b/Build/build-reject-domainset.ts @@ -23,7 +23,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { /** Whitelists */ const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST); - let domainSets = new Set(); + const domainSets = new Set(); // Parse from AdGuard Filters const shouldStop = await span @@ -73,11 +73,10 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { process.exit(1); } - let previousSize = domainSets.size; - console.log(`Import ${previousSize} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`); + console.log(`Import ${domainSets.size} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`); // Dedupe domainSets - await span.traceChildAsync('dedupe from black keywords', async (childSpan) => { + await span.traceChildAsync('dedupe from black keywords/suffixes', async (childSpan) => { /** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */ const domainKeywordsSet = new Set(); @@ -96,16 +95,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { // Remove as many domains as possible from domainSets before creating trie SetSubstract(domainSets, filterRuleWhitelistDomainSets); - domainSets = new Set(childSpan.traceChildSync('dedupe from white suffixes', () => { - const trie = createTrie(domainSets, true, true); - - filterRuleWhitelistDomainSets.forEach(suffix => { - trie.whitelist(suffix); - }); - - return trie.dump(); - })); - + // Perform kwfilter to remove as many domains as possible from domainSets before creating trie childSpan.traceChildSync('dedupe from black keywords', () => { const kwfilter = createKeywordFilter(domainKeywordsSet); @@ -116,15 +106,18 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { } } }); - - console.log(`Deduped ${previousSize} - ${domainSets.size} = ${previousSize - domainSets.size} from black keywords and suffixes!`); }); - previousSize = domainSets.size; + + const trie = createTrie(domainSets, true, true); + span.traceChildSync('dedupe from white suffixes', () => { + filterRuleWhitelistDomainSets.forEach(suffix => { + trie.whitelist(suffix); + }); + }); // Dedupe domainSets - const dudupedDominArray = span.traceChildSync('dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets))); + const dudupedDominArray = span.traceChildSync('dedupe from covered subdomain', () => domainDeduper(trie)); - console.log(`Deduped ${previousSize - dudupedDominArray.length} rules from covered subdomain!`); console.log(`Final size ${dudupedDominArray.length}`); // Create reject stats diff --git a/Build/lib/domain-deduper.ts b/Build/lib/domain-deduper.ts index b89b1a19..61321896 100644 --- a/Build/lib/domain-deduper.ts +++ b/Build/lib/domain-deduper.ts @@ -1,9 +1,17 @@ -import { createTrie } from './trie'; +import { createTrie, type Trie } from './trie'; + +export function domainDeduper(inputDomains: string[] | Trie, toArray?: true): string[]; +export function domainDeduper(inputDomains: string[] | Trie, toArray: false): Set; +export function domainDeduper(inputDomains: string[] | Trie, toArray = true): string[] | Set { + let trie: Trie; + if (Array.isArray(inputDomains)) { + trie = createTrie(inputDomains, true, true); + } else if (!inputDomains.hostnameMode || !inputDomains.smolTree) { + throw new Error('Invalid trie'); + } else { + trie = inputDomains; + } -export function domainDeduper(inputDomains: string[], toArray?: true): string[]; -export function domainDeduper(inputDomains: string[], toArray: false): Set; -export function domainDeduper(inputDomains: string[], toArray = true): string[] | Set { - const trie = createTrie(inputDomains, true, true); const dumped = trie.dump(); if (toArray) { return dumped; diff --git a/Build/lib/trie.ts b/Build/lib/trie.ts index 1b6357be..84646fbe 100644 --- a/Build/lib/trie.ts +++ b/Build/lib/trie.ts @@ -448,8 +448,13 @@ export const createTrie = (from?: string[] | Set | null, hostnameMode = return root; }, whitelist, - [Bun.inspect.custom]: () => JSON.stringify(deepTrieNodeToJSON(root), null, 2) + [Bun.inspect.custom]: () => JSON.stringify(deepTrieNodeToJSON(root), null, 2), + + hostnameMode, + smolTree }; }; +export type Trie = ReturnType; + export default createTrie;