Mirror of https://github.com/SukkaW/Surge.git (synced 2025-12-13 01:30:37 +08:00)

Perf: re-use trie when building reject domainsets

parent 02bff12245
commit 5b725192e3
@@ -23,7 +23,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
   /** Whitelists */
   const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);

-  let domainSets = new Set<string>();
+  const domainSets = new Set<string>();

   // Parse from AdGuard Filters
   const shouldStop = await span
@@ -73,11 +73,10 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
     process.exit(1);
   }

-  let previousSize = domainSets.size;
-  console.log(`Import ${previousSize} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
+  console.log(`Import ${domainSets.size} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);

   // Dedupe domainSets
-  await span.traceChildAsync('dedupe from black keywords', async (childSpan) => {
+  await span.traceChildAsync('dedupe from black keywords/suffixes', async (childSpan) => {
     /** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */
     const domainKeywordsSet = new Set<string>();

@@ -96,16 +95,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
     // Remove as many domains as possible from domainSets before creating trie
     SetSubstract(domainSets, filterRuleWhitelistDomainSets);

-    domainSets = new Set(childSpan.traceChildSync('dedupe from white suffixes', () => {
-      const trie = createTrie(domainSets, true, true);
-
-      filterRuleWhitelistDomainSets.forEach(suffix => {
-        trie.whitelist(suffix);
-      });
-
-      return trie.dump();
-    }));
-
+    // Perform kwfilter to remove as many domains as possible from domainSets before creating trie
     childSpan.traceChildSync('dedupe from black keywords', () => {
       const kwfilter = createKeywordFilter(domainKeywordsSet);

@@ -116,15 +106,18 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
         }
       }
     });

-    console.log(`Deduped ${previousSize} - ${domainSets.size} = ${previousSize - domainSets.size} from black keywords and suffixes!`);
   });
-  previousSize = domainSets.size;
+  const trie = createTrie(domainSets, true, true);
+  span.traceChildSync('dedupe from white suffixes', () => {
+    filterRuleWhitelistDomainSets.forEach(suffix => {
+      trie.whitelist(suffix);
+    });
+  });

   // Dedupe domainSets
-  const dudupedDominArray = span.traceChildSync('dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
+  const dudupedDominArray = span.traceChildSync('dedupe from covered subdomain', () => domainDeduper(trie));

-  console.log(`Deduped ${previousSize - dudupedDominArray.length} rules from covered subdomain!`);
   console.log(`Final size ${dudupedDominArray.length}`);

   // Create reject stats
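
Taken together, the hunks above replace a two-trie pipeline (one trie built and dumped inside the white-suffix pass, a second one rebuilt inside domainDeduper) with a single trie that is created once and reused. A condensed sketch of the new flow, using the same identifiers as the hunks above but with the tracing spans and the keyword-filter loop omitted:

// Condensed sketch -- not verbatim from the diff; span.traceChild* wrappers
// and the kwfilter loop are left out for brevity.

// Exact whitelist entries are removed from the Set before the trie is built,
// so the trie starts as small as possible.
SetSubstract(domainSets, filterRuleWhitelistDomainSets);

// The trie is now created exactly once (hostnameMode and smolTree enabled)...
const trie = createTrie(domainSets, true, true);

// ...the whitelist suffixes are applied directly to it...
filterRuleWhitelistDomainSets.forEach(suffix => trie.whitelist(suffix));

// ...and the same instance is passed straight to domainDeduper, which no longer
// has to rebuild a trie from Array.from(domainSets).
const dudupedDominArray = domainDeduper(trie);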
@@ -1,9 +1,17 @@
-import { createTrie } from './trie';
+import { createTrie, type Trie } from './trie';

-export function domainDeduper(inputDomains: string[], toArray?: true): string[];
-export function domainDeduper(inputDomains: string[], toArray: false): Set<string>;
-export function domainDeduper(inputDomains: string[], toArray = true): string[] | Set<string> {
-  const trie = createTrie(inputDomains, true, true);
+export function domainDeduper(inputDomains: string[] | Trie, toArray?: true): string[];
+export function domainDeduper(inputDomains: string[] | Trie, toArray: false): Set<string>;
+export function domainDeduper(inputDomains: string[] | Trie, toArray = true): string[] | Set<string> {
+  let trie: Trie;
+  if (Array.isArray(inputDomains)) {
+    trie = createTrie(inputDomains, true, true);
+  } else if (!inputDomains.hostnameMode || !inputDomains.smolTree) {
+    throw new Error('Invalid trie');
+  } else {
+    trie = inputDomains;
+  }
+
   const dumped = trie.dump();
   if (toArray) {
     return dumped;
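
With the widened overloads, callers can still pass a plain string[] (a trie is then built internally, as before) or hand in a trie they already have, provided it was created with hostnameMode and smolTree enabled; anything else hits the 'Invalid trie' guard. An illustrative call site, not taken from the repository; the import path for domainDeduper is assumed:

import { createTrie } from './trie';
import { domainDeduper } from './domain-deduper'; // assumed path for this sketch

const domains = ['example.com', 'cdn.example.com', 'tracker.example.net'];

// Old-style call: an array still works, a trie is created internally.
const fromArray: string[] = domainDeduper(domains);

// New-style call: reuse a trie that was built elsewhere. It must have been
// created with hostnameMode = true and smolTree = true, otherwise the
// 'Invalid trie' guard throws.
const trie = createTrie(domains, true, true);
const fromTrie: string[] = domainDeduper(trie);

// Passing toArray = false returns a Set<string> instead of an array.
const asSet: Set<string> = domainDeduper(domains, false);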
@@ -448,8 +448,13 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
       return root;
     },
     whitelist,
-    [Bun.inspect.custom]: () => JSON.stringify(deepTrieNodeToJSON(root), null, 2)
+    [Bun.inspect.custom]: () => JSON.stringify(deepTrieNodeToJSON(root), null, 2),
+
+    hostnameMode,
+    smolTree
   };
 };

+export type Trie = ReturnType<typeof createTrie>;
+
 export default createTrie;
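
Because Trie is derived with ReturnType<typeof createTrie>, downstream code automatically picks up the newly exposed hostnameMode and smolTree flags without a hand-written interface, which is what the domainDeduper guard above relies on. A minimal illustration with a hypothetical helper, not part of the repository:

import { createTrie, type Trie } from './trie';

// Hypothetical helper: check for the trie flavour domainDeduper expects.
function isSmolHostnameTrie(trie: Trie): boolean {
  return trie.hostnameMode && trie.smolTree;
}

const trie = createTrie(['example.com'], true, true);
if (!isSmolHostnameTrie(trie)) {
  throw new Error('Invalid trie'); // same condition as the domainDeduper guard
}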