From 59b86f706f9151c7f3e1d56aab86f6f9328a1ea6 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Fri, 10 May 2024 23:49:50 +0800 Subject: [PATCH] Perf/Refactor: trie w/ hostname mode --- Build/lib/trie.test.ts | 15 +++-- Build/lib/trie.ts | 126 +++++++++++++++++++++++++++++++---------- 2 files changed, 106 insertions(+), 35 deletions(-) diff --git a/Build/lib/trie.test.ts b/Build/lib/trie.test.ts index 4fbefc4f..b0edd2a1 100644 --- a/Build/lib/trie.test.ts +++ b/Build/lib/trie.test.ts @@ -79,6 +79,8 @@ describe('Trie', () => { trie.add('sesqueroman'); trie.add('greek'); + console.log({ trie }); + expect(trie.find('roman')).toEqual(['roman', 'esqueroman', 'sesqueroman']); expect(trie.find('man')).toEqual(['roman', 'esqueroman', 'sesqueroman']); expect(trie.find('esqueroman')).toEqual(['esqueroman', 'sesqueroman']); @@ -97,16 +99,21 @@ describe('Trie', () => { }); }); -describe('surge domainset dedupe', () => { +describe.each([ + ['hostname mode off', false], + ['hostname mode on', true] +])('surge domainset dedupe %s', (_, hostnameMode) => { it('should not remove same entry', () => { - const trie = createTrie(['.skk.moe', 'noc.one']); + const trie = createTrie(['.skk.moe', 'noc.one'], hostnameMode); + + console.log(trie); expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe']); expect(trie.find('noc.one')).toStrictEqual(['noc.one']); }); it('should remove subdomain', () => { - const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']); + const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode); console.log(trie); @@ -115,7 +122,7 @@ describe('surge domainset dedupe', () => { }); it('should not remove non-subdomain', () => { - const trie = createTrie(['skk.moe', 'sukkaskk.moe']); + const trie = createTrie(['skk.moe', 'sukkaskk.moe'], hostnameMode); expect(trie.find('.skk.moe')).toStrictEqual([]); }); }); diff --git a/Build/lib/trie.ts b/Build/lib/trie.ts index cc669067..d6810b1d 100644 --- a/Build/lib/trie.ts +++ b/Build/lib/trie.ts @@ -33,10 +33,34 @@ const createNode = (): TrieNode => { return node; }; -export const createTrie = (from?: string[] | Set | null) => { +export const createTrie = (from?: string[] | Set | null, hostnameMode = false) => { let size = 0; const root: TrieNode = createNode(); + const suffixToTokens = hostnameMode + ? (suffix: string) => { + let buf = ''; + const tokens: string[] = []; + for (let i = 0, l = suffix.length; i < l; i++) { + const c = suffix[i]; + if (c === '.') { + if (buf) { + tokens.push(buf, /* . */ c); + buf = ''; + } else { + tokens.push(/* . */ c); + } + } else { + buf += c; + } + } + if (buf) { + tokens.push(buf); + } + return tokens; + } + : (suffix: string) => suffix; + /** * Method used to add the given prefix to the trie. */ @@ -44,8 +68,10 @@ export const createTrie = (from?: string[] | Set | null) => { let node: TrieNode = root; let token: string; - for (let i = suffix.length - 1; i >= 0; i--) { - token = suffix[i]; + const tokens = suffixToTokens(suffix); + + for (let i = tokens.length - 1; i >= 0; i--) { + token = tokens[i]; if (node.has(token)) { node = node.get(token)!; @@ -64,14 +90,16 @@ export const createTrie = (from?: string[] | Set | null) => { }; /** - * @param {string} suffix + * @param {string} $suffix */ const contains = (suffix: string): boolean => { let node: TrieNode | undefined = root; let token: string; - for (let i = suffix.length - 1; i >= 0; i--) { - token = suffix[i]; + const tokens = suffixToTokens(suffix); + + for (let i = tokens.length - 1; i >= 0; i--) { + token = tokens[i]; node = node.get(token); if (!node) return false; @@ -86,48 +114,70 @@ export const createTrie = (from?: string[] | Set | null) => { let node: TrieNode | undefined = root; let token: string; - for (let i = inputSuffix.length - 1; i >= 0; i--) { - token = inputSuffix[i]; + const inputTokens = suffixToTokens(inputSuffix); + + for (let i = inputTokens.length - 1; i >= 0; i--) { + token = inputTokens[i]; + + if (hostnameMode && token === '') { + break; + } node = node.get(token); if (!node) return []; } - const matches: string[] = []; + const matches: Array = []; // Performing DFS from prefix const nodeStack: TrieNode[] = [node]; - const suffixStack: string[] = [inputSuffix]; + const suffixStack: Array = [inputTokens]; do { - const suffix: string = suffixStack.pop()!; + const suffix: string | string[] = suffixStack.pop()!; node = nodeStack.pop()!; if (node[SENTINEL]) { - if (includeEqualWithSuffix || suffix !== inputSuffix) { + if (includeEqualWithSuffix) { + matches.push(suffix); + } else if (hostnameMode) { + if ((suffix as string[]).some((t, i) => t !== inputTokens[i])) { + matches.push(suffix); + } + } else if (suffix !== inputTokens) { matches.push(suffix); } } node.forEach((childNode, k) => { nodeStack.push(childNode); - suffixStack.push(k + suffix); + + if (hostnameMode) { + const stack = (suffix as string[]).slice(); + stack.unshift(k); + + suffixStack.push(stack); + } else { + suffixStack.push(k + (suffix as string)); + } }); } while (nodeStack.length); - return matches; + return hostnameMode ? matches.map((m) => (m as string[]).join('')) : matches as string[]; }; /** - * Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place. - */ + * Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place. + */ const substractSetInPlaceFromFound = (inputSuffix: string, set: Set) => { let node: TrieNode | undefined = root; let token: string; + const inputTokens = suffixToTokens(inputSuffix); + // Find the leaf-est node, and early return if not any - for (let i = inputSuffix.length - 1; i >= 0; i--) { - token = inputSuffix[i]; + for (let i = inputTokens.length - 1; i >= 0; i--) { + token = inputTokens[i]; node = node.get(token); if (!node) return; @@ -135,29 +185,39 @@ export const createTrie = (from?: string[] | Set | null) => { // Performing DFS from prefix const nodeStack: TrieNode[] = [node]; - const suffixStack: string[] = [inputSuffix]; + const suffixStack: Array = [inputTokens]; do { const suffix = suffixStack.pop()!; node = nodeStack.pop()!; if (node[SENTINEL]) { - if (suffix !== inputSuffix) { - // found match, delete it from set - set.delete(suffix); + if (suffix !== inputTokens) { + // found match, delete it from set + if (hostnameMode) { + set.delete((suffix as string[]).join('')); + } else { + set.delete(suffix as string); + } } } node.forEach((childNode, k) => { nodeStack.push(childNode); - suffixStack.push(k + suffix); + if (hostnameMode) { + const stack = (suffix as string[]).slice(); + stack.unshift(k); + suffixStack.push(stack); + } else { + suffixStack.push(k + (suffix as string)); + } }); } while (nodeStack.length); }; /** - * Method used to delete a prefix from the trie. - */ + * Method used to delete a prefix from the trie. + */ const remove = (suffix: string): boolean => { let node: TrieNode | undefined = root; let toPrune: TrieNode | null = null; @@ -165,8 +225,10 @@ export const createTrie = (from?: string[] | Set | null) => { let parent: TrieNode = node; let token: string; - for (let i = suffix.length - 1; i >= 0; i--) { - token = suffix[i]; + const suffixTokens = suffixToTokens(suffix); + + for (let i = suffixTokens.length - 1; i >= 0; i--) { + token = suffixTokens[i]; parent = node; node = node.get(token); @@ -203,13 +265,15 @@ export const createTrie = (from?: string[] | Set | null) => { }; /** - * Method used to assert whether the given prefix exists in the Trie. - */ + * Method used to assert whether the given prefix exists in the Trie. + */ const has = (suffix: string): boolean => { let node: TrieNode = root; - for (let i = suffix.length - 1; i >= 0; i--) { - const token = suffix[i]; + const tokens = suffixToTokens(suffix); + + for (let i = tokens.length - 1; i >= 0; i--) { + const token = tokens[i]; if (!node.has(token)) { return false;