From 64317794b0f52e507f3aeb1c70b28c92703197c6 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Sun, 2 Mar 2025 00:05:53 +0800 Subject: [PATCH] Perf: strip more branches --- Build/lib/trie.ts | 64 +++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/Build/lib/trie.ts b/Build/lib/trie.ts index 6ed0a095..5ce5a8e1 100644 --- a/Build/lib/trie.ts +++ b/Build/lib/trie.ts @@ -13,10 +13,11 @@ const START = 1 << 1; const INCLUDE_ALL_SUBDOMAIN = 1 << 2; type TrieNode = [ - flag: number, /** end, includeAllSubdomain (.example.org, ||example.com) */ - TrieNode | null, /** parent */ - Map, /** children */ - Meta /** meta */ + /** end, includeAllSubdomain (.example.org, ||example.com) */ flag: number, + /** parent */ TrieNode | null, + /** children */ Map, + /** token */ token: string, + /** meta */ Meta ]; function deepTrieNodeToJSON(node: TrieNode, @@ -25,11 +26,11 @@ function deepTrieNodeToJSON(node: TrieNode, obj['[start]'] = getBit(node[0], START); obj['[subdomain]'] = getBit(node[0], INCLUDE_ALL_SUBDOMAIN); - if (node[3] != null) { + if (node[4] != null) { if (unpackMeta) { - obj['[meta]'] = unpackMeta(node[3]); + obj['[meta]'] = unpackMeta(node[4]); } else { - obj['[meta]'] = node[3]; + obj['[meta]'] = node[4]; } } node[2].forEach((value, key) => { @@ -38,7 +39,7 @@ function deepTrieNodeToJSON(node: TrieNode, return obj; } -const createNode = (parent: TrieNode | null = null): TrieNode => [1, parent, new Map(), null] as TrieNode; +const createNode = (token: string, parent: TrieNode | null = null): TrieNode => [1, parent, new Map(), token, null] as TrieNode; function hostnameToTokens(hostname: string, hostnameFromIndex: number): string[] { const tokens = hostname.split('.'); @@ -90,7 +91,7 @@ interface FindSingleChildLeafResult { } abstract class Triebase { - protected readonly $root: TrieNode = createNode(); + protected readonly $root: TrieNode = createNode('$root'); protected $size = 0; get root() { @@ -259,7 +260,7 @@ abstract class Triebase { // If the node is a sentinel, we push the suffix to the results if (getBit(node[0], START)) { - onMatches(suffix, getBit(node[0], INCLUDE_ALL_SUBDOMAIN), node[3]); + onMatches(suffix, getBit(node[0], INCLUDE_ALL_SUBDOMAIN), node[4]); } } while (nodeStack.length); }; @@ -303,7 +304,7 @@ abstract class Triebase { // If the node is a sentinel, we push the suffix to the results if (getBit(node[0], START)) { - onMatches(suffix, getBit(node[0], INCLUDE_ALL_SUBDOMAIN), node[3]); + onMatches(suffix, getBit(node[0], INCLUDE_ALL_SUBDOMAIN), node[4]); } } while (nodeStack.length); }; @@ -317,19 +318,16 @@ abstract class Triebase { const child = node[2]; - // console.log({ - // child, parent, token - // }); - // console.log(this.inspect(0)); + const childSize = child.size + (getBit(node[0], INCLUDE_ALL_SUBDOMAIN) ? 1 : 0); if (toPrune !== null) { // the most near branch that could potentially being pruned - if (child.size > 1) { + if (childSize >= 1) { // The branch has some children, the branch need retain. // And we need to abort prune that parent branch, so we set it to null toPrune = null; tokenToPrune = null; } - } else if (child.size < 1) { + } else if (childSize < 1) { // There is only one token child, or no child at all, we can prune it safely // It is now the top-est branch that could potentially being pruned toPrune = parent; @@ -534,7 +532,7 @@ export class HostnameSmolTrie extends Triebase { return true; } } else { - const newNode = createNode(node); + const newNode = createNode(token, node); curNodeChildren.set(token, newNode); node = newNode; } @@ -571,7 +569,7 @@ export class HostnameSmolTrie extends Triebase { } else { node[0] = deleteBit(node[0], INCLUDE_ALL_SUBDOMAIN); } - node[3] = meta!; + node[4] = meta!; } public whitelist(suffix: string, includeAllSubdomain = suffix[0] === '.', hostnameFromIndex = suffix[0] === '.' ? 1 : 0) { @@ -585,25 +583,43 @@ export class HostnameSmolTrie extends Triebase { if (includeAllSubdomain) { // If there is a `[start]sub.example.com` here, remove it node[0] = deleteBit(node[0], INCLUDE_ALL_SUBDOMAIN); - node[0] = deleteBit(node[0], START); // Removing all the child nodes by empty the children node[2].clear(); + // we do not remove sub.example.com for now, we will do that later } else { // Trying to whitelist `example.com` when there is already a `.example.com` in the trie node[0] = deleteBit(node[0], INCLUDE_ALL_SUBDOMAIN); } - // return early if not found - if (missingBit(node[0], START)) return; + if (includeAllSubdomain) { + node[1]?.[2].delete(node[3]); + } else if (missingBit(node[0], START) && node[1]) { + return; + } if (toPrune && tokenToPrune) { toPrune[2].delete(tokenToPrune); } else { node[0] = deleteBit(node[0], START); } + + cleanUpEmptyNode(node); }; } +function cleanUpEmptyNode(node: TrieNode) { + if ( + missingBit(node[0], START) + && node[2].size === 0 + && missingBit(node[0], INCLUDE_ALL_SUBDOMAIN) + && node[1] + ) { + node[1][2].delete(node[3]); + + cleanUpEmptyNode(node[1]); + } +} + export class HostnameTrie extends Triebase { get size() { return this.$size; @@ -618,7 +634,7 @@ export class HostnameTrie extends Triebase { if (child.has(token)) { node = child.get(token)!; } else { - const newNode = createNode(node); + const newNode = createNode(token, node); child.set(token, newNode); node = newNode; } @@ -644,7 +660,7 @@ export class HostnameTrie extends Triebase { } else { node[0] = deleteBit(node[0], INCLUDE_ALL_SUBDOMAIN); } - node[3] = meta!; + node[4] = meta!; } }