Perf: faster smolTree trie creation by avoiding if in loop

This commit is contained in:
SukkaW 2024-05-27 03:24:29 +08:00
parent efa34399b0
commit 70f837ba15
2 changed files with 41 additions and 32 deletions

1
.gitignore vendored
View File

@ -4,6 +4,7 @@ node_modules
.wireit .wireit
.cache .cache
public public
tmp*
# $ build output # $ build output
List/ List/

View File

@ -62,6 +62,8 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
let size = 0; let size = 0;
const root: TrieNode = createNode(); const root: TrieNode = createNode();
const isHostnameMode = (_token: string | string[]): _token is string[] => hostnameMode;
const suffixToTokens = hostnameMode const suffixToTokens = hostnameMode
? hostnameToTokens ? hostnameToTokens
: (suffix: string) => suffix; : (suffix: string) => suffix;
@ -91,25 +93,32 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
node.set(token, newNode); node.set(token, newNode);
node = newNode; node = newNode;
} }
}
if (smolTree) { // If we are in smolTree mode, we need to do something at the end of the loop
if (smolTree) {
if (tokens[0] === '.') {
// Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie // Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
if (i === 1 && tokens[0] === '.') {
// If there is a `[start]sub.example.com` here, remove it
node[SENTINEL] = false;
// Removing the rest of the child nodes by creating a new node and disconnecting the old one const parent = node[PARENT]!;
const newNode = createNode(node);
node.set('.', newNode); // Make sure parent `[start]sub.example.com` (without dot) is removed (SENTINEL to false)
node = newNode; parent[SENTINEL] = false;
break;
} // Removing the rest of the parent's child nodes by disconnecting the old one and creating a new node
if (i === 0) { const newNode = createNode(node);
// Trying to add `example.com` when there is already a `.example.com` in the trie // The SENTINEL of this newNode will be set to true at the end of the function, so we don't need to set it here
if (node.get('.')?.[SENTINEL] === true) {
return; parent.set('.', newNode);
}
} // Now the real leaf-est node is the new node, change the pointer to it
node = newNode;
}
if (node.get('.')?.[SENTINEL] === true) {
// Trying to add `example.com` when there is already a `.example.com` in the trie
// No need to increment size and set SENTINEL to true (skip this "new" item)
return;
} }
} }
@ -176,8 +185,8 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
if (node[SENTINEL]) { if (node[SENTINEL]) {
if (includeEqualWithSuffix) { if (includeEqualWithSuffix) {
matches.push(suffix); matches.push(suffix);
} else if (hostnameMode) { } else if (isHostnameMode(suffix)) {
if ((suffix as string[]).some((t, i) => t !== inputTokens[i])) { if (suffix.some((t, i) => t !== inputTokens[i])) {
matches.push(suffix); matches.push(suffix);
} }
} else if (suffix !== inputTokens) { } else if (suffix !== inputTokens) {
@ -188,10 +197,10 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
node.forEach((childNode, k) => { node.forEach((childNode, k) => {
nodeStack.push(childNode); nodeStack.push(childNode);
if (hostnameMode) { if (isHostnameMode(suffix)) {
suffixStack.push([k, ...suffix]); suffixStack.push([k, ...suffix]);
} else { } else {
suffixStack.push(k + (suffix as string)); suffixStack.push(k + suffix);
} }
}); });
} while (nodeStack.length); } while (nodeStack.length);
@ -230,20 +239,20 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
if (node[SENTINEL]) { if (node[SENTINEL]) {
// found match, delete it from set // found match, delete it from set
if (hostnameMode) { if (isHostnameMode(suffix)) {
set.delete((suffix as string[]).join('')); set.delete(suffix.join(''));
} else if (suffix !== inputTokens) { } else if (suffix !== inputTokens) {
set.delete(suffix as string); set.delete(suffix);
} }
} }
node.forEach((childNode, k) => { node.forEach((childNode, k) => {
nodeStack.push(childNode); nodeStack.push(childNode);
if (hostnameMode) { if (isHostnameMode(suffix)) {
const stack = [k, ...suffix]; const stack = [k, ...suffix];
suffixStack.push(stack); suffixStack.push(stack);
} else { } else {
suffixStack.push(k + (suffix as string)); suffixStack.push(k + suffix);
} }
}); });
} while (nodeStack.length); } while (nodeStack.length);
@ -336,17 +345,15 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
const suffix = suffixStack.pop()!; const suffix = suffixStack.pop()!;
node.forEach((childNode, k) => { node.forEach((childNode, k) => {
// Pushing the child node to the stack for next iteration of DFS
nodeStack.push(childNode); nodeStack.push(childNode);
if (hostnameMode) { suffixStack.push(isHostnameMode(suffix) ? [k, ...suffix] : k + suffix);
suffixStack.push([k, ...suffix]);
} else {
suffixStack.push(k + (suffix as string));
}
}); });
// If the node is a sentinel, we push the suffix to the results
if (node[SENTINEL]) { if (node[SENTINEL]) {
results.push(hostnameMode ? (suffix as string[]).join('') : (suffix as string)); results.push(isHostnameMode(suffix) ? suffix.join('') : suffix);
} }
} while (nodeStack.length); } while (nodeStack.length);
@ -448,7 +455,8 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
return root; return root;
}, },
whitelist, whitelist,
[Bun.inspect.custom]: () => JSON.stringify(deepTrieNodeToJSON(root), null, 2),
[Bun.inspect.custom]: (depth: number) => JSON.stringify(deepTrieNodeToJSON(root), null, 2).split('\n').map((line) => ' '.repeat(depth) + line).join('\n'),
hostnameMode, hostnameMode,
smolTree smolTree