/**
 * Hostbane-Optimized Trie based on Mnemonist Trie
 */

import { fastStringCompare } from './misc';
import util from 'node:util';
import { noop } from 'foxact/noop';
import { fastStringArrayJoin } from 'foxts/fast-string-array-join';
import FIFO from './fifo';

/**
 * A trie node stored as a positional tuple.
 *
 * Index layout:
 * - `[0]` end flag: a hostname terminates at this node
 * - `[1]` includeAllSubdomain flag (`.example.org`, `||example.com`)
 * - `[2]` parent node (`null` for the root)
 * - `[3]` children, keyed by hostname label
 * - `[4]` meta attached to the entry
 */
type TrieNode<Meta = any> = [
  boolean, /** end */
  boolean, /** includeAllSubdomain (.example.org, ||example.com) */
  TrieNode | null, /** parent */
  Map<string, TrieNode>, /** children */
  Meta /** meta */
];

/**
 * Recursively converts a node (and its descendants) into a plain object
 * suitable for `JSON.stringify`.
 *
 * @param node the node to convert
 * @param unpackMeta optional formatter applied to a non-nullish meta value
 * @returns an object holding `[start]` (only when the end flag is set),
 * `[subdomain]`, `[meta]` (only when meta is non-nullish) and one key per child label
 */
function deepTrieNodeToJSON<Meta = unknown>(
  node: TrieNode<Meta>,
  unpackMeta: ((meta?: any) => string) | undefined
) {
  const obj: Record<string, unknown> = {};

  if (node[0]) {
    obj['[start]'] = node[0];
  }
  obj['[subdomain]'] = node[1];

  // Meta lives at index 4; index 3 is the children map and must not be emitted as meta.
  const meta = node[4];
  if (meta != null) {
    obj['[meta]'] = unpackMeta ? unpackMeta(meta) : meta;
  }

  node[3].forEach((child, label) => {
    obj[label] = deepTrieNodeToJSON(child, unpackMeta);
  });

  return obj;
}

/** Creates an empty, non-terminal node with no children and `null` meta. */
const createNode = <Meta = unknown>(allSubdomain = false, parent: TrieNode | null = null): TrieNode => [false, allSubdomain, parent, new Map<string, TrieNode>(), null] as TrieNode<Meta>;

/**
 * Splits a hostname on `.` and drops empty labels (produced by leading,
 * trailing or consecutive dots).
 *
 * @param hostname the hostname to split
 * @returns the non-empty labels in their original (left-to-right) order
 */
export function hostnameToTokens(hostname: string): string[] {
  const labels = hostname.split('.');
  const results: string[] = [];

  for (let i = 0, len = labels.length; i < len; i++) {
    const label = labels[i];
    if (label.length > 0) {
      results.push(label);
    }
  }

  return results;
}

/**
 * Walks the labels of a hostname from right to left (TLD first), skipping
 * empty labels, and invokes `onToken` for each one.
 *
 * @param hostname the hostname to walk
 * @param onToken callback; returning `null` or a truthy value stops the walk
 * @returns `null` if the callback returned `null`, `true` if the callback
 * returned a truthy value, `false` if every label was visited
 */
function walkHostnameTokens(hostname: string, onToken: (token: string) => boolean | null): boolean | null {
  const labels = hostname.split('.');

  for (let i = labels.length - 1; i >= 0; i--) {
    const label = labels[i];
    if (label.length === 0) {
      continue;
    }

    const verdict = onToken(label);
    if (verdict === null) {
      return null;
    }
    // if the callback returns true, we should skip the rest
    if (verdict) {
      return true;
    }
  }

  return false;
}

/** Result of {@link Triebase.getSingleChildLeaf}. */
interface FindSingleChildLeafResult<Meta> {
  /** The node reached after consuming every token */
  node: TrieNode<Meta>,
  /** The node whose child may be deleted to prune a branch, if any */
  toPrune: TrieNode<Meta> | null,
  /** The child key to delete from `toPrune`, if any */
  tokenToPrune: string | null,
  /** The parent of `node` */
  parent: TrieNode<Meta>
}

/**
 * Shared implementation of the hostname tries. Hostnames are stored label by
 * label from right to left, so lookups operate on suffixes.
 */
abstract class Triebase<Meta = unknown> {
  protected readonly $root: TrieNode<Meta> = createNode();
  protected $size = 0;

  get root() {
    return this.$root;
  }

  /**
   * @param from optional initial entries; each one is passed to {@link add}
   */
  constructor(from?: string[] | Set<string> | null) {
    // Actually build trie
    if (Array.isArray(from)) {
      for (let i = 0, len = from.length; i < len; i++) {
        this.add(from[i]);
      }
    } else if (from) {
      from.forEach((value) => this.add(value));
    }
  }

  public abstract add(suffix: string, includeAllSubdomain?: boolean, meta?: Meta): void;

  /**
   * Descends from the root following `tokens` from last to first.
   *
   * @param tokens hostname labels in left-to-right order
   * @param onLoop invoked after each successful step with the node just entered,
   * the node it was entered from, and the label
   * @returns the final node and its parent, or `null` when a label is missing
   */
  protected walkIntoLeafWithTokens(
    tokens: string[],
    onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  ) {
    let node: TrieNode = this.$root;
    let parent: TrieNode = node;

    for (let i = tokens.length - 1; i >= 0; i--) {
      const token = tokens[i];

      const next = node[3].get(token);
      if (next === undefined) {
        return null;
      }

      parent = node;
      node = next;

      onLoop(node, parent, token);
    }

    return { node, parent };
  };

  /**
   * Same as {@link walkIntoLeafWithTokens}, but takes the hostname string and
   * splits it on the fly (empty labels are skipped).
   *
   * @returns the final node and its parent, or `null` when a label is missing
   */
  protected walkIntoLeafWithSuffix(
    suffix: string,
    onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
  ) {
    let node: TrieNode = this.$root;
    let parent: TrieNode = node;

    const onToken = (token: string) => {
      const next = node[3].get(token);
      if (next === undefined) {
        // signal "not found" to walkHostnameTokens
        return null;
      }

      parent = node;
      node = next;

      onLoop(node, parent, token);

      return false;
    };

    if (walkHostnameTokens(suffix, onToken) === null) {
      return null;
    }

    return { node, parent };
  };

  /**
   * Checks whether a path for `suffix` exists in the trie. Unlike {@link has},
   * the final node does not need to be marked as an entry.
   *
   * @param suffix hostname; a leading `.` is stripped before the lookup
   * @param includeAllSubdomain when true, the result is the final node's
   * includeAllSubdomain flag
   */
  public contains(suffix: string, includeAllSubdomain = suffix[0] === '.'): boolean {
    if (suffix[0] === '.') {
      suffix = suffix.slice(1);
    }

    const res = this.walkIntoLeafWithSuffix(suffix);
    if (!res) return false;

    return includeAllSubdomain ? res.node[1] : true;
  };

  /**
   * Tuple returned by `bfs` / `bfsWithSort`. The same array instance is
   * overwritten on every call (presumably to avoid allocating per step), so
   * callers must read both slots before the next call.
   */
  private static bfsResults: [node: TrieNode | null, suffix: string[]] = [null, []];

  /**
   * Performs one breadth-first step: dequeues a node and its suffix, enqueues
   * every child (in map iteration order), and returns the dequeued pair.
   */
  private static bfs<Meta>(this: void, nodeStack: FIFO<TrieNode<Meta>>, suffixStack: FIFO<string[]>) {
    const node = nodeStack.dequeue()!;
    const suffix = suffixStack.dequeue()!;

    node[3].forEach((childNode, k) => {
      // Queue the child node for a later iteration of the traversal
      nodeStack.enqueue(childNode);
      suffixStack.enqueue([k, ...suffix]);
    });

    Triebase.bfsResults[0] = node;
    Triebase.bfsResults[1] = suffix;

    return Triebase.bfsResults;
  }

  /**
   * Same as `bfs`, but children are enqueued in the order defined by
   * {@link Triebase.compare}.
   */
  private static bfsWithSort<Meta>(this: void, nodeStack: FIFO<TrieNode<Meta>>, suffixStack: FIFO<string[]>) {
    const node = nodeStack.dequeue()!;
    const suffix = suffixStack.dequeue()!;

    if (node[3].size) {
      const keys = Array.from(node[3].keys()).sort(Triebase.compare);

      for (let i = 0, len = keys.length; i < len; i++) {
        const key = keys[i];
        const childNode = node[3].get(key)!;

        // Queue the child node for a later iteration of the traversal
        nodeStack.enqueue(childNode);
        suffixStack.enqueue([key, ...suffix]);
      }
    }

    Triebase.bfsResults[0] = node;
    Triebase.bfsResults[1] = suffix;

    return Triebase.bfsResults;
  }

  /**
   * Traverses the trie breadth-first from `initialNode` and reports every node
   * whose end flag is set.
   *
   * @param onMatches receives the labels of the entry, its includeAllSubdomain
   * flag and its meta
   * @param initialNode node to start from
   * @param initialSuffix labels that lead to `initialNode`
   * @param withSort when true, children are visited in {@link Triebase.compare} order
   */
  private walk(
    onMatches: (suffix: string[], subdomain: boolean, meta: Meta) => void,
    initialNode = this.$root,
    initialSuffix: string[] = [],
    withSort = false
  ) {
    const bfsImpl = withSort ? Triebase.bfsWithSort : Triebase.bfs;

    const nodeStack = new FIFO<TrieNode<Meta>>();
    nodeStack.enqueue(initialNode);

    // Resolving initial string (begin the start of the stack)
    const suffixStack = new FIFO<string[]>();
    suffixStack.enqueue(initialSuffix);

    do {
      const step = bfsImpl(nodeStack, suffixStack);
      // Read both slots right away: the tuple is shared and gets overwritten by the next step
      const node: TrieNode<Meta> = step[0]!;
      const suffix = step[1];

      // If the node is a sentinel, we push the suffix to the results
      if (node[0]) {
        onMatches(suffix, node[1], node[4]);
      }
    } while (nodeStack.size);
  };

  /**
   * Orders labels by length first, then by `fastStringCompare`.
   */
  static compare(this: void, a: string, b: string) {
    if (a === b) return 0;
    return (a.length - b.length) || fastStringCompare(a, b);
  }

  /**
   * Sorted variant of {@link walk}: children of every node are visited in
   * {@link Triebase.compare} order.
   */
  private walkWithSort(
    onMatches: (suffix: string[], subdomain: boolean, meta: Meta) => void,
    initialNode = this.$root,
    initialSuffix: string[] = []
  ) {
    this.walk(onMatches, initialNode, initialSuffix, true);
  };

  /**
   * Walks to the node addressed by `tokens` while tracking a branch that could
   * be pruned once that node is removed.
   *
   * @returns the reached node, its parent and the pruning candidate, or `null`
   * when the path does not exist
   */
  protected getSingleChildLeaf(tokens: string[]): FindSingleChildLeafResult<Meta> | null {
    let toPrune: TrieNode | null = null;
    let tokenToPrune: string | null = null;

    const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
      // Keeping track of a potential branch to prune

      // Even if the node size is 1, but the single child is ".", we should retain the branch
      // Since the "." could be special if it is the leaf-est node
      // NOTE(review): node[2] is the parent pointer, which createNode sets for every node
      // created by add(), so this condition looks like it is never true for visited nodes
      // and pruning never triggers. Possibly node[1] was intended; confirm before changing.
      const onlyChild = node[3].size === 0 && !node[2];

      if (toPrune != null) { // the top-est branch that could potentially being pruned
        if (!onlyChild) {
          // The branch has more than single child, retain the branch.
          // And we need to abort prune the parent, so we set it to null
          toPrune = null;
          tokenToPrune = null;
        }
      } else if (onlyChild) {
        // There is only one token child, or no child at all, we can prune it safely
        // It is now the top-est branch that could potentially being pruned
        toPrune = parent;
        tokenToPrune = token;
      }
    };

    const res = this.walkIntoLeafWithTokens(tokens, onLoop);
    if (res === null) return null;

    return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
  };

  /**
   * Method used to retrieve every item in the trie with the given suffix.
   *
   * @param inputSuffix hostname; a leading `.` is stripped before the lookup
   * @param subdomainOnly when true, a non-wildcard entry equal to `inputSuffix`
   * itself is left out of the results
   * @returns matching entries; wildcard entries are prefixed with `.`
   */
  public find(
    inputSuffix: string,
    subdomainOnly = inputSuffix[0] === '.'
    // /** @default true */ includeEqualWithSuffix = true
  ): string[] {
    if (inputSuffix[0] === '.') {
      inputSuffix = inputSuffix.slice(1);
    }

    const inputTokens = hostnameToTokens(inputSuffix);
    const res = this.walkIntoLeafWithTokens(inputTokens);
    if (res === null) return [];

    const results: string[] = [];

    const onMatches = subdomainOnly
      ? (suffix: string[], subdomain: boolean) => { // fast path (default option)
        const d = fastStringArrayJoin(suffix, '.');
        if (!subdomain && d === inputSuffix) return;

        results.push(subdomain ? '.' + d : d);
      }
      : (suffix: string[], subdomain: boolean) => { // fast path (default option)
        const d = fastStringArrayJoin(suffix, '.');
        results.push(subdomain ? '.' + d : d);
      };

    this.walk(
      onMatches,
      res.node, // Start the traversal from the node matching the input suffix
      inputTokens
    );

    return results;
  };

  /**
   * Method used to delete an entry from the trie.
   *
   * @returns `true` when an entry was removed, `false` when `suffix` was not an entry
   */
  public remove(suffix: string): boolean {
    const res = this.getSingleChildLeaf(hostnameToTokens(suffix));
    if (res === null) return false;

    const { node, toPrune, tokenToPrune } = res;
    if (!node[0]) return false;

    this.$size--;

    if (tokenToPrune && toPrune) {
      toPrune[3].delete(tokenToPrune);
    } else {
      node[0] = false;
    }

    return true;
  };

  // eslint-disable-next-line @typescript-eslint/unbound-method -- safe
  public delete = this.remove;

  /**
   * Method used to assert whether the given suffix exists in the Trie as an entry.
   *
   * @param suffix hostname; a leading `.` is stripped before the lookup
   * @param includeAllSubdomain when true, the entry must also carry the
   * includeAllSubdomain flag
   */
  public has(suffix: string, includeAllSubdomain = suffix[0] === '.'): boolean {
    if (suffix[0] === '.') {
      suffix = suffix.slice(1);
    }

    const res = this.walkIntoLeafWithSuffix(suffix);
    if (res === null) return false;
    if (!res.node[0]) return false;

    return includeAllSubdomain ? res.node[1] : true;
  };

  /**
   * Enumerates every entry. Wildcard entries are prefixed with `.`.
   *
   * @param onSuffix when provided, called for each entry instead of collecting it
   * @param withSort when true, entries are visited in sorted order
   * @returns the collected entries (empty when `onSuffix` is provided)
   */
  public dump(onSuffix: (suffix: string) => void, withSort?: boolean): void;
  public dump(onSuffix?: null, withSort?: boolean): string[];
  public dump(onSuffix?: ((suffix: string) => void) | null, withSort = false): string[] | void {
    const results: string[] = [];

    const handleSuffix = onSuffix
      ? (suffix: string[], subdomain: boolean) => {
        const d = fastStringArrayJoin(suffix, '.');
        onSuffix(subdomain ? '.' + d : d);
      }
      : (suffix: string[], subdomain: boolean) => {
        const d = fastStringArrayJoin(suffix, '.');
        results.push(subdomain ? '.' + d : d);
      };

    if (withSort) {
      this.walkWithSort(handleSuffix);
    } else {
      this.walk(handleSuffix);
    }

    return results;
  };

  /**
   * Enumerates the meta of every entry.
   *
   * @param onMeta when provided, called for each meta instead of collecting it
   * @param withSort when true, entries are visited in sorted order
   * @returns the collected meta values (empty when `onMeta` is provided)
   */
  public dumpMeta(onMeta: (meta: Meta) => void, withSort?: boolean): void;
  public dumpMeta(onMeta?: null, withSort?: boolean): Meta[];
  public dumpMeta(onMeta?: ((meta: Meta) => void) | null, withSort = false): Meta[] | void {
    const results: Meta[] = [];

    const handleMeta = onMeta
      ? (_suffix: string[], _subdomain: boolean, meta: Meta) => onMeta(meta)
      : (_suffix: string[], _subdomain: boolean, meta: Meta) => results.push(meta);

    if (withSort) {
      this.walkWithSort(handleMeta);
    } else {
      this.walk(handleMeta);
    }

    return results;
  };

  /**
   * Enumerates every entry together with its meta. Wildcard entries are
   * prefixed with `.`.
   *
   * @param onSuffix when provided, called for each entry instead of collecting it
   * @param withSort when true, entries are visited in sorted order
   * @returns the collected `[entry, meta]` pairs (empty when `onSuffix` is provided)
   */
  public dumpWithMeta(onSuffix: (suffix: string, meta: Meta | undefined) => void, withSort?: boolean): void;
  public dumpWithMeta(onMeta?: null, withSort?: boolean): Array<[string, Meta | undefined]>;
  public dumpWithMeta(onSuffix?: ((suffix: string, meta: Meta | undefined) => void) | null, withSort = false): Array<[string, Meta | undefined]> | void {
    const results: Array<[string, Meta | undefined]> = [];

    const handleSuffix = onSuffix
      ? (suffix: string[], subdomain: boolean, meta: Meta | undefined) => {
        const d = fastStringArrayJoin(suffix, '.');
        return onSuffix(subdomain ? '.' + d : d, meta);
      }
      : (suffix: string[], subdomain: boolean, meta: Meta | undefined) => {
        const d = fastStringArrayJoin(suffix, '.');
        results.push([subdomain ? '.' + d : d, meta]);
      };

    if (withSort) {
      this.walkWithSort(handleSuffix);
    } else {
      this.walk(handleSuffix);
    }

    return results;
  };

  /**
   * Renders the trie as pretty-printed JSON, with every line indented by
   * `depth` spaces.
   *
   * @param depth number of spaces to prepend to each line
   * @param unpackMeta optional formatter for meta values
   */
  public inspect(depth: number, unpackMeta?: (meta?: Meta) => any) {
    const indent = ' '.repeat(depth);
    const json = JSON.stringify(deepTrieNodeToJSON(this.$root, unpackMeta), null, 2);

    return fastStringArrayJoin(
      json.split('\n').map((line) => indent + line),
      '\n'
    );
  }

  /** Hook used by `util.inspect`; delegates to {@link inspect}. */
  public [util.inspect.custom](depth: number) {
    return this.inspect(depth);
  };
}

/**
 * Trie variant that collapses redundant entries: adding a wildcard entry
 * discards everything stored beneath it, and entries already covered by an
 * existing wildcard are not stored.
 */
export class HostnameSmolTrie<Meta = unknown> extends Triebase<Meta> {
  public smolTree = true;

  /**
   * Adds an entry.
   *
   * @param suffix hostname; a leading `.` is stripped
   * @param includeAllSubdomain whether the entry also covers all subdomains
   * @param meta optional meta stored on the entry
   */
  add(suffix: string, includeAllSubdomain = suffix[0] === '.', meta?: Meta): void {
    let node: TrieNode<Meta> = this.$root;
    let curNodeChildren: Map<string, TrieNode<Meta>> = node[3];

    if (suffix[0] === '.') {
      suffix = suffix.slice(1);
    }

    const onToken = (token: string) => {
      curNodeChildren = node[3];
      if (curNodeChildren.has(token)) {
        node = curNodeChildren.get(token)!;

        // During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie, skip adding the rest of the node
        if (node[1]) {
          return true;
        }
      } else {
        const newNode = createNode(false, node);
        curNodeChildren.set(token, newNode);
        node = newNode;
      }

      return false;
    };

    // When walkHostnameTokens returns true, we should skip the rest
    if (walkHostnameTokens(suffix, onToken)) {
      return;
    }

    // If we are in smolTree mode, we need to do something at the end of the loop
    if (includeAllSubdomain) {
      // Trying to add `[.]sub.example.com` where there is already a `blog.sub.example.com` in the trie

      // Make sure parent `[start]sub.example.com` (without dot) is removed (SENTINEL to false)
      // (/** parent */ node[2]!)[0] = false;

      // Removing the rest of the parent's child nodes
      node[3].clear();
      // The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here

      // we can use else-if here, because the children is now empty, we don't need to check the leading "."
    } else if (node[1]) {
      // Trying to add `example.com` when there is already a `.example.com` in the trie
      // No need to increment size and set SENTINEL to true (skip this "new" item)
      return;
    }

    node[0] = true;
    node[1] = includeAllSubdomain;
    node[4] = meta!;
  }

  /**
   * Removes an entry so that it no longer matches.
   *
   * @param suffix hostname; a leading `.` is stripped
   * @param includeAllSubdomain when true, the entry and everything stored
   * beneath it are removed; otherwise only the node's includeAllSubdomain flag
   * and its own entry are cleared
   */
  public whitelist(suffix: string, includeAllSubdomain = suffix[0] === '.') {
    if (suffix[0] === '.') {
      suffix = suffix.slice(1);
    }

    const tokens = hostnameToTokens(suffix);
    const res = this.getSingleChildLeaf(tokens);

    if (res === null) return;

    const { node, toPrune, tokenToPrune } = res;

    // Trying to whitelist `[start].sub.example.com` where there might already be a `[start]blog.sub.example.com` in the trie
    if (includeAllSubdomain) {
      // If there is a `[start]sub.example.com` here, remove it
      node[0] = false;
      node[1] = false;
      // Removing all the child nodes by empty the children
      node[3].clear();
    } else {
      // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
      node[1] = false;
    }

    // return early if not found
    if (!node[0]) return;

    if (tokenToPrune && toPrune) {
      toPrune[3].delete(tokenToPrune);
    } else {
      node[0] = false;
    }
  };
}

/**
 * Trie variant that stores every added entry as-is and tracks how many
 * distinct entries it holds.
 */
export class HostnameTrie<Meta = unknown> extends Triebase<Meta> {
  /** Number of entries currently stored. */
  get size() {
    return this.$size;
  }

  /**
   * Adds an entry. Adding a hostname that is already an entry is a no-op
   * (its flag and meta are left untouched).
   *
   * @param suffix hostname; a leading `.` is stripped
   * @param includeAllSubdomain whether the entry also covers all subdomains
   * @param meta optional meta stored on the entry
   */
  add(suffix: string, includeAllSubdomain = suffix[0] === '.', meta?: Meta): void {
    let node: TrieNode<Meta> = this.$root;

    const onToken = (token: string) => {
      const existing = node[3].get(token);
      if (existing === undefined) {
        const newNode = createNode(false, node);
        node[3].set(token, newNode);
        node = newNode;
      } else {
        node = existing;
      }

      return false;
    };

    if (suffix[0] === '.') {
      suffix = suffix.slice(1);
    }

    // When walkHostnameTokens returns true, we should skip the rest
    if (walkHostnameTokens(suffix, onToken)) {
      return;
    }

    // if same entry has been added before, skip
    if (node[0]) {
      return;
    }

    this.$size++;
    node[0] = true;
    node[1] = includeAllSubdomain;
    node[4] = meta!;
  }
}

/**
 * Creates a trie, optionally pre-filled with `from`.
 *
 * @param from initial entries
 * @param smolTree selects {@link HostnameSmolTrie} (`true`) or {@link HostnameTrie} (`false`)
 */
// NOTE(review): the second overload types an omitted `smolTree` as HostnameTrie, but the
// implementation defaults `smolTree` to `true` and therefore returns a HostnameSmolTrie at
// runtime. Behavior is left unchanged here; confirm which side is intended.
export function createTrie<Meta = any>(from: string[] | Set<string> | null, smolTree: true): HostnameSmolTrie<Meta>;
export function createTrie<Meta = any>(from?: string[] | Set<string> | null, smolTree?: false): HostnameTrie<Meta>;
export function createTrie<_Meta = any>(from?: string[] | Set<string> | null, smolTree = true) {
  if (smolTree) {
    return new HostnameSmolTrie(from);
  }
  return new HostnameTrie(from);
};

export type Trie = ReturnType<typeof createTrie>;

// function deepEqualArray(a: string[], b: string[]) {
//   let len = a.length;
//   if (len !== b.length) return false;
//   while (len--) {
//     if (a[len] !== b[len]) return false;
//   }
//   return true;
// };