diff --git a/Build/lib/domain-deduper.ts b/Build/lib/domain-deduper.ts index 89d34505..b89b1a19 100644 --- a/Build/lib/domain-deduper.ts +++ b/Build/lib/domain-deduper.ts @@ -3,22 +3,29 @@ import { createTrie } from './trie'; export function domainDeduper(inputDomains: string[], toArray?: true): string[]; export function domainDeduper(inputDomains: string[], toArray: false): Set; export function domainDeduper(inputDomains: string[], toArray = true): string[] | Set { - const trie = createTrie(inputDomains, true); - const sets = new Set(inputDomains); - - for (let i = 0, len1 = inputDomains.length; i < len1; i++) { - const d = inputDomains[i]; - if (d[0] !== '.') { - continue; - } - - trie.substractSetInPlaceFromFound(d, sets); - sets.delete(d.slice(1)); - } - + const trie = createTrie(inputDomains, true, true); + const dumped = trie.dump(); if (toArray) { - return Array.from(sets); + return dumped; } + return new Set(dumped); - return sets; + // const trie = createTrie(inputDomains, true); + // const sets = new Set(inputDomains); + + // for (let i = 0, len1 = inputDomains.length; i < len1; i++) { + // const d = inputDomains[i]; + // if (d[0] !== '.') { + // continue; + // } + + // trie.substractSetInPlaceFromFound(d, sets); + // sets.delete(d.slice(1)); + // } + + // if (toArray) { + // return Array.from(sets); + // } + + // return sets; } diff --git a/Build/lib/trie.test.ts b/Build/lib/trie.test.ts index b0edd2a1..0c0bd7eb 100644 --- a/Build/lib/trie.test.ts +++ b/Build/lib/trie.test.ts @@ -112,7 +112,7 @@ describe.each([ expect(trie.find('noc.one')).toStrictEqual(['noc.one']); }); - it('should remove subdomain', () => { + it('should match subdomain - 1', () => { const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode); console.log(trie); @@ -121,8 +121,80 @@ describe.each([ expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']); }); + it('should match subdomain - 2', () => { + const trie = createTrie(['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode); + + console.log(trie); + + expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']); + expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']); + }); + it('should not remove non-subdomain', () => { const trie = createTrie(['skk.moe', 'sukkaskk.moe'], hostnameMode); expect(trie.find('.skk.moe')).toStrictEqual([]); }); }); + +describe('smol tree', () => { + it('should create simple tree - 1', () => { + const trie = createTrie([ + '.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe', + 'www.noc.one', 'cdn.noc.one', + '.blog.sub.example.com', 'sub.example.com', 'cdn.sub.example.com', '.sub.example.com' + ], true, true); + + console.log(trie); + + expect(trie.dump()).toStrictEqual([ + '.sub.example.com', + 'cdn.noc.one', 'www.noc.one', + '.skk.moe' + ]); + }); + + it.only('should create simple tree - 2', () => { + const trie = createTrie([ + '.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe' + ], true, true); + + console.log({ trie }); + + expect(trie.dump()).toStrictEqual([ + '.skk.moe' + ]); + }); + + it('should create simple tree - 2', () => { + const trie = createTrie([ + '.blog.sub.example.com', 'cdn.sub.example.com', '.sub.example.com' + ], true, true); + + console.log(trie); + + expect(trie.dump()).toStrictEqual([ + '.sub.example.com' + ]); + + trie.add('.sub.example.com'); + expect(trie.dump()).toStrictEqual([ + '.sub.example.com' + ]); + }); + + it('should create simple tree - 3', () => { + const trie = createTrie([ + 'commercial.shouji.360.cn', + 'act.commercial.shouji.360.cn', + 'cdn.creative.medialytics.com', + 'px.cdn.creative.medialytics.com' + ], true, true); + + expect(trie.dump()).toStrictEqual([ + 'cdn.creative.medialytics.com', + 'px.cdn.creative.medialytics.com', + 'commercial.shouji.360.cn', + 'act.commercial.shouji.360.cn' + ]); + }); +}); diff --git a/Build/lib/trie.ts b/Build/lib/trie.ts index d6810b1d..f81bc117 100644 --- a/Build/lib/trie.ts +++ b/Build/lib/trie.ts @@ -5,9 +5,11 @@ // import { Trie } from 'mnemonist'; export const SENTINEL = Symbol('SENTINEL'); +const PARENT = Symbol('Parent Node'); type TrieNode = { [SENTINEL]: boolean, + [PARENT]: TrieNode | null, [Bun.inspect.custom]: () => string } & Map; @@ -26,14 +28,15 @@ function trieNodeInspectCustom(this: TrieNode) { return JSON.stringify(deepTrieNodeToJSON(this), null, 2); } -const createNode = (): TrieNode => { +const createNode = (parent: TrieNode | null = null): TrieNode => { const node = new Map() as TrieNode; node[SENTINEL] = false; + node[PARENT] = parent; node[Bun.inspect.custom] = trieNodeInspectCustom; return node; }; -export const createTrie = (from?: string[] | Set | null, hostnameMode = false) => { +export const createTrie = (from?: string[] | Set | null, hostnameMode = false, smolTree = false) => { let size = 0; const root: TrieNode = createNode(); @@ -75,11 +78,35 @@ export const createTrie = (from?: string[] | Set | null, hostnameMode = if (node.has(token)) { node = node.get(token)!; + + if (smolTree) { + if (node.get('.')?.[SENTINEL] === true) { + return; + } + // return; + } } else { - const newNode = createNode(); + const newNode = createNode(node); node.set(token, newNode); node = newNode; } + + if (smolTree) { + if (i === 1 && tokens[0] === '.') { + node[SENTINEL] = false; + // Trying to add `.sub.example.com` where there is already a `blog.sub.example.com` in the trie + const newNode = createNode(node); + node.set('.', newNode); + node = newNode; + break; + } + if (i === 0) { + // Trying to add `example.com` when there is already a `.example.com` in the trie + if (node.get('.')?.[SENTINEL] === true) { + return; + } + } + } } // Do we need to increase size? @@ -107,10 +134,15 @@ export const createTrie = (from?: string[] | Set | null, hostnameMode = return true; }; + /** * Method used to retrieve every item in the trie with the given prefix. */ const find = (inputSuffix: string, /** @default true */ includeEqualWithSuffix = true): string[] => { + if (smolTree) { + throw new Error('A Trie with smolTree enabled cannot perform find!'); + } + let node: TrieNode | undefined = root; let token: string; @@ -153,10 +185,7 @@ export const createTrie = (from?: string[] | Set | null, hostnameMode = nodeStack.push(childNode); if (hostnameMode) { - const stack = (suffix as string[]).slice(); - stack.unshift(k); - - suffixStack.push(stack); + suffixStack.push([k, ...suffix]); } else { suffixStack.push(k + (suffix as string)); } @@ -167,9 +196,13 @@ export const createTrie = (from?: string[] | Set | null, hostnameMode = }; /** - * Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place. - */ + * Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place. + */ const substractSetInPlaceFromFound = (inputSuffix: string, set: Set) => { + if (smolTree) { + throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!'); + } + let node: TrieNode | undefined = root; let token: string; @@ -193,7 +226,7 @@ export const createTrie = (from?: string[] | Set | null, hostnameMode = if (node[SENTINEL]) { if (suffix !== inputTokens) { - // found match, delete it from set + // found match, delete it from set if (hostnameMode) { set.delete((suffix as string[]).join('')); } else { @@ -205,8 +238,7 @@ export const createTrie = (from?: string[] | Set | null, hostnameMode = node.forEach((childNode, k) => { nodeStack.push(childNode); if (hostnameMode) { - const stack = (suffix as string[]).slice(); - stack.unshift(k); + const stack = [k, ...suffix]; suffixStack.push(stack); } else { suffixStack.push(k + (suffix as string)); @@ -216,8 +248,8 @@ export const createTrie = (from?: string[] | Set | null, hostnameMode = }; /** - * Method used to delete a prefix from the trie. - */ + * Method used to delete a prefix from the trie. + */ const remove = (suffix: string): boolean => { let node: TrieNode | undefined = root; let toPrune: TrieNode | null = null; @@ -294,35 +326,43 @@ export const createTrie = (from?: string[] | Set | null, hostnameMode = } const dump = () => { - const node = root; const nodeStack: TrieNode[] = []; - const suffixStack: string[] = []; + const suffixStack: Array = []; // Resolving initial string - const suffix = ''; + const suffix = hostnameMode ? [] : ''; - nodeStack.push(node); + nodeStack.push(root); suffixStack.push(suffix); const results: string[] = []; - let currentNode: TrieNode; - let currentPrefix: string; - let hasValue = false; + let node: TrieNode; do { - currentNode = nodeStack.pop()!; - currentPrefix = suffixStack.pop()!; + let hasValue = false; - if (currentNode[SENTINEL]) { + node = nodeStack.pop()!; + const suffix = suffixStack.pop()!; + + if (node[SENTINEL]) { hasValue = true; } node.forEach((childNode, k) => { nodeStack.push(childNode); - suffixStack.push(k + suffix); + + if (hostnameMode) { + suffixStack.push([k, ...suffix]); + } else { + suffixStack.push(k + (suffix as string)); + } }); - if (hasValue) results.push(currentPrefix); + if (hasValue) { + results.push( + hostnameMode ? (suffix as string[]).join('') : (suffix as string) + ); + } } while (nodeStack.length); return results; @@ -338,6 +378,9 @@ export const createTrie = (from?: string[] | Set | null, hostnameMode = has, dump, get size() { + if (smolTree) { + throw new Error('A Trie with smolTree enabled cannot have correct size!'); + } return size; }, get root() {