From b7a11b55ed1b76e12f0371ba7e3914de57922638 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Sun, 28 Jan 2024 00:55:23 +0800 Subject: [PATCH] Simplify kwfilter --- Build/lib/aho-corasick.test.ts | 12 ++++ Build/lib/aho-corasick.ts | 112 +++++++++++++++------------------ 2 files changed, 62 insertions(+), 62 deletions(-) create mode 100644 Build/lib/aho-corasick.test.ts diff --git a/Build/lib/aho-corasick.test.ts b/Build/lib/aho-corasick.test.ts new file mode 100644 index 00000000..a47d537c --- /dev/null +++ b/Build/lib/aho-corasick.test.ts @@ -0,0 +1,12 @@ +// eslint-disable-next-line import/no-unresolved -- bun +import { describe, expect, it } from 'bun:test'; +import createKeywordFilter from './aho-corasick'; + +describe('AhoCorasick', () => { + it('basic', () => { + const kwfilter = createKeywordFilter(['ap', 'an']); + expect(kwfilter('bananan')).toBeTrue(); + expect(kwfilter('apple')).toBeTrue(); + expect(kwfilter('melon')).toBeFalse(); + }); +}); diff --git a/Build/lib/aho-corasick.ts b/Build/lib/aho-corasick.ts index 9b0fd136..cebbc5ad 100644 --- a/Build/lib/aho-corasick.ts +++ b/Build/lib/aho-corasick.ts @@ -1,95 +1,83 @@ interface Node { - /** @default 0 */ - depth?: number, - key: string, /** @default false */ - word?: boolean, - children: Record, - fail?: Node, - count: number + wordEnd?: boolean, + children: Map, + fail?: Node } -const createNode = (key: string, depth = 0): Node => ({ - depth, - key, - word: false, - children: {}, - fail: undefined, - count: 0 +const createNode = (): Node => ({ + wordEnd: false, + children: new Map(), + fail: undefined }); const createKeywordFilter = (keys: string[] | Set) => { - const root = createNode('root'); + const root = createNode(); - const build = () => { - const queue: Node[] = []; - queue.push(root); - - let idx = 0; - while (queue.length > idx) { - const beginNode = queue[idx]; - const map = beginNode.children; - // eslint-disable-next-line guard-for-in -- plain object - for (const key in beginNode.children) { - const node = map[key]; - let failNode = beginNode.fail; - - while (failNode && !failNode.children[key]) { - failNode = failNode.fail; - } - - if (node) { - node.fail = failNode?.children[key] || root; - - queue.push(node); - } - } - - idx++; - } - }; - - const put = (key: string, len: number) => { + const put = (key: string, len = key.length) => { let node = root; const lastIdx = len - 1; - node.count++; - for (let idx = 0; idx < len; idx++) { - const val = key[idx]; - const nextNode = node.children[val]; - if (nextNode) { - nextNode.count++; - node = nextNode; + for (let idx = 0; idx < len; idx++) { + const char = key[idx]; + + if (node.children.has(char)) { + node = node.children.get(char)!; } else { - const newNode = createNode(val, idx + 1); - newNode.count = 1; - node.children[val] = newNode; + const newNode = createNode(); + node.children.set(char, newNode); node = newNode; } - if (lastIdx === idx && node.depth) { - node.word = true; + if (lastIdx === idx && node !== root) { + node.wordEnd = true; } } }; - keys.forEach(k => put(k, k.length)); + keys.forEach(k => put(k)); - build(); + // const build = () => { + const queue: Node[] = []; + queue.push(root); + + let idx = 0; + while (queue.length > idx) { + const beginNode = queue[idx]; + const children = beginNode.children; + + children.forEach((node, char) => { + let failNode = beginNode.fail; + + while (failNode && !failNode.children.has(char)) { + failNode = failNode.fail; + } + + if (node) { + node.fail = failNode?.children.get(char) || root; + + queue.push(node); + } + }); + + idx++; + } + // }; + // build(); return (text: string) => { let node: Node | undefined = root; for (let i = 0, textLen = text.length; i < textLen; i++) { // const key = text.charAt(i); - const key = text[i]; + const char = text[i]; - while (node && !node.children[key]) { + while (node && !node.children.has(char)) { node = node.fail; } - node = node?.children[key] || root; + node = node?.children.get(char) || root; - if (node.word) { + if (node.wordEnd) { return true; } }