Simplify kwfilter

This commit is contained in:
SukkaW 2024-01-28 00:55:23 +08:00
parent 7458d6ff86
commit b7a11b55ed
2 changed files with 62 additions and 62 deletions

View File

@ -0,0 +1,12 @@
// eslint-disable-next-line import/no-unresolved -- bun
import { describe, expect, it } from 'bun:test';
import createKeywordFilter from './aho-corasick';
describe('AhoCorasick', () => {
  it('basic', () => {
    // Build one filter from two short keywords and reuse it for every probe below.
    const containsKeyword = createKeywordFilter(['ap', 'an']);

    // 'an' appears inside the text.
    expect(containsKeyword('bananan')).toBeTrue();
    // 'ap' appears at the very start of the text.
    expect(containsKeyword('apple')).toBeTrue();
    // Neither 'ap' nor 'an' appears anywhere in the text.
    expect(containsKeyword('melon')).toBeFalse();
  });
});

View File

@ -1,95 +1,83 @@
/**
 * A single state of the keyword trie walked by the keyword filter.
 */
interface Node {
  /**
   * Set when the last character of an inserted keyword lands on this node.
   * @default false
   */
  wordEnd?: boolean,
  /** Outgoing edges, keyed by the next character of a keyword. */
  children: Map<string, Node | undefined>,
  /** Node the matcher falls back to when no child exists for the current character. */
  fail?: Node
}

/**
 * Creates an empty trie node: not a keyword end, no children, no fallback link.
 *
 * A fresh `Map` is allocated on every call so nodes never share their edge table.
 *
 * @returns a new, unlinked node
 */
const createNode = (): Node => ({
  wordEnd: false,
  children: new Map(),
  fail: undefined
});
const createKeywordFilter = (keys: string[] | Set<string>) => { const createKeywordFilter = (keys: string[] | Set<string>) => {
const root = createNode('root'); const root = createNode();
const build = () => { const put = (key: string, len = key.length) => {
const queue: Node[] = [];
queue.push(root);
let idx = 0;
while (queue.length > idx) {
const beginNode = queue[idx];
const map = beginNode.children;
// eslint-disable-next-line guard-for-in -- plain object
for (const key in beginNode.children) {
const node = map[key];
let failNode = beginNode.fail;
while (failNode && !failNode.children[key]) {
failNode = failNode.fail;
}
if (node) {
node.fail = failNode?.children[key] || root;
queue.push(node);
}
}
idx++;
}
};
const put = (key: string, len: number) => {
let node = root; let node = root;
const lastIdx = len - 1; const lastIdx = len - 1;
node.count++;
for (let idx = 0; idx < len; idx++) {
const val = key[idx];
const nextNode = node.children[val];
if (nextNode) { for (let idx = 0; idx < len; idx++) {
nextNode.count++; const char = key[idx];
node = nextNode;
if (node.children.has(char)) {
node = node.children.get(char)!;
} else { } else {
const newNode = createNode(val, idx + 1); const newNode = createNode();
newNode.count = 1; node.children.set(char, newNode);
node.children[val] = newNode;
node = newNode; node = newNode;
} }
if (lastIdx === idx && node.depth) { if (lastIdx === idx && node !== root) {
node.word = true; node.wordEnd = true;
} }
} }
}; };
keys.forEach(k => put(k, k.length)); keys.forEach(k => put(k));
build(); // const build = () => {
const queue: Node[] = [];
queue.push(root);
let idx = 0;
while (queue.length > idx) {
const beginNode = queue[idx];
const children = beginNode.children;
children.forEach((node, char) => {
let failNode = beginNode.fail;
while (failNode && !failNode.children.has(char)) {
failNode = failNode.fail;
}
if (node) {
node.fail = failNode?.children.get(char) || root;
queue.push(node);
}
});
idx++;
}
// };
// build();
return (text: string) => { return (text: string) => {
let node: Node | undefined = root; let node: Node | undefined = root;
for (let i = 0, textLen = text.length; i < textLen; i++) { for (let i = 0, textLen = text.length; i < textLen; i++) {
// const key = text.charAt(i); // const key = text.charAt(i);
const key = text[i]; const char = text[i];
while (node && !node.children[key]) { while (node && !node.children.has(char)) {
node = node.fail; node = node.fail;
} }
node = node?.children[key] || root; node = node?.children.get(char) || root;
if (node.word) { if (node.wordEnd) {
return true; return true;
} }
} }