mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Simplify kwfilter
This commit is contained in:
parent
7458d6ff86
commit
b7a11b55ed
12
Build/lib/aho-corasick.test.ts
Normal file
12
Build/lib/aho-corasick.test.ts
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
// eslint-disable-next-line import/no-unresolved -- bun
|
||||||
|
import { describe, expect, it } from 'bun:test';
|
||||||
|
import createKeywordFilter from './aho-corasick';
|
||||||
|
|
||||||
|
describe('AhoCorasick', () => {
|
||||||
|
it('basic', () => {
|
||||||
|
const kwfilter = createKeywordFilter(['ap', 'an']);
|
||||||
|
expect(kwfilter('bananan')).toBeTrue();
|
||||||
|
expect(kwfilter('apple')).toBeTrue();
|
||||||
|
expect(kwfilter('melon')).toBeFalse();
|
||||||
|
});
|
||||||
|
});
|
||||||
@ -1,95 +1,83 @@
|
|||||||
interface Node {
|
interface Node {
|
||||||
/** @default 0 */
|
|
||||||
depth?: number,
|
|
||||||
key: string,
|
|
||||||
/** @default false */
|
/** @default false */
|
||||||
word?: boolean,
|
wordEnd?: boolean,
|
||||||
children: Record<string, Node>,
|
children: Map<string, Node | undefined>,
|
||||||
fail?: Node,
|
fail?: Node
|
||||||
count: number
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const createNode = (key: string, depth = 0): Node => ({
|
const createNode = (): Node => ({
|
||||||
depth,
|
wordEnd: false,
|
||||||
key,
|
children: new Map(),
|
||||||
word: false,
|
fail: undefined
|
||||||
children: {},
|
|
||||||
fail: undefined,
|
|
||||||
count: 0
|
|
||||||
});
|
});
|
||||||
|
|
||||||
const createKeywordFilter = (keys: string[] | Set<string>) => {
|
const createKeywordFilter = (keys: string[] | Set<string>) => {
|
||||||
const root = createNode('root');
|
const root = createNode();
|
||||||
|
|
||||||
const build = () => {
|
const put = (key: string, len = key.length) => {
|
||||||
const queue: Node[] = [];
|
|
||||||
queue.push(root);
|
|
||||||
|
|
||||||
let idx = 0;
|
|
||||||
while (queue.length > idx) {
|
|
||||||
const beginNode = queue[idx];
|
|
||||||
const map = beginNode.children;
|
|
||||||
// eslint-disable-next-line guard-for-in -- plain object
|
|
||||||
for (const key in beginNode.children) {
|
|
||||||
const node = map[key];
|
|
||||||
let failNode = beginNode.fail;
|
|
||||||
|
|
||||||
while (failNode && !failNode.children[key]) {
|
|
||||||
failNode = failNode.fail;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (node) {
|
|
||||||
node.fail = failNode?.children[key] || root;
|
|
||||||
|
|
||||||
queue.push(node);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
idx++;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const put = (key: string, len: number) => {
|
|
||||||
let node = root;
|
let node = root;
|
||||||
const lastIdx = len - 1;
|
const lastIdx = len - 1;
|
||||||
node.count++;
|
|
||||||
for (let idx = 0; idx < len; idx++) {
|
|
||||||
const val = key[idx];
|
|
||||||
const nextNode = node.children[val];
|
|
||||||
|
|
||||||
if (nextNode) {
|
for (let idx = 0; idx < len; idx++) {
|
||||||
nextNode.count++;
|
const char = key[idx];
|
||||||
node = nextNode;
|
|
||||||
|
if (node.children.has(char)) {
|
||||||
|
node = node.children.get(char)!;
|
||||||
} else {
|
} else {
|
||||||
const newNode = createNode(val, idx + 1);
|
const newNode = createNode();
|
||||||
newNode.count = 1;
|
node.children.set(char, newNode);
|
||||||
node.children[val] = newNode;
|
|
||||||
node = newNode;
|
node = newNode;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lastIdx === idx && node.depth) {
|
if (lastIdx === idx && node !== root) {
|
||||||
node.word = true;
|
node.wordEnd = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
keys.forEach(k => put(k, k.length));
|
keys.forEach(k => put(k));
|
||||||
|
|
||||||
build();
|
// const build = () => {
|
||||||
|
const queue: Node[] = [];
|
||||||
|
queue.push(root);
|
||||||
|
|
||||||
|
let idx = 0;
|
||||||
|
while (queue.length > idx) {
|
||||||
|
const beginNode = queue[idx];
|
||||||
|
const children = beginNode.children;
|
||||||
|
|
||||||
|
children.forEach((node, char) => {
|
||||||
|
let failNode = beginNode.fail;
|
||||||
|
|
||||||
|
while (failNode && !failNode.children.has(char)) {
|
||||||
|
failNode = failNode.fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node) {
|
||||||
|
node.fail = failNode?.children.get(char) || root;
|
||||||
|
|
||||||
|
queue.push(node);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
idx++;
|
||||||
|
}
|
||||||
|
// };
|
||||||
|
// build();
|
||||||
|
|
||||||
return (text: string) => {
|
return (text: string) => {
|
||||||
let node: Node | undefined = root;
|
let node: Node | undefined = root;
|
||||||
|
|
||||||
for (let i = 0, textLen = text.length; i < textLen; i++) {
|
for (let i = 0, textLen = text.length; i < textLen; i++) {
|
||||||
// const key = text.charAt(i);
|
// const key = text.charAt(i);
|
||||||
const key = text[i];
|
const char = text[i];
|
||||||
|
|
||||||
while (node && !node.children[key]) {
|
while (node && !node.children.has(char)) {
|
||||||
node = node.fail;
|
node = node.fail;
|
||||||
}
|
}
|
||||||
node = node?.children[key] || root;
|
node = node?.children.get(char) || root;
|
||||||
|
|
||||||
if (node.word) {
|
if (node.wordEnd) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user