Update Aho Corasick benchmark

This commit is contained in:
SukkaW 2024-11-21 21:48:33 +08:00
parent 6747357d35
commit 200da7a2be
2 changed files with 100 additions and 11 deletions

View File

@ -0,0 +1,73 @@
import { fetchRemoteTextByLine } from './fetch-text-by-line';
import { processLineFromReadline } from './process-line';
import createKeywordFilter from './aho-corasick';
import ModernAhoCorasick from 'modern-ahocorasick';
import { AhoCorasick as MonyoneAhoCorasick } from '@monyone/aho-corasick';
// @ts-expect-error -- no types
import FastScanner from 'fastscan';
import { AhoCorasick as RustAhoCorasick } from '@blackglory/aho-corasick';
/**
 * Invokes `testFn` once for every entry of `data`, in order, and discards
 * the results. It exists purely as the unit of work measured by the benchmark.
 *
 * @param data lines to feed into the matcher
 * @param testFn matcher under test; its return value is ignored
 */
function runKeywordFilter(data: string[], testFn: (line: string) => boolean) {
  // Snapshot the length once so the loop bound is fixed before any call is made.
  const total = data.length;
  let cursor = 0;
  while (cursor < total) {
    testFn(data[cursor]);
    cursor += 1;
  }
}
/**
 * Builds one keyword-matching predicate per implementation being compared.
 *
 * Every implementation is constructed from its own `slice()` copy of
 * `keywordsSet` (presumably so that no library can mutate an array shared
 * with the others or with the caller).
 *
 * @param keywordsSet keywords each matcher should look for
 * @returns a readonly list of `[name, predicate]` pairs; each predicate takes
 * a line and reports whether that implementation found any keyword in it
 */
export function getFns(keywordsSet: string[] | readonly string[]) {
  const modernMatcher = new ModernAhoCorasick(keywordsSet.slice());
  const monyoneMatcher = new MonyoneAhoCorasick(keywordsSet.slice());
  const fastScanner = new FastScanner(keywordsSet.slice());
  const rustMatcher = new RustAhoCorasick(keywordsSet.slice(), { caseSensitive: true });
  const localFilter = createKeywordFilter(keywordsSet.slice());

  // `search` returns a list of hits for these two libraries; a non-empty list means a match.
  const viaModern = (line: string) => modernMatcher.search(line).length > 0;
  const viaFastscan = (line: string) => fastScanner.search(line).length > 0;
  const viaMonyone = (line: string) => monyoneMatcher.hasKeywordInText(line);
  const viaRust = (line: string) => rustMatcher.isMatch(line);

  return [
    ['createKeywordFilter', localFilter],
    ['modern-ahocorasick', viaModern],
    ['@monyone/aho-corasick', viaMonyone],
    ['fastscan', viaFastscan],
    ['@blackglory/aho-corasick', viaRust]
  ] as const;
}
// Benchmark entry point: runs only when this file is executed directly,
// not when it is imported (e.g. for `getFns`).
if (require.main === module) {
  (async () => {
    // mitata is imported inside the guard, so it is loaded only for a direct run.
    const { bench, group, run } = await import('mitata');

    // Benchmark corpus: the lines of the remote EasyList filter list.
    const data = await processLineFromReadline(await fetchRemoteTextByLine('https://easylist.to/easylist/easylist.txt'));
    console.log({ dataLen: data.length });

    const keywordsSet = [
      '!',
      '?',
      '*',
      '[',
      '(',
      ']',
      ')',
      ',',
      '#',
      '%',
      '&',
      '=',
      '~',
      // special modifier
      '$popup',
      '$removeparam',
      '$popunder',
      '$cname',
      '$frame',
      // some bad syntax
      '^popup'
    ];

    const fns = getFns(keywordsSet);

    // Register every implementation in a single group so they are compared together.
    group(() => {
      for (const [name, fn] of fns) {
        bench(name, () => runKeywordFilter(data, fn));
      }
    });

    // `run()` is asynchronous; await it so a benchmark failure rejects this
    // function instead of becoming a detached, unhandled rejection.
    await run();
  })().catch((error: unknown) => {
    // Report fetch or benchmark failures explicitly and signal failure to the caller.
    console.error(error);
    process.exitCode = 1;
  });
}

View File

@ -1,17 +1,33 @@
import { describe, it } from 'mocha';
import { expect } from 'expect';
import createKeywordFilter from './aho-corasick';
import { getFns } from './aho-corasick.bench';
// Test suite for the keyword matchers.
// NOTE(review): this span looks like a rendered diff in which removed and added
// lines are interleaved without +/- markers, so it is not valid TypeScript as
// shown — confirm against the real file before editing the code itself.
describe('AhoCorasick', () => {
it('basic', () => {
let kwfilter = createKeywordFilter(['ap', 'an']);
expect(kwfilter('bananan')).toBe(true);
expect(kwfilter('apple')).toBe(true);
expect(kwfilter('melon')).toBe(false);
// Each table row is [keywords, input strings, expected result per input].
for (const test of ([
[
['ap', 'an'],
['bananan', 'apple', 'melon'],
[true, true, false]
],
[
['cdn', 'sukka'],
['bananan', 'apple', 'melon'],
[false, false, false]
]
] as const)) {
// Build every matcher implementation from this row's keywords.
const kwtests = getFns(test[0]);
const fixtures = test[1];
const expected = test[2];
kwfilter = createKeywordFilter(['cdn', 'sukka']);
expect(kwfilter('bananan')).toBe(false);
expect(kwfilter('apple')).toBe(false);
expect(kwfilter('melon')).toBe(false);
});
// Register one test case per [name, matcher] pair returned by getFns.
for (const kwtest of kwtests) {
const fnName = kwtest[0];
const fn = kwtest[1];
it(fnName, () => {
// Every fixture must yield the expected boolean at the same index.
for (let i = 0, len = fixtures.length; i < len; i++) {
expect(fn(fixtures[i])).toBe(expected[i]);
}
});
}
}
});