Add a benchmark comparing keyword-filter implementations and reuse it in the tests.

* Build/lib/aho-corasick.bench.ts (new): exports getFns(), which builds a
  [name, predicate] pair for the local createKeywordFilter and for four
  third-party packages (modern-ahocorasick, @monyone/aho-corasick, fastscan,
  @blackglory/aho-corasick). When executed as the main module it fetches
  https://easylist.to/easylist/easylist.txt and benchmarks every predicate
  over the resulting lines with mitata.
* Build/lib/aho-corasick.test.ts: the single 'basic' case is replaced by a
  table of (keywords, fixtures, expected) rows; each row is checked against
  every implementation returned by getFns().

diff --git a/Build/lib/aho-corasick.bench.ts b/Build/lib/aho-corasick.bench.ts
new file mode 100644
index 00000000..3a6e71ee
--- /dev/null
+++ b/Build/lib/aho-corasick.bench.ts
@@ -0,0 +1,73 @@
+import { fetchRemoteTextByLine } from './fetch-text-by-line';
+import { processLineFromReadline } from './process-line';
+
+import createKeywordFilter from './aho-corasick';
+
+import ModernAhoCorasick from 'modern-ahocorasick';
+import { AhoCorasick as MonyoneAhoCorasick } from '@monyone/aho-corasick';
+// @ts-expect-error -- no types
+import FastScanner from 'fastscan';
+import { AhoCorasick as RustAhoCorasick } from '@blackglory/aho-corasick';
+
+function runKeywordFilter(data: string[], testFn: (line: string) => boolean) {
+  for (let i = 0, len = data.length; i < len; i++) {
+    testFn(data[i]);
+  }
+}
+
+export function getFns(keywordsSet: string[] | readonly string[]) {
+  const tmp1 = new ModernAhoCorasick(keywordsSet.slice());
+  const tmp2 = new MonyoneAhoCorasick(keywordsSet.slice());
+  const scanner = new FastScanner(keywordsSet.slice());
+  const tmp3 = new RustAhoCorasick(keywordsSet.slice(), { caseSensitive: true });
+
+  return [
+    ['createKeywordFilter', createKeywordFilter(keywordsSet.slice())],
+    ['modern-ahocorasick', (line: string) => tmp1.search(line).length > 0],
+    ['@monyone/aho-corasick', (line: string) => tmp2.hasKeywordInText(line)],
+    ['fastscan', (line: string) => scanner.search(line).length > 0],
+    ['@blackglory/aho-corasick', (line: string) => tmp3.isMatch(line)]
+  ] as const;
+}
+
+if (require.main === module) {
+  (async () => {
+    const { bench, group, run } = await import('mitata');
+
+    const data = await processLineFromReadline(await fetchRemoteTextByLine('https://easylist.to/easylist/easylist.txt'));
+    console.log({ dataLen: data.length });
+    const keywordsSet = [
+      '!',
+      '?',
+      '*',
+      '[',
+      '(',
+      ']',
+      ')',
+      ',',
+      '#',
+      '%',
+      '&',
+      '=',
+      '~',
+      // special modifier
+      '$popup',
+      '$removeparam',
+      '$popunder',
+      '$cname',
+      '$frame',
+      // some bad syntax
+      '^popup'
+    ];
+
+    const fns = getFns(keywordsSet);
+
+    group(() => {
+      fns.forEach(([name, fn]) => {
+        bench(name, () => runKeywordFilter(data, fn));
+      });
+    });
+
+    run();
+  })();
+}
diff --git a/Build/lib/aho-corasick.test.ts b/Build/lib/aho-corasick.test.ts
index 085aae6a..5b871b4b 100644
--- a/Build/lib/aho-corasick.test.ts
+++ b/Build/lib/aho-corasick.test.ts
@@ -1,17 +1,33 @@
 import { describe, it } from 'mocha';
 import { expect } from 'expect';
-import createKeywordFilter from './aho-corasick';
+import { getFns } from './aho-corasick.bench';
 
 describe('AhoCorasick', () => {
-  it('basic', () => {
-    let kwfilter = createKeywordFilter(['ap', 'an']);
-    expect(kwfilter('bananan')).toBe(true);
-    expect(kwfilter('apple')).toBe(true);
-    expect(kwfilter('melon')).toBe(false);
+  for (const test of ([
+    [
+      ['ap', 'an'],
+      ['bananan', 'apple', 'melon'],
+      [true, true, false]
+    ],
+    [
+      ['cdn', 'sukka'],
+      ['bananan', 'apple', 'melon'],
+      [false, false, false]
+    ]
+  ] as const)) {
+    const kwtests = getFns(test[0]);
+    const fixtures = test[1];
+    const expected = test[2];
 
-    kwfilter = createKeywordFilter(['cdn', 'sukka']);
-    expect(kwfilter('bananan')).toBe(false);
-    expect(kwfilter('apple')).toBe(false);
-    expect(kwfilter('melon')).toBe(false);
-  });
+    for (const kwtest of kwtests) {
+      const fnName = kwtest[0];
+      const fn = kwtest[1];
+
+      it(fnName, () => {
+        for (let i = 0, len = fixtures.length; i < len; i++) {
+          expect(fn(fixtures[i])).toBe(expected[i]);
+        }
+      });
+    }
+  }
 });