Perf/Refactor: trie w/ hostname mode

This commit is contained in:
SukkaW 2024-05-10 23:49:50 +08:00
parent 487d4fecd6
commit 59b86f706f
2 changed files with 106 additions and 35 deletions

View File

@ -79,6 +79,8 @@ describe('Trie', () => {
trie.add('sesqueroman');
trie.add('greek');
console.log({ trie });
expect(trie.find('roman')).toEqual(['roman', 'esqueroman', 'sesqueroman']);
expect(trie.find('man')).toEqual(['roman', 'esqueroman', 'sesqueroman']);
expect(trie.find('esqueroman')).toEqual(['esqueroman', 'sesqueroman']);
@ -97,16 +99,21 @@ describe('Trie', () => {
});
});
describe('surge domainset dedupe', () => {
describe.each([
['hostname mode off', false],
['hostname mode on', true]
])('surge domainset dedupe %s', (_, hostnameMode) => {
it('should not remove same entry', () => {
const trie = createTrie(['.skk.moe', 'noc.one']);
const trie = createTrie(['.skk.moe', 'noc.one'], hostnameMode);
console.log(trie);
expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe']);
expect(trie.find('noc.one')).toStrictEqual(['noc.one']);
});
it('should remove subdomain', () => {
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']);
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
console.log(trie);
@ -115,7 +122,7 @@ describe('surge domainset dedupe', () => {
});
it('should not remove non-subdomain', () => {
const trie = createTrie(['skk.moe', 'sukkaskk.moe']);
const trie = createTrie(['skk.moe', 'sukkaskk.moe'], hostnameMode);
expect(trie.find('.skk.moe')).toStrictEqual([]);
});
});

View File

@ -33,10 +33,34 @@ const createNode = (): TrieNode => {
return node;
};
export const createTrie = (from?: string[] | Set<string> | null) => {
export const createTrie = (from?: string[] | Set<string> | null, hostnameMode = false) => {
let size = 0;
const root: TrieNode = createNode();
/**
 * Turns a suffix into the sequence of tokens that is walked (from the last
 * token to the first) when traversing the trie.
 *
 * - In hostname mode the suffix is cut at every `.`: each non-empty run of
 *   characters between dots becomes one token and every dot becomes its own
 *   `'.'` token, e.g. `'.skk.moe'` yields `['.', 'skk', '.', 'moe']`.
 *   Empty runs (leading, trailing or consecutive dots) produce no label
 *   token, only the dot tokens themselves.
 * - Otherwise the suffix is returned untouched; callers index into the
 *   string directly, so every character acts as a token.
 */
const suffixToTokens = hostnameMode
  ? (suffix: string) => {
    const tokens: string[] = [];
    const labels = suffix.split('.');
    const lastIndex = labels.length - 1;

    labels.forEach((label, index) => {
      // Skip the empty pieces that split() yields around leading,
      // trailing or doubled dots.
      if (label !== '') {
        tokens.push(label);
      }
      // Every piece except the last one was followed by a dot in the input.
      if (index < lastIndex) {
        tokens.push('.');
      }
    });

    return tokens;
  }
  : (suffix: string) => suffix;
/**
* Method used to add the given prefix to the trie.
*/
@ -44,8 +68,10 @@ export const createTrie = (from?: string[] | Set<string> | null) => {
let node: TrieNode = root;
let token: string;
for (let i = suffix.length - 1; i >= 0; i--) {
token = suffix[i];
const tokens = suffixToTokens(suffix);
for (let i = tokens.length - 1; i >= 0; i--) {
token = tokens[i];
if (node.has(token)) {
node = node.get(token)!;
@ -64,14 +90,16 @@ export const createTrie = (from?: string[] | Set<string> | null) => {
};
/**
* @param {string} suffix
* @param {string} $suffix
*/
const contains = (suffix: string): boolean => {
let node: TrieNode | undefined = root;
let token: string;
for (let i = suffix.length - 1; i >= 0; i--) {
token = suffix[i];
const tokens = suffixToTokens(suffix);
for (let i = tokens.length - 1; i >= 0; i--) {
token = tokens[i];
node = node.get(token);
if (!node) return false;
@ -86,36 +114,56 @@ export const createTrie = (from?: string[] | Set<string> | null) => {
let node: TrieNode | undefined = root;
let token: string;
for (let i = inputSuffix.length - 1; i >= 0; i--) {
token = inputSuffix[i];
const inputTokens = suffixToTokens(inputSuffix);
for (let i = inputTokens.length - 1; i >= 0; i--) {
token = inputTokens[i];
if (hostnameMode && token === '') {
break;
}
node = node.get(token);
if (!node) return [];
}
const matches: string[] = [];
const matches: Array<string | string[]> = [];
// Performing DFS from prefix
const nodeStack: TrieNode[] = [node];
const suffixStack: string[] = [inputSuffix];
const suffixStack: Array<string | string[]> = [inputTokens];
do {
const suffix: string = suffixStack.pop()!;
const suffix: string | string[] = suffixStack.pop()!;
node = nodeStack.pop()!;
if (node[SENTINEL]) {
if (includeEqualWithSuffix || suffix !== inputSuffix) {
if (includeEqualWithSuffix) {
matches.push(suffix);
} else if (hostnameMode) {
if ((suffix as string[]).some((t, i) => t !== inputTokens[i])) {
matches.push(suffix);
}
} else if (suffix !== inputTokens) {
matches.push(suffix);
}
}
node.forEach((childNode, k) => {
nodeStack.push(childNode);
suffixStack.push(k + suffix);
if (hostnameMode) {
const stack = (suffix as string[]).slice();
stack.unshift(k);
suffixStack.push(stack);
} else {
suffixStack.push(k + (suffix as string));
}
});
} while (nodeStack.length);
return matches;
return hostnameMode ? matches.map((m) => (m as string[]).join('')) : matches as string[];
};
/**
@ -125,9 +173,11 @@ export const createTrie = (from?: string[] | Set<string> | null) => {
let node: TrieNode | undefined = root;
let token: string;
const inputTokens = suffixToTokens(inputSuffix);
// Find the leaf-est node, and early return if not any
for (let i = inputSuffix.length - 1; i >= 0; i--) {
token = inputSuffix[i];
for (let i = inputTokens.length - 1; i >= 0; i--) {
token = inputTokens[i];
node = node.get(token);
if (!node) return;
@ -135,22 +185,32 @@ export const createTrie = (from?: string[] | Set<string> | null) => {
// Performing DFS from prefix
const nodeStack: TrieNode[] = [node];
const suffixStack: string[] = [inputSuffix];
const suffixStack: Array<string | string[]> = [inputTokens];
do {
const suffix = suffixStack.pop()!;
node = nodeStack.pop()!;
if (node[SENTINEL]) {
if (suffix !== inputSuffix) {
if (suffix !== inputTokens) {
// found match, delete it from set
set.delete(suffix);
if (hostnameMode) {
set.delete((suffix as string[]).join(''));
} else {
set.delete(suffix as string);
}
}
}
node.forEach((childNode, k) => {
nodeStack.push(childNode);
suffixStack.push(k + suffix);
if (hostnameMode) {
const stack = (suffix as string[]).slice();
stack.unshift(k);
suffixStack.push(stack);
} else {
suffixStack.push(k + (suffix as string));
}
});
} while (nodeStack.length);
};
@ -165,8 +225,10 @@ export const createTrie = (from?: string[] | Set<string> | null) => {
let parent: TrieNode = node;
let token: string;
for (let i = suffix.length - 1; i >= 0; i--) {
token = suffix[i];
const suffixTokens = suffixToTokens(suffix);
for (let i = suffixTokens.length - 1; i >= 0; i--) {
token = suffixTokens[i];
parent = node;
node = node.get(token);
@ -208,8 +270,10 @@ export const createTrie = (from?: string[] | Set<string> | null) => {
const has = (suffix: string): boolean => {
let node: TrieNode = root;
for (let i = suffix.length - 1; i >= 0; i--) {
const token = suffix[i];
const tokens = suffixToTokens(suffix);
for (let i = tokens.length - 1; i >= 0; i--) {
const token = tokens[i];
if (!node.has(token)) {
return false;