Feat: trie now supports whitelist method

This commit is contained in:
SukkaW 2024-05-26 00:53:01 +08:00
parent 05c2db6ac7
commit 48b5f609dd
2 changed files with 122 additions and 7 deletions

View File

@ -144,8 +144,6 @@ describe('smol tree', () => {
'.blog.sub.example.com', 'sub.example.com', 'cdn.sub.example.com', '.sub.example.com' '.blog.sub.example.com', 'sub.example.com', 'cdn.sub.example.com', '.sub.example.com'
], true, true); ], true, true);
console.log(trie);
expect(trie.dump()).toStrictEqual([ expect(trie.dump()).toStrictEqual([
'.sub.example.com', '.sub.example.com',
'cdn.noc.one', 'www.noc.one', 'cdn.noc.one', 'www.noc.one',
@ -153,13 +151,11 @@ describe('smol tree', () => {
]); ]);
}); });
it.only('should create simple tree - 2', () => { it('should create simple tree - 2', () => {
const trie = createTrie([ const trie = createTrie([
'.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe' '.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe'
], true, true); ], true, true);
console.log({ trie });
expect(trie.dump()).toStrictEqual([ expect(trie.dump()).toStrictEqual([
'.skk.moe' '.skk.moe'
]); ]);
@ -170,8 +166,6 @@ describe('smol tree', () => {
'.blog.sub.example.com', 'cdn.sub.example.com', '.sub.example.com' '.blog.sub.example.com', 'cdn.sub.example.com', '.sub.example.com'
], true, true); ], true, true);
console.log(trie);
expect(trie.dump()).toStrictEqual([ expect(trie.dump()).toStrictEqual([
'.sub.example.com' '.sub.example.com'
]); ]);
@ -197,4 +191,54 @@ describe('smol tree', () => {
'act.commercial.shouji.360.cn' 'act.commercial.shouji.360.cn'
]); ]);
}); });
// Builds a trie (both boolean flags enabled) from two unrelated registrable
// domains plus one `blog.` subdomain of each, and checks that dump() returns
// all four entries in the order listed below.
it('should dedupe subdomain properly', () => {
  const input = ['skk.moe', 'anotherskk.moe', 'blog.anotherskk.moe', 'blog.skk.moe'];
  const expected = ['anotherskk.moe', 'blog.anotherskk.moe', 'skk.moe', 'blog.skk.moe'];

  const trie = createTrie(input, true, true);

  expect(trie.dump()).toStrictEqual(expected);
});
// Walks a trie through a sequence of whitelist() calls and checks dump()
// after each step:
//   1. initial contents,
//   2. after whitelisting the dotted form `.skk.moe`,
//   3. after whitelisting the exact hostname `anotherskk.moe`,
//   4. after re-adding `anotherskk.moe` and whitelisting `.anotherskk.moe`.
it('should efficiently whitelist domains', () => {
  const input = ['skk.moe', 'anotherskk.moe', 'blog.anotherskk.moe', 'blog.skk.moe'];
  const trie = createTrie(input, true, true);

  // Step 1: nothing has been whitelisted yet.
  const initialDump = trie.dump();
  expect(initialDump).toStrictEqual([
    'anotherskk.moe',
    'blog.anotherskk.moe',
    'skk.moe',
    'blog.skk.moe'
  ]);

  // Step 2: the dotted form is expected to drop both `skk.moe` and `blog.skk.moe`.
  trie.whitelist('.skk.moe');
  const afterDottedWhitelist = trie.dump();
  expect(afterDottedWhitelist).toStrictEqual(['anotherskk.moe', 'blog.anotherskk.moe']);

  // Step 3: the exact form is expected to drop only `anotherskk.moe` itself.
  trie.whitelist('anotherskk.moe');
  const afterExactWhitelist = trie.dump();
  expect(afterExactWhitelist).toStrictEqual(['blog.anotherskk.moe']);

  // Step 4: re-add the apex, then whitelist the dotted form; nothing should remain.
  trie.add('anotherskk.moe');
  trie.whitelist('.anotherskk.moe');
  const finalDump = trie.dump();
  expect(finalDump).toStrictEqual([]);
});
}); });

View File

@ -351,6 +351,76 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
return results; return results;
}; };
/**
 * Removes `suffix` from the trie so that it is no longer marked as a stored entry.
 *
 * The suffix is tokenized with `suffixToTokens` and the trie is walked from the
 * last token to the first. The walk stops without changing anything when:
 *  - a token on the path is missing (the suffix is not in the trie), or
 *  - a node on the path has a `.` child marked with SENTINEL (per the original
 *    comments: the suffix is already covered by a stored `.parent` entry).
 *
 * When the first token is `.` (e.g. `.sub.example.com`), the node reached at
 * index 1 has its SENTINEL cleared and its `.` child replaced by a fresh node.
 *
 * Otherwise, if the final node is marked with SENTINEL, the entry is removed:
 * either by deleting the longest dangling branch that leads only to this entry,
 * or, when the node is shared with other entries, by clearing its SENTINEL.
 *
 * @param suffix hostname (optionally with a leading `.`) to remove
 * @returns `false` when the resolved node is not marked with SENTINEL;
 *          `undefined` on every other path.
 *          NOTE(review): the mixed `false` / `undefined` result is kept as-is for
 *          compatibility — consider settling on one.
 * @throws Error when neither `hostnameMode` nor `smolTree` is enabled
 */
const whitelist = (suffix: string) => {
  if (!hostnameMode && !smolTree) {
    throw new Error('whitelist method is only available in hostname mode or smolTree mode.');
  }

  let node: TrieNode | undefined = root;

  // Top-most (parent, token) edge below which the path is a single chain that
  // leads to nothing but the entry being removed. Both are null when no such
  // edge is currently known.
  let toPrune: TrieNode | null = null;
  let tokenToPrune: string | null = null;

  const tokens = suffixToTokens(suffix);

  for (let i = tokens.length - 1; i >= 0; i--) {
    const token: string = tokens[i];
    const parent: TrieNode = node;

    node = node.get(token);
    if (!node) {
      // The suffix is not in the trie, nothing to remove.
      return;
    }

    // Keeping track of a potential branch to prune.
    // A node carries at most one "thing" when it is either a childless stored
    // entry, or a pure pass-through with exactly one child and no SENTINEL.
    // `size` only counts children, so the SENTINEL has to be counted separately;
    // otherwise a stored entry that still has children (e.g. `www.example.com`
    // next to `www.www.example.com`) would be deleted together with them.
    const carriesSingleEntry = node.size + (node[SENTINEL] ? 1 : 0) < 2;
    if (!carriesSingleEntry) {
      // Something else lives at or below this node: retain the branch.
      toPrune = null;
      tokenToPrune = null;
    } else if (toPrune === null) {
      // First link of a new dangling chain; keep the top-most edge.
      toPrune = parent;
      tokenToPrune = token;
    }

    // During the whitelist of `[start]blog.skk.moe` and find out that there is a
    // `[start].skk.moe` in the trie: the subdomain is covered, skip.
    // This check also covers the last token (i === 0), so no separate check is needed there.
    if (node.get('.')?.[SENTINEL]) {
      return;
    }

    // Trying to whitelist `[start].sub.example.com` where there is already a
    // `[start]blog.sub.example.com` in the trie
    if (i === 1 && tokens[0] === '.') {
      // If there is a `[start]sub.example.com` here, remove it
      node[SENTINEL] = false;

      // Replace the `.` child with a freshly created node.
      // NOTE(review): only the `.` entry is overwritten here; any other children of
      // `node` stay attached as far as this block shows — confirm that dump()
      // treats them as removed.
      const newNode = createNode(node);
      node.set('.', newNode);
      node = newNode;
      break;
    }
  }

  // NOTE(review): after the `.`-prefixed branch above, `node` is the node returned
  // by createNode; presumably its SENTINEL is false, so this returns `false` — confirm.
  if (!node[SENTINEL]) return false;

  if (tokenToPrune && toPrune) {
    // The whole chain below this edge exists only for the removed entry.
    toPrune.delete(tokenToPrune);
  } else {
    // The node is shared with other entries: just unmark it.
    node[SENTINEL] = false;
  }
};
if (Array.isArray(from)) { if (Array.isArray(from)) {
for (let i = 0, l = from.length; i < l; i++) { for (let i = 0, l = from.length; i < l; i++) {
add(from[i]); add(from[i]);
@ -377,6 +447,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
get root() { get root() {
return root; return root;
}, },
whitelist,
[Bun.inspect.custom]: () => JSON.stringify(deepTrieNodeToJSON(root), null, 2) [Bun.inspect.custom]: () => JSON.stringify(deepTrieNodeToJSON(root), null, 2)
}; };
}; };