Chore: update source deduping tool

This commit is contained in:
SukkaW
2025-11-11 00:07:43 +08:00
parent 4d9c2a5154
commit bb6c7cb3fa
3 changed files with 93 additions and 8 deletions

View File

@@ -365,4 +365,47 @@ describe('smol tree', () => {
trie.whitelist('cdn.example.com');
expect(trie.dump()).toStrictEqual(['blog.cdn.example.com']);
});
it('contains - normal', () => {
  const trie = createTrie(
    ['skk.moe', 'anotherskk.moe', 'blog.anotherskk.moe', 'blog.skk.moe'],
    true
  );

  // [hostname to look up, whether the trie should report it as contained]
  const expectations: Array<[string, boolean]> = [
    // Every inserted hostname is found.
    ['skk.moe', true],
    ['blog.skk.moe', true],
    ['anotherskk.moe', true],
    ['blog.anotherskk.moe', true],
    // Unrelated hostnames, a truncated TLD, and a subdomain that was never inserted are not.
    ['example.com', false],
    ['blog.example.com', false],
    ['skk.mo', false],
    ['cdn.skk.moe', false]
  ];

  for (const [hostname, present] of expectations) {
    expect(trie.contains(hostname)).toBe(present);
  }
});
it('contains - subdomain', () => {
  const trie = createTrie(['index.rubygems.org'], true);

  // Only the exact inserted hostname matches: neither its parent domain
  // nor a deeper subdomain of it is reported as contained.
  const expectations: Array<[string, boolean]> = [
    ['rubygems.org', false],
    ['index.rubygems.org', true],
    ['sub.index.rubygems.org', false]
  ];

  for (const [hostname, present] of expectations) {
    expect(trie.contains(hostname)).toBe(present);
  }
});
it('contains - include subdomains', () => {
  // A leading dot on the inserted entry covers the domain itself and every subdomain below it.
  const trie = createTrie(['.skk.moe'], true);

  const expectations: Array<[string, boolean]> = [
    // The apex and subdomains at any depth are contained.
    ['skk.moe', true],
    ['blog.skk.moe', true],
    ['image.cdn.skk.moe', true],
    // Other domains and a truncated TLD are not.
    ['example.com', false],
    ['blog.example.com', false],
    ['skk.mo', false]
  ];

  for (const [hostname, present] of expectations) {
    expect(trie.contains(hostname)).toBe(present);
  }
});
});

View File

@@ -186,10 +186,44 @@ abstract class Triebase<Meta = unknown> {
/**
 * Reports whether the trie contains `suffix`.
 *
 * @param suffix Hostname to look up. A leading `.` is skipped when walking
 * the hostname (see `hostnameFromIndex`).
 * @param includeAllSubdomain When `true`, the node reached at the end of the
 * walk must carry the `INCLUDE_ALL_SUBDOMAIN` bit. Defaults to `true` exactly
 * when `suffix` starts with `.`.
 * @returns `true` when the lookup matches, `false` otherwise.
 *
 * NOTE(review): this listing appears to come from a diff whose +/- markers
 * were stripped. The `walkIntoLeafWithSuffix` statements directly below look
 * like the removed pre-change body; as listed they return unconditionally,
 * which would leave the token-walking code after them unreachable. Confirm
 * against the real file which of the two bodies is live.
 */
public contains(suffix: string, includeAllSubdomain = suffix[0] === '.'): boolean {
// Index of the first hostname character: 1 skips a leading '.', otherwise 0.
const hostnameFromIndex = suffix[0] === '.' ? 1 : 0;
// NOTE(review): presumably the previous implementation (see the note in the doc comment).
const res = this.walkIntoLeafWithSuffix(suffix, hostnameFromIndex);
if (!res) return false;
if (includeAllSubdomain) return getBit(res.node[0], INCLUDE_ALL_SUBDOMAIN);
return true;
// Token-by-token walk, starting at the root node.
let node: TrieNode = this.$root;
// let parent: TrieNode = node;
// Children map of the node currently being visited (slot 2 of the node tuple).
let child: Map<string, TrieNode<Meta>> = node[2];
// Outcome to report when the walk stops early because a token has no matching child.
let result = false;
// Callback invoked by walkHostnameTokens for each hostname token.
// NOTE(review): the tests suggest tokens arrive from the TLD towards the
// subdomain, and that returning `null` aborts the walk — confirm against
// walkHostnameTokens.
const onToken = (token: string) => {
// if (token === '') {
// return true;
// }
// parent = node;
child = node[2];
if (child.has(token)) {
// Token matches a child: descend into it and keep walking.
node = child.get(token)!;
} else {
// No child for this token. The lookup still counts as contained when the
// deepest node reached so far has the INCLUDE_ALL_SUBDOMAIN bit set.
if (getBit(node[0], INCLUDE_ALL_SUBDOMAIN)) {
result = true;
}
return null;
}
return false;
};
// A `null` from the walk means it did not consume every token; in that case
// `result` already holds the answer decided inside `onToken`.
if (walkHostnameTokens(suffix, onToken, hostnameFromIndex) === null) {
return result;
}
// Every token matched, so `node` is the node for the full hostname.
// With includeAllSubdomain the node must have the INCLUDE_ALL_SUBDOMAIN bit;
// otherwise it must have the START bit.
if (includeAllSubdomain) return getBit(node[0], INCLUDE_ALL_SUBDOMAIN);
return getBit(node[0], START);
// if (res === null) return false;
// if (includeAllSubdomain) return getBit(res.node[0], INCLUDE_ALL_SUBDOMAIN);
// return true;
};
// Class-level `[node, suffix]` tuple, initialised to `[null, []]`.
// NOTE(review): presumably a shared scratch value reused by the BFS walkers to
// avoid allocating a new tuple per step — confirm against its usages outside this view.
private static bfsResults: [node: TrieNode | null, suffix: string[]] = [null, []];