mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Fix/Perf: more efficient and correct whitelisting
This commit is contained in:
parent
8b1eeb1c14
commit
2f329a4144
@ -94,9 +94,6 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
||||
}
|
||||
});
|
||||
|
||||
// Remove as many domains as possible from domainSets before creating trie
|
||||
SetSubstract(domainSets, filterRuleWhitelistDomainSets);
|
||||
|
||||
// Perform kwfilter to remove as many domains as possible from domainSets before creating trie
|
||||
childSpan.traceChildSync('dedupe from black keywords', () => {
|
||||
const kwfilter = createKeywordFilter(domainKeywordsSet);
|
||||
@ -110,11 +107,14 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
||||
});
|
||||
});
|
||||
|
||||
const trie = span.traceChildSync('dedupe from white suffixes', () => {
|
||||
const trie = createTrie(domainSets, true, true);
|
||||
span.traceChildSync('dedupe from white suffixes', () => {
|
||||
|
||||
filterRuleWhitelistDomainSets.forEach(suffix => {
|
||||
trie.whitelist(suffix);
|
||||
});
|
||||
|
||||
return trie;
|
||||
});
|
||||
|
||||
// Dedupe domainSets
|
||||
|
||||
@ -251,7 +251,7 @@ export const PREDEFINED_WHITELIST = [
|
||||
'business.site', // Drag'n'Drop site building platform
|
||||
'page.link', // Firebase URL Shortener
|
||||
'notion.site'
|
||||
];
|
||||
].map(suffix => `.${suffix}`);
|
||||
|
||||
export const PREDEFINED_ENFORCED_WHITELIST = [
|
||||
'r2.dev',
|
||||
|
||||
@ -208,6 +208,26 @@ describe('smol tree', () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it('should whitelist trie correctly', () => {
|
||||
const trie = createTrie([
|
||||
'.t.co',
|
||||
't.co',
|
||||
'example.t.co',
|
||||
'.skk.moe'
|
||||
], true, true);
|
||||
|
||||
expect(trie.dump()).toStrictEqual([
|
||||
'.skk.moe',
|
||||
'.t.co'
|
||||
]);
|
||||
|
||||
trie.whitelist('.t.co');
|
||||
expect(trie.dump()).toStrictEqual(['.skk.moe']);
|
||||
|
||||
trie.whitelist('skk.moe');
|
||||
expect(trie.dump()).toStrictEqual([]);
|
||||
});
|
||||
|
||||
it('should efficiently whitelist domains', () => {
|
||||
const trie = createTrie([
|
||||
'skk.moe',
|
||||
@ -231,7 +251,6 @@ describe('smol tree', () => {
|
||||
]);
|
||||
|
||||
trie.whitelist('anotherskk.moe');
|
||||
|
||||
expect(trie.dump()).toStrictEqual([
|
||||
'blog.anotherskk.moe'
|
||||
]);
|
||||
|
||||
@ -370,27 +370,11 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
parent = node;
|
||||
|
||||
node = node.get(token);
|
||||
if (!node) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Keeping track of a potential branch to prune
|
||||
// If the node is to be pruned, but there is more than one token child in it, we can't prune it
|
||||
// If there is only one token child, or no child at all, we can prune it safely
|
||||
|
||||
const onlyChild = node.size === 1 && node.has(token);
|
||||
|
||||
if (onlyChild) {
|
||||
toPrune = parent;
|
||||
tokenToPrune = token;
|
||||
} else if (toPrune !== null) { // not only child, retain the branch
|
||||
toPrune = null;
|
||||
tokenToPrune = null;
|
||||
}
|
||||
if (!node) return;
|
||||
|
||||
// While whitelisting `[start]blog.skk.moe`, we may find that there is a `[start].skk.moe` in the trie
|
||||
// Dedupe the covered subdomain by skipping
|
||||
if (node.get('.')?.[SENTINEL]) {
|
||||
if (i > 1 && node.get('.')?.[SENTINEL] === true) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -399,21 +383,35 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
// If there is a `[start]sub.example.com` here, remove it
|
||||
node[SENTINEL] = false;
|
||||
|
||||
// Removing the rest of the child nodes by creating a new node and disconnecting the old one
|
||||
const newNode = createNode(node);
|
||||
node.set('.', newNode);
|
||||
node = newNode;
|
||||
break;
|
||||
// Removing all the child nodes by disconnecting "."
|
||||
node.delete('.');
|
||||
} else if (i === 0) {
|
||||
// Trying to whitelist `example.com` when there is already a `.example.com` in the trie
|
||||
const dotNode = node.get('.');
|
||||
if (dotNode?.[SENTINEL] === true) {
|
||||
dotNode[SENTINEL] = false;
|
||||
}
|
||||
if (i === 0) {
|
||||
// Trying to add `example.com` when there is already a `.example.com` in the trie
|
||||
if (node.get('.')?.[SENTINEL] === true) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Keeping track of a potential branch to prune
|
||||
// If the node is to be pruned, but there is more than one token child in it, we can't prune it
|
||||
// If there is only one token child, or no child at all, we can prune it safely
|
||||
|
||||
if (toPrune != null) { // the first branch that could potentially being pruned
|
||||
if (node.size > 1 || node.has('.')) {
|
||||
// not only child, retain the branch.
|
||||
// And we need to abort pruning the parent, so we set it to null
|
||||
toPrune = null;
|
||||
tokenToPrune = null;
|
||||
}
|
||||
} else if (node.size < 2 && !node.has('.')) {
|
||||
toPrune = parent;
|
||||
tokenToPrune = token;
|
||||
}
|
||||
}
|
||||
|
||||
if (!node[SENTINEL]) return false;
|
||||
|
||||
if (tokenToPrune && toPrune) {
|
||||
toPrune.delete(tokenToPrune);
|
||||
} else {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user