mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Perf: refactor trie to avoid iterate twice
This commit is contained in:
parent
315b38b999
commit
8bcfc0e528
@ -14,7 +14,6 @@ import { SOURCE_DIR } from './constants/dir';
|
||||
const getS3OSSDomainsPromise = (async (): Promise<string[]> => {
|
||||
const trie = createTrie(
|
||||
await getPublicSuffixListTextPromise(),
|
||||
true,
|
||||
false
|
||||
);
|
||||
|
||||
|
||||
@ -26,7 +26,7 @@ const BLACKLIST = [
|
||||
|
||||
export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
|
||||
// First trie is to find the microsoft domains that matches probe domains
|
||||
const trie = createTrie(null, true);
|
||||
const trie = createTrie(null, false);
|
||||
for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
|
||||
const domain = extractDomainsFromFelixDnsmasq(line);
|
||||
if (domain) {
|
||||
@ -36,7 +36,7 @@ export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
|
||||
const foundMicrosoftCdnDomains = PROBE_DOMAINS.flatMap(domain => trie.find(domain));
|
||||
|
||||
// Second trie is to remove blacklisted domains
|
||||
const trie2 = createTrie(foundMicrosoftCdnDomains, true, true);
|
||||
const trie2 = createTrie(foundMicrosoftCdnDomains, true);
|
||||
BLACKLIST.forEach(trie2.whitelist);
|
||||
|
||||
return sortDomains(trie2.dump())
|
||||
|
||||
@ -115,8 +115,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
|
||||
});
|
||||
|
||||
const [baseTrie, extraTrie] = span.traceChildSync('create smol trie while deduping black keywords', (childSpan) => {
|
||||
const baseTrie = createTrie(null, true, true);
|
||||
const extraTrie = createTrie(null, true, true);
|
||||
const baseTrie = createTrie(null, true);
|
||||
const extraTrie = createTrie(null, true);
|
||||
|
||||
const kwfilter = createKeywordFilter(domainKeywordsSet);
|
||||
|
||||
|
||||
@ -177,7 +177,6 @@ export const buildSpeedtestDomainSet = task(require.main === module, __filename)
|
||||
// OpenSpeedtest
|
||||
'open.cachefly.net'
|
||||
],
|
||||
true,
|
||||
true
|
||||
);
|
||||
|
||||
|
||||
@ -5,11 +5,11 @@ export function domainDeduper(inputDomains: string[] | Trie, toArray: false): Se
|
||||
export function domainDeduper(inputDomains: string[] | Trie, toArray = true): string[] | Set<string> {
|
||||
let trie: Trie;
|
||||
if (Array.isArray(inputDomains)) {
|
||||
trie = createTrie(inputDomains, true, true);
|
||||
} else if (!inputDomains.hostnameMode || !inputDomains.smolTree) {
|
||||
throw new Error('Invalid trie');
|
||||
} else {
|
||||
trie = createTrie(inputDomains, true);
|
||||
} else if (inputDomains.smolTree) {
|
||||
trie = inputDomains;
|
||||
} else {
|
||||
throw new Error('Invalid trie');
|
||||
}
|
||||
|
||||
const dumped = trie.dump();
|
||||
|
||||
@ -1,27 +1,41 @@
|
||||
import { createTrie } from './trie';
|
||||
import { createTrie, hostnameToTokens } from './trie';
|
||||
import { describe, it } from 'mocha';
|
||||
import { expect } from 'chai';
|
||||
|
||||
describe('Trie', () => {
|
||||
it('should be possible to add items to a Trie.', () => {
|
||||
const trie = createTrie();
|
||||
describe('hostname to tokens', () => {
|
||||
it('should split hostname into tokens.', () => {
|
||||
expect(hostnameToTokens('.blog.skk.moe')).to.deep.equal([
|
||||
'.',
|
||||
'blog',
|
||||
'.',
|
||||
'skk',
|
||||
'.',
|
||||
'moe'
|
||||
]);
|
||||
|
||||
trie.add('sukka');
|
||||
trie.add('ukka');
|
||||
trie.add('akku');
|
||||
expect(hostnameToTokens('blog.skk.moe')).to.deep.equal([
|
||||
'blog',
|
||||
'.',
|
||||
'skk',
|
||||
'.',
|
||||
'moe'
|
||||
]);
|
||||
|
||||
expect(trie.size).to.equal(3);
|
||||
expect(hostnameToTokens('skk.moe')).to.deep.equal([
|
||||
'skk',
|
||||
'.',
|
||||
'moe'
|
||||
]);
|
||||
|
||||
expect(trie.has('sukka')).to.equal(true);
|
||||
expect(trie.has('ukka')).to.equal(true);
|
||||
expect(trie.has('akku')).to.equal(true);
|
||||
expect(trie.has('noc')).to.equal(false);
|
||||
expect(trie.has('suk')).to.equal(false);
|
||||
expect(trie.has('sukkaw')).to.equal(false);
|
||||
expect(hostnameToTokens('moe')).to.deep.equal([
|
||||
'moe'
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
it('should be possible to add domains to a Trie (hostname).', () => {
|
||||
const trie = createTrie(null, true);
|
||||
describe('Trie', () => {
|
||||
it('should be possible to add domains to a Trie.', () => {
|
||||
const trie = createTrie();
|
||||
|
||||
trie.add('a.skk.moe');
|
||||
trie.add('skk.moe');
|
||||
@ -40,17 +54,6 @@ describe('Trie', () => {
|
||||
it('adding the same item several times should not increase size.', () => {
|
||||
const trie = createTrie();
|
||||
|
||||
trie.add('rat');
|
||||
trie.add('erat');
|
||||
trie.add('rat');
|
||||
|
||||
expect(trie.size).to.equal(2);
|
||||
expect(trie.has('rat')).to.equal(true);
|
||||
});
|
||||
|
||||
it('adding the same item several times should not increase size (hostname).', () => {
|
||||
const trie = createTrie(null, true);
|
||||
|
||||
trie.add('skk.moe');
|
||||
trie.add('blog.skk.moe');
|
||||
trie.add('skk.moe');
|
||||
@ -71,30 +74,7 @@ describe('Trie', () => {
|
||||
});
|
||||
|
||||
it('should be possible to delete items.', () => {
|
||||
const trie = createTrie();
|
||||
|
||||
trie.add('rat');
|
||||
trie.add('rate');
|
||||
trie.add('tar');
|
||||
|
||||
expect(trie.delete('')).to.equal(false);
|
||||
expect(trie.delete('')).to.equal(false);
|
||||
expect(trie.delete('hello')).to.equal(false);
|
||||
|
||||
expect(trie.delete('rat')).to.equal(true);
|
||||
expect(trie.has('rat')).to.equal(false);
|
||||
expect(trie.has('rate')).to.equal(true);
|
||||
|
||||
expect(trie.size).to.equal(2);
|
||||
|
||||
expect(trie.delete('rate')).to.equal(true);
|
||||
expect(trie.size).to.equal(1);
|
||||
expect(trie.delete('tar')).to.equal(true);
|
||||
expect(trie.size).to.equal(0);
|
||||
});
|
||||
|
||||
it('should be possible to delete items (hostname).', () => {
|
||||
const trie = createTrie(null, true);
|
||||
const trie = createTrie(null);
|
||||
|
||||
trie.add('skk.moe');
|
||||
trie.add('example.com');
|
||||
@ -117,17 +97,6 @@ describe('Trie', () => {
|
||||
});
|
||||
|
||||
it('should be possible to check the existence of a sequence in the Trie.', () => {
|
||||
const trie = createTrie();
|
||||
|
||||
trie.add('romanesque');
|
||||
|
||||
expect(trie.has('romanesque')).to.equal(true);
|
||||
expect(trie.has('roman')).to.equal(false);
|
||||
expect(trie.has('esque')).to.equal(false);
|
||||
expect(trie.has('')).to.equal(false);
|
||||
});
|
||||
|
||||
it('should be possible to check the existence of a sequence in the Trie (hostname).', () => {
|
||||
const trie = createTrie(null, true);
|
||||
|
||||
trie.add('example.org.skk.moe');
|
||||
@ -139,23 +108,7 @@ describe('Trie', () => {
|
||||
});
|
||||
|
||||
it('should be possible to retrieve items matching the given prefix.', () => {
|
||||
const trie = createTrie();
|
||||
|
||||
trie.add('roman');
|
||||
trie.add('esqueroman');
|
||||
trie.add('sesqueroman');
|
||||
trie.add('greek');
|
||||
|
||||
expect(trie.find('roman')).to.deep.equal(['roman', 'esqueroman', 'sesqueroman']);
|
||||
expect(trie.find('man')).to.deep.equal(['roman', 'esqueroman', 'sesqueroman']);
|
||||
expect(trie.find('esqueroman')).to.deep.equal(['esqueroman', 'sesqueroman']);
|
||||
expect(trie.find('eek')).to.deep.equal(['greek']);
|
||||
expect(trie.find('hello')).to.deep.equal([]);
|
||||
expect(trie.find('')).to.deep.equal(['greek', 'roman', 'esqueroman', 'sesqueroman']);
|
||||
});
|
||||
|
||||
it('should be possible to retrieve items matching the given prefix (hostname).', () => {
|
||||
const trie = createTrie(null, true);
|
||||
const trie = createTrie(null);
|
||||
|
||||
trie.add('example.com');
|
||||
trie.add('blog.example.com');
|
||||
@ -171,47 +124,42 @@ describe('Trie', () => {
|
||||
});
|
||||
|
||||
it('should be possible to create a trie from an arbitrary iterable.', () => {
|
||||
let trie = createTrie(['roman', 'esqueroman']);
|
||||
let trie = createTrie(['skk.moe', 'blog.skk.moe']);
|
||||
|
||||
expect(trie.size).to.equal(2);
|
||||
expect(trie.has('roman')).to.equal(true);
|
||||
expect(trie.has('skk.moe')).to.equal(true);
|
||||
|
||||
trie = createTrie(new Set(['skk.moe', 'example.com']), true);
|
||||
trie = createTrie(new Set(['skk.moe', 'example.com']));
|
||||
expect(trie.size).to.equal(2);
|
||||
expect(trie.has('skk.moe')).to.equal(true);
|
||||
});
|
||||
});
|
||||
|
||||
([
|
||||
['hostname mode off', false],
|
||||
['hostname mode on', true]
|
||||
] as const).forEach(([description, hostnameMode]) => {
|
||||
describe('surge domainset dedupe ' + description, () => {
|
||||
it('should not remove same entry', () => {
|
||||
const trie = createTrie(['.skk.moe', 'noc.one'], hostnameMode);
|
||||
describe('surge domainset dedupe', () => {
|
||||
it('should not remove same entry', () => {
|
||||
const trie = createTrie(['.skk.moe', 'noc.one']);
|
||||
|
||||
expect(trie.find('.skk.moe')).to.deep.equal(['.skk.moe']);
|
||||
expect(trie.find('noc.one')).to.deep.equal(['noc.one']);
|
||||
});
|
||||
expect(trie.find('.skk.moe')).to.deep.equal(['.skk.moe']);
|
||||
expect(trie.find('noc.one')).to.deep.equal(['noc.one']);
|
||||
});
|
||||
|
||||
it('should match subdomain - 1', () => {
|
||||
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
|
||||
it('should match subdomain - 1', () => {
|
||||
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']);
|
||||
|
||||
expect(trie.find('.skk.moe')).to.deep.equal(['image.cdn.skk.moe', 'blog.skk.moe']);
|
||||
expect(trie.find('.sukkaw.com')).to.deep.equal(['www.sukkaw.com']);
|
||||
});
|
||||
expect(trie.find('.skk.moe')).to.deep.equal(['image.cdn.skk.moe', 'blog.skk.moe']);
|
||||
expect(trie.find('.sukkaw.com')).to.deep.equal(['www.sukkaw.com']);
|
||||
});
|
||||
|
||||
it('should match subdomain - 2', () => {
|
||||
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
|
||||
it('should match subdomain - 2', () => {
|
||||
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']);
|
||||
|
||||
expect(trie.find('.skk.moe')).to.deep.equal(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']);
|
||||
expect(trie.find('.sukkaw.com')).to.deep.equal(['www.sukkaw.com']);
|
||||
});
|
||||
expect(trie.find('.skk.moe')).to.deep.equal(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']);
|
||||
expect(trie.find('.sukkaw.com')).to.deep.equal(['www.sukkaw.com']);
|
||||
});
|
||||
|
||||
it('should not remove non-subdomain', () => {
|
||||
const trie = createTrie(['skk.moe', 'sukkaskk.moe'], hostnameMode);
|
||||
expect(trie.find('.skk.moe')).to.deep.equal([]);
|
||||
});
|
||||
it('should not remove non-subdomain', () => {
|
||||
const trie = createTrie(['skk.moe', 'sukkaskk.moe']);
|
||||
expect(trie.find('.skk.moe')).to.deep.equal([]);
|
||||
});
|
||||
});
|
||||
|
||||
@ -221,7 +169,7 @@ describe('smol tree', () => {
|
||||
'.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe',
|
||||
'www.noc.one', 'cdn.noc.one',
|
||||
'.blog.sub.example.com', 'sub.example.com', 'cdn.sub.example.com', '.sub.example.com'
|
||||
], true, true);
|
||||
], true);
|
||||
|
||||
expect(trie.dump()).to.deep.equal([
|
||||
'.sub.example.com',
|
||||
@ -233,7 +181,7 @@ describe('smol tree', () => {
|
||||
it('should create simple tree - 2', () => {
|
||||
const trie = createTrie([
|
||||
'.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe'
|
||||
], true, true);
|
||||
], true);
|
||||
|
||||
expect(trie.dump()).to.deep.equal([
|
||||
'.skk.moe'
|
||||
@ -243,7 +191,7 @@ describe('smol tree', () => {
|
||||
it('should create simple tree - 2', () => {
|
||||
const trie = createTrie([
|
||||
'.blog.sub.example.com', 'cdn.sub.example.com', '.sub.example.com'
|
||||
], true, true);
|
||||
], true);
|
||||
|
||||
expect(trie.dump()).to.deep.equal([
|
||||
'.sub.example.com'
|
||||
@ -261,7 +209,7 @@ describe('smol tree', () => {
|
||||
'act.commercial.shouji.360.cn',
|
||||
'cdn.creative.medialytics.com',
|
||||
'px.cdn.creative.medialytics.com'
|
||||
], true, true);
|
||||
], true);
|
||||
|
||||
expect(trie.dump()).to.deep.equal([
|
||||
'cdn.creative.medialytics.com',
|
||||
@ -277,7 +225,7 @@ describe('smol tree', () => {
|
||||
'anotherskk.moe',
|
||||
'blog.anotherskk.moe',
|
||||
'blog.skk.moe'
|
||||
], true, true);
|
||||
], true);
|
||||
|
||||
expect(trie.dump()).to.deep.equal([
|
||||
'anotherskk.moe',
|
||||
@ -293,7 +241,7 @@ describe('smol tree', () => {
|
||||
'anotherskk.moe',
|
||||
'blog.anotherskk.moe',
|
||||
'blog.skk.moe'
|
||||
], true, true);
|
||||
], true);
|
||||
|
||||
expect(trie.dump()).to.deep.equal([
|
||||
'anotherskk.moe',
|
||||
@ -328,7 +276,7 @@ describe('smol tree', () => {
|
||||
'.skk.moe',
|
||||
'blog.cdn.example.com',
|
||||
'cdn.example.com'
|
||||
], true, true);
|
||||
], true);
|
||||
|
||||
expect(trie.dump()).to.deep.equal([
|
||||
'cdn.example.com', 'blog.cdn.example.com',
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Suffix Trie based on Mnemonist Trie
|
||||
* Hostname-Optimized Trie based on Mnemonist Trie
|
||||
*/
|
||||
|
||||
import { fastStringArrayJoin } from './misc';
|
||||
@ -28,57 +28,86 @@ const createNode = (parent: TrieNode | null = null): TrieNode => {
|
||||
return [false, parent, new Map<string, TrieNode>()] as TrieNode;
|
||||
};
|
||||
|
||||
const hostnameToTokens = (hostname: string): string[] => {
|
||||
return hostname.split('.').reduce<string[]>((acc, token, index) => {
|
||||
if (index > 0) {
|
||||
acc.push('.', token);
|
||||
} else if (token.length > 0) {
|
||||
acc.push(token);
|
||||
export const hostnameToTokens = (hostname: string): string[] => {
|
||||
const tokens = hostname.split('.');
|
||||
const results: string[] = [];
|
||||
let token = '';
|
||||
for (let i = 0, l = tokens.length; i < l; i++) {
|
||||
if (i > 0) {
|
||||
results.push('.');
|
||||
}
|
||||
return acc;
|
||||
}, []);
|
||||
|
||||
token = tokens[i];
|
||||
if (token.length > 0) {
|
||||
results.push(token);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
};
|
||||
|
||||
export const createTrie = (from?: string[] | Set<string> | null, hostnameMode = false, smolTree = false) => {
|
||||
/**
 * Walk the tokens of a hostname from right to left (TLD first) without
 * materialising a token array that includes the separators.
 *
 * Every non-empty label and every "." separator is passed to `onToken`.
 * Empty labels (e.g. the one before a leading ".") are skipped, but the
 * separator next to them is still reported.
 *
 * The callback steers the walk through its return value:
 * - `false`: keep walking
 * - `true`: stop early, the walk returns `true`
 * - `null`: abort, the walk returns `null`
 *
 * @param hostname the hostname to tokenize, e.g. `blog.skk.moe` or `.skk.moe`
 * @param onToken callback invoked for each token, right-most token first
 * @returns `null` or `true` as soon as `onToken` yields it; `false` once every token has been visited
 */
const walkHostnameTokens = (hostname: string, onToken: (token: string) => boolean | null): boolean | null => {
  const tokens = hostname.split('.');
  const last = tokens.length - 1;

  let token = '';

  for (let i = last; i >= 0; i--) {
    // The right-most label has no separator after it, so "." is only emitted from the second label on
    if (i < last) {
      const signal = onToken('.');
      // `null` must be propagated explicitly: it is falsy and would otherwise be mistaken for "keep walking"
      if (signal === null) {
        return null;
      }
      if (signal) {
        return true;
      }
    }

    token = tokens[i];
    if (token.length > 0) {
      const signal = onToken(token);
      if (signal === null) {
        return null;
      }
      if (signal) {
        return true;
      }
    }
  }

  return false;
};
|
||||
|
||||
export const createTrie = (from?: string[] | Set<string> | null, smolTree = false) => {
|
||||
let size = 0;
|
||||
const root: TrieNode = createNode();
|
||||
|
||||
const isHostnameMode = (_token: string | string[]): _token is string[] => hostnameMode;
|
||||
|
||||
const suffixToTokens = hostnameMode
|
||||
? hostnameToTokens
|
||||
: (suffix: string) => suffix;
|
||||
|
||||
/**
|
||||
* Method used to add the given suffix to the trie.
|
||||
*/
|
||||
const add = smolTree
|
||||
? (suffix: string): void => {
|
||||
let node: TrieNode = root;
|
||||
let token: string;
|
||||
|
||||
const tokens = suffixToTokens(suffix);
|
||||
|
||||
for (let i = tokens.length - 1; i >= 0; i--) {
|
||||
token = tokens[i];
|
||||
|
||||
const onToken = (token: string) => {
|
||||
if (node[2].has(token)) {
|
||||
node = node[2].get(token)!;
|
||||
|
||||
// During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie
|
||||
// Dedupe the covered subdomain by skipping
|
||||
if (token === '.' && node[0]) {
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
const newNode = createNode(node);
|
||||
node[2].set(token, newNode);
|
||||
node = newNode;
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
// When walkHostnameTokens returns true, we should skip the rest
|
||||
if (walkHostnameTokens(suffix, onToken)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// If we are in smolTree mode, we need to do something at the end of the loop
|
||||
if (tokens[0] === '.') {
|
||||
if (suffix[0] === '.') {
|
||||
// Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
|
||||
|
||||
const parent = node[1]!;
|
||||
@ -101,13 +130,8 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
}
|
||||
: (suffix: string): void => {
|
||||
let node: TrieNode = root;
|
||||
let token: string;
|
||||
|
||||
const tokens = suffixToTokens(suffix);
|
||||
|
||||
for (let i = tokens.length - 1; i >= 0; i--) {
|
||||
token = tokens[i];
|
||||
|
||||
const onToken = (token: string) => {
|
||||
if (node[2].has(token)) {
|
||||
node = node[2].get(token)!;
|
||||
} else {
|
||||
@ -115,6 +139,13 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
node[2].set(token, newNode);
|
||||
node = newNode;
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
// When walkHostnameTokens returns true, we should skip the rest
|
||||
if (walkHostnameTokens(suffix, onToken)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!node[0]) { // smol tree don't have size, so else-if here
|
||||
@ -124,7 +155,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
};
|
||||
|
||||
const walkIntoLeafWithTokens = (
|
||||
tokens: string | string[],
|
||||
tokens: string[],
|
||||
onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
|
||||
) => {
|
||||
let node: TrieNode = root;
|
||||
@ -135,7 +166,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
for (let i = tokens.length - 1; i >= 0; i--) {
|
||||
token = tokens[i];
|
||||
|
||||
if (hostnameMode && token === '') {
|
||||
if (token === '') {
|
||||
break;
|
||||
}
|
||||
|
||||
@ -153,19 +184,50 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
return { node, parent };
|
||||
};
|
||||
|
||||
/**
 * Descend from the root by following `suffix` from right to left, using
 * `walkHostnameTokens` instead of building a token array first.
 *
 * @param suffix the hostname to look up
 * @param onLoop invoked after every successful step with (child node, its parent, the token just consumed)
 * @returns the node reached and its parent, or `null` when some token has no matching child
 */
const walkIntoLeafWithSuffix = (
  suffix: string,
  onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
) => {
  let node: TrieNode = root;
  let parent: TrieNode = node;
  // A miss is recorded here instead of being signalled with `null`, so it is
  // still reported even if the walker treats a `null` callback result like `false`.
  let missing = false;

  const onToken = (token: string) => {
    if (token === '') {
      return true;
    }

    const child = node[2].get(token);
    if (child === undefined) {
      missing = true;
      // `true` stops the walk: once one token is absent the suffix cannot be in the trie
      return true;
    }

    parent = node;
    node = child;

    onLoop(node, parent, token);

    return false;
  };

  walkHostnameTokens(suffix, onToken);

  if (missing) {
    return null;
  }

  return { node, parent };
};
|
||||
|
||||
const contains = (suffix: string): boolean => {
|
||||
const tokens = suffixToTokens(suffix);
|
||||
return walkIntoLeafWithTokens(tokens) !== null;
|
||||
return walkIntoLeafWithSuffix(suffix) !== null;
|
||||
};
|
||||
|
||||
const walk = (
|
||||
onMatches: (suffix: string | string[]) => void,
|
||||
onMatches: (suffix: string[]) => void,
|
||||
initialNode = root,
|
||||
initialSuffix: string | string[] = hostnameMode ? [] : ''
|
||||
initialSuffix: string[] = []
|
||||
) => {
|
||||
const nodeStack: TrieNode[] = [initialNode];
|
||||
// Resolving initial string (begin the start of the stack)
|
||||
const suffixStack: Array<string | string[]> = [initialSuffix];
|
||||
const suffixStack: string[][] = [initialSuffix];
|
||||
|
||||
let node: TrieNode = root;
|
||||
|
||||
@ -177,7 +239,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
// Pushing the child node to the stack for next iteration of DFS
|
||||
nodeStack.push(childNode);
|
||||
|
||||
suffixStack.push(isHostnameMode(suffix) ? [k, ...suffix] : k + suffix);
|
||||
suffixStack.push([k, ...suffix]);
|
||||
});
|
||||
|
||||
// If the node is a sentinel, we push the suffix to the results
|
||||
@ -194,7 +256,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
parent: TrieNode
|
||||
}
|
||||
|
||||
const getSingleChildLeaf = (tokens: string | string[]): FindSingleChildLeafResult | null => {
|
||||
const getSingleChildLeaf = (tokens: string[]): FindSingleChildLeafResult | null => {
|
||||
let toPrune: TrieNode | null = null;
|
||||
let tokenToPrune: string | null = null;
|
||||
|
||||
@ -203,7 +265,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
|
||||
// Even if the node size is 1, but the single child is ".", we should retain the branch
|
||||
// Since the "." could be special if it is the leaf-est node
|
||||
const onlyChild = node[2].size < 2 && (!hostnameMode || !node[2].has('.'));
|
||||
const onlyChild = node[2].size < 2 && !node[2].has('.');
|
||||
|
||||
if (toPrune != null) { // the top-est branch that could potentially being pruned
|
||||
if (!onlyChild) {
|
||||
@ -234,35 +296,27 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
throw new Error('A Trie with smolTree enabled cannot perform find!');
|
||||
}
|
||||
|
||||
const inputTokens = suffixToTokens(inputSuffix);
|
||||
const inputTokens = hostnameToTokens(inputSuffix);
|
||||
const res = walkIntoLeafWithTokens(inputTokens);
|
||||
if (res === null) return [];
|
||||
|
||||
const matches: Array<string | string[]> = [];
|
||||
const matches: string[][] = [];
|
||||
|
||||
const onMatches = includeEqualWithSuffix
|
||||
? (suffix: string | string[]) => matches.push(suffix)
|
||||
: (
|
||||
hostnameMode
|
||||
? (suffix: string[]) => {
|
||||
if (suffix.some((t, i) => t !== inputTokens[i])) {
|
||||
matches.push(suffix);
|
||||
}
|
||||
}
|
||||
: (suffix: string) => {
|
||||
if (suffix !== inputTokens) {
|
||||
matches.push(suffix);
|
||||
}
|
||||
}
|
||||
);
|
||||
? (suffix: string[]) => matches.push(suffix)
|
||||
: (suffix: string[]) => {
|
||||
if (suffix.some((t, i) => t !== inputTokens[i])) {
|
||||
matches.push(suffix);
|
||||
}
|
||||
};
|
||||
|
||||
walk(
|
||||
onMatches as any,
|
||||
onMatches,
|
||||
res.node, // Performing DFS from prefix
|
||||
inputTokens
|
||||
);
|
||||
|
||||
return hostnameMode ? matches.map((m) => fastStringArrayJoin(m as string[], '')) : matches as string[];
|
||||
return matches.map((m) => fastStringArrayJoin(m, ''));
|
||||
};
|
||||
|
||||
/**
|
||||
@ -273,17 +327,15 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!');
|
||||
}
|
||||
|
||||
const inputTokens = suffixToTokens(inputSuffix);
|
||||
const inputTokens = hostnameToTokens(inputSuffix);
|
||||
|
||||
const res = walkIntoLeafWithTokens(inputTokens);
|
||||
if (res === null) return;
|
||||
|
||||
const onMatches = hostnameMode
|
||||
? (suffix: string[]) => set.delete(fastStringArrayJoin(suffix, ''))
|
||||
: (suffix: string) => set.delete(suffix);
|
||||
const onMatches = (suffix: string[]) => set.delete(fastStringArrayJoin(suffix, ''));
|
||||
|
||||
walk(
|
||||
onMatches as any,
|
||||
onMatches,
|
||||
res.node, // Performing DFS from prefix
|
||||
inputTokens
|
||||
);
|
||||
@ -293,7 +345,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
* Method used to delete a prefix from the trie.
|
||||
*/
|
||||
const remove = (suffix: string): boolean => {
|
||||
const res = getSingleChildLeaf(suffixToTokens(suffix));
|
||||
const res = getSingleChildLeaf(hostnameToTokens(suffix));
|
||||
if (res === null) return false;
|
||||
|
||||
if (!res.node[0]) return false;
|
||||
@ -314,8 +366,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
* Method used to assert whether the given prefix exists in the Trie.
|
||||
*/
|
||||
const has = (suffix: string): boolean => {
|
||||
const tokens = suffixToTokens(suffix);
|
||||
const res = walkIntoLeafWithTokens(tokens);
|
||||
const res = walkIntoLeafWithSuffix(suffix);
|
||||
|
||||
return res
|
||||
? res.node[0]
|
||||
@ -326,20 +377,18 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
const results: string[] = [];
|
||||
|
||||
walk(suffix => {
|
||||
results.push(
|
||||
isHostnameMode(suffix) ? fastStringArrayJoin(suffix, '') : suffix
|
||||
);
|
||||
results.push(fastStringArrayJoin(suffix, ''));
|
||||
});
|
||||
|
||||
return results;
|
||||
};
|
||||
|
||||
const whitelist = (suffix: string) => {
|
||||
if (!hostnameMode && !smolTree) {
|
||||
throw new Error('whitelist method is only available in hostname mode or smolTree mode.');
|
||||
if (!smolTree) {
|
||||
throw new Error('whitelist method is only available in smolTree mode.');
|
||||
}
|
||||
|
||||
const tokens = suffixToTokens(suffix);
|
||||
const tokens = hostnameToTokens(suffix);
|
||||
const res = getSingleChildLeaf(tokens);
|
||||
|
||||
if (res === null) return;
|
||||
@ -406,7 +455,6 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
||||
JSON.stringify(deepTrieNodeToJSON(root), null, 2).split('\n').map((line) => ' '.repeat(depth) + line),
|
||||
'\n'
|
||||
),
|
||||
hostnameMode,
|
||||
smolTree
|
||||
};
|
||||
};
|
||||
|
||||
@ -14,7 +14,7 @@ export const parseDomesticList = async () => {
|
||||
}
|
||||
}
|
||||
|
||||
const trie = createTrie(set, true);
|
||||
const trie = createTrie(set);
|
||||
|
||||
const top5000 = new Set<string>();
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user