Perf: refactor trie to avoid iterating twice

This commit is contained in:
SukkaW 2024-09-07 00:16:56 +08:00
parent 315b38b999
commit 8bcfc0e528
8 changed files with 191 additions and 197 deletions

View File

@ -14,7 +14,6 @@ import { SOURCE_DIR } from './constants/dir';
const getS3OSSDomainsPromise = (async (): Promise<string[]> => { const getS3OSSDomainsPromise = (async (): Promise<string[]> => {
const trie = createTrie( const trie = createTrie(
await getPublicSuffixListTextPromise(), await getPublicSuffixListTextPromise(),
true,
false false
); );

View File

@ -26,7 +26,7 @@ const BLACKLIST = [
export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => { export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
// First trie is to find the microsoft domains that matches probe domains // First trie is to find the microsoft domains that matches probe domains
const trie = createTrie(null, true); const trie = createTrie(null, false);
for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) { for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
const domain = extractDomainsFromFelixDnsmasq(line); const domain = extractDomainsFromFelixDnsmasq(line);
if (domain) { if (domain) {
@ -36,7 +36,7 @@ export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
const foundMicrosoftCdnDomains = PROBE_DOMAINS.flatMap(domain => trie.find(domain)); const foundMicrosoftCdnDomains = PROBE_DOMAINS.flatMap(domain => trie.find(domain));
// Second trie is to remove blacklisted domains // Second trie is to remove blacklisted domains
const trie2 = createTrie(foundMicrosoftCdnDomains, true, true); const trie2 = createTrie(foundMicrosoftCdnDomains, true);
BLACKLIST.forEach(trie2.whitelist); BLACKLIST.forEach(trie2.whitelist);
return sortDomains(trie2.dump()) return sortDomains(trie2.dump())

View File

@ -115,8 +115,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
}); });
const [baseTrie, extraTrie] = span.traceChildSync('create smol trie while deduping black keywords', (childSpan) => { const [baseTrie, extraTrie] = span.traceChildSync('create smol trie while deduping black keywords', (childSpan) => {
const baseTrie = createTrie(null, true, true); const baseTrie = createTrie(null, true);
const extraTrie = createTrie(null, true, true); const extraTrie = createTrie(null, true);
const kwfilter = createKeywordFilter(domainKeywordsSet); const kwfilter = createKeywordFilter(domainKeywordsSet);

View File

@ -177,7 +177,6 @@ export const buildSpeedtestDomainSet = task(require.main === module, __filename)
// OpenSpeedtest // OpenSpeedtest
'open.cachefly.net' 'open.cachefly.net'
], ],
true,
true true
); );

View File

@ -5,11 +5,11 @@ export function domainDeduper(inputDomains: string[] | Trie, toArray: false): Se
export function domainDeduper(inputDomains: string[] | Trie, toArray = true): string[] | Set<string> { export function domainDeduper(inputDomains: string[] | Trie, toArray = true): string[] | Set<string> {
let trie: Trie; let trie: Trie;
if (Array.isArray(inputDomains)) { if (Array.isArray(inputDomains)) {
trie = createTrie(inputDomains, true, true); trie = createTrie(inputDomains, true);
} else if (!inputDomains.hostnameMode || !inputDomains.smolTree) { } else if (inputDomains.smolTree) {
throw new Error('Invalid trie');
} else {
trie = inputDomains; trie = inputDomains;
} else {
throw new Error('Invalid trie');
} }
const dumped = trie.dump(); const dumped = trie.dump();

View File

@ -1,27 +1,41 @@
import { createTrie } from './trie'; import { createTrie, hostnameToTokens } from './trie';
import { describe, it } from 'mocha'; import { describe, it } from 'mocha';
import { expect } from 'chai'; import { expect } from 'chai';
describe('hostname to tokens', () => {
  it('should split hostname into tokens.', () => {
    // Each row pairs a hostname with the token list hostnameToTokens is expected to produce:
    // labels stay in their original order and every separator becomes its own "." token.
    const cases: Array<[string, string[]]> = [
      ['.blog.skk.moe', ['.', 'blog', '.', 'skk', '.', 'moe']],
      ['blog.skk.moe', ['blog', '.', 'skk', '.', 'moe']],
      ['skk.moe', ['skk', '.', 'moe']],
      ['moe', ['moe']]
    ];

    for (const [hostname, expectedTokens] of cases) {
      expect(hostnameToTokens(hostname)).to.deep.equal(expectedTokens);
    }
  });
});
it('should be possible to add domains to a Trie (hostname).', () => { describe('Trie', () => {
const trie = createTrie(null, true); it('should be possible to add domains to a Trie.', () => {
const trie = createTrie();
trie.add('a.skk.moe'); trie.add('a.skk.moe');
trie.add('skk.moe'); trie.add('skk.moe');
@ -40,17 +54,6 @@ describe('Trie', () => {
it('adding the same item several times should not increase size.', () => { it('adding the same item several times should not increase size.', () => {
const trie = createTrie(); const trie = createTrie();
trie.add('rat');
trie.add('erat');
trie.add('rat');
expect(trie.size).to.equal(2);
expect(trie.has('rat')).to.equal(true);
});
it('adding the same item several times should not increase size (hostname).', () => {
const trie = createTrie(null, true);
trie.add('skk.moe'); trie.add('skk.moe');
trie.add('blog.skk.moe'); trie.add('blog.skk.moe');
trie.add('skk.moe'); trie.add('skk.moe');
@ -71,30 +74,7 @@ describe('Trie', () => {
}); });
it('should be possible to delete items.', () => { it('should be possible to delete items.', () => {
const trie = createTrie(); const trie = createTrie(null);
trie.add('rat');
trie.add('rate');
trie.add('tar');
expect(trie.delete('')).to.equal(false);
expect(trie.delete('')).to.equal(false);
expect(trie.delete('hello')).to.equal(false);
expect(trie.delete('rat')).to.equal(true);
expect(trie.has('rat')).to.equal(false);
expect(trie.has('rate')).to.equal(true);
expect(trie.size).to.equal(2);
expect(trie.delete('rate')).to.equal(true);
expect(trie.size).to.equal(1);
expect(trie.delete('tar')).to.equal(true);
expect(trie.size).to.equal(0);
});
it('should be possible to delete items (hostname).', () => {
const trie = createTrie(null, true);
trie.add('skk.moe'); trie.add('skk.moe');
trie.add('example.com'); trie.add('example.com');
@ -117,17 +97,6 @@ describe('Trie', () => {
}); });
it('should be possible to check the existence of a sequence in the Trie.', () => { it('should be possible to check the existence of a sequence in the Trie.', () => {
const trie = createTrie();
trie.add('romanesque');
expect(trie.has('romanesque')).to.equal(true);
expect(trie.has('roman')).to.equal(false);
expect(trie.has('esque')).to.equal(false);
expect(trie.has('')).to.equal(false);
});
it('should be possible to check the existence of a sequence in the Trie (hostname).', () => {
const trie = createTrie(null, true); const trie = createTrie(null, true);
trie.add('example.org.skk.moe'); trie.add('example.org.skk.moe');
@ -139,23 +108,7 @@ describe('Trie', () => {
}); });
it('should be possible to retrieve items matching the given prefix.', () => { it('should be possible to retrieve items matching the given prefix.', () => {
const trie = createTrie(); const trie = createTrie(null);
trie.add('roman');
trie.add('esqueroman');
trie.add('sesqueroman');
trie.add('greek');
expect(trie.find('roman')).to.deep.equal(['roman', 'esqueroman', 'sesqueroman']);
expect(trie.find('man')).to.deep.equal(['roman', 'esqueroman', 'sesqueroman']);
expect(trie.find('esqueroman')).to.deep.equal(['esqueroman', 'sesqueroman']);
expect(trie.find('eek')).to.deep.equal(['greek']);
expect(trie.find('hello')).to.deep.equal([]);
expect(trie.find('')).to.deep.equal(['greek', 'roman', 'esqueroman', 'sesqueroman']);
});
it('should be possible to retrieve items matching the given prefix (hostname).', () => {
const trie = createTrie(null, true);
trie.add('example.com'); trie.add('example.com');
trie.add('blog.example.com'); trie.add('blog.example.com');
@ -171,47 +124,42 @@ describe('Trie', () => {
}); });
it('should be possible to create a trie from an arbitrary iterable.', () => { it('should be possible to create a trie from an arbitrary iterable.', () => {
let trie = createTrie(['roman', 'esqueroman']); let trie = createTrie(['skk.moe', 'blog.skk.moe']);
expect(trie.size).to.equal(2); expect(trie.size).to.equal(2);
expect(trie.has('roman')).to.equal(true); expect(trie.has('skk.moe')).to.equal(true);
trie = createTrie(new Set(['skk.moe', 'example.com']), true); trie = createTrie(new Set(['skk.moe', 'example.com']));
expect(trie.size).to.equal(2); expect(trie.size).to.equal(2);
expect(trie.has('skk.moe')).to.equal(true); expect(trie.has('skk.moe')).to.equal(true);
}); });
}); });
describe('surge domainset dedupe', () => {
  it('should not remove same entry', () => {
    const entries = ['.skk.moe', 'noc.one'];
    const trie = createTrie(entries);

    // Looking up an entry that was added verbatim yields exactly that entry
    const wildcardHits = trie.find('.skk.moe');
    const plainHits = trie.find('noc.one');

    expect(wildcardHits).to.deep.equal(['.skk.moe']);
    expect(plainHits).to.deep.equal(['noc.one']);
  });

  it('should match subdomain - 1', () => {
    const entries = ['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'];
    const trie = createTrie(entries);

    const skkHits = trie.find('.skk.moe');
    const sukkawHits = trie.find('.sukkaw.com');

    expect(skkHits).to.deep.equal(['image.cdn.skk.moe', 'blog.skk.moe']);
    expect(sukkawHits).to.deep.equal(['www.sukkaw.com']);
  });

  it('should match subdomain - 2', () => {
    // Same fixture as above, plus the `.skk.moe` entry itself, which must also be reported
    const entries = ['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'];
    const trie = createTrie(entries);

    const skkHits = trie.find('.skk.moe');
    const sukkawHits = trie.find('.sukkaw.com');

    expect(skkHits).to.deep.equal(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']);
    expect(sukkawHits).to.deep.equal(['www.sukkaw.com']);
  });

  it('should not remove non-subdomain', () => {
    // `sukkaskk.moe` merely ends with the same characters; it is not a subdomain of skk.moe
    const entries = ['skk.moe', 'sukkaskk.moe'];
    const trie = createTrie(entries);

    const hits = trie.find('.skk.moe');

    expect(hits).to.deep.equal([]);
  });
});
@ -221,7 +169,7 @@ describe('smol tree', () => {
'.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe', '.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe',
'www.noc.one', 'cdn.noc.one', 'www.noc.one', 'cdn.noc.one',
'.blog.sub.example.com', 'sub.example.com', 'cdn.sub.example.com', '.sub.example.com' '.blog.sub.example.com', 'sub.example.com', 'cdn.sub.example.com', '.sub.example.com'
], true, true); ], true);
expect(trie.dump()).to.deep.equal([ expect(trie.dump()).to.deep.equal([
'.sub.example.com', '.sub.example.com',
@ -233,7 +181,7 @@ describe('smol tree', () => {
it('should create simple tree - 2', () => { it('should create simple tree - 2', () => {
const trie = createTrie([ const trie = createTrie([
'.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe' '.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe'
], true, true); ], true);
expect(trie.dump()).to.deep.equal([ expect(trie.dump()).to.deep.equal([
'.skk.moe' '.skk.moe'
@ -243,7 +191,7 @@ describe('smol tree', () => {
it('should create simple tree - 2', () => { it('should create simple tree - 2', () => {
const trie = createTrie([ const trie = createTrie([
'.blog.sub.example.com', 'cdn.sub.example.com', '.sub.example.com' '.blog.sub.example.com', 'cdn.sub.example.com', '.sub.example.com'
], true, true); ], true);
expect(trie.dump()).to.deep.equal([ expect(trie.dump()).to.deep.equal([
'.sub.example.com' '.sub.example.com'
@ -261,7 +209,7 @@ describe('smol tree', () => {
'act.commercial.shouji.360.cn', 'act.commercial.shouji.360.cn',
'cdn.creative.medialytics.com', 'cdn.creative.medialytics.com',
'px.cdn.creative.medialytics.com' 'px.cdn.creative.medialytics.com'
], true, true); ], true);
expect(trie.dump()).to.deep.equal([ expect(trie.dump()).to.deep.equal([
'cdn.creative.medialytics.com', 'cdn.creative.medialytics.com',
@ -277,7 +225,7 @@ describe('smol tree', () => {
'anotherskk.moe', 'anotherskk.moe',
'blog.anotherskk.moe', 'blog.anotherskk.moe',
'blog.skk.moe' 'blog.skk.moe'
], true, true); ], true);
expect(trie.dump()).to.deep.equal([ expect(trie.dump()).to.deep.equal([
'anotherskk.moe', 'anotherskk.moe',
@ -293,7 +241,7 @@ describe('smol tree', () => {
'anotherskk.moe', 'anotherskk.moe',
'blog.anotherskk.moe', 'blog.anotherskk.moe',
'blog.skk.moe' 'blog.skk.moe'
], true, true); ], true);
expect(trie.dump()).to.deep.equal([ expect(trie.dump()).to.deep.equal([
'anotherskk.moe', 'anotherskk.moe',
@ -328,7 +276,7 @@ describe('smol tree', () => {
'.skk.moe', '.skk.moe',
'blog.cdn.example.com', 'blog.cdn.example.com',
'cdn.example.com' 'cdn.example.com'
], true, true); ], true);
expect(trie.dump()).to.deep.equal([ expect(trie.dump()).to.deep.equal([
'cdn.example.com', 'blog.cdn.example.com', 'cdn.example.com', 'blog.cdn.example.com',

View File

@ -1,5 +1,5 @@
/** /**
 * Suffix Trie based on Mnemonist Trie * Hostname-Optimized Trie based on Mnemonist Trie
*/ */
import { fastStringArrayJoin } from './misc'; import { fastStringArrayJoin } from './misc';
@ -28,57 +28,86 @@ const createNode = (parent: TrieNode | null = null): TrieNode => {
return [false, parent, new Map<string, TrieNode>()] as TrieNode; return [false, parent, new Map<string, TrieNode>()] as TrieNode;
}; };
/**
 * Splits a hostname into trie tokens, keeping the labels in their original
 * (left-to-right) order and emitting every separator as its own "." token.
 *
 * Empty labels (e.g. the one in front of a leading dot) produce no token of
 * their own, so `.skk.moe` becomes `['.', 'skk', '.', 'moe']`.
 *
 * @param hostname - dot-separated hostname, optionally starting with "."
 * @returns the token list described above
 */
export const hostnameToTokens = (hostname: string): string[] => {
  const labels = hostname.split('.');
  const out: string[] = [];

  labels.forEach((label, position) => {
    // A separator precedes every label except the very first one
    if (position !== 0) {
      out.push('.');
    }
    if (label.length !== 0) {
      out.push(label);
    }
  });

  return out;
};
/**
 * Feeds the tokens of `hostname` to `onToken` from right to left (the last
 * label first), without building an intermediate token array.
 *
 * Every separator is reported as a "." token; empty labels (such as the one
 * in front of a leading dot) are skipped, so `.skk.moe` is walked as
 * `moe`, `.`, `skk`, `.`.
 *
 * The callback steers the walk through its return value:
 * - `false`: keep walking;
 * - `true`: stop early, and the walk itself returns `true`;
 * - `null`: abort, and the walk itself returns `null`.
 *
 * `null` has to be checked explicitly: it is falsy, so a plain truthiness
 * test would silently treat it as "keep walking" and the caller could never
 * observe the abort.
 *
 * @param hostname - dot-separated hostname, optionally starting with "."
 * @param onToken - invoked once per token, in right-to-left order
 * @returns `null` if the callback aborted, `true` if it stopped the walk
 * early, `false` if every token was visited
 */
const walkHostnameTokens = (hostname: string, onToken: (token: string) => boolean | null): boolean | null => {
  const tokens = hostname.split('.');
  const l = tokens.length - 1;

  let token = '';
  let signal: boolean | null = false;

  for (let i = l; i >= 0; i--) {
    // Every label except the right-most one is preceded (in walk order) by its separator
    if (i < l) {
      signal = onToken('.');
      if (signal === null) {
        return null;
      }
      if (signal) {
        return true;
      }
    }

    token = tokens[i];

    // Empty labels carry no token of their own
    if (token.length > 0) {
      signal = onToken(token);
      if (signal === null) {
        return null;
      }
      if (signal) {
        return true;
      }
    }
  }

  return false;
};
export const createTrie = (from?: string[] | Set<string> | null, smolTree = false) => {
let size = 0; let size = 0;
const root: TrieNode = createNode(); const root: TrieNode = createNode();
const isHostnameMode = (_token: string | string[]): _token is string[] => hostnameMode;
const suffixToTokens = hostnameMode
? hostnameToTokens
: (suffix: string) => suffix;
/** /**
* Method used to add the given suffix to the trie. * Method used to add the given suffix to the trie.
*/ */
const add = smolTree const add = smolTree
? (suffix: string): void => { ? (suffix: string): void => {
let node: TrieNode = root; let node: TrieNode = root;
let token: string;
const tokens = suffixToTokens(suffix);
for (let i = tokens.length - 1; i >= 0; i--) {
token = tokens[i];
const onToken = (token: string) => {
if (node[2].has(token)) { if (node[2].has(token)) {
node = node[2].get(token)!; node = node[2].get(token)!;
// During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie // During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie
// Dedupe the covered subdomain by skipping // Dedupe the covered subdomain by skipping
if (token === '.' && node[0]) { if (token === '.' && node[0]) {
return; return true;
} }
} else { } else {
const newNode = createNode(node); const newNode = createNode(node);
node[2].set(token, newNode); node[2].set(token, newNode);
node = newNode; node = newNode;
} }
return false;
};
// When walkHostnameTokens returns true, we should skip the rest
if (walkHostnameTokens(suffix, onToken)) {
return;
} }
// If we are in smolTree mode, we need to do something at the end of the loop // If we are in smolTree mode, we need to do something at the end of the loop
if (tokens[0] === '.') { if (suffix[0] === '.') {
// Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie // Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
const parent = node[1]!; const parent = node[1]!;
@ -101,13 +130,8 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
} }
: (suffix: string): void => { : (suffix: string): void => {
let node: TrieNode = root; let node: TrieNode = root;
let token: string;
const tokens = suffixToTokens(suffix);
for (let i = tokens.length - 1; i >= 0; i--) {
token = tokens[i];
const onToken = (token: string) => {
if (node[2].has(token)) { if (node[2].has(token)) {
node = node[2].get(token)!; node = node[2].get(token)!;
} else { } else {
@ -115,6 +139,13 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
node[2].set(token, newNode); node[2].set(token, newNode);
node = newNode; node = newNode;
} }
return false;
};
// When walkHostnameTokens returns true, we should skip the rest
if (walkHostnameTokens(suffix, onToken)) {
return;
} }
if (!node[0]) { // smol tree don't have size, so else-if here if (!node[0]) { // smol tree don't have size, so else-if here
@ -124,7 +155,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
}; };
const walkIntoLeafWithTokens = ( const walkIntoLeafWithTokens = (
tokens: string | string[], tokens: string[],
onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
) => { ) => {
let node: TrieNode = root; let node: TrieNode = root;
@ -135,7 +166,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
for (let i = tokens.length - 1; i >= 0; i--) { for (let i = tokens.length - 1; i >= 0; i--) {
token = tokens[i]; token = tokens[i];
if (hostnameMode && token === '') { if (token === '') {
break; break;
} }
@ -153,19 +184,50 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
return { node, parent }; return { node, parent };
}; };
/**
 * Descends from the trie root along the tokens of `suffix`, as delivered by
 * `walkHostnameTokens`, calling `onLoop` after each successful step.
 *
 * @param suffix - hostname to follow through the trie
 * @param onLoop - called with (node reached, node it was reached from, token) after every step
 * @returns the node reached and the node it was reached from, or `null` when
 * `walkHostnameTokens` returns `null` (the per-token callback yields `null`
 * as soon as a token has no matching child)
 */
const walkIntoLeafWithSuffix = (
  suffix: string,
  onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
) => {
  let current: TrieNode = root;
  let previous: TrieNode = current;

  const step = (token: string): boolean | null => {
    if (token === '') {
      return true;
    }

    previous = current;

    const child = current[2].get(token);
    if (child === undefined) {
      // No child for this token: report the miss to the walker
      return null;
    }
    current = child;

    onLoop(current, previous, token);

    return false;
  };

  const outcome = walkHostnameTokens(suffix, step);

  return outcome === null
    ? null
    : { node: current, parent: previous };
};
/**
 * Reports whether walking `suffix` through the trie produced a result,
 * i.e. whether `walkIntoLeafWithSuffix` returned something other than `null`.
 */
const contains = (suffix: string): boolean => {
  const leaf = walkIntoLeafWithSuffix(suffix);
  return leaf !== null;
};
const walk = ( const walk = (
onMatches: (suffix: string | string[]) => void, onMatches: (suffix: string[]) => void,
initialNode = root, initialNode = root,
initialSuffix: string | string[] = hostnameMode ? [] : '' initialSuffix: string[] = []
) => { ) => {
const nodeStack: TrieNode[] = [initialNode]; const nodeStack: TrieNode[] = [initialNode];
// Resolving initial string (begin the start of the stack) // Resolving initial string (begin the start of the stack)
const suffixStack: Array<string | string[]> = [initialSuffix]; const suffixStack: string[][] = [initialSuffix];
let node: TrieNode = root; let node: TrieNode = root;
@ -177,7 +239,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
// Pushing the child node to the stack for next iteration of DFS // Pushing the child node to the stack for next iteration of DFS
nodeStack.push(childNode); nodeStack.push(childNode);
suffixStack.push(isHostnameMode(suffix) ? [k, ...suffix] : k + suffix); suffixStack.push([k, ...suffix]);
}); });
// If the node is a sentinel, we push the suffix to the results // If the node is a sentinel, we push the suffix to the results
@ -194,7 +256,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
parent: TrieNode parent: TrieNode
} }
const getSingleChildLeaf = (tokens: string | string[]): FindSingleChildLeafResult | null => { const getSingleChildLeaf = (tokens: string[]): FindSingleChildLeafResult | null => {
let toPrune: TrieNode | null = null; let toPrune: TrieNode | null = null;
let tokenToPrune: string | null = null; let tokenToPrune: string | null = null;
@ -203,7 +265,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
// Even if the node size is 1, but the single child is ".", we should retain the branch // Even if the node size is 1, but the single child is ".", we should retain the branch
// Since the "." could be special if it is the leaf-est node // Since the "." could be special if it is the leaf-est node
const onlyChild = node[2].size < 2 && (!hostnameMode || !node[2].has('.')); const onlyChild = node[2].size < 2 && !node[2].has('.');
if (toPrune != null) { // the top-est branch that could potentially being pruned if (toPrune != null) { // the top-est branch that could potentially being pruned
if (!onlyChild) { if (!onlyChild) {
@ -234,35 +296,27 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
throw new Error('A Trie with smolTree enabled cannot perform find!'); throw new Error('A Trie with smolTree enabled cannot perform find!');
} }
const inputTokens = suffixToTokens(inputSuffix); const inputTokens = hostnameToTokens(inputSuffix);
const res = walkIntoLeafWithTokens(inputTokens); const res = walkIntoLeafWithTokens(inputTokens);
if (res === null) return []; if (res === null) return [];
const matches: Array<string | string[]> = []; const matches: string[][] = [];
const onMatches = includeEqualWithSuffix const onMatches = includeEqualWithSuffix
? (suffix: string | string[]) => matches.push(suffix) ? (suffix: string[]) => matches.push(suffix)
: ( : (suffix: string[]) => {
hostnameMode if (suffix.some((t, i) => t !== inputTokens[i])) {
? (suffix: string[]) => { matches.push(suffix);
if (suffix.some((t, i) => t !== inputTokens[i])) { }
matches.push(suffix); };
}
}
: (suffix: string) => {
if (suffix !== inputTokens) {
matches.push(suffix);
}
}
);
walk( walk(
onMatches as any, onMatches,
res.node, // Performing DFS from prefix res.node, // Performing DFS from prefix
inputTokens inputTokens
); );
return hostnameMode ? matches.map((m) => fastStringArrayJoin(m as string[], '')) : matches as string[]; return matches.map((m) => fastStringArrayJoin(m, ''));
}; };
/** /**
@ -273,17 +327,15 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!'); throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!');
} }
const inputTokens = suffixToTokens(inputSuffix); const inputTokens = hostnameToTokens(inputSuffix);
const res = walkIntoLeafWithTokens(inputTokens); const res = walkIntoLeafWithTokens(inputTokens);
if (res === null) return; if (res === null) return;
const onMatches = hostnameMode const onMatches = (suffix: string[]) => set.delete(fastStringArrayJoin(suffix, ''));
? (suffix: string[]) => set.delete(fastStringArrayJoin(suffix, ''))
: (suffix: string) => set.delete(suffix);
walk( walk(
onMatches as any, onMatches,
res.node, // Performing DFS from prefix res.node, // Performing DFS from prefix
inputTokens inputTokens
); );
@ -293,7 +345,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
* Method used to delete a prefix from the trie. * Method used to delete a prefix from the trie.
*/ */
const remove = (suffix: string): boolean => { const remove = (suffix: string): boolean => {
const res = getSingleChildLeaf(suffixToTokens(suffix)); const res = getSingleChildLeaf(hostnameToTokens(suffix));
if (res === null) return false; if (res === null) return false;
if (!res.node[0]) return false; if (!res.node[0]) return false;
@ -314,8 +366,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
* Method used to assert whether the given prefix exists in the Trie. * Method used to assert whether the given prefix exists in the Trie.
*/ */
const has = (suffix: string): boolean => { const has = (suffix: string): boolean => {
const tokens = suffixToTokens(suffix); const res = walkIntoLeafWithSuffix(suffix);
const res = walkIntoLeafWithTokens(tokens);
return res return res
? res.node[0] ? res.node[0]
@ -326,20 +377,18 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
const results: string[] = []; const results: string[] = [];
walk(suffix => { walk(suffix => {
results.push( results.push(fastStringArrayJoin(suffix, ''));
isHostnameMode(suffix) ? fastStringArrayJoin(suffix, '') : suffix
);
}); });
return results; return results;
}; };
const whitelist = (suffix: string) => { const whitelist = (suffix: string) => {
if (!hostnameMode && !smolTree) { if (!smolTree) {
throw new Error('whitelist method is only available in hostname mode or smolTree mode.'); throw new Error('whitelist method is only available in smolTree mode.');
} }
const tokens = suffixToTokens(suffix); const tokens = hostnameToTokens(suffix);
const res = getSingleChildLeaf(tokens); const res = getSingleChildLeaf(tokens);
if (res === null) return; if (res === null) return;
@ -406,7 +455,6 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
JSON.stringify(deepTrieNodeToJSON(root), null, 2).split('\n').map((line) => ' '.repeat(depth) + line), JSON.stringify(deepTrieNodeToJSON(root), null, 2).split('\n').map((line) => ' '.repeat(depth) + line),
'\n' '\n'
), ),
hostnameMode,
smolTree smolTree
}; };
}; };

View File

@ -14,7 +14,7 @@ export const parseDomesticList = async () => {
} }
} }
const trie = createTrie(set, true); const trie = createTrie(set);
const top5000 = new Set<string>(); const top5000 = new Set<string>();