mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-14 02:00:37 +08:00
Perf: refactor trie to avoid iterating twice
This commit is contained in:
parent
315b38b999
commit
8bcfc0e528
@ -14,7 +14,6 @@ import { SOURCE_DIR } from './constants/dir';
|
|||||||
const getS3OSSDomainsPromise = (async (): Promise<string[]> => {
|
const getS3OSSDomainsPromise = (async (): Promise<string[]> => {
|
||||||
const trie = createTrie(
|
const trie = createTrie(
|
||||||
await getPublicSuffixListTextPromise(),
|
await getPublicSuffixListTextPromise(),
|
||||||
true,
|
|
||||||
false
|
false
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@ -26,7 +26,7 @@ const BLACKLIST = [
|
|||||||
|
|
||||||
export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
|
export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
|
||||||
// First trie is to find the microsoft domains that matches probe domains
|
// First trie is to find the microsoft domains that matches probe domains
|
||||||
const trie = createTrie(null, true);
|
const trie = createTrie(null, false);
|
||||||
for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
|
for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
|
||||||
const domain = extractDomainsFromFelixDnsmasq(line);
|
const domain = extractDomainsFromFelixDnsmasq(line);
|
||||||
if (domain) {
|
if (domain) {
|
||||||
@ -36,7 +36,7 @@ export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
|
|||||||
const foundMicrosoftCdnDomains = PROBE_DOMAINS.flatMap(domain => trie.find(domain));
|
const foundMicrosoftCdnDomains = PROBE_DOMAINS.flatMap(domain => trie.find(domain));
|
||||||
|
|
||||||
// Second trie is to remove blacklisted domains
|
// Second trie is to remove blacklisted domains
|
||||||
const trie2 = createTrie(foundMicrosoftCdnDomains, true, true);
|
const trie2 = createTrie(foundMicrosoftCdnDomains, true);
|
||||||
BLACKLIST.forEach(trie2.whitelist);
|
BLACKLIST.forEach(trie2.whitelist);
|
||||||
|
|
||||||
return sortDomains(trie2.dump())
|
return sortDomains(trie2.dump())
|
||||||
|
|||||||
@ -115,8 +115,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
|
|||||||
});
|
});
|
||||||
|
|
||||||
const [baseTrie, extraTrie] = span.traceChildSync('create smol trie while deduping black keywords', (childSpan) => {
|
const [baseTrie, extraTrie] = span.traceChildSync('create smol trie while deduping black keywords', (childSpan) => {
|
||||||
const baseTrie = createTrie(null, true, true);
|
const baseTrie = createTrie(null, true);
|
||||||
const extraTrie = createTrie(null, true, true);
|
const extraTrie = createTrie(null, true);
|
||||||
|
|
||||||
const kwfilter = createKeywordFilter(domainKeywordsSet);
|
const kwfilter = createKeywordFilter(domainKeywordsSet);
|
||||||
|
|
||||||
|
|||||||
@ -177,7 +177,6 @@ export const buildSpeedtestDomainSet = task(require.main === module, __filename)
|
|||||||
// OpenSpeedtest
|
// OpenSpeedtest
|
||||||
'open.cachefly.net'
|
'open.cachefly.net'
|
||||||
],
|
],
|
||||||
true,
|
|
||||||
true
|
true
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@ -5,11 +5,11 @@ export function domainDeduper(inputDomains: string[] | Trie, toArray: false): Se
|
|||||||
export function domainDeduper(inputDomains: string[] | Trie, toArray = true): string[] | Set<string> {
|
export function domainDeduper(inputDomains: string[] | Trie, toArray = true): string[] | Set<string> {
|
||||||
let trie: Trie;
|
let trie: Trie;
|
||||||
if (Array.isArray(inputDomains)) {
|
if (Array.isArray(inputDomains)) {
|
||||||
trie = createTrie(inputDomains, true, true);
|
trie = createTrie(inputDomains, true);
|
||||||
} else if (!inputDomains.hostnameMode || !inputDomains.smolTree) {
|
} else if (inputDomains.smolTree) {
|
||||||
throw new Error('Invalid trie');
|
|
||||||
} else {
|
|
||||||
trie = inputDomains;
|
trie = inputDomains;
|
||||||
|
} else {
|
||||||
|
throw new Error('Invalid trie');
|
||||||
}
|
}
|
||||||
|
|
||||||
const dumped = trie.dump();
|
const dumped = trie.dump();
|
||||||
|
|||||||
@ -1,27 +1,41 @@
|
|||||||
import { createTrie } from './trie';
|
import { createTrie, hostnameToTokens } from './trie';
|
||||||
import { describe, it } from 'mocha';
|
import { describe, it } from 'mocha';
|
||||||
import { expect } from 'chai';
|
import { expect } from 'chai';
|
||||||
|
|
||||||
describe('Trie', () => {
|
describe('hostname to tokens', () => {
|
||||||
it('should be possible to add items to a Trie.', () => {
|
it('should split hostname into tokens.', () => {
|
||||||
const trie = createTrie();
|
expect(hostnameToTokens('.blog.skk.moe')).to.deep.equal([
|
||||||
|
'.',
|
||||||
|
'blog',
|
||||||
|
'.',
|
||||||
|
'skk',
|
||||||
|
'.',
|
||||||
|
'moe'
|
||||||
|
]);
|
||||||
|
|
||||||
trie.add('sukka');
|
expect(hostnameToTokens('blog.skk.moe')).to.deep.equal([
|
||||||
trie.add('ukka');
|
'blog',
|
||||||
trie.add('akku');
|
'.',
|
||||||
|
'skk',
|
||||||
|
'.',
|
||||||
|
'moe'
|
||||||
|
]);
|
||||||
|
|
||||||
expect(trie.size).to.equal(3);
|
expect(hostnameToTokens('skk.moe')).to.deep.equal([
|
||||||
|
'skk',
|
||||||
|
'.',
|
||||||
|
'moe'
|
||||||
|
]);
|
||||||
|
|
||||||
expect(trie.has('sukka')).to.equal(true);
|
expect(hostnameToTokens('moe')).to.deep.equal([
|
||||||
expect(trie.has('ukka')).to.equal(true);
|
'moe'
|
||||||
expect(trie.has('akku')).to.equal(true);
|
]);
|
||||||
expect(trie.has('noc')).to.equal(false);
|
});
|
||||||
expect(trie.has('suk')).to.equal(false);
|
|
||||||
expect(trie.has('sukkaw')).to.equal(false);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should be possible to add domains to a Trie (hostname).', () => {
|
describe('Trie', () => {
|
||||||
const trie = createTrie(null, true);
|
it('should be possible to add domains to a Trie.', () => {
|
||||||
|
const trie = createTrie();
|
||||||
|
|
||||||
trie.add('a.skk.moe');
|
trie.add('a.skk.moe');
|
||||||
trie.add('skk.moe');
|
trie.add('skk.moe');
|
||||||
@ -40,17 +54,6 @@ describe('Trie', () => {
|
|||||||
it('adding the same item several times should not increase size.', () => {
|
it('adding the same item several times should not increase size.', () => {
|
||||||
const trie = createTrie();
|
const trie = createTrie();
|
||||||
|
|
||||||
trie.add('rat');
|
|
||||||
trie.add('erat');
|
|
||||||
trie.add('rat');
|
|
||||||
|
|
||||||
expect(trie.size).to.equal(2);
|
|
||||||
expect(trie.has('rat')).to.equal(true);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('adding the same item several times should not increase size (hostname).', () => {
|
|
||||||
const trie = createTrie(null, true);
|
|
||||||
|
|
||||||
trie.add('skk.moe');
|
trie.add('skk.moe');
|
||||||
trie.add('blog.skk.moe');
|
trie.add('blog.skk.moe');
|
||||||
trie.add('skk.moe');
|
trie.add('skk.moe');
|
||||||
@ -71,30 +74,7 @@ describe('Trie', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
it('should be possible to delete items.', () => {
|
it('should be possible to delete items.', () => {
|
||||||
const trie = createTrie();
|
const trie = createTrie(null);
|
||||||
|
|
||||||
trie.add('rat');
|
|
||||||
trie.add('rate');
|
|
||||||
trie.add('tar');
|
|
||||||
|
|
||||||
expect(trie.delete('')).to.equal(false);
|
|
||||||
expect(trie.delete('')).to.equal(false);
|
|
||||||
expect(trie.delete('hello')).to.equal(false);
|
|
||||||
|
|
||||||
expect(trie.delete('rat')).to.equal(true);
|
|
||||||
expect(trie.has('rat')).to.equal(false);
|
|
||||||
expect(trie.has('rate')).to.equal(true);
|
|
||||||
|
|
||||||
expect(trie.size).to.equal(2);
|
|
||||||
|
|
||||||
expect(trie.delete('rate')).to.equal(true);
|
|
||||||
expect(trie.size).to.equal(1);
|
|
||||||
expect(trie.delete('tar')).to.equal(true);
|
|
||||||
expect(trie.size).to.equal(0);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should be possible to delete items (hostname).', () => {
|
|
||||||
const trie = createTrie(null, true);
|
|
||||||
|
|
||||||
trie.add('skk.moe');
|
trie.add('skk.moe');
|
||||||
trie.add('example.com');
|
trie.add('example.com');
|
||||||
@ -117,17 +97,6 @@ describe('Trie', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
it('should be possible to check the existence of a sequence in the Trie.', () => {
|
it('should be possible to check the existence of a sequence in the Trie.', () => {
|
||||||
const trie = createTrie();
|
|
||||||
|
|
||||||
trie.add('romanesque');
|
|
||||||
|
|
||||||
expect(trie.has('romanesque')).to.equal(true);
|
|
||||||
expect(trie.has('roman')).to.equal(false);
|
|
||||||
expect(trie.has('esque')).to.equal(false);
|
|
||||||
expect(trie.has('')).to.equal(false);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should be possible to check the existence of a sequence in the Trie (hostname).', () => {
|
|
||||||
const trie = createTrie(null, true);
|
const trie = createTrie(null, true);
|
||||||
|
|
||||||
trie.add('example.org.skk.moe');
|
trie.add('example.org.skk.moe');
|
||||||
@ -139,23 +108,7 @@ describe('Trie', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
it('should be possible to retrieve items matching the given prefix.', () => {
|
it('should be possible to retrieve items matching the given prefix.', () => {
|
||||||
const trie = createTrie();
|
const trie = createTrie(null);
|
||||||
|
|
||||||
trie.add('roman');
|
|
||||||
trie.add('esqueroman');
|
|
||||||
trie.add('sesqueroman');
|
|
||||||
trie.add('greek');
|
|
||||||
|
|
||||||
expect(trie.find('roman')).to.deep.equal(['roman', 'esqueroman', 'sesqueroman']);
|
|
||||||
expect(trie.find('man')).to.deep.equal(['roman', 'esqueroman', 'sesqueroman']);
|
|
||||||
expect(trie.find('esqueroman')).to.deep.equal(['esqueroman', 'sesqueroman']);
|
|
||||||
expect(trie.find('eek')).to.deep.equal(['greek']);
|
|
||||||
expect(trie.find('hello')).to.deep.equal([]);
|
|
||||||
expect(trie.find('')).to.deep.equal(['greek', 'roman', 'esqueroman', 'sesqueroman']);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should be possible to retrieve items matching the given prefix (hostname).', () => {
|
|
||||||
const trie = createTrie(null, true);
|
|
||||||
|
|
||||||
trie.add('example.com');
|
trie.add('example.com');
|
||||||
trie.add('blog.example.com');
|
trie.add('blog.example.com');
|
||||||
@ -171,49 +124,44 @@ describe('Trie', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
it('should be possible to create a trie from an arbitrary iterable.', () => {
|
it('should be possible to create a trie from an arbitrary iterable.', () => {
|
||||||
let trie = createTrie(['roman', 'esqueroman']);
|
let trie = createTrie(['skk.moe', 'blog.skk.moe']);
|
||||||
|
|
||||||
expect(trie.size).to.equal(2);
|
expect(trie.size).to.equal(2);
|
||||||
expect(trie.has('roman')).to.equal(true);
|
expect(trie.has('skk.moe')).to.equal(true);
|
||||||
|
|
||||||
trie = createTrie(new Set(['skk.moe', 'example.com']), true);
|
trie = createTrie(new Set(['skk.moe', 'example.com']));
|
||||||
expect(trie.size).to.equal(2);
|
expect(trie.size).to.equal(2);
|
||||||
expect(trie.has('skk.moe')).to.equal(true);
|
expect(trie.has('skk.moe')).to.equal(true);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
([
|
describe('surge domainset dedupe', () => {
|
||||||
['hostname mode off', false],
|
|
||||||
['hostname mode on', true]
|
|
||||||
] as const).forEach(([description, hostnameMode]) => {
|
|
||||||
describe('surge domainset dedupe ' + description, () => {
|
|
||||||
it('should not remove same entry', () => {
|
it('should not remove same entry', () => {
|
||||||
const trie = createTrie(['.skk.moe', 'noc.one'], hostnameMode);
|
const trie = createTrie(['.skk.moe', 'noc.one']);
|
||||||
|
|
||||||
expect(trie.find('.skk.moe')).to.deep.equal(['.skk.moe']);
|
expect(trie.find('.skk.moe')).to.deep.equal(['.skk.moe']);
|
||||||
expect(trie.find('noc.one')).to.deep.equal(['noc.one']);
|
expect(trie.find('noc.one')).to.deep.equal(['noc.one']);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should match subdomain - 1', () => {
|
it('should match subdomain - 1', () => {
|
||||||
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
|
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']);
|
||||||
|
|
||||||
expect(trie.find('.skk.moe')).to.deep.equal(['image.cdn.skk.moe', 'blog.skk.moe']);
|
expect(trie.find('.skk.moe')).to.deep.equal(['image.cdn.skk.moe', 'blog.skk.moe']);
|
||||||
expect(trie.find('.sukkaw.com')).to.deep.equal(['www.sukkaw.com']);
|
expect(trie.find('.sukkaw.com')).to.deep.equal(['www.sukkaw.com']);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should match subdomain - 2', () => {
|
it('should match subdomain - 2', () => {
|
||||||
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
|
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']);
|
||||||
|
|
||||||
expect(trie.find('.skk.moe')).to.deep.equal(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']);
|
expect(trie.find('.skk.moe')).to.deep.equal(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']);
|
||||||
expect(trie.find('.sukkaw.com')).to.deep.equal(['www.sukkaw.com']);
|
expect(trie.find('.sukkaw.com')).to.deep.equal(['www.sukkaw.com']);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should not remove non-subdomain', () => {
|
it('should not remove non-subdomain', () => {
|
||||||
const trie = createTrie(['skk.moe', 'sukkaskk.moe'], hostnameMode);
|
const trie = createTrie(['skk.moe', 'sukkaskk.moe']);
|
||||||
expect(trie.find('.skk.moe')).to.deep.equal([]);
|
expect(trie.find('.skk.moe')).to.deep.equal([]);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
|
||||||
|
|
||||||
describe('smol tree', () => {
|
describe('smol tree', () => {
|
||||||
it('should create simple tree - 1', () => {
|
it('should create simple tree - 1', () => {
|
||||||
@ -221,7 +169,7 @@ describe('smol tree', () => {
|
|||||||
'.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe',
|
'.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe',
|
||||||
'www.noc.one', 'cdn.noc.one',
|
'www.noc.one', 'cdn.noc.one',
|
||||||
'.blog.sub.example.com', 'sub.example.com', 'cdn.sub.example.com', '.sub.example.com'
|
'.blog.sub.example.com', 'sub.example.com', 'cdn.sub.example.com', '.sub.example.com'
|
||||||
], true, true);
|
], true);
|
||||||
|
|
||||||
expect(trie.dump()).to.deep.equal([
|
expect(trie.dump()).to.deep.equal([
|
||||||
'.sub.example.com',
|
'.sub.example.com',
|
||||||
@ -233,7 +181,7 @@ describe('smol tree', () => {
|
|||||||
it('should create simple tree - 2', () => {
|
it('should create simple tree - 2', () => {
|
||||||
const trie = createTrie([
|
const trie = createTrie([
|
||||||
'.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe'
|
'.skk.moe', 'blog.skk.moe', '.cdn.skk.moe', 'skk.moe'
|
||||||
], true, true);
|
], true);
|
||||||
|
|
||||||
expect(trie.dump()).to.deep.equal([
|
expect(trie.dump()).to.deep.equal([
|
||||||
'.skk.moe'
|
'.skk.moe'
|
||||||
@ -243,7 +191,7 @@ describe('smol tree', () => {
|
|||||||
it('should create simple tree - 2', () => {
|
it('should create simple tree - 2', () => {
|
||||||
const trie = createTrie([
|
const trie = createTrie([
|
||||||
'.blog.sub.example.com', 'cdn.sub.example.com', '.sub.example.com'
|
'.blog.sub.example.com', 'cdn.sub.example.com', '.sub.example.com'
|
||||||
], true, true);
|
], true);
|
||||||
|
|
||||||
expect(trie.dump()).to.deep.equal([
|
expect(trie.dump()).to.deep.equal([
|
||||||
'.sub.example.com'
|
'.sub.example.com'
|
||||||
@ -261,7 +209,7 @@ describe('smol tree', () => {
|
|||||||
'act.commercial.shouji.360.cn',
|
'act.commercial.shouji.360.cn',
|
||||||
'cdn.creative.medialytics.com',
|
'cdn.creative.medialytics.com',
|
||||||
'px.cdn.creative.medialytics.com'
|
'px.cdn.creative.medialytics.com'
|
||||||
], true, true);
|
], true);
|
||||||
|
|
||||||
expect(trie.dump()).to.deep.equal([
|
expect(trie.dump()).to.deep.equal([
|
||||||
'cdn.creative.medialytics.com',
|
'cdn.creative.medialytics.com',
|
||||||
@ -277,7 +225,7 @@ describe('smol tree', () => {
|
|||||||
'anotherskk.moe',
|
'anotherskk.moe',
|
||||||
'blog.anotherskk.moe',
|
'blog.anotherskk.moe',
|
||||||
'blog.skk.moe'
|
'blog.skk.moe'
|
||||||
], true, true);
|
], true);
|
||||||
|
|
||||||
expect(trie.dump()).to.deep.equal([
|
expect(trie.dump()).to.deep.equal([
|
||||||
'anotherskk.moe',
|
'anotherskk.moe',
|
||||||
@ -293,7 +241,7 @@ describe('smol tree', () => {
|
|||||||
'anotherskk.moe',
|
'anotherskk.moe',
|
||||||
'blog.anotherskk.moe',
|
'blog.anotherskk.moe',
|
||||||
'blog.skk.moe'
|
'blog.skk.moe'
|
||||||
], true, true);
|
], true);
|
||||||
|
|
||||||
expect(trie.dump()).to.deep.equal([
|
expect(trie.dump()).to.deep.equal([
|
||||||
'anotherskk.moe',
|
'anotherskk.moe',
|
||||||
@ -328,7 +276,7 @@ describe('smol tree', () => {
|
|||||||
'.skk.moe',
|
'.skk.moe',
|
||||||
'blog.cdn.example.com',
|
'blog.cdn.example.com',
|
||||||
'cdn.example.com'
|
'cdn.example.com'
|
||||||
], true, true);
|
], true);
|
||||||
|
|
||||||
expect(trie.dump()).to.deep.equal([
|
expect(trie.dump()).to.deep.equal([
|
||||||
'cdn.example.com', 'blog.cdn.example.com',
|
'cdn.example.com', 'blog.cdn.example.com',
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Suffix Trie based on Mnemonist Trie
|
* Hostname-Optimized Trie based on Mnemonist Trie
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { fastStringArrayJoin } from './misc';
|
import { fastStringArrayJoin } from './misc';
|
||||||
@ -28,57 +28,86 @@ const createNode = (parent: TrieNode | null = null): TrieNode => {
|
|||||||
return [false, parent, new Map<string, TrieNode>()] as TrieNode;
|
return [false, parent, new Map<string, TrieNode>()] as TrieNode;
|
||||||
};
|
};
|
||||||
|
|
||||||
const hostnameToTokens = (hostname: string): string[] => {
|
export const hostnameToTokens = (hostname: string): string[] => {
|
||||||
return hostname.split('.').reduce<string[]>((acc, token, index) => {
|
const tokens = hostname.split('.');
|
||||||
if (index > 0) {
|
const results: string[] = [];
|
||||||
acc.push('.', token);
|
let token = '';
|
||||||
} else if (token.length > 0) {
|
for (let i = 0, l = tokens.length; i < l; i++) {
|
||||||
acc.push(token);
|
if (i > 0) {
|
||||||
|
results.push('.');
|
||||||
}
|
}
|
||||||
return acc;
|
|
||||||
}, []);
|
token = tokens[i];
|
||||||
|
if (token.length > 0) {
|
||||||
|
results.push(token);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return results;
|
||||||
};
|
};
|
||||||
|
|
||||||
export const createTrie = (from?: string[] | Set<string> | null, hostnameMode = false, smolTree = false) => {
|
const walkHostnameTokens = (hostname: string, onToken: (token: string) => boolean | null): boolean | null => {
|
||||||
|
const tokens = hostname.split('.');
|
||||||
|
let token = '';
|
||||||
|
|
||||||
|
const l = tokens.length - 1;
|
||||||
|
for (let i = l; i >= 0; i--) {
|
||||||
|
if (
|
||||||
|
i < l
|
||||||
|
// when onToken returns true, we should skip the rest of the loop
|
||||||
|
&& onToken('.')
|
||||||
|
) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
token = tokens[i];
|
||||||
|
if (
|
||||||
|
token.length > 0
|
||||||
|
// when onToken returns true, we should skip the rest of the loop
|
||||||
|
&& onToken(token)
|
||||||
|
) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
export const createTrie = (from?: string[] | Set<string> | null, smolTree = false) => {
|
||||||
let size = 0;
|
let size = 0;
|
||||||
const root: TrieNode = createNode();
|
const root: TrieNode = createNode();
|
||||||
|
|
||||||
const isHostnameMode = (_token: string | string[]): _token is string[] => hostnameMode;
|
|
||||||
|
|
||||||
const suffixToTokens = hostnameMode
|
|
||||||
? hostnameToTokens
|
|
||||||
: (suffix: string) => suffix;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Method used to add the given suffix to the trie.
|
* Method used to add the given suffix to the trie.
|
||||||
*/
|
*/
|
||||||
const add = smolTree
|
const add = smolTree
|
||||||
? (suffix: string): void => {
|
? (suffix: string): void => {
|
||||||
let node: TrieNode = root;
|
let node: TrieNode = root;
|
||||||
let token: string;
|
|
||||||
|
|
||||||
const tokens = suffixToTokens(suffix);
|
|
||||||
|
|
||||||
for (let i = tokens.length - 1; i >= 0; i--) {
|
|
||||||
token = tokens[i];
|
|
||||||
|
|
||||||
|
const onToken = (token: string) => {
|
||||||
if (node[2].has(token)) {
|
if (node[2].has(token)) {
|
||||||
node = node[2].get(token)!;
|
node = node[2].get(token)!;
|
||||||
|
|
||||||
// During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie
|
// During the adding of `[start]blog|.skk.moe` and find out that there is a `[start].skk.moe` in the trie
|
||||||
// Dedupe the covered subdomain by skipping
|
// Dedupe the covered subdomain by skipping
|
||||||
if (token === '.' && node[0]) {
|
if (token === '.' && node[0]) {
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const newNode = createNode(node);
|
const newNode = createNode(node);
|
||||||
node[2].set(token, newNode);
|
node[2].set(token, newNode);
|
||||||
node = newNode;
|
node = newNode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
// When walkHostnameTokens returns true, we should skip the rest
|
||||||
|
if (walkHostnameTokens(suffix, onToken)) {
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we are in smolTree mode, we need to do something at the end of the loop
|
// If we are in smolTree mode, we need to do something at the end of the loop
|
||||||
if (tokens[0] === '.') {
|
if (suffix[0] === '.') {
|
||||||
// Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
|
// Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
|
||||||
|
|
||||||
const parent = node[1]!;
|
const parent = node[1]!;
|
||||||
@ -101,13 +130,8 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|||||||
}
|
}
|
||||||
: (suffix: string): void => {
|
: (suffix: string): void => {
|
||||||
let node: TrieNode = root;
|
let node: TrieNode = root;
|
||||||
let token: string;
|
|
||||||
|
|
||||||
const tokens = suffixToTokens(suffix);
|
|
||||||
|
|
||||||
for (let i = tokens.length - 1; i >= 0; i--) {
|
|
||||||
token = tokens[i];
|
|
||||||
|
|
||||||
|
const onToken = (token: string) => {
|
||||||
if (node[2].has(token)) {
|
if (node[2].has(token)) {
|
||||||
node = node[2].get(token)!;
|
node = node[2].get(token)!;
|
||||||
} else {
|
} else {
|
||||||
@ -115,6 +139,13 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|||||||
node[2].set(token, newNode);
|
node[2].set(token, newNode);
|
||||||
node = newNode;
|
node = newNode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
// When walkHostnameTokens returns true, we should skip the rest
|
||||||
|
if (walkHostnameTokens(suffix, onToken)) {
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!node[0]) { // smol tree don't have size, so else-if here
|
if (!node[0]) { // smol tree don't have size, so else-if here
|
||||||
@ -124,7 +155,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|||||||
};
|
};
|
||||||
|
|
||||||
const walkIntoLeafWithTokens = (
|
const walkIntoLeafWithTokens = (
|
||||||
tokens: string | string[],
|
tokens: string[],
|
||||||
onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
|
onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
|
||||||
) => {
|
) => {
|
||||||
let node: TrieNode = root;
|
let node: TrieNode = root;
|
||||||
@ -135,7 +166,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|||||||
for (let i = tokens.length - 1; i >= 0; i--) {
|
for (let i = tokens.length - 1; i >= 0; i--) {
|
||||||
token = tokens[i];
|
token = tokens[i];
|
||||||
|
|
||||||
if (hostnameMode && token === '') {
|
if (token === '') {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -153,19 +184,50 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|||||||
return { node, parent };
|
return { node, parent };
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const walkIntoLeafWithSuffix = (
|
||||||
|
suffix: string,
|
||||||
|
onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
|
||||||
|
) => {
|
||||||
|
let node: TrieNode = root;
|
||||||
|
let parent: TrieNode = node;
|
||||||
|
|
||||||
|
const onToken = (token: string) => {
|
||||||
|
if (token === '') {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
parent = node;
|
||||||
|
|
||||||
|
if (node[2].has(token)) {
|
||||||
|
node = node[2].get(token)!;
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
onLoop(node, parent, token);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (walkHostnameTokens(suffix, onToken) === null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return { node, parent };
|
||||||
|
};
|
||||||
|
|
||||||
const contains = (suffix: string): boolean => {
|
const contains = (suffix: string): boolean => {
|
||||||
const tokens = suffixToTokens(suffix);
|
return walkIntoLeafWithSuffix(suffix) !== null;
|
||||||
return walkIntoLeafWithTokens(tokens) !== null;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const walk = (
|
const walk = (
|
||||||
onMatches: (suffix: string | string[]) => void,
|
onMatches: (suffix: string[]) => void,
|
||||||
initialNode = root,
|
initialNode = root,
|
||||||
initialSuffix: string | string[] = hostnameMode ? [] : ''
|
initialSuffix: string[] = []
|
||||||
) => {
|
) => {
|
||||||
const nodeStack: TrieNode[] = [initialNode];
|
const nodeStack: TrieNode[] = [initialNode];
|
||||||
// Resolving initial string (begin the start of the stack)
|
// Resolving initial string (begin the start of the stack)
|
||||||
const suffixStack: Array<string | string[]> = [initialSuffix];
|
const suffixStack: string[][] = [initialSuffix];
|
||||||
|
|
||||||
let node: TrieNode = root;
|
let node: TrieNode = root;
|
||||||
|
|
||||||
@ -177,7 +239,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|||||||
// Pushing the child node to the stack for next iteration of DFS
|
// Pushing the child node to the stack for next iteration of DFS
|
||||||
nodeStack.push(childNode);
|
nodeStack.push(childNode);
|
||||||
|
|
||||||
suffixStack.push(isHostnameMode(suffix) ? [k, ...suffix] : k + suffix);
|
suffixStack.push([k, ...suffix]);
|
||||||
});
|
});
|
||||||
|
|
||||||
// If the node is a sentinel, we push the suffix to the results
|
// If the node is a sentinel, we push the suffix to the results
|
||||||
@ -194,7 +256,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|||||||
parent: TrieNode
|
parent: TrieNode
|
||||||
}
|
}
|
||||||
|
|
||||||
const getSingleChildLeaf = (tokens: string | string[]): FindSingleChildLeafResult | null => {
|
const getSingleChildLeaf = (tokens: string[]): FindSingleChildLeafResult | null => {
|
||||||
let toPrune: TrieNode | null = null;
|
let toPrune: TrieNode | null = null;
|
||||||
let tokenToPrune: string | null = null;
|
let tokenToPrune: string | null = null;
|
||||||
|
|
||||||
@ -203,7 +265,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|||||||
|
|
||||||
// Even if the node size is 1, but the single child is ".", we should retain the branch
|
// Even if the node size is 1, but the single child is ".", we should retain the branch
|
||||||
// Since the "." could be special if it is the leaf-est node
|
// Since the "." could be special if it is the leaf-est node
|
||||||
const onlyChild = node[2].size < 2 && (!hostnameMode || !node[2].has('.'));
|
const onlyChild = node[2].size < 2 && !node[2].has('.');
|
||||||
|
|
||||||
if (toPrune != null) { // the top-est branch that could potentially being pruned
|
if (toPrune != null) { // the top-est branch that could potentially being pruned
|
||||||
if (!onlyChild) {
|
if (!onlyChild) {
|
||||||
@ -234,35 +296,27 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|||||||
throw new Error('A Trie with smolTree enabled cannot perform find!');
|
throw new Error('A Trie with smolTree enabled cannot perform find!');
|
||||||
}
|
}
|
||||||
|
|
||||||
const inputTokens = suffixToTokens(inputSuffix);
|
const inputTokens = hostnameToTokens(inputSuffix);
|
||||||
const res = walkIntoLeafWithTokens(inputTokens);
|
const res = walkIntoLeafWithTokens(inputTokens);
|
||||||
if (res === null) return [];
|
if (res === null) return [];
|
||||||
|
|
||||||
const matches: Array<string | string[]> = [];
|
const matches: string[][] = [];
|
||||||
|
|
||||||
const onMatches = includeEqualWithSuffix
|
const onMatches = includeEqualWithSuffix
|
||||||
? (suffix: string | string[]) => matches.push(suffix)
|
? (suffix: string[]) => matches.push(suffix)
|
||||||
: (
|
: (suffix: string[]) => {
|
||||||
hostnameMode
|
|
||||||
? (suffix: string[]) => {
|
|
||||||
if (suffix.some((t, i) => t !== inputTokens[i])) {
|
if (suffix.some((t, i) => t !== inputTokens[i])) {
|
||||||
matches.push(suffix);
|
matches.push(suffix);
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
: (suffix: string) => {
|
|
||||||
if (suffix !== inputTokens) {
|
|
||||||
matches.push(suffix);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
walk(
|
walk(
|
||||||
onMatches as any,
|
onMatches,
|
||||||
res.node, // Performing DFS from prefix
|
res.node, // Performing DFS from prefix
|
||||||
inputTokens
|
inputTokens
|
||||||
);
|
);
|
||||||
|
|
||||||
return hostnameMode ? matches.map((m) => fastStringArrayJoin(m as string[], '')) : matches as string[];
|
return matches.map((m) => fastStringArrayJoin(m, ''));
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -273,17 +327,15 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|||||||
throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!');
|
throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!');
|
||||||
}
|
}
|
||||||
|
|
||||||
const inputTokens = suffixToTokens(inputSuffix);
|
const inputTokens = hostnameToTokens(inputSuffix);
|
||||||
|
|
||||||
const res = walkIntoLeafWithTokens(inputTokens);
|
const res = walkIntoLeafWithTokens(inputTokens);
|
||||||
if (res === null) return;
|
if (res === null) return;
|
||||||
|
|
||||||
const onMatches = hostnameMode
|
const onMatches = (suffix: string[]) => set.delete(fastStringArrayJoin(suffix, ''));
|
||||||
? (suffix: string[]) => set.delete(fastStringArrayJoin(suffix, ''))
|
|
||||||
: (suffix: string) => set.delete(suffix);
|
|
||||||
|
|
||||||
walk(
|
walk(
|
||||||
onMatches as any,
|
onMatches,
|
||||||
res.node, // Performing DFS from prefix
|
res.node, // Performing DFS from prefix
|
||||||
inputTokens
|
inputTokens
|
||||||
);
|
);
|
||||||
@ -293,7 +345,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|||||||
* Method used to delete a prefix from the trie.
|
* Method used to delete a prefix from the trie.
|
||||||
*/
|
*/
|
||||||
const remove = (suffix: string): boolean => {
|
const remove = (suffix: string): boolean => {
|
||||||
const res = getSingleChildLeaf(suffixToTokens(suffix));
|
const res = getSingleChildLeaf(hostnameToTokens(suffix));
|
||||||
if (res === null) return false;
|
if (res === null) return false;
|
||||||
|
|
||||||
if (!res.node[0]) return false;
|
if (!res.node[0]) return false;
|
||||||
@ -314,8 +366,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|||||||
* Method used to assert whether the given prefix exists in the Trie.
|
* Method used to assert whether the given prefix exists in the Trie.
|
||||||
*/
|
*/
|
||||||
const has = (suffix: string): boolean => {
|
const has = (suffix: string): boolean => {
|
||||||
const tokens = suffixToTokens(suffix);
|
const res = walkIntoLeafWithSuffix(suffix);
|
||||||
const res = walkIntoLeafWithTokens(tokens);
|
|
||||||
|
|
||||||
return res
|
return res
|
||||||
? res.node[0]
|
? res.node[0]
|
||||||
@ -326,20 +377,18 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|||||||
const results: string[] = [];
|
const results: string[] = [];
|
||||||
|
|
||||||
walk(suffix => {
|
walk(suffix => {
|
||||||
results.push(
|
results.push(fastStringArrayJoin(suffix, ''));
|
||||||
isHostnameMode(suffix) ? fastStringArrayJoin(suffix, '') : suffix
|
|
||||||
);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
};
|
};
|
||||||
|
|
||||||
const whitelist = (suffix: string) => {
|
const whitelist = (suffix: string) => {
|
||||||
if (!hostnameMode && !smolTree) {
|
if (!smolTree) {
|
||||||
throw new Error('whitelist method is only available in hostname mode or smolTree mode.');
|
throw new Error('whitelist method is only available in smolTree mode.');
|
||||||
}
|
}
|
||||||
|
|
||||||
const tokens = suffixToTokens(suffix);
|
const tokens = hostnameToTokens(suffix);
|
||||||
const res = getSingleChildLeaf(tokens);
|
const res = getSingleChildLeaf(tokens);
|
||||||
|
|
||||||
if (res === null) return;
|
if (res === null) return;
|
||||||
@ -406,7 +455,6 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|||||||
JSON.stringify(deepTrieNodeToJSON(root), null, 2).split('\n').map((line) => ' '.repeat(depth) + line),
|
JSON.stringify(deepTrieNodeToJSON(root), null, 2).split('\n').map((line) => ' '.repeat(depth) + line),
|
||||||
'\n'
|
'\n'
|
||||||
),
|
),
|
||||||
hostnameMode,
|
|
||||||
smolTree
|
smolTree
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|||||||
@ -14,7 +14,7 @@ export const parseDomesticList = async () => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const trie = createTrie(set, true);
|
const trie = createTrie(set);
|
||||||
|
|
||||||
const top5000 = new Set<string>();
|
const top5000 = new Set<string>();
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user