mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 17:20:35 +08:00
Simplify Trie
This commit is contained in:
parent
e6f7a98ee9
commit
b742701f59
@ -12,7 +12,7 @@ import { readFileByLine } from '../fetch-text-by-line';
|
|||||||
import { asyncWriteToStream } from '../async-write-to-stream';
|
import { asyncWriteToStream } from '../async-write-to-stream';
|
||||||
|
|
||||||
export abstract class RuleOutput<TPreprocessed = unknown> {
|
export abstract class RuleOutput<TPreprocessed = unknown> {
|
||||||
protected domainTrie = createTrie<string>(null, true);
|
protected domainTrie = createTrie(null, true);
|
||||||
protected domainKeywords = new Set<string>();
|
protected domainKeywords = new Set<string>();
|
||||||
protected domainWildcard = new Set<string>();
|
protected domainWildcard = new Set<string>();
|
||||||
protected userAgent = new Set<string>();
|
protected userAgent = new Set<string>();
|
||||||
@ -97,7 +97,7 @@ export abstract class RuleOutput<TPreprocessed = unknown> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
addDomain(domain: string) {
|
addDomain(domain: string) {
|
||||||
this.domainTrie.add(domain, domain);
|
this.domainTrie.add(domain);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -109,7 +109,8 @@ export abstract class RuleOutput<TPreprocessed = unknown> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
addDomainSuffix(domain: string) {
|
addDomainSuffix(domain: string) {
|
||||||
return this.addDomain(domain[0] === '.' ? domain : '.' + domain);
|
this.domainTrie.add(domain, true);
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
bulkAddDomainSuffix(domains: string[]) {
|
bulkAddDomainSuffix(domains: string[]) {
|
||||||
|
|||||||
@ -1,37 +1,37 @@
|
|||||||
import { createTrie, hostnameToTokens } from './trie';
|
import { createTrie } from './trie';
|
||||||
import { describe, it } from 'mocha';
|
import { describe, it } from 'mocha';
|
||||||
import { expect } from 'chai';
|
import { expect } from 'chai';
|
||||||
|
|
||||||
describe('hostname to tokens', () => {
|
// describe('hostname to tokens', () => {
|
||||||
it('should split hostname into tokens.', () => {
|
// it('should split hostname into tokens.', () => {
|
||||||
expect(hostnameToTokens('.blog.skk.moe')).to.deep.equal([
|
// expect(hostnameToTokens('.blog.skk.moe')).to.deep.equal([
|
||||||
'.',
|
// '.',
|
||||||
'blog',
|
// 'blog',
|
||||||
'.',
|
// '.',
|
||||||
'skk',
|
// 'skk',
|
||||||
'.',
|
// '.',
|
||||||
'moe'
|
// 'moe'
|
||||||
]);
|
// ]);
|
||||||
|
|
||||||
expect(hostnameToTokens('blog.skk.moe')).to.deep.equal([
|
// expect(hostnameToTokens('blog.skk.moe')).to.deep.equal([
|
||||||
'blog',
|
// 'blog',
|
||||||
'.',
|
// '.',
|
||||||
'skk',
|
// 'skk',
|
||||||
'.',
|
// '.',
|
||||||
'moe'
|
// 'moe'
|
||||||
]);
|
// ]);
|
||||||
|
|
||||||
expect(hostnameToTokens('skk.moe')).to.deep.equal([
|
// expect(hostnameToTokens('skk.moe')).to.deep.equal([
|
||||||
'skk',
|
// 'skk',
|
||||||
'.',
|
// '.',
|
||||||
'moe'
|
// 'moe'
|
||||||
]);
|
// ]);
|
||||||
|
|
||||||
expect(hostnameToTokens('moe')).to.deep.equal([
|
// expect(hostnameToTokens('moe')).to.deep.equal([
|
||||||
'moe'
|
// 'moe'
|
||||||
]);
|
// ]);
|
||||||
});
|
// });
|
||||||
});
|
// });
|
||||||
|
|
||||||
describe('Trie', () => {
|
describe('Trie', () => {
|
||||||
it('should be possible to add domains to a Trie.', () => {
|
it('should be possible to add domains to a Trie.', () => {
|
||||||
@ -43,12 +43,12 @@ describe('Trie', () => {
|
|||||||
|
|
||||||
expect(trie.size).to.equal(3);
|
expect(trie.size).to.equal(3);
|
||||||
|
|
||||||
expect(trie.has('a.skk.moe')).to.equal(true);
|
expect(trie.has('a.skk.moe'), 'a.skk.moe').to.equal(true);
|
||||||
expect(trie.has('skk.moe')).to.equal(true);
|
expect(trie.has('skk.moe'), 'skk.moe').to.equal(true);
|
||||||
expect(trie.has('anotherskk.moe')).to.equal(true);
|
expect(trie.has('anotherskk.moe'), 'anotherskk.moe').to.equal(true);
|
||||||
expect(trie.has('example.com')).to.equal(false);
|
expect(trie.has('example.com'), 'example.com').to.equal(false);
|
||||||
expect(trie.has('skk.mo')).to.equal(false);
|
expect(trie.has('skk.mo'), 'skk.mo').to.equal(false);
|
||||||
expect(trie.has('another.skk.moe')).to.equal(false);
|
expect(trie.has('another.skk.moe'), 'another.skk.moe').to.equal(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('adding the same item several times should not increase size.', () => {
|
it('adding the same item several times should not increase size.', () => {
|
||||||
@ -78,6 +78,7 @@ describe('Trie', () => {
|
|||||||
const trie = createTrie(null, false);
|
const trie = createTrie(null, false);
|
||||||
|
|
||||||
trie.add('skk.moe');
|
trie.add('skk.moe');
|
||||||
|
trie.add('blog.skk.moe');
|
||||||
trie.add('example.com');
|
trie.add('example.com');
|
||||||
trie.add('moe.sb');
|
trie.add('moe.sb');
|
||||||
|
|
||||||
@ -89,12 +90,12 @@ describe('Trie', () => {
|
|||||||
expect(trie.has('skk.moe')).to.equal(false);
|
expect(trie.has('skk.moe')).to.equal(false);
|
||||||
expect(trie.has('moe.sb')).to.equal(true);
|
expect(trie.has('moe.sb')).to.equal(true);
|
||||||
|
|
||||||
expect(trie.size).to.equal(2);
|
expect(trie.size).to.equal(3);
|
||||||
|
|
||||||
expect(trie.delete('example.com')).to.equal(true);
|
expect(trie.delete('example.com')).to.equal(true);
|
||||||
expect(trie.size).to.equal(1);
|
expect(trie.size).to.equal(2);
|
||||||
expect(trie.delete('moe.sb')).to.equal(true);
|
expect(trie.delete('moe.sb')).to.equal(true);
|
||||||
expect(trie.size).to.equal(0);
|
expect(trie.size).to.equal(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should be possible to check the existence of a sequence in the Trie.', () => {
|
it('should be possible to check the existence of a sequence in the Trie.', () => {
|
||||||
@ -116,15 +117,15 @@ describe('Trie', () => {
|
|||||||
trie.add('cdn.example.com');
|
trie.add('cdn.example.com');
|
||||||
trie.add('example.org');
|
trie.add('example.org');
|
||||||
|
|
||||||
expect(trie.find('example.com')).to.deep.equal(['example.com', 'cdn.example.com', 'blog.example.com']);
|
expect(trie.find('example.com'), 'example.com').to.deep.equal(['example.com', 'cdn.example.com', 'blog.example.com']);
|
||||||
expect(trie.find('com')).to.deep.equal(['example.com', 'cdn.example.com', 'blog.example.com']);
|
expect(trie.find('com'), 'com').to.deep.equal(['example.com', 'cdn.example.com', 'blog.example.com']);
|
||||||
expect(trie.find('.example.com')).to.deep.equal(['cdn.example.com', 'blog.example.com']);
|
expect(trie.find('.example.com'), '.example.com').to.deep.equal(['cdn.example.com', 'blog.example.com']);
|
||||||
expect(trie.find('org')).to.deep.equal(['example.org']);
|
expect(trie.find('org'), 'prg').to.deep.equal(['example.org']);
|
||||||
expect(trie.find('example.net')).to.deep.equal([]);
|
expect(trie.find('example.net'), 'example.net').to.deep.equal([]);
|
||||||
expect(trie.find('')).to.deep.equal(['example.org', 'example.com', 'cdn.example.com', 'blog.example.com']);
|
expect(trie.find(''), '').to.deep.equal(['example.org', 'example.com', 'cdn.example.com', 'blog.example.com']);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should be possible to retrieve items matching the given prefix even with a smol trie.', () => {
|
it('should be possible to retrieve items matching the given prefix even with a smol trie', () => {
|
||||||
const trie = createTrie(null, true);
|
const trie = createTrie(null, true);
|
||||||
|
|
||||||
trie.add('.example.com');
|
trie.add('.example.com');
|
||||||
@ -206,7 +207,7 @@ describe('smol tree', () => {
|
|||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should create simple tree - 2', () => {
|
it('should create simple tree - 3', () => {
|
||||||
const trie = createTrie([
|
const trie = createTrie([
|
||||||
'.blog.sub.example.com', 'cdn.sub.example.com', '.sub.example.com'
|
'.blog.sub.example.com', 'cdn.sub.example.com', '.sub.example.com'
|
||||||
], true);
|
], true);
|
||||||
@ -258,10 +259,16 @@ describe('smol tree', () => {
|
|||||||
'skk.moe',
|
'skk.moe',
|
||||||
'anotherskk.moe',
|
'anotherskk.moe',
|
||||||
'blog.anotherskk.moe',
|
'blog.anotherskk.moe',
|
||||||
'blog.skk.moe'
|
'blog.skk.moe',
|
||||||
|
'.cdn.local',
|
||||||
|
'blog.img.skk.local',
|
||||||
|
'img.skk.local'
|
||||||
], true);
|
], true);
|
||||||
|
|
||||||
expect(trie.dump()).to.deep.equal([
|
expect(trie.dump(), '1').to.deep.equal([
|
||||||
|
'img.skk.local',
|
||||||
|
'blog.img.skk.local',
|
||||||
|
'.cdn.local',
|
||||||
'anotherskk.moe',
|
'anotherskk.moe',
|
||||||
'blog.anotherskk.moe',
|
'blog.anotherskk.moe',
|
||||||
'skk.moe',
|
'skk.moe',
|
||||||
@ -270,20 +277,44 @@ describe('smol tree', () => {
|
|||||||
|
|
||||||
trie.whitelist('.skk.moe');
|
trie.whitelist('.skk.moe');
|
||||||
|
|
||||||
expect(trie.dump()).to.deep.equal([
|
expect(trie.dump(), '2').to.deep.equal([
|
||||||
|
'img.skk.local',
|
||||||
|
'blog.img.skk.local',
|
||||||
|
'.cdn.local',
|
||||||
'anotherskk.moe',
|
'anotherskk.moe',
|
||||||
'blog.anotherskk.moe'
|
'blog.anotherskk.moe'
|
||||||
]);
|
]);
|
||||||
|
|
||||||
trie.whitelist('anotherskk.moe');
|
trie.whitelist('anotherskk.moe');
|
||||||
expect(trie.dump()).to.deep.equal([
|
expect(trie.dump(), '3').to.deep.equal([
|
||||||
|
'img.skk.local',
|
||||||
|
'blog.img.skk.local',
|
||||||
|
'.cdn.local',
|
||||||
'blog.anotherskk.moe'
|
'blog.anotherskk.moe'
|
||||||
]);
|
]);
|
||||||
|
|
||||||
trie.add('anotherskk.moe');
|
trie.add('anotherskk.moe');
|
||||||
trie.whitelist('.anotherskk.moe');
|
trie.whitelist('.anotherskk.moe');
|
||||||
|
|
||||||
expect(trie.dump()).to.deep.equal([]);
|
expect(trie.dump(), '4').to.deep.equal([
|
||||||
|
'img.skk.local',
|
||||||
|
'blog.img.skk.local',
|
||||||
|
'.cdn.local'
|
||||||
|
]);
|
||||||
|
|
||||||
|
trie.whitelist('img.skk.local');
|
||||||
|
expect(trie.dump(), '5').to.deep.equal([
|
||||||
|
'blog.img.skk.local',
|
||||||
|
'.cdn.local'
|
||||||
|
]);
|
||||||
|
|
||||||
|
trie.whitelist('cdn.local');
|
||||||
|
expect(trie.dump(), '6').to.deep.equal([
|
||||||
|
'blog.img.skk.local'
|
||||||
|
]);
|
||||||
|
|
||||||
|
trie.whitelist('.skk.local');
|
||||||
|
expect(trie.dump(), '7').to.deep.equal([]);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should whitelist trie correctly', () => {
|
it('should whitelist trie correctly', () => {
|
||||||
|
|||||||
@ -7,7 +7,8 @@ import util from 'node:util';
|
|||||||
import { noop } from 'foxact/noop';
|
import { noop } from 'foxact/noop';
|
||||||
|
|
||||||
type TrieNode<Meta = any> = [
|
type TrieNode<Meta = any> = [
|
||||||
boolean, /** sentinel */
|
boolean, /** end */
|
||||||
|
boolean, /** includeAllSubdoain (.example.org, ||example.com) */
|
||||||
TrieNode | null, /** parent */
|
TrieNode | null, /** parent */
|
||||||
Map<string, TrieNode>, /** children */
|
Map<string, TrieNode>, /** children */
|
||||||
Meta /** meta */
|
Meta /** meta */
|
||||||
@ -19,59 +20,56 @@ function deepTrieNodeToJSON(node: TrieNode,
|
|||||||
if (node[0]) {
|
if (node[0]) {
|
||||||
obj['[start]'] = node[0];
|
obj['[start]'] = node[0];
|
||||||
}
|
}
|
||||||
if (node[3] != null) {
|
obj['[subdomain]'] = node[1];
|
||||||
|
if (node[4] != null) {
|
||||||
if (unpackMeta) {
|
if (unpackMeta) {
|
||||||
obj['[meta]'] = unpackMeta(node[3]);
|
obj['[meta]'] = unpackMeta(node[3]);
|
||||||
} else {
|
} else {
|
||||||
obj['[meta]'] = node[3];
|
obj['[meta]'] = node[3];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
node[2].forEach((value, key) => {
|
node[3].forEach((value, key) => {
|
||||||
obj[key] = deepTrieNodeToJSON(value, unpackMeta);
|
obj[key] = deepTrieNodeToJSON(value, unpackMeta);
|
||||||
});
|
});
|
||||||
return obj;
|
return obj;
|
||||||
}
|
}
|
||||||
|
|
||||||
const createNode = <Meta = any>(parent: TrieNode | null = null): TrieNode => [false, parent, new Map<string, TrieNode>(), null] as TrieNode<Meta>;
|
const createNode = <Meta = any>(allSubdomain = false, parent: TrieNode | null = null): TrieNode => [false, allSubdomain, parent, new Map<string, TrieNode>(), null] as TrieNode<Meta>;
|
||||||
|
|
||||||
export function hostnameToTokens(hostname: string): string[] {
|
export function hostnameToTokens(hostname: string): string[] {
|
||||||
const tokens = hostname.split('.');
|
const tokens = hostname.split('.');
|
||||||
const results: string[] = [];
|
const results: string[] = [];
|
||||||
let token = '';
|
let token = '';
|
||||||
for (let i = 0, l = tokens.length; i < l; i++) {
|
|
||||||
if (i > 0) {
|
|
||||||
results.push('.');
|
|
||||||
}
|
|
||||||
|
|
||||||
|
for (let i = 0, l = tokens.length; i < l; i++) {
|
||||||
token = tokens[i];
|
token = tokens[i];
|
||||||
if (token.length > 0) {
|
if (token.length > 0) {
|
||||||
results.push(token);
|
results.push(token);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
function walkHostnameTokens(hostname: string, onToken: (token: string) => boolean | null): boolean | null {
|
function walkHostnameTokens(hostname: string, onToken: (token: string) => boolean | null): boolean | null {
|
||||||
const tokens = hostname.split('.');
|
const tokens = hostname.split('.');
|
||||||
let token = '';
|
|
||||||
|
|
||||||
const l = tokens.length - 1;
|
const l = tokens.length - 1;
|
||||||
|
|
||||||
|
// we are at the first token of the hostname, so there is no separator before it
|
||||||
|
let token = '';
|
||||||
|
|
||||||
for (let i = l; i >= 0; i--) {
|
for (let i = l; i >= 0; i--) {
|
||||||
if (
|
token = tokens[i];
|
||||||
i < l // when i === l, we are at the first of hostname, no splitor there
|
if (token.length > 0) {
|
||||||
// when onToken returns true, we should skip the rest of the loop
|
const t = onToken(token);
|
||||||
&& onToken('.')
|
if (t === null) {
|
||||||
) {
|
return null;
|
||||||
|
}
|
||||||
|
// if the callback returns true, we should skip the rest
|
||||||
|
if (t) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
token = tokens[i];
|
|
||||||
if (
|
|
||||||
token.length > 0
|
|
||||||
// when onToken returns true, we should skip the rest of the loop
|
|
||||||
&& onToken(token)
|
|
||||||
) {
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -104,7 +102,7 @@ abstract class Triebase<Meta = any> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract add(suffix: string, meta?: Meta): void;
|
public abstract add(suffix: string, includeAllSubdoain?: boolean, meta?: Meta): void;
|
||||||
|
|
||||||
protected walkIntoLeafWithTokens(
|
protected walkIntoLeafWithTokens(
|
||||||
tokens: string[],
|
tokens: string[],
|
||||||
@ -124,8 +122,8 @@ abstract class Triebase<Meta = any> {
|
|||||||
|
|
||||||
parent = node;
|
parent = node;
|
||||||
|
|
||||||
if (node[2].has(token)) {
|
if (node[3].has(token)) {
|
||||||
node = node[2].get(token)!;
|
node = node[3].get(token)!;
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@ -144,14 +142,14 @@ abstract class Triebase<Meta = any> {
|
|||||||
let parent: TrieNode = node;
|
let parent: TrieNode = node;
|
||||||
|
|
||||||
const onToken = (token: string) => {
|
const onToken = (token: string) => {
|
||||||
if (token === '') {
|
// if (token === '') {
|
||||||
return true;
|
// return true;
|
||||||
}
|
// }
|
||||||
|
|
||||||
parent = node;
|
parent = node;
|
||||||
|
|
||||||
if (node[2].has(token)) {
|
if (node[3].has(token)) {
|
||||||
node = node[2].get(token)!;
|
node = node[3].get(token)!;
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@ -168,10 +166,18 @@ abstract class Triebase<Meta = any> {
|
|||||||
return { node, parent };
|
return { node, parent };
|
||||||
};
|
};
|
||||||
|
|
||||||
public contains(suffix: string): boolean { return this.walkIntoLeafWithSuffix(suffix) !== null; };
|
public contains(suffix: string, includeAllSubdoain = suffix[0] === '.'): boolean {
|
||||||
|
if (suffix[0] === '.') {
|
||||||
|
suffix = suffix.slice(1);
|
||||||
|
}
|
||||||
|
const res = this.walkIntoLeafWithSuffix(suffix);
|
||||||
|
if (!res) return false;
|
||||||
|
if (includeAllSubdoain) return res.node[1];
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
private walk(
|
private walk(
|
||||||
onMatches: (suffix: string[], meta: Meta) => void,
|
onMatches: (suffix: string[], subdomain: boolean, meta: Meta) => void,
|
||||||
initialNode = this.$root,
|
initialNode = this.$root,
|
||||||
initialSuffix: string[] = []
|
initialSuffix: string[] = []
|
||||||
) {
|
) {
|
||||||
@ -185,7 +191,7 @@ abstract class Triebase<Meta = any> {
|
|||||||
node = nodeStack.pop()!;
|
node = nodeStack.pop()!;
|
||||||
const suffix = suffixStack.pop()!;
|
const suffix = suffixStack.pop()!;
|
||||||
|
|
||||||
node[2].forEach((childNode, k) => {
|
node[3].forEach((childNode, k) => {
|
||||||
// Pushing the child node to the stack for next iteration of DFS
|
// Pushing the child node to the stack for next iteration of DFS
|
||||||
nodeStack.push(childNode);
|
nodeStack.push(childNode);
|
||||||
|
|
||||||
@ -194,7 +200,7 @@ abstract class Triebase<Meta = any> {
|
|||||||
|
|
||||||
// If the node is a sentinel, we push the suffix to the results
|
// If the node is a sentinel, we push the suffix to the results
|
||||||
if (node[0]) {
|
if (node[0]) {
|
||||||
onMatches(suffix, node[3]);
|
onMatches(suffix, node[1], node[4]);
|
||||||
}
|
}
|
||||||
} while (nodeStack.length);
|
} while (nodeStack.length);
|
||||||
};
|
};
|
||||||
@ -208,7 +214,7 @@ abstract class Triebase<Meta = any> {
|
|||||||
|
|
||||||
// Even if the node size is 1, but the single child is ".", we should retain the branch
|
// Even if the node size is 1, but the single child is ".", we should retain the branch
|
||||||
// Since the "." could be special if it is the leaf-est node
|
// Since the "." could be special if it is the leaf-est node
|
||||||
const onlyChild = node[2].size < 2 && !node[2].has('.');
|
const onlyChild = node[3].size === 0 && !node[2];
|
||||||
|
|
||||||
if (toPrune != null) { // the top-est branch that could potentially being pruned
|
if (toPrune != null) { // the top-est branch that could potentially being pruned
|
||||||
if (!onlyChild) {
|
if (!onlyChild) {
|
||||||
@ -236,26 +242,29 @@ abstract class Triebase<Meta = any> {
|
|||||||
*/
|
*/
|
||||||
public find(
|
public find(
|
||||||
inputSuffix: string,
|
inputSuffix: string,
|
||||||
/** @default true */ includeEqualWithSuffix = true
|
subdomainOnly = inputSuffix[0] === '.'
|
||||||
|
// /** @default true */ includeEqualWithSuffix = true
|
||||||
): string[] {
|
): string[] {
|
||||||
// if (smolTree) {
|
if (inputSuffix[0] === '.') {
|
||||||
// throw new Error('A Trie with smolTree enabled cannot perform find!');
|
inputSuffix = inputSuffix.slice(1);
|
||||||
// }
|
}
|
||||||
|
|
||||||
const inputTokens = hostnameToTokens(inputSuffix);
|
const inputTokens = hostnameToTokens(inputSuffix);
|
||||||
const res = this.walkIntoLeafWithTokens(inputTokens);
|
const res = this.walkIntoLeafWithTokens(inputTokens);
|
||||||
if (res === null) return [];
|
if (res === null) return [];
|
||||||
|
|
||||||
const matches: string[][] = [];
|
const results: string[] = [];
|
||||||
|
|
||||||
const onMatches = includeEqualWithSuffix
|
const onMatches = subdomainOnly
|
||||||
// fast path (default option)
|
? (suffix: string[], subdomain: boolean) => { // fast path (default option)
|
||||||
? (suffix: string[]) => matches.push(suffix)
|
const d = fastStringArrayJoin(suffix, '.');
|
||||||
// slow path
|
if (!subdomain && d === inputSuffix) return;
|
||||||
: (suffix: string[]) => {
|
|
||||||
if (!deepEqualArray(suffix, inputTokens)) {
|
results.push(subdomain ? '.' + d : d);
|
||||||
matches.push(suffix);
|
|
||||||
}
|
}
|
||||||
|
: (suffix: string[], subdomain: boolean) => { // fast path (default option)
|
||||||
|
const d = fastStringArrayJoin(suffix, '.');
|
||||||
|
results.push(subdomain ? '.' + d : d);
|
||||||
};
|
};
|
||||||
|
|
||||||
this.walk(
|
this.walk(
|
||||||
@ -264,7 +273,7 @@ abstract class Triebase<Meta = any> {
|
|||||||
inputTokens
|
inputTokens
|
||||||
);
|
);
|
||||||
|
|
||||||
return matches.map((m) => fastStringArrayJoin(m, ''));
|
return results;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -280,7 +289,7 @@ abstract class Triebase<Meta = any> {
|
|||||||
const { node, toPrune, tokenToPrune } = res;
|
const { node, toPrune, tokenToPrune } = res;
|
||||||
|
|
||||||
if (tokenToPrune && toPrune) {
|
if (tokenToPrune && toPrune) {
|
||||||
toPrune[2].delete(tokenToPrune);
|
toPrune[3].delete(tokenToPrune);
|
||||||
} else {
|
} else {
|
||||||
node[0] = false;
|
node[0] = false;
|
||||||
}
|
}
|
||||||
@ -288,18 +297,23 @@ abstract class Triebase<Meta = any> {
|
|||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
// eslint-disable-next-line @typescript-eslint/unbound-method -- alias class methods
|
// eslint-disable-next-line @typescript-eslint/unbound-method -- safe
|
||||||
public delete = this.remove;
|
public delete = this.remove;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Method used to assert whether the given suffix exists in the Trie.
|
* Method used to assert whether the given suffix exists in the Trie.
|
||||||
*/
|
*/
|
||||||
public has(suffix: string): boolean {
|
public has(suffix: string, includeAllSubdoain = suffix[0] === '.'): boolean {
|
||||||
|
if (suffix[0] === '.') {
|
||||||
|
suffix = suffix.slice(1);
|
||||||
|
}
|
||||||
|
|
||||||
const res = this.walkIntoLeafWithSuffix(suffix);
|
const res = this.walkIntoLeafWithSuffix(suffix);
|
||||||
|
|
||||||
return res
|
if (res === null) return false;
|
||||||
? res.node[0]
|
if (!res.node[0]) return false;
|
||||||
: false;
|
if (includeAllSubdoain) return res.node[1];
|
||||||
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
public dump(onSuffix: (suffix: string) => void): void;
|
public dump(onSuffix: (suffix: string) => void): void;
|
||||||
@ -308,8 +322,14 @@ abstract class Triebase<Meta = any> {
|
|||||||
const results: string[] = [];
|
const results: string[] = [];
|
||||||
|
|
||||||
const handleSuffix = onSuffix
|
const handleSuffix = onSuffix
|
||||||
? (suffix: string[]) => onSuffix(fastStringArrayJoin(suffix, ''))
|
? (suffix: string[], subdomain: boolean) => {
|
||||||
: (suffix: string[]) => results.push(fastStringArrayJoin(suffix, ''));
|
const d = fastStringArrayJoin(suffix, '.');
|
||||||
|
onSuffix(subdomain ? '.' + d : d);
|
||||||
|
}
|
||||||
|
: (suffix: string[], subdomain: boolean) => {
|
||||||
|
const d = fastStringArrayJoin(suffix, '.');
|
||||||
|
results.push(subdomain ? '.' + d : d);
|
||||||
|
};
|
||||||
|
|
||||||
this.walk(handleSuffix);
|
this.walk(handleSuffix);
|
||||||
|
|
||||||
@ -322,8 +342,8 @@ abstract class Triebase<Meta = any> {
|
|||||||
const results: Meta[] = [];
|
const results: Meta[] = [];
|
||||||
|
|
||||||
const handleMeta = onMeta
|
const handleMeta = onMeta
|
||||||
? (_suffix: string[], meta: Meta) => onMeta(meta)
|
? (_suffix: string[], _subdomain: boolean, meta: Meta) => onMeta(meta)
|
||||||
: (_suffix: string[], meta: Meta) => results.push(meta);
|
: (_suffix: string[], _subdomain: boolean, meta: Meta) => results.push(meta);
|
||||||
|
|
||||||
this.walk(handleMeta);
|
this.walk(handleMeta);
|
||||||
|
|
||||||
@ -331,13 +351,19 @@ abstract class Triebase<Meta = any> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
public dumpWithMeta(onSuffix: (suffix: string, meta: Meta | undefined) => void): void;
|
public dumpWithMeta(onSuffix: (suffix: string, meta: Meta | undefined) => void): void;
|
||||||
public dumpWithMeta(): string[];
|
public dumpWithMeta(): Array<[string, Meta | undefined]>;
|
||||||
public dumpWithMeta(onSuffix?: (suffix: string, meta: Meta | undefined) => void): string[] | void {
|
public dumpWithMeta(onSuffix?: (suffix: string, meta: Meta | undefined) => void): Array<[string, Meta | undefined]> | void {
|
||||||
const results: string[] = [];
|
const results: Array<[string, Meta | undefined]> = [];
|
||||||
|
|
||||||
const handleSuffix = onSuffix
|
const handleSuffix = onSuffix
|
||||||
? (suffix: string[], meta: Meta | undefined) => onSuffix(fastStringArrayJoin(suffix, ''), meta)
|
? (suffix: string[], subdomain: boolean, meta: Meta | undefined) => {
|
||||||
: (suffix: string[]) => results.push(fastStringArrayJoin(suffix, ''));
|
const d = fastStringArrayJoin(suffix, '.');
|
||||||
|
return onSuffix(subdomain ? '.' + d : d, meta);
|
||||||
|
}
|
||||||
|
: (suffix: string[], subdomain: boolean, meta: Meta | undefined) => {
|
||||||
|
const d = fastStringArrayJoin(suffix, '.');
|
||||||
|
results.push([subdomain ? '.' + d : d, meta]);
|
||||||
|
};
|
||||||
|
|
||||||
this.walk(handleSuffix);
|
this.walk(handleSuffix);
|
||||||
|
|
||||||
@ -359,21 +385,25 @@ abstract class Triebase<Meta = any> {
|
|||||||
export class HostnameSmolTrie<Meta = any> extends Triebase<Meta> {
|
export class HostnameSmolTrie<Meta = any> extends Triebase<Meta> {
|
||||||
public smolTree = true;
|
public smolTree = true;
|
||||||
|
|
||||||
add(suffix: string, meta?: Meta): void {
|
add(suffix: string, includeAllSubdoain = suffix[0] === '.', meta?: Meta): void {
|
||||||
let node: TrieNode<Meta> = this.$root;
|
let node: TrieNode<Meta> = this.$root;
|
||||||
let curNodeChildren: Map<string, TrieNode<Meta>> = node[2];
|
let curNodeChildren: Map<string, TrieNode<Meta>> = node[3];
|
||||||
|
|
||||||
|
if (suffix[0] === '.') {
|
||||||
|
suffix = suffix.slice(1);
|
||||||
|
}
|
||||||
|
|
||||||
const onToken = (token: string) => {
|
const onToken = (token: string) => {
|
||||||
curNodeChildren = node[2];
|
curNodeChildren = node[3];
|
||||||
if (curNodeChildren.has(token)) {
|
if (curNodeChildren.has(token)) {
|
||||||
node = curNodeChildren.get(token)!;
|
node = curNodeChildren.get(token)!;
|
||||||
|
|
||||||
// While adding `[start]blog|.skk.moe`, if we find that there is already a `[start].skk.moe` in the trie, skip adding the rest of the nodes
|
// While adding `[start]blog|.skk.moe`, if we find that there is already a `[start].skk.moe` in the trie, skip adding the rest of the nodes
|
||||||
if (node[0] && token === '.') {
|
if (node[1]) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const newNode = createNode(node);
|
const newNode = createNode(false, node);
|
||||||
curNodeChildren.set(token, newNode);
|
curNodeChildren.set(token, newNode);
|
||||||
node = newNode;
|
node = newNode;
|
||||||
}
|
}
|
||||||
@ -387,55 +417,57 @@ export class HostnameSmolTrie<Meta = any> extends Triebase<Meta> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// If we are in smolTree mode, we need to do something at the end of the loop
|
// If we are in smolTree mode, we need to do something at the end of the loop
|
||||||
if (suffix[0] === '.') {
|
if (includeAllSubdoain) {
|
||||||
// Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
|
// Trying to add `[.]sub.example.com` where there is already a `blog.sub.example.com` in the trie
|
||||||
|
|
||||||
// Make sure parent `[start]sub.example.com` (without dot) is removed (SENTINEL to false)
|
// Make sure parent `[start]sub.example.com` (without dot) is removed (SENTINEL to false)
|
||||||
(/** parent */ node[1]!)[0] = false;
|
// (/** parent */ node[2]!)[0] = false;
|
||||||
|
|
||||||
// Removing the rest of the parent's child nodes
|
// Removing the rest of the parent's child nodes
|
||||||
node[2].clear();
|
node[3].clear();
|
||||||
// The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here
|
// The SENTINEL of this node will be set to true at the end of the function, so we don't need to set it here
|
||||||
|
|
||||||
// we can use else-if here, because the children is now empty, we don't need to check the leading "."
|
// we can use else-if here, because the children is now empty, we don't need to check the leading "."
|
||||||
} else if (node[2].get('.')?.[0] === true) {
|
} else if (node[1]) {
|
||||||
// Trying to add `example.com` when there is already a `.example.com` in the trie
|
// Trying to add `example.com` when there is already a `.example.com` in the trie
|
||||||
// No need to increment size and set SENTINEL to true (skip this "new" item)
|
// No need to increment size and set SENTINEL to true (skip this "new" item)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
node[0] = true;
|
node[0] = true;
|
||||||
node[3] = meta!;
|
node[1] = includeAllSubdoain;
|
||||||
|
node[4] = meta!;
|
||||||
|
}
|
||||||
|
|
||||||
|
public whitelist(suffix: string, includeAllSubdoain = suffix[0] === '.') {
|
||||||
|
if (suffix[0] === '.') {
|
||||||
|
suffix = suffix.slice(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
public whitelist(suffix: string) {
|
|
||||||
const tokens = hostnameToTokens(suffix);
|
const tokens = hostnameToTokens(suffix);
|
||||||
const res = this.getSingleChildLeaf(tokens);
|
const res = this.getSingleChildLeaf(tokens);
|
||||||
|
|
||||||
if (res === null) return;
|
if (res === null) return;
|
||||||
|
|
||||||
const { node, toPrune, tokenToPrune, parent } = res;
|
const { node, toPrune, tokenToPrune } = res;
|
||||||
|
|
||||||
// Trying to whitelist `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
|
// Trying to whitelist `[start].sub.example.com` where there might already be a `[start]blog.sub.example.com` in the trie
|
||||||
if (tokens[0] === '.') {
|
if (includeAllSubdoain) {
|
||||||
// If there is a `[start]sub.example.com` here, remove it
|
// If there is a `[start]sub.example.com` here, remove it
|
||||||
parent[0] = false;
|
node[0] = false;
|
||||||
|
node[1] = false;
|
||||||
// Removing all the child nodes by empty the children
|
// Removing all the child nodes by empty the children
|
||||||
// This removes the only child ".", which removes "blog.sub.example.com"
|
node[3].clear();
|
||||||
parent[2].clear();
|
|
||||||
} else {
|
} else {
|
||||||
// Trying to whitelist `example.com` when there is already a `.example.com` in the trie
|
// Trying to whitelist `example.com` when there is already a `.example.com` in the trie
|
||||||
const dotNode = node[2].get('.');
|
node[1] = false;
|
||||||
if (dotNode) {
|
|
||||||
dotNode[0] = false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// return early if not found
|
// return early if not found
|
||||||
if (!node[0]) return;
|
if (!node[0]) return;
|
||||||
|
|
||||||
if (tokenToPrune && toPrune) {
|
if (tokenToPrune && toPrune) {
|
||||||
toPrune[2].delete(tokenToPrune);
|
toPrune[3].delete(tokenToPrune);
|
||||||
} else {
|
} else {
|
||||||
node[0] = false;
|
node[0] = false;
|
||||||
}
|
}
|
||||||
@ -447,31 +479,39 @@ export class HostnameTrie<Meta = any> extends Triebase<Meta> {
|
|||||||
return this.$size;
|
return this.$size;
|
||||||
}
|
}
|
||||||
|
|
||||||
add(suffix: string, meta?: Meta): void {
|
add(suffix: string, includeAllSubdoain = suffix[0] === '.', meta?: Meta): void {
|
||||||
let node: TrieNode<Meta> = this.$root;
|
let node: TrieNode<Meta> = this.$root;
|
||||||
|
|
||||||
const onToken = (token: string) => {
|
const onToken = (token: string) => {
|
||||||
if (node[2].has(token)) {
|
if (node[3].has(token)) {
|
||||||
node = node[2].get(token)!;
|
node = node[3].get(token)!;
|
||||||
} else {
|
} else {
|
||||||
const newNode = createNode(node);
|
const newNode = createNode(false, node);
|
||||||
node[2].set(token, newNode);
|
node[3].set(token, newNode);
|
||||||
node = newNode;
|
node = newNode;
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if (suffix[0] === '.') {
|
||||||
|
suffix = suffix.slice(1);
|
||||||
|
}
|
||||||
|
|
||||||
// When walkHostnameTokens returns true, we should skip the rest
|
// When walkHostnameTokens returns true, we should skip the rest
|
||||||
if (walkHostnameTokens(suffix, onToken)) {
|
if (walkHostnameTokens(suffix, onToken)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!node[0]) {
|
// if same entry has been added before, skip
|
||||||
|
if (node[0]) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
this.$size++;
|
this.$size++;
|
||||||
node[0] = true;
|
node[0] = true;
|
||||||
node[3] = meta!;
|
node[1] = includeAllSubdoain;
|
||||||
}
|
node[4] = meta!;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -486,11 +526,11 @@ export function createTrie<_Meta = any>(from?: string[] | Set<string> | null, sm
|
|||||||
|
|
||||||
export type Trie = ReturnType<typeof createTrie>;
|
export type Trie = ReturnType<typeof createTrie>;
|
||||||
|
|
||||||
function deepEqualArray(a: string[], b: string[]) {
|
// function deepEqualArray(a: string[], b: string[]) {
|
||||||
let len = a.length;
|
// let len = a.length;
|
||||||
if (len !== b.length) return false;
|
// if (len !== b.length) return false;
|
||||||
while (len--) {
|
// while (len--) {
|
||||||
if (a[len] !== b[len]) return false;
|
// if (a[len] !== b[len]) return false;
|
||||||
}
|
// }
|
||||||
return true;
|
// return true;
|
||||||
};
|
// };
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user