mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 17:20:35 +08:00
Refactor: rewrite trie in class
This commit is contained in:
parent
d1041f0e59
commit
a7e7c19a51
@ -10,8 +10,7 @@ import { processLine } from './lib/process-line';
|
|||||||
import { DomainsetOutput } from './lib/create-file';
|
import { DomainsetOutput } from './lib/create-file';
|
||||||
|
|
||||||
const getS3OSSDomainsPromise = (async (): Promise<string[]> => {
|
const getS3OSSDomainsPromise = (async (): Promise<string[]> => {
|
||||||
const trie = createTrie(
|
const trie = createTrie((await getPublicSuffixListTextPromise()).reduce<string[]>(
|
||||||
(await getPublicSuffixListTextPromise()).reduce<string[]>(
|
|
||||||
(acc, cur) => {
|
(acc, cur) => {
|
||||||
const tmp = processLine(cur);
|
const tmp = processLine(cur);
|
||||||
if (tmp) {
|
if (tmp) {
|
||||||
@ -20,9 +19,7 @@ const getS3OSSDomainsPromise = (async (): Promise<string[]> => {
|
|||||||
return acc;
|
return acc;
|
||||||
},
|
},
|
||||||
[]
|
[]
|
||||||
),
|
));
|
||||||
true
|
|
||||||
);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract OSS domain from publicsuffix list
|
* Extract OSS domain from publicsuffix list
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
import { task } from './trace';
|
import { task } from './trace';
|
||||||
import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
|
import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
|
||||||
import { createTrie } from './lib/trie';
|
import { HostnameSmolTrie } from './lib/trie';
|
||||||
import { SHARED_DESCRIPTION } from './lib/constants';
|
import { SHARED_DESCRIPTION } from './lib/constants';
|
||||||
import { createMemoizedPromise } from './lib/memo-promise';
|
import { createMemoizedPromise } from './lib/memo-promise';
|
||||||
import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
|
import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
|
||||||
@ -27,7 +27,7 @@ const BLACKLIST = [
|
|||||||
|
|
||||||
export const getMicrosoftCdnRulesetPromise = createMemoizedPromise<[domains: string[], domainSuffixes: string[]]>(async () => {
|
export const getMicrosoftCdnRulesetPromise = createMemoizedPromise<[domains: string[], domainSuffixes: string[]]>(async () => {
|
||||||
// First trie is to find the microsoft domains that matches probe domains
|
// First trie is to find the microsoft domains that matches probe domains
|
||||||
const trie = createTrie(null, true);
|
const trie = new HostnameSmolTrie();
|
||||||
for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
|
for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
|
||||||
const domain = extractDomainsFromFelixDnsmasq(line);
|
const domain = extractDomainsFromFelixDnsmasq(line);
|
||||||
if (domain) {
|
if (domain) {
|
||||||
@ -37,8 +37,8 @@ export const getMicrosoftCdnRulesetPromise = createMemoizedPromise<[domains: str
|
|||||||
const foundMicrosoftCdnDomains = PROBE_DOMAINS.flatMap(domain => trie.find(domain));
|
const foundMicrosoftCdnDomains = PROBE_DOMAINS.flatMap(domain => trie.find(domain));
|
||||||
|
|
||||||
// Second trie is to remove blacklisted domains
|
// Second trie is to remove blacklisted domains
|
||||||
const trie2 = createTrie(foundMicrosoftCdnDomains, true);
|
const trie2 = new HostnameSmolTrie(foundMicrosoftCdnDomains);
|
||||||
BLACKLIST.forEach(trie2.whitelist);
|
BLACKLIST.forEach(black => trie2.whitelist(black));
|
||||||
|
|
||||||
const domains: string[] = DOMAINS;
|
const domains: string[] = DOMAINS;
|
||||||
const domainSuffixes: string[] = DOMAIN_SUFFIXES;
|
const domainSuffixes: string[] = DOMAIN_SUFFIXES;
|
||||||
|
|||||||
@ -191,7 +191,7 @@ async function processPhihsingDomains(domainArr: string[]) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function calcDomainAbuseScore(subdomain: string, fullDomain: string) {
|
export function calcDomainAbuseScore(subdomain: string, fullDomain: string = subdomain) {
|
||||||
let weight = 0;
|
let weight = 0;
|
||||||
|
|
||||||
const hitLowKeywords = lowKeywords(fullDomain);
|
const hitLowKeywords = lowKeywords(fullDomain);
|
||||||
|
|||||||
@ -56,6 +56,7 @@ describe('Trie', () => {
|
|||||||
|
|
||||||
trie.add('skk.moe');
|
trie.add('skk.moe');
|
||||||
trie.add('blog.skk.moe');
|
trie.add('blog.skk.moe');
|
||||||
|
// eslint-disable-next-line sukka/no-element-overwrite -- deliberately do testing
|
||||||
trie.add('skk.moe');
|
trie.add('skk.moe');
|
||||||
|
|
||||||
expect(trie.size).to.equal(2);
|
expect(trie.size).to.equal(2);
|
||||||
@ -63,18 +64,18 @@ describe('Trie', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
it('should be possible to set the null sequence.', () => {
|
it('should be possible to set the null sequence.', () => {
|
||||||
let trie = createTrie(null, false);
|
const trie = createTrie(null, false);
|
||||||
|
|
||||||
trie.add('');
|
trie.add('');
|
||||||
expect(trie.has('')).to.equal(true);
|
expect(trie.has('')).to.equal(true);
|
||||||
|
|
||||||
trie = createTrie(null, true);
|
const trie2 = createTrie(null, true);
|
||||||
trie.add('');
|
trie2.add('');
|
||||||
expect(trie.has('')).to.equal(true);
|
expect(trie2.has('')).to.equal(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should be possible to delete items.', () => {
|
it('should be possible to delete items.', () => {
|
||||||
const trie = createTrie(null);
|
const trie = createTrie(null, false);
|
||||||
|
|
||||||
trie.add('skk.moe');
|
trie.add('skk.moe');
|
||||||
trie.add('example.com');
|
trie.add('example.com');
|
||||||
@ -108,7 +109,7 @@ describe('Trie', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
it('should be possible to retrieve items matching the given prefix.', () => {
|
it('should be possible to retrieve items matching the given prefix.', () => {
|
||||||
const trie = createTrie(null);
|
const trie = createTrie(null, false);
|
||||||
|
|
||||||
trie.add('example.com');
|
trie.add('example.com');
|
||||||
trie.add('blog.example.com');
|
trie.add('blog.example.com');
|
||||||
@ -141,12 +142,12 @@ describe('Trie', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
it('should be possible to create a trie from an arbitrary iterable.', () => {
|
it('should be possible to create a trie from an arbitrary iterable.', () => {
|
||||||
let trie = createTrie(['skk.moe', 'blog.skk.moe']);
|
let trie = createTrie(['skk.moe', 'blog.skk.moe'], false);
|
||||||
|
|
||||||
expect(trie.size).to.equal(2);
|
expect(trie.size).to.equal(2);
|
||||||
expect(trie.has('skk.moe')).to.equal(true);
|
expect(trie.has('skk.moe')).to.equal(true);
|
||||||
|
|
||||||
trie = createTrie(new Set(['skk.moe', 'example.com']));
|
trie = createTrie(new Set(['skk.moe', 'example.com']), false);
|
||||||
expect(trie.size).to.equal(2);
|
expect(trie.size).to.equal(2);
|
||||||
expect(trie.has('skk.moe')).to.equal(true);
|
expect(trie.has('skk.moe')).to.equal(true);
|
||||||
});
|
});
|
||||||
@ -154,28 +155,28 @@ describe('Trie', () => {
|
|||||||
|
|
||||||
describe('surge domainset dedupe', () => {
|
describe('surge domainset dedupe', () => {
|
||||||
it('should not remove same entry', () => {
|
it('should not remove same entry', () => {
|
||||||
const trie = createTrie(['.skk.moe', 'noc.one']);
|
const trie = createTrie(['.skk.moe', 'noc.one'], false);
|
||||||
|
|
||||||
expect(trie.find('.skk.moe')).to.deep.equal(['.skk.moe']);
|
expect(trie.find('.skk.moe')).to.deep.equal(['.skk.moe']);
|
||||||
expect(trie.find('noc.one')).to.deep.equal(['noc.one']);
|
expect(trie.find('noc.one')).to.deep.equal(['noc.one']);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should match subdomain - 1', () => {
|
it('should match subdomain - 1', () => {
|
||||||
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']);
|
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], false);
|
||||||
|
|
||||||
expect(trie.find('.skk.moe')).to.deep.equal(['image.cdn.skk.moe', 'blog.skk.moe']);
|
expect(trie.find('.skk.moe')).to.deep.equal(['image.cdn.skk.moe', 'blog.skk.moe']);
|
||||||
expect(trie.find('.sukkaw.com')).to.deep.equal(['www.sukkaw.com']);
|
expect(trie.find('.sukkaw.com')).to.deep.equal(['www.sukkaw.com']);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should match subdomain - 2', () => {
|
it('should match subdomain - 2', () => {
|
||||||
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']);
|
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], false);
|
||||||
|
|
||||||
expect(trie.find('.skk.moe')).to.deep.equal(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']);
|
expect(trie.find('.skk.moe')).to.deep.equal(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']);
|
||||||
expect(trie.find('.sukkaw.com')).to.deep.equal(['www.sukkaw.com']);
|
expect(trie.find('.sukkaw.com')).to.deep.equal(['www.sukkaw.com']);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should not remove non-subdomain', () => {
|
it('should not remove non-subdomain', () => {
|
||||||
const trie = createTrie(['skk.moe', 'sukkaskk.moe']);
|
const trie = createTrie(['skk.moe', 'sukkaskk.moe'], false);
|
||||||
expect(trie.find('.skk.moe')).to.deep.equal([]);
|
expect(trie.find('.skk.moe')).to.deep.equal([]);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -80,16 +80,281 @@ const walkHostnameTokens = (hostname: string, onToken: (token: string) => boolea
|
|||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
|
||||||
export const createTrie = <Meta = any>(from?: string[] | Set<string> | null, smolTree = false) => {
|
interface FindSingleChildLeafResult<Meta> {
|
||||||
let size = 0;
|
node: TrieNode<Meta>,
|
||||||
const root: TrieNode<Meta> = createNode();
|
toPrune: TrieNode<Meta> | null,
|
||||||
|
tokenToPrune: string | null,
|
||||||
|
parent: TrieNode<Meta>
|
||||||
|
}
|
||||||
|
|
||||||
|
abstract class Triebase<Meta = any> {
|
||||||
|
protected readonly $root: TrieNode<Meta> = createNode();
|
||||||
|
protected $size = 0;
|
||||||
|
|
||||||
|
get root() {
|
||||||
|
return this.$root;
|
||||||
|
}
|
||||||
|
|
||||||
|
constructor(from?: string[] | Set<string> | null) {
|
||||||
|
// Actually build trie
|
||||||
|
if (Array.isArray(from)) {
|
||||||
|
for (let i = 0, l = from.length; i < l; i++) {
|
||||||
|
this.add(from[i]);
|
||||||
|
}
|
||||||
|
} else if (from) {
|
||||||
|
from.forEach((value) => this.add(value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public abstract add(suffix: string, meta?: Meta): void;
|
||||||
|
|
||||||
|
protected walkIntoLeafWithTokens(
|
||||||
|
tokens: string[],
|
||||||
|
onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
|
||||||
|
) {
|
||||||
|
let node: TrieNode = this.$root;
|
||||||
|
let parent: TrieNode = node;
|
||||||
|
|
||||||
|
let token: string;
|
||||||
|
|
||||||
|
for (let i = tokens.length - 1; i >= 0; i--) {
|
||||||
|
token = tokens[i];
|
||||||
|
|
||||||
|
// if (token === '') {
|
||||||
|
// break;
|
||||||
|
// }
|
||||||
|
|
||||||
|
parent = node;
|
||||||
|
|
||||||
|
if (node[2].has(token)) {
|
||||||
|
node = node[2].get(token)!;
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
onLoop(node, parent, token);
|
||||||
|
}
|
||||||
|
|
||||||
|
return { node, parent };
|
||||||
|
};
|
||||||
|
|
||||||
|
protected walkIntoLeafWithSuffix(
|
||||||
|
suffix: string,
|
||||||
|
onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
|
||||||
|
) {
|
||||||
|
let node: TrieNode = this.$root;
|
||||||
|
let parent: TrieNode = node;
|
||||||
|
|
||||||
|
const onToken = (token: string) => {
|
||||||
|
if (token === '') {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
parent = node;
|
||||||
|
|
||||||
|
if (node[2].has(token)) {
|
||||||
|
node = node[2].get(token)!;
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
onLoop(node, parent, token);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (walkHostnameTokens(suffix, onToken) === null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return { node, parent };
|
||||||
|
};
|
||||||
|
|
||||||
|
public contains(suffix: string): boolean { return this.walkIntoLeafWithSuffix(suffix) !== null; };
|
||||||
|
|
||||||
|
private walk(
|
||||||
|
onMatches: (suffix: string[], meta: Meta) => void,
|
||||||
|
initialNode = this.$root,
|
||||||
|
initialSuffix: string[] = []
|
||||||
|
) {
|
||||||
|
const nodeStack: Array<TrieNode<Meta>> = [initialNode];
|
||||||
|
// Resolving initial string (begin the start of the stack)
|
||||||
|
const suffixStack: string[][] = [initialSuffix];
|
||||||
|
|
||||||
|
let node: TrieNode<Meta> = initialNode;
|
||||||
|
|
||||||
|
do {
|
||||||
|
node = nodeStack.pop()!;
|
||||||
|
const suffix = suffixStack.pop()!;
|
||||||
|
|
||||||
|
node[2].forEach((childNode, k) => {
|
||||||
|
// Pushing the child node to the stack for next iteration of DFS
|
||||||
|
nodeStack.push(childNode);
|
||||||
|
|
||||||
|
suffixStack.push([k, ...suffix]);
|
||||||
|
});
|
||||||
|
|
||||||
|
// If the node is a sentinel, we push the suffix to the results
|
||||||
|
if (node[0]) {
|
||||||
|
onMatches(suffix, node[3]);
|
||||||
|
}
|
||||||
|
} while (nodeStack.length);
|
||||||
|
};
|
||||||
|
|
||||||
|
protected getSingleChildLeaf(tokens: string[]): FindSingleChildLeafResult<Meta> | null {
|
||||||
|
let toPrune: TrieNode | null = null;
|
||||||
|
let tokenToPrune: string | null = null;
|
||||||
|
|
||||||
|
const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
|
||||||
|
// Keeping track of a potential branch to prune
|
||||||
|
|
||||||
|
// Even if the node size is 1, but the single child is ".", we should retain the branch
|
||||||
|
// Since the "." could be special if it is the leaf-est node
|
||||||
|
const onlyChild = node[2].size < 2 && !node[2].has('.');
|
||||||
|
|
||||||
|
if (toPrune != null) { // the top-est branch that could potentially being pruned
|
||||||
|
if (!onlyChild) {
|
||||||
|
// The branch has more than a single child, retain the branch.
|
||||||
|
// And we need to abort pruning the parent, so we set it to null
|
||||||
|
toPrune = null;
|
||||||
|
tokenToPrune = null;
|
||||||
|
}
|
||||||
|
} else if (onlyChild) {
|
||||||
|
// There is only one token child, or no child at all, we can prune it safely
|
||||||
|
// It is now the top-est branch that could potentially be pruned
|
||||||
|
toPrune = parent;
|
||||||
|
tokenToPrune = token;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const res = this.walkIntoLeafWithTokens(tokens, onLoop);
|
||||||
|
|
||||||
|
if (res === null) return null;
|
||||||
|
return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Method used to add the given suffix to the trie.
|
* Method used to retrieve every item in the trie with the given prefix.
|
||||||
*/
|
*/
|
||||||
const add = smolTree
|
public find(
|
||||||
? (suffix: string, meta?: Meta): void => {
|
inputSuffix: string,
|
||||||
let node: TrieNode<Meta> = root;
|
/** @default true */ includeEqualWithSuffix = true
|
||||||
|
): string[] {
|
||||||
|
// if (smolTree) {
|
||||||
|
// throw new Error('A Trie with smolTree enabled cannot perform find!');
|
||||||
|
// }
|
||||||
|
|
||||||
|
const inputTokens = hostnameToTokens(inputSuffix);
|
||||||
|
const res = this.walkIntoLeafWithTokens(inputTokens);
|
||||||
|
if (res === null) return [];
|
||||||
|
|
||||||
|
const matches: string[][] = [];
|
||||||
|
|
||||||
|
const onMatches = includeEqualWithSuffix
|
||||||
|
// fast path (default option)
|
||||||
|
? (suffix: string[]) => matches.push(suffix)
|
||||||
|
// slow path
|
||||||
|
: (suffix: string[]) => {
|
||||||
|
if (!deepEqualArray(suffix, inputTokens)) {
|
||||||
|
matches.push(suffix);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
this.walk(
|
||||||
|
onMatches,
|
||||||
|
res.node, // Performing DFS from prefix
|
||||||
|
inputTokens
|
||||||
|
);
|
||||||
|
|
||||||
|
return matches.map((m) => fastStringArrayJoin(m, ''));
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Method used to delete a prefix from the trie.
|
||||||
|
*/
|
||||||
|
public remove(suffix: string): boolean {
|
||||||
|
const res = this.getSingleChildLeaf(hostnameToTokens(suffix));
|
||||||
|
if (res === null) return false;
|
||||||
|
|
||||||
|
if (!res.node[0]) return false;
|
||||||
|
|
||||||
|
this.$size--;
|
||||||
|
const { node, toPrune, tokenToPrune } = res;
|
||||||
|
|
||||||
|
if (tokenToPrune && toPrune) {
|
||||||
|
toPrune[2].delete(tokenToPrune);
|
||||||
|
} else {
|
||||||
|
node[0] = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
|
// eslint-disable-next-line @typescript-eslint/unbound-method -- alias class methods
|
||||||
|
public delete = this.remove;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Method used to assert whether the given prefix exists in the Trie.
|
||||||
|
*/
|
||||||
|
public has(suffix: string): boolean {
|
||||||
|
const res = this.walkIntoLeafWithSuffix(suffix);
|
||||||
|
|
||||||
|
return res
|
||||||
|
? res.node[0]
|
||||||
|
: false;
|
||||||
|
};
|
||||||
|
|
||||||
|
public dump(onSuffix: (suffix: string) => void): void;
|
||||||
|
public dump(): string[];
|
||||||
|
public dump(onSuffix?: (suffix: string) => void): string[] | void {
|
||||||
|
const results: string[] = [];
|
||||||
|
|
||||||
|
const handleSuffix = onSuffix
|
||||||
|
? (suffix: string[]) => onSuffix(fastStringArrayJoin(suffix, ''))
|
||||||
|
: (suffix: string[]) => results.push(fastStringArrayJoin(suffix, ''));
|
||||||
|
|
||||||
|
this.walk(handleSuffix);
|
||||||
|
|
||||||
|
return results;
|
||||||
|
};
|
||||||
|
|
||||||
|
public dumpMeta() {
|
||||||
|
const results: Meta[] = [];
|
||||||
|
|
||||||
|
this.walk((_suffix, meta) => {
|
||||||
|
results.push(meta);
|
||||||
|
});
|
||||||
|
|
||||||
|
return results;
|
||||||
|
};
|
||||||
|
|
||||||
|
public dumpWithMeta() {
|
||||||
|
const results: Array<[string, Meta]> = [];
|
||||||
|
|
||||||
|
this.walk((suffix, meta) => {
|
||||||
|
results.push([fastStringArrayJoin(suffix, ''), meta]);
|
||||||
|
});
|
||||||
|
|
||||||
|
return results;
|
||||||
|
};
|
||||||
|
|
||||||
|
public inspect(depth: number, unpackMeta?: (meta?: Meta) => any) {
|
||||||
|
return fastStringArrayJoin(
|
||||||
|
JSON.stringify(deepTrieNodeToJSON(this.$root, unpackMeta), null, 2).split('\n').map((line) => ' '.repeat(depth) + line),
|
||||||
|
'\n'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public [util.inspect.custom](depth: number) {
|
||||||
|
return this.inspect(depth);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export class HostnameSmolTrie<Meta = any> extends Triebase<Meta> {
|
||||||
|
public smolTree = true;
|
||||||
|
|
||||||
|
add(suffix: string, meta?: Meta): void {
|
||||||
|
let node: TrieNode<Meta> = this.$root;
|
||||||
let curNodeChildren: Map<string, TrieNode<Meta>> = node[2];
|
let curNodeChildren: Map<string, TrieNode<Meta>> = node[2];
|
||||||
|
|
||||||
const onToken = (token: string) => {
|
const onToken = (token: string) => {
|
||||||
@ -136,274 +401,10 @@ export const createTrie = <Meta = any>(from?: string[] | Set<string> | null, smo
|
|||||||
node[0] = true;
|
node[0] = true;
|
||||||
node[3] = meta!;
|
node[3] = meta!;
|
||||||
}
|
}
|
||||||
: (suffix: string, meta?: Meta): void => {
|
|
||||||
let node: TrieNode<Meta> = root;
|
|
||||||
|
|
||||||
const onToken = (token: string) => {
|
|
||||||
if (node[2].has(token)) {
|
|
||||||
node = node[2].get(token)!;
|
|
||||||
} else {
|
|
||||||
const newNode = createNode(node);
|
|
||||||
node[2].set(token, newNode);
|
|
||||||
node = newNode;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
};
|
|
||||||
|
|
||||||
// When walkHostnameTokens returns true, we should skip the rest
|
|
||||||
if (walkHostnameTokens(suffix, onToken)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!node[0]) {
|
|
||||||
size++;
|
|
||||||
node[0] = true;
|
|
||||||
node[3] = meta!;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const walkIntoLeafWithTokens = (
|
|
||||||
tokens: string[],
|
|
||||||
onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
|
|
||||||
) => {
|
|
||||||
let node: TrieNode = root;
|
|
||||||
let parent: TrieNode = node;
|
|
||||||
|
|
||||||
let token: string;
|
|
||||||
|
|
||||||
for (let i = tokens.length - 1; i >= 0; i--) {
|
|
||||||
token = tokens[i];
|
|
||||||
|
|
||||||
// if (token === '') {
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
|
|
||||||
parent = node;
|
|
||||||
|
|
||||||
if (node[2].has(token)) {
|
|
||||||
node = node[2].get(token)!;
|
|
||||||
} else {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
onLoop(node, parent, token);
|
|
||||||
}
|
|
||||||
|
|
||||||
return { node, parent };
|
|
||||||
};
|
|
||||||
|
|
||||||
const walkIntoLeafWithSuffix = (
|
|
||||||
suffix: string,
|
|
||||||
onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
|
|
||||||
) => {
|
|
||||||
let node: TrieNode = root;
|
|
||||||
let parent: TrieNode = node;
|
|
||||||
|
|
||||||
const onToken = (token: string) => {
|
|
||||||
if (token === '') {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
parent = node;
|
|
||||||
|
|
||||||
if (node[2].has(token)) {
|
|
||||||
node = node[2].get(token)!;
|
|
||||||
} else {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
onLoop(node, parent, token);
|
|
||||||
|
|
||||||
return false;
|
|
||||||
};
|
|
||||||
|
|
||||||
if (walkHostnameTokens(suffix, onToken) === null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
return { node, parent };
|
|
||||||
};
|
|
||||||
|
|
||||||
const contains = (suffix: string): boolean => walkIntoLeafWithSuffix(suffix) !== null;
|
|
||||||
|
|
||||||
const walk = (
|
|
||||||
onMatches: (suffix: string[], meta: Meta) => void,
|
|
||||||
initialNode = root,
|
|
||||||
initialSuffix: string[] = []
|
|
||||||
) => {
|
|
||||||
const nodeStack: Array<TrieNode<Meta>> = [initialNode];
|
|
||||||
// Resolving initial string (begin the start of the stack)
|
|
||||||
const suffixStack: string[][] = [initialSuffix];
|
|
||||||
|
|
||||||
let node: TrieNode<Meta> = root;
|
|
||||||
|
|
||||||
do {
|
|
||||||
node = nodeStack.pop()!;
|
|
||||||
const suffix = suffixStack.pop()!;
|
|
||||||
|
|
||||||
node[2].forEach((childNode, k) => {
|
|
||||||
// Pushing the child node to the stack for next iteration of DFS
|
|
||||||
nodeStack.push(childNode);
|
|
||||||
|
|
||||||
suffixStack.push([k, ...suffix]);
|
|
||||||
});
|
|
||||||
|
|
||||||
// If the node is a sentinel, we push the suffix to the results
|
|
||||||
if (node[0]) {
|
|
||||||
onMatches(suffix, node[3]);
|
|
||||||
}
|
|
||||||
} while (nodeStack.length);
|
|
||||||
};
|
|
||||||
|
|
||||||
interface FindSingleChildLeafResult {
|
|
||||||
node: TrieNode,
|
|
||||||
toPrune: TrieNode | null,
|
|
||||||
tokenToPrune: string | null,
|
|
||||||
parent: TrieNode
|
|
||||||
}
|
|
||||||
|
|
||||||
const getSingleChildLeaf = (tokens: string[]): FindSingleChildLeafResult | null => {
|
|
||||||
let toPrune: TrieNode | null = null;
|
|
||||||
let tokenToPrune: string | null = null;
|
|
||||||
|
|
||||||
const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
|
|
||||||
// Keeping track of a potential branch to prune
|
|
||||||
|
|
||||||
// Even if the node size is 1, but the single child is ".", we should retain the branch
|
|
||||||
// Since the "." could be special if it is the leaf-est node
|
|
||||||
const onlyChild = node[2].size < 2 && !node[2].has('.');
|
|
||||||
|
|
||||||
if (toPrune != null) { // the top-est branch that could potentially being pruned
|
|
||||||
if (!onlyChild) {
|
|
||||||
// The branch has more than a single child, retain the branch.
|
|
||||||
// And we need to abort pruning the parent, so we set it to null
|
|
||||||
toPrune = null;
|
|
||||||
tokenToPrune = null;
|
|
||||||
}
|
|
||||||
} else if (onlyChild) {
|
|
||||||
// There is only one token child, or no child at all, we can prune it safely
|
|
||||||
// It is now the top-est branch that could potentially be pruned
|
|
||||||
toPrune = parent;
|
|
||||||
tokenToPrune = token;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const res = walkIntoLeafWithTokens(tokens, onLoop);
|
|
||||||
|
|
||||||
if (res === null) return null;
|
|
||||||
return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Method used to retrieve every item in the trie with the given prefix.
|
|
||||||
*/
|
|
||||||
const find = (
|
|
||||||
inputSuffix: string,
|
|
||||||
/** @default true */ includeEqualWithSuffix = true
|
|
||||||
): string[] => {
|
|
||||||
// if (smolTree) {
|
|
||||||
// throw new Error('A Trie with smolTree enabled cannot perform find!');
|
|
||||||
// }
|
|
||||||
|
|
||||||
const inputTokens = hostnameToTokens(inputSuffix);
|
|
||||||
const res = walkIntoLeafWithTokens(inputTokens);
|
|
||||||
if (res === null) return [];
|
|
||||||
|
|
||||||
const matches: string[][] = [];
|
|
||||||
|
|
||||||
const onMatches = includeEqualWithSuffix
|
|
||||||
// fast path (default option)
|
|
||||||
? (suffix: string[]) => matches.push(suffix)
|
|
||||||
// slow path
|
|
||||||
: (suffix: string[]) => {
|
|
||||||
if (!deepEqualArray(suffix, inputTokens)) {
|
|
||||||
matches.push(suffix);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
walk(
|
|
||||||
onMatches,
|
|
||||||
res.node, // Performing DFS from prefix
|
|
||||||
inputTokens
|
|
||||||
);
|
|
||||||
|
|
||||||
return matches.map((m) => fastStringArrayJoin(m, ''));
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Method used to delete a prefix from the trie.
|
|
||||||
*/
|
|
||||||
const remove = (suffix: string): boolean => {
|
|
||||||
const res = getSingleChildLeaf(hostnameToTokens(suffix));
|
|
||||||
if (res === null) return false;
|
|
||||||
|
|
||||||
if (!res.node[0]) return false;
|
|
||||||
|
|
||||||
size--;
|
|
||||||
const { node, toPrune, tokenToPrune } = res;
|
|
||||||
|
|
||||||
if (tokenToPrune && toPrune) {
|
|
||||||
toPrune[2].delete(tokenToPrune);
|
|
||||||
} else {
|
|
||||||
node[0] = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Method used to assert whether the given prefix exists in the Trie.
|
|
||||||
*/
|
|
||||||
const has = (suffix: string): boolean => {
|
|
||||||
const res = walkIntoLeafWithSuffix(suffix);
|
|
||||||
|
|
||||||
return res
|
|
||||||
? res.node[0]
|
|
||||||
: false;
|
|
||||||
};
|
|
||||||
|
|
||||||
function dump(onSuffix: (suffix: string) => void): void;
|
|
||||||
function dump(): string[];
|
|
||||||
function dump(onSuffix?: (suffix: string) => void): string[] | void {
|
|
||||||
const results: string[] = [];
|
|
||||||
|
|
||||||
const handleSuffix = onSuffix
|
|
||||||
? (suffix: string[]) => onSuffix(fastStringArrayJoin(suffix, ''))
|
|
||||||
: (suffix: string[]) => results.push(fastStringArrayJoin(suffix, ''));
|
|
||||||
|
|
||||||
walk(handleSuffix);
|
|
||||||
|
|
||||||
return results;
|
|
||||||
};
|
|
||||||
|
|
||||||
const dumpMeta = () => {
|
|
||||||
const results: Meta[] = [];
|
|
||||||
|
|
||||||
walk((suffix, meta) => {
|
|
||||||
results.push(meta);
|
|
||||||
});
|
|
||||||
|
|
||||||
return results;
|
|
||||||
};
|
|
||||||
|
|
||||||
const dumpWithMeta = () => {
|
|
||||||
const results: Array<[string, Meta]> = [];
|
|
||||||
|
|
||||||
walk((suffix, meta) => {
|
|
||||||
results.push([fastStringArrayJoin(suffix, ''), meta]);
|
|
||||||
});
|
|
||||||
|
|
||||||
return results;
|
|
||||||
};
|
|
||||||
|
|
||||||
const whitelist = (suffix: string) => {
|
|
||||||
if (!smolTree) {
|
|
||||||
throw new Error('whitelist method is only available in smolTree mode.');
|
|
||||||
}
|
|
||||||
|
|
||||||
|
public whitelist(suffix: string) {
|
||||||
const tokens = hostnameToTokens(suffix);
|
const tokens = hostnameToTokens(suffix);
|
||||||
const res = getSingleChildLeaf(tokens);
|
const res = this.getSingleChildLeaf(tokens);
|
||||||
|
|
||||||
if (res === null) return;
|
if (res === null) return;
|
||||||
|
|
||||||
@ -433,45 +434,48 @@ export const createTrie = <Meta = any>(from?: string[] | Set<string> | null, smo
|
|||||||
node[0] = false;
|
node[0] = false;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// Actually build trie
|
export class HostnameTrie<Meta = any> extends Triebase<Meta> {
|
||||||
if (Array.isArray(from)) {
|
|
||||||
for (let i = 0, l = from.length; i < l; i++) {
|
|
||||||
add(from[i]);
|
|
||||||
}
|
|
||||||
} else if (from) {
|
|
||||||
from.forEach((value) => add(value));
|
|
||||||
}
|
|
||||||
|
|
||||||
const inspect = (depth: number, unpackMeta?: (meta?: Meta) => any) => fastStringArrayJoin(
|
|
||||||
JSON.stringify(deepTrieNodeToJSON(root, unpackMeta), null, 2).split('\n').map((line) => ' '.repeat(depth) + line),
|
|
||||||
'\n'
|
|
||||||
);
|
|
||||||
|
|
||||||
return {
|
|
||||||
add,
|
|
||||||
contains,
|
|
||||||
find,
|
|
||||||
remove,
|
|
||||||
delete: remove,
|
|
||||||
has,
|
|
||||||
dump,
|
|
||||||
dumpMeta,
|
|
||||||
dumpWithMeta,
|
|
||||||
get size() {
|
get size() {
|
||||||
if (smolTree) {
|
return this.$size;
|
||||||
throw new Error('A Trie with smolTree enabled cannot have correct size!');
|
|
||||||
}
|
}
|
||||||
return size;
|
|
||||||
},
|
add(suffix: string, meta?: Meta): void {
|
||||||
get root() {
|
let node: TrieNode<Meta> = this.$root;
|
||||||
return root;
|
|
||||||
},
|
const onToken = (token: string) => {
|
||||||
whitelist,
|
if (node[2].has(token)) {
|
||||||
inspect,
|
node = node[2].get(token)!;
|
||||||
[util.inspect.custom]: inspect,
|
} else {
|
||||||
smolTree
|
const newNode = createNode(node);
|
||||||
|
node[2].set(token, newNode);
|
||||||
|
node = newNode;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// When walkHostnameTokens returns true, we should skip the rest
|
||||||
|
if (walkHostnameTokens(suffix, onToken)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!node[0]) {
|
||||||
|
this.$size++;
|
||||||
|
node[0] = true;
|
||||||
|
node[3] = meta!;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export function createTrie<Meta = any>(from: string[] | Set<string> | null, smolTree: true): HostnameSmolTrie<Meta>;
|
||||||
|
export function createTrie<Meta = any>(from?: string[] | Set<string> | null, smolTree?: false): HostnameTrie<Meta>;
|
||||||
|
export function createTrie<_Meta = any>(from?: string[] | Set<string> | null, smolTree = true) {
|
||||||
|
if (smolTree) {
|
||||||
|
return new HostnameSmolTrie(from);
|
||||||
|
}
|
||||||
|
return new HostnameTrie(from);
|
||||||
};
|
};
|
||||||
|
|
||||||
export type Trie = ReturnType<typeof createTrie>;
|
export type Trie = ReturnType<typeof createTrie>;
|
||||||
|
|||||||
@ -7,7 +7,7 @@ import { parseFelixDnsmasq } from './lib/parse-dnsmasq';
|
|||||||
import { SOURCE_DIR } from './constants/dir';
|
import { SOURCE_DIR } from './constants/dir';
|
||||||
|
|
||||||
export const parseDomesticList = async () => {
|
export const parseDomesticList = async () => {
|
||||||
const trie = createTrie(await parseFelixDnsmasq('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf'), true);
|
const trie = createTrie(await parseFelixDnsmasq('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf'));
|
||||||
|
|
||||||
const top5000 = new Set<string>();
|
const top5000 = new Set<string>();
|
||||||
|
|
||||||
|
|||||||
@ -76,7 +76,7 @@ export const parseGfwList = async () => {
|
|||||||
})).text();
|
})).text();
|
||||||
const topDomains = parse(res);
|
const topDomains = parse(res);
|
||||||
|
|
||||||
const trie = createTrie(blackSet, true);
|
const trie = createTrie(blackSet);
|
||||||
|
|
||||||
for await (const [domain] of topDomains) {
|
for await (const [domain] of topDomains) {
|
||||||
if (trie.has(domain)) {
|
if (trie.has(domain)) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user