Enable trie hostname mode & extend hostname mode test cases

This commit is contained in:
SukkaW 2024-05-27 01:56:08 +08:00
parent d137bdb8a3
commit eb0623c1a9
5 changed files with 110 additions and 31 deletions

View File

@ -7,9 +7,14 @@ import { SHARED_DESCRIPTION } from './lib/constants';
import { getPublicSuffixListTextPromise } from './lib/download-publicsuffixlist';
import { domainDeduper } from './lib/domain-deduper';
import { appendArrayInPlace } from './lib/append-array-in-place';
import { sortDomains } from './lib/stable-sort-domain';
const getS3OSSDomainsPromise = (async (): Promise<Set<string>> => {
const trie = createTrie((await getPublicSuffixListTextPromise()).split('\n'));
const trie = createTrie(
(await getPublicSuffixListTextPromise()).split('\n'),
true,
false
);
/**
* Extract OSS domain from publicsuffix list
@ -69,7 +74,7 @@ export const buildCdnDownloadConf = task(import.meta.path, async (span) => {
'This file contains object storage and static assets CDN domains.'
],
new Date(),
domainDeduper(cdnDomainsList),
sortDomains(domainDeduper(cdnDomainsList)),
'domainset',
path.resolve(import.meta.dir, '../List/domainset/cdn.conf'),
path.resolve(import.meta.dir, '../Clash/domainset/cdn.txt')
@ -83,10 +88,10 @@ export const buildCdnDownloadConf = task(import.meta.path, async (span) => {
'This file contains domains for software updating & large file hosting.'
],
new Date(),
domainDeduper([
sortDomains(domainDeduper([
...downloadDomainSet,
...steamDomainSet
]),
])),
'domainset',
path.resolve(import.meta.dir, '../List/domainset/download.conf'),
path.resolve(import.meta.dir, '../Clash/domainset/download.txt')

View File

@ -6,6 +6,7 @@ import { createTrie } from './lib/trie';
import { SHARED_DESCRIPTION } from './lib/constants';
import { createMemoizedPromise } from './lib/memo-promise';
import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
import { sortDomains } from './lib/stable-sort-domain';
// Dot-prefixed domain suffixes; each one is passed to `trie.find` to collect the
// candidate Microsoft CDN domains from the upstream list.
const PROBE_DOMAINS = ['.microsoft.com', '.windows.net', '.windows.com', '.windowsupdate.com', '.windowssearch.com', '.office.net'];
@ -25,22 +26,22 @@ const BLACKLIST = [
/**
 * Builds the Microsoft CDN rule lines; the async builder is wrapped in `createMemoizedPromise`.
 *
 * Steps visible here: stream the dnsmasq-china-list `accelerated-domains.china.conf` file line by line,
 * add every domain extracted by `extractDomainsFromFelixDnsmasq` to a trie, collect what `trie.find`
 * returns for each PROBE_DOMAINS entry, drop blacklisted entries using a second trie, and return
 * `DOMAIN-SUFFIX,<domain>` strings followed by the WHITELIST entries.
 *
 * NOTE(review): this span comes from a rendered diff without +/- markers, so lines from before and
 * after the change appear together (`trie` is declared twice, and there are two `return` statements).
 * Confirm against the real file which lines are live.
 */
export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
// The first trie is used to find the Microsoft domains that match the probe domains
const trie = createTrie();
// NOTE(review): presumably the replacement for the line above; the second argument looks like the
// hostname-mode flag — confirm against ./lib/trie.
const trie = createTrie(null, true);
for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
const domain = extractDomainsFromFelixDnsmasq(line);
// Only lines that yield a domain are added
if (domain) {
trie.add(domain);
}
}
const set = new Set(PROBE_DOMAINS.flatMap(domain => trie.find(domain)));
// NOTE(review): presumably the replacement for the `set` line above — keeps the matches as a plain array.
const foundMicrosoftCdnDomains = PROBE_DOMAINS.flatMap(domain => trie.find(domain));
// The second trie is used to remove blacklisted domains
const trie2 = createTrie(set);
BLACKLIST.forEach(black => {
trie2.substractSetInPlaceFromFound(black, set);
});
// NOTE(review): the meaning of the third `createTrie` argument is not visible here — confirm in ./lib/trie.
const trie2 = createTrie(foundMicrosoftCdnDomains, true, true);
// NOTE(review): `whitelist` is passed unbound, and `forEach` calls it with (item, index, array) —
// confirm it neither relies on `this` nor interprets the extra arguments.
BLACKLIST.forEach(trie2.whitelist);
return Array.from(set).map(d => `DOMAIN-SUFFIX,${d}`).concat(WHITELIST);
// Sort what remains in the second trie, prefix each entry, then append the WHITELIST entries unchanged.
return sortDomains(trie2.dump())
.map(d => `DOMAIN-SUFFIX,${d}`)
.concat(WHITELIST);
});
export const buildMicrosoftCdn = task(import.meta.path, async (span) => {
@ -53,11 +54,7 @@ export const buildMicrosoftCdn = task(import.meta.path, async (span) => {
' - https://github.com/felixonmars/dnsmasq-china-list'
];
const promise = getMicrosoftCdnRulesetPromise();
const peeked = Bun.peek(promise);
const res: string[] = peeked === promise
? await span.traceChildPromise('get microsoft cdn domains', promise)
: (peeked as string[]);
const res: string[] = await span.traceChildPromise('get microsoft cdn domains', getMicrosoftCdnRulesetPromise());
return createRuleset(
span,

View File

@ -20,6 +20,23 @@ describe('Trie', () => {
expect(trie.has('sukkaw')).toBeFalse();
});
// Hostname-mode trie (second `createTrie` argument is `true`): every added hostname is counted
// and reported by `has`, while strings that were never added are not.
it('should be possible to add domains to a Trie (hostname).', () => {
const trie = createTrie(null, true);
trie.add('a.skk.moe');
trie.add('skk.moe');
trie.add('anotherskk.moe');
expect(trie.size).toBe(3);
expect(trie.has('a.skk.moe')).toBeTrue();
expect(trie.has('skk.moe')).toBeTrue();
expect(trie.has('anotherskk.moe')).toBeTrue();
// Negative cases: an unrelated domain, a truncated string, and a subdomain that was never added
expect(trie.has('example.com')).toBeFalse();
expect(trie.has('skk.mo')).toBeFalse();
expect(trie.has('another.skk.moe')).toBeFalse();
});
it('adding the same item several times should not increase size.', () => {
const trie = createTrie();
@ -31,9 +48,24 @@ describe('Trie', () => {
expect(trie.has('rat')).toBeTrue();
});
it('should be possible to set the null sequence.', () => {
const trie = createTrie();
// Adding 'skk.moe' twice must count once: two distinct hostnames are added, so size stays 2.
it('adding the same item several times should not increase size (hostname).', () => {
const trie = createTrie(null, true);
trie.add('skk.moe');
trie.add('blog.skk.moe');
trie.add('skk.moe');
expect(trie.size).toBe(2);
expect(trie.has('skk.moe')).toBeTrue();
});
// The empty string can be stored and found, both with the default createTrie() and with the
// hostname-mode trie (second argument `true`).
it('should be possible to set the null sequence.', () => {
let trie = createTrie();
trie.add('');
expect(trie.has('')).toBeTrue();
// Repeat the same check on a fresh hostname-mode trie
trie = createTrie(null, true);
trie.add('');
expect(trie.has('')).toBeTrue();
});
@ -61,6 +93,29 @@ describe('Trie', () => {
expect(trie.size).toBe(0);
});
// `delete` returns false for entries that are absent and true for entries it removes;
// `size` drops by one per successful deletion and other entries are unaffected.
it('should be possible to delete items (hostname).', () => {
const trie = createTrie(null, true);
trie.add('skk.moe');
trie.add('example.com');
trie.add('moe.sb');
// The empty string was never added; deleting it is asserted twice and must be false both times
expect(trie.delete('')).toBeFalse();
expect(trie.delete('')).toBeFalse();
expect(trie.delete('example.org')).toBeFalse();
expect(trie.delete('skk.moe')).toBeTrue();
expect(trie.has('skk.moe')).toBeFalse();
// 'moe.sb' must still be present after 'skk.moe' is removed
expect(trie.has('moe.sb')).toBeTrue();
expect(trie.size).toBe(2);
expect(trie.delete('example.com')).toBeTrue();
expect(trie.size).toBe(1);
expect(trie.delete('moe.sb')).toBeTrue();
expect(trie.size).toBe(0);
});
it('should be possible to check the existence of a sequence in the Trie.', () => {
const trie = createTrie();
@ -68,6 +123,18 @@ describe('Trie', () => {
expect(trie.has('romanesque')).toBe(true);
expect(trie.has('roman')).toBe(false);
expect(trie.has('esque')).toBe(false);
expect(trie.has('')).toBe(false);
});
// `has` is an exact-match lookup: neither the trailing part ('skk.moe') nor the leading part
// ('example.org') of the stored hostname, nor the empty string, counts as present.
it('should be possible to check the existence of a sequence in the Trie (hostname).', () => {
const trie = createTrie(null, true);
trie.add('example.org.skk.moe');
expect(trie.has('example.org.skk.moe')).toBe(true);
expect(trie.has('skk.moe')).toBe(false);
expect(trie.has('example.org')).toBe(false);
expect(trie.has('')).toBe(false);
});
@ -79,8 +146,6 @@ describe('Trie', () => {
trie.add('sesqueroman');
trie.add('greek');
console.log({ trie });
expect(trie.find('roman')).toEqual(['roman', 'esqueroman', 'sesqueroman']);
expect(trie.find('man')).toEqual(['roman', 'esqueroman', 'sesqueroman']);
expect(trie.find('esqueroman')).toEqual(['esqueroman', 'sesqueroman']);
@ -89,13 +154,31 @@ describe('Trie', () => {
expect(trie.find('')).toEqual(['greek', 'roman', 'esqueroman', 'sesqueroman']);
});
it('should be possible to create a trie from an arbitrary iterable.', () => {
const words = ['roman', 'esqueroman'];
// `find` on a hostname-mode trie returns the stored hostnames that end with the given suffix.
// The expected arrays are compared in order, so the order `find` returns entries in is part of the assertion.
it('should be possible to retrieve items matching the given prefix (hostname).', () => {
const trie = createTrie(null, true);
// NOTE(review): the next line redeclares `trie` and references `words`, which is not defined in this
// test; it looks like a leftover pre-change line from the rendered diff — confirm against the real file.
const trie = createTrie(words);
trie.add('example.com');
trie.add('blog.example.com');
trie.add('cdn.example.com');
trie.add('example.org');
expect(trie.find('example.com')).toEqual(['example.com', 'cdn.example.com', 'blog.example.com']);
expect(trie.find('com')).toEqual(['example.com', 'cdn.example.com', 'blog.example.com']);
// With a leading dot the bare 'example.com' entry is not expected in the result
expect(trie.find('.example.com')).toEqual(['cdn.example.com', 'blog.example.com']);
expect(trie.find('org')).toEqual(['example.org']);
expect(trie.find('example.net')).toEqual([]);
// An empty query is expected to return every stored hostname
expect(trie.find('')).toEqual(['example.org', 'example.com', 'cdn.example.com', 'blog.example.com']);
});
// createTrie accepts initial entries from an array as well as from a Set (the Set case uses hostname mode).
it('should be possible to create a trie from an arbitrary iterable.', () => {
let trie = createTrie(['roman', 'esqueroman']);
expect(trie.size).toBe(2);
expect(trie.has('roman')).toBe(true);
trie = createTrie(new Set(['skk.moe', 'example.com']), true);
expect(trie.size).toBe(2);
expect(trie.has('skk.moe')).toBe(true);
});
});
@ -106,8 +189,6 @@ describe.each([
// Looking up an entry by exactly the string it was added with returns just that entry.
// `hostnameMode` is not defined in this block — presumably supplied by the enclosing describe.each; confirm.
it('should not remove same entry', () => {
const trie = createTrie(['.skk.moe', 'noc.one'], hostnameMode);
// NOTE(review): debug output; likely one of the lines this commit deletes — confirm against the real file.
console.log(trie);
expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe']);
expect(trie.find('noc.one')).toStrictEqual(['noc.one']);
});
@ -115,8 +196,6 @@ describe.each([
// A dot-prefixed query returns the stored subdomains under that suffix; entries under other suffixes are excluded.
// `hostnameMode` is not defined in this block — presumably supplied by the enclosing describe.each; confirm.
it('should match subdomain - 1', () => {
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
// NOTE(review): debug output; likely one of the lines this commit deletes — confirm against the real file.
console.log(trie);
expect(trie.find('.skk.moe')).toStrictEqual(['image.cdn.skk.moe', 'blog.skk.moe']);
expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']);
});
@ -124,8 +203,6 @@ describe.each([
// Same as the previous case, but the dot-prefixed entry '.skk.moe' is itself stored and is expected
// first in the result, ahead of its subdomains.
// `hostnameMode` is not defined in this block — presumably supplied by the enclosing describe.each; confirm.
it('should match subdomain - 2', () => {
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
// NOTE(review): debug output; likely one of the lines this commit deletes — confirm against the real file.
console.log(trie);
expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']);
expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']);
});

View File

@ -14,7 +14,7 @@ export const parseDomesticList = async () => {
}
}
const trie = createTrie(set);
const trie = createTrie(set, true);
const top5000 = new Set<string>();

View File

@ -75,7 +75,7 @@ export const parseGfwList = async () => {
})).text();
const topDomains = parse(res);
const trie = createTrie(blackSet);
const trie = createTrie(blackSet, true);
for await (const [domain] of topDomains) {
if (trie.has(domain)) {