Enable trie hostname mode & extend hostname mode test cases

This commit is contained in:
SukkaW 2024-05-27 01:56:08 +08:00
parent d137bdb8a3
commit eb0623c1a9
5 changed files with 110 additions and 31 deletions

View File

@ -7,9 +7,14 @@ import { SHARED_DESCRIPTION } from './lib/constants';
import { getPublicSuffixListTextPromise } from './lib/download-publicsuffixlist'; import { getPublicSuffixListTextPromise } from './lib/download-publicsuffixlist';
import { domainDeduper } from './lib/domain-deduper'; import { domainDeduper } from './lib/domain-deduper';
import { appendArrayInPlace } from './lib/append-array-in-place'; import { appendArrayInPlace } from './lib/append-array-in-place';
import { sortDomains } from './lib/stable-sort-domain';
const getS3OSSDomainsPromise = (async (): Promise<Set<string>> => { const getS3OSSDomainsPromise = (async (): Promise<Set<string>> => {
const trie = createTrie((await getPublicSuffixListTextPromise()).split('\n')); const trie = createTrie(
(await getPublicSuffixListTextPromise()).split('\n'),
true,
false
);
/** /**
* Extract OSS domain from publicsuffix list * Extract OSS domain from publicsuffix list
@ -69,7 +74,7 @@ export const buildCdnDownloadConf = task(import.meta.path, async (span) => {
'This file contains object storage and static assets CDN domains.' 'This file contains object storage and static assets CDN domains.'
], ],
new Date(), new Date(),
domainDeduper(cdnDomainsList), sortDomains(domainDeduper(cdnDomainsList)),
'domainset', 'domainset',
path.resolve(import.meta.dir, '../List/domainset/cdn.conf'), path.resolve(import.meta.dir, '../List/domainset/cdn.conf'),
path.resolve(import.meta.dir, '../Clash/domainset/cdn.txt') path.resolve(import.meta.dir, '../Clash/domainset/cdn.txt')
@ -83,10 +88,10 @@ export const buildCdnDownloadConf = task(import.meta.path, async (span) => {
'This file contains domains for software updating & large file hosting.' 'This file contains domains for software updating & large file hosting.'
], ],
new Date(), new Date(),
domainDeduper([ sortDomains(domainDeduper([
...downloadDomainSet, ...downloadDomainSet,
...steamDomainSet ...steamDomainSet
]), ])),
'domainset', 'domainset',
path.resolve(import.meta.dir, '../List/domainset/download.conf'), path.resolve(import.meta.dir, '../List/domainset/download.conf'),
path.resolve(import.meta.dir, '../Clash/domainset/download.txt') path.resolve(import.meta.dir, '../Clash/domainset/download.txt')

View File

@ -6,6 +6,7 @@ import { createTrie } from './lib/trie';
import { SHARED_DESCRIPTION } from './lib/constants'; import { SHARED_DESCRIPTION } from './lib/constants';
import { createMemoizedPromise } from './lib/memo-promise'; import { createMemoizedPromise } from './lib/memo-promise';
import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq'; import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
import { sortDomains } from './lib/stable-sort-domain';
const PROBE_DOMAINS = ['.microsoft.com', '.windows.net', '.windows.com', '.windowsupdate.com', '.windowssearch.com', '.office.net']; const PROBE_DOMAINS = ['.microsoft.com', '.windows.net', '.windows.com', '.windowsupdate.com', '.windowssearch.com', '.office.net'];
@ -25,22 +26,22 @@ const BLACKLIST = [
export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => { export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
// The first trie finds the Microsoft domains that match the probe domains // The first trie finds the Microsoft domains that match the probe domains
const trie = createTrie(); const trie = createTrie(null, true);
for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) { for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
const domain = extractDomainsFromFelixDnsmasq(line); const domain = extractDomainsFromFelixDnsmasq(line);
if (domain) { if (domain) {
trie.add(domain); trie.add(domain);
} }
} }
const set = new Set(PROBE_DOMAINS.flatMap(domain => trie.find(domain))); const foundMicrosoftCdnDomains = PROBE_DOMAINS.flatMap(domain => trie.find(domain));
// Second trie is to remove blacklisted domains // Second trie is to remove blacklisted domains
const trie2 = createTrie(set); const trie2 = createTrie(foundMicrosoftCdnDomains, true, true);
BLACKLIST.forEach(black => { BLACKLIST.forEach(trie2.whitelist);
trie2.substractSetInPlaceFromFound(black, set);
});
return Array.from(set).map(d => `DOMAIN-SUFFIX,${d}`).concat(WHITELIST); return sortDomains(trie2.dump())
.map(d => `DOMAIN-SUFFIX,${d}`)
.concat(WHITELIST);
}); });
export const buildMicrosoftCdn = task(import.meta.path, async (span) => { export const buildMicrosoftCdn = task(import.meta.path, async (span) => {
@ -53,11 +54,7 @@ export const buildMicrosoftCdn = task(import.meta.path, async (span) => {
' - https://github.com/felixonmars/dnsmasq-china-list' ' - https://github.com/felixonmars/dnsmasq-china-list'
]; ];
const promise = getMicrosoftCdnRulesetPromise(); const res: string[] = await span.traceChildPromise('get microsoft cdn domains', getMicrosoftCdnRulesetPromise());
const peeked = Bun.peek(promise);
const res: string[] = peeked === promise
? await span.traceChildPromise('get microsoft cdn domains', promise)
: (peeked as string[]);
return createRuleset( return createRuleset(
span, span,

View File

@ -20,6 +20,23 @@ describe('Trie', () => {
expect(trie.has('sukkaw')).toBeFalse(); expect(trie.has('sukkaw')).toBeFalse();
}); });
// Adds three domains to an initially empty trie and checks size and exact-match membership.
// The second createTrie argument appears to enable hostname mode (other tests in this file
// pass a variable named `hostnameMode` in that position) — confirm against lib/trie.
it('should be possible to add domains to a Trie (hostname).', () => {
// `null` as the first argument: no initial entries are supplied.
const trie = createTrie(null, true);
trie.add('a.skk.moe');
trie.add('skk.moe');
// Shares the trailing characters "skk.moe" with the entry above but is a distinct domain.
trie.add('anotherskk.moe');
// Each of the three distinct entries is counted once.
expect(trie.size).toBe(3);
expect(trie.has('a.skk.moe')).toBeTrue();
expect(trie.has('skk.moe')).toBeTrue();
expect(trie.has('anotherskk.moe')).toBeTrue();
// Negative cases: an unrelated domain, a truncated form of an added domain,
// and a subdomain that was never added must all be reported as absent.
expect(trie.has('example.com')).toBeFalse();
expect(trie.has('skk.mo')).toBeFalse();
expect(trie.has('another.skk.moe')).toBeFalse();
});
it('adding the same item several times should not increase size.', () => { it('adding the same item several times should not increase size.', () => {
const trie = createTrie(); const trie = createTrie();
@ -31,9 +48,24 @@ describe('Trie', () => {
expect(trie.has('rat')).toBeTrue(); expect(trie.has('rat')).toBeTrue();
}); });
it('should be possible to set the null sequence.', () => { it('adding the same item several times should not increase size (hostname).', () => {
const trie = createTrie(); const trie = createTrie(null, true);
trie.add('skk.moe');
trie.add('blog.skk.moe');
trie.add('skk.moe');
expect(trie.size).toBe(2);
expect(trie.has('skk.moe')).toBeTrue();
});
it('should be possible to set the null sequence.', () => {
let trie = createTrie();
trie.add('');
expect(trie.has('')).toBeTrue();
trie = createTrie(null, true);
trie.add(''); trie.add('');
expect(trie.has('')).toBeTrue(); expect(trie.has('')).toBeTrue();
}); });
@ -61,6 +93,29 @@ describe('Trie', () => {
expect(trie.size).toBe(0); expect(trie.size).toBe(0);
}); });
// Checks delete() on a trie built with the second createTrie argument set to true
// (presumably hostname mode, per the test title — confirm against lib/trie):
// deleting a missing key returns false, deleting an existing key returns true,
// and size drops by one after each successful delete.
it('should be possible to delete items (hostname).', () => {
const trie = createTrie(null, true);
trie.add('skk.moe');
trie.add('example.com');
trie.add('moe.sb');
// The empty string was never added, so deleting it reports false.
expect(trie.delete('')).toBeFalse();
// NOTE(review): this assertion is identical to the previous one — possibly a
// copy/paste slip where a different missing key was intended; confirm.
expect(trie.delete('')).toBeFalse();
// A domain that was never added cannot be deleted.
expect(trie.delete('example.org')).toBeFalse();
expect(trie.delete('skk.moe')).toBeTrue();
expect(trie.has('skk.moe')).toBeFalse();
// 'moe.sb' must survive the removal of 'skk.moe'.
expect(trie.has('moe.sb')).toBeTrue();
expect(trie.size).toBe(2);
expect(trie.delete('example.com')).toBeTrue();
expect(trie.size).toBe(1);
expect(trie.delete('moe.sb')).toBeTrue();
// All three entries removed: the trie is empty again.
expect(trie.size).toBe(0);
});
it('should be possible to check the existence of a sequence in the Trie.', () => { it('should be possible to check the existence of a sequence in the Trie.', () => {
const trie = createTrie(); const trie = createTrie();
@ -68,6 +123,18 @@ describe('Trie', () => {
expect(trie.has('romanesque')).toBe(true); expect(trie.has('romanesque')).toBe(true);
expect(trie.has('roman')).toBe(false); expect(trie.has('roman')).toBe(false);
expect(trie.has('esque')).toBe(false);
expect(trie.has('')).toBe(false);
});
it('should be possible to check the existence of a sequence in the Trie (hostname).', () => {
const trie = createTrie(null, true);
trie.add('example.org.skk.moe');
expect(trie.has('example.org.skk.moe')).toBe(true);
expect(trie.has('skk.moe')).toBe(false);
expect(trie.has('example.org')).toBe(false);
expect(trie.has('')).toBe(false); expect(trie.has('')).toBe(false);
}); });
@ -79,8 +146,6 @@ describe('Trie', () => {
trie.add('sesqueroman'); trie.add('sesqueroman');
trie.add('greek'); trie.add('greek');
console.log({ trie });
expect(trie.find('roman')).toEqual(['roman', 'esqueroman', 'sesqueroman']); expect(trie.find('roman')).toEqual(['roman', 'esqueroman', 'sesqueroman']);
expect(trie.find('man')).toEqual(['roman', 'esqueroman', 'sesqueroman']); expect(trie.find('man')).toEqual(['roman', 'esqueroman', 'sesqueroman']);
expect(trie.find('esqueroman')).toEqual(['esqueroman', 'sesqueroman']); expect(trie.find('esqueroman')).toEqual(['esqueroman', 'sesqueroman']);
@ -89,13 +154,31 @@ describe('Trie', () => {
expect(trie.find('')).toEqual(['greek', 'roman', 'esqueroman', 'sesqueroman']); expect(trie.find('')).toEqual(['greek', 'roman', 'esqueroman', 'sesqueroman']);
}); });
it('should be possible to create a trie from an arbitrary iterable.', () => { it('should be possible to retrieve items matching the given prefix (hostname).', () => {
const words = ['roman', 'esqueroman']; const trie = createTrie(null, true);
const trie = createTrie(words); trie.add('example.com');
trie.add('blog.example.com');
trie.add('cdn.example.com');
trie.add('example.org');
expect(trie.find('example.com')).toEqual(['example.com', 'cdn.example.com', 'blog.example.com']);
expect(trie.find('com')).toEqual(['example.com', 'cdn.example.com', 'blog.example.com']);
expect(trie.find('.example.com')).toEqual(['cdn.example.com', 'blog.example.com']);
expect(trie.find('org')).toEqual(['example.org']);
expect(trie.find('example.net')).toEqual([]);
expect(trie.find('')).toEqual(['example.org', 'example.com', 'cdn.example.com', 'blog.example.com']);
});
it('should be possible to create a trie from an arbitrary iterable.', () => {
let trie = createTrie(['roman', 'esqueroman']);
expect(trie.size).toBe(2); expect(trie.size).toBe(2);
expect(trie.has('roman')).toBe(true); expect(trie.has('roman')).toBe(true);
trie = createTrie(new Set(['skk.moe', 'example.com']), true);
expect(trie.size).toBe(2);
expect(trie.has('skk.moe')).toBe(true);
}); });
}); });
@ -106,8 +189,6 @@ describe.each([
it('should not remove same entry', () => { it('should not remove same entry', () => {
const trie = createTrie(['.skk.moe', 'noc.one'], hostnameMode); const trie = createTrie(['.skk.moe', 'noc.one'], hostnameMode);
console.log(trie);
expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe']); expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe']);
expect(trie.find('noc.one')).toStrictEqual(['noc.one']); expect(trie.find('noc.one')).toStrictEqual(['noc.one']);
}); });
@ -115,8 +196,6 @@ describe.each([
it('should match subdomain - 1', () => { it('should match subdomain - 1', () => {
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode); const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
console.log(trie);
expect(trie.find('.skk.moe')).toStrictEqual(['image.cdn.skk.moe', 'blog.skk.moe']); expect(trie.find('.skk.moe')).toStrictEqual(['image.cdn.skk.moe', 'blog.skk.moe']);
expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']); expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']);
}); });
@ -124,8 +203,6 @@ describe.each([
it('should match subdomain - 2', () => { it('should match subdomain - 2', () => {
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode); const trie = createTrie(['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
console.log(trie);
expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']); expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']);
expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']); expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']);
}); });

View File

@ -14,7 +14,7 @@ export const parseDomesticList = async () => {
} }
} }
const trie = createTrie(set); const trie = createTrie(set, true);
const top5000 = new Set<string>(); const top5000 = new Set<string>();

View File

@ -75,7 +75,7 @@ export const parseGfwList = async () => {
})).text(); })).text();
const topDomains = parse(res); const topDomains = parse(res);
const trie = createTrie(blackSet); const trie = createTrie(blackSet, true);
for await (const [domain] of topDomains) { for await (const [domain] of topDomains) {
if (trie.has(domain)) { if (trie.has(domain)) {