From eb0623c1a97ddcdecfc99f87a69f433e9e48faea Mon Sep 17 00:00:00 2001 From: SukkaW Date: Mon, 27 May 2024 01:56:08 +0800 Subject: [PATCH] Enable trie hostname mode & extend hostname mode test cases --- Build/build-cdn-download-conf.ts | 13 ++-- Build/build-microsoft-cdn.ts | 21 +++---- Build/lib/trie.test.ts | 103 +++++++++++++++++++++++++++---- Build/validate-domestic.ts | 2 +- Build/validate-gfwlist.ts | 2 +- 5 files changed, 110 insertions(+), 31 deletions(-) diff --git a/Build/build-cdn-download-conf.ts b/Build/build-cdn-download-conf.ts index 76f71129..ab42b02e 100644 --- a/Build/build-cdn-download-conf.ts +++ b/Build/build-cdn-download-conf.ts @@ -7,9 +7,14 @@ import { SHARED_DESCRIPTION } from './lib/constants'; import { getPublicSuffixListTextPromise } from './lib/download-publicsuffixlist'; import { domainDeduper } from './lib/domain-deduper'; import { appendArrayInPlace } from './lib/append-array-in-place'; +import { sortDomains } from './lib/stable-sort-domain'; const getS3OSSDomainsPromise = (async (): Promise> => { - const trie = createTrie((await getPublicSuffixListTextPromise()).split('\n')); + const trie = createTrie( + (await getPublicSuffixListTextPromise()).split('\n'), + true, + false + ); /** * Extract OSS domain from publicsuffix list @@ -69,7 +74,7 @@ export const buildCdnDownloadConf = task(import.meta.path, async (span) => { 'This file contains object storage and static assets CDN domains.' ], new Date(), - domainDeduper(cdnDomainsList), + sortDomains(domainDeduper(cdnDomainsList)), 'domainset', path.resolve(import.meta.dir, '../List/domainset/cdn.conf'), path.resolve(import.meta.dir, '../Clash/domainset/cdn.txt') @@ -83,10 +88,10 @@ export const buildCdnDownloadConf = task(import.meta.path, async (span) => { 'This file contains domains for software updating & large file hosting.' ], new Date(), - domainDeduper([ + sortDomains(domainDeduper([ ...downloadDomainSet, ...steamDomainSet - ]), + ])), 'domainset', path.resolve(import.meta.dir, '../List/domainset/download.conf'), path.resolve(import.meta.dir, '../Clash/domainset/download.txt') diff --git a/Build/build-microsoft-cdn.ts b/Build/build-microsoft-cdn.ts index 2ce1b0c1..78d15861 100644 --- a/Build/build-microsoft-cdn.ts +++ b/Build/build-microsoft-cdn.ts @@ -6,6 +6,7 @@ import { createTrie } from './lib/trie'; import { SHARED_DESCRIPTION } from './lib/constants'; import { createMemoizedPromise } from './lib/memo-promise'; import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq'; +import { sortDomains } from './lib/stable-sort-domain'; const PROBE_DOMAINS = ['.microsoft.com', '.windows.net', '.windows.com', '.windowsupdate.com', '.windowssearch.com', '.office.net']; @@ -25,22 +26,22 @@ const BLACKLIST = [ export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => { // First trie is to find the microsoft domains that matches probe domains - const trie = createTrie(); + const trie = createTrie(null, true); for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) { const domain = extractDomainsFromFelixDnsmasq(line); if (domain) { trie.add(domain); } } - const set = new Set(PROBE_DOMAINS.flatMap(domain => trie.find(domain))); + const foundMicrosoftCdnDomains = PROBE_DOMAINS.flatMap(domain => trie.find(domain)); // Second trie is to remove blacklisted domains - const trie2 = createTrie(set); - BLACKLIST.forEach(black => { - trie2.substractSetInPlaceFromFound(black, set); - }); + const trie2 = createTrie(foundMicrosoftCdnDomains, true, true); + BLACKLIST.forEach(trie2.whitelist); - return Array.from(set).map(d => `DOMAIN-SUFFIX,${d}`).concat(WHITELIST); + return sortDomains(trie2.dump()) + .map(d => `DOMAIN-SUFFIX,${d}`) + .concat(WHITELIST); }); export const buildMicrosoftCdn = task(import.meta.path, async (span) => { @@ -53,11 +54,7 @@ export const buildMicrosoftCdn = task(import.meta.path, async (span) => { ' - https://github.com/felixonmars/dnsmasq-china-list' ]; - const promise = getMicrosoftCdnRulesetPromise(); - const peeked = Bun.peek(promise); - const res: string[] = peeked === promise - ? await span.traceChildPromise('get microsoft cdn domains', promise) - : (peeked as string[]); + const res: string[] = await span.traceChildPromise('get microsoft cdn domains', getMicrosoftCdnRulesetPromise()); return createRuleset( span, diff --git a/Build/lib/trie.test.ts b/Build/lib/trie.test.ts index cda62011..71c588b2 100644 --- a/Build/lib/trie.test.ts +++ b/Build/lib/trie.test.ts @@ -20,6 +20,23 @@ describe('Trie', () => { expect(trie.has('sukkaw')).toBeFalse(); }); + it('should be possible to add domains to a Trie (hostname).', () => { + const trie = createTrie(null, true); + + trie.add('a.skk.moe'); + trie.add('skk.moe'); + trie.add('anotherskk.moe'); + + expect(trie.size).toBe(3); + + expect(trie.has('a.skk.moe')).toBeTrue(); + expect(trie.has('skk.moe')).toBeTrue(); + expect(trie.has('anotherskk.moe')).toBeTrue(); + expect(trie.has('example.com')).toBeFalse(); + expect(trie.has('skk.mo')).toBeFalse(); + expect(trie.has('another.skk.moe')).toBeFalse(); + }); + it('adding the same item several times should not increase size.', () => { const trie = createTrie(); @@ -31,9 +48,24 @@ describe('Trie', () => { expect(trie.has('rat')).toBeTrue(); }); - it('should be possible to set the null sequence.', () => { - const trie = createTrie(); + it('adding the same item several times should not increase size (hostname).', () => { + const trie = createTrie(null, true); + trie.add('skk.moe'); + trie.add('blog.skk.moe'); + trie.add('skk.moe'); + + expect(trie.size).toBe(2); + expect(trie.has('skk.moe')).toBeTrue(); + }); + + it('should be possible to set the null sequence.', () => { + let trie = createTrie(); + + trie.add(''); + expect(trie.has('')).toBeTrue(); + + trie = createTrie(null, true); trie.add(''); expect(trie.has('')).toBeTrue(); }); @@ -61,6 +93,29 @@ describe('Trie', () => { expect(trie.size).toBe(0); }); + it('should be possible to delete items (hostname).', () => { + const trie = createTrie(null, true); + + trie.add('skk.moe'); + trie.add('example.com'); + trie.add('moe.sb'); + + expect(trie.delete('')).toBeFalse(); + expect(trie.delete('')).toBeFalse(); + expect(trie.delete('example.org')).toBeFalse(); + + expect(trie.delete('skk.moe')).toBeTrue(); + expect(trie.has('skk.moe')).toBeFalse(); + expect(trie.has('moe.sb')).toBeTrue(); + + expect(trie.size).toBe(2); + + expect(trie.delete('example.com')).toBeTrue(); + expect(trie.size).toBe(1); + expect(trie.delete('moe.sb')).toBeTrue(); + expect(trie.size).toBe(0); + }); + it('should be possible to check the existence of a sequence in the Trie.', () => { const trie = createTrie(); @@ -68,6 +123,18 @@ describe('Trie', () => { expect(trie.has('romanesque')).toBe(true); expect(trie.has('roman')).toBe(false); + expect(trie.has('esque')).toBe(false); + expect(trie.has('')).toBe(false); + }); + + it('should be possible to check the existence of a sequence in the Trie (hostname).', () => { + const trie = createTrie(null, true); + + trie.add('example.org.skk.moe'); + + expect(trie.has('example.org.skk.moe')).toBe(true); + expect(trie.has('skk.moe')).toBe(false); + expect(trie.has('example.org')).toBe(false); expect(trie.has('')).toBe(false); }); @@ -79,8 +146,6 @@ describe('Trie', () => { trie.add('sesqueroman'); trie.add('greek'); - console.log({ trie }); - expect(trie.find('roman')).toEqual(['roman', 'esqueroman', 'sesqueroman']); expect(trie.find('man')).toEqual(['roman', 'esqueroman', 'sesqueroman']); expect(trie.find('esqueroman')).toEqual(['esqueroman', 'sesqueroman']); @@ -89,13 +154,31 @@ describe('Trie', () => { expect(trie.find('')).toEqual(['greek', 'roman', 'esqueroman', 'sesqueroman']); }); - it('should be possible to create a trie from an arbitrary iterable.', () => { - const words = ['roman', 'esqueroman']; + it('should be possible to retrieve items matching the given prefix (hostname).', () => { + const trie = createTrie(null, true); - const trie = createTrie(words); + trie.add('example.com'); + trie.add('blog.example.com'); + trie.add('cdn.example.com'); + trie.add('example.org'); + + expect(trie.find('example.com')).toEqual(['example.com', 'cdn.example.com', 'blog.example.com']); + expect(trie.find('com')).toEqual(['example.com', 'cdn.example.com', 'blog.example.com']); + expect(trie.find('.example.com')).toEqual(['cdn.example.com', 'blog.example.com']); + expect(trie.find('org')).toEqual(['example.org']); + expect(trie.find('example.net')).toEqual([]); + expect(trie.find('')).toEqual(['example.org', 'example.com', 'cdn.example.com', 'blog.example.com']); + }); + + it('should be possible to create a trie from an arbitrary iterable.', () => { + let trie = createTrie(['roman', 'esqueroman']); expect(trie.size).toBe(2); expect(trie.has('roman')).toBe(true); + + trie = createTrie(new Set(['skk.moe', 'example.com']), true); + expect(trie.size).toBe(2); + expect(trie.has('skk.moe')).toBe(true); }); }); @@ -106,8 +189,6 @@ describe.each([ it('should not remove same entry', () => { const trie = createTrie(['.skk.moe', 'noc.one'], hostnameMode); - console.log(trie); - expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe']); expect(trie.find('noc.one')).toStrictEqual(['noc.one']); }); @@ -115,8 +196,6 @@ describe.each([ it('should match subdomain - 1', () => { const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode); - console.log(trie); - expect(trie.find('.skk.moe')).toStrictEqual(['image.cdn.skk.moe', 'blog.skk.moe']); expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']); }); @@ -124,8 +203,6 @@ describe.each([ it('should match subdomain - 2', () => { const trie = createTrie(['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode); - console.log(trie); - expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']); expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']); }); diff --git a/Build/validate-domestic.ts b/Build/validate-domestic.ts index e516891a..cca03949 100644 --- a/Build/validate-domestic.ts +++ b/Build/validate-domestic.ts @@ -14,7 +14,7 @@ export const parseDomesticList = async () => { } } - const trie = createTrie(set); + const trie = createTrie(set, true); const top5000 = new Set(); diff --git a/Build/validate-gfwlist.ts b/Build/validate-gfwlist.ts index 1dfd6dac..431023e4 100644 --- a/Build/validate-gfwlist.ts +++ b/Build/validate-gfwlist.ts @@ -75,7 +75,7 @@ export const parseGfwList = async () => { })).text(); const topDomains = parse(res); - const trie = createTrie(blackSet); + const trie = createTrie(blackSet, true); for await (const [domain] of topDomains) { if (trie.has(domain)) {