mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 09:10:35 +08:00
Enable trie hostname mode & extend hostname mode test cases
This commit is contained in:
parent
d137bdb8a3
commit
eb0623c1a9
@ -7,9 +7,14 @@ import { SHARED_DESCRIPTION } from './lib/constants';
|
|||||||
import { getPublicSuffixListTextPromise } from './lib/download-publicsuffixlist';
|
import { getPublicSuffixListTextPromise } from './lib/download-publicsuffixlist';
|
||||||
import { domainDeduper } from './lib/domain-deduper';
|
import { domainDeduper } from './lib/domain-deduper';
|
||||||
import { appendArrayInPlace } from './lib/append-array-in-place';
|
import { appendArrayInPlace } from './lib/append-array-in-place';
|
||||||
|
import { sortDomains } from './lib/stable-sort-domain';
|
||||||
|
|
||||||
const getS3OSSDomainsPromise = (async (): Promise<Set<string>> => {
|
const getS3OSSDomainsPromise = (async (): Promise<Set<string>> => {
|
||||||
const trie = createTrie((await getPublicSuffixListTextPromise()).split('\n'));
|
const trie = createTrie(
|
||||||
|
(await getPublicSuffixListTextPromise()).split('\n'),
|
||||||
|
true,
|
||||||
|
false
|
||||||
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract OSS domain from publicsuffix list
|
* Extract OSS domain from publicsuffix list
|
||||||
@ -69,7 +74,7 @@ export const buildCdnDownloadConf = task(import.meta.path, async (span) => {
|
|||||||
'This file contains object storage and static assets CDN domains.'
|
'This file contains object storage and static assets CDN domains.'
|
||||||
],
|
],
|
||||||
new Date(),
|
new Date(),
|
||||||
domainDeduper(cdnDomainsList),
|
sortDomains(domainDeduper(cdnDomainsList)),
|
||||||
'domainset',
|
'domainset',
|
||||||
path.resolve(import.meta.dir, '../List/domainset/cdn.conf'),
|
path.resolve(import.meta.dir, '../List/domainset/cdn.conf'),
|
||||||
path.resolve(import.meta.dir, '../Clash/domainset/cdn.txt')
|
path.resolve(import.meta.dir, '../Clash/domainset/cdn.txt')
|
||||||
@ -83,10 +88,10 @@ export const buildCdnDownloadConf = task(import.meta.path, async (span) => {
|
|||||||
'This file contains domains for software updating & large file hosting.'
|
'This file contains domains for software updating & large file hosting.'
|
||||||
],
|
],
|
||||||
new Date(),
|
new Date(),
|
||||||
domainDeduper([
|
sortDomains(domainDeduper([
|
||||||
...downloadDomainSet,
|
...downloadDomainSet,
|
||||||
...steamDomainSet
|
...steamDomainSet
|
||||||
]),
|
])),
|
||||||
'domainset',
|
'domainset',
|
||||||
path.resolve(import.meta.dir, '../List/domainset/download.conf'),
|
path.resolve(import.meta.dir, '../List/domainset/download.conf'),
|
||||||
path.resolve(import.meta.dir, '../Clash/domainset/download.txt')
|
path.resolve(import.meta.dir, '../Clash/domainset/download.txt')
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import { createTrie } from './lib/trie';
|
|||||||
import { SHARED_DESCRIPTION } from './lib/constants';
|
import { SHARED_DESCRIPTION } from './lib/constants';
|
||||||
import { createMemoizedPromise } from './lib/memo-promise';
|
import { createMemoizedPromise } from './lib/memo-promise';
|
||||||
import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
|
import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
|
||||||
|
import { sortDomains } from './lib/stable-sort-domain';
|
||||||
|
|
||||||
const PROBE_DOMAINS = ['.microsoft.com', '.windows.net', '.windows.com', '.windowsupdate.com', '.windowssearch.com', '.office.net'];
|
const PROBE_DOMAINS = ['.microsoft.com', '.windows.net', '.windows.com', '.windowsupdate.com', '.windowssearch.com', '.office.net'];
|
||||||
|
|
||||||
@ -25,22 +26,22 @@ const BLACKLIST = [
|
|||||||
|
|
||||||
export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
|
export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
|
||||||
// First trie is to find the microsoft domains that matches probe domains
|
// First trie is to find the microsoft domains that matches probe domains
|
||||||
const trie = createTrie();
|
const trie = createTrie(null, true);
|
||||||
for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
|
for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
|
||||||
const domain = extractDomainsFromFelixDnsmasq(line);
|
const domain = extractDomainsFromFelixDnsmasq(line);
|
||||||
if (domain) {
|
if (domain) {
|
||||||
trie.add(domain);
|
trie.add(domain);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const set = new Set(PROBE_DOMAINS.flatMap(domain => trie.find(domain)));
|
const foundMicrosoftCdnDomains = PROBE_DOMAINS.flatMap(domain => trie.find(domain));
|
||||||
|
|
||||||
// Second trie is to remove blacklisted domains
|
// Second trie is to remove blacklisted domains
|
||||||
const trie2 = createTrie(set);
|
const trie2 = createTrie(foundMicrosoftCdnDomains, true, true);
|
||||||
BLACKLIST.forEach(black => {
|
BLACKLIST.forEach(trie2.whitelist);
|
||||||
trie2.substractSetInPlaceFromFound(black, set);
|
|
||||||
});
|
|
||||||
|
|
||||||
return Array.from(set).map(d => `DOMAIN-SUFFIX,${d}`).concat(WHITELIST);
|
return sortDomains(trie2.dump())
|
||||||
|
.map(d => `DOMAIN-SUFFIX,${d}`)
|
||||||
|
.concat(WHITELIST);
|
||||||
});
|
});
|
||||||
|
|
||||||
export const buildMicrosoftCdn = task(import.meta.path, async (span) => {
|
export const buildMicrosoftCdn = task(import.meta.path, async (span) => {
|
||||||
@ -53,11 +54,7 @@ export const buildMicrosoftCdn = task(import.meta.path, async (span) => {
|
|||||||
' - https://github.com/felixonmars/dnsmasq-china-list'
|
' - https://github.com/felixonmars/dnsmasq-china-list'
|
||||||
];
|
];
|
||||||
|
|
||||||
const promise = getMicrosoftCdnRulesetPromise();
|
const res: string[] = await span.traceChildPromise('get microsoft cdn domains', getMicrosoftCdnRulesetPromise());
|
||||||
const peeked = Bun.peek(promise);
|
|
||||||
const res: string[] = peeked === promise
|
|
||||||
? await span.traceChildPromise('get microsoft cdn domains', promise)
|
|
||||||
: (peeked as string[]);
|
|
||||||
|
|
||||||
return createRuleset(
|
return createRuleset(
|
||||||
span,
|
span,
|
||||||
|
|||||||
@ -20,6 +20,23 @@ describe('Trie', () => {
|
|||||||
expect(trie.has('sukkaw')).toBeFalse();
|
expect(trie.has('sukkaw')).toBeFalse();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should be possible to add domains to a Trie (hostname).', () => {
|
||||||
|
const trie = createTrie(null, true);
|
||||||
|
|
||||||
|
trie.add('a.skk.moe');
|
||||||
|
trie.add('skk.moe');
|
||||||
|
trie.add('anotherskk.moe');
|
||||||
|
|
||||||
|
expect(trie.size).toBe(3);
|
||||||
|
|
||||||
|
expect(trie.has('a.skk.moe')).toBeTrue();
|
||||||
|
expect(trie.has('skk.moe')).toBeTrue();
|
||||||
|
expect(trie.has('anotherskk.moe')).toBeTrue();
|
||||||
|
expect(trie.has('example.com')).toBeFalse();
|
||||||
|
expect(trie.has('skk.mo')).toBeFalse();
|
||||||
|
expect(trie.has('another.skk.moe')).toBeFalse();
|
||||||
|
});
|
||||||
|
|
||||||
it('adding the same item several times should not increase size.', () => {
|
it('adding the same item several times should not increase size.', () => {
|
||||||
const trie = createTrie();
|
const trie = createTrie();
|
||||||
|
|
||||||
@ -31,9 +48,24 @@ describe('Trie', () => {
|
|||||||
expect(trie.has('rat')).toBeTrue();
|
expect(trie.has('rat')).toBeTrue();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should be possible to set the null sequence.', () => {
|
it('adding the same item several times should not increase size (hostname).', () => {
|
||||||
const trie = createTrie();
|
const trie = createTrie(null, true);
|
||||||
|
|
||||||
|
trie.add('skk.moe');
|
||||||
|
trie.add('blog.skk.moe');
|
||||||
|
trie.add('skk.moe');
|
||||||
|
|
||||||
|
expect(trie.size).toBe(2);
|
||||||
|
expect(trie.has('skk.moe')).toBeTrue();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should be possible to set the null sequence.', () => {
|
||||||
|
let trie = createTrie();
|
||||||
|
|
||||||
|
trie.add('');
|
||||||
|
expect(trie.has('')).toBeTrue();
|
||||||
|
|
||||||
|
trie = createTrie(null, true);
|
||||||
trie.add('');
|
trie.add('');
|
||||||
expect(trie.has('')).toBeTrue();
|
expect(trie.has('')).toBeTrue();
|
||||||
});
|
});
|
||||||
@ -61,6 +93,29 @@ describe('Trie', () => {
|
|||||||
expect(trie.size).toBe(0);
|
expect(trie.size).toBe(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should be possible to delete items (hostname).', () => {
|
||||||
|
const trie = createTrie(null, true);
|
||||||
|
|
||||||
|
trie.add('skk.moe');
|
||||||
|
trie.add('example.com');
|
||||||
|
trie.add('moe.sb');
|
||||||
|
|
||||||
|
expect(trie.delete('')).toBeFalse();
|
||||||
|
expect(trie.delete('')).toBeFalse();
|
||||||
|
expect(trie.delete('example.org')).toBeFalse();
|
||||||
|
|
||||||
|
expect(trie.delete('skk.moe')).toBeTrue();
|
||||||
|
expect(trie.has('skk.moe')).toBeFalse();
|
||||||
|
expect(trie.has('moe.sb')).toBeTrue();
|
||||||
|
|
||||||
|
expect(trie.size).toBe(2);
|
||||||
|
|
||||||
|
expect(trie.delete('example.com')).toBeTrue();
|
||||||
|
expect(trie.size).toBe(1);
|
||||||
|
expect(trie.delete('moe.sb')).toBeTrue();
|
||||||
|
expect(trie.size).toBe(0);
|
||||||
|
});
|
||||||
|
|
||||||
it('should be possible to check the existence of a sequence in the Trie.', () => {
|
it('should be possible to check the existence of a sequence in the Trie.', () => {
|
||||||
const trie = createTrie();
|
const trie = createTrie();
|
||||||
|
|
||||||
@ -68,6 +123,18 @@ describe('Trie', () => {
|
|||||||
|
|
||||||
expect(trie.has('romanesque')).toBe(true);
|
expect(trie.has('romanesque')).toBe(true);
|
||||||
expect(trie.has('roman')).toBe(false);
|
expect(trie.has('roman')).toBe(false);
|
||||||
|
expect(trie.has('esque')).toBe(false);
|
||||||
|
expect(trie.has('')).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should be possible to check the existence of a sequence in the Trie (hostname).', () => {
|
||||||
|
const trie = createTrie(null, true);
|
||||||
|
|
||||||
|
trie.add('example.org.skk.moe');
|
||||||
|
|
||||||
|
expect(trie.has('example.org.skk.moe')).toBe(true);
|
||||||
|
expect(trie.has('skk.moe')).toBe(false);
|
||||||
|
expect(trie.has('example.org')).toBe(false);
|
||||||
expect(trie.has('')).toBe(false);
|
expect(trie.has('')).toBe(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -79,8 +146,6 @@ describe('Trie', () => {
|
|||||||
trie.add('sesqueroman');
|
trie.add('sesqueroman');
|
||||||
trie.add('greek');
|
trie.add('greek');
|
||||||
|
|
||||||
console.log({ trie });
|
|
||||||
|
|
||||||
expect(trie.find('roman')).toEqual(['roman', 'esqueroman', 'sesqueroman']);
|
expect(trie.find('roman')).toEqual(['roman', 'esqueroman', 'sesqueroman']);
|
||||||
expect(trie.find('man')).toEqual(['roman', 'esqueroman', 'sesqueroman']);
|
expect(trie.find('man')).toEqual(['roman', 'esqueroman', 'sesqueroman']);
|
||||||
expect(trie.find('esqueroman')).toEqual(['esqueroman', 'sesqueroman']);
|
expect(trie.find('esqueroman')).toEqual(['esqueroman', 'sesqueroman']);
|
||||||
@ -89,13 +154,31 @@ describe('Trie', () => {
|
|||||||
expect(trie.find('')).toEqual(['greek', 'roman', 'esqueroman', 'sesqueroman']);
|
expect(trie.find('')).toEqual(['greek', 'roman', 'esqueroman', 'sesqueroman']);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should be possible to create a trie from an arbitrary iterable.', () => {
|
it('should be possible to retrieve items matching the given prefix (hostname).', () => {
|
||||||
const words = ['roman', 'esqueroman'];
|
const trie = createTrie(null, true);
|
||||||
|
|
||||||
const trie = createTrie(words);
|
trie.add('example.com');
|
||||||
|
trie.add('blog.example.com');
|
||||||
|
trie.add('cdn.example.com');
|
||||||
|
trie.add('example.org');
|
||||||
|
|
||||||
|
expect(trie.find('example.com')).toEqual(['example.com', 'cdn.example.com', 'blog.example.com']);
|
||||||
|
expect(trie.find('com')).toEqual(['example.com', 'cdn.example.com', 'blog.example.com']);
|
||||||
|
expect(trie.find('.example.com')).toEqual(['cdn.example.com', 'blog.example.com']);
|
||||||
|
expect(trie.find('org')).toEqual(['example.org']);
|
||||||
|
expect(trie.find('example.net')).toEqual([]);
|
||||||
|
expect(trie.find('')).toEqual(['example.org', 'example.com', 'cdn.example.com', 'blog.example.com']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should be possible to create a trie from an arbitrary iterable.', () => {
|
||||||
|
let trie = createTrie(['roman', 'esqueroman']);
|
||||||
|
|
||||||
expect(trie.size).toBe(2);
|
expect(trie.size).toBe(2);
|
||||||
expect(trie.has('roman')).toBe(true);
|
expect(trie.has('roman')).toBe(true);
|
||||||
|
|
||||||
|
trie = createTrie(new Set(['skk.moe', 'example.com']), true);
|
||||||
|
expect(trie.size).toBe(2);
|
||||||
|
expect(trie.has('skk.moe')).toBe(true);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -106,8 +189,6 @@ describe.each([
|
|||||||
it('should not remove same entry', () => {
|
it('should not remove same entry', () => {
|
||||||
const trie = createTrie(['.skk.moe', 'noc.one'], hostnameMode);
|
const trie = createTrie(['.skk.moe', 'noc.one'], hostnameMode);
|
||||||
|
|
||||||
console.log(trie);
|
|
||||||
|
|
||||||
expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe']);
|
expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe']);
|
||||||
expect(trie.find('noc.one')).toStrictEqual(['noc.one']);
|
expect(trie.find('noc.one')).toStrictEqual(['noc.one']);
|
||||||
});
|
});
|
||||||
@ -115,8 +196,6 @@ describe.each([
|
|||||||
it('should match subdomain - 1', () => {
|
it('should match subdomain - 1', () => {
|
||||||
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
|
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
|
||||||
|
|
||||||
console.log(trie);
|
|
||||||
|
|
||||||
expect(trie.find('.skk.moe')).toStrictEqual(['image.cdn.skk.moe', 'blog.skk.moe']);
|
expect(trie.find('.skk.moe')).toStrictEqual(['image.cdn.skk.moe', 'blog.skk.moe']);
|
||||||
expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']);
|
expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']);
|
||||||
});
|
});
|
||||||
@ -124,8 +203,6 @@ describe.each([
|
|||||||
it('should match subdomain - 2', () => {
|
it('should match subdomain - 2', () => {
|
||||||
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
|
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', '.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);
|
||||||
|
|
||||||
console.log(trie);
|
|
||||||
|
|
||||||
expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']);
|
expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe', 'image.cdn.skk.moe', 'blog.skk.moe']);
|
||||||
expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']);
|
expect(trie.find('.sukkaw.com')).toStrictEqual(['www.sukkaw.com']);
|
||||||
});
|
});
|
||||||
|
|||||||
@ -14,7 +14,7 @@ export const parseDomesticList = async () => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const trie = createTrie(set);
|
const trie = createTrie(set, true);
|
||||||
|
|
||||||
const top5000 = new Set<string>();
|
const top5000 = new Set<string>();
|
||||||
|
|
||||||
|
|||||||
@ -75,7 +75,7 @@ export const parseGfwList = async () => {
|
|||||||
})).text();
|
})).text();
|
||||||
const topDomains = parse(res);
|
const topDomains = parse(res);
|
||||||
|
|
||||||
const trie = createTrie(blackSet);
|
const trie = createTrie(blackSet, true);
|
||||||
|
|
||||||
for await (const [domain] of topDomains) {
|
for await (const [domain] of topDomains) {
|
||||||
if (trie.has(domain)) {
|
if (trie.has(domain)) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user