mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-14 02:00:37 +08:00
Chore: update source deduping tool
This commit is contained in:
parent
4d9c2a5154
commit
bb6c7cb3fa
@ -365,4 +365,47 @@ describe('smol tree', () => {
|
|||||||
trie.whitelist('cdn.example.com');
|
trie.whitelist('cdn.example.com');
|
||||||
expect(trie.dump()).toStrictEqual(['blog.cdn.example.com']);
|
expect(trie.dump()).toStrictEqual(['blog.cdn.example.com']);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('contains - normal', () => {
|
||||||
|
const trie = createTrie([
|
||||||
|
'skk.moe',
|
||||||
|
'anotherskk.moe',
|
||||||
|
'blog.anotherskk.moe',
|
||||||
|
'blog.skk.moe'
|
||||||
|
], true);
|
||||||
|
|
||||||
|
expect(trie.contains('skk.moe')).toBe(true);
|
||||||
|
expect(trie.contains('blog.skk.moe')).toBe(true);
|
||||||
|
expect(trie.contains('anotherskk.moe')).toBe(true);
|
||||||
|
expect(trie.contains('blog.anotherskk.moe')).toBe(true);
|
||||||
|
|
||||||
|
expect(trie.contains('example.com')).toBe(false);
|
||||||
|
expect(trie.contains('blog.example.com')).toBe(false);
|
||||||
|
expect(trie.contains('skk.mo')).toBe(false);
|
||||||
|
expect(trie.contains('cdn.skk.moe')).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('contains - subdomain', () => {
|
||||||
|
const trie = createTrie([
|
||||||
|
'index.rubygems.org'
|
||||||
|
], true);
|
||||||
|
|
||||||
|
expect(trie.contains('rubygems.org')).toBe(false);
|
||||||
|
expect(trie.contains('index.rubygems.org')).toBe(true);
|
||||||
|
expect(trie.contains('sub.index.rubygems.org')).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('contains - include subdomains', () => {
|
||||||
|
const trie = createTrie([
|
||||||
|
'.skk.moe'
|
||||||
|
], true);
|
||||||
|
|
||||||
|
expect(trie.contains('skk.moe')).toBe(true);
|
||||||
|
expect(trie.contains('blog.skk.moe')).toBe(true);
|
||||||
|
expect(trie.contains('image.cdn.skk.moe')).toBe(true);
|
||||||
|
|
||||||
|
expect(trie.contains('example.com')).toBe(false);
|
||||||
|
expect(trie.contains('blog.example.com')).toBe(false);
|
||||||
|
expect(trie.contains('skk.mo')).toBe(false);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -186,10 +186,44 @@ abstract class Triebase<Meta = unknown> {
|
|||||||
public contains(suffix: string, includeAllSubdomain = suffix[0] === '.'): boolean {
|
public contains(suffix: string, includeAllSubdomain = suffix[0] === '.'): boolean {
|
||||||
const hostnameFromIndex = suffix[0] === '.' ? 1 : 0;
|
const hostnameFromIndex = suffix[0] === '.' ? 1 : 0;
|
||||||
|
|
||||||
const res = this.walkIntoLeafWithSuffix(suffix, hostnameFromIndex);
|
let node: TrieNode = this.$root;
|
||||||
if (!res) return false;
|
// let parent: TrieNode = node;
|
||||||
if (includeAllSubdomain) return getBit(res.node[0], INCLUDE_ALL_SUBDOMAIN);
|
|
||||||
return true;
|
let child: Map<string, TrieNode<Meta>> = node[2];
|
||||||
|
|
||||||
|
let result = false;
|
||||||
|
|
||||||
|
const onToken = (token: string) => {
|
||||||
|
// if (token === '') {
|
||||||
|
// return true;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// parent = node;
|
||||||
|
|
||||||
|
child = node[2];
|
||||||
|
|
||||||
|
if (child.has(token)) {
|
||||||
|
node = child.get(token)!;
|
||||||
|
} else {
|
||||||
|
if (getBit(node[0], INCLUDE_ALL_SUBDOMAIN)) {
|
||||||
|
result = true;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (walkHostnameTokens(suffix, onToken, hostnameFromIndex) === null) {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (includeAllSubdomain) return getBit(node[0], INCLUDE_ALL_SUBDOMAIN);
|
||||||
|
return getBit(node[0], START);
|
||||||
|
|
||||||
|
// if (res === null) return false;
|
||||||
|
// if (includeAllSubdomain) return getBit(res.node[0], INCLUDE_ALL_SUBDOMAIN);
|
||||||
|
// return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
private static bfsResults: [node: TrieNode | null, suffix: string[]] = [null, []];
|
private static bfsResults: [node: TrieNode | null, suffix: string[]] = [null, []];
|
||||||
|
|||||||
@ -4,7 +4,7 @@ import fsp from 'node:fs/promises';
|
|||||||
import { SOURCE_DIR } from './constants/dir';
|
import { SOURCE_DIR } from './constants/dir';
|
||||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
import { readFileByLine } from './lib/fetch-text-by-line';
|
||||||
import { processLine } from './lib/process-line';
|
import { processLine } from './lib/process-line';
|
||||||
import { HostnameSmolTrie, HostnameTrie } from './lib/trie';
|
import { HostnameSmolTrie } from './lib/trie';
|
||||||
import { task } from './trace';
|
import { task } from './trace';
|
||||||
|
|
||||||
const ENFORCED_WHITELIST = [
|
const ENFORCED_WHITELIST = [
|
||||||
@ -21,7 +21,8 @@ const ENFORCED_WHITELIST = [
|
|||||||
'samsungqbe.com',
|
'samsungqbe.com',
|
||||||
'ntp.api.bz',
|
'ntp.api.bz',
|
||||||
'cdn.tuk.dev',
|
'cdn.tuk.dev',
|
||||||
'vocadb-analytics.fly.dev'
|
'vocadb-analytics.fly.dev',
|
||||||
|
'img.vim-cn.com'
|
||||||
];
|
];
|
||||||
|
|
||||||
const WHITELIST: string[] = ['httpdns.bilivideo.com', 'ntp.api.bz', 'httpdns-v6.gslb.yy.com', 'img.vim-cn.com', 'img.jjbb.me', 'thingproxy.freeboard.io', 'assets.chess24.com', 'cdn.chess24.com', 'static-assets.freeanimehentai.net', 'static.javcdn.info', 'cdn.vidible.tv', 'it.apache.contactlab.it', 'mirror.netinch.com', 'de.freedif.org', 'league1.maoyuncloud.cn', 'spl.ztvx8.com', 'zls.xz6d.com', 'iadmatapk.nosdn.127.net', 'show.buzzcity.net', 'click.buzzcity.net', 'apps.buzzcity.net', 'content-cdn.y2mate.com', 'images.voguehk.com', 'cdn.amh.moe', 'statics.mnnews.tw'];
|
const WHITELIST: string[] = ['httpdns.bilivideo.com', 'ntp.api.bz', 'httpdns-v6.gslb.yy.com', 'img.vim-cn.com', 'img.jjbb.me', 'thingproxy.freeboard.io', 'assets.chess24.com', 'cdn.chess24.com', 'static-assets.freeanimehentai.net', 'static.javcdn.info', 'cdn.vidible.tv', 'it.apache.contactlab.it', 'mirror.netinch.com', 'de.freedif.org', 'league1.maoyuncloud.cn', 'spl.ztvx8.com', 'zls.xz6d.com', 'iadmatapk.nosdn.127.net', 'show.buzzcity.net', 'click.buzzcity.net', 'apps.buzzcity.net', 'content-cdn.y2mate.com', 'images.voguehk.com', 'cdn.amh.moe', 'statics.mnnews.tw'];
|
||||||
@ -51,10 +52,13 @@ task(require.main === module, __filename)(async (span) => {
|
|||||||
async function dedupeFile(file: string, whitelist: HostnameSmolTrie) {
|
async function dedupeFile(file: string, whitelist: HostnameSmolTrie) {
|
||||||
const result: string[] = [];
|
const result: string[] = [];
|
||||||
|
|
||||||
const trie = new HostnameTrie();
|
const trie = new HostnameSmolTrie();
|
||||||
|
|
||||||
let line: string | null = '';
|
let line: string | null = '';
|
||||||
|
|
||||||
|
// eslint-disable-next-line @typescript-eslint/unbound-method -- .call
|
||||||
|
let trieHasOrContains = HostnameSmolTrie.prototype.has;
|
||||||
|
|
||||||
for await (const l of readFileByLine(file)) {
|
for await (const l of readFileByLine(file)) {
|
||||||
line = processLine(l);
|
line = processLine(l);
|
||||||
|
|
||||||
@ -62,12 +66,16 @@ async function dedupeFile(file: string, whitelist: HostnameSmolTrie) {
|
|||||||
if (l.startsWith('# $ skip_dedupe_src')) {
|
if (l.startsWith('# $ skip_dedupe_src')) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (l.startsWith('# $ dedupe_use_trie_contains')) {
|
||||||
|
// eslint-disable-next-line @typescript-eslint/unbound-method -- .call
|
||||||
|
trieHasOrContains = HostnameSmolTrie.prototype.contains;
|
||||||
|
}
|
||||||
|
|
||||||
result.push(l); // keep all comments and blank lines
|
result.push(l); // keep all comments and blank lines
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (trie.has(line)) {
|
if (trieHasOrContains.call(trie, line)) {
|
||||||
continue; // drop duplicate
|
continue; // drop duplicate
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user