mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Chore: update source deduping tool
This commit is contained in:
parent
4d9c2a5154
commit
bb6c7cb3fa
@ -365,4 +365,47 @@ describe('smol tree', () => {
|
||||
trie.whitelist('cdn.example.com');
|
||||
expect(trie.dump()).toStrictEqual(['blog.cdn.example.com']);
|
||||
});
|
||||
|
||||
it('contains - normal', () => {
  // Entries without a leading dot; the second argument is forwarded to createTrie unchanged.
  const trie = createTrie(
    ['skk.moe', 'anotherskk.moe', 'blog.anotherskk.moe', 'blog.skk.moe'],
    true
  );

  // Every hostname that was inserted must be reported as contained.
  const inserted = ['skk.moe', 'blog.skk.moe', 'anotherskk.moe', 'blog.anotherskk.moe'];
  for (const hostname of inserted) {
    expect(trie.contains(hostname)).toBe(true);
  }

  // Unrelated domains, a truncated TLD and a sibling subdomain that was never inserted must not match.
  const neverInserted = ['example.com', 'blog.example.com', 'skk.mo', 'cdn.skk.moe'];
  for (const hostname of neverInserted) {
    expect(trie.contains(hostname)).toBe(false);
  }
});
|
||||
|
||||
it('contains - subdomain', () => {
  const trie = createTrie(['index.rubygems.org'], true);

  // Only the exact hostname is expected to match: neither its parent domain nor a deeper subdomain.
  const expectations: Array<[hostname: string, expected: boolean]> = [
    ['rubygems.org', false],
    ['index.rubygems.org', true],
    ['sub.index.rubygems.org', false]
  ];

  for (const [hostname, expected] of expectations) {
    expect(trie.contains(hostname)).toBe(expected);
  }
});
|
||||
|
||||
it('contains - include subdomains', () => {
  // A single entry with a leading dot.
  const trie = createTrie(['.skk.moe'], true);

  // The apex and subdomains at any depth are expected to match.
  const covered = ['skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe'];
  for (const hostname of covered) {
    expect(trie.contains(hostname)).toBe(true);
  }

  // Other domains and a truncated TLD are expected not to match.
  const notCovered = ['example.com', 'blog.example.com', 'skk.mo'];
  for (const hostname of notCovered) {
    expect(trie.contains(hostname)).toBe(false);
  }
});
|
||||
});
|
||||
|
||||
@ -186,10 +186,44 @@ abstract class Triebase<Meta = unknown> {
|
||||
/**
 * Reports whether `suffix` is covered by this trie.
 *
 * The hostname is walked token by token from the root via `walkHostnameTokens`.
 *
 * - If a token has no matching child, the lookup stops there and succeeds only
 *   when the last node reached carries the INCLUDE_ALL_SUBDOMAIN bit.
 * - If every token is matched, the result is the final node's
 *   INCLUDE_ALL_SUBDOMAIN bit when `includeAllSubdomain` is set, otherwise its
 *   START bit.
 *
 * @param suffix - Hostname to look up. A leading `.` is skipped when tokenizing.
 * @param includeAllSubdomain - Whether a fully matched node must carry the
 *   INCLUDE_ALL_SUBDOMAIN bit. Defaults to `true` when `suffix` starts with `.`.
 * @returns `true` when the hostname is covered as described above.
 */
public contains(suffix: string, includeAllSubdomain = suffix[0] === '.'): boolean {
  const hostnameFromIndex = suffix[0] === '.' ? 1 : 0;

  let node: TrieNode = this.$root;
  // Set when the walk stops early at a node flagged INCLUDE_ALL_SUBDOMAIN.
  let coveredByWildcard = false;

  const onToken = (token: string) => {
    // Single lookup instead of has() followed by get()!.
    const next = node[2].get(token);

    if (next === undefined) {
      if (getBit(node[0], INCLUDE_ALL_SUBDOMAIN)) {
        coveredByWildcard = true;
      }
      // NOTE(review): returning null presumably aborts the walk and makes
      // walkHostnameTokens itself return null — confirm against its definition.
      return null;
    }

    node = next;
    return false;
  };

  if (walkHostnameTokens(suffix, onToken, hostnameFromIndex) === null) {
    return coveredByWildcard;
  }

  if (includeAllSubdomain) return getBit(node[0], INCLUDE_ALL_SUBDOMAIN);
  return getBit(node[0], START);
};
|
||||
|
||||
private static bfsResults: [node: TrieNode | null, suffix: string[]] = [null, []];
|
||||
|
||||
@ -4,7 +4,7 @@ import fsp from 'node:fs/promises';
|
||||
import { SOURCE_DIR } from './constants/dir';
|
||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { processLine } from './lib/process-line';
|
||||
import { HostnameSmolTrie, HostnameTrie } from './lib/trie';
|
||||
import { HostnameSmolTrie } from './lib/trie';
|
||||
import { task } from './trace';
|
||||
|
||||
const ENFORCED_WHITELIST = [
|
||||
@ -21,7 +21,8 @@ const ENFORCED_WHITELIST = [
|
||||
'samsungqbe.com',
|
||||
'ntp.api.bz',
|
||||
'cdn.tuk.dev',
|
||||
'vocadb-analytics.fly.dev'
|
||||
'vocadb-analytics.fly.dev',
|
||||
'img.vim-cn.com'
|
||||
];
|
||||
|
||||
// Hostname list kept alongside ENFORCED_WHITELIST.
// NOTE(review): 'ntp.api.bz' and 'img.vim-cn.com' are also listed in ENFORCED_WHITELIST — confirm whether the duplication is intentional.
const WHITELIST: string[] = ['httpdns.bilivideo.com', 'ntp.api.bz', 'httpdns-v6.gslb.yy.com', 'img.vim-cn.com', 'img.jjbb.me', 'thingproxy.freeboard.io', 'assets.chess24.com', 'cdn.chess24.com', 'static-assets.freeanimehentai.net', 'static.javcdn.info', 'cdn.vidible.tv', 'it.apache.contactlab.it', 'mirror.netinch.com', 'de.freedif.org', 'league1.maoyuncloud.cn', 'spl.ztvx8.com', 'zls.xz6d.com', 'iadmatapk.nosdn.127.net', 'show.buzzcity.net', 'click.buzzcity.net', 'apps.buzzcity.net', 'content-cdn.y2mate.com', 'images.voguehk.com', 'cdn.amh.moe', 'statics.mnnews.tw'];
|
||||
@ -51,10 +52,13 @@ task(require.main === module, __filename)(async (span) => {
|
||||
async function dedupeFile(file: string, whitelist: HostnameSmolTrie) {
|
||||
const result: string[] = [];
|
||||
|
||||
const trie = new HostnameTrie();
|
||||
const trie = new HostnameSmolTrie();
|
||||
|
||||
let line: string | null = '';
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/unbound-method -- .call
|
||||
let trieHasOrContains = HostnameSmolTrie.prototype.has;
|
||||
|
||||
for await (const l of readFileByLine(file)) {
|
||||
line = processLine(l);
|
||||
|
||||
@ -62,12 +66,16 @@ async function dedupeFile(file: string, whitelist: HostnameSmolTrie) {
|
||||
if (l.startsWith('# $ skip_dedupe_src')) {
|
||||
return;
|
||||
}
|
||||
if (l.startsWith('# $ dedupe_use_trie_contains')) {
|
||||
// eslint-disable-next-line @typescript-eslint/unbound-method -- .call
|
||||
trieHasOrContains = HostnameSmolTrie.prototype.contains;
|
||||
}
|
||||
|
||||
result.push(l); // keep all comments and blank lines
|
||||
continue;
|
||||
}
|
||||
|
||||
if (trie.has(line)) {
|
||||
if (trieHasOrContains.call(trie, line)) {
|
||||
continue; // drop duplicate
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user