Chore: print gfwlist validation count

This commit is contained in:
SukkaW 2025-11-19 23:20:50 +08:00
parent c12add3a8b
commit b388936bf3
3 changed files with 19 additions and 8 deletions

View File

@ -23,10 +23,11 @@ type TrieNode<Meta = any> = [
function deepTrieNodeToJSON<Meta = unknown>(node: TrieNode,
unpackMeta: ((meta?: Meta) => string) | undefined) {
const obj: Record<string, unknown> = {};
const obj: Record<string, unknown> = {
['[start]']: getBit(node[0], START),
['[subdomain]']: getBit(node[0], INCLUDE_ALL_SUBDOMAIN)
};
obj['[start]'] = getBit(node[0], START);
obj['[subdomain]'] = getBit(node[0], INCLUDE_ALL_SUBDOMAIN);
if (node[4] != null) {
if (unpackMeta) {
obj['[meta]'] = unpackMeta(node[4]);
@ -278,12 +279,10 @@ abstract class Triebase<Meta = unknown> {
) {
const dfsImpl = withSort ? Triebase.dfsWithSort : Triebase.dfs;
const nodeStack: Array<TrieNode<Meta>> = [];
nodeStack.push(initialNode);
const nodeStack: Array<TrieNode<Meta>> = [initialNode];
// Resolving initial string (beginning at the start of the stack)
const suffixStack: string[][] = [];
suffixStack.push(initialSuffix);
const suffixStack: string[][] = [initialSuffix];
let node: TrieNode<Meta> = initialNode;
let r;

View File

@ -57,6 +57,8 @@ export async function parseGfwList() {
const whiteSet = new Set<string>();
const gfwListTrie = new HostnameSmolTrie();
let totalGfwSize = 0;
const gfwlistIgnoreLineKwfilter = createKeywordFilter([
'.*',
'*',
@ -100,14 +102,17 @@ export async function parseGfwList() {
}
const d = fastNormalizeDomain(line);
if (d) {
totalGfwSize++;
gfwListTrie.add(d);
continue;
}
}
for await (const l of await fetchRemoteTextByLine('https://raw.githubusercontent.com/Loyalsoldier/cn-blocked-domain/release/domains.txt', true)) {
totalGfwSize++;
gfwListTrie.add(l);
}
for await (const l of await fetchRemoteTextByLine('https://raw.githubusercontent.com/Loyalsoldier/v2ray-rules-dat/release/gfw.txt', true)) {
totalGfwSize++;
gfwListTrie.add(l);
}
@ -147,6 +152,9 @@ export async function parseGfwList() {
whiteSet.forEach(domain => gfwListTrie.whitelist(domain, true));
let gfwListSize = 0;
gfwListTrie.dump(() => gfwListSize++);
const kwfilter = createKeywordFilter([...keywordSet]);
const missingTop10000Gfwed = new Set<string>();
@ -158,7 +166,7 @@ export async function parseGfwList() {
});
console.log(Array.from(missingTop10000Gfwed).join('\n'));
console.log('', missingTop10000Gfwed.size);
console.log({ totalGfwSize, gfwListSize, missingSize: missingTop10000Gfwed.size });
return [
whiteSet,
@ -170,3 +178,5 @@ export async function parseGfwList() {
if (require.main === module) {
parseGfwList().catch(console.error);
}
// python.com: waiting to be sold

View File

@ -577,6 +577,7 @@ DOMAIN-SUFFIX,atlassian.com
DOMAIN-SUFFIX,atlassian.net
DOMAIN-SUFFIX,autodesk.com
DOMAIN-SUFFIX,auth0.com
DOMAIN-SUFFIX,av-wiki.net
DOMAIN-SUFFIX,av28.com
DOMAIN-SUFFIX,avg.com
DOMAIN-SUFFIX,avgle.com
@ -1041,6 +1042,7 @@ DOMAIN-SUFFIX,packagephobia.com
DOMAIN-SUFFIX,pagesix.com
DOMAIN-SUFFIX,pastebin.com
DOMAIN-SUFFIX,patreon.com
DOMAIN-SUFFIX,pawoo.net
DOMAIN-SUFFIX,peeringdb.com
DOMAIN-SUFFIX,perplexity.ai
DOMAIN-SUFFIX,pigav.com