Chore: print gfwlist validation count

This commit is contained in:
SukkaW 2025-11-19 23:20:50 +08:00
parent c12add3a8b
commit b388936bf3
3 changed files with 19 additions and 8 deletions

View File

@ -23,10 +23,11 @@ type TrieNode<Meta = any> = [
function deepTrieNodeToJSON<Meta = unknown>(node: TrieNode, function deepTrieNodeToJSON<Meta = unknown>(node: TrieNode,
unpackMeta: ((meta?: Meta) => string) | undefined) { unpackMeta: ((meta?: Meta) => string) | undefined) {
const obj: Record<string, unknown> = {}; const obj: Record<string, unknown> = {
['[start]']: getBit(node[0], START),
['[subdomain]']: getBit(node[0], INCLUDE_ALL_SUBDOMAIN)
};
obj['[start]'] = getBit(node[0], START);
obj['[subdomain]'] = getBit(node[0], INCLUDE_ALL_SUBDOMAIN);
if (node[4] != null) { if (node[4] != null) {
if (unpackMeta) { if (unpackMeta) {
obj['[meta]'] = unpackMeta(node[4]); obj['[meta]'] = unpackMeta(node[4]);
@ -278,12 +279,10 @@ abstract class Triebase<Meta = unknown> {
) { ) {
const dfsImpl = withSort ? Triebase.dfsWithSort : Triebase.dfs; const dfsImpl = withSort ? Triebase.dfsWithSort : Triebase.dfs;
const nodeStack: Array<TrieNode<Meta>> = []; const nodeStack: Array<TrieNode<Meta>> = [initialNode];
nodeStack.push(initialNode);
// Resolving initial string (begin the start of the stack) // Resolving initial string (begin the start of the stack)
const suffixStack: string[][] = []; const suffixStack: string[][] = [initialSuffix];
suffixStack.push(initialSuffix);
let node: TrieNode<Meta> = initialNode; let node: TrieNode<Meta> = initialNode;
let r; let r;

View File

@ -57,6 +57,8 @@ export async function parseGfwList() {
const whiteSet = new Set<string>(); const whiteSet = new Set<string>();
const gfwListTrie = new HostnameSmolTrie(); const gfwListTrie = new HostnameSmolTrie();
let totalGfwSize = 0;
const gfwlistIgnoreLineKwfilter = createKeywordFilter([ const gfwlistIgnoreLineKwfilter = createKeywordFilter([
'.*', '.*',
'*', '*',
@ -100,14 +102,17 @@ export async function parseGfwList() {
} }
const d = fastNormalizeDomain(line); const d = fastNormalizeDomain(line);
if (d) { if (d) {
totalGfwSize++;
gfwListTrie.add(d); gfwListTrie.add(d);
continue; continue;
} }
} }
for await (const l of await fetchRemoteTextByLine('https://raw.githubusercontent.com/Loyalsoldier/cn-blocked-domain/release/domains.txt', true)) { for await (const l of await fetchRemoteTextByLine('https://raw.githubusercontent.com/Loyalsoldier/cn-blocked-domain/release/domains.txt', true)) {
totalGfwSize++;
gfwListTrie.add(l); gfwListTrie.add(l);
} }
for await (const l of await fetchRemoteTextByLine('https://raw.githubusercontent.com/Loyalsoldier/v2ray-rules-dat/release/gfw.txt', true)) { for await (const l of await fetchRemoteTextByLine('https://raw.githubusercontent.com/Loyalsoldier/v2ray-rules-dat/release/gfw.txt', true)) {
totalGfwSize++;
gfwListTrie.add(l); gfwListTrie.add(l);
} }
@ -147,6 +152,9 @@ export async function parseGfwList() {
whiteSet.forEach(domain => gfwListTrie.whitelist(domain, true)); whiteSet.forEach(domain => gfwListTrie.whitelist(domain, true));
let gfwListSize = 0;
gfwListTrie.dump(() => gfwListSize++);
const kwfilter = createKeywordFilter([...keywordSet]); const kwfilter = createKeywordFilter([...keywordSet]);
const missingTop10000Gfwed = new Set<string>(); const missingTop10000Gfwed = new Set<string>();
@ -158,7 +166,7 @@ export async function parseGfwList() {
}); });
console.log(Array.from(missingTop10000Gfwed).join('\n')); console.log(Array.from(missingTop10000Gfwed).join('\n'));
console.log('', missingTop10000Gfwed.size); console.log({ totalGfwSize, gfwListSize, missingSize: missingTop10000Gfwed.size });
return [ return [
whiteSet, whiteSet,
@ -170,3 +178,5 @@ export async function parseGfwList() {
if (require.main === module) { if (require.main === module) {
parseGfwList().catch(console.error); parseGfwList().catch(console.error);
} }
// python.com waiting-for-sell

View File

@ -577,6 +577,7 @@ DOMAIN-SUFFIX,atlassian.com
DOMAIN-SUFFIX,atlassian.net DOMAIN-SUFFIX,atlassian.net
DOMAIN-SUFFIX,autodesk.com DOMAIN-SUFFIX,autodesk.com
DOMAIN-SUFFIX,auth0.com DOMAIN-SUFFIX,auth0.com
DOMAIN-SUFFIX,av-wiki.net
DOMAIN-SUFFIX,av28.com DOMAIN-SUFFIX,av28.com
DOMAIN-SUFFIX,avg.com DOMAIN-SUFFIX,avg.com
DOMAIN-SUFFIX,avgle.com DOMAIN-SUFFIX,avgle.com
@ -1041,6 +1042,7 @@ DOMAIN-SUFFIX,packagephobia.com
DOMAIN-SUFFIX,pagesix.com DOMAIN-SUFFIX,pagesix.com
DOMAIN-SUFFIX,pastebin.com DOMAIN-SUFFIX,pastebin.com
DOMAIN-SUFFIX,patreon.com DOMAIN-SUFFIX,patreon.com
DOMAIN-SUFFIX,pawoo.net
DOMAIN-SUFFIX,peeringdb.com DOMAIN-SUFFIX,peeringdb.com
DOMAIN-SUFFIX,perplexity.ai DOMAIN-SUFFIX,perplexity.ai
DOMAIN-SUFFIX,pigav.com DOMAIN-SUFFIX,pigav.com