From a004ffb9606c8d986433152021b675b3da6eaf45 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Wed, 18 Sep 2024 13:57:00 +0800 Subject: [PATCH] Fix: avoid non-domainlist into trie --- Build/build-cdn-download-conf.ts | 12 +++++++++++- Build/build-reject-domainset.ts | 5 ++--- Build/build-speedtest-domainset.ts | 3 +-- Build/lib/process-line.ts | 1 + 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/Build/build-cdn-download-conf.ts b/Build/build-cdn-download-conf.ts index 377e8488..47f11a80 100644 --- a/Build/build-cdn-download-conf.ts +++ b/Build/build-cdn-download-conf.ts @@ -10,10 +10,20 @@ import { appendArrayInPlace } from './lib/append-array-in-place'; import { sortDomains } from './lib/stable-sort-domain'; import { output } from './lib/misc'; import { SOURCE_DIR } from './constants/dir'; +import { processLine } from './lib/process-line'; const getS3OSSDomainsPromise = (async (): Promise => { const trie = createTrie( - await getPublicSuffixListTextPromise(), + (await getPublicSuffixListTextPromise()).reduce( + (acc, cur) => { + const tmp = processLine(cur); + if (tmp) { + acc.push(tmp); + } + return acc; + }, + [] + ), false ); diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts index 8a158e5d..e1a8aea5 100644 --- a/Build/build-reject-domainset.ts +++ b/Build/build-reject-domainset.ts @@ -7,7 +7,6 @@ import { createTrie } from './lib/trie'; import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA } from './constants/reject-data-source'; import { createRuleset, compareAndWriteFile } from './lib/create-file'; -import { domainsetDeduper } from './lib/domain-deduper'; import createKeywordFilter from './lib/aho-corasick'; import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line'; import { buildParseDomainMap, sortDomains } from './lib/stable-sort-domain'; @@ -148,8 +147,8 @@ export const buildRejectDomainSet 
= task(require.main === module, __filename)(as }); // Dedupe domainSets - const dedupedDominArray = span.traceChildSync('dedupe from covered subdomain (base)', () => domainsetDeduper(baseTrie)); - const dudupedDominArrayExtra = span.traceChildSync('dedupe from covered subdomain (extra)', () => domainsetDeduper(extraTrie)); + const dedupedDominArray = span.traceChildSync('dedupe from covered subdomain (base)', () => baseTrie.dump()); + const dudupedDominArrayExtra = span.traceChildSync('dedupe from covered subdomain (extra)', () => extraTrie.dump()); console.log(`Final size ${dedupedDominArray.length} + ${dudupedDominArrayExtra.length}`); diff --git a/Build/build-speedtest-domainset.ts b/Build/build-speedtest-domainset.ts index ad72d407..67b6cb61 100644 --- a/Build/build-speedtest-domainset.ts +++ b/Build/build-speedtest-domainset.ts @@ -1,4 +1,3 @@ -import { domainsetDeduper } from './lib/domain-deduper'; import path from 'node:path'; import { createRuleset } from './lib/create-file'; import { sortDomains } from './lib/stable-sort-domain'; @@ -235,7 +234,7 @@ export const buildSpeedtestDomainSet = task(require.main === module, __filename) } })))); - const deduped = span.traceChildSync('sort result', () => sortDomains(domainsetDeduper(domainTrie))); + const deduped = span.traceChildSync('sort result', () => sortDomains(domainTrie.dump())); const description = [ ...SHARED_DESCRIPTION, diff --git a/Build/lib/process-line.ts b/Build/lib/process-line.ts index ab41b08f..4785a66a 100644 --- a/Build/lib/process-line.ts +++ b/Build/lib/process-line.ts @@ -16,6 +16,7 @@ export const processLine = (line: string): string | null => { || line_0 === '\r' || line_0 === '\n' || line_0 === '!' + || (line_0 === '/' && trimmed[1] === '/') ) { return null; }