Fix: avoid non-domainlist into trie

This commit is contained in:
SukkaW 2024-09-18 13:57:00 +08:00
parent c5513ef363
commit a004ffb960
4 changed files with 15 additions and 6 deletions

View File

@ -10,10 +10,20 @@ import { appendArrayInPlace } from './lib/append-array-in-place';
import { sortDomains } from './lib/stable-sort-domain';
import { output } from './lib/misc';
import { SOURCE_DIR } from './constants/dir';
import { processLine } from './lib/process-line';
const getS3OSSDomainsPromise = (async (): Promise<string[]> => {
const trie = createTrie(
await getPublicSuffixListTextPromise(),
(await getPublicSuffixListTextPromise()).reduce<string[]>(
(acc, cur) => {
const tmp = processLine(cur);
if (tmp) {
acc.push(tmp);
}
return acc;
},
[]
),
false
);

View File

@ -7,7 +7,6 @@ import { createTrie } from './lib/trie';
import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA } from './constants/reject-data-source';
import { createRuleset, compareAndWriteFile } from './lib/create-file';
import { domainsetDeduper } from './lib/domain-deduper';
import createKeywordFilter from './lib/aho-corasick';
import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
import { buildParseDomainMap, sortDomains } from './lib/stable-sort-domain';
@ -148,8 +147,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
});
// Dedupe domainSets
const dedupedDominArray = span.traceChildSync('dedupe from covered subdomain (base)', () => domainsetDeduper(baseTrie));
const dudupedDominArrayExtra = span.traceChildSync('dedupe from covered subdomain (extra)', () => domainsetDeduper(extraTrie));
const dedupedDominArray = span.traceChildSync('dedupe from covered subdomain (base)', () => baseTrie.dump());
const dudupedDominArrayExtra = span.traceChildSync('dedupe from covered subdomain (extra)', () => extraTrie.dump());
console.log(`Final size ${dedupedDominArray.length} + ${dudupedDominArrayExtra.length}`);

View File

@ -1,4 +1,3 @@
import { domainsetDeduper } from './lib/domain-deduper';
import path from 'node:path';
import { createRuleset } from './lib/create-file';
import { sortDomains } from './lib/stable-sort-domain';
@ -235,7 +234,7 @@ export const buildSpeedtestDomainSet = task(require.main === module, __filename)
}
}))));
const deduped = span.traceChildSync('sort result', () => sortDomains(domainsetDeduper(domainTrie)));
const deduped = span.traceChildSync('sort result', () => sortDomains(domainTrie.dump()));
const description = [
...SHARED_DESCRIPTION,

View File

@ -16,6 +16,7 @@ export const processLine = (line: string): string | null => {
|| line_0 === '\r'
|| line_0 === '\n'
|| line_0 === '!'
|| (line_0 === '/' && trimmed[1] === '/')
) {
return null;
}