diff --git a/Build/build-common.ts b/Build/build-common.ts index a8c202e2..91530951 100644 --- a/Build/build-common.ts +++ b/Build/build-common.ts @@ -109,7 +109,7 @@ async function transformDomainset(parentSpan: Span, sourcePath: string, relative ) ]; - return createRuleset( + return span.traceAsyncFn(() => createRuleset( span, title, description, @@ -118,7 +118,7 @@ async function transformDomainset(parentSpan: Span, sourcePath: string, relative 'domainset', path.resolve(outputSurgeDir, relativePath), path.resolve(outputClashDir, `${relativePath.slice(0, -path.extname(relativePath).length)}.txt`) - ); + )); } /** diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts index b885a031..5c0a56d6 100644 --- a/Build/build-reject-domainset.ts +++ b/Build/build-reject-domainset.ts @@ -4,12 +4,12 @@ import path from 'path'; import { processHosts, processFilterRules, processDomainLists } from './lib/parse-filter'; import { createTrie } from './lib/trie'; -import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST, DOMAIN_LISTS } from './lib/reject-data-source'; +import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS } from './lib/reject-data-source'; import { createRuleset, compareAndWriteFile } from './lib/create-file'; import { processLine } from './lib/process-line'; import { domainDeduper } from './lib/domain-deduper'; import createKeywordFilter from './lib/aho-corasick'; -import { readFileByLine } from './lib/fetch-text-by-line'; +import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line'; import { sortDomains } from './lib/stable-sort-domain'; import { task } from './trace'; import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix'; @@ -63,25 +63,10 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { setAddFromArray(domainSets, purePhishingDomains); }), childSpan.traceChild('process reject_sukka.conf').traceAsyncFn(async () => { - for await (const l of readFileByLine(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf'))) { - const line = processLine(l); - if (!line) continue; - domainSets.add(line); - } + setAddFromArray(domainSets, await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf'))); }) ]); - // remove pre-defined enforced blacklist from whitelist - const trie0 = createTrie(filterRuleWhitelistDomainSets); - - for (let i = 0, len1 = PREDEFINED_ENFORCED_BACKLIST.length; i < len1; i++) { - const enforcedBlack = PREDEFINED_ENFORCED_BACKLIST[i]; - const found = trie0.find(enforcedBlack); - for (let j = 0, len2 = found.length; j < len2; j++) { - filterRuleWhitelistDomainSets.delete(found[j]); - } - } - return shouldStop; }); @@ -116,25 +101,22 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { }); filterRuleWhitelistDomainSets.forEach(suffix => { trie1.find(suffix, true).forEach(f => domainSets.delete(f)); + + if (suffix[0] === '.') { + // handle case like removing `g.msn.com` due to white `.g.msn.com` (`@@||g.msn.com`) + domainSets.delete(suffix.slice(1)); + } else { + // If `g.msn.com` is whitelisted, then `.g.msn.com` should be removed from domain set + domainSets.delete(`.${suffix}`); + } }); // remove pre-defined enforced blacklist from whitelist const kwfilter = createKeywordFilter(domainKeywordsSet); - // handle case like removing `g.msn.com` due to white `.g.msn.com` (`@@||g.msn.com`) for (const domain of domainSets) { - if (domain[0] === '.') { - if (filterRuleWhitelistDomainSets.has(domain)) { - domainSets.delete(domain); - continue; - } - } else if (filterRuleWhitelistDomainSets.has(`.${domain}`)) { - domainSets.delete(domain); - continue; - } - // Remove keyword - if (kwfilter.search(domain)) { + if (kwfilter(domain)) { domainSets.delete(domain); } } diff --git a/Build/lib/aho-corasick.ts b/Build/lib/aho-corasick.ts index e713d97b..9b0fd136 100644 --- a/Build/lib/aho-corasick.ts +++ b/Build/lib/aho-corasick.ts @@ -77,7 +77,7 @@ const createKeywordFilter = (keys: string[] | Set) => { build(); - const search = (text: string) => { + return (text: string) => { let node: Node | undefined = root; for (let i = 0, textLen = text.length; i < textLen; i++) { @@ -96,10 +96,6 @@ const createKeywordFilter = (keys: string[] | Set) => { return false; }; - - return { - search - }; }; export default createKeywordFilter; diff --git a/Build/lib/get-phishing-domains.ts b/Build/lib/get-phishing-domains.ts index ea8d33ec..6ade79b5 100644 --- a/Build/lib/get-phishing-domains.ts +++ b/Build/lib/get-phishing-domains.ts @@ -99,17 +99,19 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g SetAdd(domainSet, domainSet2); } - span.traceChild('whitelisting phishing domains').traceSyncFn(() => { - const trieForRemovingWhiteListed = createTrie(domainSet); + span.traceChild('whitelisting phishing domains').traceSyncFn((parentSpan) => { + const trieForRemovingWhiteListed = parentSpan.traceChild('create trie for whitelisting').traceSyncFn(() => createTrie(domainSet)); - for (let i = 0, len = WHITELIST_DOMAIN.length; i < len; i++) { - const white = WHITELIST_DOMAIN[i]; - const found = trieForRemovingWhiteListed.find(`.${white}`, true); - for (let j = 0, len2 = found.length; j < len2; j++) { - domainSet.delete(found[j]); + return parentSpan.traceChild('delete whitelisted from domainset').traceSyncFn(() => { + for (let i = 0, len = WHITELIST_DOMAIN.length; i < len; i++) { + const white = WHITELIST_DOMAIN[i]; + const found = trieForRemovingWhiteListed.find(`.${white}`, true); + for (let j = 0, len2 = found.length; j < len2; j++) { + domainSet.delete(found[j]); + } + domainSet.delete(white); } - domainSet.delete(white); - } + }); }); const domainCountMap: Record = {}; @@ -177,11 +179,15 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g } }); - const results = span.traceChild('get final phishing results').traceSyncFn( - () => Object.entries(domainCountMap) - .filter(entries => entries[1] >= 5) - .map(entries => entries[0]) - ); + const results = span.traceChild('get final phishing results').traceSyncFn(() => { + const results: string[] = []; + for (const domain in domainCountMap) { + if (domainCountMap[domain] > 5) { + results.push(domain); + } + } + return results; + }); return [results, domainSet] as const; }); diff --git a/Build/lib/reject-data-source.ts b/Build/lib/reject-data-source.ts index 363340b0..e6ada558 100644 --- a/Build/lib/reject-data-source.ts +++ b/Build/lib/reject-data-source.ts @@ -211,10 +211,6 @@ export const PREDEFINED_WHITELIST = [ 'pstmrk.it' ]; -export const PREDEFINED_ENFORCED_BACKLIST = [ - 'telemetry.mozilla.org' -]; - export const PREDEFINED_ENFORCED_WHITELIST = [ 'godaddysites.com', 'web.app', diff --git a/Source/non_ip/reject.conf b/Source/non_ip/reject.conf index 1bdb67c2..06adfc30 100644 --- a/Source/non_ip/reject.conf +++ b/Source/non_ip/reject.conf @@ -43,6 +43,10 @@ DOMAIN-SUFFIX,pantheonsite.io DOMAIN-SUFFIX,sitebeat.crazydomains.com # >> Snowplow Analytics (publicsuffix) DOMAIN-SUFFIX,try-snowplow.com +# >> Mozilla Telemetry (Enforcing) +DOMAIN-SUFFIX,telemetry-coverage.mozilla.org +DOMAIN-SUFFIX,telemetry.mozilla.org +DOMAIN-SUFFIX,incoming-telemetry.thunderbird.net # >> Phishing DOMAIN-SUFFIX,gofenews.com