diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts index 165602f3..4e85ea44 100644 --- a/Build/build-reject-domainset.ts +++ b/Build/build-reject-domainset.ts @@ -15,7 +15,7 @@ import * as tldts from 'tldts'; import { SHARED_DESCRIPTION } from './lib/constants'; import { getPhishingDomains } from './lib/get-phishing-domains'; -import * as SetHelpers from 'mnemonist/set'; +import { add as SetAdd, subtract as SetSubstract } from 'mnemonist/set'; import { setAddFromArray } from './lib/set-add-from-array'; import { sort } from './lib/timsort'; @@ -23,7 +23,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { /** Whitelists */ const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST); - const domainSets = new Set(); + let domainSets = new Set(); // Parse from AdGuard Filters const shouldStop = await span @@ -33,9 +33,9 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { let shouldStop = false; await Promise.all([ // Parse from remote hosts & domain lists - ...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2], entry[3]).then(hosts => SetHelpers.add(domainSets, hosts))), + ...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2], entry[3]).then(hosts => SetAdd(domainSets, hosts))), - ...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetHelpers.add(domainSets, hosts))), + ...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetAdd(domainSets, hosts))), ...ADGUARD_FILTERS.map(input => ( typeof input === 'string' @@ -58,7 +58,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { setAddFromArray(filterRuleWhitelistDomainSets, black); }))), getPhishingDomains(childSpan).then(([purePhishingDomains, fullPhishingDomainSet]) => { - SetHelpers.add(domainSets, fullPhishingDomainSet); + SetAdd(domainSets, 
fullPhishingDomainSet); setAddFromArray(domainSets, purePhishingDomains); }), childSpan.traceChildAsync('process reject_sukka.conf', async () => { @@ -94,22 +94,17 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { }); // Remove as many domains as possible from domainSets before creating trie - SetHelpers.subtract(domainSets, filterRuleWhitelistDomainSets); + SetSubstract(domainSets, filterRuleWhitelistDomainSets); - childSpan.traceChildSync('dedupe from white suffixes', () => { - const trie = createTrie(domainSets); + domainSets = new Set(childSpan.traceChildSync('dedupe from white suffixes', () => { + const trie = createTrie(domainSets, true, true); filterRuleWhitelistDomainSets.forEach(suffix => { - trie.substractSetInPlaceFromFound(suffix, domainSets); - if (suffix[0] === '.') { - domainSets.delete(suffix.slice(1)); - domainSets.delete(suffix); - } else { - domainSets.delete(`.${suffix}`); - domainSets.delete(suffix); - } + trie.whitelist(suffix); }); - }); + + return trie.dump(); + })); childSpan.traceChildSync('dedupe from black keywords', () => { const kwfilter = createKeywordFilter(domainKeywordsSet); diff --git a/Build/lib/get-phishing-domains.ts b/Build/lib/get-phishing-domains.ts index 1770453a..a8b4de07 100644 --- a/Build/lib/get-phishing-domains.ts +++ b/Build/lib/get-phishing-domains.ts @@ -1,22 +1,11 @@ import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix'; import { processDomainLists } from './parse-filter'; import * as tldts from 'tldts'; -import { createTrie } from './trie'; import { TTL } from './cache-filesystem'; import { add as SetAdd } from 'mnemonist/set'; import type { Span } from '../trace'; -const WHITELIST_DOMAIN = [ - 'w3s.link', - 'dweb.link', - 'nftstorage.link', - 'square.site', - 'business.site', - 'page.link', // Firebase URL Shortener - 'fleek.cool', - 'notion.site' -]; const BLACK_TLD = new Set([ 'accountant', 'autos', @@ -114,20 +103,6 @@ export const getPhishingDomains = 
(parentSpan: Span) => parentSpan.traceChild('g return domainSet; }); - span.traceChildSync('whitelisting phishing domains', (curSpan) => { - const trieForRemovingWhiteListed = curSpan.traceChildSync('create trie for whitelisting', () => createTrie(domainSet)); - - return curSpan.traceChild('delete whitelisted from domainset').traceSyncFn(() => { - for (let i = 0, len = WHITELIST_DOMAIN.length; i < len; i++) { - const white = WHITELIST_DOMAIN[i]; - domainSet.delete(white); - domainSet.delete(`.${white}`); - - trieForRemovingWhiteListed.substractSetInPlaceFromFound(`.${white}`, domainSet); - } - }); - }); - const domainCountMap: Record = {}; span.traceChildSync('process phishing domain set', () => { diff --git a/Build/lib/reject-data-source.ts b/Build/lib/reject-data-source.ts index a8c0f947..54c6d161 100644 --- a/Build/lib/reject-data-source.ts +++ b/Build/lib/reject-data-source.ts @@ -224,8 +224,16 @@ export const PREDEFINED_WHITELIST = [ 'email.accounts.bitly.com', // Fuck Peter Lowe Hosts 'adsense.google.com', // Fuck Peter Lowe Hosts 'api.vip.miui.com', // Fuck Goodbye Xiaomi Ads - 'stripe.com' // EasyPrivacy only blocks m.stripe.com wwith $third-party, + 'stripe.com', // EasyPrivacy only blocks m.stripe.com with $third-party, // yet stupid AdGuardDNSFilter blocks all of it. Stupid AdGuard + 'w3s.link', // ipfs gateway + 'dweb.link', // ipfs gateway + 'nftstorage.link', // ipfs gateway + 'fleek.cool', // ipfs gateway + 'square.site', // Drag'n'Drop site building platform + 'business.site', // Drag'n'Drop site building platform + 'page.link', // Firebase URL Shortener + 'notion.site' ]; export const PREDEFINED_ENFORCED_WHITELIST = [