From d137bdb8a3a0cdf7973352eddff05f885eae2f02 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Mon, 27 May 2024 01:26:18 +0800 Subject: [PATCH] Fix: properly whitelisting domains --- Build/build-reject-domainset.ts | 6 +-- Build/lib/get-phishing-domains.ts | 15 ++++-- Build/lib/reject-data-source.ts | 88 ++++++++++++++----------------- Build/lib/trie.test.ts | 49 ++++++++++------- 4 files changed, 83 insertions(+), 75 deletions(-) diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts index 4fb51457..d62dc02a 100644 --- a/Build/build-reject-domainset.ts +++ b/Build/build-reject-domainset.ts @@ -109,11 +109,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { const trie = span.traceChildSync('dedupe from white suffixes', () => { const trie = createTrie(domainSets, true, true); - - filterRuleWhitelistDomainSets.forEach(suffix => { - trie.whitelist(suffix); - }); - + filterRuleWhitelistDomainSets.forEach(trie.whitelist); return trie; }); diff --git a/Build/lib/get-phishing-domains.ts b/Build/lib/get-phishing-domains.ts index 8296fadf..13fd6e3e 100644 --- a/Build/lib/get-phishing-domains.ts +++ b/Build/lib/get-phishing-domains.ts @@ -1,10 +1,9 @@ import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix'; import { processDomainLists } from './parse-filter'; import { getSubdomain, getPublicSuffix } from 'tldts-experimental'; -import { TTL } from './cache-filesystem'; import type { Span } from '../trace'; -import { appendArrayInPlace, appendArrayInPlaceCurried } from './append-array-in-place'; +import { appendArrayInPlaceCurried } from './append-array-in-place'; import { PHISHING_DOMAIN_LISTS } from './reject-data-source'; const BLACK_TLD = new Set([ @@ -90,6 +89,16 @@ const BLACK_TLD = new Set([ 'design' ]); +export const WHITELIST_MAIN_DOMAINS = new Set([ + 'w3s.link', // ipfs gateway + 'dweb.link', // ipfs gateway + 'nftstorage.link', // ipfs gateway + 'fleek.cool', // ipfs gateway + 'business.site', // 
Drag'n'Drop site building platform + 'page.link', // Firebase URL Shortener + 'notion.site' +]); + const tldtsOpt: Parameters[1] = { allowPrivateDomains: false, extractHostname: false, @@ -132,7 +141,7 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g }); for (const domain in domainCountMap) { - if (domainCountMap[domain] >= 8) { + if (domainCountMap[domain] >= 8 && !WHITELIST_MAIN_DOMAINS.has(domain)) { domainArr.push(`.${domain}`); } } diff --git a/Build/lib/reject-data-source.ts b/Build/lib/reject-data-source.ts index 222d5cc6..3147c87c 100644 --- a/Build/lib/reject-data-source.ts +++ b/Build/lib/reject-data-source.ts @@ -193,65 +193,59 @@ export const ADGUARD_FILTERS: AdGuardFilterSource[] = [ ] as const; export const PREDEFINED_WHITELIST = [ - 'localhost', - 'local', - 'localhost.localdomain', - 'broadcasthost', - 'ip6-loopback', - 'ip6-localnet', - 'ip6-mcastprefix', - 'ip6-allnodes', - 'ip6-allrouters', - 'ip6-allhosts', - 'mcastprefix', - 'skk.moe', + '.localhost', + '.local', + '.localhost.localdomain', + '.broadcasthost', + '.ip6-loopback', + '.ip6-localnet', + '.ip6-mcastprefix', + '.ip6-allnodes', + '.ip6-allrouters', + '.ip6-allhosts', + '.mcastprefix', + '.skk.moe', 'analytics.google.com', - 'cloud.answerhub.com', + '.cloud.answerhub.com', 'ae01.alicdn.com', - 'whoami.akamai.net', - 'whoami.ds.akahelp.net', + '.whoami.akamai.net', + '.whoami.ds.akahelp.net', 'pxlk9.net.', // This one is malformed from EasyList, which I will manually add instead - 'instant.page', // No, it doesn't violate anyone's privacy. I will whitelist it - 'piwik.pro', + '.instant.page', // No, it doesn't violate anyone's privacy. 
I will whitelist it + '.piwik.pro', 'mixpanel.com', 'cdn.mxpnl.com', - 'heapanalytics.com', - 'segment.com', - 'segmentify.com', - 't.co', // pgl yoyo add t.co to the blacklist - 'survicate.com', // AdGuardDNSFilter - 'perfops.io', // AdGuardDNSFilter - 'd2axgrpnciinw7.cloudfront.net', // ADGuardDNSFilter - 'sb-cd.com', // AdGuard - 'storage.yandexcloud.net', // phishing list - 'login.microsoftonline.com', // phishing list + '.heapanalytics.com', + '.segment.com', + '.segmentify.com', + '.t.co', // pgl yoyo add t.co to the blacklist + '.survicate.com', // AdGuardDNSFilter + '.perfops.io', // AdGuardDNSFilter + '.d2axgrpnciinw7.cloudfront.net', // ADGuardDNSFilter + '.sb-cd.com', // AdGuard + '.storage.yandexcloud.net', // phishing list + '.login.microsoftonline.com', // phishing list 'api.xiaomi.com', // https://github.com/jerryn70/GoodbyeAds/issues/281 'api.io.mi.com', // https://github.com/jerryn70/GoodbyeAds/issues/281 - 'cdn.userreport.com', // https://github.com/AdguardTeam/AdGuardSDNSFilter/issues/1158 - 'ip-api.com', - 'fastly-analytics.com', - 'digitaloceanspaces.com', + '.cdn.userreport.com', // https://github.com/AdguardTeam/AdGuardSDNSFilter/issues/1158 + '.ip-api.com', + '.fastly-analytics.com', + '.digitaloceanspaces.com', 's3.nl-ams.scw.cloud', - 'geolocation-db.com', - 'uploads.codesandbox.io', - 'vlscppe.microsoft.com', // Affect Windows ISO download https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers.txt - 'statsig.com', // OpenAI use this for A/B testing - 'pstmrk.it', // Fuck Peter Lowe Hosts - 'clicks.mlsend.com', // Fuck Peter Lowe Hosts + '.geolocation-db.com', + '.uploads.codesandbox.io', + '.vlscppe.microsoft.com', // Affect Windows ISO download https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers.txt + '.statsig.com', // OpenAI use this for A/B testing + '.pstmrk.it', // Fuck Peter Lowe Hosts + '.clicks.mlsend.com', // Fuck Peter Lowe Hosts 
'email.accounts.bitly.com', // Fuck Peter Lowe Hosts 'adsense.google.com', // Fuck Peter Lowe Hosts 'api.vip.miui.com', // Fuck Goodbye Xiaomi Ads - 'stripe.com', // EasyPrivacy only blocks m.stripe.com wwith $third-party, + 'staging.ai.api.xiaomi.com', // Fuck Goodbye Xiaomi Ads + 'm.stripe.com', // EasyPrivacy only blocks m.stripe.com with $third-party, // yet stupid AdGuardDNSFilter blocks all of it. Stupid AdGuard - 'w3s.link', // ipfs gateway - 'dweb.link', // ipfs gateway - 'nftstorage.link', // ipfs gateway - 'fleek.cool', // ipfs gateway - 'square.site', // Drag'n'Drop site building platform - 'business.site', // Drag'n'Drop site building platform - 'page.link', // Firebase URL Shortener - 'notion.site' -].map(suffix => `.${suffix}`); + '.w3s.link' // stupid phishing.army, introduce both "*.ipfs.w3s.link" and ".w3s.link" to the block list +]; export const PREDEFINED_ENFORCED_WHITELIST = [ 'r2.dev', diff --git a/Build/lib/trie.test.ts b/Build/lib/trie.test.ts index af8b0fd4..cda62011 100644 --- a/Build/lib/trie.test.ts +++ b/Build/lib/trie.test.ts @@ -208,26 +208,6 @@ describe('smol tree', () => { ]); }); - it('should whitelist trie correctly', () => { - const trie = createTrie([ - '.t.co', - 't.co', - 'example.t.co', - '.skk.moe' - ], true, true); - - expect(trie.dump()).toStrictEqual([ - '.skk.moe', - '.t.co' - ]); - - trie.whitelist('.t.co'); - expect(trie.dump()).toStrictEqual(['.skk.moe']); - - trie.whitelist('skk.moe'); - expect(trie.dump()).toStrictEqual([]); - }); - it('should efficiently whitelist domains', () => { const trie = createTrie([ 'skk.moe', @@ -260,4 +240,33 @@ describe('smol tree', () => { expect(trie.dump()).toStrictEqual([]); }); + + it('should whitelist trie correctly', () => { + const trie = createTrie([ + '.t.co', + 't.co', + 'example.t.co', + '.skk.moe', + 'blog.cdn.example.com', + 'cdn.example.com' + ], true, true); + + expect(trie.dump()).toStrictEqual([ + 'cdn.example.com', 'blog.cdn.example.com', + '.skk.moe', + '.t.co' + ]); 
+ + trie.whitelist('.t.co'); + expect(trie.dump()).toStrictEqual([ + 'cdn.example.com', 'blog.cdn.example.com', + '.skk.moe' + ]); + + trie.whitelist('skk.moe'); + expect(trie.dump()).toStrictEqual(['cdn.example.com', 'blog.cdn.example.com']); + + trie.whitelist('cdn.example.com'); + expect(trie.dump()).toStrictEqual(['blog.cdn.example.com']); + }); });