Fix: properly whitelist domains

This commit is contained in:
SukkaW 2024-05-27 01:26:18 +08:00
parent 2f329a4144
commit d137bdb8a3
4 changed files with 83 additions and 75 deletions

View File

@ -109,11 +109,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
const trie = span.traceChildSync('dedupe from white suffixes', () => { const trie = span.traceChildSync('dedupe from white suffixes', () => {
const trie = createTrie(domainSets, true, true); const trie = createTrie(domainSets, true, true);
filterRuleWhitelistDomainSets.forEach(trie.whitelist);
filterRuleWhitelistDomainSets.forEach(suffix => {
trie.whitelist(suffix);
});
return trie; return trie;
}); });

View File

@ -1,10 +1,9 @@
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix'; import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
import { processDomainLists } from './parse-filter'; import { processDomainLists } from './parse-filter';
import { getSubdomain, getPublicSuffix } from 'tldts-experimental'; import { getSubdomain, getPublicSuffix } from 'tldts-experimental';
import { TTL } from './cache-filesystem';
import type { Span } from '../trace'; import type { Span } from '../trace';
import { appendArrayInPlace, appendArrayInPlaceCurried } from './append-array-in-place'; import { appendArrayInPlaceCurried } from './append-array-in-place';
import { PHISHING_DOMAIN_LISTS } from './reject-data-source'; import { PHISHING_DOMAIN_LISTS } from './reject-data-source';
const BLACK_TLD = new Set([ const BLACK_TLD = new Set([
@ -90,6 +89,16 @@ const BLACK_TLD = new Set([
'design' 'design'
]); ]);
// Source list for WHITELIST_MAIN_DOMAINS; kept as a plain array so each entry
// can carry its own note.
const WHITELISTED_MAIN_DOMAIN_LIST: readonly string[] = [
  // ipfs gateways
  'w3s.link', // ipfs gateway
  'dweb.link', // ipfs gateway
  'nftstorage.link', // ipfs gateway
  'fleek.cool', // ipfs gateway
  // site builders / shorteners
  'business.site', // Drag'n'Drop site building platform
  'page.link', // Firebase URL Shortener
  'notion.site'
];

/**
 * Main domains exempt from count-based phishing suffix blocking.
 *
 * `getPhishingDomains` checks membership in this set before pushing a
 * `.${domain}` entry for a domain whose count reached the threshold, so a
 * domain listed here is never added by that step.
 */
export const WHITELIST_MAIN_DOMAINS = new Set(WHITELISTED_MAIN_DOMAIN_LIST);
const tldtsOpt: Parameters<typeof getSubdomain>[1] = { const tldtsOpt: Parameters<typeof getSubdomain>[1] = {
allowPrivateDomains: false, allowPrivateDomains: false,
extractHostname: false, extractHostname: false,
@ -132,7 +141,7 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
}); });
for (const domain in domainCountMap) { for (const domain in domainCountMap) {
if (domainCountMap[domain] >= 8) { if (domainCountMap[domain] >= 8 && !WHITELIST_MAIN_DOMAINS.has(domain)) {
domainArr.push(`.${domain}`); domainArr.push(`.${domain}`);
} }
} }

View File

@ -193,65 +193,59 @@ export const ADGUARD_FILTERS: AdGuardFilterSource[] = [
] as const; ] as const;
export const PREDEFINED_WHITELIST = [ export const PREDEFINED_WHITELIST = [
'localhost', '.localhost',
'local', '.local',
'localhost.localdomain', '.localhost.localdomain',
'broadcasthost', '.broadcasthost',
'ip6-loopback', '.ip6-loopback',
'ip6-localnet', '.ip6-localnet',
'ip6-mcastprefix', '.ip6-mcastprefix',
'ip6-allnodes', '.ip6-allnodes',
'ip6-allrouters', '.ip6-allrouters',
'ip6-allhosts', '.ip6-allhosts',
'mcastprefix', '.mcastprefix',
'skk.moe', '.skk.moe',
'analytics.google.com', 'analytics.google.com',
'cloud.answerhub.com', '.cloud.answerhub.com',
'ae01.alicdn.com', 'ae01.alicdn.com',
'whoami.akamai.net', '.whoami.akamai.net',
'whoami.ds.akahelp.net', '.whoami.ds.akahelp.net',
'pxlk9.net.', // This one is malformed from EasyList, which I will manually add instead 'pxlk9.net.', // This one is malformed from EasyList, which I will manually add instead
'instant.page', // No, it doesn't violate anyone's privacy. I will whitelist it '.instant.page', // No, it doesn't violate anyone's privacy. I will whitelist it
'piwik.pro', '.piwik.pro',
'mixpanel.com', 'mixpanel.com',
'cdn.mxpnl.com', 'cdn.mxpnl.com',
'heapanalytics.com', '.heapanalytics.com',
'segment.com', '.segment.com',
'segmentify.com', '.segmentify.com',
't.co', // pgl yoyo add t.co to the blacklist '.t.co', // pgl yoyo add t.co to the blacklist
'survicate.com', // AdGuardDNSFilter '.survicate.com', // AdGuardDNSFilter
'perfops.io', // AdGuardDNSFilter '.perfops.io', // AdGuardDNSFilter
'd2axgrpnciinw7.cloudfront.net', // ADGuardDNSFilter '.d2axgrpnciinw7.cloudfront.net', // ADGuardDNSFilter
'sb-cd.com', // AdGuard '.sb-cd.com', // AdGuard
'storage.yandexcloud.net', // phishing list '.storage.yandexcloud.net', // phishing list
'login.microsoftonline.com', // phishing list '.login.microsoftonline.com', // phishing list
'api.xiaomi.com', // https://github.com/jerryn70/GoodbyeAds/issues/281 'api.xiaomi.com', // https://github.com/jerryn70/GoodbyeAds/issues/281
'api.io.mi.com', // https://github.com/jerryn70/GoodbyeAds/issues/281 'api.io.mi.com', // https://github.com/jerryn70/GoodbyeAds/issues/281
'cdn.userreport.com', // https://github.com/AdguardTeam/AdGuardSDNSFilter/issues/1158 '.cdn.userreport.com', // https://github.com/AdguardTeam/AdGuardSDNSFilter/issues/1158
'ip-api.com', '.ip-api.com',
'fastly-analytics.com', '.fastly-analytics.com',
'digitaloceanspaces.com', '.digitaloceanspaces.com',
's3.nl-ams.scw.cloud', 's3.nl-ams.scw.cloud',
'geolocation-db.com', '.geolocation-db.com',
'uploads.codesandbox.io', '.uploads.codesandbox.io',
'vlscppe.microsoft.com', // Affect Windows ISO download https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers.txt '.vlscppe.microsoft.com', // Affect Windows ISO download https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers.txt
'statsig.com', // OpenAI use this for A/B testing '.statsig.com', // OpenAI use this for A/B testing
'pstmrk.it', // Fuck Peter Lowe Hosts '.pstmrk.it', // Fuck Peter Lowe Hosts
'clicks.mlsend.com', // Fuck Peter Lowe Hosts '.clicks.mlsend.com', // Fuck Peter Lowe Hosts
'email.accounts.bitly.com', // Fuck Peter Lowe Hosts 'email.accounts.bitly.com', // Fuck Peter Lowe Hosts
'adsense.google.com', // Fuck Peter Lowe Hosts 'adsense.google.com', // Fuck Peter Lowe Hosts
'api.vip.miui.com', // Fuck Goodbye Xiaomi Ads 'api.vip.miui.com', // Fuck Goodbye Xiaomi Ads
'stripe.com', // EasyPrivacy only blocks m.stripe.com with $third-party, 'staging.ai.api.xiaomi.com', // Fuck Goodbye Xiaomi Ads
'm.stripe.com', // EasyPrivacy only blocks m.stripe.com with $third-party,
// yet stupid AdGuardDNSFilter blocks all of it. Stupid AdGuard // yet stupid AdGuardDNSFilter blocks all of it. Stupid AdGuard
'w3s.link', // ipfs gateway '.w3s.link' // stupid phishing.army, introduce both "*.ipfs.w3s.link" and ".w3s.link" to the block list
'dweb.link', // ipfs gateway ];
'nftstorage.link', // ipfs gateway
'fleek.cool', // ipfs gateway
'square.site', // Drag'n'Drop site building platform
'business.site', // Drag'n'Drop site building platform
'page.link', // Firebase URL Shortener
'notion.site'
].map(suffix => `.${suffix}`);
export const PREDEFINED_ENFORCED_WHITELIST = [ export const PREDEFINED_ENFORCED_WHITELIST = [
'r2.dev', 'r2.dev',

View File

@ -208,26 +208,6 @@ describe('smol tree', () => {
]); ]);
}); });
// Verifies that each whitelist call drops the matching entry from the dump.
it('should whitelist trie correctly', () => {
  const seedDomains = [
    '.t.co',
    't.co',
    'example.t.co',
    '.skk.moe'
  ];
  const trie = createTrie(seedDomains, true, true);

  // Before any whitelisting, the dump is expected to hold only the two
  // dot-prefixed entries.
  const beforeWhitelist = trie.dump();
  expect(beforeWhitelist).toStrictEqual([
    '.skk.moe',
    '.t.co'
  ]);

  // Whitelisting '.t.co' is expected to leave only '.skk.moe'.
  trie.whitelist('.t.co');
  const afterTco = trie.dump();
  expect(afterTco).toStrictEqual(['.skk.moe']);

  // Whitelisting 'skk.moe' (no leading dot) is expected to empty the dump.
  trie.whitelist('skk.moe');
  const afterSkk = trie.dump();
  expect(afterSkk).toStrictEqual([]);
});
it('should efficiently whitelist domains', () => { it('should efficiently whitelist domains', () => {
const trie = createTrie([ const trie = createTrie([
'skk.moe', 'skk.moe',
@ -260,4 +240,33 @@ describe('smol tree', () => {
expect(trie.dump()).toStrictEqual([]); expect(trie.dump()).toStrictEqual([]);
}); });
// Verifies that each whitelist call removes only the targeted entry and leaves
// the remaining entries in the dump untouched.
it('should whitelist trie correctly', () => {
  const seedDomains = [
    '.t.co',
    't.co',
    'example.t.co',
    '.skk.moe',
    'blog.cdn.example.com',
    'cdn.example.com'
  ];
  const trie = createTrie(seedDomains, true, true);

  // Before any whitelisting: both example.com hosts plus the two
  // dot-prefixed entries are expected in the dump.
  const beforeWhitelist = trie.dump();
  expect(beforeWhitelist).toStrictEqual([
    'cdn.example.com',
    'blog.cdn.example.com',
    '.skk.moe',
    '.t.co'
  ]);

  // Whitelisting '.t.co' is expected to drop '.t.co' and nothing else.
  trie.whitelist('.t.co');
  const afterTco = trie.dump();
  expect(afterTco).toStrictEqual([
    'cdn.example.com',
    'blog.cdn.example.com',
    '.skk.moe'
  ]);

  // Whitelisting 'skk.moe' (no leading dot) is expected to drop '.skk.moe'.
  trie.whitelist('skk.moe');
  const afterSkk = trie.dump();
  expect(afterSkk).toStrictEqual(['cdn.example.com', 'blog.cdn.example.com']);

  // Whitelisting 'cdn.example.com' is expected to keep 'blog.cdn.example.com'.
  trie.whitelist('cdn.example.com');
  const afterCdn = trie.dump();
  expect(afterCdn).toStrictEqual(['blog.cdn.example.com']);
});
}); });