Fix: properly whitelist domains

This commit is contained in:
SukkaW 2024-05-27 01:26:18 +08:00
parent 2f329a4144
commit d137bdb8a3
4 changed files with 83 additions and 75 deletions

View File

@ -109,11 +109,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
const trie = span.traceChildSync('dedupe from white suffixes', () => {
const trie = createTrie(domainSets, true, true);
filterRuleWhitelistDomainSets.forEach(suffix => {
trie.whitelist(suffix);
});
filterRuleWhitelistDomainSets.forEach(trie.whitelist);
return trie;
});

View File

@ -1,10 +1,9 @@
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
import { processDomainLists } from './parse-filter';
import { getSubdomain, getPublicSuffix } from 'tldts-experimental';
import { TTL } from './cache-filesystem';
import type { Span } from '../trace';
import { appendArrayInPlace, appendArrayInPlaceCurried } from './append-array-in-place';
import { appendArrayInPlaceCurried } from './append-array-in-place';
import { PHISHING_DOMAIN_LISTS } from './reject-data-source';
const BLACK_TLD = new Set([
@ -90,6 +89,16 @@ const BLACK_TLD = new Set([
'design'
]);
/**
 * Main domains that are never promoted to a blanket `.domain` phishing entry.
 *
 * The phishing-domain builder checks this set before pushing a domain whose
 * hit count reached the threshold, so these shared hosting / gateway domains
 * are not emitted as whole-domain entries by that step.
 */
export const WHITELIST_MAIN_DOMAINS = new Set<string>([
  // IPFS gateways
  'w3s.link',
  'dweb.link',
  'nftstorage.link',
  'fleek.cool',

  // Drag'n'Drop site building platform
  'business.site',

  // Firebase URL Shortener
  'page.link',

  'notion.site'
]);
const tldtsOpt: Parameters<typeof getSubdomain>[1] = {
allowPrivateDomains: false,
extractHostname: false,
@ -132,7 +141,7 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
});
for (const domain in domainCountMap) {
if (domainCountMap[domain] >= 8) {
if (domainCountMap[domain] >= 8 && !WHITELIST_MAIN_DOMAINS.has(domain)) {
domainArr.push(`.${domain}`);
}
}

View File

@ -193,65 +193,59 @@ export const ADGUARD_FILTERS: AdGuardFilterSource[] = [
] as const;
export const PREDEFINED_WHITELIST = [
'localhost',
'local',
'localhost.localdomain',
'broadcasthost',
'ip6-loopback',
'ip6-localnet',
'ip6-mcastprefix',
'ip6-allnodes',
'ip6-allrouters',
'ip6-allhosts',
'mcastprefix',
'skk.moe',
'.localhost',
'.local',
'.localhost.localdomain',
'.broadcasthost',
'.ip6-loopback',
'.ip6-localnet',
'.ip6-mcastprefix',
'.ip6-allnodes',
'.ip6-allrouters',
'.ip6-allhosts',
'.mcastprefix',
'.skk.moe',
'analytics.google.com',
'cloud.answerhub.com',
'.cloud.answerhub.com',
'ae01.alicdn.com',
'whoami.akamai.net',
'whoami.ds.akahelp.net',
'.whoami.akamai.net',
'.whoami.ds.akahelp.net',
'pxlk9.net.', // This one is malformed from EasyList, which I will manually add instead
'instant.page', // No, it doesn't violate anyone's privacy. I will whitelist it
'piwik.pro',
'.instant.page', // No, it doesn't violate anyone's privacy. I will whitelist it
'.piwik.pro',
'mixpanel.com',
'cdn.mxpnl.com',
'heapanalytics.com',
'segment.com',
'segmentify.com',
't.co', // pgl yoyo add t.co to the blacklist
'survicate.com', // AdGuardDNSFilter
'perfops.io', // AdGuardDNSFilter
'd2axgrpnciinw7.cloudfront.net', // ADGuardDNSFilter
'sb-cd.com', // AdGuard
'storage.yandexcloud.net', // phishing list
'login.microsoftonline.com', // phishing list
'.heapanalytics.com',
'.segment.com',
'.segmentify.com',
'.t.co', // pgl yoyo add t.co to the blacklist
'.survicate.com', // AdGuardDNSFilter
'.perfops.io', // AdGuardDNSFilter
'.d2axgrpnciinw7.cloudfront.net', // ADGuardDNSFilter
'.sb-cd.com', // AdGuard
'.storage.yandexcloud.net', // phishing list
'.login.microsoftonline.com', // phishing list
'api.xiaomi.com', // https://github.com/jerryn70/GoodbyeAds/issues/281
'api.io.mi.com', // https://github.com/jerryn70/GoodbyeAds/issues/281
'cdn.userreport.com', // https://github.com/AdguardTeam/AdGuardSDNSFilter/issues/1158
'ip-api.com',
'fastly-analytics.com',
'digitaloceanspaces.com',
'.cdn.userreport.com', // https://github.com/AdguardTeam/AdGuardSDNSFilter/issues/1158
'.ip-api.com',
'.fastly-analytics.com',
'.digitaloceanspaces.com',
's3.nl-ams.scw.cloud',
'geolocation-db.com',
'uploads.codesandbox.io',
'vlscppe.microsoft.com', // Affect Windows ISO download https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers.txt
'statsig.com', // OpenAI use this for A/B testing
'pstmrk.it', // Fuck Peter Lowe Hosts
'clicks.mlsend.com', // Fuck Peter Lowe Hosts
'.geolocation-db.com',
'.uploads.codesandbox.io',
'.vlscppe.microsoft.com', // Affect Windows ISO download https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers.txt
'.statsig.com', // OpenAI use this for A/B testing
'.pstmrk.it', // Fuck Peter Lowe Hosts
'.clicks.mlsend.com', // Fuck Peter Lowe Hosts
'email.accounts.bitly.com', // Fuck Peter Lowe Hosts
'adsense.google.com', // Fuck Peter Lowe Hosts
'api.vip.miui.com', // Fuck Goodbye Xiaomi Ads
'stripe.com', // EasyPrivacy only blocks m.stripe.com with $third-party,
'staging.ai.api.xiaomi.com', // Fuck Goodbye Xiaomi Ads
'm.stripe.com', // EasyPrivacy only blocks m.stripe.com with $third-party,
// yet stupid AdGuardDNSFilter blocks all of it. Stupid AdGuard
'w3s.link', // ipfs gateway
'dweb.link', // ipfs gateway
'nftstorage.link', // ipfs gateway
'fleek.cool', // ipfs gateway
'square.site', // Drag'n'Drop site building platform
'business.site', // Drag'n'Drop site building platform
'page.link', // Firebase URL Shortener
'notion.site'
].map(suffix => `.${suffix}`);
'.w3s.link' // stupid phishing.army, introduce both "*.ipfs.w3s.link" and ".w3s.link" to the block list
];
export const PREDEFINED_ENFORCED_WHITELIST = [
'r2.dev',

View File

@ -208,26 +208,6 @@ describe('smol tree', () => {
]);
});
// Verifies that whitelisting removes entries from a trie, one call at a time.
it('should whitelist trie correctly', () => {
// NOTE(review): the meaning of the two `true` flags is not visible here — confirm against createTrie.
const trie = createTrie([
'.t.co',
't.co',
'example.t.co',
'.skk.moe'
], true, true);
// Only the two dot-prefixed entries are expected in the dump;
// presumably 't.co' and 'example.t.co' are folded into '.t.co'.
expect(trie.dump()).toStrictEqual([
'.skk.moe',
'.t.co'
]);
// Whitelisting with a leading dot is expected to drop the '.t.co' entry.
trie.whitelist('.t.co');
expect(trie.dump()).toStrictEqual(['.skk.moe']);
// Whitelisting without a leading dot is still expected to drop the stored '.skk.moe' entry.
trie.whitelist('skk.moe');
expect(trie.dump()).toStrictEqual([]);
});
it('should efficiently whitelist domains', () => {
const trie = createTrie([
'skk.moe',
@ -260,4 +240,33 @@ describe('smol tree', () => {
expect(trie.dump()).toStrictEqual([]);
});
// Verifies whitelisting step by step, including a case where an exact-host
// whitelist must leave a deeper subdomain entry in place.
it('should whitelist trie correctly', () => {
// NOTE(review): the meaning of the two `true` flags is not visible here — confirm against createTrie.
const trie = createTrie([
'.t.co',
't.co',
'example.t.co',
'.skk.moe',
'blog.cdn.example.com',
'cdn.example.com'
], true, true);
// 't.co' and 'example.t.co' are not expected in the dump (presumably folded into '.t.co'),
// while both exact example.com hosts are expected to be kept as separate entries.
expect(trie.dump()).toStrictEqual([
'cdn.example.com', 'blog.cdn.example.com',
'.skk.moe',
'.t.co'
]);
// Whitelisting with a leading dot is expected to drop only the '.t.co' entry.
trie.whitelist('.t.co');
expect(trie.dump()).toStrictEqual([
'cdn.example.com', 'blog.cdn.example.com',
'.skk.moe'
]);
// Whitelisting without a leading dot is still expected to drop the stored '.skk.moe' entry.
trie.whitelist('skk.moe');
expect(trie.dump()).toStrictEqual(['cdn.example.com', 'blog.cdn.example.com']);
// Whitelisting the exact host is expected to leave its subdomain entry 'blog.cdn.example.com' untouched.
trie.whitelist('cdn.example.com');
expect(trie.dump()).toStrictEqual(['blog.cdn.example.com']);
});
});