Update CDN & Reject Hosts

This commit is contained in:
SukkaW 2024-10-22 21:23:08 +08:00
parent e3d03f4214
commit 2d4c07d62f
4 changed files with 61 additions and 36 deletions

View File

@ -22,12 +22,7 @@ export const HOSTS_EXTRA: HostsSource[] = [
// Dan Pollock's hosts file, 0.0.0.0 version is 30 KiB smaller // Dan Pollock's hosts file, 0.0.0.0 version is 30 KiB smaller
['https://someonewhocares.org/hosts/zero/hosts', null, true, TTL.THREE_HOURS()], ['https://someonewhocares.org/hosts/zero/hosts', null, true, TTL.THREE_HOURS()],
// ad-wars is not actively maintained, so we set a two-week cache ttl // ad-wars is not actively maintained, so we set a two-week cache ttl
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.TWO_WEEKS()], ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.TWO_WEEKS()]
[
'https://raw.githubusercontent.com/durablenapkin/scamblocklist/master/hosts.txt',
[],
true, TTL.TWLVE_HOURS()
]
]; ];
export const DOMAIN_LISTS: HostsSource[] = [ export const DOMAIN_LISTS: HostsSource[] = [
@ -84,6 +79,10 @@ export const DOMAIN_LISTS_EXTRA: HostsSource[] = [
['https://raw.githubusercontent.com/Spam404/lists/master/main-blacklist.txt', [], true, TTL.TEN_DAYS()] ['https://raw.githubusercontent.com/Spam404/lists/master/main-blacklist.txt', [], true, TTL.TEN_DAYS()]
]; ];
/**
 * Phishing/scam block lists that are published in hosts-file format.
 *
 * Every entry is a `HostsSource` tuple; the phishing-domain builder spreads
 * each entry into `processHosts`.
 */
export const PHISHING_HOSTS_EXTRA: HostsSource[] = [
  [
    'https://raw.githubusercontent.com/durablenapkin/scamblocklist/master/hosts.txt',
    [],
    true,
    TTL.THREE_DAYS()
  ]
];
export const PHISHING_DOMAIN_LISTS_EXTRA: HostsSource[] = [ export const PHISHING_DOMAIN_LISTS_EXTRA: HostsSource[] = [
[ [
'https://phishing-filter.pages.dev/phishing-filter-domains.txt', 'https://phishing-filter.pages.dev/phishing-filter-domains.txt',
@ -396,5 +395,9 @@ export const PREDEFINED_WHITELIST = [
'ab.chatgpt.com', // EasyPrivacy blocks this 'ab.chatgpt.com', // EasyPrivacy blocks this
'jnn-pa.googleapis.com', // ad-wars 'jnn-pa.googleapis.com', // ad-wars
'imasdk.googleapis.com', // ad-wars 'imasdk.googleapis.com', // ad-wars
'.l.qq.com' // ad-wars '.l.qq.com', // ad-wars
'.clients.your-server.de', // rDNS .static.183.213.201.138.clients.your-server.de
'.bc.googleusercontent.com', // rDNS 218.178.172.34.bc.googleusercontent.com
'.host.secureserver.net', // rDNS .64.149.167.72.host.secureserver.net,
'.ip.linodeusercontent.com' // rDNS 45-79-169-153.ip.linodeusercontent.com
]; ];

View File

@ -1,10 +1,10 @@
import { processDomainLists } from './parse-filter'; import { processDomainLists, processHosts } from './parse-filter';
import * as tldts from 'tldts-experimental'; import * as tldts from 'tldts-experimental';
import { dummySpan } from '../trace'; import { dummySpan } from '../trace';
import type { Span } from '../trace'; import type { Span } from '../trace';
import { appendArrayInPlaceCurried } from './append-array-in-place'; import { appendArrayInPlaceCurried } from './append-array-in-place';
import { PHISHING_DOMAIN_LISTS_EXTRA } from '../constants/reject-data-source'; import { PHISHING_DOMAIN_LISTS_EXTRA, PHISHING_HOSTS_EXTRA } from '../constants/reject-data-source';
import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt'; import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt';
import picocolors from 'picocolors'; import picocolors from 'picocolors';
import createKeywordFilter from './aho-corasick'; import createKeywordFilter from './aho-corasick';
@ -22,7 +22,7 @@ const BLACK_TLD = new Set([
'ga', 'gd', 'gives', 'gq', 'group', 'host', 'ga', 'gd', 'gives', 'gq', 'group', 'host',
'icu', 'id', 'info', 'ink', 'icu', 'id', 'info', 'ink',
'lat', 'life', 'live', 'link', 'loan', 'lol', 'ltd', 'lat', 'life', 'live', 'link', 'loan', 'lol', 'ltd',
'me', 'men', 'ml', 'mobi', 'mom', 'me', 'men', 'ml', 'mobi', 'mom', 'monster',
'net.pl', 'net.pl',
'one', 'online', 'one', 'online',
'party', 'pro', 'pl', 'pw', 'party', 'pro', 'pl', 'pw',
@ -48,6 +48,12 @@ const WHITELIST_MAIN_DOMAINS = new Set([
'zendesk.com' 'zendesk.com'
]); ]);
// Decisive keywords: calcDomainAbuseScore returns 100 as soon as the full
// domain matches any of them, before any other weight is computed.
// 'vlnted-' appears to be a look-alike spelling of 'vinted-'.
const leathalKeywords = createKeywordFilter(['vinted-', 'inpost-pl', 'vlnted-']);
const sensitiveKeywords = createKeywordFilter([ const sensitiveKeywords = createKeywordFilter([
'.amazon-', '.amazon-',
'-amazon', '-amazon',
@ -65,14 +71,15 @@ const sensitiveKeywords = createKeywordFilter([
'booking-com', 'booking-com',
'booking.com-', 'booking.com-',
'booking-eu', 'booking-eu',
'vinted-cz', 'vinted-',
'inpost-pl', 'inpost-pl',
'login.microsoft', 'login.microsoft',
'login-microsoft', 'login-microsoft',
'microsoftonline', 'microsoftonline',
'google.com-', 'google.com-',
'minecraft', 'minecraft',
'staemco' 'staemco',
'oferta'
]); ]);
const lowKeywords = createKeywordFilter([ const lowKeywords = createKeywordFilter([
'transactions-', 'transactions-',
@ -96,7 +103,8 @@ const lowKeywords = createKeywordFilter([
'microsof', 'microsof',
'passwordreset', 'passwordreset',
'.google-', '.google-',
'recover' 'recover',
'banking'
]); ]);
const cacheKey = createCacheKey(__filename); const cacheKey = createCacheKey(__filename);
@ -154,20 +162,15 @@ const processPhihsingDomains = cache(function processPhihsingDomains(domainArr:
if ( if (
// !WHITELIST_MAIN_DOMAINS.has(apexDomain) // !WHITELIST_MAIN_DOMAINS.has(apexDomain)
(domainScoreMap[apexDomain] >= 24) (domainScoreMap[apexDomain] >= 24)
|| (domainScoreMap[apexDomain] >= 16 && domainCountMap[apexDomain] >= 4) || (domainScoreMap[apexDomain] >= 16 && domainCountMap[apexDomain] >= 7)
|| (domainScoreMap[apexDomain] >= 13 && domainCountMap[apexDomain] >= 7) || (domainScoreMap[apexDomain] >= 13 && domainCountMap[apexDomain] >= 11)
|| (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 10) || (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 14)
|| (domainScoreMap[apexDomain] >= 3 && domainCountMap[apexDomain] >= 16) || (domainScoreMap[apexDomain] >= 3 && domainCountMap[apexDomain] >= 20)
) { ) {
domainArr.push('.' + apexDomain); domainArr.push('.' + apexDomain);
} }
} }
// console.log(
// domainScoreMap['wordpress.com'],
// domainCountMap['wordpress.com']
// );
return Promise.resolve(domainArr); return Promise.resolve(domainArr);
}, { }, {
serializer: serializeArray, serializer: serializeArray,
@ -179,8 +182,10 @@ export function getPhishingDomains(parentSpan: Span) {
const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => { const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => {
const domainArr: string[] = []; const domainArr: string[] = [];
(await Promise.all(PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry, cacheKey)))) await Promise.all([
.forEach(appendArrayInPlaceCurried(domainArr)); ...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry, cacheKey)),
...PHISHING_HOSTS_EXTRA.map(entry => processHosts(curSpan, ...entry, cacheKey))
]).then(domainGroups => domainGroups.forEach(appendArrayInPlaceCurried(domainArr)));
return domainArr; return domainArr;
}); });
@ -193,6 +198,10 @@ export function getPhishingDomains(parentSpan: Span) {
} }
export function calcDomainAbuseScore(subdomain: string, fullDomain: string = subdomain) { export function calcDomainAbuseScore(subdomain: string, fullDomain: string = subdomain) {
if (leathalKeywords(fullDomain)) {
return 100;
}
let weight = 0; let weight = 0;
const hitLowKeywords = lowKeywords(fullDomain); const hitLowKeywords = lowKeywords(fullDomain);
@ -209,17 +218,14 @@ export function calcDomainAbuseScore(subdomain: string, fullDomain: string = sub
const subdomainLength = subdomain.length; const subdomainLength = subdomain.length;
if (subdomainLength > 6) { if (subdomainLength > 13) {
weight += 0.25; weight += 0.2;
if (subdomainLength > 11) { if (subdomainLength > 20) {
weight += 0.6; weight += 1;
if (subdomainLength > 20) { if (subdomainLength > 30) {
weight += 1; weight += 5;
if (subdomainLength > 30) { if (subdomainLength > 40) {
weight += 2; weight += 10;
if (subdomainLength > 40) {
weight += 4;
}
} }
} }
} }

View File

@ -1045,9 +1045,12 @@ cdn.consentmanager.net
widget.usersnap.com widget.usersnap.com
cdn.playwire.com cdn.playwire.com
widget.usepylon.com widget.usepylon.com
app.groove.cm
app.groovefunnels.com
loader.mantis-intelligence.com loader.mantis-intelligence.com
mantisloader.mantis-awx.com mantisloader.mantis-awx.com
conversiongorilla.com
.patientpop.com
cdn.transcend.io cdn.transcend.io
.transcend-cdn.com .transcend-cdn.com
@ -3094,3 +3097,6 @@ assets.raspberrypi.com
assets.brevo.com assets.brevo.com
corp-backend.brevo.com corp-backend.brevo.com
designsystem.brevo.com designsystem.brevo.com
assets.grooveapps.com
assets.clickfunnels.com
.wixstudio.io

View File

@ -319,6 +319,7 @@ inst.360safe.com
.dkonto.pl .dkonto.pl
.googleplay.pro .googleplay.pro
.printondemandmerchandise.com .printondemandmerchandise.com
.thebitmeister.com
# Phishing domain like `www-facebook.to-kr.com` # Phishing domain like `www-facebook.to-kr.com`
.to-kr.com .to-kr.com
# Poland Hosting dhosting.pl's free subdomain # Poland Hosting dhosting.pl's free subdomain
@ -718,6 +719,10 @@ ads-shopping.shouqianba.com
ad.maoyan.com ad.maoyan.com
e.jparking.cn e.jparking.cn
adapi.izuiyou.com adapi.izuiyou.com
.sponsor.printondemandagency.com
.whatisaweekend.com
.mob.com
.duomeng.cn
adimg.daumcdn.net adimg.daumcdn.net
live.tvpot.daum.net live.tvpot.daum.net
@ -892,6 +897,10 @@ metrics.brevo.com
.adfunlink.com .adfunlink.com
.ubixioe.com .ubixioe.com
# CNAME: dualstack.beaconserver-ce-vpc0-1537565064.eu-west-1.elb.amazonaws.com
# note "beaconserver"
.internal.dradis.netflix.com
.adjust.io .adjust.io
.airbrake.io .airbrake.io
.apsalar.com .apsalar.com
@ -1968,6 +1977,7 @@ xadx.file.market.xiaomi.com
.pandora.xiaomi.com .pandora.xiaomi.com
mi-stat.gslb.mi-idc.com mi-stat.gslb.mi-idc.com
mlog.search.xiaomi.net mlog.search.xiaomi.net
# verify.sec.xiaomi.com # 验证码 captcha
# 小米云扫描组件 # 小米云扫描组件
.avlyun.sec.intl.miui.com .avlyun.sec.intl.miui.com
.avlyun.sec.miui.com .avlyun.sec.miui.com