Update CDN & Reject Hosts

This commit is contained in:
SukkaW
2024-10-22 21:23:08 +08:00
parent e3d03f4214
commit 2d4c07d62f
4 changed files with 61 additions and 36 deletions

View File

@@ -1,10 +1,10 @@
import { processDomainLists } from './parse-filter';
import { processDomainLists, processHosts } from './parse-filter';
import * as tldts from 'tldts-experimental';
import { dummySpan } from '../trace';
import type { Span } from '../trace';
import { appendArrayInPlaceCurried } from './append-array-in-place';
import { PHISHING_DOMAIN_LISTS_EXTRA } from '../constants/reject-data-source';
import { PHISHING_DOMAIN_LISTS_EXTRA, PHISHING_HOSTS_EXTRA } from '../constants/reject-data-source';
import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt';
import picocolors from 'picocolors';
import createKeywordFilter from './aho-corasick';
@@ -22,7 +22,7 @@ const BLACK_TLD = new Set([
'ga', 'gd', 'gives', 'gq', 'group', 'host',
'icu', 'id', 'info', 'ink',
'lat', 'life', 'live', 'link', 'loan', 'lol', 'ltd',
'me', 'men', 'ml', 'mobi', 'mom',
'me', 'men', 'ml', 'mobi', 'mom', 'monster',
'net.pl',
'one', 'online',
'party', 'pro', 'pl', 'pw',
@@ -48,6 +48,12 @@ const WHITELIST_MAIN_DOMAINS = new Set([
'zendesk.com'
]);
const leathalKeywords = createKeywordFilter([
'vinted-',
'inpost-pl',
'vlnted-'
]);
const sensitiveKeywords = createKeywordFilter([
'.amazon-',
'-amazon',
@@ -65,14 +71,15 @@ const sensitiveKeywords = createKeywordFilter([
'booking-com',
'booking.com-',
'booking-eu',
'vinted-cz',
'vinted-',
'inpost-pl',
'login.microsoft',
'login-microsoft',
'microsoftonline',
'google.com-',
'minecraft',
'staemco'
'staemco',
'oferta'
]);
const lowKeywords = createKeywordFilter([
'transactions-',
@@ -96,7 +103,8 @@ const lowKeywords = createKeywordFilter([
'microsof',
'passwordreset',
'.google-',
'recover'
'recover',
'banking'
]);
const cacheKey = createCacheKey(__filename);
@@ -154,20 +162,15 @@ const processPhihsingDomains = cache(function processPhihsingDomains(domainArr:
if (
// !WHITELIST_MAIN_DOMAINS.has(apexDomain)
(domainScoreMap[apexDomain] >= 24)
|| (domainScoreMap[apexDomain] >= 16 && domainCountMap[apexDomain] >= 4)
|| (domainScoreMap[apexDomain] >= 13 && domainCountMap[apexDomain] >= 7)
|| (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 10)
|| (domainScoreMap[apexDomain] >= 3 && domainCountMap[apexDomain] >= 16)
|| (domainScoreMap[apexDomain] >= 16 && domainCountMap[apexDomain] >= 7)
|| (domainScoreMap[apexDomain] >= 13 && domainCountMap[apexDomain] >= 11)
|| (domainScoreMap[apexDomain] >= 5 && domainCountMap[apexDomain] >= 14)
|| (domainScoreMap[apexDomain] >= 3 && domainCountMap[apexDomain] >= 20)
) {
domainArr.push('.' + apexDomain);
}
}
// console.log(
// domainScoreMap['wordpress.com'],
// domainCountMap['wordpress.com']
// );
return Promise.resolve(domainArr);
}, {
serializer: serializeArray,
@@ -179,8 +182,10 @@ export function getPhishingDomains(parentSpan: Span) {
const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => {
const domainArr: string[] = [];
(await Promise.all(PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry, cacheKey))))
.forEach(appendArrayInPlaceCurried(domainArr));
await Promise.all([
...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry, cacheKey)),
...PHISHING_HOSTS_EXTRA.map(entry => processHosts(curSpan, ...entry, cacheKey))
]).then(domainGroups => domainGroups.forEach(appendArrayInPlaceCurried(domainArr)));
return domainArr;
});
@@ -193,6 +198,10 @@ export function getPhishingDomains(parentSpan: Span) {
}
export function calcDomainAbuseScore(subdomain: string, fullDomain: string = subdomain) {
if (leathalKeywords(fullDomain)) {
return 100;
}
let weight = 0;
const hitLowKeywords = lowKeywords(fullDomain);
@@ -209,17 +218,14 @@ export function calcDomainAbuseScore(subdomain: string, fullDomain: string = sub
const subdomainLength = subdomain.length;
if (subdomainLength > 6) {
weight += 0.25;
if (subdomainLength > 11) {
weight += 0.6;
if (subdomainLength > 20) {
weight += 1;
if (subdomainLength > 30) {
weight += 2;
if (subdomainLength > 40) {
weight += 4;
}
if (subdomainLength > 13) {
weight += 0.2;
if (subdomainLength > 20) {
weight += 1;
if (subdomainLength > 30) {
weight += 5;
if (subdomainLength > 40) {
weight += 10;
}
}
}