mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-14 02:00:37 +08:00
Improve Phishing score algorithm
This commit is contained in:
parent
3ebb0077d5
commit
c1234df35b
@ -7,3 +7,8 @@ export const looseTldtsOpt: Parameters<typeof tldts.getSubdomain>[1] = {
|
||||
detectIp: false,
|
||||
mixedInputs: false
|
||||
};
|
||||
|
||||
export const loosTldOptWithPrivateDomains: Parameters<typeof tldts.getSubdomain>[1] = {
|
||||
...looseTldtsOpt,
|
||||
allowPrivateDomains: true
|
||||
};
|
||||
|
||||
@ -4,6 +4,11 @@ import { calcDomainAbuseScore } from './get-phishing-domains';
|
||||
|
||||
describe('sortDomains', () => {
|
||||
it('nmdj.pl', () => {
|
||||
console.log(calcDomainAbuseScore('.01462ccca801fed55370d79231c876e5.nmdj.pl', '.01462ccca801fed55370d79231c876e5', false));
|
||||
console.log(calcDomainAbuseScore('plikgier'));
|
||||
console.log(calcDomainAbuseScore('www.addgumtree'));
|
||||
console.log(calcDomainAbuseScore('zrz'));
|
||||
console.log(calcDomainAbuseScore('z1'));
|
||||
console.log(calcDomainAbuseScore('accountsettingaddrecoverymanagesiteupdatebillingreview.village'));
|
||||
console.log(calcDomainAbuseScore('allegrolokalnie'));
|
||||
});
|
||||
});
|
||||
|
||||
@ -4,7 +4,7 @@ import * as tldts from 'tldts-experimental';
|
||||
import type { Span } from '../trace';
|
||||
import { appendArrayInPlaceCurried } from './append-array-in-place';
|
||||
import { PHISHING_DOMAIN_LISTS_EXTRA } from '../constants/reject-data-source';
|
||||
import { looseTldtsOpt } from '../constants/loose-tldts-opt';
|
||||
import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt';
|
||||
import picocolors from 'picocolors';
|
||||
import createKeywordFilter from './aho-corasick';
|
||||
import { createCacheKey } from './cache-filesystem';
|
||||
@ -78,6 +78,7 @@ const BLACK_TLD = new Set([
|
||||
'space',
|
||||
'store',
|
||||
'stream',
|
||||
'surf',
|
||||
'tech',
|
||||
'tk',
|
||||
'tokyo',
|
||||
@ -96,13 +97,13 @@ const BLACK_TLD = new Set([
|
||||
|
||||
const WHITELIST_MAIN_DOMAINS = new Set([
|
||||
'w3s.link', // ipfs gateway
|
||||
'dweb.link', // ipfs gateway
|
||||
'nftstorage.link', // ipfs gateway
|
||||
// 'dweb.link', // ipfs gateway
|
||||
// 'nftstorage.link', // ipfs gateway
|
||||
'fleek.cool', // ipfs gateway
|
||||
'business.site', // Drag'n'Drop site building platform
|
||||
'page.link', // Firebase URL Shortener
|
||||
'notion.site',
|
||||
'vercel.app',
|
||||
// 'notion.site',
|
||||
// 'vercel.app',
|
||||
'gitbook.io'
|
||||
]);
|
||||
|
||||
@ -121,14 +122,18 @@ const sensitiveKeywords = createKeywordFilter([
|
||||
'virus-',
|
||||
'icloud-',
|
||||
'apple-',
|
||||
'www.apple.',
|
||||
'-coinbase',
|
||||
'coinbase-'
|
||||
'coinbase-',
|
||||
'lcloud.',
|
||||
'lcloud-'
|
||||
]);
|
||||
const lowKeywords = createKeywordFilter([
|
||||
'-co-jp',
|
||||
'customer.',
|
||||
'customer-',
|
||||
'.www-'
|
||||
'.www-',
|
||||
'instagram'
|
||||
]);
|
||||
|
||||
const cacheKey = createCacheKey(__filename);
|
||||
@ -144,6 +149,7 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
||||
});
|
||||
|
||||
const domainCountMap: Record<string, number> = {};
|
||||
const domainScoreMap: Record<string, number> = {};
|
||||
|
||||
span.traceChildSync('process phishing domain set', () => {
|
||||
for (let i = 0, len = domainArr.length; i < len; i++) {
|
||||
@ -152,8 +158,13 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
||||
const {
|
||||
publicSuffix: tld,
|
||||
domain: apexDomain,
|
||||
subdomain
|
||||
} = tldts.parse(line, looseTldtsOpt);
|
||||
subdomain,
|
||||
isPrivate
|
||||
} = tldts.parse(line, loosTldOptWithPrivateDomains);
|
||||
|
||||
if (isPrivate) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!tld) {
|
||||
console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, tld });
|
||||
@ -164,16 +175,30 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
||||
continue;
|
||||
}
|
||||
|
||||
let sensitiveKeywordsHit: boolean | null = null;
|
||||
if (tld.length < 6 && !tld.includes('.') && !BLACK_TLD.has(tld) && !(sensitiveKeywordsHit = sensitiveKeywords(line))) continue;
|
||||
|
||||
domainCountMap[apexDomain] ||= 0;
|
||||
domainCountMap[apexDomain] += calcDomainAbuseScore(line, subdomain, sensitiveKeywordsHit);
|
||||
domainCountMap[apexDomain] += 1;
|
||||
|
||||
if (!(apexDomain in domainScoreMap)) {
|
||||
domainScoreMap[apexDomain] = 0;
|
||||
if (BLACK_TLD.has(tld)) {
|
||||
domainScoreMap[apexDomain] += 4;
|
||||
} else if (tld.length > 6) {
|
||||
domainScoreMap[apexDomain] += 2;
|
||||
}
|
||||
}
|
||||
domainScoreMap[apexDomain] += calcDomainAbuseScore(subdomain);
|
||||
}
|
||||
});
|
||||
|
||||
for (const domain in domainCountMap) {
|
||||
if (domainCountMap[domain] >= 10 && !WHITELIST_MAIN_DOMAINS.has(domain)) {
|
||||
if (
|
||||
!WHITELIST_MAIN_DOMAINS.has(domain)
|
||||
&& (
|
||||
domainScoreMap[domain] >= 12
|
||||
|| (domainScoreMap[domain] >= 5 && domainCountMap[domain] >= 4)
|
||||
)
|
||||
) {
|
||||
console.log({ domain });
|
||||
domainArr.push(`.${domain}`);
|
||||
}
|
||||
}
|
||||
@ -181,51 +206,48 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
||||
return domainArr;
|
||||
});
|
||||
|
||||
export function calcDomainAbuseScore(line: string, subdomain: string | null, sensitiveKeywordsHit: boolean | null) {
|
||||
let weight = 1;
|
||||
|
||||
const hitLowKeywords = lowKeywords(line);
|
||||
|
||||
sensitiveKeywordsHit ??= sensitiveKeywords(line);
|
||||
if (sensitiveKeywordsHit) {
|
||||
weight += 4;
|
||||
if (hitLowKeywords) {
|
||||
weight += 5;
|
||||
}
|
||||
} else if (hitLowKeywords) {
|
||||
weight += 0.5;
|
||||
}
|
||||
|
||||
const lineLen = line.length;
|
||||
|
||||
if (lineLen > 19) {
|
||||
// Add more weight if the domain is long enough
|
||||
if (lineLen > 44) {
|
||||
weight += 3.5;
|
||||
} else if (lineLen > 34) {
|
||||
weight += 2.5;
|
||||
} else if (lineLen > 29) {
|
||||
weight += 1.5;
|
||||
} else if (lineLen > 24) {
|
||||
weight += 0.75;
|
||||
} else {
|
||||
weight += 0.25;
|
||||
}
|
||||
}
|
||||
export function calcDomainAbuseScore(subdomain: string | null) {
|
||||
let weight = 0;
|
||||
|
||||
if (subdomain) {
|
||||
if (subdomain.length > 40) {
|
||||
weight += 3;
|
||||
} else if (subdomain.length > 30) {
|
||||
weight += 1.5;
|
||||
} else if (subdomain.length > 20) {
|
||||
weight += 1;
|
||||
} else if (subdomain.length > 10) {
|
||||
weight += 0.1;
|
||||
const hitLowKeywords = lowKeywords(subdomain);
|
||||
const sensitiveKeywordsHit = sensitiveKeywords(subdomain);
|
||||
|
||||
if (sensitiveKeywordsHit) {
|
||||
weight += 8;
|
||||
if (hitLowKeywords) {
|
||||
weight += 4;
|
||||
}
|
||||
if (subdomain.slice(1).includes('.')) {
|
||||
} else if (hitLowKeywords) {
|
||||
weight += 1;
|
||||
}
|
||||
|
||||
const subdomainLength = subdomain.length;
|
||||
|
||||
if (subdomainLength > 4) {
|
||||
weight += 0.5;
|
||||
if (subdomainLength > 10) {
|
||||
weight += 0.5;
|
||||
if (subdomainLength > 20) {
|
||||
weight += 1;
|
||||
if (subdomainLength > 30) {
|
||||
weight += 2;
|
||||
if (subdomainLength > 40) {
|
||||
weight += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (subdomain.startsWith('www.')) {
|
||||
weight += 4;
|
||||
} else if (subdomain.slice(1).includes('.')) {
|
||||
weight += 1;
|
||||
if (subdomain.includes('www.')) {
|
||||
weight += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return weight;
|
||||
|
||||
@ -114,6 +114,8 @@ optimus-ads.amap.com
|
||||
.sitebeat.crazydomains.com
|
||||
# online d'n'd website builder (https://www.infonline.de/)
|
||||
.apps.iocnt.de
|
||||
# codeanywhere is a heaven of abuse
|
||||
.codeanyapp.com
|
||||
|
||||
# >> Qihoo 360
|
||||
hot.m.shouji.360tpcdn.com
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user