mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-14 02:00:37 +08:00
Improve Phishing score algorithm
This commit is contained in:
parent
3ebb0077d5
commit
c1234df35b
@ -7,3 +7,8 @@ export const looseTldtsOpt: Parameters<typeof tldts.getSubdomain>[1] = {
|
|||||||
detectIp: false,
|
detectIp: false,
|
||||||
mixedInputs: false
|
mixedInputs: false
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export const loosTldOptWithPrivateDomains: Parameters<typeof tldts.getSubdomain>[1] = {
|
||||||
|
...looseTldtsOpt,
|
||||||
|
allowPrivateDomains: true
|
||||||
|
};
|
||||||
|
|||||||
@ -4,6 +4,11 @@ import { calcDomainAbuseScore } from './get-phishing-domains';
|
|||||||
|
|
||||||
describe('sortDomains', () => {
|
describe('sortDomains', () => {
|
||||||
it('nmdj.pl', () => {
|
it('nmdj.pl', () => {
|
||||||
console.log(calcDomainAbuseScore('.01462ccca801fed55370d79231c876e5.nmdj.pl', '.01462ccca801fed55370d79231c876e5', false));
|
console.log(calcDomainAbuseScore('plikgier'));
|
||||||
|
console.log(calcDomainAbuseScore('www.addgumtree'));
|
||||||
|
console.log(calcDomainAbuseScore('zrz'));
|
||||||
|
console.log(calcDomainAbuseScore('z1'));
|
||||||
|
console.log(calcDomainAbuseScore('accountsettingaddrecoverymanagesiteupdatebillingreview.village'));
|
||||||
|
console.log(calcDomainAbuseScore('allegrolokalnie'));
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -4,7 +4,7 @@ import * as tldts from 'tldts-experimental';
|
|||||||
import type { Span } from '../trace';
|
import type { Span } from '../trace';
|
||||||
import { appendArrayInPlaceCurried } from './append-array-in-place';
|
import { appendArrayInPlaceCurried } from './append-array-in-place';
|
||||||
import { PHISHING_DOMAIN_LISTS_EXTRA } from '../constants/reject-data-source';
|
import { PHISHING_DOMAIN_LISTS_EXTRA } from '../constants/reject-data-source';
|
||||||
import { looseTldtsOpt } from '../constants/loose-tldts-opt';
|
import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt';
|
||||||
import picocolors from 'picocolors';
|
import picocolors from 'picocolors';
|
||||||
import createKeywordFilter from './aho-corasick';
|
import createKeywordFilter from './aho-corasick';
|
||||||
import { createCacheKey } from './cache-filesystem';
|
import { createCacheKey } from './cache-filesystem';
|
||||||
@ -78,6 +78,7 @@ const BLACK_TLD = new Set([
|
|||||||
'space',
|
'space',
|
||||||
'store',
|
'store',
|
||||||
'stream',
|
'stream',
|
||||||
|
'surf',
|
||||||
'tech',
|
'tech',
|
||||||
'tk',
|
'tk',
|
||||||
'tokyo',
|
'tokyo',
|
||||||
@ -96,13 +97,13 @@ const BLACK_TLD = new Set([
|
|||||||
|
|
||||||
const WHITELIST_MAIN_DOMAINS = new Set([
|
const WHITELIST_MAIN_DOMAINS = new Set([
|
||||||
'w3s.link', // ipfs gateway
|
'w3s.link', // ipfs gateway
|
||||||
'dweb.link', // ipfs gateway
|
// 'dweb.link', // ipfs gateway
|
||||||
'nftstorage.link', // ipfs gateway
|
// 'nftstorage.link', // ipfs gateway
|
||||||
'fleek.cool', // ipfs gateway
|
'fleek.cool', // ipfs gateway
|
||||||
'business.site', // Drag'n'Drop site building platform
|
'business.site', // Drag'n'Drop site building platform
|
||||||
'page.link', // Firebase URL Shortener
|
'page.link', // Firebase URL Shortener
|
||||||
'notion.site',
|
// 'notion.site',
|
||||||
'vercel.app',
|
// 'vercel.app',
|
||||||
'gitbook.io'
|
'gitbook.io'
|
||||||
]);
|
]);
|
||||||
|
|
||||||
@ -121,14 +122,18 @@ const sensitiveKeywords = createKeywordFilter([
|
|||||||
'virus-',
|
'virus-',
|
||||||
'icloud-',
|
'icloud-',
|
||||||
'apple-',
|
'apple-',
|
||||||
|
'www.apple.',
|
||||||
'-coinbase',
|
'-coinbase',
|
||||||
'coinbase-'
|
'coinbase-',
|
||||||
|
'lcloud.',
|
||||||
|
'lcloud-'
|
||||||
]);
|
]);
|
||||||
const lowKeywords = createKeywordFilter([
|
const lowKeywords = createKeywordFilter([
|
||||||
'-co-jp',
|
'-co-jp',
|
||||||
'customer.',
|
'customer.',
|
||||||
'customer-',
|
'customer-',
|
||||||
'.www-'
|
'.www-',
|
||||||
|
'instagram'
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const cacheKey = createCacheKey(__filename);
|
const cacheKey = createCacheKey(__filename);
|
||||||
@ -144,6 +149,7 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
|||||||
});
|
});
|
||||||
|
|
||||||
const domainCountMap: Record<string, number> = {};
|
const domainCountMap: Record<string, number> = {};
|
||||||
|
const domainScoreMap: Record<string, number> = {};
|
||||||
|
|
||||||
span.traceChildSync('process phishing domain set', () => {
|
span.traceChildSync('process phishing domain set', () => {
|
||||||
for (let i = 0, len = domainArr.length; i < len; i++) {
|
for (let i = 0, len = domainArr.length; i < len; i++) {
|
||||||
@ -152,8 +158,13 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
|||||||
const {
|
const {
|
||||||
publicSuffix: tld,
|
publicSuffix: tld,
|
||||||
domain: apexDomain,
|
domain: apexDomain,
|
||||||
subdomain
|
subdomain,
|
||||||
} = tldts.parse(line, looseTldtsOpt);
|
isPrivate
|
||||||
|
} = tldts.parse(line, loosTldOptWithPrivateDomains);
|
||||||
|
|
||||||
|
if (isPrivate) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (!tld) {
|
if (!tld) {
|
||||||
console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, tld });
|
console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, tld });
|
||||||
@ -164,16 +175,30 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let sensitiveKeywordsHit: boolean | null = null;
|
|
||||||
if (tld.length < 6 && !tld.includes('.') && !BLACK_TLD.has(tld) && !(sensitiveKeywordsHit = sensitiveKeywords(line))) continue;
|
|
||||||
|
|
||||||
domainCountMap[apexDomain] ||= 0;
|
domainCountMap[apexDomain] ||= 0;
|
||||||
domainCountMap[apexDomain] += calcDomainAbuseScore(line, subdomain, sensitiveKeywordsHit);
|
domainCountMap[apexDomain] += 1;
|
||||||
|
|
||||||
|
if (!(apexDomain in domainScoreMap)) {
|
||||||
|
domainScoreMap[apexDomain] = 0;
|
||||||
|
if (BLACK_TLD.has(tld)) {
|
||||||
|
domainScoreMap[apexDomain] += 4;
|
||||||
|
} else if (tld.length > 6) {
|
||||||
|
domainScoreMap[apexDomain] += 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
domainScoreMap[apexDomain] += calcDomainAbuseScore(subdomain);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
for (const domain in domainCountMap) {
|
for (const domain in domainCountMap) {
|
||||||
if (domainCountMap[domain] >= 10 && !WHITELIST_MAIN_DOMAINS.has(domain)) {
|
if (
|
||||||
|
!WHITELIST_MAIN_DOMAINS.has(domain)
|
||||||
|
&& (
|
||||||
|
domainScoreMap[domain] >= 12
|
||||||
|
|| (domainScoreMap[domain] >= 5 && domainCountMap[domain] >= 4)
|
||||||
|
)
|
||||||
|
) {
|
||||||
|
console.log({ domain });
|
||||||
domainArr.push(`.${domain}`);
|
domainArr.push(`.${domain}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -181,50 +206,47 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
|||||||
return domainArr;
|
return domainArr;
|
||||||
});
|
});
|
||||||
|
|
||||||
export function calcDomainAbuseScore(line: string, subdomain: string | null, sensitiveKeywordsHit: boolean | null) {
|
export function calcDomainAbuseScore(subdomain: string | null) {
|
||||||
let weight = 1;
|
let weight = 0;
|
||||||
|
|
||||||
const hitLowKeywords = lowKeywords(line);
|
|
||||||
|
|
||||||
sensitiveKeywordsHit ??= sensitiveKeywords(line);
|
|
||||||
if (sensitiveKeywordsHit) {
|
|
||||||
weight += 4;
|
|
||||||
if (hitLowKeywords) {
|
|
||||||
weight += 5;
|
|
||||||
}
|
|
||||||
} else if (hitLowKeywords) {
|
|
||||||
weight += 0.5;
|
|
||||||
}
|
|
||||||
|
|
||||||
const lineLen = line.length;
|
|
||||||
|
|
||||||
if (lineLen > 19) {
|
|
||||||
// Add more weight if the domain is long enough
|
|
||||||
if (lineLen > 44) {
|
|
||||||
weight += 3.5;
|
|
||||||
} else if (lineLen > 34) {
|
|
||||||
weight += 2.5;
|
|
||||||
} else if (lineLen > 29) {
|
|
||||||
weight += 1.5;
|
|
||||||
} else if (lineLen > 24) {
|
|
||||||
weight += 0.75;
|
|
||||||
} else {
|
|
||||||
weight += 0.25;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (subdomain) {
|
if (subdomain) {
|
||||||
if (subdomain.length > 40) {
|
const hitLowKeywords = lowKeywords(subdomain);
|
||||||
weight += 3;
|
const sensitiveKeywordsHit = sensitiveKeywords(subdomain);
|
||||||
} else if (subdomain.length > 30) {
|
|
||||||
weight += 1.5;
|
if (sensitiveKeywordsHit) {
|
||||||
} else if (subdomain.length > 20) {
|
weight += 8;
|
||||||
|
if (hitLowKeywords) {
|
||||||
|
weight += 4;
|
||||||
|
}
|
||||||
|
} else if (hitLowKeywords) {
|
||||||
weight += 1;
|
weight += 1;
|
||||||
} else if (subdomain.length > 10) {
|
|
||||||
weight += 0.1;
|
|
||||||
}
|
}
|
||||||
if (subdomain.slice(1).includes('.')) {
|
|
||||||
weight += 1;
|
const subdomainLength = subdomain.length;
|
||||||
|
|
||||||
|
if (subdomainLength > 4) {
|
||||||
|
weight += 0.5;
|
||||||
|
if (subdomainLength > 10) {
|
||||||
|
weight += 0.5;
|
||||||
|
if (subdomainLength > 20) {
|
||||||
|
weight += 1;
|
||||||
|
if (subdomainLength > 30) {
|
||||||
|
weight += 2;
|
||||||
|
if (subdomainLength > 40) {
|
||||||
|
weight += 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (subdomain.startsWith('www.')) {
|
||||||
|
weight += 4;
|
||||||
|
} else if (subdomain.slice(1).includes('.')) {
|
||||||
|
weight += 1;
|
||||||
|
if (subdomain.includes('www.')) {
|
||||||
|
weight += 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -114,6 +114,8 @@ optimus-ads.amap.com
|
|||||||
.sitebeat.crazydomains.com
|
.sitebeat.crazydomains.com
|
||||||
# online d'n'd website builder (https://www.infonline.de/)
|
# online d'n'd website builder (https://www.infonline.de/)
|
||||||
.apps.iocnt.de
|
.apps.iocnt.de
|
||||||
|
# codeanywhere is a heaven of abuse
|
||||||
|
.codeanyapp.com
|
||||||
|
|
||||||
# >> Qihoo 360
|
# >> Qihoo 360
|
||||||
hot.m.shouji.360tpcdn.com
|
hot.m.shouji.360tpcdn.com
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user