Feat: update phishing domain threshold

This commit is contained in:
SukkaW 2024-05-19 03:58:00 +08:00
parent 6380d0be5a
commit b5a6e05a84
4 changed files with 95 additions and 63 deletions

View File

@ -11,7 +11,6 @@ import createKeywordFilter from './lib/aho-corasick';
import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line'; import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
import { sortDomains } from './lib/stable-sort-domain'; import { sortDomains } from './lib/stable-sort-domain';
import { task } from './trace'; import { task } from './trace';
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
import * as tldts from 'tldts'; import * as tldts from 'tldts';
import { SHARED_DESCRIPTION } from './lib/constants'; import { SHARED_DESCRIPTION } from './lib/constants';
import { getPhishingDomains } from './lib/get-phishing-domains'; import { getPhishingDomains } from './lib/get-phishing-domains';

View File

@ -0,0 +1,10 @@
// eslint-disable-next-line import-x/no-unresolved -- bun
import { describe, expect, it } from 'bun:test';
import { calcDomainAbuseScore } from './get-phishing-domains';
// Test suite for calcDomainAbuseScore (the suite was previously mislabelled
// as 'sortDomains' and only logged the score, so it could never fail).
describe('calcDomainAbuseScore', () => {
  it('nmdj.pl', () => {
    // Expected score breakdown for this entry:
    //   1    base weight
    // + 2.5  whole entry is longer than 34 (but not longer than 44) characters
    // + 1.5  subdomain is longer than 30 (but not longer than 40) characters
    // The subdomain is a single label, so no multi-label bonus applies.
    expect(calcDomainAbuseScore('.01462ccca801fed55370d79231c876e5.nmdj.pl')).toBe(5);
  });
});

View File

@ -103,21 +103,23 @@ const BLACK_TLD = new Set([
]); ]);
export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => { export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => {
const [domainSet, domainSet2, gorhill] = await Promise.all([ const gorhill = await getGorhillPublicSuffixPromise();
processDomainLists(span, 'https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt', true, TTL.THREE_HOURS()),
isCI const domainSet = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => {
? processDomainLists(span, 'https://phishing.army/download/phishing_army_blocklist.txt', true, TTL.THREE_HOURS()) const [domainSet, domainSet2] = await Promise.all([
: null, processDomainLists(curSpan, 'https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt', true, TTL.THREE_HOURS()),
getGorhillPublicSuffixPromise() processDomainLists(curSpan, 'https://phishing.army/download/phishing_army_blocklist.txt', true, TTL.THREE_HOURS())
]); ]);
if (domainSet2) {
SetAdd(domainSet, domainSet2); SetAdd(domainSet, domainSet2);
}
span.traceChildSync('whitelisting phishing domains', (parentSpan) => { return domainSet;
const trieForRemovingWhiteListed = parentSpan.traceChildSync('create trie for whitelisting', () => createTrie(domainSet)); });
return parentSpan.traceChild('delete whitelisted from domainset').traceSyncFn(() => { span.traceChildSync('whitelisting phishing domains', (curSpan) => {
const trieForRemovingWhiteListed = curSpan.traceChildSync('create trie for whitelisting', () => createTrie(domainSet));
return curSpan.traceChild('delete whitelisted from domainset').traceSyncFn(() => {
for (let i = 0, len = WHITELIST_DOMAIN.length; i < len; i++) { for (let i = 0, len = WHITELIST_DOMAIN.length; i < len; i++) {
const white = WHITELIST_DOMAIN[i]; const white = WHITELIST_DOMAIN[i];
domainSet.delete(white); domainSet.delete(white);
@ -134,68 +136,28 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
const domainArr = Array.from(domainSet); const domainArr = Array.from(domainSet);
for (let i = 0, len = domainArr.length; i < len; i++) { for (let i = 0, len = domainArr.length; i < len; i++) {
const line = processLine(domainArr[i]); const line = domainArr[i];
if (!line) continue;
const apexDomain = gorhill.getDomain(line); const safeGorhillLine = line[0] === '.' ? line.slice(1) : line;
if (!apexDomain) continue;
domainCountMap[apexDomain] ||= 0; const apexDomain = gorhill.getDomain(safeGorhillLine);
if (!apexDomain) {
const isPhishingDomainMockingCoJp = line.includes('-co-jp'); console.log({ line });
if (isPhishingDomainMockingCoJp) { continue;
domainCountMap[apexDomain] += 0.5;
} }
if (line.startsWith('.amaz')) { const tld = gorhill.getPublicSuffix(safeGorhillLine);
domainCountMap[apexDomain] += 0.5;
if (line.startsWith('.amazon-')) {
domainCountMap[apexDomain] += 4.5;
}
if (isPhishingDomainMockingCoJp) {
domainCountMap[apexDomain] += 4;
}
} else if (line.startsWith('.customer')) {
domainCountMap[apexDomain] += 0.25;
}
const tld = gorhill.getPublicSuffix(line[0] === '.' ? line.slice(1) : line);
if (!tld || !BLACK_TLD.has(tld)) continue; if (!tld || !BLACK_TLD.has(tld)) continue;
// Only when tld is black will this 1 weight be added domainCountMap[apexDomain] ||= 0;
domainCountMap[apexDomain] += 1; domainCountMap[apexDomain] += calcDomainAbuseScore(line);
const lineLen = line.length;
if (lineLen > 19) {
// Add more weight if the domain is long enough
if (lineLen > 44) {
domainCountMap[apexDomain] += 3.5;
} else if (lineLen > 34) {
domainCountMap[apexDomain] += 2.5;
} else if (lineLen > 29) {
domainCountMap[apexDomain] += 1.5;
} else if (lineLen > 24) {
domainCountMap[apexDomain] += 0.75;
} else {
domainCountMap[apexDomain] += 0.25;
}
if (domainCountMap[apexDomain] < 5) {
const subdomain = tldts.getSubdomain(line, { detectIp: false });
if (subdomain?.includes('.')) {
domainCountMap[apexDomain] += 1.5;
}
}
}
} }
}); });
const results = span.traceChildSync('get final phishing results', () => { const results = span.traceChildSync('get final phishing results', () => {
const res: string[] = []; const res: string[] = [];
for (const domain in domainCountMap) { for (const domain in domainCountMap) {
if (domainCountMap[domain] >= 5) { if (domainCountMap[domain] >= 8) {
res.push(`.${domain}`); res.push(`.${domain}`);
} }
} }
@ -204,3 +166,61 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
return [results, domainSet] as const; return [results, domainSet] as const;
}); });
/**
 * Computes the abuse weight that a single phishing-list entry contributes.
 *
 * The weight starts at 1 and grows with:
 * - brand-impersonation hints (`-co-jp` anywhere, a `.amaz` / `.amazon-`
 *   prefix, or `.customer` anywhere when there is no `.amaz` prefix),
 * - the total length of the entry,
 * - the shape and length of the subdomain reported by `tldts`.
 *
 * @param line - The domain entry to score. The prefix checks look for a
 *   leading dot (e.g. `.amazon-foo.example`).
 * @returns The accumulated weight for this entry (always at least 1).
 */
export function calcDomainAbuseScore(line: string) {
  let score = 1;

  // Brand-impersonation hints.
  const mocksCoJp = line.includes('-co-jp');
  if (mocksCoJp) score += 0.5;

  if (line.startsWith('.amaz')) {
    score += 0.5;
    if (line.startsWith('.amazon-')) score += 4.5;
    // "amaz…" combined with "-co-jp" is weighted much more heavily.
    if (mocksCoJp) score += 4;
  } else if (line.includes('.customer')) {
    score += 0.25;
  }

  // The longer the whole entry, the larger the bonus.
  score += abuseScoreLineLengthBonus(line.length);

  const sub = tldts.getSubdomain(line, { detectIp: false });
  if (!sub) return score;

  // Skip the first character before looking for a dot — presumably so that a
  // leading dot carried over from `line` is not mistaken for a label separator.
  if (sub.slice(1).includes('.')) score += 1;

  // Keep this addition last: 0.1 is not exactly representable, so the order of
  // the additions matters for the final floating point value.
  score += abuseScoreSubdomainLengthBonus(sub.length);

  return score;
}

/** Bonus for the total length of an entry; entries of 19 characters or fewer get none. */
function abuseScoreLineLengthBonus(length: number): number {
  if (length > 44) return 3.5;
  if (length > 34) return 2.5;
  if (length > 29) return 1.5;
  if (length > 24) return 0.75;
  if (length > 19) return 0.25;
  return 0;
}

/** Bonus for the length of the subdomain part; 10 characters or fewer get none. */
function abuseScoreSubdomainLengthBonus(length: number): number {
  if (length > 40) return 3;
  if (length > 30) return 1.5;
  if (length > 20) return 1;
  if (length > 10) return 0.1;
  return 0;
}

View File

@ -302,6 +302,7 @@ inst.360safe.com
.pages.net.br .pages.net.br
.myenotice.com .myenotice.com
.eu5.net .eu5.net
.jdie.pl
# --- AD Block --- # --- AD Block ---
@ -733,6 +734,8 @@ comments.gazo.space
.footprintdns.com .footprintdns.com
.measure.office.com .measure.office.com
.opinionjet.com
# >> Tracking # >> Tracking
.mktg.tags.f5.com .mktg.tags.f5.com
.trk.caseads.com .trk.caseads.com