mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Feat: update phishing domain threshold
This commit is contained in:
parent
6380d0be5a
commit
b5a6e05a84
@ -11,7 +11,6 @@ import createKeywordFilter from './lib/aho-corasick';
|
||||
import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
|
||||
import { sortDomains } from './lib/stable-sort-domain';
|
||||
import { task } from './trace';
|
||||
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
|
||||
import * as tldts from 'tldts';
|
||||
import { SHARED_DESCRIPTION } from './lib/constants';
|
||||
import { getPhishingDomains } from './lib/get-phishing-domains';
|
||||
|
||||
10
Build/lib/get-phishing-domains.test.ts
Normal file
10
Build/lib/get-phishing-domains.test.ts
Normal file
@ -0,0 +1,10 @@
|
||||
// eslint-disable-next-line import-x/no-unresolved -- bun
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
|
||||
import { calcDomainAbuseScore } from './get-phishing-domains';
|
||||
|
||||
describe('sortDomains', () => {
|
||||
it('nmdj.pl', () => {
|
||||
console.log(calcDomainAbuseScore('.01462ccca801fed55370d79231c876e5.nmdj.pl'));
|
||||
});
|
||||
});
|
||||
@ -103,21 +103,23 @@ const BLACK_TLD = new Set([
|
||||
]);
|
||||
|
||||
export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => {
|
||||
const [domainSet, domainSet2, gorhill] = await Promise.all([
|
||||
processDomainLists(span, 'https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt', true, TTL.THREE_HOURS()),
|
||||
isCI
|
||||
? processDomainLists(span, 'https://phishing.army/download/phishing_army_blocklist.txt', true, TTL.THREE_HOURS())
|
||||
: null,
|
||||
getGorhillPublicSuffixPromise()
|
||||
]);
|
||||
if (domainSet2) {
|
||||
const gorhill = await getGorhillPublicSuffixPromise();
|
||||
|
||||
const domainSet = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => {
|
||||
const [domainSet, domainSet2] = await Promise.all([
|
||||
processDomainLists(curSpan, 'https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt', true, TTL.THREE_HOURS()),
|
||||
processDomainLists(curSpan, 'https://phishing.army/download/phishing_army_blocklist.txt', true, TTL.THREE_HOURS())
|
||||
]);
|
||||
|
||||
SetAdd(domainSet, domainSet2);
|
||||
}
|
||||
|
||||
span.traceChildSync('whitelisting phishing domains', (parentSpan) => {
|
||||
const trieForRemovingWhiteListed = parentSpan.traceChildSync('create trie for whitelisting', () => createTrie(domainSet));
|
||||
return domainSet;
|
||||
});
|
||||
|
||||
return parentSpan.traceChild('delete whitelisted from domainset').traceSyncFn(() => {
|
||||
span.traceChildSync('whitelisting phishing domains', (curSpan) => {
|
||||
const trieForRemovingWhiteListed = curSpan.traceChildSync('create trie for whitelisting', () => createTrie(domainSet));
|
||||
|
||||
return curSpan.traceChild('delete whitelisted from domainset').traceSyncFn(() => {
|
||||
for (let i = 0, len = WHITELIST_DOMAIN.length; i < len; i++) {
|
||||
const white = WHITELIST_DOMAIN[i];
|
||||
domainSet.delete(white);
|
||||
@ -134,68 +136,28 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
||||
const domainArr = Array.from(domainSet);
|
||||
|
||||
for (let i = 0, len = domainArr.length; i < len; i++) {
|
||||
const line = processLine(domainArr[i]);
|
||||
if (!line) continue;
|
||||
const line = domainArr[i];
|
||||
|
||||
const apexDomain = gorhill.getDomain(line);
|
||||
if (!apexDomain) continue;
|
||||
const safeGorhillLine = line[0] === '.' ? line.slice(1) : line;
|
||||
|
||||
domainCountMap[apexDomain] ||= 0;
|
||||
|
||||
const isPhishingDomainMockingCoJp = line.includes('-co-jp');
|
||||
if (isPhishingDomainMockingCoJp) {
|
||||
domainCountMap[apexDomain] += 0.5;
|
||||
const apexDomain = gorhill.getDomain(safeGorhillLine);
|
||||
if (!apexDomain) {
|
||||
console.log({ line });
|
||||
continue;
|
||||
}
|
||||
|
||||
if (line.startsWith('.amaz')) {
|
||||
domainCountMap[apexDomain] += 0.5;
|
||||
|
||||
if (line.startsWith('.amazon-')) {
|
||||
domainCountMap[apexDomain] += 4.5;
|
||||
}
|
||||
if (isPhishingDomainMockingCoJp) {
|
||||
domainCountMap[apexDomain] += 4;
|
||||
}
|
||||
} else if (line.startsWith('.customer')) {
|
||||
domainCountMap[apexDomain] += 0.25;
|
||||
}
|
||||
|
||||
const tld = gorhill.getPublicSuffix(line[0] === '.' ? line.slice(1) : line);
|
||||
const tld = gorhill.getPublicSuffix(safeGorhillLine);
|
||||
if (!tld || !BLACK_TLD.has(tld)) continue;
|
||||
|
||||
// Only when tld is black will this 1 weight be added
|
||||
domainCountMap[apexDomain] += 1;
|
||||
|
||||
const lineLen = line.length;
|
||||
|
||||
if (lineLen > 19) {
|
||||
// Add more weight if the domain is long enough
|
||||
if (lineLen > 44) {
|
||||
domainCountMap[apexDomain] += 3.5;
|
||||
} else if (lineLen > 34) {
|
||||
domainCountMap[apexDomain] += 2.5;
|
||||
} else if (lineLen > 29) {
|
||||
domainCountMap[apexDomain] += 1.5;
|
||||
} else if (lineLen > 24) {
|
||||
domainCountMap[apexDomain] += 0.75;
|
||||
} else {
|
||||
domainCountMap[apexDomain] += 0.25;
|
||||
}
|
||||
|
||||
if (domainCountMap[apexDomain] < 5) {
|
||||
const subdomain = tldts.getSubdomain(line, { detectIp: false });
|
||||
if (subdomain?.includes('.')) {
|
||||
domainCountMap[apexDomain] += 1.5;
|
||||
}
|
||||
}
|
||||
}
|
||||
domainCountMap[apexDomain] ||= 0;
|
||||
domainCountMap[apexDomain] += calcDomainAbuseScore(line);
|
||||
}
|
||||
});
|
||||
|
||||
const results = span.traceChildSync('get final phishing results', () => {
|
||||
const res: string[] = [];
|
||||
for (const domain in domainCountMap) {
|
||||
if (domainCountMap[domain] >= 5) {
|
||||
if (domainCountMap[domain] >= 8) {
|
||||
res.push(`.${domain}`);
|
||||
}
|
||||
}
|
||||
@ -204,3 +166,61 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
||||
|
||||
return [results, domainSet] as const;
|
||||
});
|
||||
|
||||
export function calcDomainAbuseScore(line: string) {
|
||||
let weight = 1;
|
||||
|
||||
const isPhishingDomainMockingCoJp = line.includes('-co-jp');
|
||||
if (isPhishingDomainMockingCoJp) {
|
||||
weight += 0.5;
|
||||
}
|
||||
|
||||
if (line.startsWith('.amaz')) {
|
||||
weight += 0.5;
|
||||
|
||||
if (line.startsWith('.amazon-')) {
|
||||
weight += 4.5;
|
||||
}
|
||||
if (isPhishingDomainMockingCoJp) {
|
||||
weight += 4;
|
||||
}
|
||||
} else if (line.includes('.customer')) {
|
||||
weight += 0.25;
|
||||
}
|
||||
|
||||
const lineLen = line.length;
|
||||
|
||||
if (lineLen > 19) {
|
||||
// Add more weight if the domain is long enough
|
||||
if (lineLen > 44) {
|
||||
weight += 3.5;
|
||||
} else if (lineLen > 34) {
|
||||
weight += 2.5;
|
||||
} else if (lineLen > 29) {
|
||||
weight += 1.5;
|
||||
} else if (lineLen > 24) {
|
||||
weight += 0.75;
|
||||
} else {
|
||||
weight += 0.25;
|
||||
}
|
||||
}
|
||||
|
||||
const subdomain = tldts.getSubdomain(line, { detectIp: false });
|
||||
|
||||
if (subdomain) {
|
||||
if (subdomain.slice(1).includes('.')) {
|
||||
weight += 1;
|
||||
}
|
||||
if (subdomain.length > 40) {
|
||||
weight += 3;
|
||||
} else if (subdomain.length > 30) {
|
||||
weight += 1.5;
|
||||
} else if (subdomain.length > 20) {
|
||||
weight += 1;
|
||||
} else if (subdomain.length > 10) {
|
||||
weight += 0.1;
|
||||
}
|
||||
}
|
||||
|
||||
return weight;
|
||||
}
|
||||
|
||||
@ -302,6 +302,7 @@ inst.360safe.com
|
||||
.pages.net.br
|
||||
.myenotice.com
|
||||
.eu5.net
|
||||
.jdie.pl
|
||||
|
||||
# --- AD Block ---
|
||||
|
||||
@ -733,6 +734,8 @@ comments.gazo.space
|
||||
.footprintdns.com
|
||||
.measure.office.com
|
||||
|
||||
.opinionjet.com
|
||||
|
||||
# >> Tracking
|
||||
.mktg.tags.f5.com
|
||||
.trk.caseads.com
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user