Phishing domains trim www

This commit is contained in:
SukkaW
2025-01-19 12:33:27 +08:00
parent a46f24dd9a
commit ca9415ecc6
3 changed files with 41 additions and 19 deletions

View File

@@ -208,7 +208,7 @@ const processPhihsingDomains = cache(function processPhihsingDomains(domainArr:
}); });
const downloads = [ const downloads = [
...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry)), ...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry, true)),
...PHISHING_HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry)) ...PHISHING_HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry))
]; ];

View File

@@ -24,6 +24,24 @@ export function fastNormalizeDomain(domain: string, parsed: TldTsParsed | null =
return parsed.hostname; return parsed.hostname;
} }
/**
 * Normalize a domain, collapsing a lone `www` subdomain onto its registrable domain.
 *
 * @param domain - Raw domain string to normalize.
 * @param parsed - Optional pre-computed tldts parse result; when omitted (or `null`),
 *   `domain` is parsed here with `normalizeTldtsOpt`.
 * @returns `null` when `domain` looks like an IPv4/IPv6 address or when the parse result
 *   is flagged neither ICANN nor private; `parsed.domain` when the subdomain is exactly
 *   `www`; otherwise `parsed.hostname`.
 */
export function fastNormalizeDomainIgnoreWww(domain: string, parsed: TldTsParsed | null = null) {
  // Reject IPs up front so tldts never runs its own "extractHostname" on them.
  // With IPs filtered out here, normalizeTldtsOpt.detectIp can safely stay false.
  const looksLikeIp = isProbablyIpv4(domain) || isProbablyIpv6(domain);
  if (looksLikeIp) {
    return null;
  }

  // Reuse the caller-supplied parse result when there is one.
  const result = parsed ?? tldts.parse(domain, normalizeTldtsOpt);

  // Neither an ICANN nor a private suffix: invalid for our purposes
  // (things like .tor, .dn42, etc).
  if (!(result.isIcann || result.isPrivate)) {
    return null;
  }

  // Only an exact `www` subdomain is dropped; any other hostname is returned as parsed.
  return result.subdomain === 'www' ? result.domain : result.hostname;
}
export function normalizeDomain(domain: string, parsed: TldTsParsed | null = null) { export function normalizeDomain(domain: string, parsed: TldTsParsed | null = null) {
if (domain.length === 0) return null; if (domain.length === 0) return null;

View File

@@ -1,36 +1,35 @@
import picocolors from 'picocolors'; import { fastNormalizeDomain, fastNormalizeDomainIgnoreWww } from '../normalize-domain';
import { fastNormalizeDomain } from '../normalize-domain';
import { processLine } from '../process-line'; import { processLine } from '../process-line';
import { onBlackFound } from './shared'; import { onBlackFound } from './shared';
import { fetchAssets } from '../fetch-assets'; import { fetchAssets } from '../fetch-assets';
import type { Span } from '../../trace'; import type { Span } from '../../trace';
function domainListLineCb(l: string, set: string[], includeAllSubDomain: boolean, meta: string) { function domainListLineCb(l: string, set: string[], includeAllSubDomain: boolean, meta: string, normalizeDomain = fastNormalizeDomain) {
const line = processLine(l); const line = processLine(l);
if (!line) return; if (!line) return;
const domain = fastNormalizeDomain(line); const domain = normalizeDomain(line);
if (!domain) return; if (!domain) {
if (domain !== line) { // console.log(
console.log( // picocolors.red('[process domain list]'),
picocolors.red('[process domain list]'), // picocolors.gray(`line: ${line}`),
picocolors.gray(`line: ${line}`), // picocolors.gray(`domain: ${domain}`),
picocolors.gray(`domain: ${domain}`), // picocolors.gray(meta)
picocolors.gray(meta) // );
);
return; return;
} }
onBlackFound(domain, meta); onBlackFound(domain, meta);
set.push(includeAllSubDomain ? `.${line}` : line); set.push(includeAllSubDomain ? `.${domain}` : domain);
} }
export function processDomainLists( export function processDomainLists(
span: Span, span: Span,
domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, wwwToApex = false
) { ) {
const domainNormalizer = wwwToApex ? fastNormalizeDomainIgnoreWww : fastNormalizeDomain;
return span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => { return span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => {
const text = await span.traceChildAsync('download', () => fetchAssets( const text = await span.traceChildAsync('download', () => fetchAssets(
domainListsUrl, domainListsUrl,
@@ -41,7 +40,7 @@ export function processDomainLists(
span.traceChildSync('parse domain list', () => { span.traceChildSync('parse domain list', () => {
for (let i = 0, len = filterRules.length; i < len; i++) { for (let i = 0, len = filterRules.length; i < len; i++) {
domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl); domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl, domainNormalizer);
} }
}); });
@@ -49,7 +48,12 @@ export function processDomainLists(
}); });
} }
export function processDomainListsWithPreload(domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false) { export function processDomainListsWithPreload(
domainListsUrl: string, mirrors: string[] | null,
includeAllSubDomain = false, wwwToApex = false
) {
const domainNormalizer = wwwToApex ? fastNormalizeDomainIgnoreWww : fastNormalizeDomain;
const downloadPromise = fetchAssets(domainListsUrl, mirrors); const downloadPromise = fetchAssets(domainListsUrl, mirrors);
return (span: Span) => span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => { return (span: Span) => span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => {
@@ -59,7 +63,7 @@ export function processDomainListsWithPreload(domainListsUrl: string, mirrors: s
span.traceChildSync('parse domain list', () => { span.traceChildSync('parse domain list', () => {
for (let i = 0, len = filterRules.length; i < len; i++) { for (let i = 0, len = filterRules.length; i < len; i++) {
domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl); domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl, domainNormalizer);
} }
}); });