Phishing domains trim www

This commit is contained in:
SukkaW
2025-01-19 12:33:27 +08:00
parent a46f24dd9a
commit ca9415ecc6
3 changed files with 41 additions and 19 deletions

View File

@@ -208,7 +208,7 @@ const processPhihsingDomains = cache(function processPhihsingDomains(domainArr:
});
const downloads = [
...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry)),
...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry, true)),
...PHISHING_HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry))
];

View File

@@ -24,6 +24,24 @@ export function fastNormalizeDomain(domain: string, parsed: TldTsParsed | null =
return parsed.hostname;
}
/**
 * Normalizes a domain like the sibling fast normalizer, but collapses a lone
 * `www` subdomain onto the parsed `domain` (e.g. the `www` label is dropped).
 *
 * @param domain - Raw domain string to normalize.
 * @param parsed - Optional pre-computed tldts parse result; when `null`, the
 *   domain is parsed here with `normalizeTldtsOpt`.
 * @returns `null` when the input looks like an IPv4/IPv6 address or is neither
 *   an ICANN nor a private-suffix domain; `parsed.domain` when the subdomain is
 *   exactly `www`; otherwise `parsed.hostname`.
 */
export function fastNormalizeDomainIgnoreWww(domain: string, parsed: TldTsParsed | null = null) {
  // Reject IPs up front so tldts never runs its own "extractHostname" on them.
  // With IPs already bailed out, normalizeTldtsOpt.detectIp can safely be false.
  const looksLikeIp = isProbablyIpv4(domain) || isProbablyIpv6(domain);
  if (looksLikeIp) {
    return null;
  }

  // Reuse the caller-supplied parse result when there is one.
  const result = parsed ?? tldts.parse(domain, normalizeTldtsOpt);

  // Private invalid domain (things like .tor, .dn42, etc)
  if (!(result.isIcann || result.isPrivate)) {
    return null;
  }

  // Only an exact "www" subdomain is collapsed; deeper labels (e.g. "www.foo")
  // fall through and keep the full hostname.
  return result.subdomain === 'www' ? result.domain : result.hostname;
}
export function normalizeDomain(domain: string, parsed: TldTsParsed | null = null) {
if (domain.length === 0) return null;

View File

@@ -1,36 +1,35 @@
import picocolors from 'picocolors';
import { fastNormalizeDomain } from '../normalize-domain';
import { fastNormalizeDomain, fastNormalizeDomainIgnoreWww } from '../normalize-domain';
import { processLine } from '../process-line';
import { onBlackFound } from './shared';
import { fetchAssets } from '../fetch-assets';
import type { Span } from '../../trace';
function domainListLineCb(l: string, set: string[], includeAllSubDomain: boolean, meta: string) {
function domainListLineCb(l: string, set: string[], includeAllSubDomain: boolean, meta: string, normalizeDomain = fastNormalizeDomain) {
const line = processLine(l);
if (!line) return;
const domain = fastNormalizeDomain(line);
if (!domain) return;
if (domain !== line) {
console.log(
picocolors.red('[process domain list]'),
picocolors.gray(`line: ${line}`),
picocolors.gray(`domain: ${domain}`),
picocolors.gray(meta)
);
const domain = normalizeDomain(line);
if (!domain) {
// console.log(
// picocolors.red('[process domain list]'),
// picocolors.gray(`line: ${line}`),
// picocolors.gray(`domain: ${domain}`),
// picocolors.gray(meta)
// );
return;
}
onBlackFound(domain, meta);
set.push(includeAllSubDomain ? `.${line}` : line);
set.push(includeAllSubDomain ? `.${domain}` : domain);
}
export function processDomainLists(
span: Span,
domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false
domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, wwwToApex = false
) {
const domainNormalizer = wwwToApex ? fastNormalizeDomainIgnoreWww : fastNormalizeDomain;
return span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => {
const text = await span.traceChildAsync('download', () => fetchAssets(
domainListsUrl,
@@ -41,7 +40,7 @@ export function processDomainLists(
span.traceChildSync('parse domain list', () => {
for (let i = 0, len = filterRules.length; i < len; i++) {
domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl);
domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl, domainNormalizer);
}
});
@@ -49,7 +48,12 @@ export function processDomainLists(
});
}
export function processDomainListsWithPreload(domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false) {
export function processDomainListsWithPreload(
domainListsUrl: string, mirrors: string[] | null,
includeAllSubDomain = false, wwwToApex = false
) {
const domainNormalizer = wwwToApex ? fastNormalizeDomainIgnoreWww : fastNormalizeDomain;
const downloadPromise = fetchAssets(domainListsUrl, mirrors);
return (span: Span) => span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => {
@@ -59,7 +63,7 @@ export function processDomainListsWithPreload(domainListsUrl: string, mirrors: s
span.traceChildSync('parse domain list', () => {
for (let i = 0, len = filterRules.length; i < len; i++) {
domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl);
domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl, domainNormalizer);
}
});