Reject hosts: drop the leading `www.` prefix

This commit is contained in:
SukkaW
2025-01-24 14:48:09 +08:00
parent 98d37c3749
commit 9790b40a72
5 changed files with 37 additions and 32 deletions

View File

@@ -208,7 +208,7 @@ const processPhihsingDomains = cache(function processPhihsingDomains(domainArr:
});
const downloads = [
...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry, true)),
...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry)),
...PHISHING_HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry))
];

View File

@@ -7,6 +7,32 @@ import { isProbablyIpv4, isProbablyIpv6 } from 'foxts/is-probably-ip';
type TldTsParsed = ReturnType<typeof tldts.parse>;
/**
 * Normalizes `domain` and removes a leading `www` label from the result.
 *
 * No emptiness check is performed here: the caller must pass a non-empty `domain`.
 *
 * @param domain - the hostname to normalize
 * @param parsed - an existing tldts parse result to reuse; when `null`, `domain` is parsed here
 * @returns `null` when `domain` looks like an IP address or when its suffix is neither
 * ICANN nor private; otherwise the parsed hostname, minus a leading `www.` label if the
 * subdomain is exactly `www` or begins with `www.`
 */
export function fastNormalizeDomainWithoutWww(domain: string, parsed: TldTsParsed | null = null) {
  // Reject IPs up front so tldts never runs its own "extractHostname" on them.
  // Because IPs are filtered here, normalizeTldtsOpt.detectIp can safely be false.
  const looksLikeIp = isProbablyIpv4(domain) || isProbablyIpv6(domain);
  if (looksLikeIp) return null;

  const info = parsed ?? tldts.parse(domain, normalizeTldtsOpt);

  // Neither ICANN nor private: an invalid private-use name (things like .tor, .dn42, etc)
  if (!(info.isIcann || info.isPrivate)) return null;

  const sub = info.subdomain;
  // No subdomain at all: nothing to strip.
  if (!sub) return info.hostname;
  // The subdomain is exactly "www": only the registrable domain remains.
  if (sub === 'www') return info.domain;

  // "www.<rest>": drop the 4-character "www." prefix and re-attach the registrable domain.
  return sub.startsWith('www.')
    ? `${sub.slice(4)}.${info.domain}`
    : info.hostname;
}
/**
* Skipped the input non-empty check, the `domain` should not be empty.
*/
@@ -24,24 +50,6 @@ export function fastNormalizeDomain(domain: string, parsed: TldTsParsed | null =
return parsed.hostname;
}
/**
 * Normalizes `domain`, collapsing a bare `www` subdomain onto the registrable domain.
 *
 * Only a subdomain that is exactly `www` is collapsed; any other subdomain
 * (including ones that merely start with `www.`) is returned untouched as part of the hostname.
 *
 * @param domain - the hostname to normalize
 * @param parsed - an existing tldts parse result to reuse; when `null`, `domain` is parsed here
 * @returns `null` when `domain` looks like an IP address or when its suffix is neither
 * ICANN nor private; otherwise the registrable domain (for `www.<domain>`) or the parsed hostname
 */
export function fastNormalizeDomainIgnoreWww(domain: string, parsed: TldTsParsed | null = null) {
  // Reject IPs up front so tldts never runs its own "extractHostname" on them.
  // Because IPs are filtered here, normalizeTldtsOpt.detectIp can safely be false.
  const looksLikeIp = isProbablyIpv4(domain) || isProbablyIpv6(domain);
  if (looksLikeIp) return null;

  const info = parsed ?? tldts.parse(domain, normalizeTldtsOpt);

  // Neither ICANN nor private: an invalid private-use name (things like .tor, .dn42, etc)
  if (!(info.isIcann || info.isPrivate)) return null;

  return info.subdomain === 'www' ? info.domain : info.hostname;
}
export function normalizeDomain(domain: string, parsed: TldTsParsed | null = null) {
if (domain.length === 0) return null;

View File

@@ -1,4 +1,4 @@
import { fastNormalizeDomain, fastNormalizeDomainIgnoreWww } from '../normalize-domain';
import { fastNormalizeDomain, fastNormalizeDomainWithoutWww } from '../normalize-domain';
import { processLine } from '../process-line';
import { onBlackFound } from './shared';
import { fetchAssets } from '../fetch-assets';
@@ -27,9 +27,8 @@ function domainListLineCbIncludeAllSubdomain(line: string, set: string[], meta:
export function processDomainLists(
span: Span,
domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, wwwToApex = false
domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false
) {
const domainNormalizer = wwwToApex ? fastNormalizeDomainIgnoreWww : fastNormalizeDomain;
const lineCb = includeAllSubDomain ? domainListLineCbIncludeAllSubdomain : domainListLineCb;
return span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => {
@@ -42,7 +41,7 @@ export function processDomainLists(
span.traceChildSync('parse domain list', () => {
for (let i = 0, len = filterRules.length; i < len; i++) {
lineCb(filterRules[i], domainSets, domainListsUrl, domainNormalizer);
lineCb(filterRules[i], domainSets, domainListsUrl, fastNormalizeDomainWithoutWww);
}
});
@@ -52,10 +51,8 @@ export function processDomainLists(
export function processDomainListsWithPreload(
domainListsUrl: string, mirrors: string[] | null,
includeAllSubDomain = false, wwwToApex = false
includeAllSubDomain = false
) {
const domainNormalizer = wwwToApex ? fastNormalizeDomainIgnoreWww : fastNormalizeDomain;
const downloadPromise = fetchAssets(domainListsUrl, mirrors, true);
const lineCb = includeAllSubDomain ? domainListLineCbIncludeAllSubdomain : domainListLineCb;
@@ -65,7 +62,7 @@ export function processDomainListsWithPreload(
span.traceChildSync('parse domain list', () => {
for (let i = 0, len = filterRules.length; i < len; i++) {
lineCb(filterRules[i], domainSets, domainListsUrl, domainNormalizer);
lineCb(filterRules[i], domainSets, domainListsUrl, fastNormalizeDomainWithoutWww);
}
});

View File

@@ -3,10 +3,10 @@ import type { Span } from '../../trace';
import { fetchAssets } from '../fetch-assets';
import { onBlackFound, onWhiteFound } from './shared';
import { createRetrieKeywordFilter as createKeywordFilter } from 'foxts/retrie';
import { fastNormalizeDomain } from '../normalize-domain';
import { looseTldtsOpt } from '../../constants/loose-tldts-opt';
import tldts from 'tldts-experimental';
import { NetworkFilter } from '@ghostery/adblocker';
import { fastNormalizeDomainWithoutWww } from '../normalize-domain';
const enum ParseType {
WhiteIncludeSubdomain = 0,
@@ -221,7 +221,7 @@ export function parse($line: string, result: [string, ParseType], includeThirdPa
&& filter.isPlain() // isPlain() === !isRegex()
&& (!filter.isFullRegex())
) {
const hostname = fastNormalizeDomain(filter.hostname);
const hostname = fastNormalizeDomainWithoutWww(filter.hostname);
if (!hostname) {
result[1] = ParseType.Null;
return result;
@@ -436,7 +436,7 @@ export function parse($line: string, result: [string, ParseType], includeThirdPa
return result;
}
const domain = fastNormalizeDomain(sliced);
const domain = fastNormalizeDomainWithoutWww(sliced);
if (domain && domain === sliced) {
result[0] = domain;

View File

@@ -1,6 +1,6 @@
import type { Span } from '../../trace';
import { fetchAssets } from '../fetch-assets';
import { fastNormalizeDomain } from '../normalize-domain';
import { fastNormalizeDomainWithoutWww } from '../normalize-domain';
import { onBlackFound } from './shared';
function hostsLineCb(line: string, set: string[], includeAllSubDomain: boolean, meta: string) {
@@ -8,7 +8,7 @@ function hostsLineCb(line: string, set: string[], includeAllSubDomain: boolean,
if (!_domain) {
return;
}
const domain = fastNormalizeDomain(_domain);
const domain = fastNormalizeDomainWithoutWww(_domain);
if (!domain) {
return;
}