Perf: reduce operations in normalizeDomain

This commit is contained in:
SukkaW 2025-01-17 21:28:28 +08:00
parent c07fe1dd56
commit c6bbbf9d4c
6 changed files with 29 additions and 24 deletions

View File

@ -6,28 +6,28 @@ import { normalizeTldtsOpt } from '../constants/loose-tldts-opt';
type TldTsParsed = ReturnType<typeof tldts.parse>;
/**
 * Lightweight domain normalization that performs no emptiness check on its input:
 * the caller is responsible for passing a non-empty `domain`.
 *
 * @param domain - Non-empty candidate domain; only used to compute the default value of `parsed`.
 * @param parsed - Parse result to inspect. Defaults to `tldts.parse(domain, normalizeTldtsOpt)`.
 * @returns `parsed.hostname`, or `null` when the parse result is flagged as an IP
 *          or is neither an ICANN nor a private domain.
 */
export function fastNormalizeDomain(domain: string, parsed: TldTsParsed = tldts.parse(domain, normalizeTldtsOpt)) {
  // Anything the parser flags as an IP is rejected.
  const flaggedAsIp = parsed.isIp;
  if (flaggedAsIp) {
    return null;
  }

  // Neither ICANN nor private: a private invalid domain (things like .tor, .dn42, etc).
  const hasRecognizedSuffix = parsed.isIcann || parsed.isPrivate;
  if (!hasRecognizedSuffix) {
    return null;
  }

  return parsed.hostname;
}
/**
 * Validates `domain` and returns its parsed hostname, or `null` when it is rejected.
 *
 * As listed here, the function returns `null` when `domain` is empty, when the parse
 * result is flagged as an IP, when the parsed hostname is `null`, or when the parse
 * result is neither ICANN nor private. Otherwise the hostname is returned after the
 * dot-trimming step below, or `null` if that step leaves an empty string.
 *
 * @param domain - Raw input; an empty string returns `null` before any parsing happens.
 * @param parsed - Optional pre-computed parse result. When it is `null` (the default),
 *                 `domain` is parsed here via `tldts.parse(domain, normalizeTldtsOpt)`.
 * @returns The resulting hostname string, or `null`.
 *
 * NOTE(review): this listing appears to come from a commit diff rendered without +/- markers,
 * so it presumably interleaves removed and added lines (two `h` declarations, one of them
 * commented out, and two consecutive `return` statements). Confirm against the real file
 * which statements are actually live before relying on the description above.
 */
export function normalizeDomain(domain: string, parsed: TldTsParsed | null = null) {
// Empty input is rejected up front, before paying for a parse.
if (domain.length === 0) return null;
// Reuse the caller-supplied parse result when given; otherwise parse now.
parsed ??= tldts.parse(domain, normalizeTldtsOpt);
if (parsed.isIp) return null;
let h = parsed.hostname;
if (h === null) return null;
// Private invalid domain (things like .tor, .dn42, etc)
if (!parsed.isIcann && !parsed.isPrivate) return null;
// Offsets passed to `slice` below: 1 drops a leading '.', -1 drops a trailing '.'.
let sliceStart = 0;
let sliceEnd = 0;
// const h = parsed.hostname;
// if (h === null) return null;
if (h[0] === '.') sliceStart = 1;
// Index comparison is used instead of `endsWith` (see the lint suppression reason below).
// eslint-disable-next-line sukka/string/prefer-string-starts-ends-with -- performance
if (h[h.length - 1] === '.') sliceEnd = -1;
// NOTE(review): when only the leading dot is present this calls `h.slice(1, 0)`, which
// yields '' and makes the function return null; confirm whether that is intended.
if (sliceStart !== 0 || sliceEnd !== 0) {
h = h.slice(sliceStart, sliceEnd);
}
return h.length > 0 ? h : null;
// NOTE(review): unreachable as listed, because the statement above always returns.
return parsed.hostname;
}

View File

@ -1,5 +1,5 @@
import picocolors from 'picocolors';
import { normalizeDomain } from '../normalize-domain';
import { fastNormalizeDomain } from '../normalize-domain';
import { processLine } from '../process-line';
import { onBlackFound } from './shared';
import { fetchAssetsWithout304 } from '../fetch-assets';
@ -9,7 +9,7 @@ function domainListLineCb(l: string, set: string[], includeAllSubDomain: boolean
const line = processLine(l);
if (!line) return;
const domain = normalizeDomain(line);
const domain = fastNormalizeDomain(line);
if (!domain) return;
if (domain !== line) {
console.log(

View File

@ -3,7 +3,7 @@ import type { Span } from '../../trace';
import { fetchAssetsWithout304 } from '../fetch-assets';
import { onBlackFound, onWhiteFound } from './shared';
import { createRetrieKeywordFilter as createKeywordFilter } from 'foxts/retrie';
import { normalizeDomain } from '../normalize-domain';
import { fastNormalizeDomain } from '../normalize-domain';
import { looseTldtsOpt } from '../../constants/loose-tldts-opt';
import tldts from 'tldts-experimental';
import { NetworkFilter } from '@ghostery/adblocker';
@ -227,7 +227,7 @@ export function parse($line: string, result: [string, ParseType], allowThirdPart
&& filter.isPlain() // isPlain() === !isRegex()
&& (!filter.isFullRegex())
) {
const hostname = normalizeDomain(filter.hostname);
const hostname = fastNormalizeDomain(filter.hostname);
if (!hostname) {
result[1] = ParseType.Null;
return result;
@ -421,6 +421,11 @@ export function parse($line: string, result: [string, ParseType], allowThirdPart
}
const sliced = (sliceStart > 0 || sliceEnd < 0) ? line.slice(sliceStart, sliceEnd === 0 ? undefined : sliceEnd) : line;
if (sliced.length === 0) {
result[1] = ParseType.Null;
return result;
}
if (sliced.charCodeAt(0) === 45 /* - */) {
// line.startsWith('-') is not a valid domain
result[1] = ParseType.ErrorMessage;
@ -437,7 +442,7 @@ export function parse($line: string, result: [string, ParseType], allowThirdPart
return result;
}
const domain = normalizeDomain(sliced);
const domain = fastNormalizeDomain(sliced);
if (domain && domain === sliced) {
result[0] = domain;

View File

@ -1,6 +1,6 @@
import type { Span } from '../../trace';
import { fetchAssetsWithout304 } from '../fetch-assets';
import { normalizeDomain } from '../normalize-domain';
import { fastNormalizeDomain } from '../normalize-domain';
import { processLine } from '../process-line';
import { onBlackFound } from './shared';
@ -14,7 +14,7 @@ function hostsLineCb(l: string, set: string[], includeAllSubDomain: boolean, met
if (!_domain) {
return;
}
const domain = normalizeDomain(_domain);
const domain = fastNormalizeDomain(_domain);
if (!domain) {
return;
}

View File

@ -1,5 +1,5 @@
import { processLine } from './lib/process-line';
import { normalizeDomain } from './lib/normalize-domain';
import { fastNormalizeDomain } from './lib/normalize-domain';
import { HostnameSmolTrie } from './lib/trie';
// import { Readable } from 'stream';
import { parse } from 'csv-parse/sync';
@ -54,7 +54,7 @@ export async function parseGfwList() {
trie.add(line);
continue;
}
const d = normalizeDomain(line);
const d = fastNormalizeDomain(line);
if (d) {
trie.add(d);
continue;

View File

@ -879,7 +879,7 @@ export const HK: StreamService[] = [
NOW_E,
VIUTV,
MYTV_SUPER,
HBO_ASIA,
HBO_ASIA
// BILIBILI_INTL
];