diff --git a/Build/build-domestic-ruleset.ts b/Build/build-domestic-ruleset.ts index 284b0a52..9c047a0d 100644 --- a/Build/build-domestic-ruleset.ts +++ b/Build/build-domestic-ruleset.ts @@ -56,10 +56,11 @@ export const buildDomesticRuleset = task(import.meta.path, async (span) => { : [] ), ...domains.flatMap((domain) => [ - `${domain} = server:${dns}`, - `*.${domain} = server:${dns}` - ]) - ]) + `${domain} = server:${dns}`, + `*.${domain} = server:${dns}` + ]) + ] + ) ], path.resolve(import.meta.dir, '../Modules/sukka_local_dns_mapping.sgmodule') ) diff --git a/Build/build-internal-cdn-rules.ts b/Build/build-internal-cdn-rules.ts index f4ea07b4..b2359fb9 100644 --- a/Build/build-internal-cdn-rules.ts +++ b/Build/build-internal-cdn-rules.ts @@ -4,7 +4,6 @@ import { readFileByLine } from './lib/fetch-text-by-line'; import { sortDomains } from './lib/stable-sort-domain'; import { task } from './trace'; import { compareAndWriteFile } from './lib/create-file'; -import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix'; import { domainDeduper } from './lib/domain-deduper'; import { sort } from './lib/timsort'; diff --git a/Build/lib/get-phishing-domains.test.ts b/Build/lib/get-phishing-domains.test.ts index 07a45906..7183d032 100644 --- a/Build/lib/get-phishing-domains.test.ts +++ b/Build/lib/get-phishing-domains.test.ts @@ -1,5 +1,5 @@ // eslint-disable-next-line import-x/no-unresolved -- bun -import { describe, expect, it } from 'bun:test'; +import { describe, it } from 'bun:test'; import { calcDomainAbuseScore } from './get-phishing-domains'; diff --git a/Build/lib/get-phishing-domains.ts b/Build/lib/get-phishing-domains.ts index 0e47eef5..76e05b85 100644 --- a/Build/lib/get-phishing-domains.ts +++ b/Build/lib/get-phishing-domains.ts @@ -1,9 +1,8 @@ import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix'; import { processDomainLists } from './parse-filter'; -import { getSubdomain } from 'tldts'; +import { getSubdomain, getPublicSuffix } from 'tldts-experimental'; import { TTL } from './cache-filesystem'; -import { add as SetAdd } from 'mnemonist/set'; import type { Span } from '../trace'; import { appendArrayInPlace } from './append-array-in-place'; @@ -90,6 +89,14 @@ const BLACK_TLD = new Set([ 'design' ]); +const tldtsOpt: Parameters[1] = { + allowPrivateDomains: false, + extractHostname: false, + validateHostname: false, + detectIp: false, + mixedInputs: false +}; + export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => { const gorhill = await getGorhillPublicSuffixPromise(); @@ -117,7 +124,7 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g continue; } - const tld = gorhill.getPublicSuffix(safeGorhillLine); + const tld = getPublicSuffix(safeGorhillLine, tldtsOpt); if (!tld || !BLACK_TLD.has(tld)) continue; domainCountMap[apexDomain] ||= 0; @@ -174,7 +181,7 @@ export function calcDomainAbuseScore(line: string) { } } - const subdomain = getSubdomain(line, { detectIp: false }); + const subdomain = getSubdomain(line, tldtsOpt); if (subdomain) { if (subdomain.slice(1).includes('.')) { diff --git a/Build/lib/stable-sort-domain.ts b/Build/lib/stable-sort-domain.ts index 160a25ef..1f844ec1 100644 --- a/Build/lib/stable-sort-domain.ts +++ b/Build/lib/stable-sort-domain.ts @@ -1,7 +1,7 @@ // tldts-experimental is way faster than tldts, but very little bit inaccurate // (since it is hashes based). But the result is still deterministic, which is // enough when sorting. -import * as tldts from 'tldts-experimental'; +import { getDomain, getSubdomain } from 'tldts-experimental'; import { sort } from './timsort'; export const compare = (a: string, b: string) => { @@ -9,7 +9,7 @@ export const compare = (a: string, b: string) => { return (a.length - b.length) || a.localeCompare(b); }; -const tldtsOpt: Parameters[1] = { +const tldtsOpt: Parameters[1] = { allowPrivateDomains: false, extractHostname: false, validateHostname: false, @@ -24,11 +24,11 @@ export const sortDomains = (inputs: string[]) => { for (let i = 0, len = inputs.length; i < len; i++) { const cur = inputs[i]; if (!domainMap.has(cur)) { - const topD = tldts.getDomain(cur, tldtsOpt); + const topD = getDomain(cur, tldtsOpt); domainMap.set(cur, topD ?? cur); } if (!subdomainMap.has(cur)) { - const subD = tldts.getSubdomain(cur, tldtsOpt); + const subD = getSubdomain(cur, tldtsOpt); subdomainMap.set(cur, subD ?? cur); } } diff --git a/Build/lib/tldts.bench.ts b/Build/lib/tldts.bench.ts new file mode 100644 index 00000000..10ffad1f --- /dev/null +++ b/Build/lib/tldts.bench.ts @@ -0,0 +1,53 @@ +import { fetchRemoteTextByLine } from './fetch-text-by-line'; +import { processLineFromReadline } from './process-line'; + +import { bench, group, run } from 'mitata'; + +import * as tldts from 'tldts'; +import * as tldtsExperimental from 'tldts-experimental'; +import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix'; + +(async () => { + const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt')); + + const gorhill = await getGorhillPublicSuffixPromise(); + const tldtsOpt: Parameters[1] = { + allowPrivateDomains: false, + extractHostname: false, + validateHostname: false, + detectIp: false, + mixedInputs: false + }; + + (['getDomain', 'getPublicSuffix', 'getSubdomain'] as const).forEach(methodName => { + group(methodName, () => { + if (methodName in gorhill) { + bench('gorhill', () => { + for (let i = 0, len = data.length; i < len; i++) { + const line = data[i]; + const safeGorhillLine = line[0] === '.' ? line.slice(1) : line; + + // @ts-expect-error -- type guarded + gorhill[methodName](safeGorhillLine); + } + }); + } + + bench('tldts', () => { + for (let i = 0, len = data.length; i < len; i++) { + // eslint-disable-next-line import-x/namespace -- safe + tldts[methodName](data[i], tldtsOpt); + } + }); + + bench('tldts-experimental', () => { + for (let i = 0, len = data.length; i < len; i++) { + // eslint-disable-next-line import-x/namespace -- safe + tldtsExperimental[methodName](data[i], tldtsOpt); + } + }); + }); + }); + + run(); +})(); diff --git a/Build/validate-domestic.ts b/Build/validate-domestic.ts index 44cb4c41..e516891a 100644 --- a/Build/validate-domestic.ts +++ b/Build/validate-domestic.ts @@ -3,7 +3,6 @@ import { parse } from 'csv-parse/sync'; import { createTrie } from './lib/trie'; import path from 'path'; import { processLine } from './lib/process-line'; -import { extract } from 'tar-stream'; import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq'; export const parseDomesticList = async () => {