mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Perf: speed-up tldts
This commit is contained in:
parent
21a31e6c1f
commit
aa3cb9e586
@ -56,10 +56,11 @@ export const buildDomesticRuleset = task(import.meta.path, async (span) => {
|
||||
: []
|
||||
),
|
||||
...domains.flatMap((domain) => [
|
||||
`${domain} = server:${dns}`,
|
||||
`*.${domain} = server:${dns}`
|
||||
])
|
||||
])
|
||||
`${domain} = server:${dns}`,
|
||||
`*.${domain} = server:${dns}`
|
||||
])
|
||||
]
|
||||
)
|
||||
],
|
||||
path.resolve(import.meta.dir, '../Modules/sukka_local_dns_mapping.sgmodule')
|
||||
)
|
||||
|
||||
@ -4,7 +4,6 @@ import { readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { sortDomains } from './lib/stable-sort-domain';
|
||||
import { task } from './trace';
|
||||
import { compareAndWriteFile } from './lib/create-file';
|
||||
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
|
||||
import { domainDeduper } from './lib/domain-deduper';
|
||||
import { sort } from './lib/timsort';
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// eslint-disable-next-line import-x/no-unresolved -- bun
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
import { describe, it } from 'bun:test';
|
||||
|
||||
import { calcDomainAbuseScore } from './get-phishing-domains';
|
||||
|
||||
|
||||
@ -1,9 +1,8 @@
|
||||
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
||||
import { processDomainLists } from './parse-filter';
|
||||
import { getSubdomain } from 'tldts';
|
||||
import { getSubdomain, getPublicSuffix } from 'tldts-experimental';
|
||||
import { TTL } from './cache-filesystem';
|
||||
|
||||
import { add as SetAdd } from 'mnemonist/set';
|
||||
import type { Span } from '../trace';
|
||||
import { appendArrayInPlace } from './append-array-in-place';
|
||||
|
||||
@ -90,6 +89,14 @@ const BLACK_TLD = new Set([
|
||||
'design'
|
||||
]);
|
||||
|
||||
const tldtsOpt: Parameters<typeof getSubdomain>[1] = {
|
||||
allowPrivateDomains: false,
|
||||
extractHostname: false,
|
||||
validateHostname: false,
|
||||
detectIp: false,
|
||||
mixedInputs: false
|
||||
};
|
||||
|
||||
export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => {
|
||||
const gorhill = await getGorhillPublicSuffixPromise();
|
||||
|
||||
@ -117,7 +124,7 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
||||
continue;
|
||||
}
|
||||
|
||||
const tld = gorhill.getPublicSuffix(safeGorhillLine);
|
||||
const tld = getPublicSuffix(safeGorhillLine, tldtsOpt);
|
||||
if (!tld || !BLACK_TLD.has(tld)) continue;
|
||||
|
||||
domainCountMap[apexDomain] ||= 0;
|
||||
@ -174,7 +181,7 @@ export function calcDomainAbuseScore(line: string) {
|
||||
}
|
||||
}
|
||||
|
||||
const subdomain = getSubdomain(line, { detectIp: false });
|
||||
const subdomain = getSubdomain(line, tldtsOpt);
|
||||
|
||||
if (subdomain) {
|
||||
if (subdomain.slice(1).includes('.')) {
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
// tldts-experimental is way faster than tldts, but slightly less accurate
|
||||
// (since it is hash-based). But the result is still deterministic, which is
|
||||
// enough when sorting.
|
||||
import * as tldts from 'tldts-experimental';
|
||||
import { getDomain, getSubdomain } from 'tldts-experimental';
|
||||
import { sort } from './timsort';
|
||||
|
||||
export const compare = (a: string, b: string) => {
|
||||
@ -9,7 +9,7 @@ export const compare = (a: string, b: string) => {
|
||||
return (a.length - b.length) || a.localeCompare(b);
|
||||
};
|
||||
|
||||
const tldtsOpt: Parameters<typeof tldts.getDomain>[1] = {
|
||||
const tldtsOpt: Parameters<typeof getDomain>[1] = {
|
||||
allowPrivateDomains: false,
|
||||
extractHostname: false,
|
||||
validateHostname: false,
|
||||
@ -24,11 +24,11 @@ export const sortDomains = (inputs: string[]) => {
|
||||
for (let i = 0, len = inputs.length; i < len; i++) {
|
||||
const cur = inputs[i];
|
||||
if (!domainMap.has(cur)) {
|
||||
const topD = tldts.getDomain(cur, tldtsOpt);
|
||||
const topD = getDomain(cur, tldtsOpt);
|
||||
domainMap.set(cur, topD ?? cur);
|
||||
}
|
||||
if (!subdomainMap.has(cur)) {
|
||||
const subD = tldts.getSubdomain(cur, tldtsOpt);
|
||||
const subD = getSubdomain(cur, tldtsOpt);
|
||||
subdomainMap.set(cur, subD ?? cur);
|
||||
}
|
||||
}
|
||||
|
||||
53
Build/lib/tldts.bench.ts
Normal file
53
Build/lib/tldts.bench.ts
Normal file
@ -0,0 +1,53 @@
|
||||
import { fetchRemoteTextByLine } from './fetch-text-by-line';
|
||||
import { processLineFromReadline } from './process-line';
|
||||
|
||||
import { bench, group, run } from 'mitata';
|
||||
|
||||
import * as tldts from 'tldts';
|
||||
import * as tldtsExperimental from 'tldts-experimental';
|
||||
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
||||
|
||||
(async () => {
|
||||
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
|
||||
|
||||
const gorhill = await getGorhillPublicSuffixPromise();
|
||||
const tldtsOpt: Parameters<typeof tldts.getDomain>[1] = {
|
||||
allowPrivateDomains: false,
|
||||
extractHostname: false,
|
||||
validateHostname: false,
|
||||
detectIp: false,
|
||||
mixedInputs: false
|
||||
};
|
||||
|
||||
(['getDomain', 'getPublicSuffix', 'getSubdomain'] as const).forEach(methodName => {
|
||||
group(methodName, () => {
|
||||
if (methodName in gorhill) {
|
||||
bench('gorhill', () => {
|
||||
for (let i = 0, len = data.length; i < len; i++) {
|
||||
const line = data[i];
|
||||
const safeGorhillLine = line[0] === '.' ? line.slice(1) : line;
|
||||
|
||||
// @ts-expect-error -- type guarded
|
||||
gorhill[methodName](safeGorhillLine);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
bench('tldts', () => {
|
||||
for (let i = 0, len = data.length; i < len; i++) {
|
||||
// eslint-disable-next-line import-x/namespace -- safe
|
||||
tldts[methodName](data[i], tldtsOpt);
|
||||
}
|
||||
});
|
||||
|
||||
bench('tldts-experimental', () => {
|
||||
for (let i = 0, len = data.length; i < len; i++) {
|
||||
// eslint-disable-next-line import-x/namespace -- safe
|
||||
tldtsExperimental[methodName](data[i], tldtsOpt);
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
run();
|
||||
})();
|
||||
@ -3,7 +3,6 @@ import { parse } from 'csv-parse/sync';
|
||||
import { createTrie } from './lib/trie';
|
||||
import path from 'path';
|
||||
import { processLine } from './lib/process-line';
|
||||
import { extract } from 'tar-stream';
|
||||
import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
|
||||
|
||||
export const parseDomesticList = async () => {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user