mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Perf: speed-up tldts
This commit is contained in:
parent
21a31e6c1f
commit
aa3cb9e586
@ -56,10 +56,11 @@ export const buildDomesticRuleset = task(import.meta.path, async (span) => {
|
|||||||
: []
|
: []
|
||||||
),
|
),
|
||||||
...domains.flatMap((domain) => [
|
...domains.flatMap((domain) => [
|
||||||
`${domain} = server:${dns}`,
|
`${domain} = server:${dns}`,
|
||||||
`*.${domain} = server:${dns}`
|
`*.${domain} = server:${dns}`
|
||||||
])
|
])
|
||||||
])
|
]
|
||||||
|
)
|
||||||
],
|
],
|
||||||
path.resolve(import.meta.dir, '../Modules/sukka_local_dns_mapping.sgmodule')
|
path.resolve(import.meta.dir, '../Modules/sukka_local_dns_mapping.sgmodule')
|
||||||
)
|
)
|
||||||
|
|||||||
@ -4,7 +4,6 @@ import { readFileByLine } from './lib/fetch-text-by-line';
|
|||||||
import { sortDomains } from './lib/stable-sort-domain';
|
import { sortDomains } from './lib/stable-sort-domain';
|
||||||
import { task } from './trace';
|
import { task } from './trace';
|
||||||
import { compareAndWriteFile } from './lib/create-file';
|
import { compareAndWriteFile } from './lib/create-file';
|
||||||
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
|
|
||||||
import { domainDeduper } from './lib/domain-deduper';
|
import { domainDeduper } from './lib/domain-deduper';
|
||||||
import { sort } from './lib/timsort';
|
import { sort } from './lib/timsort';
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
// eslint-disable-next-line import-x/no-unresolved -- bun
|
// eslint-disable-next-line import-x/no-unresolved -- bun
|
||||||
import { describe, expect, it } from 'bun:test';
|
import { describe, it } from 'bun:test';
|
||||||
|
|
||||||
import { calcDomainAbuseScore } from './get-phishing-domains';
|
import { calcDomainAbuseScore } from './get-phishing-domains';
|
||||||
|
|
||||||
|
|||||||
@ -1,9 +1,8 @@
|
|||||||
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
||||||
import { processDomainLists } from './parse-filter';
|
import { processDomainLists } from './parse-filter';
|
||||||
import { getSubdomain } from 'tldts';
|
import { getSubdomain, getPublicSuffix } from 'tldts-experimental';
|
||||||
import { TTL } from './cache-filesystem';
|
import { TTL } from './cache-filesystem';
|
||||||
|
|
||||||
import { add as SetAdd } from 'mnemonist/set';
|
|
||||||
import type { Span } from '../trace';
|
import type { Span } from '../trace';
|
||||||
import { appendArrayInPlace } from './append-array-in-place';
|
import { appendArrayInPlace } from './append-array-in-place';
|
||||||
|
|
||||||
@ -90,6 +89,14 @@ const BLACK_TLD = new Set([
|
|||||||
'design'
|
'design'
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
const tldtsOpt: Parameters<typeof getSubdomain>[1] = {
|
||||||
|
allowPrivateDomains: false,
|
||||||
|
extractHostname: false,
|
||||||
|
validateHostname: false,
|
||||||
|
detectIp: false,
|
||||||
|
mixedInputs: false
|
||||||
|
};
|
||||||
|
|
||||||
export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => {
|
export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => {
|
||||||
const gorhill = await getGorhillPublicSuffixPromise();
|
const gorhill = await getGorhillPublicSuffixPromise();
|
||||||
|
|
||||||
@ -117,7 +124,7 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const tld = gorhill.getPublicSuffix(safeGorhillLine);
|
const tld = getPublicSuffix(safeGorhillLine, tldtsOpt);
|
||||||
if (!tld || !BLACK_TLD.has(tld)) continue;
|
if (!tld || !BLACK_TLD.has(tld)) continue;
|
||||||
|
|
||||||
domainCountMap[apexDomain] ||= 0;
|
domainCountMap[apexDomain] ||= 0;
|
||||||
@ -174,7 +181,7 @@ export function calcDomainAbuseScore(line: string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const subdomain = getSubdomain(line, { detectIp: false });
|
const subdomain = getSubdomain(line, tldtsOpt);
|
||||||
|
|
||||||
if (subdomain) {
|
if (subdomain) {
|
||||||
if (subdomain.slice(1).includes('.')) {
|
if (subdomain.slice(1).includes('.')) {
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
// tldts-experimental is way faster than tldts, but slightly less accurate
|
// tldts-experimental is way faster than tldts, but slightly less accurate
|
||||||
// (since it is hash-based). But the result is still deterministic, which is
|
// (since it is hash-based). But the result is still deterministic, which is
|
||||||
// enough when sorting.
|
// enough when sorting.
|
||||||
import * as tldts from 'tldts-experimental';
|
import { getDomain, getSubdomain } from 'tldts-experimental';
|
||||||
import { sort } from './timsort';
|
import { sort } from './timsort';
|
||||||
|
|
||||||
export const compare = (a: string, b: string) => {
|
export const compare = (a: string, b: string) => {
|
||||||
@ -9,7 +9,7 @@ export const compare = (a: string, b: string) => {
|
|||||||
return (a.length - b.length) || a.localeCompare(b);
|
return (a.length - b.length) || a.localeCompare(b);
|
||||||
};
|
};
|
||||||
|
|
||||||
const tldtsOpt: Parameters<typeof tldts.getDomain>[1] = {
|
const tldtsOpt: Parameters<typeof getDomain>[1] = {
|
||||||
allowPrivateDomains: false,
|
allowPrivateDomains: false,
|
||||||
extractHostname: false,
|
extractHostname: false,
|
||||||
validateHostname: false,
|
validateHostname: false,
|
||||||
@ -24,11 +24,11 @@ export const sortDomains = (inputs: string[]) => {
|
|||||||
for (let i = 0, len = inputs.length; i < len; i++) {
|
for (let i = 0, len = inputs.length; i < len; i++) {
|
||||||
const cur = inputs[i];
|
const cur = inputs[i];
|
||||||
if (!domainMap.has(cur)) {
|
if (!domainMap.has(cur)) {
|
||||||
const topD = tldts.getDomain(cur, tldtsOpt);
|
const topD = getDomain(cur, tldtsOpt);
|
||||||
domainMap.set(cur, topD ?? cur);
|
domainMap.set(cur, topD ?? cur);
|
||||||
}
|
}
|
||||||
if (!subdomainMap.has(cur)) {
|
if (!subdomainMap.has(cur)) {
|
||||||
const subD = tldts.getSubdomain(cur, tldtsOpt);
|
const subD = getSubdomain(cur, tldtsOpt);
|
||||||
subdomainMap.set(cur, subD ?? cur);
|
subdomainMap.set(cur, subD ?? cur);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
53
Build/lib/tldts.bench.ts
Normal file
53
Build/lib/tldts.bench.ts
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
import { fetchRemoteTextByLine } from './fetch-text-by-line';
|
||||||
|
import { processLineFromReadline } from './process-line';
|
||||||
|
|
||||||
|
import { bench, group, run } from 'mitata';
|
||||||
|
|
||||||
|
import * as tldts from 'tldts';
|
||||||
|
import * as tldtsExperimental from 'tldts-experimental';
|
||||||
|
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
||||||
|
|
||||||
|
(async () => {
|
||||||
|
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
|
||||||
|
|
||||||
|
const gorhill = await getGorhillPublicSuffixPromise();
|
||||||
|
const tldtsOpt: Parameters<typeof tldts.getDomain>[1] = {
|
||||||
|
allowPrivateDomains: false,
|
||||||
|
extractHostname: false,
|
||||||
|
validateHostname: false,
|
||||||
|
detectIp: false,
|
||||||
|
mixedInputs: false
|
||||||
|
};
|
||||||
|
|
||||||
|
(['getDomain', 'getPublicSuffix', 'getSubdomain'] as const).forEach(methodName => {
|
||||||
|
group(methodName, () => {
|
||||||
|
if (methodName in gorhill) {
|
||||||
|
bench('gorhill', () => {
|
||||||
|
for (let i = 0, len = data.length; i < len; i++) {
|
||||||
|
const line = data[i];
|
||||||
|
const safeGorhillLine = line[0] === '.' ? line.slice(1) : line;
|
||||||
|
|
||||||
|
// @ts-expect-error -- type guarded
|
||||||
|
gorhill[methodName](safeGorhillLine);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
bench('tldts', () => {
|
||||||
|
for (let i = 0, len = data.length; i < len; i++) {
|
||||||
|
// eslint-disable-next-line import-x/namespace -- safe
|
||||||
|
tldts[methodName](data[i], tldtsOpt);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
bench('tldts-experimental', () => {
|
||||||
|
for (let i = 0, len = data.length; i < len; i++) {
|
||||||
|
// eslint-disable-next-line import-x/namespace -- safe
|
||||||
|
tldtsExperimental[methodName](data[i], tldtsOpt);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
run();
|
||||||
|
})();
|
||||||
@ -3,7 +3,6 @@ import { parse } from 'csv-parse/sync';
|
|||||||
import { createTrie } from './lib/trie';
|
import { createTrie } from './lib/trie';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
import { processLine } from './lib/process-line';
|
import { processLine } from './lib/process-line';
|
||||||
import { extract } from 'tar-stream';
|
|
||||||
import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
|
import { extractDomainsFromFelixDnsmasq } from './lib/parse-dnsmasq';
|
||||||
|
|
||||||
export const parseDomesticList = async () => {
|
export const parseDomesticList = async () => {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user