Perf: speed up tldts

This commit is contained in:
SukkaW 2023-11-03 11:08:36 +08:00
parent 3d275f2dea
commit 5701fc9b70
5 changed files with 7 additions and 6 deletions

View File

@ -35,7 +35,7 @@ const buildInternalCDNDomains = task(__filename, async () => {
*/ */
const processLocalDomainSet = async (domainSetPath) => { const processLocalDomainSet = async (domainSetPath) => {
for await (const line of readFileByLine(domainSetPath)) { for await (const line of readFileByLine(domainSetPath)) {
const parsed = tldts.parse(line, { allowPrivateDomains: true }); const parsed = tldts.parse(line, { allowPrivateDomains: true, detectIp: false });
if (parsed.isIp) continue; if (parsed.isIp) continue;
if (parsed.isIcann || parsed.isPrivate) { if (parsed.isIcann || parsed.isPrivate) {
if (parsed.domain) { if (parsed.domain) {

View File

@ -142,7 +142,7 @@ const buildPhishingDomainSet = task(__filename, async () => {
} }
if (domainCountMap[apexDomain] < 5) { if (domainCountMap[apexDomain] < 5) {
const subdomain = tldts.getSubdomain(line); const subdomain = tldts.getSubdomain(line, { detectIp: false });
if (subdomain?.includes('.')) { if (subdomain?.includes('.')) {
domainCountMap[apexDomain] += 1.5; domainCountMap[apexDomain] += 1.5;
} }

View File

@ -74,7 +74,7 @@ const REDIRECT = /** @type {const} */ ([
]); ]);
const buildRedirectModule = task(__filename, async () => { const buildRedirectModule = task(__filename, async () => {
const domains = Array.from(new Set(REDIRECT.map(([from]) => tldts.getHostname(from)))).filter(Boolean); const domains = Array.from(new Set(REDIRECT.map(([from]) => tldts.getHostname(from, { detectIp: false })))).filter(Boolean);
return compareAndWriteFile( return compareAndWriteFile(
[ [

View File

@ -179,7 +179,7 @@ const buildRejectDomainSet = task(__filename, async () => {
'* Collect reject domain stats', '* Collect reject domain stats',
() => Object.entries( () => Object.entries(
dudupedDominArray.reduce((acc, cur) => { dudupedDominArray.reduce((acc, cur) => {
const suffix = tldts.getDomain(cur, { allowPrivateDomains: false }); const suffix = tldts.getDomain(cur, { allowPrivateDomains: false, detectIp: false });
if (suffix) { if (suffix) {
acc[suffix] = (acc[suffix] ?? 0) + 1; acc[suffix] = (acc[suffix] ?? 0) + 1;
} }

View File

@ -1,10 +1,11 @@
const { toASCII } = require('punycode/'); const { toASCII } = require('punycode/');
const fs = require('fs'); const fs = require('fs');
const path = require('path'); const path = require('path');
const { traceAsync } = require('./trace-runner');
const publicSuffixPath = path.resolve(__dirname, '../../node_modules/.cache/public_suffix_list_dat.txt'); const publicSuffixPath = path.resolve(__dirname, '../../node_modules/.cache/public_suffix_list_dat.txt');
const getGorhillPublicSuffix = async () => { const getGorhillPublicSuffix = () => traceAsync('create gorhill public suffix instance', async () => {
const customFetch = async (url) => { const customFetch = async (url) => {
const buf = await fs.promises.readFile(url); const buf = await fs.promises.readFile(url);
return { return {
@ -26,7 +27,7 @@ const getGorhillPublicSuffix = async () => {
await gorhill.enableWASM({ customFetch }); await gorhill.enableWASM({ customFetch });
return gorhill; return gorhill;
}; });
/** @type {Promise<import('gorhill-publicsuffixlist').default> | null} */ /** @type {Promise<import('gorhill-publicsuffixlist').default> | null} */
let gorhillPublicSuffixPromise = null; let gorhillPublicSuffixPromise = null;