Perf: remove cached tld parse

This commit is contained in:
SukkaW
2024-05-12 00:50:50 +08:00
parent 160e7bfab7
commit 35aa11f361
3 changed files with 26 additions and 46 deletions

View File

@@ -1,9 +0,0 @@
import { createCache } from './cache-apply';
import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
// Module-level cache shared by every getter returned from createCachedGorhillGetDomain.
// It stays null until createCachedGorhillGetDomain is called for the first time.
let gorhillGetDomainCache: ReturnType<typeof createCache> | null = null;
/**
 * Builds a domain lookup function that routes `gorhill.getDomain` through the shared cache.
 *
 * @param gorhill - the public suffix list instance whose `getDomain` is invoked for lookups
 * @returns a function that takes a domain string and returns the result of the cache's `sync` call for it
 */
export const createCachedGorhillGetDomain = (gorhill: PublicSuffixList) => {
// Create the cache once, on first use; subsequent calls reuse the same cache object,
// even when they are given a different `gorhill` instance.
// NOTE(review): the meaning of the second argument (`true`) is defined by createCache in './cache-apply' — confirm there.
gorhillGetDomainCache ??= createCache('cached-gorhill-get-domain', true);
return (domain: string) => gorhillGetDomainCache! // we do know gorhillGetDomainCache exists here
// The raw input is passed to `sync` as its first argument (presumably the cache key — confirm in './cache-apply');
// a single leading '.' is stripped only for the value handed to `gorhill.getDomain`.
// NOTE(review): presumably `sync` runs the callback only on a cache miss — confirm in './cache-apply'.
.sync(domain, () => gorhill.getDomain(domain[0] === '.' ? domain.slice(1) : domain));
};

View File

@@ -2,7 +2,6 @@ import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
import { processDomainLists } from './parse-filter'; import { processDomainLists } from './parse-filter';
import * as tldts from 'tldts'; import * as tldts from 'tldts';
import { createTrie } from './trie'; import { createTrie } from './trie';
import { createCachedGorhillGetDomain } from './cached-tld-parse';
import { processLine } from './process-line'; import { processLine } from './process-line';
import { TTL } from './cache-filesystem'; import { TTL } from './cache-filesystem';
import { isCI } from 'ci-info'; import { isCI } from 'ci-info';
@@ -130,7 +129,6 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
}); });
const domainCountMap: Record<string, number> = {}; const domainCountMap: Record<string, number> = {};
const getDomain = createCachedGorhillGetDomain(gorhill);
span.traceChildSync('process phishing domain set', () => { span.traceChildSync('process phishing domain set', () => {
const domainArr = Array.from(domainSet); const domainArr = Array.from(domainSet);
@@ -139,7 +137,7 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
const line = processLine(domainArr[i]); const line = processLine(domainArr[i]);
if (!line) continue; if (!line) continue;
const apexDomain = getDomain(line); const apexDomain = gorhill.getDomain(line);
if (!apexDomain) continue; if (!apexDomain) continue;
domainCountMap[apexDomain] ||= 0; domainCountMap[apexDomain] ||= 0;

View File

@@ -79,11 +79,10 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
if (node.has(token)) { if (node.has(token)) {
node = node.get(token)!; node = node.get(token)!;
if (smolTree) { // During the adding of `[start]blog.skk.moe` and find out that there is a `[start].skk.moe` in the trie
if (node.get('.')?.[SENTINEL] === true) { // Dedupe the covered subdomain by skipping
return; if (smolTree && (node.get('.')?.[SENTINEL])) {
} return;
// return;
} }
} else { } else {
const newNode = createNode(node); const newNode = createNode(node);
@@ -92,9 +91,12 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
} }
if (smolTree) { if (smolTree) {
// Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
if (i === 1 && tokens[0] === '.') { if (i === 1 && tokens[0] === '.') {
// If there is a `[start]sub.example.com` here, remove it
node[SENTINEL] = false; node[SENTINEL] = false;
// Trying to add `.sub.example.com` where there is already a `blog.sub.example.com` in the trie
// Removing the rest of the child nodes by creating a new node and disconnecting the old one
const newNode = createNode(node); const newNode = createNode(node);
node.set('.', newNode); node.set('.', newNode);
node = newNode; node = newNode;
@@ -225,13 +227,11 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
node = nodeStack.pop()!; node = nodeStack.pop()!;
if (node[SENTINEL]) { if (node[SENTINEL]) {
if (suffix !== inputTokens) { // found match, delete it from set
// found match, delete it from set if (hostnameMode) {
if (hostnameMode) { set.delete((suffix as string[]).join(''));
set.delete((suffix as string[]).join('')); } else if (suffix !== inputTokens) {
} else { set.delete(suffix as string);
set.delete(suffix as string);
}
} }
} }
@@ -317,37 +317,22 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
return node[SENTINEL]; return node[SENTINEL];
}; };
if (Array.isArray(from)) {
for (let i = 0, l = from.length; i < l; i++) {
add(from[i]);
}
} else if (from) {
from.forEach(add);
}
const dump = () => { const dump = () => {
const nodeStack: TrieNode[] = []; const nodeStack: TrieNode[] = [];
const suffixStack: Array<string | string[]> = []; const suffixStack: Array<string | string[]> = [];
// Resolving initial string
const suffix = hostnameMode ? [] : '';
nodeStack.push(root); nodeStack.push(root);
suffixStack.push(suffix); // Resolving initial string (begin the start of the stack)
suffixStack.push(hostnameMode ? [] : '');
const results: string[] = []; const results: string[] = [];
let node: TrieNode; let node: TrieNode;
do { do {
let hasValue = false;
node = nodeStack.pop()!; node = nodeStack.pop()!;
const suffix = suffixStack.pop()!; const suffix = suffixStack.pop()!;
if (node[SENTINEL]) {
hasValue = true;
}
node.forEach((childNode, k) => { node.forEach((childNode, k) => {
nodeStack.push(childNode); nodeStack.push(childNode);
@@ -358,16 +343,22 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
} }
}); });
if (hasValue) { if (node[SENTINEL]) {
results.push( results.push(hostnameMode ? (suffix as string[]).join('') : (suffix as string));
hostnameMode ? (suffix as string[]).join('') : (suffix as string)
);
} }
} while (nodeStack.length); } while (nodeStack.length);
return results; return results;
}; };
if (Array.isArray(from)) {
for (let i = 0, l = from.length; i < l; i++) {
add(from[i]);
}
} else if (from) {
from.forEach(add);
}
return { return {
add, add,
contains, contains,