Perf: remove cached tld parse

This commit is contained in:
SukkaW
2024-05-12 00:50:50 +08:00
parent 160e7bfab7
commit 35aa11f361
3 changed files with 26 additions and 46 deletions

View File

@@ -1,9 +0,0 @@
import { createCache } from './cache-apply';
import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
// Module-level cache, created lazily on first use and then shared by every getter built below.
let gorhillGetDomainCache: ReturnType<typeof createCache> | null = null;

/**
 * Builds a wrapper around `gorhill.getDomain` that routes every lookup
 * through a single cache created via `createCache`.
 *
 * The cache is created on the first call of this factory and reused on later
 * calls. The raw `domain` string is what gets handed to the cache's `sync`,
 * while a single leading `.` is stripped before `gorhill.getDomain` is invoked.
 *
 * @param gorhill - public suffix list instance whose `getDomain` is wrapped
 * @returns a function taking a domain string and returning what the cache's `sync` yields for it
 */
export const createCachedGorhillGetDomain = (gorhill: PublicSuffixList) => {
  if (gorhillGetDomainCache === null) {
    gorhillGetDomainCache = createCache('cached-gorhill-get-domain', true);
  }
  // Keep the initialised cache in a local so the closure below needs no non-null assertion.
  const sharedCache = gorhillGetDomainCache;

  return (domain: string) => {
    const lookup = () => {
      const hostname = domain[0] === '.' ? domain.slice(1) : domain;
      return gorhill.getDomain(hostname);
    };
    return sharedCache.sync(domain, lookup);
  };
};

View File

@@ -2,7 +2,6 @@ import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
import { processDomainLists } from './parse-filter';
import * as tldts from 'tldts';
import { createTrie } from './trie';
import { createCachedGorhillGetDomain } from './cached-tld-parse';
import { processLine } from './process-line';
import { TTL } from './cache-filesystem';
import { isCI } from 'ci-info';
@@ -130,7 +129,6 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
});
const domainCountMap: Record<string, number> = {};
const getDomain = createCachedGorhillGetDomain(gorhill);
span.traceChildSync('process phishing domain set', () => {
const domainArr = Array.from(domainSet);
@@ -139,7 +137,7 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
const line = processLine(domainArr[i]);
if (!line) continue;
const apexDomain = getDomain(line);
const apexDomain = gorhill.getDomain(line);
if (!apexDomain) continue;
domainCountMap[apexDomain] ||= 0;

View File

@@ -79,11 +79,10 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
if (node.has(token)) {
node = node.get(token)!;
if (smolTree) {
if (node.get('.')?.[SENTINEL] === true) {
return;
}
// return;
// While adding `[start]blog.skk.moe`, we find that `[start].skk.moe` is already in the trie.
// Dedupe the covered subdomain by skipping it.
if (smolTree && (node.get('.')?.[SENTINEL])) {
return;
}
} else {
const newNode = createNode(node);
@@ -92,9 +91,12 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
}
if (smolTree) {
// Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
if (i === 1 && tokens[0] === '.') {
// If there is a `[start]sub.example.com` here, remove it
node[SENTINEL] = false;
// Trying to add `.sub.example.com` where there is already a `blog.sub.example.com` in the trie
// Removing the rest of the child nodes by creating a new node and disconnecting the old one
const newNode = createNode(node);
node.set('.', newNode);
node = newNode;
@@ -225,13 +227,11 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
node = nodeStack.pop()!;
if (node[SENTINEL]) {
if (suffix !== inputTokens) {
// found match, delete it from set
if (hostnameMode) {
set.delete((suffix as string[]).join(''));
} else {
set.delete(suffix as string);
}
// found match, delete it from set
if (hostnameMode) {
set.delete((suffix as string[]).join(''));
} else if (suffix !== inputTokens) {
set.delete(suffix as string);
}
}
@@ -317,37 +317,22 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
return node[SENTINEL];
};
if (Array.isArray(from)) {
for (let i = 0, l = from.length; i < l; i++) {
add(from[i]);
}
} else if (from) {
from.forEach(add);
}
const dump = () => {
const nodeStack: TrieNode[] = [];
const suffixStack: Array<string | string[]> = [];
// Resolving initial string
const suffix = hostnameMode ? [] : '';
nodeStack.push(root);
suffixStack.push(suffix);
// Seed the suffix stack with the initial (empty) suffix
suffixStack.push(hostnameMode ? [] : '');
const results: string[] = [];
let node: TrieNode;
do {
let hasValue = false;
node = nodeStack.pop()!;
const suffix = suffixStack.pop()!;
if (node[SENTINEL]) {
hasValue = true;
}
node.forEach((childNode, k) => {
nodeStack.push(childNode);
@@ -358,16 +343,22 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
}
});
if (hasValue) {
results.push(
hostnameMode ? (suffix as string[]).join('') : (suffix as string)
);
if (node[SENTINEL]) {
results.push(hostnameMode ? (suffix as string[]).join('') : (suffix as string));
}
} while (nodeStack.length);
return results;
};
if (Array.isArray(from)) {
for (let i = 0, l = from.length; i < l; i++) {
add(from[i]);
}
} else if (from) {
from.forEach(add);
}
return {
add,
contains,