Perf: tldts allow loose input

This commit is contained in:
SukkaW 2024-06-08 17:27:41 +08:00
parent ac76f10957
commit 3d676289f3
3 changed files with 3 additions and 75 deletions

View File

@ -1,64 +0,0 @@
import path from 'path';
import { processLine } from './lib/process-line';
import { readFileByLine } from './lib/fetch-text-by-line';
import { sortDomains } from './lib/stable-sort-domain';
import { task } from './trace';
import { compareAndWriteFile } from './lib/create-file';
import { domainDeduper } from './lib/domain-deduper';
import { sort } from './lib/timsort';
/**
 * Backslash-escapes every regular-expression metacharacter found in `string`
 * so the result can be embedded in a pattern and match the input literally.
 *
 * @param string - Text to escape; defaults to the empty string.
 * @returns The input with each of `$ ( ) * + . ? [ \ ] ^ { | }` prefixed by a backslash.
 */
const escapeRegExp = (string = '') => {
  const metaCharacters = /[$()*+.?[\\\]^{|}]/g;
  // `$&` re-inserts the matched character right after the added backslash.
  return string.replace(metaCharacters, '\\$&');
};
/**
 * Streams the file at `domainSetPath` line by line and collects its entries
 * into `set`.
 *
 * Each raw line goes through `processLine`; lines for which it returns a falsy
 * value are skipped. A single leading dot on an entry is removed before the
 * entry is stored.
 *
 * @param domainSetPath - Path of the domain-set file to read.
 * @param set - Destination set, mutated in place.
 */
const processLocalDomainSet = async (domainSetPath: string, set: Set<string>) => {
  for await (const rawLine of readFileByLine(domainSetPath)) {
    const entry = processLine(rawLine);
    if (!entry) {
      continue;
    }

    const withoutLeadingDot = entry.startsWith('.') ? entry.slice(1) : entry;
    set.add(withoutLeadingDot);
  }
};
/**
 * Streams the rule-set file at `ruleSetPath` line by line and sorts each rule
 * into the given collections.
 *
 * - `DOMAIN-SUFFIX,<value>` and `DOMAIN,<value>` add `<value>` to `set`.
 * - `DOMAIN-KEYWORD,<value>` adds the regex-escaped `<value>` to `keywords`.
 * - Lines containing `USER-AGENT,`, `PROCESS-NAME,`, `URL-REGEX,` or
 *   `DOMAIN-WILDCARD` are ignored silently.
 * - Any other line for which `processLine` returns a truthy value is reported
 *   with `console.warn` and dropped.
 *
 * @param ruleSetPath - Path of the rule-set file to read.
 * @param set - Destination for domain / domain-suffix values, mutated in place.
 * @param keywords - Destination for escaped keyword patterns, mutated in place.
 */
const processLocalRuleSet = async (ruleSetPath: string, set: Set<string>, keywords: Set<string>) => {
  // Each prefix includes the trailing comma, and the payload offset is derived
  // from the prefix itself so the check and the slice can never disagree.
  const domainSuffixPrefix = 'DOMAIN-SUFFIX,';
  const domainPrefix = 'DOMAIN,';
  // The comma is required here: without it a bare `DOMAIN-KEYWORD` line would
  // register an empty keyword (emitted later as a match-everything `REGEX,`),
  // and `DOMAIN-KEYWORDS,foo` would silently lose its first payload character.
  const domainKeywordPrefix = 'DOMAIN-KEYWORD,';

  for await (const line of readFileByLine(ruleSetPath)) {
    if (line.startsWith(domainSuffixPrefix)) {
      set.add(line.slice(domainSuffixPrefix.length));
    } else if (line.startsWith(domainPrefix)) {
      set.add(line.slice(domainPrefix.length));
    } else if (line.startsWith(domainKeywordPrefix)) {
      keywords.add(escapeRegExp(line.slice(domainKeywordPrefix.length)));
    } else if (line.includes('USER-AGENT,') || line.includes('PROCESS-NAME,') || line.includes('URL-REGEX,') || line.includes('DOMAIN-WILDCARD')) {
      // Rule types that carry no domain information for this output: do nothing.
    } else if (processLine(line)) {
      console.warn('[drop line from ruleset]', line);
    }
  }
};
/**
 * Build step, wrapped by `task` and keyed by this module's path, that
 * regenerates `../Internal/cdn.txt`.
 *
 * It reads six local rule-set files and two local domain-set files
 * concurrently into a shared domain set and keyword set. It then hands
 * `compareAndWriteFile` one `SUFFIX,<domain>` line per domain (after
 * `domainDeduper` and `sortDomains`) followed by one `REGEX,<pattern>` line
 * per sorted keyword.
 */
export const buildInternalCDNDomains = task(import.meta.path, async (span) => {
  const proxySet = new Set<string>();
  const proxyKeywords = new Set<string>();

  // Rule-set files can contribute both domains and keywords.
  const ruleSetFiles = [
    '../List/non_ip/cdn.conf',
    '../List/non_ip/global.conf',
    '../List/non_ip/my_proxy.conf',
    '../List/non_ip/my_plus.conf',
    '../List/non_ip/stream.conf',
    '../List/non_ip/telegram.conf'
  ];
  // Domain-set files contribute domains only.
  const domainSetFiles = [
    '../List/domainset/cdn.conf',
    '../List/domainset/download.conf'
  ];

  await Promise.all([
    ...ruleSetFiles.map(file => processLocalRuleSet(path.resolve(import.meta.dir, file), proxySet, proxyKeywords)),
    ...domainSetFiles.map(file => processLocalDomainSet(path.resolve(import.meta.dir, file), proxySet))
  ]);

  const suffixLines = sortDomains(domainDeduper(Array.from(proxySet))).map(i => `SUFFIX,${i}`);
  const regexLines = sort(Array.from(proxyKeywords)).map(i => `REGEX,${i}`);
  const outputPath = path.resolve(import.meta.dir, '../Internal/cdn.txt');

  return compareAndWriteFile(span, [...suffixLines, ...regexLines], outputPath);
});
// Run the build right away when `import.meta.main` is truthy — presumably the
// runtime's "this module is the entry point" flag (TODO confirm). Otherwise
// the module only exposes `buildInternalCDNDomains` for other callers.
if (import.meta.main) {
buildInternalCDNDomains();
}

View File

@ -9,7 +9,6 @@ import { buildRejectDomainSet } from './build-reject-domainset';
import { buildTelegramCIDR } from './build-telegram-cidr';
import { buildChnCidr } from './build-chn-cidr';
import { buildSpeedtestDomainSet } from './build-speedtest-domainset';
import { buildInternalCDNDomains } from './build-internal-cdn-rules';
import { buildInternalReverseChnCIDR } from './build-internal-reverse-chn-cidr';
import { buildDomesticRuleset } from './build-domestic-ruleset';
import { buildStreamService } from './build-stream-service';
@ -51,10 +50,6 @@ process.on('unhandledRejection', (reason) => {
const buildTelegramCIDRPromise = downloadPreviousBuildPromise.then(() => buildTelegramCIDR(rootSpan));
const buildChnCidrPromise = downloadPreviousBuildPromise.then(() => buildChnCidr(rootSpan));
const buildSpeedtestDomainSetPromise = downloadPreviousBuildPromise.then(() => buildSpeedtestDomainSet(rootSpan));
const buildInternalCDNDomainsPromise = Promise.all([
buildCommonPromise,
buildCdnConfPromise
]).then(() => buildInternalCDNDomains(rootSpan));
const buildInternalReverseChnCIDRPromise = buildInternalReverseChnCIDR(rootSpan);
@ -86,7 +81,6 @@ process.on('unhandledRejection', (reason) => {
buildTelegramCIDRPromise,
buildChnCidrPromise,
buildSpeedtestDomainSetPromise,
buildInternalCDNDomainsPromise,
buildInternalReverseChnCIDRPromise,
buildInternalReverseChnCIDRPromise,
// buildInternalChnDomainsPromise,

View File

@ -118,20 +118,18 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
for (let i = 0, len = domainArr.length; i < len; i++) {
const line = domainArr[i];
const safeGorhillLine = line[0] === '.' ? line.slice(1) : line;
const {
publicSuffix: tld,
domain: apexDomain,
subdomain
} = parse(safeGorhillLine, looseTldtsOpt);
} = parse(line, looseTldtsOpt);
if (!tld) {
console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, safeGorhillLine, tld });
console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, tld });
continue;
}
if (!apexDomain) {
console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, safeGorhillLine, apexDomain });
console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, apexDomain });
continue;
}