Perf: read most files in one pass

This commit is contained in:
SukkaW 2024-01-15 13:32:12 +08:00
parent a3e1a85c70
commit a7fc13b355
7 changed files with 46 additions and 35 deletions

View File

@ -1,7 +1,7 @@
// @ts-check // @ts-check
import path from 'path'; import path from 'path';
import { createRuleset } from './lib/create-file'; import { createRuleset } from './lib/create-file';
import { fetchRemoteTextByLine, readFileByLine } from './lib/fetch-text-by-line'; import { fetchRemoteTextByLine, readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
import { processLine } from './lib/process-line'; import { processLine } from './lib/process-line';
import { task } from './trace'; import { task } from './trace';
import { SHARED_DESCRIPTION } from './lib/constants'; import { SHARED_DESCRIPTION } from './lib/constants';
@ -34,14 +34,7 @@ const getBogusNxDomainIPsPromise = fsCache.apply(
); );
export const buildAntiBogusDomain = task(import.meta.path, async (span) => { export const buildAntiBogusDomain = task(import.meta.path, async (span) => {
const result: string[] = []; const result: string[] = await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/ip/reject.conf'));
for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/ip/reject.conf'))) {
const l = processLine(line);
if (l) {
result.push(l);
}
}
result.push(...(await getBogusNxDomainIPsPromise)); result.push(...(await getBogusNxDomainIPsPromise));
const description = [ const description = [

View File

@ -18,7 +18,7 @@ export const getAppleCdnDomainsPromise = createMemoizedPromise(() => fsCache.app
)); ));
export const buildAppleCdn = task(import.meta.path, async (span) => { export const buildAppleCdn = task(import.meta.path, async (span) => {
const res = await getAppleCdnDomainsPromise(); const res = await span.traceChild('get apple cdn domains').traceAsyncFn(getAppleCdnDomainsPromise);
const description = [ const description = [
...SHARED_DESCRIPTION, ...SHARED_DESCRIPTION,

View File

@ -1,9 +1,8 @@
import path from 'path'; import path from 'path';
import { createRuleset } from './lib/create-file'; import { createRuleset } from './lib/create-file';
import { readFileByLine } from './lib/fetch-text-by-line'; import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
import { createTrie } from './lib/trie'; import { createTrie } from './lib/trie';
import { task } from './trace'; import { task } from './trace';
import { processLine } from './lib/process-line';
import { SHARED_DESCRIPTION } from './lib/constants'; import { SHARED_DESCRIPTION } from './lib/constants';
import { getPublicSuffixListTextPromise } from './download-publicsuffixlist'; import { getPublicSuffixListTextPromise } from './download-publicsuffixlist';
@ -44,15 +43,7 @@ const getS3OSSDomainsPromise = (async (): Promise<Set<string>> => {
const buildCdnConf = task(import.meta.path, async (span) => { const buildCdnConf = task(import.meta.path, async (span) => {
/** @type {string[]} */ /** @type {string[]} */
const cdnDomainsList: string[] = []; const cdnDomainsList: string[] = await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/cdn.conf'));
for await (const l of readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/cdn.conf'))) {
const line = processLine(l);
if (line) {
cdnDomainsList.push(line);
}
}
(await getS3OSSDomainsPromise).forEach((domain: string) => { cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`); }); (await getS3OSSDomainsPromise).forEach((domain: string) => { cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`); });
const description: string[] = [ const description: string[] = [

View File

@ -1,15 +1,14 @@
// @ts-check // @ts-check
import path from 'path'; import path from 'path';
import { DOMESTICS } from '../Source/non_ip/domestic'; import { DOMESTICS } from '../Source/non_ip/domestic';
import { readFileByLine } from './lib/fetch-text-by-line'; import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
import { processLineFromReadline } from './lib/process-line';
import { compareAndWriteFile, createRuleset } from './lib/create-file'; import { compareAndWriteFile, createRuleset } from './lib/create-file';
import { task } from './trace'; import { task } from './trace';
import { SHARED_DESCRIPTION } from './lib/constants'; import { SHARED_DESCRIPTION } from './lib/constants';
import { createMemoizedPromise } from './lib/memo-promise'; import { createMemoizedPromise } from './lib/memo-promise';
export const getDomesticDomainsRulesetPromise = createMemoizedPromise(async () => { export const getDomesticDomainsRulesetPromise = createMemoizedPromise(async () => {
const results = await processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/domestic.conf'))); const results = await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/domestic.conf'));
results.push( results.push(
...Object.entries(DOMESTICS).reduce<string[]>((acc, [key, { domains }]) => { ...Object.entries(DOMESTICS).reduce<string[]>((acc, [key, { domains }]) => {

View File

@ -1,8 +1,7 @@
import { getAppleCdnDomainsPromise } from './build-apple-cdn'; import { getAppleCdnDomainsPromise } from './build-apple-cdn';
import { getDomesticDomainsRulesetPromise } from './build-domestic-ruleset'; import { getDomesticDomainsRulesetPromise } from './build-domestic-ruleset';
import { surgeRulesetToClashClassicalTextRuleset } from './lib/clash'; import { surgeRulesetToClashClassicalTextRuleset } from './lib/clash';
import { readFileByLine } from './lib/fetch-text-by-line'; import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
import { processLineFromReadline } from './lib/process-line';
import { task } from './trace'; import { task } from './trace';
import path from 'path'; import path from 'path';
@ -49,19 +48,19 @@ export const buildSSPanelUIMAppProfile = task(import.meta.path, async (span) =>
getDomesticDomainsRulesetPromise().then(surgeRulesetToClashClassicalTextRuleset), getDomesticDomainsRulesetPromise().then(surgeRulesetToClashClassicalTextRuleset),
getAppleCdnDomainsPromise().then(domains => domains.map(domain => `DOMAIN-SUFFIX,${domain}`)), getAppleCdnDomainsPromise().then(domains => domains.map(domain => `DOMAIN-SUFFIX,${domain}`)),
getMicrosoftCdnRulesetPromise().then(surgeRulesetToClashClassicalTextRuleset), getMicrosoftCdnRulesetPromise().then(surgeRulesetToClashClassicalTextRuleset),
processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/apple_cn.conf'))), readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/apple_cn.conf')),
processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/neteasemusic.conf'))).then(surgeRulesetToClashClassicalTextRuleset), readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/neteasemusic.conf')).then(surgeRulesetToClashClassicalTextRuleset),
// microsoft & apple - domains // microsoft & apple - domains
processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/microsoft.conf'))), readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/microsoft.conf')),
(processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/apple_services.conf')))).then(surgeRulesetToClashClassicalTextRuleset), readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/apple_services.conf')).then(surgeRulesetToClashClassicalTextRuleset),
// stream - domains // stream - domains
surgeRulesetToClashClassicalTextRuleset(AllStreamServices.flatMap((i) => i.rules)), surgeRulesetToClashClassicalTextRuleset(AllStreamServices.flatMap((i) => i.rules)),
// global - domains // global - domains
processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/global.conf'))).then(surgeRulesetToClashClassicalTextRuleset), readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/global.conf')).then(surgeRulesetToClashClassicalTextRuleset),
processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/global_plus.conf'))).then(surgeRulesetToClashClassicalTextRuleset), readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/global_plus.conf')).then(surgeRulesetToClashClassicalTextRuleset),
processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/telegram.conf'))).then(surgeRulesetToClashClassicalTextRuleset), readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/telegram.conf')).then(surgeRulesetToClashClassicalTextRuleset),
// lan - domains // lan - domains
processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/lan.conf'))), readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/non_ip/lan.conf')),
// domestic - ip cidr // domestic - ip cidr
getChnCidrPromise().then(cidrs => cidrs.map(cidr => `IP-CIDR,${cidr}`)), getChnCidrPromise().then(cidrs => cidrs.map(cidr => `IP-CIDR,${cidr}`)),
AllStreamServices.flatMap((i) => ( AllStreamServices.flatMap((i) => (
@ -75,7 +74,7 @@ export const buildSSPanelUIMAppProfile = task(import.meta.path, async (span) =>
// global - ip cidr // global - ip cidr
getTelegramCIDRPromise(), getTelegramCIDRPromise(),
// lan - ip cidr // lan - ip cidr
processLineFromReadline(readFileByLine(path.resolve(import.meta.dir, '../Source/ip/lan.conf'))) readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/ip/lan.conf'))
] as const); ] as const);
const telegramCidrs = rawTelegramCidrs.map(removeNoResolved); const telegramCidrs = rawTelegramCidrs.map(removeNoResolved);

View File

@ -0,0 +1,17 @@
import { bench, group, run } from 'mitata';
import { processLine, processLineFromReadline } from './process-line';
import { readFileByLine } from './fetch-text-by-line';
import path from 'path';
import fsp from 'fs/promises';
// Input used by every benchmark case below.
const cdnConfPath = path.resolve(import.meta.dir, '../../Source/domainset/cdn.conf');

// Compares three ways of loading the same file into an array of lines.
group('read file by line', () => {
  // Line-by-line path: readFileByLine fed into processLineFromReadline.
  bench('readline', () => processLineFromReadline(readFileByLine(cdnConfPath)));

  // Whole file via fs/promises, then split on '\n' and filter with processLine.
  bench('fsp.readFile', async () => {
    const content = await fsp.readFile(cdnConfPath, 'utf-8');
    return content.split('\n').filter(processLine);
  });

  // Whole file via Bun.file().text(), then the same split + filter.
  bench('Bun.file', async () => {
    const content = await Bun.file(cdnConfPath).text();
    return content.split('\n').filter(processLine);
  });
});

run();

View File

@ -3,6 +3,7 @@ import { fetchWithRetry, defaultRequestInit } from './fetch-retry';
import { TextLineStream } from './text-line-transform-stream'; import { TextLineStream } from './text-line-transform-stream';
import { PolyfillTextDecoderStream } from './text-decoder-stream'; import { PolyfillTextDecoderStream } from './text-decoder-stream';
import { processLine } from './process-line';
// function createTextLineStreamFromStreamSource(stream: ReadableStream<Uint8Array>) { // function createTextLineStreamFromStreamSource(stream: ReadableStream<Uint8Array>) {
// return stream // return stream
// .pipeThrough(new PolyfillTextDecoderStream()) // .pipeThrough(new PolyfillTextDecoderStream())
@ -54,3 +55,14 @@ export function createReadlineInterfaceFromResponse(this: void, resp: Response)
export function fetchRemoteTextByLine(url: string | URL) { export function fetchRemoteTextByLine(url: string | URL) {
return fetchWithRetry(url, defaultRequestInit).then(createReadlineInterfaceFromResponse); return fetchWithRetry(url, defaultRequestInit).then(createReadlineInterfaceFromResponse);
} }
/**
 * Reads a whole file in one pass and returns its processed lines.
 *
 * The content is split on `'\n'`, every line is passed through `processLine`,
 * and the value returned by `processLine` is collected whenever it is truthy.
 * Collecting the returned value (rather than using `processLine` only as a
 * `filter` predicate) matches the `for await ... processLine(line)` loops this
 * helper replaces, so callers receive the processed form of each line.
 *
 * @param file - A path string, a `URL`, or an already-opened `BunFile`.
 *   Strings and URLs are opened with `Bun.file`; anything exposing a `writer`
 *   property is treated as a `BunFile` and used as-is.
 * @returns The processed lines, in file order.
 */
export async function readFileIntoProcessedArray(file: string | URL | BunFile): Promise<string[]> {
  // Anything that is not already a BunFile (detected via its `writer` member) gets opened.
  if (typeof file === 'string' || !('writer' in file)) {
    file = Bun.file(file);
  }

  const content = await file.text();

  const processed: string[] = [];
  for (const rawLine of content.split('\n')) {
    const line = processLine(rawLine);
    if (line) {
      processed.push(line);
    }
  }
  return processed;
}