Perf: use readline to readFileByLine (50% faster)

This commit is contained in:
SukkaW 2024-11-13 11:12:38 +08:00
parent 7499fea63f
commit 3107e72787
2 changed files with 20 additions and 9 deletions

View File

@@ -1,15 +1,19 @@
import { bench, group, run } from 'mitata';
import { processLine, processLineFromReadline } from './process-line'; import { processLine, processLineFromReadline } from './process-line';
import { readFileByLine } from './fetch-text-by-line'; import { readFileByLine, readFileByLineLegacy } from './fetch-text-by-line';
import path from 'node:path'; import path from 'node:path';
import fsp from 'node:fs/promises'; import fsp from 'node:fs/promises';
import { SOURCE_DIR } from '../constants/dir'; import { SOURCE_DIR } from '../constants/dir';
const file = path.join(SOURCE_DIR, 'domainset/cdn.conf'); const file = path.join(SOURCE_DIR, 'domainset/cdn.conf');
(async () => {
const { bench, group, run } = await import('mitata');
group(() => { group(() => {
bench('readFileByLine', () => processLineFromReadline(readFileByLine(file))); bench('readFileByLine', () => processLineFromReadline(readFileByLine(file)));
bench('readFileByLineLegacy', () => processLineFromReadline(readFileByLineLegacy(file)));
bench('fsp.readFile', () => fsp.readFile(file, 'utf-8').then((content) => content.split('\n').filter(processLine))); bench('fsp.readFile', () => fsp.readFile(file, 'utf-8').then((content) => content.split('\n').filter(processLine)));
}); });
run(); run();
})();

View File

@@ -1,6 +1,7 @@
import fs from 'node:fs'; import fs from 'node:fs';
import { Readable } from 'node:stream'; import { Readable } from 'node:stream';
import type { FileHandle } from 'node:fs/promises'; import type { FileHandle } from 'node:fs/promises';
import readline from 'node:readline';
import { TextLineStream } from './text-line-transform-stream'; import { TextLineStream } from './text-line-transform-stream';
import type { ReadableStream } from 'node:stream/web'; import type { ReadableStream } from 'node:stream/web';
@@ -18,11 +19,17 @@ function getReadableStream(file: string | FileHandle): ReadableStream {
} }
return file.readableWebStream(); return file.readableWebStream();
} }
// TODO: use FileHandle.readLine() // TODO: use FileHandle.readLine()
export const readFileByLine: ((file: string | FileHandle) => AsyncIterable<string>) = (file: string | FileHandle) => getReadableStream(file) export const readFileByLineLegacy: ((file: string /* | FileHandle */) => AsyncIterable<string>) = (file: string | FileHandle) => getReadableStream(file)
.pipeThrough(new TextDecoderStream()) .pipeThrough(new TextDecoderStream())
.pipeThrough(new TextLineStream()); .pipeThrough(new TextLineStream());
/**
 * Read a local file line by line through `node:readline`.
 *
 * @param file Path of the file to read.
 * @returns The readline interface built over the file's read stream;
 *          consume it with `for await` to receive one string per line.
 */
export const readFileByLine: ((file: string /* | FileHandle */) => AsyncIterable<string>) = (file: string) => {
  // The stream is opened without an explicit encoding (the option stays commented out).
  const input = fs.createReadStream(file/* , { encoding: 'utf-8' } */);

  // crlfDelay: Infinity makes readline always treat "\r\n" as a single line break.
  return readline.createInterface({ input, crlfDelay: Infinity });
};
function ensureResponseBody<T extends NodeFetchResponse | UndiciResponseData | UnidiciWebResponse>(resp: T): NonNullable<T['body']> { function ensureResponseBody<T extends NodeFetchResponse | UndiciResponseData | UnidiciWebResponse>(resp: T): NonNullable<T['body']> {
if (resp.body == null) { if (resp.body == null) {
throw new Error('Failed to fetch remote text'); throw new Error('Failed to fetch remote text');
@@ -53,7 +60,7 @@ export function fetchRemoteTextByLine(url: string) {
return $fetch(url).then(createReadlineInterfaceFromResponse); return $fetch(url).then(createReadlineInterfaceFromResponse);
} }
export async function readFileIntoProcessedArray(file: string | FileHandle) { export async function readFileIntoProcessedArray(file: string /* | FileHandle */) {
const results = []; const results = [];
for await (const line of readFileByLine(file)) { for await (const line of readFileByLine(file)) {
if (processLine(line)) { if (processLine(line)) {