mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 17:20:35 +08:00
Chore: process line stream
This commit is contained in:
parent
e2920de2fa
commit
f64fa201e9
@ -5,18 +5,14 @@ import { task } from './trace';
|
|||||||
import { SHARED_DESCRIPTION } from './constants/description';
|
import { SHARED_DESCRIPTION } from './constants/description';
|
||||||
import { appendArrayInPlace } from './lib/append-array-in-place';
|
import { appendArrayInPlace } from './lib/append-array-in-place';
|
||||||
import { SOURCE_DIR } from './constants/dir';
|
import { SOURCE_DIR } from './constants/dir';
|
||||||
import { processLine } from './lib/process-line';
|
|
||||||
import { DomainsetOutput } from './lib/create-file';
|
import { DomainsetOutput } from './lib/create-file';
|
||||||
import { CRASHLYTICS_WHITELIST } from './constants/reject-data-source';
|
import { CRASHLYTICS_WHITELIST } from './constants/reject-data-source';
|
||||||
|
|
||||||
const getS3OSSDomainsPromise = (async (): Promise<string[]> => {
|
const getS3OSSDomainsPromise = (async (): Promise<string[]> => {
|
||||||
const trie = new HostnameTrie();
|
const trie = new HostnameTrie();
|
||||||
|
|
||||||
for await (const line of await fetchRemoteTextByLine('https://publicsuffix.org/list/public_suffix_list.dat')) {
|
for await (const line of await fetchRemoteTextByLine('https://publicsuffix.org/list/public_suffix_list.dat', true)) {
|
||||||
const tmp = processLine(line);
|
trie.add(line);
|
||||||
if (tmp) {
|
|
||||||
trie.add(tmp);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -1,5 +1,4 @@
|
|||||||
import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
|
import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
|
||||||
import { processLineFromReadline } from './lib/process-line';
|
|
||||||
import { task } from './trace';
|
import { task } from './trace';
|
||||||
|
|
||||||
import { contains as containsCidr, exclude as excludeCidr } from 'fast-cidr-tools';
|
import { contains as containsCidr, exclude as excludeCidr } from 'fast-cidr-tools';
|
||||||
@ -19,8 +18,8 @@ const PROBE_CHN_CIDR_V4 = [
|
|||||||
export const getChnCidrPromise = createMemoizedPromise(cachedOnlyFail(
|
export const getChnCidrPromise = createMemoizedPromise(cachedOnlyFail(
|
||||||
async function getChnCidr() {
|
async function getChnCidr() {
|
||||||
const [_cidr4, cidr6] = await Promise.all([
|
const [_cidr4, cidr6] = await Promise.all([
|
||||||
fetchRemoteTextByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt').then(processLineFromReadline),
|
fetchRemoteTextByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt', true).then(Array.fromAsync<string>),
|
||||||
fetchRemoteTextByLine('https://gaoyifan.github.io/china-operator-ip/china6.txt').then(processLineFromReadline)
|
fetchRemoteTextByLine('https://gaoyifan.github.io/china-operator-ip/china6.txt', true).then(Array.fromAsync<string>)
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const cidr4 = excludeCidr(
|
const cidr4 = excludeCidr(
|
||||||
|
|||||||
@ -15,7 +15,7 @@ const getBogusNxDomainIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = $f
|
|||||||
const ipv4: string[] = [];
|
const ipv4: string[] = [];
|
||||||
const ipv6: string[] = [];
|
const ipv6: string[] = [];
|
||||||
|
|
||||||
for await (const line of createReadlineInterfaceFromResponse(resp)) {
|
for await (const line of createReadlineInterfaceFromResponse(resp, true)) {
|
||||||
if (line.startsWith('bogus-nxdomain=')) {
|
if (line.startsWith('bogus-nxdomain=')) {
|
||||||
const ip = line.slice(15).trim();
|
const ip = line.slice(15).trim();
|
||||||
if (isProbablyIpv4(ip)) {
|
if (isProbablyIpv4(ip)) {
|
||||||
|
|||||||
@ -1,7 +1,6 @@
|
|||||||
// @ts-check
|
// @ts-check
|
||||||
import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line';
|
import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line';
|
||||||
import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
|
import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
|
||||||
import { processLine } from './lib/process-line';
|
|
||||||
import { task } from './trace';
|
import { task } from './trace';
|
||||||
import { SHARED_DESCRIPTION } from './constants/description';
|
import { SHARED_DESCRIPTION } from './constants/description';
|
||||||
import { createMemoizedPromise } from './lib/memo-promise';
|
import { createMemoizedPromise } from './lib/memo-promise';
|
||||||
@ -16,10 +15,7 @@ export const getTelegramCIDRPromise = createMemoizedPromise(async () => {
|
|||||||
const ipcidr: string[] = [];
|
const ipcidr: string[] = [];
|
||||||
const ipcidr6: string[] = [];
|
const ipcidr6: string[] = [];
|
||||||
|
|
||||||
for await (const line of createReadlineInterfaceFromResponse(resp)) {
|
for await (const cidr of createReadlineInterfaceFromResponse(resp, true)) {
|
||||||
const cidr = processLine(line);
|
|
||||||
if (!cidr) continue;
|
|
||||||
|
|
||||||
const [subnet] = cidr.split('/');
|
const [subnet] = cidr.split('/');
|
||||||
if (isProbablyIpv4(subnet)) {
|
if (isProbablyIpv4(subnet)) {
|
||||||
ipcidr.push(cidr);
|
ipcidr.push(cidr);
|
||||||
|
|||||||
@ -1,5 +1,4 @@
|
|||||||
import { fetchRemoteTextByLine } from './fetch-text-by-line';
|
import { fetchRemoteTextByLine } from './fetch-text-by-line';
|
||||||
import { processLineFromReadline } from './process-line';
|
|
||||||
|
|
||||||
import createKeywordFilter from './aho-corasick';
|
import createKeywordFilter from './aho-corasick';
|
||||||
|
|
||||||
@ -36,7 +35,7 @@ if (require.main === module) {
|
|||||||
(async () => {
|
(async () => {
|
||||||
const { bench, group, run } = await import('mitata');
|
const { bench, group, run } = await import('mitata');
|
||||||
|
|
||||||
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://easylist.to/easylist/easylist.txt'));
|
const data = await Array.fromAsync(await fetchRemoteTextByLine('https://easylist.to/easylist/easylist.txt', true));
|
||||||
console.log({ dataLen: data.length });
|
console.log({ dataLen: data.length });
|
||||||
const keywordsSet = [
|
const keywordsSet = [
|
||||||
'!',
|
'!',
|
||||||
|
|||||||
@ -1,4 +1,3 @@
|
|||||||
import { processLine, processLineFromReadline } from './process-line';
|
|
||||||
import { readFileByLine, readFileByLineLegacy } from './fetch-text-by-line';
|
import { readFileByLine, readFileByLineLegacy } from './fetch-text-by-line';
|
||||||
import path from 'node:path';
|
import path from 'node:path';
|
||||||
import fsp from 'node:fs/promises';
|
import fsp from 'node:fs/promises';
|
||||||
@ -10,9 +9,9 @@ const file = path.join(SOURCE_DIR, 'domainset/cdn.conf');
|
|||||||
const { bench, group, run } = await import('mitata');
|
const { bench, group, run } = await import('mitata');
|
||||||
|
|
||||||
group(() => {
|
group(() => {
|
||||||
bench('readFileByLine', () => processLineFromReadline(readFileByLine(file)));
|
bench('readFileByLine', () => Array.fromAsync(readFileByLine(file)));
|
||||||
bench('readFileByLineLegacy', () => processLineFromReadline(readFileByLineLegacy(file)));
|
bench('readFileByLineLegacy', () => Array.fromAsync(readFileByLineLegacy(file)));
|
||||||
bench('fsp.readFile', () => fsp.readFile(file, 'utf-8').then((content) => content.split('\n').filter(processLine)));
|
bench('fsp.readFile', () => fsp.readFile(file, 'utf-8').then((content) => content.split('\n')));
|
||||||
});
|
});
|
||||||
|
|
||||||
run();
|
run();
|
||||||
|
|||||||
@ -6,7 +6,7 @@ import readline from 'node:readline';
|
|||||||
import { TextLineStream } from './text-line-transform-stream';
|
import { TextLineStream } from './text-line-transform-stream';
|
||||||
import type { ReadableStream } from 'node:stream/web';
|
import type { ReadableStream } from 'node:stream/web';
|
||||||
import { TextDecoderStream } from 'node:stream/web';
|
import { TextDecoderStream } from 'node:stream/web';
|
||||||
import { processLine } from './process-line';
|
import { processLine, ProcessLineStream } from './process-line';
|
||||||
import { $fetch } from './make-fetch-happen';
|
import { $fetch } from './make-fetch-happen';
|
||||||
import type { NodeFetchResponse } from './make-fetch-happen';
|
import type { NodeFetchResponse } from './make-fetch-happen';
|
||||||
import type { UndiciResponseData } from './fetch-retry';
|
import type { UndiciResponseData } from './fetch-retry';
|
||||||
@ -40,7 +40,7 @@ function ensureResponseBody<T extends NodeFetchResponse | UndiciResponseData | U
|
|||||||
return resp.body;
|
return resp.body;
|
||||||
}
|
}
|
||||||
|
|
||||||
export const createReadlineInterfaceFromResponse: ((resp: NodeFetchResponse | UndiciResponseData | UnidiciWebResponse) => AsyncIterable<string>) = (resp) => {
|
export const createReadlineInterfaceFromResponse: ((resp: NodeFetchResponse | UndiciResponseData | UnidiciWebResponse, processLine?: boolean) => ReadableStream<string>) = (resp, processLine = false) => {
|
||||||
const stream = ensureResponseBody(resp);
|
const stream = ensureResponseBody(resp);
|
||||||
|
|
||||||
const webStream: ReadableStream<Uint8Array> = 'getReader' in stream
|
const webStream: ReadableStream<Uint8Array> = 'getReader' in stream
|
||||||
@ -51,13 +51,18 @@ export const createReadlineInterfaceFromResponse: ((resp: NodeFetchResponse | Un
|
|||||||
: Readable.toWeb(new Readable().wrap(stream))
|
: Readable.toWeb(new Readable().wrap(stream))
|
||||||
);
|
);
|
||||||
|
|
||||||
return webStream
|
const resultStream = webStream
|
||||||
.pipeThrough(new TextDecoderStream())
|
.pipeThrough(new TextDecoderStream())
|
||||||
.pipeThrough(new TextLineStream());
|
.pipeThrough(new TextLineStream());
|
||||||
|
|
||||||
|
if (processLine) {
|
||||||
|
return resultStream.pipeThrough(new ProcessLineStream());
|
||||||
|
}
|
||||||
|
return resultStream;
|
||||||
};
|
};
|
||||||
|
|
||||||
export function fetchRemoteTextByLine(url: string) {
|
export function fetchRemoteTextByLine(url: string, processLine = false): Promise<AsyncIterable<string>> {
|
||||||
return $fetch(url).then(createReadlineInterfaceFromResponse);
|
return $fetch(url).then(resp => createReadlineInterfaceFromResponse(resp, processLine));
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function readFileIntoProcessedArray(file: string /* | FileHandle */) {
|
export async function readFileIntoProcessedArray(file: string /* | FileHandle */) {
|
||||||
|
|||||||
@ -19,7 +19,7 @@ export function extractDomainsFromFelixDnsmasq(line: string): string | null {
|
|||||||
export async function parseFelixDnsmasqFromResp(resp: NodeFetchResponse | UndiciResponseData | Response): Promise<string[]> {
|
export async function parseFelixDnsmasqFromResp(resp: NodeFetchResponse | UndiciResponseData | Response): Promise<string[]> {
|
||||||
const results: string[] = [];
|
const results: string[] = [];
|
||||||
|
|
||||||
for await (const line of createReadlineInterfaceFromResponse(resp)) {
|
for await (const line of createReadlineInterfaceFromResponse(resp, true)) {
|
||||||
const domain = extractDomainsFromFelixDnsmasq(line);
|
const domain = extractDomainsFromFelixDnsmasq(line);
|
||||||
if (domain && isDomainLoose(domain)) {
|
if (domain && isDomainLoose(domain)) {
|
||||||
results.push(domain);
|
results.push(domain);
|
||||||
|
|||||||
@ -1,3 +1,5 @@
|
|||||||
|
import { TransformStream } from 'node:stream/web';
|
||||||
|
|
||||||
export function processLine(line: string): string | null {
|
export function processLine(line: string): string | null {
|
||||||
if (!line) {
|
if (!line) {
|
||||||
return null;
|
return null;
|
||||||
@ -11,8 +13,7 @@ export function processLine(line: string): string | null {
|
|||||||
const line_0: string = trimmed[0];
|
const line_0: string = trimmed[0];
|
||||||
|
|
||||||
if (
|
if (
|
||||||
line_0 === '#'
|
line_0 === ' '
|
||||||
|| line_0 === ' '
|
|
||||||
|| line_0 === '\r'
|
|| line_0 === '\r'
|
||||||
|| line_0 === '\n'
|
|| line_0 === '\n'
|
||||||
|| line_0 === '!'
|
|| line_0 === '!'
|
||||||
@ -21,16 +22,48 @@ export function processLine(line: string): string | null {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (line_0 === '#') {
|
||||||
|
if (trimmed[1] !== '#') {
|
||||||
|
// # Comment
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (trimmed[2] === '#' && trimmed[3] === '#') {
|
||||||
|
// ################## EOF ##################
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* AdGuard Filter can be:
|
||||||
|
*
|
||||||
|
* ##.class
|
||||||
|
* ##tag.class
|
||||||
|
* ###id
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
return trimmed;
|
return trimmed;
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function processLineFromReadline(rl: AsyncIterable<string>): Promise<string[]> {
|
export class ProcessLineStream extends TransformStream<string, string> {
|
||||||
const res: string[] = [];
|
// private __buf = '';
|
||||||
for await (const line of rl) {
|
constructor() {
|
||||||
const l: string | null = processLine(line);
|
super({
|
||||||
if (l) {
|
transform(l, controller) {
|
||||||
res.push(l);
|
const line = processLine(l);
|
||||||
|
if (line) {
|
||||||
|
controller.enqueue(line);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res;
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// export class ProcessLineNodeStream extends Transform {
|
||||||
|
// _transform(chunk: string, encoding: BufferEncoding, callback: TransformCallback) {
|
||||||
|
// // Convert chunk to string and then to uppercase
|
||||||
|
// const upperCased = chunk.toUpperCase();
|
||||||
|
// // Push transformed data to readable side
|
||||||
|
// this.push(upperCased);
|
||||||
|
// // Call callback when done
|
||||||
|
// callback();
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|||||||
@ -1,10 +1,9 @@
|
|||||||
import { fetchRemoteTextByLine } from './fetch-text-by-line';
|
import { fetchRemoteTextByLine } from './fetch-text-by-line';
|
||||||
import { processLineFromReadline } from './process-line';
|
|
||||||
|
|
||||||
import { bench, group, run } from 'mitata';
|
import { bench, group, run } from 'mitata';
|
||||||
|
|
||||||
(async () => {
|
(async () => {
|
||||||
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
|
const data = await Array.fromAsync(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true));
|
||||||
|
|
||||||
group(() => {
|
group(() => {
|
||||||
bench('setAddFromArray', () => {
|
bench('setAddFromArray', () => {
|
||||||
|
|||||||
@ -1,11 +1,10 @@
|
|||||||
import { fetchRemoteTextByLine } from './fetch-text-by-line';
|
import { fetchRemoteTextByLine } from './fetch-text-by-line';
|
||||||
import { processLineFromReadline } from './process-line';
|
|
||||||
import { sortDomains } from './stable-sort-domain';
|
import { sortDomains } from './stable-sort-domain';
|
||||||
|
|
||||||
import { bench, group, run } from 'mitata';
|
import { bench, group, run } from 'mitata';
|
||||||
|
|
||||||
(async () => {
|
(async () => {
|
||||||
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
|
const data = await Array.fromAsync(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true));
|
||||||
|
|
||||||
group(() => {
|
group(() => {
|
||||||
bench('sortDomains', () => sortDomains(data));
|
bench('sortDomains', () => sortDomains(data));
|
||||||
|
|||||||
@ -1,5 +1,4 @@
|
|||||||
import { fetchRemoteTextByLine } from './fetch-text-by-line';
|
import { fetchRemoteTextByLine } from './fetch-text-by-line';
|
||||||
import { processLineFromReadline } from './process-line';
|
|
||||||
|
|
||||||
import { bench, group, run } from 'mitata';
|
import { bench, group, run } from 'mitata';
|
||||||
|
|
||||||
@ -7,7 +6,7 @@ import * as tldts from 'tldts';
|
|||||||
import * as tldtsExperimental from 'tldts-experimental';
|
import * as tldtsExperimental from 'tldts-experimental';
|
||||||
|
|
||||||
(async () => {
|
(async () => {
|
||||||
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
|
const data = await Array.fromAsync(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true));
|
||||||
|
|
||||||
const tldtsOpt: Parameters<typeof tldts.getDomain>[1] = {
|
const tldtsOpt: Parameters<typeof tldts.getDomain>[1] = {
|
||||||
allowPrivateDomains: false,
|
allowPrivateDomains: false,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user