diff --git a/Build/build-speedtest-domainset.ts b/Build/build-speedtest-domainset.ts index f2084052..7481c277 100644 --- a/Build/build-speedtest-domainset.ts +++ b/Build/build-speedtest-domainset.ts @@ -8,18 +8,19 @@ import { getHostname } from 'tldts'; import { task } from './trace'; import { fetchWithRetry } from './lib/fetch-retry'; import { SHARED_DESCRIPTION } from './lib/constants'; -import picocolors from 'picocolors'; import { readFileIntoProcessedArray } from './lib/fetch-text-by-line'; import { TTL, deserializeArray, fsFetchCache, serializeArray } from './lib/cache-filesystem'; import { createTrie } from './lib/trie'; -import { peek, track } from './lib/bun'; const s = new Sema(2); const latestTopUserAgentsPromise = fsFetchCache.apply( 'https://cdn.jsdelivr.net/npm/top-user-agents@latest/src/desktop.json', - () => fetchWithRetry('https://cdn.jsdelivr.net/npm/top-user-agents@latest/src/desktop.json') + () => fetchWithRetry( + 'https://cdn.jsdelivr.net/npm/top-user-agents@latest/src/desktop.json', + { signal: AbortSignal.timeout(1000 * 60) } + ) .then(res => res.json() as Promise) .then((userAgents) => userAgents.filter(ua => ua.startsWith('Mozilla/5.0 '))), { @@ -56,7 +57,7 @@ const querySpeedtestApi = async (keyword: string): Promise> } : {}) }, - signal: AbortSignal.timeout(1000 * 4), + signal: AbortSignal.timeout(1000 * 60), retry: { retries: 2 } @@ -194,63 +195,44 @@ export const buildSpeedtestDomainSet = task(require.main === module, __filename) } ); - await new Promise((resolve, reject) => { - const pMap = ([ - 'Hong Kong', - 'Taiwan', - 'China Telecom', - 'China Mobile', - 'China Unicom', - 'Japan', - 'Tokyo', - 'Singapore', - 'Korea', - 'Seoul', - 'Canada', - 'Toronto', - 'Montreal', - 'Los Ang', - 'San Jos', - 'Seattle', - 'New York', - 'Dallas', - 'Miami', - 'Berlin', - 'Frankfurt', - 'London', - 'Paris', - 'Amsterdam', - 'Moscow', - 'Australia', - 'Sydney', - 'Brazil', - 'Turkey' - ]).reduce>>((pMap, keyword) => { - pMap[keyword] = 
track(span.traceChildAsync(`fetch speedtest endpoints: ${keyword}`, () => querySpeedtestApi(keyword)).then(hostnameGroup => { - return hostnameGroup.forEach(hostname => { - if (hostname) { - domainTrie.add(hostname); - } - }); - })); - - return pMap; - }, {}); - - const timer = setTimeout(() => { - console.error(picocolors.red('Task timeout!')); - Object.entries(pMap).forEach(([name, p]) => { - console.log(`[${name}]`, peek(p)); - }); - - resolve(); - }, 1000 * 60 * 1.5); - - Promise.all(Object.values(pMap)).then(() => { - clearTimeout(timer); - return resolve(); - }).catch(() => reject); - }); + await Promise.all([ + 'Hong Kong', + 'Taiwan', + 'China Telecom', + 'China Mobile', + 'China Unicom', + 'Japan', + 'Tokyo', + 'Singapore', + 'Korea', + 'Seoul', + 'Canada', + 'Toronto', + 'Montreal', + 'Los Ang', + 'San Jos', + 'Seattle', + 'New York', + 'Dallas', + 'Miami', + 'Berlin', + 'Frankfurt', + 'London', + 'Paris', + 'Amsterdam', + 'Moscow', + 'Australia', + 'Sydney', + 'Brazil', + 'Turkey' + ].map((keyword) => span.traceChildAsync( + `fetch speedtest endpoints: ${keyword}`, + () => querySpeedtestApi(keyword) + ).then(hostnameGroup => hostnameGroup.forEach(hostname => { + if (hostname) { + domainTrie.add(hostname); + } + })))); const deduped = span.traceChildSync('sort result', () => sortDomains(domainDeduper(domainTrie))); diff --git a/Build/lib/fetch-retry.ts b/Build/lib/fetch-retry.ts index afee348f..1973a664 100644 --- a/Build/lib/fetch-retry.ts +++ b/Build/lib/fetch-retry.ts @@ -2,12 +2,6 @@ import retry from 'async-retry'; import picocolors from 'picocolors'; import { setTimeout } from 'timers/promises'; -// retry settings -const MIN_TIMEOUT = 10; -const MAX_RETRIES = 5; -const MAX_RETRY_AFTER = 20; -const FACTOR = 6; - function isClientError(err: unknown): err is NodeJS.ErrnoException { if (!err || typeof err !== 'object') return false; @@ -55,10 +49,10 @@ interface FetchWithRetry { const DEFAULT_OPT: Required = { // timeouts will be [10, 60, 360, 2160, 
12960] // (before randomization is added) - minTimeout: MIN_TIMEOUT, - retries: MAX_RETRIES, - factor: FACTOR, - maxRetryAfter: MAX_RETRY_AFTER, + minTimeout: 10, + retries: 5, + factor: 6, + maxRetryAfter: 20, retryOnAborted: false, retryOnNon2xx: true }; diff --git a/Build/lib/fetch-text-by-line.ts b/Build/lib/fetch-text-by-line.ts index c3970e0a..9483f2bf 100644 --- a/Build/lib/fetch-text-by-line.ts +++ b/Build/lib/fetch-text-by-line.ts @@ -8,47 +8,16 @@ import type { ReadableStream } from 'stream/web'; import { TextDecoderStream } from 'stream/web'; import { processLine } from './process-line'; -const enableTextLineStream = !!process.env.ENABLE_TEXT_LINE_STREAM; - -const decoder = new TextDecoder('utf-8'); -async function *createTextLineAsyncIterableFromStreamSource(stream: ReadableStream): AsyncIterable { - let buf = ''; - - const reader = stream.getReader(); - - while (true) { - const res = await reader.read(); - if (res.done) { - break; - } - const chunkStr = decoder.decode(res.value).replaceAll('\r\n', '\n'); - for (let i = 0, len = chunkStr.length; i < len; i++) { - const char = chunkStr[i]; - if (char === '\n') { - yield buf; - buf = ''; - } else { - buf += char; - } - } - } - - if (buf) { - yield buf; - } -} - const getReadableStream = (file: string | FileHandle): ReadableStream => { if (typeof file === 'string') { - return Readable.toWeb(fs.createReadStream(file /* { encoding: 'utf-8' } */)); + return Readable.toWeb(fs.createReadStream(file/* , { encoding: 'utf-8' } */)); } return file.readableWebStream(); }; - // TODO: use FileHandle.readLine() -export const readFileByLine: ((file: string | FileHandle) => AsyncIterable) = enableTextLineStream - ? 
(file: string | FileHandle) => getReadableStream(file).pipeThrough(new TextDecoderStream()).pipeThrough(new TextLineStream()) - : (file: string | FileHandle) => createTextLineAsyncIterableFromStreamSource(getReadableStream(file)); +export const readFileByLine: ((file: string | FileHandle) => AsyncIterable) = (file: string | FileHandle) => getReadableStream(file) + .pipeThrough(new TextDecoderStream()) + .pipeThrough(new TextLineStream()); const ensureResponseBody = (resp: Response) => { if (!resp.body) { @@ -60,9 +29,9 @@ const ensureResponseBody = (resp: Response) => { return resp.body; }; -export const createReadlineInterfaceFromResponse: ((resp: Response) => AsyncIterable) = enableTextLineStream - ? (resp) => ensureResponseBody(resp).pipeThrough(new TextDecoderStream()).pipeThrough(new TextLineStream()) - : (resp) => createTextLineAsyncIterableFromStreamSource(ensureResponseBody(resp)); +export const createReadlineInterfaceFromResponse: ((resp: Response) => AsyncIterable) = (resp) => ensureResponseBody(resp) + .pipeThrough(new TextDecoderStream()) + .pipeThrough(new TextLineStream()); export function fetchRemoteTextByLine(url: string | URL) { return fetchWithRetry(url, defaultRequestInit).then(createReadlineInterfaceFromResponse); diff --git a/Build/lib/get-gorhill-publicsuffix.ts b/Build/lib/get-gorhill-publicsuffix.ts index 090d1931..146937b6 100644 --- a/Build/lib/get-gorhill-publicsuffix.ts +++ b/Build/lib/get-gorhill-publicsuffix.ts @@ -4,7 +4,8 @@ import { createMemoizedPromise } from './memo-promise'; import { getPublicSuffixListTextPromise } from './download-publicsuffixlist'; import { fileURLToPath } from 'url'; -const customFetch = async (url: string | URL) => { +// TODO: node undici fetch doesn't support file URL reading yet +const customFetch = async (url: URL) => { const filePath = fileURLToPath(url); const file = await fsp.readFile(filePath); return new Blob([file]) as any; diff --git a/package.json b/package.json index e489aff6..57033b1a 100644 
--- a/package.json +++ b/package.json @@ -9,10 +9,11 @@ }, "type": "commonjs", "scripts": { - "node": "SWCRC=true ENABLE_TEXT_LINE_STREAM=true node -r @swc-node/register", - "dexnode": "SWCRC=true ENABLE_TEXT_LINE_STREAM=true dexnode -r @swc-node/register", + "node": "SWCRC=true node -r @swc-node/register", + "dexnode": "SWCRC=true dexnode -r @swc-node/register", "build": "pnpm run node ./Build/index.ts", "build-profile": "pnpm run dexnode -r @swc-node/register ./Build/index.ts", + "build-webstream": "ENABLE_EXPERIMENTAL_WEBSTREAMS=true pnpm run node ./Build/index.ts", "lint": "eslint --format=sukka ." }, "author": "",