diff --git a/Build/build-apple-cdn.ts b/Build/build-apple-cdn.ts index 856b13dc..130def55 100644 --- a/Build/build-apple-cdn.ts +++ b/Build/build-apple-cdn.ts @@ -1,18 +1,16 @@ -import { parseFelixDnsmasq } from './lib/parse-dnsmasq'; +import { parseFelixDnsmasqFromResp } from './lib/parse-dnsmasq'; import { task } from './trace'; import { SHARED_DESCRIPTION } from './lib/constants'; import { createMemoizedPromise } from './lib/memo-promise'; -import { TTL, deserializeArray, fsFetchCache, serializeArray, createCacheKey } from './lib/cache-filesystem'; +import { deserializeArray, fsFetchCache, serializeArray, getFileContentHash } from './lib/cache-filesystem'; import { DomainsetOutput } from './lib/create-file'; -const cacheKey = createCacheKey(__filename); - const url = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/apple.china.conf'; -export const getAppleCdnDomainsPromise = createMemoizedPromise(() => fsFetchCache.apply( - cacheKey(url), - () => parseFelixDnsmasq(url), +export const getAppleCdnDomainsPromise = createMemoizedPromise(() => fsFetchCache.applyWithHttp304( + url, + getFileContentHash(__filename), + parseFelixDnsmasqFromResp, { - ttl: TTL.THREE_DAYS(), serializer: serializeArray, deserializer: deserializeArray } diff --git a/Build/build-reject-ip-list.ts b/Build/build-reject-ip-list.ts index e05ef242..069b5df8 100644 --- a/Build/build-reject-ip-list.ts +++ b/Build/build-reject-ip-list.ts @@ -1,10 +1,10 @@ // @ts-check import path from 'node:path'; -import { fetchRemoteTextByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line'; +import { createReadlineInterfaceFromResponse, readFileIntoProcessedArray } from './lib/fetch-text-by-line'; import { task } from './trace'; import { SHARED_DESCRIPTION } from './lib/constants'; import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip'; -import { TTL, fsFetchCache, createCacheKey } from './lib/cache-filesystem'; +import { TTL, fsFetchCache, createCacheKey, getFileContentHash } from './lib/cache-filesystem'; import { fetchAssets } from './lib/fetch-assets'; import { processLine } from './lib/process-line'; import { RulesetOutput } from './lib/create-file'; @@ -14,12 +14,14 @@ const cacheKey = createCacheKey(__filename); const BOGUS_NXDOMAIN_URL = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf'; -const getBogusNxDomainIPsPromise = fsFetchCache.apply<[ipv4: string[], ipv6: string[]]>( - cacheKey(BOGUS_NXDOMAIN_URL), - async () => { +const getBogusNxDomainIPsPromise = fsFetchCache.applyWithHttp304( + BOGUS_NXDOMAIN_URL, + getFileContentHash(__filename), + async (resp) => { const ipv4: string[] = []; const ipv6: string[] = []; - for await (const line of await fetchRemoteTextByLine(BOGUS_NXDOMAIN_URL)) { + + for await (const line of createReadlineInterfaceFromResponse(resp)) { if (line.startsWith('bogus-nxdomain=')) { const ip = line.slice(15).trim(); if (isProbablyIpv4(ip)) { @@ -32,7 +34,6 @@ const getBogusNxDomainIPsPromise = fsFetchCache.apply<[ipv4: string[], ipv6: str return [ipv4, ipv6] as const; }, { - ttl: TTL.ONE_WEEK(), serializer: JSON.stringify, deserializer: JSON.parse } diff --git a/Build/build-speedtest-domainset.ts b/Build/build-speedtest-domainset.ts index 5646c8e7..f8235fe6 100644 --- a/Build/build-speedtest-domainset.ts +++ b/Build/build-speedtest-domainset.ts @@ -139,18 +139,16 @@ const PREDEFINE_DOMAINS = [ const s = new Sema(2); const cacheKey = createCacheKey(__filename); -const latestTopUserAgentsPromise = fsFetchCache.apply( +const latestTopUserAgentsPromise = fsFetchCache.applyWithHttp304( + 'https://cdn.jsdelivr.net/npm/top-user-agents@latest/src/desktop.json', cacheKey('https://cdn.jsdelivr.net/npm/top-user-agents@latest/src/desktop.json'), - () => fetchWithRetry( - 'https://cdn.jsdelivr.net/npm/top-user-agents@latest/src/desktop.json', - { signal: AbortSignal.timeout(1000 * 60) } - ) - .then(res => res.json() as Promise) - .then((userAgents) => userAgents.filter(ua => ua.startsWith('Mozilla/5.0 '))), + async (res) => { + const userAgents = await (res.json() as Promise); + return userAgents.filter(ua => ua.startsWith('Mozilla/5.0 ')); + }, { serializer: serializeArray, - deserializer: deserializeArray, - ttl: TTL.THREE_DAYS() + deserializer: deserializeArray } ); diff --git a/Build/lib/cache-filesystem.ts b/Build/lib/cache-filesystem.ts index b5c6df2a..f91bc173 100644 --- a/Build/lib/cache-filesystem.ts +++ b/Build/lib/cache-filesystem.ts @@ -4,10 +4,11 @@ import os from 'node:os'; import path from 'node:path'; import { mkdirSync } from 'node:fs'; import picocolors from 'picocolors'; -import { fastStringArrayJoin, identity } from './misc'; +import { fastStringArrayJoin, identity, mergeHeaders } from './misc'; import { performance } from 'node:perf_hooks'; import fs from 'node:fs'; import { stringHash } from './string-hash'; +import { defaultRequestInit, fetchWithRetry } from './fetch-retry'; const enum CacheStatus { Hit = 'hit', @@ -44,6 +45,7 @@ const ONE_HOUR = 60 * 60 * 1000; const ONE_DAY = 24 * ONE_HOUR; // Add some randomness to the cache ttl to avoid thundering herd export const TTL = { + useHttp304: Symbol('useHttp304'), humanReadable(ttl: number) { if (ttl >= ONE_DAY) { return `${Math.round(ttl / 24 / 60 / 60 / 1000)}d`; @@ -56,6 +58,7 @@ export const TTL = { THREE_HOURS: () => randomInt(1, 3) * ONE_HOUR, TWLVE_HOURS: () => randomInt(8, 12) * ONE_HOUR, ONE_DAY: () => randomInt(23, 25) * ONE_HOUR, + ONE_WEEK_STATIC: ONE_DAY * 7, THREE_DAYS: () => randomInt(1, 3) * ONE_DAY, ONE_WEEK: () => randomInt(4, 7) * ONE_DAY, TEN_DAYS: () => randomInt(7, 10) * ONE_DAY, @@ -204,6 +207,75 @@ export class Cache { return deserializer(cached); } + async applyWithHttp304( + url: string, + extraCacheKey: string, + fn: (resp: Response) => Promise, + opt: Omit, 'ttl' | 'incrementTtlWhenHit'>, + requestInit?: RequestInit + ) { + const { temporaryBypass } = opt; + + const ttl = TTL.ONE_WEEK_STATIC; + + if (temporaryBypass) { + return fn(await fetchWithRetry(url, requestInit ?? defaultRequestInit)); + } + + const baseKey = url + '$' + extraCacheKey; + const etagKey = baseKey + '$etag'; + const cachedKey = baseKey + '$cached'; + + const onMiss = (resp: Response) => { + console.log(picocolors.yellow('[cache] miss'), url, picocolors.gray(`ttl: ${TTL.humanReadable(ttl)}`)); + + const serializer = 'serializer' in opt ? opt.serializer : identity as any; + + const etag = resp.headers.get('etag'); + + if (!etag) { + console.log(picocolors.red('[cache] no etag'), picocolors.gray(url)); + return fn(resp); + } + const promise = fn(resp); + + return promise.then((value) => { + this.set(etagKey, etag, ttl); + this.set(cachedKey, serializer(value), ttl); + return value; + }); + }; + + const cached = this.get(cachedKey); + if (cached == null) { + return onMiss(await fetchWithRetry(url, requestInit ?? defaultRequestInit)); + } + + const etag = this.get(etagKey); + const resp = await fetchWithRetry( + url, + { + ...(requestInit ?? defaultRequestInit), + headers: (typeof etag === 'string' && etag.length > 0) + ? mergeHeaders( + (requestInit ?? defaultRequestInit).headers, + { 'If-None-Match': etag } + ) + : (requestInit ?? defaultRequestInit).headers + } + ); + + if (resp.status !== 304) { + return onMiss(resp); + } + + console.log(picocolors.green('[cache] http 304'), picocolors.gray(url)); + this.updateTtl(cachedKey, ttl); + + const deserializer = 'deserializer' in opt ? opt.deserializer : identity as any; + return deserializer(cached); + } + destroy() { this.db.close(); } @@ -222,7 +294,8 @@ export const deserializeSet = (str: string) => new Set(str.split(separator)); export const serializeArray = (arr: string[]) => fastStringArrayJoin(arr, separator); export const deserializeArray = (str: string) => str.split(separator); +export const getFileContentHash = (filename: string) => stringHash(fs.readFileSync(filename, 'utf-8')); export const createCacheKey = (filename: string) => { - const fileHash = stringHash(fs.readFileSync(filename, 'utf-8')); + const fileHash = getFileContentHash(filename); return (key: string) => key + '$' + fileHash + '$'; }; diff --git a/Build/lib/download-publicsuffixlist.ts b/Build/lib/download-publicsuffixlist.ts index 8a947155..08cfdeba 100644 --- a/Build/lib/download-publicsuffixlist.ts +++ b/Build/lib/download-publicsuffixlist.ts @@ -1,18 +1,14 @@ -import { TTL, deserializeArray, fsFetchCache, serializeArray, createCacheKey } from './cache-filesystem'; -import { defaultRequestInit, fetchWithRetry } from './fetch-retry'; +import { deserializeArray, fsFetchCache, getFileContentHash, serializeArray } from './cache-filesystem'; import { createMemoizedPromise } from './memo-promise'; -const cacheKey = createCacheKey(__filename); - -export const getPublicSuffixListTextPromise = createMemoizedPromise(() => fsFetchCache.apply( - cacheKey('https://publicsuffix.org/list/public_suffix_list.dat'), - () => fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit) - .then(r => r.text()).then(text => text.split('\n')), +export const getPublicSuffixListTextPromise = createMemoizedPromise(() => fsFetchCache.applyWithHttp304( + 'https://publicsuffix.org/list/public_suffix_list.dat', + getFileContentHash(__filename), + (r) => r.text().then(text => text.split('\n')), { // https://github.com/publicsuffix/list/blob/master/.github/workflows/tld-update.yml // Though the action runs every 24 hours, the IANA list is updated every 7 days. // So a 3 day TTL should be enough. - ttl: TTL.THREE_DAYS(), serializer: serializeArray, deserializer: deserializeArray } diff --git a/Build/lib/fetch-retry.ts b/Build/lib/fetch-retry.ts index f68600fe..ccbc7526 100644 --- a/Build/lib/fetch-retry.ts +++ b/Build/lib/fetch-retry.ts @@ -89,7 +89,7 @@ function createFetchRetry($fetch: typeof fetch): FetchWithRetry { } throw new ResponseError(res); } else { - if (!res.ok && retryOpts.retryOnNon2xx) { + if ((!res.ok && res.status !== 304) && retryOpts.retryOnNon2xx) { throw new ResponseError(res); } return res; @@ -106,7 +106,7 @@ function createFetchRetry($fetch: typeof fetch): FetchWithRetry { return bail(err) as never; } - console.log(picocolors.gray('[fetch fail]'), url); + console.log(picocolors.gray('[fetch fail]'), url, err); throw err; } }, retryOpts); diff --git a/Build/lib/misc.ts b/Build/lib/misc.ts index 170bea79..b364064a 100644 --- a/Build/lib/misc.ts +++ b/Build/lib/misc.ts @@ -95,3 +95,30 @@ export function withBannerArray(title: string, description: string[] | readonly '################## EOF ##################' ]; }; + +export const mergeHeaders = (headersA: RequestInit['headers'] | undefined, headersB: RequestInit['headers']) => { + if (headersA == null) { + return headersB; + } + + if (Array.isArray(headersB)) { + throw new TypeError('Array headers is not supported'); + } + + const result = new Headers(headersA); + + if (headersB instanceof Headers) { + headersB.forEach((value, key) => { + result.set(key, value); + }); + return result; + } + + for (const key in headersB) { + if (Object.hasOwn(headersB, key)) { + result.set(key, (headersB as Record)[key]); + } + } + + return result; +}; diff --git a/Build/lib/parse-dnsmasq.ts b/Build/lib/parse-dnsmasq.ts index f3972027..4a8523bd 100644 --- a/Build/lib/parse-dnsmasq.ts +++ b/Build/lib/parse-dnsmasq.ts @@ -1,5 +1,6 @@ -import { fetchRemoteTextByLine } from './fetch-text-by-line'; +import { createReadlineInterfaceFromResponse } from './fetch-text-by-line'; import { parse as tldtsParse } from 'tldts'; +import { fetchWithRetry, defaultRequestInit } from './fetch-retry'; const isDomainLoose = (domain: string): boolean => { const { isIcann, isPrivate, isIp } = tldtsParse(domain); @@ -13,14 +14,20 @@ export const extractDomainsFromFelixDnsmasq = (line: string): string | null => { return null; }; -export const parseFelixDnsmasq = async (url: string | URL): Promise => { - const res: string[] = []; - for await (const line of await fetchRemoteTextByLine(url)) { +export const parseFelixDnsmasqFromResp = async (resp: Response): Promise => { + const results: string[] = []; + + for await (const line of createReadlineInterfaceFromResponse(resp)) { const domain = extractDomainsFromFelixDnsmasq(line); if (domain && isDomainLoose(domain)) { - res.push(domain); + results.push(domain); } } - return res; + return results; +}; + +export const parseFelixDnsmasq = async (url: string | URL): Promise => { + const resp = await fetchWithRetry(url, defaultRequestInit); + return parseFelixDnsmasqFromResp(resp); }; diff --git a/Build/lib/parse-filter.ts b/Build/lib/parse-filter.ts index 39f0514e..b97380e9 100644 --- a/Build/lib/parse-filter.ts +++ b/Build/lib/parse-filter.ts @@ -159,11 +159,7 @@ export async function processFilterRules( ttl: number | null = null, allowThirdParty = false ): Promise<{ white: string[], black: string[], foundDebugDomain: boolean }> { - const [white, black, warningMessages] = await parentSpan.traceChild(`process filter rules: ${filterRulesUrl}`).traceAsyncFn((span) => fsFetchCache.apply>( + const [white, black, warningMessages] = await parentSpan.traceChild(`process filter rules: ${filterRulesUrl}`).traceAsyncFn((span) => fsFetchCache.apply>( cacheKey(filterRulesUrl), async () => { const whitelistDomainSets = new Set();