Feat: implement HTTP 304 with SQLite Cache (#42)

Sukka 2024-10-09 09:25:25 +08:00 committed by GitHub
parent abf924c977
commit 07d3fdf05b
9 changed files with 144 additions and 48 deletions
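
In short: the filesystem/SQLite fetch cache can now revalidate instead of merely expire. On a miss, the new `applyWithHttp304` stores the response's `ETag` alongside the serialized parse result; on later builds it replays the request with `If-None-Match` and, when the server answers `304 Not Modified`, reuses the cached value without re-downloading or re-parsing the body. A minimal standalone sketch of that pattern (not the project's API, which follows in the diff below):

```ts
// Minimal sketch of conditional GET with an ETag; an in-memory Map stands in
// for the SQLite cache used by the real implementation below.
async function conditionalFetch(
  url: string,
  cache: Map<string, { etag: string, body: string }>
): Promise<string> {
  const prev = cache.get(url);
  const resp = await fetch(url, prev ? { headers: { 'If-None-Match': prev.etag } } : undefined);
  if (resp.status === 304 && prev) {
    return prev.body; // server confirmed our copy is still current
  }
  const body = await resp.text();
  const etag = resp.headers.get('etag');
  if (etag) cache.set(url, { etag, body }); // only cache revalidatable responses
  return body;
}
```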

View File

@@ -1,18 +1,16 @@
-import { parseFelixDnsmasq } from './lib/parse-dnsmasq';
+import { parseFelixDnsmasqFromResp } from './lib/parse-dnsmasq';
 import { task } from './trace';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { createMemoizedPromise } from './lib/memo-promise';
-import { TTL, deserializeArray, fsFetchCache, serializeArray, createCacheKey } from './lib/cache-filesystem';
+import { deserializeArray, fsFetchCache, serializeArray, getFileContentHash } from './lib/cache-filesystem';
 import { DomainsetOutput } from './lib/create-file';
-const cacheKey = createCacheKey(__filename);
 const url = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/apple.china.conf';
-export const getAppleCdnDomainsPromise = createMemoizedPromise(() => fsFetchCache.apply(
-  cacheKey(url),
-  () => parseFelixDnsmasq(url),
+export const getAppleCdnDomainsPromise = createMemoizedPromise(() => fsFetchCache.applyWithHttp304(
+  url,
+  getFileContentHash(__filename),
+  parseFelixDnsmasqFromResp,
   {
-    ttl: TTL.THREE_DAYS(),
     serializer: serializeArray,
     deserializer: deserializeArray
   }
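
This is the migration pattern repeated across the build scripts in this commit: `apply(cacheKey(url), fetcher, opt)` becomes `applyWithHttp304(url, extraCacheKey, parseResponse, opt)`. The URL doubles as the cache-key base, the per-call `ttl` option goes away (the new method pins a fixed TTL), and a hash of the build script's own source is mixed into the key so that editing the parser invalidates stale entries. A sketch of that last idea, with a hypothetical hash helper standing in for the repo's `stringHash`-based `getFileContentHash`:

```ts
import fs from 'node:fs';
import { createHash } from 'node:crypto';

// Hypothetical stand-in for getFileContentHash: hashing the calling script's
// own source means any code change produces a brand-new cache key.
const getFileContentHash = (filename: string): string =>
  createHash('sha256').update(fs.readFileSync(filename, 'utf-8')).digest('hex').slice(0, 8);

// Mirrors the baseKey construction in applyWithHttp304 further below.
const baseKey = (url: string): string => url + '$' + getFileContentHash(__filename);
```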

View File

@@ -1,10 +1,10 @@
 // @ts-check
 import path from 'node:path';
-import { fetchRemoteTextByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
+import { createReadlineInterfaceFromResponse, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
 import { task } from './trace';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
-import { TTL, fsFetchCache, createCacheKey } from './lib/cache-filesystem';
+import { TTL, fsFetchCache, createCacheKey, getFileContentHash } from './lib/cache-filesystem';
 import { fetchAssets } from './lib/fetch-assets';
 import { processLine } from './lib/process-line';
 import { RulesetOutput } from './lib/create-file';
@@ -14,12 +14,14 @@ const cacheKey = createCacheKey(__filename);
 const BOGUS_NXDOMAIN_URL = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf';
-const getBogusNxDomainIPsPromise = fsFetchCache.apply<[ipv4: string[], ipv6: string[]]>(
-  cacheKey(BOGUS_NXDOMAIN_URL),
-  async () => {
+const getBogusNxDomainIPsPromise = fsFetchCache.applyWithHttp304(
+  BOGUS_NXDOMAIN_URL,
+  getFileContentHash(__filename),
+  async (resp) => {
     const ipv4: string[] = [];
     const ipv6: string[] = [];
-    for await (const line of await fetchRemoteTextByLine(BOGUS_NXDOMAIN_URL)) {
+    for await (const line of createReadlineInterfaceFromResponse(resp)) {
       if (line.startsWith('bogus-nxdomain=')) {
         const ip = line.slice(15).trim();
         if (isProbablyIpv4(ip)) {
@@ -32,7 +34,6 @@ const getBogusNxDomainIPsPromise = fsFetchCache.apply<[ipv4: string[], ipv6: str
     return [ipv4, ipv6] as const;
   },
   {
-    ttl: TTL.ONE_WEEK(),
     serializer: JSON.stringify,
     deserializer: JSON.parse
   }

View File

@@ -139,18 +139,16 @@ const PREDEFINE_DOMAINS = [
 const s = new Sema(2);
 const cacheKey = createCacheKey(__filename);
-const latestTopUserAgentsPromise = fsFetchCache.apply(
+const latestTopUserAgentsPromise = fsFetchCache.applyWithHttp304<string[]>(
+  'https://cdn.jsdelivr.net/npm/top-user-agents@latest/src/desktop.json',
   cacheKey('https://cdn.jsdelivr.net/npm/top-user-agents@latest/src/desktop.json'),
-  () => fetchWithRetry(
-    'https://cdn.jsdelivr.net/npm/top-user-agents@latest/src/desktop.json',
-    { signal: AbortSignal.timeout(1000 * 60) }
-  )
-    .then(res => res.json() as Promise<string[]>)
-    .then((userAgents) => userAgents.filter(ua => ua.startsWith('Mozilla/5.0 '))),
+  async (res) => {
+    const userAgents = await (res.json() as Promise<string[]>);
+    return userAgents.filter(ua => ua.startsWith('Mozilla/5.0 '));
+  },
   {
     serializer: serializeArray,
-    deserializer: deserializeArray,
-    ttl: TTL.THREE_DAYS()
+    deserializer: deserializeArray
   }
 );

View File

@@ -4,10 +4,11 @@ import os from 'node:os';
 import path from 'node:path';
 import { mkdirSync } from 'node:fs';
 import picocolors from 'picocolors';
-import { fastStringArrayJoin, identity } from './misc';
+import { fastStringArrayJoin, identity, mergeHeaders } from './misc';
 import { performance } from 'node:perf_hooks';
 import fs from 'node:fs';
 import { stringHash } from './string-hash';
+import { defaultRequestInit, fetchWithRetry } from './fetch-retry';
 const enum CacheStatus {
   Hit = 'hit',
@@ -44,6 +45,7 @@ const ONE_HOUR = 60 * 60 * 1000;
 const ONE_DAY = 24 * ONE_HOUR;
 // Add some randomness to the cache ttl to avoid thundering herd
 export const TTL = {
+  useHttp304: Symbol('useHttp304'),
   humanReadable(ttl: number) {
     if (ttl >= ONE_DAY) {
       return `${Math.round(ttl / 24 / 60 / 60 / 1000)}d`;
@@ -56,6 +58,7 @@
   THREE_HOURS: () => randomInt(1, 3) * ONE_HOUR,
   TWLVE_HOURS: () => randomInt(8, 12) * ONE_HOUR,
   ONE_DAY: () => randomInt(23, 25) * ONE_HOUR,
+  ONE_WEEK_STATIC: ONE_DAY * 7,
   THREE_DAYS: () => randomInt(1, 3) * ONE_DAY,
   ONE_WEEK: () => randomInt(4, 7) * ONE_DAY,
   TEN_DAYS: () => randomInt(7, 10) * ONE_DAY,
@@ -204,6 +207,75 @@ export class Cache<S = string> {
     return deserializer(cached);
   }
+  async applyWithHttp304<T>(
+    url: string,
+    extraCacheKey: string,
+    fn: (resp: Response) => Promise<T>,
+    opt: Omit<CacheApplyOption<T, S>, 'ttl' | 'incrementTtlWhenHit'>,
+    requestInit?: RequestInit
+  ) {
+    const { temporaryBypass } = opt;
+    const ttl = TTL.ONE_WEEK_STATIC;
+    if (temporaryBypass) {
+      return fn(await fetchWithRetry(url, requestInit ?? defaultRequestInit));
+    }
+    const baseKey = url + '$' + extraCacheKey;
+    const etagKey = baseKey + '$etag';
+    const cachedKey = baseKey + '$cached';
+    const onMiss = (resp: Response) => {
+      console.log(picocolors.yellow('[cache] miss'), url, picocolors.gray(`ttl: ${TTL.humanReadable(ttl)}`));
+      const serializer = 'serializer' in opt ? opt.serializer : identity as any;
+      const etag = resp.headers.get('etag');
+      if (!etag) {
+        console.log(picocolors.red('[cache] no etag'), picocolors.gray(url));
+        return fn(resp);
+      }
+      const promise = fn(resp);
+      return promise.then((value) => {
+        this.set(etagKey, etag, ttl);
+        this.set(cachedKey, serializer(value), ttl);
+        return value;
+      });
+    };
+    const cached = this.get(cachedKey);
+    if (cached == null) {
+      return onMiss(await fetchWithRetry(url, requestInit ?? defaultRequestInit));
+    }
+    const etag = this.get(etagKey);
+    const resp = await fetchWithRetry(
+      url,
+      {
+        ...(requestInit ?? defaultRequestInit),
+        headers: (typeof etag === 'string' && etag.length > 0)
+          ? mergeHeaders(
+            (requestInit ?? defaultRequestInit).headers,
+            { 'If-None-Match': etag }
+          )
+          : (requestInit ?? defaultRequestInit).headers
+      }
+    );
+    if (resp.status !== 304) {
+      return onMiss(resp);
+    }
+    console.log(picocolors.green('[cache] http 304'), picocolors.gray(url));
+    this.updateTtl(cachedKey, ttl);
+    const deserializer = 'deserializer' in opt ? opt.deserializer : identity as any;
+    return deserializer(cached);
+  }
   destroy() {
     this.db.close();
   }
@@ -222,7 +294,8 @@ export const deserializeSet = (str: string) => new Set(str.split(separator));
 export const serializeArray = (arr: string[]) => fastStringArrayJoin(arr, separator);
 export const deserializeArray = (str: string) => str.split(separator);
+export const getFileContentHash = (filename: string) => stringHash(fs.readFileSync(filename, 'utf-8'));
 export const createCacheKey = (filename: string) => {
-  const fileHash = stringHash(fs.readFileSync(filename, 'utf-8'));
+  const fileHash = getFileContentHash(filename);
   return (key: string) => key + '$' + fileHash + '$';
 };
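
`applyWithHttp304` derives two SQLite rows from `url + '$' + extraCacheKey`: one suffixed `$etag` for the validator and one suffixed `$cached` for the serialized payload. A fresh 200 refills both; a 304 merely bumps the TTL via `updateTtl` and deserializes the stored payload; a response with no `ETag` header logs `[cache] no etag` and skips caching entirely. A call site looks like this (hypothetical URL and parser; the signature is as added above):

```ts
// Hypothetical call site for the new method (URL and parsing are placeholders):
const listPromise = fsFetchCache.applyWithHttp304<string[]>(
  'https://example.com/some-list.txt',       // also the base of the cache key
  getFileContentHash(__filename),            // invalidates when this script changes
  (resp) => resp.text().then(text => text.split('\n')),
  {
    serializer: serializeArray,
    deserializer: deserializeArray
  }
);
```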

View File

@ -1,18 +1,14 @@
import { TTL, deserializeArray, fsFetchCache, serializeArray, createCacheKey } from './cache-filesystem';
import { defaultRequestInit, fetchWithRetry } from './fetch-retry';
import { deserializeArray, fsFetchCache, getFileContentHash, serializeArray } from './cache-filesystem';
import { createMemoizedPromise } from './memo-promise';
const cacheKey = createCacheKey(__filename);
export const getPublicSuffixListTextPromise = createMemoizedPromise(() => fsFetchCache.apply(
cacheKey('https://publicsuffix.org/list/public_suffix_list.dat'),
() => fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit)
.then(r => r.text()).then(text => text.split('\n')),
export const getPublicSuffixListTextPromise = createMemoizedPromise(() => fsFetchCache.applyWithHttp304<string[]>(
'https://publicsuffix.org/list/public_suffix_list.dat',
getFileContentHash(__filename),
(r) => r.text().then(text => text.split('\n')),
{
// https://github.com/publicsuffix/list/blob/master/.github/workflows/tld-update.yml
// Though the action runs every 24 hours, the IANA list is updated every 7 days.
// So a 3 day TTL should be enough.
ttl: TTL.THREE_DAYS(),
serializer: serializeArray,
deserializer: deserializeArray
}

View File

@@ -89,7 +89,7 @@ function createFetchRetry($fetch: typeof fetch): FetchWithRetry {
       }
       throw new ResponseError(res);
     } else {
-      if (!res.ok && retryOpts.retryOnNon2xx) {
+      if ((!res.ok && res.status !== 304) && retryOpts.retryOnNon2xx) {
         throw new ResponseError(res);
       }
       return res;
@@ -106,7 +106,7 @@ function createFetchRetry($fetch: typeof fetch): FetchWithRetry {
         return bail(err) as never;
       }
-      console.log(picocolors.gray('[fetch fail]'), url);
+      console.log(picocolors.gray('[fetch fail]'), url, err);
       throw err;
     }
   }, retryOpts);
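
The first change is what lets a 304 reach the cache layer at all: `Response.ok` is true only for statuses in the 200-299 range, so a bare `!res.ok` guard would treat `304 Not Modified` as a failure and throw `ResponseError` whenever `retryOnNon2xx` is enabled, before `applyWithHttp304` could inspect the response. The second change simply includes the caught error in the retry log line.

```ts
// Response.ok covers only 2xx; a 304 is "not ok" even though it is the
// expected success path of a conditional request:
const notModified = new Response(null, { status: 304 });
console.log(notModified.ok);     // false
console.log(notModified.status); // 304
```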

View File

@@ -95,3 +95,30 @@ export function withBannerArray(title: string, description: string[] | readonly
     '################## EOF ##################'
   ];
 };
+export const mergeHeaders = (headersA: RequestInit['headers'] | undefined, headersB: RequestInit['headers']) => {
+  if (headersA == null) {
+    return headersB;
+  }
+  if (Array.isArray(headersB)) {
+    throw new TypeError('Array headers is not supported');
+  }
+  const result = new Headers(headersA);
+  if (headersB instanceof Headers) {
+    headersB.forEach((value, key) => {
+      result.set(key, value);
+    });
+    return result;
+  }
+  for (const key in headersB) {
+    if (Object.hasOwn(headersB, key)) {
+      result.set(key, (headersB as Record<string, string>)[key]);
+    }
+  }
+  return result;
+};
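
`mergeHeaders` is what lets `applyWithHttp304` layer `If-None-Match` on top of whatever headers the caller's `RequestInit` already carries. It accepts any `HeadersInit` shape for the base (the `Headers` constructor normalizes all of them) but deliberately rejects the array-of-pairs form for the overriding side. Assumed usage, mirroring the call inside `applyWithHttp304`:

```ts
// The base may be a plain record, a Headers instance, or an array of pairs;
// the override must be a record or Headers. Keys come back lowercased,
// because the Headers class normalizes them.
const merged = mergeHeaders({ accept: 'text/plain' }, { 'If-None-Match': '"abc123"' }) as Headers;
merged.forEach((value, key) => console.log(key, value));
// -> accept text/plain
// -> if-none-match "abc123"
```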

View File

@@ -1,5 +1,6 @@
-import { fetchRemoteTextByLine } from './fetch-text-by-line';
+import { createReadlineInterfaceFromResponse } from './fetch-text-by-line';
 import { parse as tldtsParse } from 'tldts';
+import { fetchWithRetry, defaultRequestInit } from './fetch-retry';
 const isDomainLoose = (domain: string): boolean => {
   const { isIcann, isPrivate, isIp } = tldtsParse(domain);
@@ -13,14 +14,20 @@ export const extractDomainsFromFelixDnsmasq = (line: string): string | null => {
   return null;
 };
-export const parseFelixDnsmasq = async (url: string | URL): Promise<string[]> => {
-  const res: string[] = [];
-  for await (const line of await fetchRemoteTextByLine(url)) {
+export const parseFelixDnsmasqFromResp = async (resp: Response): Promise<string[]> => {
+  const results: string[] = [];
+  for await (const line of createReadlineInterfaceFromResponse(resp)) {
     const domain = extractDomainsFromFelixDnsmasq(line);
     if (domain && isDomainLoose(domain)) {
-      res.push(domain);
+      results.push(domain);
     }
   }
-  return res;
+  return results;
 };
+export const parseFelixDnsmasq = async (url: string | URL): Promise<string[]> => {
+  const resp = await fetchWithRetry(url, defaultRequestInit);
+  return parseFelixDnsmasqFromResp(resp);
+};
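
Splitting the parser this way means the cache layer can hand `parseFelixDnsmasqFromResp` the body of a fresh 200 while skipping the parse entirely on a 304, and `parseFelixDnsmasq` survives as a thin URL-based wrapper for callers outside the cache. Side by side (a sketch; assumes top-level await):

```ts
// Inside the cache: the Response-based entry point is passed as the parser.
const domains = await fsFetchCache.applyWithHttp304(
  url,
  getFileContentHash(__filename),
  parseFelixDnsmasqFromResp,
  { serializer: serializeArray, deserializer: deserializeArray }
);

// Outside the cache: the wrapper fetches and delegates.
const domainsDirect = await parseFelixDnsmasq(url);
```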

View File

@@ -159,11 +159,7 @@ export async function processFilterRules(
   ttl: number | null = null,
   allowThirdParty = false
 ): Promise<{ white: string[], black: string[], foundDebugDomain: boolean }> {
-  const [white, black, warningMessages] = await parentSpan.traceChild(`process filter rules: ${filterRulesUrl}`).traceAsyncFn((span) => fsFetchCache.apply<Readonly<[
-    white: string[],
-    black: string[],
-    warningMessages: string[]
-  ]>>(
+  const [white, black, warningMessages] = await parentSpan.traceChild(`process filter rules: ${filterRulesUrl}`).traceAsyncFn((span) => fsFetchCache.apply<Readonly<[ white: string[], black: string[], warningMessages: string[] ]>>(
     cacheKey(filterRulesUrl),
     async () => {
       const whitelistDomainSets = new Set<string>();