From f761546a05495516352fb843d002d6503f9264da Mon Sep 17 00:00:00 2001 From: SukkaW Date: Sun, 4 Aug 2024 23:13:23 +0800 Subject: [PATCH] Chore: add cache key to fs cache --- Build/build-apple-cdn.ts | 6 ++-- Build/build-reject-ip-list.ts | 17 ++++++--- Build/build-speedtest-domainset.ts | 7 ++-- Build/lib/cache-filesystem.ts | 7 ++++ Build/lib/download-publicsuffixlist.ts | 6 ++-- Build/lib/parse-filter.ts | 10 +++--- Build/lib/string-hash.ts | 48 ++++++++++++++++++++++++++ 7 files changed, 85 insertions(+), 16 deletions(-) create mode 100644 Build/lib/string-hash.ts diff --git a/Build/build-apple-cdn.ts b/Build/build-apple-cdn.ts index 7d0b688a..4302634e 100644 --- a/Build/build-apple-cdn.ts +++ b/Build/build-apple-cdn.ts @@ -5,10 +5,12 @@ import { parseFelixDnsmasq } from './lib/parse-dnsmasq'; import { task } from './trace'; import { SHARED_DESCRIPTION } from './lib/constants'; import { createMemoizedPromise } from './lib/memo-promise'; -import { TTL, deserializeArray, fsFetchCache, serializeArray } from './lib/cache-filesystem'; +import { TTL, deserializeArray, fsFetchCache, serializeArray, createCacheKey } from './lib/cache-filesystem'; + +const cacheKey = createCacheKey(__filename); export const getAppleCdnDomainsPromise = createMemoizedPromise(() => fsFetchCache.apply( - 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/apple.china.conf', + cacheKey('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/apple.china.conf'), () => parseFelixDnsmasq('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/apple.china.conf'), { ttl: TTL.THREE_DAYS(), diff --git a/Build/build-reject-ip-list.ts b/Build/build-reject-ip-list.ts index f3397fc7..ba25c9f3 100644 --- a/Build/build-reject-ip-list.ts +++ b/Build/build-reject-ip-list.ts @@ -5,15 +5,17 @@ import { fetchRemoteTextByLine, readFileIntoProcessedArray } from './lib/fetch-t import { task } from './trace'; import { SHARED_DESCRIPTION } from 
'./lib/constants'; import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip'; -import { TTL, deserializeArray, fsFetchCache, serializeArray } from './lib/cache-filesystem'; +import { TTL, deserializeArray, fsFetchCache, serializeArray, createCacheKey } from './lib/cache-filesystem'; import { fetchAssets } from './lib/fetch-assets'; import { processLine } from './lib/process-line'; import { appendArrayInPlace } from './lib/append-array-in-place'; +const cacheKey = createCacheKey(__filename); + const BOGUS_NXDOMAIN_URL = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf'; const getBogusNxDomainIPsPromise = fsFetchCache.apply( - BOGUS_NXDOMAIN_URL, + cacheKey(BOGUS_NXDOMAIN_URL), async () => { const result: string[] = []; for await (const line of await fetchRemoteTextByLine(BOGUS_NXDOMAIN_URL)) { @@ -43,7 +45,7 @@ const BOTNET_FILTER_MIRROR_URL = [ ]; const getBotNetFilterIPsPromise = fsFetchCache.apply( - BOTNET_FILTER_URL, + cacheKey(BOTNET_FILTER_URL), async () => { const text = await fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL); return text.split('\n').reduce((acc, cur) => { @@ -70,8 +72,13 @@ const localRejectIPSourcesPromise = readFileIntoProcessedArray(path.resolve(__di export const buildRejectIPList = task(require.main === module, __filename)(async (span) => { const result = await localRejectIPSourcesPromise; - const bogusNxDomainIPs = await span.traceChildPromise('get bogus nxdomain ips', getBogusNxDomainIPsPromise); - const botNetIPs = await span.traceChildPromise('get botnet ips', getBotNetFilterIPsPromise); + const results = await Promise.all([ + span.traceChildPromise('get bogus nxdomain ips', getBogusNxDomainIPsPromise), + span.traceChildPromise('get botnet ips', getBotNetFilterIPsPromise) + ]); + + const bogusNxDomainIPs = results[0]; + const botNetIPs = results[1]; appendArrayInPlace(result, bogusNxDomainIPs); appendArrayInPlace(result, botNetIPs); diff --git 
a/Build/build-speedtest-domainset.ts b/Build/build-speedtest-domainset.ts index 7481c277..ffe502ae 100644 --- a/Build/build-speedtest-domainset.ts +++ b/Build/build-speedtest-domainset.ts @@ -9,14 +9,15 @@ import { task } from './trace'; import { fetchWithRetry } from './lib/fetch-retry'; import { SHARED_DESCRIPTION } from './lib/constants'; import { readFileIntoProcessedArray } from './lib/fetch-text-by-line'; -import { TTL, deserializeArray, fsFetchCache, serializeArray } from './lib/cache-filesystem'; +import { TTL, deserializeArray, fsFetchCache, serializeArray, createCacheKey } from './lib/cache-filesystem'; import { createTrie } from './lib/trie'; const s = new Sema(2); +const cacheKey = createCacheKey(__filename); const latestTopUserAgentsPromise = fsFetchCache.apply( - 'https://cdn.jsdelivr.net/npm/top-user-agents@latest/src/desktop.json', + cacheKey('https://cdn.jsdelivr.net/npm/top-user-agents@latest/src/desktop.json'), () => fetchWithRetry( 'https://cdn.jsdelivr.net/npm/top-user-agents@latest/src/desktop.json', { signal: AbortSignal.timeout(1000 * 60) } @@ -39,7 +40,7 @@ const querySpeedtestApi = async (keyword: string): Promise> const randomUserAgent = topUserAgents[Math.floor(Math.random() * topUserAgents.length)]; return await fsFetchCache.apply( - url, + cacheKey(url), () => s.acquire().then(() => fetchWithRetry(url, { headers: { dnt: '1', diff --git a/Build/lib/cache-filesystem.ts b/Build/lib/cache-filesystem.ts index 85a8b895..e39cc2be 100644 --- a/Build/lib/cache-filesystem.ts +++ b/Build/lib/cache-filesystem.ts @@ -6,6 +6,8 @@ import { mkdirSync } from 'fs'; import picocolors from 'picocolors'; import { fastStringArrayJoin } from './misc'; import { performance } from 'perf_hooks'; +import fs from 'fs'; +import { stringHash } from './string-hash'; const identity = (x: any) => x; @@ -213,3 +215,8 @@ export const serializeSet = (set: Set) => fastStringArrayJoin(Array.from export const deserializeSet = (str: string) => new Set(str.split(separator)); 
export const serializeArray = (arr: string[]) => fastStringArrayJoin(arr, separator); export const deserializeArray = (str: string) => str.split(separator); + +export const createCacheKey = (filename: string) => { + const fileHash = stringHash(fs.readFileSync(filename, 'utf-8')); + return (key: string) => key + '$' + fileHash; +}; diff --git a/Build/lib/download-publicsuffixlist.ts b/Build/lib/download-publicsuffixlist.ts index caa172a3..8a947155 100644 --- a/Build/lib/download-publicsuffixlist.ts +++ b/Build/lib/download-publicsuffixlist.ts @@ -1,9 +1,11 @@ -import { TTL, deserializeArray, fsFetchCache, serializeArray } from './cache-filesystem'; +import { TTL, deserializeArray, fsFetchCache, serializeArray, createCacheKey } from './cache-filesystem'; import { defaultRequestInit, fetchWithRetry } from './fetch-retry'; import { createMemoizedPromise } from './memo-promise'; +const cacheKey = createCacheKey(__filename); + export const getPublicSuffixListTextPromise = createMemoizedPromise(() => fsFetchCache.apply( - 'https://publicsuffix.org/list/public_suffix_list.dat array', + cacheKey('https://publicsuffix.org/list/public_suffix_list.dat'), () => fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit) .then(r => r.text()).then(text => text.split('\n')), { diff --git a/Build/lib/parse-filter.ts b/Build/lib/parse-filter.ts index 50cfbd76..c095758f 100644 --- a/Build/lib/parse-filter.ts +++ b/Build/lib/parse-filter.ts @@ -7,7 +7,7 @@ import tldts from 'tldts-experimental'; import picocolors from 'picocolors'; import { normalizeDomain } from './normalize-domain'; import { fetchAssets } from './fetch-assets'; -import { deserializeArray, fsFetchCache, serializeArray } from './cache-filesystem'; +import { deserializeArray, fsFetchCache, serializeArray, createCacheKey } from './cache-filesystem'; import type { Span } from '../trace'; import createKeywordFilter from './aho-corasick'; import { looseTldtsOpt } from 
'../constants/loose-tldts-opt'; @@ -31,9 +31,11 @@ const domainListLineCb = (l: string, set: string[], includeAllSubDomain: boolean set.push(includeAllSubDomain ? `.${line}` : line); }; +const cacheKey = createCacheKey(__filename); + export function processDomainLists(span: Span, domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) { return span.traceChild(`process domainlist: ${domainListsUrl}`).traceAsyncFn((childSpan) => fsFetchCache.apply( - domainListsUrl, + cacheKey(domainListsUrl), async () => { const domainSets: string[] = []; @@ -88,7 +90,7 @@ const hostsLineCb = (l: string, set: string[], includeAllSubDomain: boolean, met export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) { return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn((childSpan) => fsFetchCache.apply( - hostsUrl, + cacheKey(hostsUrl), async () => { const domainSets: string[] = []; @@ -140,7 +142,7 @@ export async function processFilterRules( black: string[], warningMessages: string[] ]>>( - filterRulesUrl, + cacheKey(filterRulesUrl), async () => { const whitelistDomainSets = new Set(); const blacklistDomainSets = new Set(); diff --git a/Build/lib/string-hash.ts b/Build/lib/string-hash.ts new file mode 100644 index 00000000..f5029e6e --- /dev/null +++ b/Build/lib/string-hash.ts @@ -0,0 +1,48 @@ +/** + * FNV-1a Hash implementation + * @author Travis Webb (tjwebb) + * + * Ported from https://github.com/tjwebb/fnv-plus/blob/master/index.js + * + * Simplified, optimized, and modified for 52 bit, which provides a larger hash space + * while still making use of JavaScript's 53-bit integer space. 
+ */ +export const fnv1a52 = (str: string) => { + const len = str.length; + let i = 0, + t0 = 0, + v0 = 0x2325, + t1 = 0, + v1 = 0x8422, + t2 = 0, + v2 = 0x9CE4, + t3 = 0, + v3 = 0xCBF2; + + while (i < len) { + v0 ^= str.charCodeAt(i++); + t0 = v0 * 435; + t1 = v1 * 435; + t2 = v2 * 435; + t3 = v3 * 435; + t2 += v0 << 8; + t3 += v1 << 8; + t1 += t0 >>> 16; + v0 = t0 & 65535; + t2 += t1 >>> 16; + v1 = t1 & 65535; + v3 = (t3 + (t2 >>> 16)) & 65535; + v2 = t2 & 65535; + } + + return ( + (v3 & 15) * 281_474_976_710_656 + + v2 * 4_294_967_296 + + v1 * 65536 + + (v0 ^ (v3 >> 4)) + ); +}; + +export const stringHash = (payload: string) => { + return fnv1a52(payload).toString(36) + payload.length.toString(36); +};