diff --git a/Build/lib/fs-memo.ts b/Build/lib/fs-memo.ts index ed7a43fe..efcd1b1e 100644 --- a/Build/lib/fs-memo.ts +++ b/Build/lib/fs-memo.ts @@ -1,123 +1,67 @@ import path from 'node:path'; -import { Cache } from './cache-filesystem'; -import type { CacheApplyOption } from './cache-filesystem'; import { isCI } from 'ci-info'; -import { xxhash64 } from 'hash-wasm'; - import picocolors from 'picocolors'; -import { fastStringArrayJoin } from 'foxts/fast-string-array-join'; -import { identity } from 'foxts/identity'; +import { Cache } from './cache-filesystem'; +import { createMemoize } from 'foxts/serialized-memo'; +import type { MemoizeStorageProvider } from 'foxts/serialized-memo'; const fsMemoCache = new Cache({ cachePath: path.resolve(__dirname, '../../.cache'), tableName: 'fs_memo_cache' }); +const fsMemoCacheProvider: MemoizeStorageProvider = { + has(key) { + return fsMemoCache.get(key) !== null; + }, + delete() { + // noop + }, + get(key) { + return fsMemoCache.get(key) ?? undefined; + }, + set(key, value, ttl) { + fsMemoCache.set(key, value, ttl); + }, + updateTtl(key, ttl) { + fsMemoCache.updateTtl(key, ttl); + } +}; + const TTL = isCI // We run CI daily, so 1.5 days TTL is enough to persist the cache across runs ? 1.5 * 86400 * 1000 // We run locally less frequently, so we need to persist the cache for longer, 7 days : 7 * 86400 * 1000; -type TypedArray = Int8Array | Uint8Array | Uint8ClampedArray | Int16Array | Uint16Array | Int32Array | Uint32Array | Float32Array | Float64Array | BigInt64Array | BigUint64Array; - -// https://github.com/Rich-Harris/devalue/blob/f3fd2aa93d79f21746555671f955a897335edb1b/src/stringify.js#L77 -type Devalue = - | number - | string - | boolean - | bigint - | Date - | RegExp - | Set - | Devalue[] - | null - | undefined - | Map - | DevalueObject - | TypedArray - | ArrayBuffer; - -// Has to use an interface to avoid circular reference -interface DevalueObject { - [key: string]: Devalue -} - -export type FsMemoCacheOptions = CacheApplyOption & { - ttl?: undefined | never -}; - -function createCache(onlyUseCachedIfFail: boolean) { - return function cache( - fn: (...args: Args) => Promise, - opt: FsMemoCacheOptions - ): (...args: Args) => Promise { - if (opt.temporaryBypass) { - return fn; +export const cache = createMemoize(fsMemoCacheProvider, { + defaultTtl: TTL, + onCacheMiss(key, { humanReadableName, isUseCachedIfFail }) { + const cacheName = picocolors.gray(humanReadableName); + if (isUseCachedIfFail) { + console.log(picocolors.red('[fail] and no cache, throwing'), cacheName); + } else { + console.log(picocolors.yellow('[cache] miss'), cacheName); } + }, + onCacheUpdate(key, { humanReadableName, isUseCachedIfFail }) { + const cacheName = picocolors.gray(humanReadableName); + if (isUseCachedIfFail) { + console.log(picocolors.gray('[cache] update'), cacheName); + } + }, + onCacheHit(key, { humanReadableName, isUseCachedIfFail }) { + const cacheName = picocolors.gray(humanReadableName); + if (isUseCachedIfFail) { + console.log(picocolors.yellow('[fail] try cache'), cacheName); + } else { + console.log(picocolors.green('[cache] hit'), cacheName); + } + } +}); - const serializer = 'serializer' in opt ? opt.serializer : identity; - const deserializer = 'deserializer' in opt ? opt.deserializer : identity; +export const cachedOnlyFail = createMemoize(fsMemoCacheProvider, { + defaultTtl: TTL, + onlyUseCachedIfFail: true +}); - const fixedKey = fn.toString(); - - const fixedKeyHashPromise = xxhash64(fixedKey); - const devalueModulePromise = import('devalue'); - - return async function cachedCb(...args: Args) { - const devalueStringify = (await devalueModulePromise).stringify; - - // Construct the complete cache key for this function invocation - // typeson.stringify is still limited. For now we uses typescript to guard the args. - const cacheKey = fastStringArrayJoin( - await Promise.all([ - fixedKeyHashPromise, - xxhash64(devalueStringify(args)) - ]), - '|' - ); - - const cacheName = picocolors.gray(fn.name || fixedKey || cacheKey); - - const cached = fsMemoCache.get(cacheKey); - - if (onlyUseCachedIfFail) { - try { - const value = await fn(...args); - - console.log(picocolors.gray('[cache] update'), cacheName); - fsMemoCache.set(cacheKey, serializer(value), TTL); - - return value; - } catch (e) { - if (cached == null) { - console.log(picocolors.red('[fail] and no cache, throwing'), cacheName); - throw e; - } - - fsMemoCache.updateTtl(cacheKey, TTL); - - console.log(picocolors.yellow('[fail] try cache'), cacheName); - - return deserializer(cached); - } - } else { - if (cached == null) { - console.log(picocolors.yellow('[cache] miss'), cacheName); - - const value = await fn(...args); - - fsMemoCache.set(cacheKey, serializer(value), TTL); - return value; - } - - console.log(picocolors.green('[cache] hit'), cacheName); - - fsMemoCache.updateTtl(cacheKey, TTL); - - return deserializer(cached); - } - }; - }; -} - -export const cache = createCache(false); -export const cachedOnlyFail = createCache(true); +// export const cache = createCache(false); +// export const cachedOnlyFail = createCache(true); diff --git a/Build/lib/get-phishing-domains.ts b/Build/lib/get-phishing-domains.ts index c5eeb3ae..f0b61645 100644 --- a/Build/lib/get-phishing-domains.ts +++ b/Build/lib/get-phishing-domains.ts @@ -110,7 +110,7 @@ const lowKeywords = createKeywordFilter([ 'banking' ]); -const processPhihsingDomains = cache(function processPhihsingDomains(domainArr: string[]): Promise { +const processPhihsingDomains = cache(function processPhihsingDomains(domainArr: string[]): string[] { const domainCountMap = new Map(); const domainScoreMap: Record = {}; @@ -197,7 +197,7 @@ const processPhihsingDomains = cache(function processPhihsingDomains(domainArr: // console.log({ duplicateCount, domainArrLen: domainArr.length }); - return Promise.resolve(domainArr); + return domainArr; }, { serializer: serializeArray, deserializer: deserializeArray,