mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-13 09:40:34 +08:00
Minor changes to fs memo implementation / Adapt fs memo
This commit is contained in:
parent
24b928dc32
commit
a8c9cc5ac5
@ -28,7 +28,6 @@ export interface CacheOptions<S = string> {
|
||||
|
||||
interface CacheApplyRawOption {
|
||||
ttl?: number | null,
|
||||
cacheName?: string,
|
||||
temporaryBypass?: boolean,
|
||||
incrementTtlWhenHit?: boolean
|
||||
}
|
||||
@ -187,45 +186,6 @@ export class Cache<S = string> {
|
||||
this.db.prepare(`DELETE FROM ${this.tableName} WHERE key = ?`).run(key);
|
||||
}
|
||||
|
||||
async apply<T>(
|
||||
key: string,
|
||||
fn: () => Promise<T>,
|
||||
opt: CacheApplyOption<T, S>
|
||||
): Promise<T> {
|
||||
const { ttl, temporaryBypass, incrementTtlWhenHit, cacheName } = opt;
|
||||
|
||||
if (temporaryBypass) {
|
||||
return fn();
|
||||
}
|
||||
if (ttl == null) {
|
||||
this.del(key);
|
||||
return fn();
|
||||
}
|
||||
|
||||
const cached = this.get(key);
|
||||
if (cached == null) {
|
||||
console.log(picocolors.yellow('[cache] miss'), picocolors.gray(cacheName || key), picocolors.gray(`ttl: ${TTL.humanReadable(ttl)}`));
|
||||
|
||||
const serializer = 'serializer' in opt ? opt.serializer : identity as any;
|
||||
|
||||
const promise = fn();
|
||||
|
||||
return promise.then((value) => {
|
||||
this.set(key, serializer(value), ttl);
|
||||
return value;
|
||||
});
|
||||
}
|
||||
|
||||
console.log(picocolors.green('[cache] hit'), picocolors.gray(cacheName || key));
|
||||
|
||||
if (incrementTtlWhenHit) {
|
||||
this.updateTtl(key, ttl);
|
||||
}
|
||||
|
||||
const deserializer = 'deserializer' in opt ? opt.deserializer : identity as any;
|
||||
return deserializer(cached);
|
||||
}
|
||||
|
||||
async applyWithHttp304<T>(
|
||||
url: string,
|
||||
extraCacheKey: string,
|
||||
|
||||
@ -3,7 +3,17 @@ import { Cache } from './cache-filesystem';
|
||||
import type { CacheApplyOption } from './cache-filesystem';
|
||||
import { isCI } from 'ci-info';
|
||||
|
||||
const fsMemoCache = new Cache({ cachePath: path.resolve(__dirname, '../../.cache') });
|
||||
import { Typeson, set, map, typedArrays } from 'typeson-registry';
|
||||
import picocolors from 'picocolors';
|
||||
import { identity } from './misc';
|
||||
|
||||
const typeson = new Typeson().register([
|
||||
typedArrays,
|
||||
set,
|
||||
map
|
||||
]);
|
||||
|
||||
const fsMemoCache = new Cache({ cachePath: path.resolve(__dirname, '../../.cache'), tableName: 'fs_memo_cache' });
|
||||
|
||||
const TTL = isCI
|
||||
// We run CI daily, so 1.5 days TTL is enough to persist the cache across runs
|
||||
@ -11,41 +21,64 @@ const TTL = isCI
|
||||
// We run locally less frequently, so we need to persist the cache for longer, 7 days
|
||||
: 7 * 86400 * 1000;
|
||||
|
||||
type JSONValue =
|
||||
type TypesonValue =
|
||||
| string
|
||||
| number
|
||||
| boolean
|
||||
| null
|
||||
| JSONObject
|
||||
| JSONArray;
|
||||
| Set<any>
|
||||
| Map<any, any>
|
||||
| TypesonObject
|
||||
| TypesonArray;
|
||||
|
||||
interface JSONObject {
|
||||
[key: string]: JSONValue
|
||||
interface TypesonObject {
|
||||
[key: string]: TypesonValue
|
||||
}
|
||||
|
||||
interface JSONArray extends Array<JSONValue> {}
|
||||
interface TypesonArray extends Array<TypesonValue> { }
|
||||
|
||||
export function cache<Args extends JSONValue[], T>(
|
||||
cb: (...args: Args) => Promise<T>,
|
||||
opt: Omit<CacheApplyOption<T, string>, 'ttl'>
|
||||
export type FsMemoCacheOptions<T> = CacheApplyOption<T, string> & {
|
||||
ttl?: undefined | never
|
||||
};
|
||||
|
||||
export function cache<Args extends TypesonValue[], T>(
|
||||
fn: (...args: Args) => Promise<T>,
|
||||
opt: FsMemoCacheOptions<T>
|
||||
): (...args: Args) => Promise<T> {
|
||||
// TODO if cb.toString() is long we should hash it
|
||||
const fixedKey = cb.toString();
|
||||
const fixedKey = fn.toString();
|
||||
|
||||
return async function cachedCb(...args: Args) {
|
||||
// Construct the complete cache key for this function invocation
|
||||
// TODO stringify is limited. For now we use TypeScript to guard the args.
|
||||
const cacheKey = `${fixedKey}|${JSON.stringify(args)}`;
|
||||
const cacheName = cb.name || cacheKey;
|
||||
// typeson.stringify is still limited. For now we use TypeScript to guard the args.
|
||||
const cacheKey = `${fixedKey}|${typeson.stringifySync(args)}`;
|
||||
const cacheName = fn.name || cacheKey;
|
||||
|
||||
return fsMemoCache.apply(
|
||||
cacheKey,
|
||||
cb,
|
||||
{
|
||||
cacheName,
|
||||
...opt,
|
||||
ttl: TTL
|
||||
} as CacheApplyOption<T, string>
|
||||
);
|
||||
const { temporaryBypass, incrementTtlWhenHit } = opt;
|
||||
|
||||
if (temporaryBypass) {
|
||||
return fn(...args);
|
||||
}
|
||||
|
||||
const cached = fsMemoCache.get(cacheKey);
|
||||
if (cached == null) {
|
||||
console.log(picocolors.yellow('[cache] miss'), picocolors.gray(cacheName || cacheKey));
|
||||
|
||||
const serializer = 'serializer' in opt ? opt.serializer : identity as any;
|
||||
|
||||
const value = await fn(...args);
|
||||
|
||||
fsMemoCache.set(cacheKey, serializer(value), TTL);
|
||||
return value;
|
||||
}
|
||||
|
||||
console.log(picocolors.green('[cache] hit'), picocolors.gray(cacheName || cacheKey));
|
||||
|
||||
if (incrementTtlWhenHit) {
|
||||
fsMemoCache.updateTtl(cacheKey, TTL);
|
||||
}
|
||||
|
||||
const deserializer = 'deserializer' in opt ? opt.deserializer : identity as any;
|
||||
return deserializer(cached);
|
||||
};
|
||||
}
|
||||
|
||||
@ -8,9 +8,8 @@ import { PHISHING_DOMAIN_LISTS_EXTRA, PHISHING_HOSTS_EXTRA } from '../constants/
|
||||
import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt';
|
||||
import picocolors from 'picocolors';
|
||||
import createKeywordFilter from './aho-corasick';
|
||||
import { createCacheKey, deserializeArray, fsFetchCache, serializeArray } from './cache-filesystem';
|
||||
import { fastStringArrayJoin } from './misc';
|
||||
import { stringHash } from './string-hash';
|
||||
import { createCacheKey, deserializeArray, serializeArray } from './cache-filesystem';
|
||||
import { cache } from './fs-memo';
|
||||
|
||||
const BLACK_TLD = new Set([
|
||||
'accountant', 'art', 'autos',
|
||||
@ -102,32 +101,7 @@ const lowKeywords = createKeywordFilter([
|
||||
|
||||
const cacheKey = createCacheKey(__filename);
|
||||
|
||||
export function getPhishingDomains(parentSpan: Span) {
|
||||
return parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => {
|
||||
const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => {
|
||||
const domainArr: string[] = [];
|
||||
|
||||
(await Promise.all(PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry, cacheKey))))
|
||||
.forEach(appendArrayInPlaceCurried(domainArr));
|
||||
(await Promise.all(PHISHING_HOSTS_EXTRA.map(entry => processHosts(curSpan, ...entry, cacheKey))))
|
||||
.forEach(appendArrayInPlaceCurried(domainArr));
|
||||
|
||||
return domainArr;
|
||||
});
|
||||
|
||||
const cacheHash = span.traceChildSync('get hash', () => stringHash(fastStringArrayJoin(domainArr, '|')));
|
||||
|
||||
return span.traceChildAsync(
|
||||
'process phishing domain set',
|
||||
() => processPhihsingDomains(domainArr, cacheHash)
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
async function processPhihsingDomains(domainArr: string[], cacheHash = '') {
|
||||
return fsFetchCache.apply(
|
||||
cacheKey('processPhihsingDomains|' + cacheHash),
|
||||
() => {
|
||||
const processPhihsingDomains = cache(function processPhihsingDomains(domainArr: string[]): Promise<string[]> {
|
||||
const domainCountMap: Record<string, number> = {};
|
||||
const domainScoreMap: Record<string, number> = {};
|
||||
|
||||
@ -189,14 +163,29 @@ async function processPhihsingDomains(domainArr: string[], cacheHash = '') {
|
||||
}
|
||||
|
||||
return Promise.resolve(domainArr);
|
||||
},
|
||||
{
|
||||
ttl: 2 * 86400 * 1000,
|
||||
}, {
|
||||
serializer: serializeArray,
|
||||
deserializer: deserializeArray,
|
||||
incrementTtlWhenHit: true
|
||||
}
|
||||
deserializer: deserializeArray
|
||||
});
|
||||
|
||||
export function getPhishingDomains(parentSpan: Span) {
|
||||
return parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => {
|
||||
const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => {
|
||||
const domainArr: string[] = [];
|
||||
|
||||
(await Promise.all(PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry, cacheKey))))
|
||||
.forEach(appendArrayInPlaceCurried(domainArr));
|
||||
(await Promise.all(PHISHING_HOSTS_EXTRA.map(entry => processHosts(curSpan, ...entry, cacheKey))))
|
||||
.forEach(appendArrayInPlaceCurried(domainArr));
|
||||
|
||||
return domainArr;
|
||||
});
|
||||
|
||||
return span.traceChildAsync(
|
||||
'process phishing domain set',
|
||||
() => processPhihsingDomains(domainArr)
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
export function calcDomainAbuseScore(subdomain: string, fullDomain: string = subdomain) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user