mirror of
https://github.com/SukkaW/Surge.git
synced 2026-01-29 01:51:52 +08:00
Feat: implement parallel fetch w/ HTTP 304 (#43)
This commit is contained in:
@@ -4,14 +4,11 @@ import { createReadlineInterfaceFromResponse, readFileIntoProcessedArray } from
|
|||||||
import { task } from './trace';
|
import { task } from './trace';
|
||||||
import { SHARED_DESCRIPTION } from './lib/constants';
|
import { SHARED_DESCRIPTION } from './lib/constants';
|
||||||
import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
|
import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
|
||||||
import { TTL, fsFetchCache, createCacheKey, getFileContentHash } from './lib/cache-filesystem';
|
import { fsFetchCache, getFileContentHash } from './lib/cache-filesystem';
|
||||||
import { fetchAssets } from './lib/fetch-assets';
|
|
||||||
import { processLine } from './lib/process-line';
|
import { processLine } from './lib/process-line';
|
||||||
import { RulesetOutput } from './lib/create-file';
|
import { RulesetOutput } from './lib/create-file';
|
||||||
import { SOURCE_DIR } from './constants/dir';
|
import { SOURCE_DIR } from './constants/dir';
|
||||||
|
|
||||||
const cacheKey = createCacheKey(__filename);
|
|
||||||
|
|
||||||
const BOGUS_NXDOMAIN_URL = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf';
|
const BOGUS_NXDOMAIN_URL = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf';
|
||||||
|
|
||||||
const getBogusNxDomainIPsPromise = fsFetchCache.applyWithHttp304(
|
const getBogusNxDomainIPsPromise = fsFetchCache.applyWithHttp304(
|
||||||
@@ -39,31 +36,31 @@ const getBogusNxDomainIPsPromise = fsFetchCache.applyWithHttp304(
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
const BOTNET_FILTER_URL = 'https://curbengh.github.io/botnet-filter/botnet-filter-dnscrypt-blocked-ips.txt';
|
const BOTNET_FILTER_URL = 'https://malware-filter.pages.dev/botnet-filter-dnscrypt-blocked-ips.txt';
|
||||||
const BOTNET_FILTER_MIRROR_URL = [
|
const BOTNET_FILTER_MIRROR_URL = [
|
||||||
'https://curbengh.github.io/malware-filter/botnet-filter-dnscrypt-blocked-ips.txt',
|
'https://botnet-filter.pages.dev/botnet-filter-dnscrypt-blocked-ips.txt',
|
||||||
'https://malware-filter.gitlab.io/malware-filter/botnet-filter-dnscrypt-blocked-ips.txt',
|
'https://malware-filter.gitlab.io/malware-filter/botnet-filter-dnscrypt-blocked-ips.txt',
|
||||||
'https://malware-filter.pages.dev/botnet-filter-dnscrypt-blocked-ips.txt'
|
'https://malware-filter.gitlab.io/botnet-filter/botnet-filter-dnscrypt-blocked-ips.txt'
|
||||||
|
// 'https://curbengh.github.io/botnet-filter/botnet-filter-dnscrypt-blocked-ips.txt',
|
||||||
|
// https://curbengh.github.io/malware-filter/botnet-filter-dnscrypt-blocked-ips.txt
|
||||||
];
|
];
|
||||||
|
|
||||||
const getBotNetFilterIPsPromise = fsFetchCache.apply<[ipv4: string[], ipv6: string[]]>(
|
const getBotNetFilterIPsPromise = fsFetchCache.applyWithHttp304AndMirrors<[ipv4: string[], ipv6: string[]]>(
|
||||||
cacheKey(BOTNET_FILTER_URL),
|
BOTNET_FILTER_URL,
|
||||||
async () => {
|
BOTNET_FILTER_MIRROR_URL,
|
||||||
const text = await fetchAssets(BOTNET_FILTER_URL, BOTNET_FILTER_MIRROR_URL);
|
getFileContentHash(__filename),
|
||||||
return text.split('\n').reduce<[ipv4: string[], ipv6: string[]]>((acc, cur) => {
|
(text) => text.split('\n').reduce<[ipv4: string[], ipv6: string[]]>((acc, cur) => {
|
||||||
const ip = processLine(cur);
|
const ip = processLine(cur);
|
||||||
if (ip) {
|
if (ip) {
|
||||||
if (isProbablyIpv4(ip)) {
|
if (isProbablyIpv4(ip)) {
|
||||||
acc[0].push(ip);
|
acc[0].push(ip);
|
||||||
} else if (isProbablyIpv6(ip)) {
|
} else if (isProbablyIpv6(ip)) {
|
||||||
acc[1].push(ip);
|
acc[1].push(ip);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return acc;
|
}
|
||||||
}, [[], []]);
|
return acc;
|
||||||
},
|
}, [[], []]),
|
||||||
{
|
{
|
||||||
ttl: TTL.TWLVE_HOURS(),
|
|
||||||
serializer: JSON.stringify,
|
serializer: JSON.stringify,
|
||||||
deserializer: JSON.parse
|
deserializer: JSON.parse
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -273,13 +273,13 @@ export const ADGUARD_FILTERS_EXTRA: AdGuardFilterSource[] = [
|
|||||||
'https://secure.fanboy.co.nz/fanboy-cookiemonster_ubo.txt'
|
'https://secure.fanboy.co.nz/fanboy-cookiemonster_ubo.txt'
|
||||||
],
|
],
|
||||||
TTL.TWLVE_HOURS()
|
TTL.TWLVE_HOURS()
|
||||||
],
|
|
||||||
// Bypass Paywall Cleaner
|
|
||||||
[
|
|
||||||
'https://gitflic.ru/project/magnolia1234/bypass-paywalls-clean-filters/blob/raw?file=bpc-paywall-filter.txt',
|
|
||||||
[],
|
|
||||||
TTL.ONE_DAY()
|
|
||||||
]
|
]
|
||||||
|
// Bypass Paywall Cleaner
|
||||||
|
// [
|
||||||
|
// 'https://gitflic.ru/project/magnolia1234/bypass-paywalls-clean-filters/blob/raw?file=bpc-paywall-filter.txt',
|
||||||
|
// [],
|
||||||
|
// TTL.ONE_DAY()
|
||||||
|
// ]
|
||||||
];
|
];
|
||||||
|
|
||||||
// In a hostile network like when an ad blocker is present, apps might be crashing, and these errors need to be
|
// In a hostile network like when an ad blocker is present, apps might be crashing, and these errors need to be
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import process from 'node:process';
|
import process from 'node:process';
|
||||||
import os from 'node:os';
|
import os from 'node:os';
|
||||||
|
import wtf from 'wtfnode';
|
||||||
|
|
||||||
import { downloadPreviousBuild } from './download-previous-build';
|
import { downloadPreviousBuild } from './download-previous-build';
|
||||||
import { buildCommon } from './build-common';
|
import { buildCommon } from './build-common';
|
||||||
@@ -121,6 +122,7 @@ process.on('unhandledRejection', (reason) => {
|
|||||||
printTraceResult(rootSpan.traceResult);
|
printTraceResult(rootSpan.traceResult);
|
||||||
|
|
||||||
// Finish the build to avoid leaking timer/fetch ref
|
// Finish the build to avoid leaking timer/fetch ref
|
||||||
|
wtf.dump();
|
||||||
process.exit(0);
|
process.exit(0);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.trace(e);
|
console.trace(e);
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import { performance } from 'node:perf_hooks';
|
|||||||
import fs from 'node:fs';
|
import fs from 'node:fs';
|
||||||
import { stringHash } from './string-hash';
|
import { stringHash } from './string-hash';
|
||||||
import { defaultRequestInit, fetchWithRetry } from './fetch-retry';
|
import { defaultRequestInit, fetchWithRetry } from './fetch-retry';
|
||||||
|
import { Custom304NotModifiedError, CustomAbortError, CustomNoETagFallbackError, fetchAssets, sleepWithAbort } from './fetch-assets';
|
||||||
|
|
||||||
const enum CacheStatus {
|
const enum CacheStatus {
|
||||||
Hit = 'hit',
|
Hit = 'hit',
|
||||||
@@ -211,14 +212,10 @@ export class Cache<S = string> {
|
|||||||
url: string,
|
url: string,
|
||||||
extraCacheKey: string,
|
extraCacheKey: string,
|
||||||
fn: (resp: Response) => Promise<T>,
|
fn: (resp: Response) => Promise<T>,
|
||||||
opt: Omit<CacheApplyOption<T, S>, 'ttl' | 'incrementTtlWhenHit'>,
|
opt: Omit<CacheApplyOption<T, S>, 'incrementTtlWhenHit'>,
|
||||||
requestInit?: RequestInit
|
requestInit?: RequestInit
|
||||||
) {
|
) {
|
||||||
const { temporaryBypass } = opt;
|
if (opt.temporaryBypass) {
|
||||||
|
|
||||||
const ttl = TTL.ONE_WEEK_STATIC;
|
|
||||||
|
|
||||||
if (temporaryBypass) {
|
|
||||||
return fn(await fetchWithRetry(url, requestInit ?? defaultRequestInit));
|
return fn(await fetchWithRetry(url, requestInit ?? defaultRequestInit));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -226,22 +223,34 @@ export class Cache<S = string> {
|
|||||||
const etagKey = baseKey + '$etag';
|
const etagKey = baseKey + '$etag';
|
||||||
const cachedKey = baseKey + '$cached';
|
const cachedKey = baseKey + '$cached';
|
||||||
|
|
||||||
const onMiss = (resp: Response) => {
|
const etag = this.get(etagKey);
|
||||||
console.log(picocolors.yellow('[cache] miss'), url, picocolors.gray(`ttl: ${TTL.humanReadable(ttl)}`));
|
|
||||||
|
|
||||||
|
const onMiss = (resp: Response) => {
|
||||||
const serializer = 'serializer' in opt ? opt.serializer : identity as any;
|
const serializer = 'serializer' in opt ? opt.serializer : identity as any;
|
||||||
|
|
||||||
const etag = resp.headers.get('etag');
|
|
||||||
|
|
||||||
if (!etag) {
|
|
||||||
console.log(picocolors.red('[cache] no etag'), picocolors.gray(url));
|
|
||||||
return fn(resp);
|
|
||||||
}
|
|
||||||
const promise = fn(resp);
|
const promise = fn(resp);
|
||||||
|
|
||||||
return promise.then((value) => {
|
return promise.then((value) => {
|
||||||
this.set(etagKey, etag, ttl);
|
if (resp.headers.has('ETag')) {
|
||||||
this.set(cachedKey, serializer(value), ttl);
|
let serverETag = resp.headers.get('ETag')!;
|
||||||
|
// FUCK someonewhocares.org
|
||||||
|
if (url.includes('someonewhocares.org')) {
|
||||||
|
serverETag = serverETag.replace('-gzip', '');
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(picocolors.yellow('[cache] miss'), url, { cachedETag: etag, serverETag });
|
||||||
|
|
||||||
|
this.set(etagKey, serverETag, TTL.ONE_WEEK_STATIC);
|
||||||
|
this.set(cachedKey, serializer(value), TTL.ONE_WEEK_STATIC);
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.del(etagKey);
|
||||||
|
console.log(picocolors.red('[cache] no etag'), picocolors.gray(url));
|
||||||
|
if (opt.ttl) {
|
||||||
|
this.set(cachedKey, serializer(value), opt.ttl);
|
||||||
|
}
|
||||||
|
|
||||||
return value;
|
return value;
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
@@ -251,7 +260,6 @@ export class Cache<S = string> {
|
|||||||
return onMiss(await fetchWithRetry(url, requestInit ?? defaultRequestInit));
|
return onMiss(await fetchWithRetry(url, requestInit ?? defaultRequestInit));
|
||||||
}
|
}
|
||||||
|
|
||||||
const etag = this.get(etagKey);
|
|
||||||
const resp = await fetchWithRetry(
|
const resp = await fetchWithRetry(
|
||||||
url,
|
url,
|
||||||
{
|
{
|
||||||
@@ -265,17 +273,154 @@ export class Cache<S = string> {
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
if (resp.status !== 304) {
|
// Only miss if previously a ETag was present and the server responded with a 304
|
||||||
|
if (resp.headers.has('ETag') && resp.status !== 304) {
|
||||||
return onMiss(resp);
|
return onMiss(resp);
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(picocolors.green('[cache] http 304'), picocolors.gray(url));
|
console.log(picocolors.green(`[cache] ${resp.status === 304 ? 'http 304' : 'cache hit'}`), picocolors.gray(url));
|
||||||
this.updateTtl(cachedKey, ttl);
|
this.updateTtl(cachedKey, TTL.ONE_WEEK_STATIC);
|
||||||
|
|
||||||
const deserializer = 'deserializer' in opt ? opt.deserializer : identity as any;
|
const deserializer = 'deserializer' in opt ? opt.deserializer : identity as any;
|
||||||
return deserializer(cached);
|
return deserializer(cached);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async applyWithHttp304AndMirrors<T>(
|
||||||
|
primaryUrl: string,
|
||||||
|
mirrorUrls: string[],
|
||||||
|
extraCacheKey: string,
|
||||||
|
fn: (resp: string) => Promise<T> | T,
|
||||||
|
opt: Omit<CacheApplyOption<T, S>, 'incrementTtlWhenHit'>
|
||||||
|
): Promise<T> {
|
||||||
|
if (opt.temporaryBypass) {
|
||||||
|
return fn(await fetchAssets(primaryUrl, mirrorUrls));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mirrorUrls.length === 0) {
|
||||||
|
return this.applyWithHttp304(primaryUrl, extraCacheKey, async (resp) => fn(await resp.text()), opt);
|
||||||
|
}
|
||||||
|
|
||||||
|
const baseKey = primaryUrl + '$' + extraCacheKey;
|
||||||
|
const getETagKey = (url: string) => baseKey + '$' + url + '$etag';
|
||||||
|
const cachedKey = baseKey + '$cached';
|
||||||
|
const controller = new AbortController();
|
||||||
|
|
||||||
|
const previouslyCached = this.get(cachedKey);
|
||||||
|
|
||||||
|
const primaryETag = this.get(getETagKey(primaryUrl));
|
||||||
|
const fetchMainPromise = fetchWithRetry(
|
||||||
|
primaryUrl,
|
||||||
|
{
|
||||||
|
signal: controller.signal,
|
||||||
|
...defaultRequestInit,
|
||||||
|
headers: (typeof primaryETag === 'string' && primaryETag.length > 0)
|
||||||
|
? mergeHeaders(
|
||||||
|
defaultRequestInit.headers,
|
||||||
|
{ 'If-None-Match': primaryETag }
|
||||||
|
)
|
||||||
|
: defaultRequestInit.headers
|
||||||
|
}
|
||||||
|
).then(r => {
|
||||||
|
if (r.headers.has('etag')) {
|
||||||
|
this.set(getETagKey(primaryUrl), r.headers.get('etag')!, TTL.ONE_WEEK_STATIC);
|
||||||
|
|
||||||
|
// If we do not have a cached value, we ignore 304
|
||||||
|
if (r.status === 304 && previouslyCached != null) {
|
||||||
|
controller.abort();
|
||||||
|
throw new Custom304NotModifiedError(primaryUrl);
|
||||||
|
}
|
||||||
|
} else if (!primaryETag && previouslyCached) {
|
||||||
|
throw new CustomNoETagFallbackError(previouslyCached as string);
|
||||||
|
}
|
||||||
|
|
||||||
|
return r.text();
|
||||||
|
}).then(text => {
|
||||||
|
controller.abort();
|
||||||
|
return text;
|
||||||
|
});
|
||||||
|
|
||||||
|
const createFetchFallbackPromise = async (url: string, index: number) => {
|
||||||
|
// Most assets can be downloaded within 250ms. To avoid wasting bandwidth, we will wait for 500ms before downloading from the fallback URL.
|
||||||
|
try {
|
||||||
|
await sleepWithAbort(300 + (index + 1) * 10, controller.signal);
|
||||||
|
} catch {
|
||||||
|
console.log(picocolors.gray('[fetch cancelled early]'), picocolors.gray(url));
|
||||||
|
throw new CustomAbortError();
|
||||||
|
}
|
||||||
|
if (controller.signal.aborted) {
|
||||||
|
console.log(picocolors.gray('[fetch cancelled]'), picocolors.gray(url));
|
||||||
|
throw new CustomAbortError();
|
||||||
|
}
|
||||||
|
|
||||||
|
const etag = this.get(getETagKey(url));
|
||||||
|
const res = await fetchWithRetry(
|
||||||
|
url,
|
||||||
|
{
|
||||||
|
signal: controller.signal,
|
||||||
|
...defaultRequestInit,
|
||||||
|
headers: (typeof etag === 'string' && etag.length > 0)
|
||||||
|
? mergeHeaders(
|
||||||
|
defaultRequestInit.headers,
|
||||||
|
{ 'If-None-Match': etag }
|
||||||
|
)
|
||||||
|
: defaultRequestInit.headers
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
if (res.headers.has('etag')) {
|
||||||
|
this.set(getETagKey(url), res.headers.get('etag')!, TTL.ONE_WEEK_STATIC);
|
||||||
|
|
||||||
|
// If we do not have a cached value, we ignore 304
|
||||||
|
if (res.status === 304 && previouslyCached != null) {
|
||||||
|
controller.abort();
|
||||||
|
throw new Custom304NotModifiedError(url);
|
||||||
|
}
|
||||||
|
} else if (!primaryETag && previouslyCached) {
|
||||||
|
controller.abort();
|
||||||
|
throw new CustomNoETagFallbackError(previouslyCached as string);
|
||||||
|
}
|
||||||
|
|
||||||
|
const text = await res.text();
|
||||||
|
controller.abort();
|
||||||
|
return text;
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
const text = await Promise.any([
|
||||||
|
fetchMainPromise,
|
||||||
|
...mirrorUrls.map(createFetchFallbackPromise)
|
||||||
|
]);
|
||||||
|
|
||||||
|
console.log(picocolors.yellow('[cache] miss'), primaryUrl);
|
||||||
|
const serializer = 'serializer' in opt ? opt.serializer : identity as any;
|
||||||
|
|
||||||
|
const value = await fn(text);
|
||||||
|
|
||||||
|
this.set(cachedKey, serializer(value), opt.ttl ?? TTL.ONE_WEEK_STATIC);
|
||||||
|
|
||||||
|
return value;
|
||||||
|
} catch (e) {
|
||||||
|
if (e instanceof AggregateError) {
|
||||||
|
const deserializer = 'deserializer' in opt ? opt.deserializer : identity as any;
|
||||||
|
|
||||||
|
for (const error of e.errors) {
|
||||||
|
if (error instanceof Custom304NotModifiedError) {
|
||||||
|
console.log(picocolors.green('[cache] http 304'), picocolors.gray(primaryUrl));
|
||||||
|
this.updateTtl(cachedKey, TTL.ONE_WEEK_STATIC);
|
||||||
|
return deserializer(previouslyCached);
|
||||||
|
}
|
||||||
|
if (error instanceof CustomNoETagFallbackError) {
|
||||||
|
console.log(picocolors.green('[cache] hit'), picocolors.gray(primaryUrl));
|
||||||
|
return deserializer(error.data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`Download Rule for [${primaryUrl}] failed`);
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
destroy() {
|
destroy() {
|
||||||
this.db.close();
|
this.db.close();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,12 +3,30 @@ import { defaultRequestInit, fetchWithRetry } from './fetch-retry';
|
|||||||
import { setTimeout } from 'node:timers/promises';
|
import { setTimeout } from 'node:timers/promises';
|
||||||
|
|
||||||
// eslint-disable-next-line sukka/unicorn/custom-error-definition -- typescript is better
|
// eslint-disable-next-line sukka/unicorn/custom-error-definition -- typescript is better
|
||||||
class CustomAbortError extends Error {
|
export class CustomAbortError extends Error {
|
||||||
public readonly name = 'AbortError';
|
public readonly name = 'AbortError';
|
||||||
public readonly digest = 'AbortError';
|
public readonly digest = 'AbortError';
|
||||||
}
|
}
|
||||||
|
|
||||||
const sleepWithAbort = (ms: number, signal: AbortSignal) => new Promise<void>((resolve, reject) => {
|
export class Custom304NotModifiedError extends Error {
|
||||||
|
public readonly name = 'Custom304NotModifiedError';
|
||||||
|
public readonly digest = 'Custom304NotModifiedError';
|
||||||
|
|
||||||
|
constructor(public readonly url: string) {
|
||||||
|
super('304 Not Modified');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export class CustomNoETagFallbackError extends Error {
|
||||||
|
public readonly name = 'CustomNoETagFallbackError';
|
||||||
|
public readonly digest = 'CustomNoETagFallbackError';
|
||||||
|
|
||||||
|
constructor(public readonly data: string) {
|
||||||
|
super('No ETag Fallback');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export const sleepWithAbort = (ms: number, signal: AbortSignal) => new Promise<void>((resolve, reject) => {
|
||||||
if (signal.aborted) {
|
if (signal.aborted) {
|
||||||
reject(signal.reason as Error);
|
reject(signal.reason as Error);
|
||||||
return;
|
return;
|
||||||
@@ -34,7 +52,7 @@ export async function fetchAssets(url: string, fallbackUrls: string[] | readonly
|
|||||||
const createFetchFallbackPromise = async (url: string, index: number) => {
|
const createFetchFallbackPromise = async (url: string, index: number) => {
|
||||||
// Most assets can be downloaded within 250ms. To avoid wasting bandwidth, we will wait for 500ms before downloading from the fallback URL.
|
// Most assets can be downloaded within 250ms. To avoid wasting bandwidth, we will wait for 500ms before downloading from the fallback URL.
|
||||||
try {
|
try {
|
||||||
await sleepWithAbort(500 + (index + 1) * 20, controller.signal);
|
await sleepWithAbort(500 + (index + 1) * 10, controller.signal);
|
||||||
} catch {
|
} catch {
|
||||||
console.log(picocolors.gray('[fetch cancelled early]'), picocolors.gray(url));
|
console.log(picocolors.gray('[fetch cancelled early]'), picocolors.gray(url));
|
||||||
throw new CustomAbortError();
|
throw new CustomAbortError();
|
||||||
|
|||||||
@@ -2,9 +2,9 @@ import retry from 'async-retry';
|
|||||||
import picocolors from 'picocolors';
|
import picocolors from 'picocolors';
|
||||||
import { setTimeout } from 'node:timers/promises';
|
import { setTimeout } from 'node:timers/promises';
|
||||||
|
|
||||||
import { setGlobalDispatcher, Agent } from 'undici';
|
import { setGlobalDispatcher, EnvHttpProxyAgent } from 'undici';
|
||||||
|
|
||||||
setGlobalDispatcher(new Agent({ allowH2: true }));
|
setGlobalDispatcher(new EnvHttpProxyAgent({ allowH2: true }));
|
||||||
|
|
||||||
function isClientError(err: unknown): err is NodeJS.ErrnoException {
|
function isClientError(err: unknown): err is NodeJS.ErrnoException {
|
||||||
if (!err || typeof err !== 'object') return false;
|
if (!err || typeof err !== 'object') return false;
|
||||||
|
|||||||
@@ -1,13 +1,11 @@
|
|||||||
// @ts-check
|
// @ts-check
|
||||||
import { fetchRemoteTextByLine } from './fetch-text-by-line';
|
|
||||||
import { NetworkFilter } from '@cliqz/adblocker';
|
import { NetworkFilter } from '@cliqz/adblocker';
|
||||||
import { processLine } from './process-line';
|
import { processLine } from './process-line';
|
||||||
import tldts from 'tldts-experimental';
|
import tldts from 'tldts-experimental';
|
||||||
|
|
||||||
import picocolors from 'picocolors';
|
import picocolors from 'picocolors';
|
||||||
import { normalizeDomain } from './normalize-domain';
|
import { normalizeDomain } from './normalize-domain';
|
||||||
import { fetchAssets } from './fetch-assets';
|
import { deserializeArray, fsFetchCache, serializeArray, getFileContentHash } from './cache-filesystem';
|
||||||
import { deserializeArray, fsFetchCache, serializeArray, createCacheKey } from './cache-filesystem';
|
|
||||||
import type { Span } from '../trace';
|
import type { Span } from '../trace';
|
||||||
import createKeywordFilter from './aho-corasick';
|
import createKeywordFilter from './aho-corasick';
|
||||||
import { looseTldtsOpt } from '../constants/loose-tldts-opt';
|
import { looseTldtsOpt } from '../constants/loose-tldts-opt';
|
||||||
@@ -43,33 +41,24 @@ const domainListLineCb = (l: string, set: string[], includeAllSubDomain: boolean
|
|||||||
set.push(includeAllSubDomain ? `.${line}` : line);
|
set.push(includeAllSubDomain ? `.${line}` : line);
|
||||||
};
|
};
|
||||||
|
|
||||||
const cacheKey = createCacheKey(__filename);
|
|
||||||
|
|
||||||
export function processDomainLists(
|
export function processDomainLists(
|
||||||
span: Span,
|
span: Span,
|
||||||
domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false,
|
domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false,
|
||||||
ttl: number | null = null, extraCacheKey: (input: string) => string = identity
|
ttl: number | null = null, extraCacheKey: (input: string) => string = identity
|
||||||
) {
|
) {
|
||||||
return span.traceChild(`process domainlist: ${domainListsUrl}`).traceAsyncFn((childSpan) => fsFetchCache.apply(
|
return span.traceChild(`process domainlist: ${domainListsUrl}`).traceAsyncFn((childSpan) => fsFetchCache.applyWithHttp304AndMirrors<string[]>(
|
||||||
extraCacheKey(cacheKey(domainListsUrl)),
|
domainListsUrl,
|
||||||
async () => {
|
mirrors ?? [],
|
||||||
|
extraCacheKey(getFileContentHash(__filename)),
|
||||||
|
(text) => {
|
||||||
const domainSets: string[] = [];
|
const domainSets: string[] = [];
|
||||||
|
const filterRules = text.split('\n');
|
||||||
|
|
||||||
if (mirrors == null || mirrors.length === 0) {
|
childSpan.traceChild('parse domain list').traceSyncFn(() => {
|
||||||
for await (const l of await fetchRemoteTextByLine(domainListsUrl)) {
|
for (let i = 0, len = filterRules.length; i < len; i++) {
|
||||||
domainListLineCb(l, domainSets, includeAllSubDomain, domainListsUrl);
|
domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl);
|
||||||
}
|
}
|
||||||
} else {
|
});
|
||||||
const filterRules = await childSpan
|
|
||||||
.traceChild('download domain list')
|
|
||||||
.traceAsyncFn(() => fetchAssets(domainListsUrl, mirrors).then(text => text.split('\n')));
|
|
||||||
|
|
||||||
childSpan.traceChild('parse domain list').traceSyncFn(() => {
|
|
||||||
for (let i = 0, len = filterRules.length; i < len; i++) {
|
|
||||||
domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
return domainSets;
|
return domainSets;
|
||||||
},
|
},
|
||||||
@@ -109,26 +98,20 @@ export function processHosts(
|
|||||||
hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false,
|
hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false,
|
||||||
ttl: number | null = null, extraCacheKey: (input: string) => string = identity
|
ttl: number | null = null, extraCacheKey: (input: string) => string = identity
|
||||||
) {
|
) {
|
||||||
return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn((childSpan) => fsFetchCache.apply(
|
return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn((childSpan) => fsFetchCache.applyWithHttp304AndMirrors<string[]>(
|
||||||
extraCacheKey(cacheKey(hostsUrl)),
|
hostsUrl,
|
||||||
async () => {
|
mirrors ?? [],
|
||||||
|
extraCacheKey(getFileContentHash(__filename)),
|
||||||
|
(text) => {
|
||||||
const domainSets: string[] = [];
|
const domainSets: string[] = [];
|
||||||
|
|
||||||
if (mirrors == null || mirrors.length === 0) {
|
const filterRules = text.split('\n');
|
||||||
for await (const l of await fetchRemoteTextByLine(hostsUrl)) {
|
|
||||||
hostsLineCb(l, domainSets, includeAllSubDomain, hostsUrl);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const filterRules = await childSpan
|
|
||||||
.traceChild('download hosts')
|
|
||||||
.traceAsyncFn(() => fetchAssets(hostsUrl, mirrors).then(text => text.split('\n')));
|
|
||||||
|
|
||||||
childSpan.traceChild('parse hosts').traceSyncFn(() => {
|
childSpan.traceChild('parse hosts').traceSyncFn(() => {
|
||||||
for (let i = 0, len = filterRules.length; i < len; i++) {
|
for (let i = 0, len = filterRules.length; i < len; i++) {
|
||||||
hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl);
|
hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
|
||||||
|
|
||||||
return domainSets;
|
return domainSets;
|
||||||
},
|
},
|
||||||
@@ -155,13 +138,15 @@ export { type ParseType };
|
|||||||
export async function processFilterRules(
|
export async function processFilterRules(
|
||||||
parentSpan: Span,
|
parentSpan: Span,
|
||||||
filterRulesUrl: string,
|
filterRulesUrl: string,
|
||||||
fallbackUrls?: readonly string[] | null,
|
fallbackUrls?: string[] | null,
|
||||||
ttl: number | null = null,
|
ttl: number | null = null,
|
||||||
allowThirdParty = false
|
allowThirdParty = false
|
||||||
): Promise<{ white: string[], black: string[], foundDebugDomain: boolean }> {
|
): Promise<{ white: string[], black: string[], foundDebugDomain: boolean }> {
|
||||||
const [white, black, warningMessages] = await parentSpan.traceChild(`process filter rules: ${filterRulesUrl}`).traceAsyncFn((span) => fsFetchCache.apply<Readonly<[ white: string[], black: string[], warningMessages: string[] ]>>(
|
const [white, black, warningMessages] = await parentSpan.traceChild(`process filter rules: ${filterRulesUrl}`).traceAsyncFn((span) => fsFetchCache.applyWithHttp304AndMirrors<Readonly<[ white: string[], black: string[], warningMessages: string[] ]>>(
|
||||||
cacheKey(filterRulesUrl),
|
filterRulesUrl,
|
||||||
async () => {
|
fallbackUrls ?? [],
|
||||||
|
getFileContentHash(__filename),
|
||||||
|
(text) => {
|
||||||
const whitelistDomainSets = new Set<string>();
|
const whitelistDomainSets = new Set<string>();
|
||||||
const blacklistDomainSets = new Set<string>();
|
const blacklistDomainSets = new Set<string>();
|
||||||
|
|
||||||
@@ -221,20 +206,13 @@ export async function processFilterRules(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!fallbackUrls || fallbackUrls.length === 0) {
|
const filterRules = text.split('\n');
|
||||||
for await (const line of await fetchRemoteTextByLine(filterRulesUrl)) {
|
|
||||||
// don't trim here
|
|
||||||
lineCb(line);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const filterRules = await span.traceChild('download adguard filter').traceAsyncFn(() => fetchAssets(filterRulesUrl, fallbackUrls).then(text => text.split('\n')));
|
|
||||||
|
|
||||||
span.traceChild('parse adguard filter').traceSyncFn(() => {
|
span.traceChild('parse adguard filter').traceSyncFn(() => {
|
||||||
for (let i = 0, len = filterRules.length; i < len; i++) {
|
for (let i = 0, len = filterRules.length; i < len; i++) {
|
||||||
lineCb(filterRules[i]);
|
lineCb(filterRules[i]);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
|
||||||
|
|
||||||
return [
|
return [
|
||||||
Array.from(whitelistDomainSets),
|
Array.from(whitelistDomainSets),
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import { basename, extname } from 'node:path';
|
import { basename, extname } from 'node:path';
|
||||||
import picocolors from 'picocolors';
|
import picocolors from 'picocolors';
|
||||||
|
import wtf from 'wtfnode';
|
||||||
|
|
||||||
const SPAN_STATUS_START = 0;
|
const SPAN_STATUS_START = 0;
|
||||||
const SPAN_STATUS_END = 1;
|
const SPAN_STATUS_END = 1;
|
||||||
@@ -101,7 +102,9 @@ export const task = (importMetaMain: boolean, importMetaPath: string) => <T>(fn:
|
|||||||
|
|
||||||
const dummySpan = createSpan(taskName);
|
const dummySpan = createSpan(taskName);
|
||||||
if (importMetaMain) {
|
if (importMetaMain) {
|
||||||
fn(dummySpan);
|
fn(dummySpan).finally(() => {
|
||||||
|
console.log(wtf.dump());
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return async (span?: Span) => {
|
return async (span?: Span) => {
|
||||||
|
|||||||
@@ -39,6 +39,7 @@
|
|||||||
"tldts": "^6.1.50",
|
"tldts": "^6.1.50",
|
||||||
"tldts-experimental": "^6.1.50",
|
"tldts-experimental": "^6.1.50",
|
||||||
"undici": "^6.19.8",
|
"undici": "^6.19.8",
|
||||||
|
"wtfnode": "^0.9.3",
|
||||||
"yaml": "^2.5.1"
|
"yaml": "^2.5.1"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
@@ -52,6 +53,7 @@
|
|||||||
"@types/punycode": "^2.1.4",
|
"@types/punycode": "^2.1.4",
|
||||||
"@types/tar-fs": "^2.0.4",
|
"@types/tar-fs": "^2.0.4",
|
||||||
"@types/tar-stream": "^3.1.3",
|
"@types/tar-stream": "^3.1.3",
|
||||||
|
"@types/wtfnode": "^0.7.3",
|
||||||
"chai": "4",
|
"chai": "4",
|
||||||
"eslint": "^9.12.0",
|
"eslint": "^9.12.0",
|
||||||
"eslint-config-sukka": "^6.6.1",
|
"eslint-config-sukka": "^6.6.1",
|
||||||
|
|||||||
17
pnpm-lock.yaml
generated
17
pnpm-lock.yaml
generated
@@ -68,6 +68,9 @@ importers:
|
|||||||
undici:
|
undici:
|
||||||
specifier: ^6.19.8
|
specifier: ^6.19.8
|
||||||
version: 6.19.8
|
version: 6.19.8
|
||||||
|
wtfnode:
|
||||||
|
specifier: ^0.9.3
|
||||||
|
version: 0.9.3
|
||||||
yaml:
|
yaml:
|
||||||
specifier: ^2.5.1
|
specifier: ^2.5.1
|
||||||
version: 2.5.1
|
version: 2.5.1
|
||||||
@@ -102,6 +105,9 @@ importers:
|
|||||||
'@types/tar-stream':
|
'@types/tar-stream':
|
||||||
specifier: ^3.1.3
|
specifier: ^3.1.3
|
||||||
version: 3.1.3
|
version: 3.1.3
|
||||||
|
'@types/wtfnode':
|
||||||
|
specifier: ^0.7.3
|
||||||
|
version: 0.7.3
|
||||||
chai:
|
chai:
|
||||||
specifier: '4'
|
specifier: '4'
|
||||||
version: 4.4.1
|
version: 4.4.1
|
||||||
@@ -470,6 +476,9 @@ packages:
|
|||||||
'@types/tar-stream@3.1.3':
|
'@types/tar-stream@3.1.3':
|
||||||
resolution: {integrity: sha512-Zbnx4wpkWBMBSu5CytMbrT5ZpMiF55qgM+EpHzR4yIDu7mv52cej8hTkOc6K+LzpkOAbxwn/m7j3iO+/l42YkQ==}
|
resolution: {integrity: sha512-Zbnx4wpkWBMBSu5CytMbrT5ZpMiF55qgM+EpHzR4yIDu7mv52cej8hTkOc6K+LzpkOAbxwn/m7j3iO+/l42YkQ==}
|
||||||
|
|
||||||
|
'@types/wtfnode@0.7.3':
|
||||||
|
resolution: {integrity: sha512-UMkHpx+o2xRWLJ7PmT3bBzvIA9/0oFw80oPtY/xO4jfdq+Gznn4wP7K9B/JjMxyxy+wF+5oRPIykxeBbEDjwRg==}
|
||||||
|
|
||||||
'@typescript-eslint/eslint-plugin@8.7.0':
|
'@typescript-eslint/eslint-plugin@8.7.0':
|
||||||
resolution: {integrity: sha512-RIHOoznhA3CCfSTFiB6kBGLQtB/sox+pJ6jeFu6FxJvqL8qRxq/FfGO/UhsGgQM9oGdXkV4xUgli+dt26biB6A==}
|
resolution: {integrity: sha512-RIHOoznhA3CCfSTFiB6kBGLQtB/sox+pJ6jeFu6FxJvqL8qRxq/FfGO/UhsGgQM9oGdXkV4xUgli+dt26biB6A==}
|
||||||
engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0}
|
engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0}
|
||||||
@@ -1500,6 +1509,10 @@ packages:
|
|||||||
wrappy@1.0.2:
|
wrappy@1.0.2:
|
||||||
resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==}
|
resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==}
|
||||||
|
|
||||||
|
wtfnode@0.9.3:
|
||||||
|
resolution: {integrity: sha512-MXjgxJovNVYUkD85JBZTKT5S5ng/e56sNuRZlid7HcGTNrIODa5UPtqE3i0daj7fJ2SGj5Um2VmiphQVyVKK5A==}
|
||||||
|
hasBin: true
|
||||||
|
|
||||||
y18n@5.0.8:
|
y18n@5.0.8:
|
||||||
resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==}
|
resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==}
|
||||||
engines: {node: '>=10'}
|
engines: {node: '>=10'}
|
||||||
@@ -1877,6 +1890,8 @@ snapshots:
|
|||||||
dependencies:
|
dependencies:
|
||||||
'@types/node': 20.14.11
|
'@types/node': 20.14.11
|
||||||
|
|
||||||
|
'@types/wtfnode@0.7.3': {}
|
||||||
|
|
||||||
'@typescript-eslint/eslint-plugin@8.7.0(@typescript-eslint/parser@8.7.0(eslint@9.12.0)(typescript@5.6.2))(eslint@9.12.0)(typescript@5.6.2)':
|
'@typescript-eslint/eslint-plugin@8.7.0(@typescript-eslint/parser@8.7.0(eslint@9.12.0)(typescript@5.6.2))(eslint@9.12.0)(typescript@5.6.2)':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@eslint-community/regexpp': 4.11.1
|
'@eslint-community/regexpp': 4.11.1
|
||||||
@@ -2987,6 +3002,8 @@ snapshots:
|
|||||||
|
|
||||||
wrappy@1.0.2: {}
|
wrappy@1.0.2: {}
|
||||||
|
|
||||||
|
wtfnode@0.9.3: {}
|
||||||
|
|
||||||
y18n@5.0.8: {}
|
y18n@5.0.8: {}
|
||||||
|
|
||||||
yaml@2.5.1: {}
|
yaml@2.5.1: {}
|
||||||
|
|||||||
Reference in New Issue
Block a user