mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Perf: use filesystem cache
This commit is contained in:
parent
6ed3695e36
commit
85801b1b9e
@ -12,17 +12,17 @@ import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
|
||||
import picocolors from 'picocolors';
|
||||
import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
|
||||
import { processLine } from './lib/process-line';
|
||||
import { TTL, deserializeArray, fsCache, serializeArray } from './lib/cache-filesystem';
|
||||
|
||||
const s = new Sema(2);
|
||||
|
||||
const latestTopUserAgentsPromise = fetchWithRetry('https://unpkg.com/top-user-agents@latest/index.json')
|
||||
.then(res => res.json<string[]>());
|
||||
.then(res => res.json<string[]>()).then(userAgents => userAgents.filter(ua => ua.startsWith('Mozilla/5.0 ')));
|
||||
|
||||
const querySpeedtestApi = async (keyword: string): Promise<Array<string | null>> => {
|
||||
const topUserAgents = (await Promise.all([
|
||||
latestTopUserAgentsPromise,
|
||||
s.acquire()
|
||||
]))[0];
|
||||
const topUserAgents = await latestTopUserAgentsPromise;
|
||||
|
||||
const url = `https://www.speedtest.net/api/js/servers?engine=js&search=${keyword}&limit=100`;
|
||||
|
||||
try {
|
||||
const randomUserAgent = topUserAgents[Math.floor(Math.random() * topUserAgents.length)];
|
||||
@ -30,39 +30,51 @@ const querySpeedtestApi = async (keyword: string): Promise<Array<string | null>>
|
||||
console.log(key);
|
||||
console.time(key);
|
||||
|
||||
const res = await fetchWithRetry(`https://www.speedtest.net/api/js/servers?engine=js&search=${keyword}&limit=100`, {
|
||||
headers: {
|
||||
dnt: '1',
|
||||
Referer: 'https://www.speedtest.net/',
|
||||
accept: 'application/json, text/plain, */*',
|
||||
'User-Agent': randomUserAgent,
|
||||
'Accept-Language': 'en-US,en;q=0.9',
|
||||
...(randomUserAgent.includes('Chrome')
|
||||
? {
|
||||
'Sec-Ch-Ua-Mobile': '?0',
|
||||
'Sec-Fetch-Dest': 'empty',
|
||||
'Sec-Fetch-Mode': 'cors',
|
||||
'Sec-Fetch-Site': 'same-origin',
|
||||
'Sec-Gpc': '1'
|
||||
const json = await fsCache.apply(
|
||||
url,
|
||||
() => s.acquire().then(() => fetchWithRetry(url, {
|
||||
headers: {
|
||||
dnt: '1',
|
||||
Referer: 'https://www.speedtest.net/',
|
||||
accept: 'application/json, text/plain, */*',
|
||||
'User-Agent': randomUserAgent,
|
||||
'Accept-Language': 'en-US,en;q=0.9',
|
||||
...(randomUserAgent.includes('Chrome')
|
||||
? {
|
||||
'Sec-Ch-Ua-Mobile': '?0',
|
||||
'Sec-Fetch-Dest': 'empty',
|
||||
'Sec-Fetch-Mode': 'cors',
|
||||
'Sec-Fetch-Site': 'same-origin',
|
||||
'Sec-Gpc': '1'
|
||||
}
|
||||
: {})
|
||||
},
|
||||
signal: AbortSignal.timeout(1000 * 4),
|
||||
retry: {
|
||||
retries: 2
|
||||
}
|
||||
})).then(r => r.json<Array<{ url: string }>>()).then(data => data.reduce<string[]>(
|
||||
(prev, cur) => {
|
||||
const hn = tldts.getHostname(cur.url, { detectIp: false });
|
||||
if (hn) {
|
||||
prev.push(hn);
|
||||
}
|
||||
: {})
|
||||
},
|
||||
signal: AbortSignal.timeout(1000 * 4),
|
||||
retry: {
|
||||
retries: 2
|
||||
return prev;
|
||||
}, []
|
||||
)).finally(() => s.release()),
|
||||
{
|
||||
ttl: TTL.ONE_WEEK(),
|
||||
serializer: serializeArray,
|
||||
deserializer: deserializeArray
|
||||
}
|
||||
});
|
||||
|
||||
const json = await res.json<Array<{ url: string }>>();
|
||||
);
|
||||
|
||||
console.timeEnd(key);
|
||||
|
||||
return json.map(({ url }) => tldts.getHostname(url, { detectIp: false }));
|
||||
return json;
|
||||
} catch (e) {
|
||||
console.log(e);
|
||||
return [];
|
||||
} finally {
|
||||
s.release();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -4,10 +4,10 @@ import path from 'path';
|
||||
import { fetchWithRetry } from './lib/fetch-retry';
|
||||
|
||||
const ASSETS_LIST = {
|
||||
'www-google-analytics-com_ga.js': 'https://unpkg.com/@adguard/scriptlets@1/dist/redirect-files/google-analytics-ga.js',
|
||||
'www-googletagservices-com_gpt.js': 'https://unpkg.com/@adguard/scriptlets@1/dist/redirect-files/googletagservices-gpt.js',
|
||||
'www-google-analytics-com_analytics.js': 'https://unpkg.com/@adguard/scriptlets@1/dist/redirect-files/google-analytics.js',
|
||||
'www-googlesyndication-com_adsbygoogle.js': 'https://unpkg.com/@adguard/scriptlets@1/dist/redirect-files/googlesyndication-adsbygoogle.js'
|
||||
'www-google-analytics-com_ga.js': 'https://raw.githubusercontent.com/AdguardTeam/Scriptlets/master/dist/redirect-files/google-analytics-ga.js',
|
||||
'www-googletagservices-com_gpt.js': 'https://raw.githubusercontent.com/AdguardTeam/Scriptlets/master/dist/redirect-files/googletagservices-gpt.js',
|
||||
'www-google-analytics-com_analytics.js': 'https://raw.githubusercontent.com/AdguardTeam/Scriptlets/master/dist/redirect-files/google-analytics.js',
|
||||
'www-googlesyndication-com_adsbygoogle.js': 'https://raw.githubusercontent.com/AdguardTeam/Scriptlets/master/dist/redirect-files/googlesyndication-adsbygoogle.js'
|
||||
} as const;
|
||||
|
||||
const mockDir = path.resolve(import.meta.dir, '../Mock');
|
||||
|
||||
@ -1,5 +1,20 @@
|
||||
import { TTL, fsCache } from './lib/cache-filesystem';
|
||||
import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
|
||||
import { createMemoizedPromise } from './lib/memo-promise';
|
||||
import { traceAsync } from './lib/trace-runner';
|
||||
|
||||
export const getPublicSuffixListTextPromise = createMemoizedPromise(() => traceAsync('obtain public_suffix_list', () => fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit).then(r => r.text())));
|
||||
export const getPublicSuffixListTextPromise = createMemoizedPromise(
|
||||
() => traceAsync(
|
||||
'obtain public_suffix_list',
|
||||
() => fsCache.apply(
|
||||
'https://publicsuffix.org/list/public_suffix_list.dat',
|
||||
() => fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit).then(r => r.text()),
|
||||
{
|
||||
// https://github.com/publicsuffix/list/blob/master/.github/workflows/tld-update.yml
|
||||
// Though the action runs every 24 hours, the IANA list is updated every 7 days.
|
||||
// So a 3 day TTL should be enough.
|
||||
ttl: TTL.THREE_DAYS()
|
||||
}
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
@ -127,12 +127,28 @@ export class Cache {
|
||||
}
|
||||
}
|
||||
|
||||
// export const fsCache = new Cache({ cachePath: path.resolve(import.meta.dir, '../../.cache') });
|
||||
// Shared Cache instance used by the build scripts; cachePath resolves to `../../.cache` relative to this module's directory.
export const fsCache = new Cache({ cachePath: path.resolve(import.meta.dir, '../../.cache') });
|
||||
// process.on('exit', () => {
|
||||
// fsCache.destroy();
|
||||
// });
|
||||
|
||||
const separator = String.fromCharCode(0);
|
||||
/** Pick a pseudo-random integer in the inclusive range [min, max] using Math.random. */
const randomInt = (min: number, max: number) => {
  const span = max - min + 1;
  return min + Math.floor(Math.random() * span);
};
|
||||
|
||||
// Add some randomness to the cache ttl to avoid thundering herd
|
||||
export const TTL = {
|
||||
TWLVE_HOURS: () => randomInt(9, 14) * 60 * 60 * 1000,
|
||||
THREE_DAYS: () => randomInt(2, 4) * 24 * 60 * 60 * 1000,
|
||||
ONE_WEEK: () => randomInt(5, 8) * 24 * 60 * 60 * 1000,
|
||||
TWO_WEEKS: () => randomInt(12, 16) * 24 * 60 * 60 * 1000,
|
||||
TEN_DAYS: () => randomInt(9, 11) * 24 * 60 * 60 * 1000
|
||||
};
|
||||
|
||||
const separator = String.fromCharCode(0);
|
||||
// const textEncoder = new TextEncoder();
|
||||
// const textDecoder = new TextDecoder();
|
||||
// export const serializeString = (str: string) => textEncoder.encode(str);
|
||||
// export const deserializeString = (str: string) => textDecoder.decode(new Uint8Array(str.split(separator).map(Number)));
|
||||
/** Join the members of a string set, in iteration order, with the module-level `separator` character. */
export const serializeSet = (set: Set<string>) => [...set].join(separator);
|
||||
/**
 * Rebuild a string set from text produced by `serializeSet`.
 *
 * An empty string maps to an empty set. A bare `split` would return `['']`,
 * so an empty set would come back from a serialize/deserialize round trip
 * as a set containing one empty-string entry.
 */
export const deserializeSet = (str: string): Set<string> => new Set<string>(str === '' ? [] : str.split(separator));
|
||||
/** Flatten a string array into a single string, with elements delimited by the module-level `separator` character. */
export const serializeArray = (arr: string[]): string => {
  return arr.join(separator);
};
|
||||
/**
 * Split text produced by `serializeArray` back into its elements.
 *
 * An empty string maps to an empty array. A bare `split` would return `['']`,
 * so an empty array would come back from a serialize/deserialize round trip
 * with one spurious empty-string element.
 */
export const deserializeArray = (str: string): string[] => (str === '' ? [] : str.split(separator));
|
||||
|
||||
@ -9,14 +9,15 @@ import { traceAsync } from './trace-runner';
|
||||
import picocolors from 'picocolors';
|
||||
import { normalizeDomain } from './normalize-domain';
|
||||
import { fetchAssets } from './fetch-assets';
|
||||
import { deserializeSet, fsCache, serializeSet } from './cache-filesystem';
|
||||
|
||||
const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null
|
||||
let foundDebugDomain = false;
|
||||
|
||||
export function processDomainLists(domainListsUrl: string, includeAllSubDomain = false, _ttl: number | null = null) {
|
||||
return traceAsync(`- processDomainLists: ${domainListsUrl}`, /* () => fsCache.apply(
|
||||
export function processDomainLists(domainListsUrl: string, includeAllSubDomain = false, ttl: number | null = null) {
|
||||
return traceAsync(`- processDomainLists: ${domainListsUrl}`, () => fsCache.apply(
|
||||
domainListsUrl,
|
||||
*/async () => {
|
||||
async () => {
|
||||
const domainSets = new Set<string>();
|
||||
|
||||
for await (const line of await fetchRemoteTextByLine(domainListsUrl)) {
|
||||
@ -32,19 +33,19 @@ export function processDomainLists(domainListsUrl: string, includeAllSubDomain =
|
||||
}
|
||||
|
||||
return domainSets;
|
||||
});/* ,
|
||||
},
|
||||
{
|
||||
ttl,
|
||||
temporaryBypass: DEBUG_DOMAIN_TO_FIND !== null,
|
||||
serializer: serializeSet,
|
||||
deserializer: deserializeSet
|
||||
}
|
||||
)); */
|
||||
));
|
||||
}
|
||||
export function processHosts(hostsUrl: string, includeAllSubDomain = false, skipDomainCheck = false, _ttl: number | null = null) {
|
||||
return traceAsync(`- processHosts: ${hostsUrl}`, /* () => fsCache.apply(
|
||||
export function processHosts(hostsUrl: string, includeAllSubDomain = false, skipDomainCheck = false, ttl: number | null = null) {
|
||||
return traceAsync(`- processHosts: ${hostsUrl}`, () => fsCache.apply(
|
||||
hostsUrl,
|
||||
*/async () => {
|
||||
async () => {
|
||||
const domainSets = new Set<string>();
|
||||
|
||||
for await (const l of await fetchRemoteTextByLine(hostsUrl)) {
|
||||
@ -73,14 +74,14 @@ export function processHosts(hostsUrl: string, includeAllSubDomain = false, skip
|
||||
console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size));
|
||||
|
||||
return domainSets;
|
||||
});
|
||||
/* {
|
||||
},
|
||||
{
|
||||
ttl,
|
||||
temporaryBypass: DEBUG_DOMAIN_TO_FIND !== null,
|
||||
serializer: serializeSet,
|
||||
deserializer: deserializeSet
|
||||
}
|
||||
) */
|
||||
));
|
||||
}
|
||||
|
||||
// eslint-disable-next-line sukka-ts/no-const-enum -- bun bundler is smart, maybe?
|
||||
@ -95,15 +96,15 @@ const enum ParseType {
|
||||
export async function processFilterRules(
|
||||
filterRulesUrl: string,
|
||||
fallbackUrls?: readonly string[] | undefined | null,
|
||||
_ttl: number | null = null
|
||||
ttl: number | null = null
|
||||
): Promise<{ white: string[], black: string[], foundDebugDomain: boolean }> {
|
||||
const [white, black, warningMessages] = await traceAsync(`- processFilterRules: ${filterRulesUrl}`, /* () => fsCache.apply<[
|
||||
const [white, black, warningMessages] = await traceAsync(`- processFilterRules: ${filterRulesUrl}`, () => fsCache.apply<[
|
||||
white: string[],
|
||||
black: string[],
|
||||
warningMessages: string[]
|
||||
]>(
|
||||
filterRulesUrl,
|
||||
*/async () => {
|
||||
async () => {
|
||||
const whitelistDomainSets = new Set<string>();
|
||||
const blacklistDomainSets = new Set<string>();
|
||||
|
||||
@ -168,7 +169,7 @@ export async function processFilterRules(
|
||||
// TODO-SUKKA: add cache here
|
||||
if (!fallbackUrls || fallbackUrls.length === 0) {
|
||||
for await (const line of await fetchRemoteTextByLine(filterRulesUrl)) {
|
||||
// don't trim here
|
||||
// don't trim here
|
||||
lineCb(line);
|
||||
}
|
||||
} else {
|
||||
@ -191,14 +192,14 @@ export async function processFilterRules(
|
||||
Array.from(blacklistDomainSets),
|
||||
warningMessages
|
||||
];
|
||||
});
|
||||
/* {
|
||||
},
|
||||
{
|
||||
ttl,
|
||||
temporaryBypass: DEBUG_DOMAIN_TO_FIND !== null,
|
||||
serializer: JSON.stringify,
|
||||
deserializer: JSON.parse
|
||||
}
|
||||
) */
|
||||
));
|
||||
|
||||
warningMessages.forEach(msg => {
|
||||
console.warn(
|
||||
|
||||
16
Build/lib/random-int.bench.ts
Normal file
16
Build/lib/random-int.bench.ts
Normal file
@ -0,0 +1,16 @@
|
||||
import { bench, group, run } from 'mitata';
|
||||
import { randomInt as nativeRandomInt } from 'crypto';
|
||||
|
||||
/** Math.random-based integer picker over the inclusive range [min, max]; benchmarked against crypto.randomInt below. */
const randomInt = (min: number, max: number) => {
  const offset = Math.floor(Math.random() * (max - min + 1));
  return offset + min;
};
|
||||
|
||||
group('random-int', () => {
|
||||
bench('crypto.randomInt', () => {
|
||||
nativeRandomInt(3, 7);
|
||||
});
|
||||
|
||||
bench('Math.random', () => {
|
||||
randomInt(3, 7);
|
||||
});
|
||||
});
|
||||
|
||||
run();
|
||||
@ -1,14 +1,20 @@
|
||||
import { TTL } from './cache-filesystem';
|
||||
|
||||
export const HOSTS = [
|
||||
['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true],
|
||||
['https://someonewhocares.org/hosts/hosts', true],
|
||||
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false],
|
||||
['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', true],
|
||||
// no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl
|
||||
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false, false, TTL.THREE_DAYS()],
|
||||
// have not been updated for more than a year, so we set a 14 days cache ttl
|
||||
['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', true, false, TTL.TWO_WEEKS()],
|
||||
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false],
|
||||
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false],
|
||||
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-Extension.txt', false],
|
||||
// ad-wars is not actively maintained, so we set a 7 days cache ttl
|
||||
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false, false, TTL.ONE_WEEK()],
|
||||
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true],
|
||||
// CoinBlockerList
|
||||
// Although the hosts file is still actively maintained, the hosts_browser file is not updated since 2021-07, so we set a 10 days cache ttl
|
||||
['https://zerodot1.gitlab.io/CoinBlockerLists/hosts_browser', true, true, 10 * 24 * 60 * 60 * 1000],
|
||||
// Although the hosts file is still actively maintained, the hosts_browser file is not updated since 2021-07, so we set a 14 days cache ttl
|
||||
['https://zerodot1.gitlab.io/CoinBlockerLists/hosts_browser', true, true, TTL.TWO_WEEKS()],
|
||||
// Curben's UrlHaus Malicious URL Blocklist
|
||||
// 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
|
||||
// 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
|
||||
@ -21,23 +27,24 @@ export const HOSTS = [
|
||||
// Curben's PUP Domains Blocklist
|
||||
// 'https://curbengh.github.io/pup-filter/pup-filter-agh.txt'
|
||||
// 'https://pup-filter.pages.dev/pup-filter-agh.txt'
|
||||
// The PUP filter has paused the update since 2023-05, so we set a 7 days cache ttl
|
||||
['https://curbengh.github.io/pup-filter/pup-filter-hosts.txt', true, true, 7 * 24 * 60 * 60 * 1000]
|
||||
// The PUP filter has paused the update since 2023-05, so we set a 14 days cache ttl
|
||||
['https://curbengh.github.io/pup-filter/pup-filter-hosts.txt', true, true, TTL.TWO_WEEKS()]
|
||||
] as const;
|
||||
|
||||
export const DOMAIN_LISTS = [
|
||||
// BarbBlock
|
||||
// The barbblock list has never been updated since 2019-05, so we set a 10 days cache ttl
|
||||
['https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', true, 10 * 24 * 60 * 60 * 1000],
|
||||
// The barbblock list has never been updated since 2019-05, so we set a 14 days cache ttl
|
||||
['https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', true, TTL.TWO_WEEKS()],
|
||||
// DigitalSide Threat-Intel - OSINT Hub
|
||||
['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true],
|
||||
// Update once per day
|
||||
['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, 24 * 60 * 60 * 1000],
|
||||
// AdGuard CNAME Filter Combined
|
||||
// Update on a 7 days basis, so we add a 36 hours cache ttl
|
||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, 36 * 60 * 60 * 1000],
|
||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers_justdomains.txt', true, 36 * 60 * 60 * 1000],
|
||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_clickthroughs_justdomains.txt', true, 36 * 60 * 60 * 1000],
|
||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_microsites_justdomains.txt', true, 36 * 60 * 60 * 1000],
|
||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers_justdomains.txt', true, 36 * 60 * 60 * 1000]
|
||||
// Update on a 7 days basis, so we add a 3 days cache ttl
|
||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, TTL.THREE_DAYS()],
|
||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers_justdomains.txt', true, TTL.THREE_DAYS()],
|
||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_clickthroughs_justdomains.txt', true, TTL.THREE_DAYS()],
|
||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_microsites_justdomains.txt', true, TTL.THREE_DAYS()],
|
||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers_justdomains.txt', true, TTL.THREE_DAYS()]
|
||||
] as const;
|
||||
|
||||
export const ADGUARD_FILTERS = [
|
||||
@ -130,14 +137,17 @@ export const ADGUARD_FILTERS = [
|
||||
// GameConsoleAdblockList
|
||||
'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt',
|
||||
// PiHoleBlocklist
|
||||
// Update almost once per 3 months, let's set a 10 days cache ttl
|
||||
[
|
||||
'https://perflyst.github.io/PiHoleBlocklist/SmartTV-AGH.txt',
|
||||
[
|
||||
'https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/SmartTV-AGH.txt'
|
||||
]
|
||||
],
|
||||
TTL.TEN_DAYS()
|
||||
],
|
||||
// Spam404
|
||||
'https://raw.githubusercontent.com/Spam404/lists/master/adblock-list.txt',
|
||||
// Not actively maintained, let's use a 10 days cache ttl
|
||||
['https://raw.githubusercontent.com/Spam404/lists/master/adblock-list.txt', null, TTL.TEN_DAYS()],
|
||||
// Brave First Party & First Party CNAME
|
||||
'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty.txt'
|
||||
] as const;
|
||||
|
||||
@ -39,6 +39,7 @@
|
||||
"eslint": "^8.56.0",
|
||||
"eslint-config-sukka": "4.1.10-beta.2",
|
||||
"eslint-formatter-sukka": "4.1.9",
|
||||
"mitata": "^0.1.6",
|
||||
"typescript": "^5.3.3"
|
||||
},
|
||||
"resolutions": {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user