Perf: add more cache

This commit is contained in:
SukkaW 2023-12-31 21:05:59 +08:00
parent 702b9d9596
commit 8c150e87c2
5 changed files with 38 additions and 30 deletions

View File

@ -7,10 +7,7 @@ import { processLine } from './lib/process-line';
import { SHARED_DESCRIPTION } from './lib/constants'; import { SHARED_DESCRIPTION } from './lib/constants';
import { getPublicSuffixListTextPromise } from './download-publicsuffixlist'; import { getPublicSuffixListTextPromise } from './download-publicsuffixlist';
const getS3OSSDomains = async (): Promise<Set<string>> => { const getS3OSSDomains = async (): Promise<Set<string>> => {
const trie = createTrie(); const trie = createTrie((await getPublicSuffixListTextPromise()).split('\n'));
for await (const line of (await getPublicSuffixListTextPromise()).split('\n')) {
trie.add(line);
}
/** /**
* Extract OSS domain from publicsuffix list * Extract OSS domain from publicsuffix list

View File

@ -136,7 +136,9 @@ const randomInt = (min: number, max: number) => Math.floor(Math.random() * (max
// Add some randomness to the cache ttl to avoid thundering herd // Add some randomness to the cache ttl to avoid thundering herd
export const TTL = { export const TTL = {
THREE_HOURS: () => randomInt(2, 4) * 60 * 60 * 1000,
TWLVE_HOURS: () => randomInt(9, 14) * 60 * 60 * 1000, TWLVE_HOURS: () => randomInt(9, 14) * 60 * 60 * 1000,
ONE_DAY: () => randomInt(23, 25) * 60 * 60 * 1000,
THREE_DAYS: () => randomInt(2, 4) * 24 * 60 * 60 * 1000, THREE_DAYS: () => randomInt(2, 4) * 24 * 60 * 60 * 1000,
ONE_WEEK: () => randomInt(5, 8) * 24 * 60 * 60 * 1000, ONE_WEEK: () => randomInt(5, 8) * 24 * 60 * 60 * 1000,
TWO_WEEKS: () => randomInt(12, 16) * 24 * 60 * 60 * 1000, TWO_WEEKS: () => randomInt(12, 16) * 24 * 60 * 60 * 1000,

View File

@ -3,9 +3,9 @@ import { traceAsync } from './trace-runner';
import { createMemoizedPromise } from './memo-promise'; import { createMemoizedPromise } from './memo-promise';
import { getPublicSuffixListTextPromise } from '../download-publicsuffixlist'; import { getPublicSuffixListTextPromise } from '../download-publicsuffixlist';
export const getGorhillPublicSuffixPromise = createMemoizedPromise(() => traceAsync('create gorhill public suffix instance', async () => { const customFetch = (url: string | URL) => Promise.resolve(Bun.file(url));
const customFetch = (url: string | URL) => Promise.resolve(Bun.file(url));
export const getGorhillPublicSuffixPromise = createMemoizedPromise(() => traceAsync('create gorhill public suffix instance', async () => {
const [publicSuffixListDat, { default: gorhill }] = await Promise.all([ const [publicSuffixListDat, { default: gorhill }] = await Promise.all([
getPublicSuffixListTextPromise(), getPublicSuffixListTextPromise(),
import('@gorhill/publicsuffixlist') import('@gorhill/publicsuffixlist')

View File

@ -173,6 +173,9 @@ export async function processFilterRules(
lineCb(line); lineCb(line);
} }
} else { } else {
// Avoid event loop starvation, so we yield for a microtask tick (awaiting an already-resolved promise) before we start fetching.
await Promise.resolve();
const filterRules = (await traceAsync( const filterRules = (await traceAsync(
picocolors.gray(`- download ${filterRulesUrl}`), picocolors.gray(`- download ${filterRulesUrl}`),
() => fetchAssets(filterRulesUrl, fallbackUrls), () => fetchAssets(filterRulesUrl, fallbackUrls),
@ -191,7 +194,7 @@ export async function processFilterRules(
Array.from(whitelistDomainSets), Array.from(whitelistDomainSets),
Array.from(blacklistDomainSets), Array.from(blacklistDomainSets),
warningMessages warningMessages
]; ] as const;
}, },
{ {
ttl, ttl,

View File

@ -1,8 +1,8 @@
import { TTL } from './cache-filesystem'; import { TTL } from './cache-filesystem';
export const HOSTS = [ export const HOSTS = [
['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true], ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, false, TTL.THREE_HOURS()],
['https://someonewhocares.org/hosts/hosts', true], ['https://someonewhocares.org/hosts/hosts', true, false, TTL.THREE_HOURS()],
// no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl // no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false, false, TTL.THREE_DAYS()], ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false, false, TTL.THREE_DAYS()],
// have not been updated for more than a year, so we set a 14 days cache ttl // have not been updated for more than a year, so we set a 14 days cache ttl
@ -11,14 +11,11 @@ export const HOSTS = [
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', false, false, TTL.THREE_DAYS()], ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', false, false, TTL.THREE_DAYS()],
// ad-wars is not actively maintained, so we set a 7 days cache ttl // ad-wars is not actively maintained, so we set a 7 days cache ttl
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false, false, TTL.ONE_WEEK()], ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false, false, TTL.ONE_WEEK()],
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true], ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true, false, TTL.THREE_HOURS()],
// CoinBlockerList
// Although the hosts file is still actively maintained, the hosts_browser file is not updated since 2021-07, so we set a 14 days cache ttl
['https://zerodot1.gitlab.io/CoinBlockerLists/hosts_browser', true, true, TTL.TWO_WEEKS()],
// Curben's UrlHaus Malicious URL Blocklist // Curben's UrlHaus Malicious URL Blocklist
// 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt', // 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
// 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt', // 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true], ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true, TTL.THREE_HOURS()],
// Curben's Phishing URL Blocklist // Curben's Phishing URL Blocklist
// Covered by lib/get-phishing-domains.ts // Covered by lib/get-phishing-domains.ts
// 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt' // 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt'
@ -32,12 +29,15 @@ export const HOSTS = [
] as const; ] as const;
export const DOMAIN_LISTS = [ export const DOMAIN_LISTS = [
// CoinBlockerList
// Although the hosts file is still actively maintained, the hosts_browser file has not been updated since 2021-07, so we set a 14 days cache ttl
['https://zerodot1.gitlab.io/CoinBlockerLists/list_browser.txt', true, TTL.TWO_WEEKS()],
// BarbBlock // BarbBlock
// The barbblock list has never been updated since 2019-05, so we set a 14 days cache ttl // The barbblock list has never been updated since 2019-05, so we set a 14 days cache ttl
['https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', true, TTL.TWO_WEEKS()], ['https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', true, TTL.TWO_WEEKS()],
// DigitalSide Threat-Intel - OSINT Hub // DigitalSide Threat-Intel - OSINT Hub
// Update once per day // Update once per day
['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, 24 * 60 * 60 * 1000], ['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, TTL.ONE_DAY()],
// AdGuard CNAME Filter Combined // AdGuard CNAME Filter Combined
// Update on a 7 days basis, so we add a 3 days cache ttl // Update on a 7 days basis, so we add a 3 days cache ttl
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, TTL.THREE_DAYS()], ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, TTL.THREE_DAYS()],
@ -52,11 +52,11 @@ export const ADGUARD_FILTERS = [
[ [
'https://easylist.to/easylist/easylist.txt', 'https://easylist.to/easylist/easylist.txt',
[ [
'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easylist.txt',
'https://ublockorigin.pages.dev/thirdparties/easylist.txt',
'https://easylist-downloads.adblockplus.org/easylist.txt', 'https://easylist-downloads.adblockplus.org/easylist.txt',
'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easylist.txt', 'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easylist.txt',
'https://secure.fanboy.co.nz/easylist.txt', 'https://secure.fanboy.co.nz/easylist.txt'
'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easylist.txt',
'https://ublockorigin.pages.dev/thirdparties/easylist.txt'
], ],
TTL.TWLVE_HOURS() TTL.TWLVE_HOURS()
], ],
@ -86,21 +86,24 @@ export const ADGUARD_FILTERS = [
'https://ublockorigin.github.io/uAssetsCDN/filters/filters.min.txt', 'https://ublockorigin.github.io/uAssetsCDN/filters/filters.min.txt',
[ [
'https://ublockorigin.pages.dev/filters/filters.min.txt' 'https://ublockorigin.pages.dev/filters/filters.min.txt'
] ],
TTL.THREE_HOURS()
], ],
// uBlock Origin Badware Risk List // uBlock Origin Badware Risk List
[ [
'https://ublockorigin.github.io/uAssetsCDN/filters/badware.min.txt', 'https://ublockorigin.github.io/uAssetsCDN/filters/badware.min.txt',
[ [
'https://ublockorigin.pages.dev/filters/badware.min.txt' 'https://ublockorigin.pages.dev/filters/badware.min.txt'
] ],
TTL.THREE_HOURS()
], ],
// uBlock Origin Privacy List // uBlock Origin Privacy List
[ [
'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.min.txt', 'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.min.txt',
[ [
'https://ublockorigin.pages.dev/filters/privacy.min.txt' 'https://ublockorigin.pages.dev/filters/privacy.min.txt'
] ],
TTL.THREE_HOURS()
], ],
// uBlock Origin Resource Abuse: merged in uBlock Origin Privacy List // uBlock Origin Resource Abuse: merged in uBlock Origin Privacy List
// [ // [
@ -114,29 +117,32 @@ export const ADGUARD_FILTERS = [
'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.min.txt', 'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.min.txt',
[ [
'https://ublockorigin.pages.dev/filters/unbreak.min.txt' 'https://ublockorigin.pages.dev/filters/unbreak.min.txt'
] ],
TTL.THREE_HOURS()
], ],
// AdGuard Base Filter // AdGuard Base Filter
'https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt', ['https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt', null, TTL.THREE_HOURS()],
// AdGuard Mobile AD // AdGuard Mobile AD
'https://filters.adtidy.org/extension/ublock/filters/11_optimized.txt', ['https://filters.adtidy.org/extension/ublock/filters/11_optimized.txt', null, TTL.THREE_HOURS()],
// AdGuard Tracking Protection // AdGuard Tracking Protection
'https://filters.adtidy.org/extension/ublock/filters/3_optimized.txt', ['https://filters.adtidy.org/extension/ublock/filters/3_optimized.txt', null, TTL.THREE_HOURS()],
// AdGuard Japanese filter // AdGuard Japanese filter
'https://filters.adtidy.org/extension/ublock/filters/7_optimized.txt', ['https://filters.adtidy.org/extension/ublock/filters/7_optimized.txt', null, TTL.THREE_HOURS()],
// AdGuard Chinese filter (EasyList China + AdGuard Chinese filter) // AdGuard Chinese filter (EasyList China + AdGuard Chinese filter)
'https://filters.adtidy.org/extension/ublock/filters/224_optimized.txt', ['https://filters.adtidy.org/extension/ublock/filters/224_optimized.txt', null, TTL.THREE_HOURS()],
// AdGuard Annoyances filter // AdGuard Annoyances filter
'https://filters.adtidy.org/android/filters/14_optimized.txt', ['https://filters.adtidy.org/android/filters/14_optimized.txt', null, TTL.THREE_HOURS()],
// EasyList Germany filter // EasyList Germany filter
[ [
'https://easylist.to/easylistgermany/easylistgermany.txt', 'https://easylist.to/easylistgermany/easylistgermany.txt',
[ [
'https://easylist-downloads.adblockplus.org/easylistgermany.txt' 'https://easylist-downloads.adblockplus.org/easylistgermany.txt'
] ],
TTL.TWLVE_HOURS()
], ],
// GameConsoleAdblockList // GameConsoleAdblockList
'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt', // Update almost once per 1 to 3 months, let's set a 10 days cache ttl
['https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt', null, TTL.TEN_DAYS()],
// PiHoleBlocklist // PiHoleBlocklist
// Update almost once per 3 months, let's set a 10 days cache ttl // Update almost once per 3 months, let's set a 10 days cache ttl
[ [