Perf: add more cache

This commit is contained in:
SukkaW 2023-12-31 21:05:59 +08:00
parent 702b9d9596
commit 8c150e87c2
5 changed files with 38 additions and 30 deletions

View File

@ -7,10 +7,7 @@ import { processLine } from './lib/process-line';
import { SHARED_DESCRIPTION } from './lib/constants';
import { getPublicSuffixListTextPromise } from './download-publicsuffixlist';
const getS3OSSDomains = async (): Promise<Set<string>> => {
const trie = createTrie();
for await (const line of (await getPublicSuffixListTextPromise()).split('\n')) {
trie.add(line);
}
const trie = createTrie((await getPublicSuffixListTextPromise()).split('\n'));
/**
* Extract OSS domain from publicsuffix list

View File

@ -136,7 +136,9 @@ const randomInt = (min: number, max: number) => Math.floor(Math.random() * (max
// Add some randomness to the cache ttl to avoid thundering herd
export const TTL = {
THREE_HOURS: () => randomInt(2, 4) * 60 * 60 * 1000,
TWLVE_HOURS: () => randomInt(9, 14) * 60 * 60 * 1000,
ONE_DAY: () => randomInt(23, 25) * 60 * 60 * 1000,
THREE_DAYS: () => randomInt(2, 4) * 24 * 60 * 60 * 1000,
ONE_WEEK: () => randomInt(5, 8) * 24 * 60 * 60 * 1000,
TWO_WEEKS: () => randomInt(12, 16) * 24 * 60 * 60 * 1000,

View File

@ -3,9 +3,9 @@ import { traceAsync } from './trace-runner';
import { createMemoizedPromise } from './memo-promise';
import { getPublicSuffixListTextPromise } from '../download-publicsuffixlist';
export const getGorhillPublicSuffixPromise = createMemoizedPromise(() => traceAsync('create gorhill public suffix instance', async () => {
const customFetch = (url: string | URL) => Promise.resolve(Bun.file(url));
export const getGorhillPublicSuffixPromise = createMemoizedPromise(() => traceAsync('create gorhill public suffix instance', async () => {
const [publicSuffixListDat, { default: gorhill }] = await Promise.all([
getPublicSuffixListTextPromise(),
import('@gorhill/publicsuffixlist')

View File

@ -173,6 +173,9 @@ export async function processFilterRules(
lineCb(line);
}
} else {
// Try to avoid event loop starvation by yielding before we start fetching.
// Note: `await Promise.resolve()` only yields for a microtask tick, not a macrotask.
await Promise.resolve();
const filterRules = (await traceAsync(
picocolors.gray(`- download ${filterRulesUrl}`),
() => fetchAssets(filterRulesUrl, fallbackUrls),
@ -191,7 +194,7 @@ export async function processFilterRules(
Array.from(whitelistDomainSets),
Array.from(blacklistDomainSets),
warningMessages
];
] as const;
},
{
ttl,

View File

@ -1,8 +1,8 @@
import { TTL } from './cache-filesystem';
export const HOSTS = [
['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true],
['https://someonewhocares.org/hosts/hosts', true],
['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, false, TTL.THREE_HOURS()],
['https://someonewhocares.org/hosts/hosts', true, false, TTL.THREE_HOURS()],
// no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false, false, TTL.THREE_DAYS()],
// have not been updated for more than a year, so we set a 14 days cache ttl
@ -11,14 +11,11 @@ export const HOSTS = [
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', false, false, TTL.THREE_DAYS()],
// ad-wars is not actively maintained, so we set a 7 days cache ttl
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false, false, TTL.ONE_WEEK()],
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true],
// CoinBlockerList
// Although the hosts file is still actively maintained, the hosts_browser file has not been updated since 2021-07, so we set a 14 days cache ttl
['https://zerodot1.gitlab.io/CoinBlockerLists/hosts_browser', true, true, TTL.TWO_WEEKS()],
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true, false, TTL.THREE_HOURS()],
// Curben's UrlHaus Malicious URL Blocklist
// 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
// 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true],
['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true, TTL.THREE_HOURS()],
// Curben's Phishing URL Blocklist
// Covered by lib/get-phishing-domains.ts
// 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt'
@ -32,12 +29,15 @@ export const HOSTS = [
] as const;
export const DOMAIN_LISTS = [
// CoinBlockerList
// Although the hosts file is still actively maintained, the hosts_browser file has not been updated since 2021-07, so we set a 14 days cache ttl
['https://zerodot1.gitlab.io/CoinBlockerLists/list_browser.txt', true, TTL.TWO_WEEKS()],
// BarbBlock
// The barbblock list has not been updated since 2019-05, so we set a 14 days cache ttl
['https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', true, TTL.TWO_WEEKS()],
// DigitalSide Threat-Intel - OSINT Hub
// Update once per day
['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, 24 * 60 * 60 * 1000],
['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, TTL.ONE_DAY()],
// AdGuard CNAME Filter Combined
// Update on a 7 days basis, so we add a 3 days cache ttl
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, TTL.THREE_DAYS()],
@ -52,11 +52,11 @@ export const ADGUARD_FILTERS = [
[
'https://easylist.to/easylist/easylist.txt',
[
'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easylist.txt',
'https://ublockorigin.pages.dev/thirdparties/easylist.txt',
'https://easylist-downloads.adblockplus.org/easylist.txt',
'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easylist.txt',
'https://secure.fanboy.co.nz/easylist.txt',
'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easylist.txt',
'https://ublockorigin.pages.dev/thirdparties/easylist.txt'
'https://secure.fanboy.co.nz/easylist.txt'
],
TTL.TWLVE_HOURS()
],
@ -86,21 +86,24 @@ export const ADGUARD_FILTERS = [
'https://ublockorigin.github.io/uAssetsCDN/filters/filters.min.txt',
[
'https://ublockorigin.pages.dev/filters/filters.min.txt'
]
],
TTL.THREE_HOURS()
],
// uBlock Origin Badware Risk List
[
'https://ublockorigin.github.io/uAssetsCDN/filters/badware.min.txt',
[
'https://ublockorigin.pages.dev/filters/badware.min.txt'
]
],
TTL.THREE_HOURS()
],
// uBlock Origin Privacy List
[
'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.min.txt',
[
'https://ublockorigin.pages.dev/filters/privacy.min.txt'
]
],
TTL.THREE_HOURS()
],
// uBlock Origin Resource Abuse: merged in uBlock Origin Privacy List
// [
@ -114,29 +117,32 @@ export const ADGUARD_FILTERS = [
'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.min.txt',
[
'https://ublockorigin.pages.dev/filters/unbreak.min.txt'
]
],
TTL.THREE_HOURS()
],
// AdGuard Base Filter
'https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt',
['https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt', null, TTL.THREE_HOURS()],
// AdGuard Mobile AD
'https://filters.adtidy.org/extension/ublock/filters/11_optimized.txt',
['https://filters.adtidy.org/extension/ublock/filters/11_optimized.txt', null, TTL.THREE_HOURS()],
// AdGuard Tracking Protection
'https://filters.adtidy.org/extension/ublock/filters/3_optimized.txt',
['https://filters.adtidy.org/extension/ublock/filters/3_optimized.txt', null, TTL.THREE_HOURS()],
// AdGuard Japanese filter
'https://filters.adtidy.org/extension/ublock/filters/7_optimized.txt',
['https://filters.adtidy.org/extension/ublock/filters/7_optimized.txt', null, TTL.THREE_HOURS()],
// AdGuard Chinese filter (EasyList China + AdGuard Chinese filter)
'https://filters.adtidy.org/extension/ublock/filters/224_optimized.txt',
['https://filters.adtidy.org/extension/ublock/filters/224_optimized.txt', null, TTL.THREE_HOURS()],
// AdGuard Annoyances filter
'https://filters.adtidy.org/android/filters/14_optimized.txt',
['https://filters.adtidy.org/android/filters/14_optimized.txt', null, TTL.THREE_HOURS()],
// EasyList Germany filter
[
'https://easylist.to/easylistgermany/easylistgermany.txt',
[
'https://easylist-downloads.adblockplus.org/easylistgermany.txt'
]
],
TTL.TWLVE_HOURS()
],
// GameConsoleAdblockList
'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt',
// Update almost once per 1 to 3 months, let's set a 10 days cache ttl
['https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt', null, TTL.TEN_DAYS()],
// PiHoleBlocklist
// Update almost once per 3 months, let's set a 10 days cache ttl
[