diff --git a/Build/build-cdn-conf.ts b/Build/build-cdn-conf.ts index ca719323..a4174a48 100644 --- a/Build/build-cdn-conf.ts +++ b/Build/build-cdn-conf.ts @@ -7,10 +7,7 @@ import { processLine } from './lib/process-line'; import { SHARED_DESCRIPTION } from './lib/constants'; import { getPublicSuffixListTextPromise } from './download-publicsuffixlist'; const getS3OSSDomains = async (): Promise> => { - const trie = createTrie(); - for await (const line of (await getPublicSuffixListTextPromise()).split('\n')) { - trie.add(line); - } + const trie = createTrie((await getPublicSuffixListTextPromise()).split('\n')); /** * Extract OSS domain from publicsuffix list diff --git a/Build/lib/cache-filesystem.ts b/Build/lib/cache-filesystem.ts index d886cc5e..1686d364 100644 --- a/Build/lib/cache-filesystem.ts +++ b/Build/lib/cache-filesystem.ts @@ -136,7 +136,9 @@ const randomInt = (min: number, max: number) => Math.floor(Math.random() * (max // Add some randomness to the cache ttl to avoid thundering herd export const TTL = { + THREE_HOURS: () => randomInt(2, 4) * 60 * 60 * 1000, TWLVE_HOURS: () => randomInt(9, 14) * 60 * 60 * 1000, + ONE_DAY: () => randomInt(23, 25) * 60 * 60 * 1000, THREE_DAYS: () => randomInt(2, 4) * 24 * 60 * 60 * 1000, ONE_WEEK: () => randomInt(5, 8) * 24 * 60 * 60 * 1000, TWO_WEEKS: () => randomInt(12, 16) * 24 * 60 * 60 * 1000, diff --git a/Build/lib/get-gorhill-publicsuffix.ts b/Build/lib/get-gorhill-publicsuffix.ts index f3765590..a49d5d22 100644 --- a/Build/lib/get-gorhill-publicsuffix.ts +++ b/Build/lib/get-gorhill-publicsuffix.ts @@ -3,9 +3,9 @@ import { traceAsync } from './trace-runner'; import { createMemoizedPromise } from './memo-promise'; import { getPublicSuffixListTextPromise } from '../download-publicsuffixlist'; -export const getGorhillPublicSuffixPromise = createMemoizedPromise(() => traceAsync('create gorhill public suffix instance', async () => { - const customFetch = (url: string | URL) => Promise.resolve(Bun.file(url)); +const 
customFetch = (url: string | URL) => Promise.resolve(Bun.file(url)); +export const getGorhillPublicSuffixPromise = createMemoizedPromise(() => traceAsync('create gorhill public suffix instance', async () => { const [publicSuffixListDat, { default: gorhill }] = await Promise.all([ getPublicSuffixListTextPromise(), import('@gorhill/publicsuffixlist') diff --git a/Build/lib/parse-filter.ts b/Build/lib/parse-filter.ts index 9035f1b2..68e9c621 100644 --- a/Build/lib/parse-filter.ts +++ b/Build/lib/parse-filter.ts @@ -173,6 +173,9 @@ export async function processFilterRules( lineCb(line); } } else { + // Avoid event loop starvation: defer by one microtask tick (awaiting an already-resolved promise does not wait for a macrotask) before we start fetching. + await Promise.resolve(); + const filterRules = (await traceAsync( picocolors.gray(`- download ${filterRulesUrl}`), () => fetchAssets(filterRulesUrl, fallbackUrls), @@ -191,7 +194,7 @@ export async function processFilterRules( Array.from(whitelistDomainSets), Array.from(blacklistDomainSets), warningMessages - ]; + ] as const; }, { ttl, diff --git a/Build/lib/reject-data-source.ts b/Build/lib/reject-data-source.ts index 87f27849..13a052ad 100644 --- a/Build/lib/reject-data-source.ts +++ b/Build/lib/reject-data-source.ts @@ -1,8 +1,8 @@ import { TTL } from './cache-filesystem'; export const HOSTS = [ - ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true], - ['https://someonewhocares.org/hosts/hosts', true], + ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, false, TTL.THREE_HOURS()], + ['https://someonewhocares.org/hosts/hosts', true, false, TTL.THREE_HOURS()], // no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false, false, TTL.THREE_DAYS()], // have not been updated for more than a year, so we set a 14 days cache ttl @@ -11,14 +11,11 @@
export const HOSTS = [ ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', false, false, TTL.THREE_DAYS()], // ad-wars is not actively maintained, so we set a 7 days cache ttl ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false, false, TTL.ONE_WEEK()], - ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true], - // CoinBlockerList - // Although the hosts file is still actively maintained, the hosts_browser file is not updated since 2021-07, so we set a 14 days cache ttl - ['https://zerodot1.gitlab.io/CoinBlockerLists/hosts_browser', true, true, TTL.TWO_WEEKS()], + ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true, false, TTL.THREE_HOURS()], // Curben's UrlHaus Malicious URL Blocklist // 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt', // 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt', - ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true], + ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true, TTL.THREE_HOURS()], // Curben's Phishing URL Blocklist // Covered by lib/get-phishing-domains.ts // 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt' @@ -32,12 +29,15 @@ export const HOSTS = [ ] as const; export const DOMAIN_LISTS = [ + // CoinBlockerList + // Although the hosts file is still actively maintained, the hosts_browser file is not updated since 2021-07, so we set a 14 days cache ttl + ['https://zerodot1.gitlab.io/CoinBlockerLists/list_browser.txt', true, TTL.TWO_WEEKS()], // BarbBlock // The barbblock list has never been updated since 2019-05, so we set a 14 days cache ttl ['https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', true, TTL.TWO_WEEKS()], // DigitalSide Threat-Intel - OSINT Hub // Update once per day - ['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, 24 * 60 * 
60 * 1000], + ['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, TTL.ONE_DAY()], // AdGuard CNAME Filter Combined // Update on a 7 days basis, so we add a 3 hours cache ttl ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, TTL.THREE_DAYS()], @@ -52,11 +52,11 @@ export const ADGUARD_FILTERS = [ [ 'https://easylist.to/easylist/easylist.txt', [ + 'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easylist.txt', + 'https://ublockorigin.pages.dev/thirdparties/easylist.txt', 'https://easylist-downloads.adblockplus.org/easylist.txt', 'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easylist.txt', - 'https://secure.fanboy.co.nz/easylist.txt', - 'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easylist.txt', - 'https://ublockorigin.pages.dev/thirdparties/easylist.txt' + 'https://secure.fanboy.co.nz/easylist.txt' ], TTL.TWLVE_HOURS() ], @@ -86,21 +86,24 @@ export const ADGUARD_FILTERS = [ 'https://ublockorigin.github.io/uAssetsCDN/filters/filters.min.txt', [ 'https://ublockorigin.pages.dev/filters/filters.min.txt' - ] + ], + TTL.THREE_HOURS() ], // uBlock Origin Badware Risk List [ 'https://ublockorigin.github.io/uAssetsCDN/filters/badware.min.txt', [ 'https://ublockorigin.pages.dev/filters/badware.min.txt' - ] + ], + TTL.THREE_HOURS() ], // uBlock Origin Privacy List [ 'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.min.txt', [ 'https://ublockorigin.pages.dev/filters/privacy.min.txt' - ] + ], + TTL.THREE_HOURS() ], // uBlock Origin Resource Abuse: merged in uBlock Origin Privacy List // [ @@ -114,29 +117,32 @@ export const ADGUARD_FILTERS = [ 'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.min.txt', [ 'https://ublockorigin.pages.dev/filters/unbreak.min.txt' - ] + ], + TTL.THREE_HOURS() ], // AdGuard Base Filter - 'https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt', + 
['https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt', null, TTL.THREE_HOURS()], // AdGuard Mobile AD - 'https://filters.adtidy.org/extension/ublock/filters/11_optimized.txt', + ['https://filters.adtidy.org/extension/ublock/filters/11_optimized.txt', null, TTL.THREE_HOURS()], // AdGuard Tracking Protection - 'https://filters.adtidy.org/extension/ublock/filters/3_optimized.txt', + ['https://filters.adtidy.org/extension/ublock/filters/3_optimized.txt', null, TTL.THREE_HOURS()], // AdGuard Japanese filter - 'https://filters.adtidy.org/extension/ublock/filters/7_optimized.txt', + ['https://filters.adtidy.org/extension/ublock/filters/7_optimized.txt', null, TTL.THREE_HOURS()], // AdGuard Chinese filter (EasyList China + AdGuard Chinese filter) - 'https://filters.adtidy.org/extension/ublock/filters/224_optimized.txt', + ['https://filters.adtidy.org/extension/ublock/filters/224_optimized.txt', null, TTL.THREE_HOURS()], // AdGuard Annoyances filter - 'https://filters.adtidy.org/android/filters/14_optimized.txt', + ['https://filters.adtidy.org/android/filters/14_optimized.txt', null, TTL.THREE_HOURS()], // EasyList Germany filter [ 'https://easylist.to/easylistgermany/easylistgermany.txt', [ 'https://easylist-downloads.adblockplus.org/easylistgermany.txt' - ] + ], + TTL.TWLVE_HOURS() ], // GameConsoleAdblockList - 'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt', + // Update almost once per 1 to 3 months, let's set a 10 days cache ttl + ['https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt', null, TTL.TEN_DAYS()], // PiHoleBlocklist // Update almost once per 3 months, let's set a 10 days cache ttl [