diff --git a/Build/build-speedtest-domainset.ts b/Build/build-speedtest-domainset.ts index f0d45e62..1b0091ab 100644 --- a/Build/build-speedtest-domainset.ts +++ b/Build/build-speedtest-domainset.ts @@ -6,9 +6,11 @@ import domainSorter from './lib/stable-sort-domain'; import { Sema } from 'async-sema'; import * as tldts from 'tldts'; import { task } from './lib/trace-runner'; +import { fetchWithRetry } from './lib/fetch-retry'; + const s = new Sema(3); -const latestTopUserAgentsPromise = fetch('https://unpkg.com/top-user-agents@latest/index.json') +const latestTopUserAgentsPromise = fetchWithRetry('https://unpkg.com/top-user-agents@latest/index.json') .then(res => res.json() as Promise); const querySpeedtestApi = async (keyword: string): Promise<(string | null)[]> => { @@ -23,7 +25,7 @@ const querySpeedtestApi = async (keyword: string): Promise<(string | null)[]> => const key = `fetch speedtest endpoints: ${keyword}`; console.time(key); - const res = await fetch(`https://www.speedtest.net/api/js/servers?engine=js&search=${keyword}&limit=100`, { + const res = await fetchWithRetry(`https://www.speedtest.net/api/js/servers?engine=js&search=${keyword}&limit=100`, { headers: { dnt: '1', Referer: 'https://www.speedtest.net/', diff --git a/Build/build-telegram-cidr.ts b/Build/build-telegram-cidr.ts index 191084e8..f409fed7 100644 --- a/Build/build-telegram-cidr.ts +++ b/Build/build-telegram-cidr.ts @@ -1,5 +1,5 @@ // @ts-check -import { fetchWithRetry } from './lib/fetch-retry'; +import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry'; import { createReadlineInterfaceFromResponse } from './lib/fetch-remote-text-by-line'; import path from 'path'; import { isIPv4, isIPv6 } from 'net'; @@ -9,7 +9,7 @@ import { task } from './lib/trace-runner'; export const buildTelegramCIDR = task(__filename, async () => { /** @type {Response} */ - const resp = await fetchWithRetry('https://core.telegram.org/resources/cidr.txt'); + const resp = await fetchWithRetry('https://core.telegram.org/resources/cidr.txt', defaultRequestInit); const lastModified = resp.headers.get('last-modified'); const date = lastModified ? new Date(lastModified) : new Date(); diff --git a/Build/download-previous-build.ts b/Build/download-previous-build.ts index 7369d057..87954950 100644 --- a/Build/download-previous-build.ts +++ b/Build/download-previous-build.ts @@ -8,6 +8,7 @@ import { pipeline } from 'stream/promises'; import { readFileByLine } from './lib/fetch-remote-text-by-line'; import { isCI } from 'ci-info'; import { task, traceAsync } from './lib/trace-runner'; +import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry'; const IS_READING_BUILD_OUTPUT = 1 << 2; const ALL_FILES_EXISTS = 1 << 3; @@ -51,7 +52,7 @@ export const downloadPreviousBuild = task(__filename, async () => { await traceAsync( 'Download and extract previous build', () => Promise.all([ - fetch('https://codeload.github.com/sukkalab/ruleset.skk.moe/tar.gz/master'), + fetchWithRetry('https://codeload.github.com/sukkalab/ruleset.skk.moe/tar.gz/master', defaultRequestInit), fsp.mkdir(extractedPath, { recursive: true }) ]).then(([resp]) => pipeline( Readable.fromWeb(resp.body!), @@ -82,7 +83,7 @@ export const downloadPublicSuffixList = task(__filename, async () => { const publicSuffixPath = path.join(publicSuffixDir, 'public_suffix_list_dat.txt'); const [resp] = await Promise.all([ - fetch('https://publicsuffix.org/list/public_suffix_list.dat'), + fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit), fsp.mkdir(publicSuffixDir, { recursive: true }) ]); diff --git a/Build/lib/fetch-remote-text-by-line.ts b/Build/lib/fetch-remote-text-by-line.ts index 2c5225ee..a9b8a317 100644 --- a/Build/lib/fetch-remote-text-by-line.ts +++ b/Build/lib/fetch-remote-text-by-line.ts @@ -1,5 +1,5 @@ import type { BunFile } from 'bun'; -import { fetchWithRetry } from './fetch-retry'; +import { fetchWithRetry, defaultRequestInit } from './fetch-retry'; const decoder = new TextDecoder('utf-8'); @@ -56,6 +56,6 @@ export async function* createReadlineInterfaceFromResponse(resp: Response): Asyn } } -export function fetchRemoteTextAndCreateReadlineInterface(url: string | URL, opt?: RequestInit): Promise> { - return fetchWithRetry(url, opt).then(res => createReadlineInterfaceFromResponse(res)); +export function fetchRemoteTextAndCreateReadlineInterface(url: string | URL): Promise> { + return fetchWithRetry(url, defaultRequestInit).then(res => createReadlineInterfaceFromResponse(res)); } diff --git a/Build/lib/fetch-retry.ts b/Build/lib/fetch-retry.ts index 1bba35ea..0f8cb2a2 100644 --- a/Build/lib/fetch-retry.ts +++ b/Build/lib/fetch-retry.ts @@ -1,4 +1,10 @@ // @ts-expect-error -- missing types import createFetchRetry from '@vercel/fetch-retry'; +export const defaultRequestInit: RequestInit = { + headers: { + 'User-Agent': 'curl/8.1.2 (https://github.com/SukkaW/Surge)' + } +} + export const fetchWithRetry: typeof fetch = createFetchRetry(fetch); diff --git a/Build/lib/get-gorhill-publicsuffix.ts b/Build/lib/get-gorhill-publicsuffix.ts index fa0058ce..d2e64658 100644 --- a/Build/lib/get-gorhill-publicsuffix.ts +++ b/Build/lib/get-gorhill-publicsuffix.ts @@ -1,6 +1,7 @@ import { toASCII } from 'punycode'; import path from 'path'; import { traceAsync } from './trace-runner'; +import { defaultRequestInit, fetchWithRetry } from './fetch-retry'; import type { PublicSuffixList } from 'gorhill-publicsuffixlist'; const publicSuffixPath = path.resolve(__dirname, '../../node_modules/.cache/public_suffix_list_dat.txt'); @@ -13,7 +14,7 @@ const getGorhillPublicSuffix = () => traceAsync('create gorhill public suffix in const [publicSuffixListDat, { default: gorhill }] = await Promise.all([ await publicSuffixFile.exists() ? publicSuffixFile.text() - : fetch('https://publicsuffix.org/list/public_suffix_list.dat').then(r => { + : fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit).then(r => { console.log('public_suffix_list.dat not found, fetch directly from remote.'); return r.text(); }), diff --git a/Build/lib/parse-filter.ts b/Build/lib/parse-filter.ts index 98df18d0..04f5a7f5 100644 --- a/Build/lib/parse-filter.ts +++ b/Build/lib/parse-filter.ts @@ -1,5 +1,5 @@ // @ts-check -import { fetchWithRetry } from './fetch-retry'; +import { defaultRequestInit, fetchWithRetry } from './fetch-retry'; import * as tldts from './cached-tld-parse'; import { fetchRemoteTextAndCreateReadlineInterface } from './fetch-remote-text-by-line'; import { NetworkFilter } from '@cliqz/adblocker'; @@ -198,7 +198,7 @@ export async function processFilterRules( filterRules = ( await Promise.any( [filterRulesUrl, ...(fallbackUrls || [])].map(async url => { - const r = await fetchWithRetry(url, { signal: controller.signal }); + const r = await fetchWithRetry(url, { signal: controller.signal, ...defaultRequestInit }); const text = await r.text(); controller.abort(); diff --git a/Build/lib/reject-data-source.ts b/Build/lib/reject-data-source.ts index 24abbcc3..3785db01 100644 --- a/Build/lib/reject-data-source.ts +++ b/Build/lib/reject-data-source.ts @@ -106,9 +106,10 @@ export const ADGUARD_FILTERS = [ [ 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt', [ - 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt' + 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt', // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while - // 'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt' + // 'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt', + 'https://ublockorigin.github.io/uAssets/thirdparties/urlhaus-filter/urlhaus-filter-online.txt', ] ], // Curben's Phishing URL Blocklist