diff --git a/Build/build-internal-cdn-rules.ts b/Build/build-internal-cdn-rules.ts index a6c2a408..da64319f 100644 --- a/Build/build-internal-cdn-rules.ts +++ b/Build/build-internal-cdn-rules.ts @@ -58,7 +58,7 @@ export const buildInternalCDNDomains = task(import.meta.path, async () => { } }; - const [gorhill] = await Promise.all([ + const gorhill = (await Promise.all([ getGorhillPublicSuffixPromise(), processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf')), processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf')), @@ -70,7 +70,7 @@ export const buildInternalCDNDomains = task(import.meta.path, async () => { processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/download.conf')), fsp.mkdir(path.resolve(import.meta.dir, '../List/internal'), { recursive: true }) - ]); + ]))[0]; return compareAndWriteFile( [ diff --git a/Build/build-internal-chn-domains.ts b/Build/build-internal-chn-domains.ts index 821c1bd6..1bc1aaa0 100644 --- a/Build/build-internal-chn-domains.ts +++ b/Build/build-internal-chn-domains.ts @@ -5,10 +5,10 @@ import { task } from './lib/trace-runner'; import { compareAndWriteFile } from './lib/create-file'; export const buildInternalChnDomains = task(import.meta.path, async () => { - const [result] = await Promise.all([ + const result = (await Promise.all([ parseFelixDnsmasq('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf'), fsp.mkdir(path.resolve(import.meta.dir, '../List/internal'), { recursive: true }) - ]); + ]))[0]; return compareAndWriteFile( result.map(line => `SUFFIX,${line}`), diff --git a/Build/build-internal-reverse-chn-cidr.ts b/Build/build-internal-reverse-chn-cidr.ts index c8081db9..5d9213b0 100644 --- a/Build/build-internal-reverse-chn-cidr.ts +++ b/Build/build-internal-reverse-chn-cidr.ts @@ -25,10 +25,10 @@ const RESERVED_IPV4_CIDR = [ ]; export const buildInternalReverseChnCIDR = task(import.meta.path, async () => { - const [cidr] = await Promise.all([ + const cidr = (await Promise.all([ processLineFromReadline(await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')), fsp.mkdir(path.resolve(import.meta.dir, '../List/internal'), { recursive: true }) - ]); + ]))[0]; const reversedCidr = exclude( [ diff --git a/Build/build-speedtest-domainset.ts b/Build/build-speedtest-domainset.ts index dfa4dcb4..b8aebfe6 100644 --- a/Build/build-speedtest-domainset.ts +++ b/Build/build-speedtest-domainset.ts @@ -16,10 +16,10 @@ const latestTopUserAgentsPromise = fetchWithRetry('https://unpkg.com/top-user-ag .then(res => res.json() as Promise); const querySpeedtestApi = async (keyword: string): Promise> => { - const [topUserAgents] = await Promise.all([ + const topUserAgents = (await Promise.all([ latestTopUserAgentsPromise, s.acquire() - ]); + ]))[0]; const randomUserAgent = topUserAgents[Math.floor(Math.random() * topUserAgents.length)]; diff --git a/Build/download-previous-build.ts b/Build/download-previous-build.ts index 436f079c..592b9b0f 100644 --- a/Build/download-previous-build.ts +++ b/Build/download-previous-build.ts @@ -53,10 +53,10 @@ export const downloadPreviousBuild = task(import.meta.path, async () => { await traceAsync( 'Download and extract previous build', async () => { - const [resp] = await Promise.all([ + const resp = (await Promise.all([ fetchWithRetry('https://codeload.github.com/sukkalab/ruleset.skk.moe/tar.gz/master', defaultRequestInit), fsp.mkdir(extractedPath, { recursive: true }) - ]); + ]))[0]; const extract = tarStream.extract(); Readable.fromWeb(resp.body!).pipe(zlib.createGunzip()).pipe(extract); @@ -88,10 +88,10 @@ export const downloadPublicSuffixList = task(import.meta.path, async () => { const publicSuffixDir = path.resolve(import.meta.dir, '../node_modules/.cache'); const publicSuffixPath = path.join(publicSuffixDir, 'public_suffix_list_dat.txt'); - const [resp] = await Promise.all([ + const resp = (await Promise.all([ fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit), fsp.mkdir(publicSuffixDir, { recursive: true }) - ]); + ]))[0]; return Bun.write(publicSuffixPath, resp as Response); }, 'download-publicsuffixlist'); diff --git a/Build/lib/cached-tld-parse.ts b/Build/lib/cached-tld-parse.ts index b5ced6de..73ccfeae 100644 --- a/Build/lib/cached-tld-parse.ts +++ b/Build/lib/cached-tld-parse.ts @@ -1,6 +1,6 @@ import * as tldts from 'tldts'; import { createCache } from './cache-apply'; -import type { PublicSuffixList } from 'gorhill-publicsuffixlist'; +import type { PublicSuffixList } from '@gorhill/publicsuffixlist'; const cache = createCache('cached-tld-parse', true); diff --git a/Build/lib/fetch-assets.ts b/Build/lib/fetch-assets.ts new file mode 100644 index 00000000..775bd3bc --- /dev/null +++ b/Build/lib/fetch-assets.ts @@ -0,0 +1,61 @@ +import picocolors from 'picocolors'; +import { defaultRequestInit, fetchWithRetry } from './fetch-retry'; + +class CustomAbortError extends Error { + public readonly name = 'AbortError'; + public readonly digest = 'AbortError'; +} + +const sleepWithAbort = (ms: number, signal: AbortSignal) => new Promise((resolve, reject) => { + signal.throwIfAborted(); + signal.addEventListener('abort', stop); + Bun.sleep(ms).then(done).catch(doReject); + + function done() { + signal.removeEventListener('abort', stop); + resolve(); + } + function stop(this: AbortSignal) { + reject(this.reason); + } + function doReject(reason: unknown) { + signal.removeEventListener('abort', stop); + reject(reason); + } +}); + +export async function fetchAssets(url: string, fallbackUrls: string[] | readonly string[]) { + const controller = new AbortController(); + + const fetchMainPromise = fetchWithRetry(url, { signal: controller.signal, ...defaultRequestInit }) + .then(r => r.text()) + .then(text => { + controller.abort(); + return text; + }); + const createFetchFallbackPromise = async (url: string, index: number) => { + // Most assets can be downloaded within 250ms. To avoid wasting bandwidth, we will wait for 500ms before downloading from the fallback URL. + try { + await sleepWithAbort(500 + (index + 1) * 20, controller.signal); + } catch { + console.log(picocolors.gray('[fetch cancelled early]'), picocolors.gray(url)); + throw new CustomAbortError(); + } + if (controller.signal.aborted) { + console.log(picocolors.gray('[fetch cancelled]'), picocolors.gray(url)); + throw new CustomAbortError(); + } + const res = await fetchWithRetry(url, { signal: controller.signal, ...defaultRequestInit }); + const text = await res.text(); + controller.abort(); + return text; + }; + + return Promise.any([ + fetchMainPromise, + ...fallbackUrls.map(createFetchFallbackPromise) + ]).catch(e => { + console.log(`Download Rule for [${url}] failed`); + throw e; + }); +} diff --git a/Build/lib/get-gorhill-publicsuffix.ts b/Build/lib/get-gorhill-publicsuffix.ts index 0d74af1f..df010aa9 100644 --- a/Build/lib/get-gorhill-publicsuffix.ts +++ b/Build/lib/get-gorhill-publicsuffix.ts @@ -2,7 +2,7 @@ import { toASCII } from 'punycode'; import path from 'path'; import { traceAsync } from './trace-runner'; import { defaultRequestInit, fetchWithRetry } from './fetch-retry'; -import type { PublicSuffixList } from 'gorhill-publicsuffixlist'; +import type { PublicSuffixList } from '@gorhill/publicsuffixlist'; const publicSuffixPath = path.resolve(import.meta.dir, '../../node_modules/.cache/public_suffix_list_dat.txt'); @@ -18,7 +18,7 @@ const getGorhillPublicSuffix = () => traceAsync('create gorhill public suffix in console.log('public_suffix_list.dat not found, fetch directly from remote.'); return r.text(); }), - import('gorhill-publicsuffixlist') + import('@gorhill/publicsuffixlist') ]); gorhill.parse(publicSuffixListDat, toASCII); diff --git a/Build/lib/parse-filter.ts b/Build/lib/parse-filter.ts index f1e7d4b4..d6edd0f6 100644 --- a/Build/lib/parse-filter.ts +++ b/Build/lib/parse-filter.ts @@ -1,21 +1,20 @@ // @ts-check -import { defaultRequestInit, fetchWithRetry } from './fetch-retry'; - import { fetchRemoteTextAndReadByLine } from './fetch-text-by-line'; import { NetworkFilter } from '@cliqz/adblocker'; import { processLine } from './process-line'; import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix'; -import type { PublicSuffixList } from 'gorhill-publicsuffixlist'; +import type { PublicSuffixList } from '@gorhill/publicsuffixlist'; import { traceAsync } from './trace-runner'; import picocolors from 'picocolors'; import { normalizeDomain } from './normalize-domain'; +import { fetchAssets } from './fetch-assets'; const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null let foundDebugDomain = false; const warnOnceUrl = new Set(); -const warnOnce = (url: string, isWhite: boolean, ...message: any[]) => { +const warnOnce = (url: string, isWhite: boolean, ...message: string[]) => { const key = `${url}${isWhite ? 'white' : 'black'}`; if (warnOnceUrl.has(key)) { return; @@ -54,7 +53,7 @@ export function processHosts(hostsUrl: string, includeAllSubDomain = false, skip continue; } - const [, domain] = line.split(/\s/); + const domain = line.split(/\s/)[1]; if (!domain) { continue; } @@ -185,7 +184,9 @@ export async function processFilterRules( } const R_KNOWN_NOT_NETWORK_FILTER_PATTERN = /[#%&=~]/; -const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder)/; +const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder|\$cname)/; +// cname exceptional filter can not be parsed by NetworkFilter +// Surge / Clash can't handle CNAME either, so we just ignore them function parse($line: string, gorhill: PublicSuffixList): null | [hostname: string, flag: ParseType] { if ( @@ -213,15 +214,15 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri return null; } - const firstChar = line[0]; - const lastChar = line[len - 1]; + const firstCharCode = line[0].charCodeAt(0); + const lastCharCode = line[len - 1].charCodeAt(0); if ( - firstChar === '/' + firstCharCode === 47 // 47 `/` // ends with - || lastChar === '.' // || line.endsWith('.') - || lastChar === '-' // || line.endsWith('-') - || lastChar === '_' // || line.endsWith('_') + || lastCharCode === 46 // 46 `.`, line.endsWith('.') + || lastCharCode === 45 // 45 `-`, line.endsWith('-') + || lastCharCode === 95 // 95 `_`, line.endsWith('_') // special modifier || R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2.test(line) // || line.includes('$popup') @@ -238,6 +239,8 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri const filter = NetworkFilter.parse(line); if (filter) { if ( + // filter.isCosmeticFilter() // always false + // filter.isNetworkFilter() // always true filter.isElemHide() || filter.isGenericHide() || filter.isSpecificHide() @@ -253,8 +256,7 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri if ( filter.hostname // filter.hasHostname() // must have - && filter.isPlain() - // && (!filter.isRegex()) // isPlain() === !isRegex() + && filter.isPlain() // isPlain() === !isRegex() && (!filter.isFullRegex()) ) { const hostname = normalizeDomain(filter.hostname); @@ -286,95 +288,106 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri } } - /** - * abnormal filter that can not be parsed by NetworkFilter - */ + // After NetworkFilter.parse, it means the line can not be parsed by cliqz NetworkFilter + // We now need to "salvage" the line as much as possible + /* + * From now on, we are mostly facing non-standard domain rules (some are regex like) + * We first skip third-party and frame rules, as Surge / Clash can't handle them + * + * `.sharecounter.$third-party` + * `.bbelements.com^$third-party` + * `://o0e.ru^$third-party` + * `.1.1.1.l80.js^$third-party` + */ if (line.includes('$third-party') || line.includes('$frame')) { - /* - * `.bbelements.com^$third-party` - * `://o0e.ru^$third-party` - */ return null; } /** @example line.endsWith('^') */ - const linedEndsWithCaret = lastChar === '^'; + const linedEndsWithCaret = lastCharCode === 94; // lastChar === '^'; /** @example line.endsWith('^|') */ - const lineEndsWithCaretVerticalBar = lastChar === '|' && line[len - 2] === '^'; + const lineEndsWithCaretVerticalBar = (lastCharCode === 124 /** lastChar === '|' */) && line[len - 2] === '^'; /** @example line.endsWith('^') || line.endsWith('^|') */ const lineEndsWithCaretOrCaretVerticalBar = linedEndsWithCaret || lineEndsWithCaretVerticalBar; // whitelist (exception) - if (firstChar === '@' && line[1] === '@') { - /** - * cname exceptional filter can not be parsed by NetworkFilter - * - * `@@||m.faz.net^$cname` - * - * Surge / Clash can't handle CNAME either, so we just ignore them - */ - if (line.endsWith('$cname')) { - return null; - } - + if ( + firstCharCode === 64 // 64 `@` + && line[1] === '@' + ) { /** * Some "malformed" regex-based filters can not be parsed by NetworkFilter - * "$genericblock`" is also not supported by NetworkFilter + * "$genericblock`" is also not supported by NetworkFilter, see: + * https://github.com/ghostery/adblocker/blob/62caf7786ba10ef03beffecd8cd4eec111bcd5ec/packages/adblocker/test/parsing.test.ts#L950 * * `@@||cmechina.net^$genericblock` * `@@|ftp.bmp.ovh^|` * `@@|adsterra.com^|` + * `@@.atlassian.net$document` + * `@@||ad.alimama.com^$genericblock` */ - if ( - ( - // line.startsWith('@@|') - line[2] === '|' - // line.startsWith('@@.') - || line[2] === '.' - /** - * line.startsWith('@@://') - * - * `@@://googleadservices.com^|` - * `@@://www.googleadservices.com^|` - */ - || (line[2] === ':' && line[3] === '/' && line[4] === '/') - ) - && ( - lineEndsWithCaretOrCaretVerticalBar - || line.endsWith('$genericblock') - || line.endsWith('$document') - ) - ) { - const _domain = line - .replace('@@||', '') - .replace('@@://', '') - .replace('@@|', '') - .replace('@@.', '') - .replace('^|', '') - .replace('^$genericblock', '') - .replace('$genericblock', '') - .replace('^$document', '') - .replace('$document', '') - .replaceAll('^', '') - .trim(); - const domain = normalizeDomain(_domain); + let sliceStart = 0; + let sliceEnd: number | undefined; + + // line.startsWith('@@|') || line.startsWith('@@.') + if (line[2] === '|' || line[2] === '.') { + sliceStart = 3; + // line.startsWith('@@||') + if (line[3] === '|') { + sliceStart = 4; + } + } + + /** + * line.startsWith('@@://') + * + * `@@://googleadservices.com^|` + * `@@://www.googleadservices.com^|` + */ + if (line[2] === ':' && line[3] === '/' && line[4] === '/') { + sliceStart = 5; + } + + if (lineEndsWithCaretOrCaretVerticalBar) { + sliceEnd = -2; + } else if (line.endsWith('$genericblock')) { + sliceEnd = -13; + if (line[len - 14] === '^') { // line.endsWith('^$genericblock') + sliceEnd = -14; + } + } else if (line.endsWith('$document')) { + sliceEnd = -9; + if (line[len - 10] === '^') { // line.endsWith('^$document') + sliceEnd = -10; + } + } + + if (sliceStart !== 0 || sliceEnd !== undefined) { + const sliced = line.slice(sliceStart, sliceEnd); + const domain = normalizeDomain(sliced); if (domain) { return [domain, ParseType.WhiteIncludeSubdomain]; } - return [ - `[parse-filter E0001] (white) invalid domain: ${_domain}`, + `[parse-filter E0001] (white) invalid domain: ${JSON.stringify({ + line, sliced, sliceStart, sliceEnd + })}`, ParseType.ErrorMessage ]; } + + return [ + `[parse-filter E0006] (white) failed to parse: ${JSON.stringify({ + line, sliceStart, sliceEnd + })}`, + ParseType.ErrorMessage + ]; } - if (firstChar === '|') { - const lineEndswithCname = line.endsWith('$cname'); - - if (lineEndsWithCaretOrCaretVerticalBar || lineEndswithCname) { + if (firstCharCode === 124) { // 124 `|` + if (lineEndsWithCaretOrCaretVerticalBar) { /** * Some malformed filters can not be parsed by NetworkFilter: * @@ -387,12 +400,11 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri const includeAllSubDomain = line[1] === '|'; const sliceStart = includeAllSubDomain ? 2 : 1; - const sliceEnd = lastChar === '^' + const sliceEnd = lastCharCode === 94 // lastChar === '^' ? -1 - : lineEndsWithCaretOrCaretVerticalBar + : (lineEndsWithCaretVerticalBar ? -2 - // eslint-disable-next-line sukka/unicorn/no-nested-ternary -- speed - : (lineEndswithCname ? -6 : 0); + : undefined); const _domain = line .slice(sliceStart, sliceEnd) // we already make sure line startsWith "|" @@ -410,7 +422,7 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri } } - const lineStartsWithSingleDot = firstChar === '.'; + const lineStartsWithSingleDot = firstCharCode === 46; // 46 `.` if ( lineStartsWithSingleDot && lineEndsWithCaretOrCaretVerticalBar @@ -489,7 +501,10 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri * `-logging.nextmedia.com` * `_social_tracking.js^` */ - if (firstChar !== '|' && lastChar === '^') { + if ( + firstCharCode !== 124 // 124 `|` + && lastCharCode === 94 // 94 `^` + ) { const _domain = line.slice(0, -1); const suffix = gorhill.getPublicSuffix(_domain); @@ -553,63 +568,3 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri ParseType.ErrorMessage ]; } - -class CustomAbortError extends Error { - public readonly name = 'AbortError'; - public readonly digest = 'AbortError'; -} - -const sleepWithAbort = (ms: number, signal: AbortSignal) => new Promise((resolve, reject) => { - signal.throwIfAborted(); - signal.addEventListener('abort', stop); - Bun.sleep(ms).then(done).catch(doReject); - - function done() { - signal.removeEventListener('abort', stop); - resolve(); - } - function stop(this: AbortSignal) { - reject(this.reason); - } - function doReject(reason: unknown) { - signal.removeEventListener('abort', stop); - reject(reason); - } -}); - -async function fetchAssets(url: string, fallbackUrls: string[] | readonly string[]) { - const controller = new AbortController(); - - const fetchMainPromise = fetchWithRetry(url, { signal: controller.signal, ...defaultRequestInit }) - .then(r => r.text()) - .then(text => { - console.log(picocolors.gray('[fetch finish]'), picocolors.gray(url)); - controller.abort(); - return text; - }); - const createFetchFallbackPromise = async (url: string, index: number) => { - // Most assets can be downloaded within 250ms. To avoid wasting bandwidth, we will wait for 350ms before downloading from the fallback URL. - try { - await sleepWithAbort(300 + (index + 1) * 20, controller.signal); - } catch { - console.log(picocolors.gray('[fetch cancelled early]'), picocolors.gray(url)); - throw new CustomAbortError(); - } - if (controller.signal.aborted) { - console.log(picocolors.gray('[fetch cancelled]'), picocolors.gray(url)); - throw new CustomAbortError(); - } - const res = await fetchWithRetry(url, { signal: controller.signal, ...defaultRequestInit }); - const text = await res.text(); - controller.abort(); - return text; - }; - - return Promise.any([ - fetchMainPromise, - ...fallbackUrls.map(createFetchFallbackPromise) - ]).catch(e => { - console.log(`Download Rule for [${url}] failed`); - throw e; - }); -} diff --git a/Build/lib/stable-sort-domain.ts b/Build/lib/stable-sort-domain.ts index d8516922..8b6da71b 100644 --- a/Build/lib/stable-sort-domain.ts +++ b/Build/lib/stable-sort-domain.ts @@ -1,4 +1,4 @@ -import type { PublicSuffixList } from 'gorhill-publicsuffixlist'; +import type { PublicSuffixList } from '@gorhill/publicsuffixlist'; import { createCachedGorhillGetDomain } from './cached-tld-parse'; const compare = (a: string | null, b: string | null) => { diff --git a/Build/mod.d.ts b/Build/mod.d.ts index 5e6e1299..212ff778 100644 --- a/Build/mod.d.ts +++ b/Build/mod.d.ts @@ -1,4 +1,4 @@ -declare module 'gorhill-publicsuffixlist' { +declare module '@gorhill/publicsuffixlist' { type Selfie = | string | { diff --git a/bun.lockb b/bun.lockb index 27cc6219..c90c6ab1 100755 Binary files a/bun.lockb and b/bun.lockb differ diff --git a/package.json b/package.json index 1b8d59da..55ce43f8 100644 --- a/package.json +++ b/package.json @@ -15,13 +15,13 @@ "license": "ISC", "dependencies": { "@cliqz/adblocker": "^1.26.12", + "@gorhill/publicsuffixlist": "^3.0.1", "@sukka/listdir": "^0.3.1", "async-retry": "^1.3.3", "async-sema": "^3.1.1", "ci-info": "^4.0.0", "csv-parse": "^5.5.3", "fast-cidr-tools": "^0.2.2", - "gorhill-publicsuffixlist": "github:gorhill/publicsuffixlist.js", "mnemonist": "^0.39.6", "path-scurry": "^1.10.1", "picocolors": "^1.0.0",