mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Refactor: speed up reject parsing
This commit is contained in:
parent
91ed783d73
commit
16a08bd07d
@ -58,7 +58,7 @@ export const buildInternalCDNDomains = task(import.meta.path, async () => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const [gorhill] = await Promise.all([
|
const gorhill = (await Promise.all([
|
||||||
getGorhillPublicSuffixPromise(),
|
getGorhillPublicSuffixPromise(),
|
||||||
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf')),
|
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf')),
|
||||||
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf')),
|
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf')),
|
||||||
@ -70,7 +70,7 @@ export const buildInternalCDNDomains = task(import.meta.path, async () => {
|
|||||||
processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/download.conf')),
|
processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/download.conf')),
|
||||||
|
|
||||||
fsp.mkdir(path.resolve(import.meta.dir, '../List/internal'), { recursive: true })
|
fsp.mkdir(path.resolve(import.meta.dir, '../List/internal'), { recursive: true })
|
||||||
]);
|
]))[0];
|
||||||
|
|
||||||
return compareAndWriteFile(
|
return compareAndWriteFile(
|
||||||
[
|
[
|
||||||
|
|||||||
@ -5,10 +5,10 @@ import { task } from './lib/trace-runner';
|
|||||||
import { compareAndWriteFile } from './lib/create-file';
|
import { compareAndWriteFile } from './lib/create-file';
|
||||||
|
|
||||||
export const buildInternalChnDomains = task(import.meta.path, async () => {
|
export const buildInternalChnDomains = task(import.meta.path, async () => {
|
||||||
const [result] = await Promise.all([
|
const result = (await Promise.all([
|
||||||
parseFelixDnsmasq('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf'),
|
parseFelixDnsmasq('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf'),
|
||||||
fsp.mkdir(path.resolve(import.meta.dir, '../List/internal'), { recursive: true })
|
fsp.mkdir(path.resolve(import.meta.dir, '../List/internal'), { recursive: true })
|
||||||
]);
|
]))[0];
|
||||||
|
|
||||||
return compareAndWriteFile(
|
return compareAndWriteFile(
|
||||||
result.map(line => `SUFFIX,${line}`),
|
result.map(line => `SUFFIX,${line}`),
|
||||||
|
|||||||
@ -25,10 +25,10 @@ const RESERVED_IPV4_CIDR = [
|
|||||||
];
|
];
|
||||||
|
|
||||||
export const buildInternalReverseChnCIDR = task(import.meta.path, async () => {
|
export const buildInternalReverseChnCIDR = task(import.meta.path, async () => {
|
||||||
const [cidr] = await Promise.all([
|
const cidr = (await Promise.all([
|
||||||
processLineFromReadline(await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')),
|
processLineFromReadline(await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')),
|
||||||
fsp.mkdir(path.resolve(import.meta.dir, '../List/internal'), { recursive: true })
|
fsp.mkdir(path.resolve(import.meta.dir, '../List/internal'), { recursive: true })
|
||||||
]);
|
]))[0];
|
||||||
|
|
||||||
const reversedCidr = exclude(
|
const reversedCidr = exclude(
|
||||||
[
|
[
|
||||||
|
|||||||
@ -16,10 +16,10 @@ const latestTopUserAgentsPromise = fetchWithRetry('https://unpkg.com/top-user-ag
|
|||||||
.then(res => res.json() as Promise<string[]>);
|
.then(res => res.json() as Promise<string[]>);
|
||||||
|
|
||||||
const querySpeedtestApi = async (keyword: string): Promise<Array<string | null>> => {
|
const querySpeedtestApi = async (keyword: string): Promise<Array<string | null>> => {
|
||||||
const [topUserAgents] = await Promise.all([
|
const topUserAgents = (await Promise.all([
|
||||||
latestTopUserAgentsPromise,
|
latestTopUserAgentsPromise,
|
||||||
s.acquire()
|
s.acquire()
|
||||||
]);
|
]))[0];
|
||||||
|
|
||||||
const randomUserAgent = topUserAgents[Math.floor(Math.random() * topUserAgents.length)];
|
const randomUserAgent = topUserAgents[Math.floor(Math.random() * topUserAgents.length)];
|
||||||
|
|
||||||
|
|||||||
@ -53,10 +53,10 @@ export const downloadPreviousBuild = task(import.meta.path, async () => {
|
|||||||
await traceAsync(
|
await traceAsync(
|
||||||
'Download and extract previous build',
|
'Download and extract previous build',
|
||||||
async () => {
|
async () => {
|
||||||
const [resp] = await Promise.all([
|
const resp = (await Promise.all([
|
||||||
fetchWithRetry('https://codeload.github.com/sukkalab/ruleset.skk.moe/tar.gz/master', defaultRequestInit),
|
fetchWithRetry('https://codeload.github.com/sukkalab/ruleset.skk.moe/tar.gz/master', defaultRequestInit),
|
||||||
fsp.mkdir(extractedPath, { recursive: true })
|
fsp.mkdir(extractedPath, { recursive: true })
|
||||||
]);
|
]))[0];
|
||||||
|
|
||||||
const extract = tarStream.extract();
|
const extract = tarStream.extract();
|
||||||
Readable.fromWeb(resp.body!).pipe(zlib.createGunzip()).pipe(extract);
|
Readable.fromWeb(resp.body!).pipe(zlib.createGunzip()).pipe(extract);
|
||||||
@ -88,10 +88,10 @@ export const downloadPublicSuffixList = task(import.meta.path, async () => {
|
|||||||
const publicSuffixDir = path.resolve(import.meta.dir, '../node_modules/.cache');
|
const publicSuffixDir = path.resolve(import.meta.dir, '../node_modules/.cache');
|
||||||
const publicSuffixPath = path.join(publicSuffixDir, 'public_suffix_list_dat.txt');
|
const publicSuffixPath = path.join(publicSuffixDir, 'public_suffix_list_dat.txt');
|
||||||
|
|
||||||
const [resp] = await Promise.all([
|
const resp = (await Promise.all([
|
||||||
fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit),
|
fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit),
|
||||||
fsp.mkdir(publicSuffixDir, { recursive: true })
|
fsp.mkdir(publicSuffixDir, { recursive: true })
|
||||||
]);
|
]))[0];
|
||||||
|
|
||||||
return Bun.write(publicSuffixPath, resp as Response);
|
return Bun.write(publicSuffixPath, resp as Response);
|
||||||
}, 'download-publicsuffixlist');
|
}, 'download-publicsuffixlist');
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
import * as tldts from 'tldts';
|
import * as tldts from 'tldts';
|
||||||
import { createCache } from './cache-apply';
|
import { createCache } from './cache-apply';
|
||||||
import type { PublicSuffixList } from 'gorhill-publicsuffixlist';
|
import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
|
||||||
|
|
||||||
const cache = createCache('cached-tld-parse', true);
|
const cache = createCache('cached-tld-parse', true);
|
||||||
|
|
||||||
|
|||||||
61
Build/lib/fetch-assets.ts
Normal file
61
Build/lib/fetch-assets.ts
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
import picocolors from 'picocolors';
|
||||||
|
import { defaultRequestInit, fetchWithRetry } from './fetch-retry';
|
||||||
|
|
||||||
|
class CustomAbortError extends Error {
|
||||||
|
public readonly name = 'AbortError';
|
||||||
|
public readonly digest = 'AbortError';
|
||||||
|
}
|
||||||
|
|
||||||
|
const sleepWithAbort = (ms: number, signal: AbortSignal) => new Promise<void>((resolve, reject) => {
|
||||||
|
signal.throwIfAborted();
|
||||||
|
signal.addEventListener('abort', stop);
|
||||||
|
Bun.sleep(ms).then(done).catch(doReject);
|
||||||
|
|
||||||
|
function done() {
|
||||||
|
signal.removeEventListener('abort', stop);
|
||||||
|
resolve();
|
||||||
|
}
|
||||||
|
function stop(this: AbortSignal) {
|
||||||
|
reject(this.reason);
|
||||||
|
}
|
||||||
|
function doReject(reason: unknown) {
|
||||||
|
signal.removeEventListener('abort', stop);
|
||||||
|
reject(reason);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
export async function fetchAssets(url: string, fallbackUrls: string[] | readonly string[]) {
|
||||||
|
const controller = new AbortController();
|
||||||
|
|
||||||
|
const fetchMainPromise = fetchWithRetry(url, { signal: controller.signal, ...defaultRequestInit })
|
||||||
|
.then(r => r.text())
|
||||||
|
.then(text => {
|
||||||
|
controller.abort();
|
||||||
|
return text;
|
||||||
|
});
|
||||||
|
const createFetchFallbackPromise = async (url: string, index: number) => {
|
||||||
|
// Most assets can be downloaded within 250ms. To avoid wasting bandwidth, we will wait for 500ms before downloading from the fallback URL.
|
||||||
|
try {
|
||||||
|
await sleepWithAbort(500 + (index + 1) * 20, controller.signal);
|
||||||
|
} catch {
|
||||||
|
console.log(picocolors.gray('[fetch cancelled early]'), picocolors.gray(url));
|
||||||
|
throw new CustomAbortError();
|
||||||
|
}
|
||||||
|
if (controller.signal.aborted) {
|
||||||
|
console.log(picocolors.gray('[fetch cancelled]'), picocolors.gray(url));
|
||||||
|
throw new CustomAbortError();
|
||||||
|
}
|
||||||
|
const res = await fetchWithRetry(url, { signal: controller.signal, ...defaultRequestInit });
|
||||||
|
const text = await res.text();
|
||||||
|
controller.abort();
|
||||||
|
return text;
|
||||||
|
};
|
||||||
|
|
||||||
|
return Promise.any([
|
||||||
|
fetchMainPromise,
|
||||||
|
...fallbackUrls.map(createFetchFallbackPromise)
|
||||||
|
]).catch(e => {
|
||||||
|
console.log(`Download Rule for [${url}] failed`);
|
||||||
|
throw e;
|
||||||
|
});
|
||||||
|
}
|
||||||
@ -2,7 +2,7 @@ import { toASCII } from 'punycode';
|
|||||||
import path from 'path';
|
import path from 'path';
|
||||||
import { traceAsync } from './trace-runner';
|
import { traceAsync } from './trace-runner';
|
||||||
import { defaultRequestInit, fetchWithRetry } from './fetch-retry';
|
import { defaultRequestInit, fetchWithRetry } from './fetch-retry';
|
||||||
import type { PublicSuffixList } from 'gorhill-publicsuffixlist';
|
import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
|
||||||
|
|
||||||
const publicSuffixPath = path.resolve(import.meta.dir, '../../node_modules/.cache/public_suffix_list_dat.txt');
|
const publicSuffixPath = path.resolve(import.meta.dir, '../../node_modules/.cache/public_suffix_list_dat.txt');
|
||||||
|
|
||||||
@ -18,7 +18,7 @@ const getGorhillPublicSuffix = () => traceAsync('create gorhill public suffix in
|
|||||||
console.log('public_suffix_list.dat not found, fetch directly from remote.');
|
console.log('public_suffix_list.dat not found, fetch directly from remote.');
|
||||||
return r.text();
|
return r.text();
|
||||||
}),
|
}),
|
||||||
import('gorhill-publicsuffixlist')
|
import('@gorhill/publicsuffixlist')
|
||||||
]);
|
]);
|
||||||
|
|
||||||
gorhill.parse(publicSuffixListDat, toASCII);
|
gorhill.parse(publicSuffixListDat, toASCII);
|
||||||
|
|||||||
@ -1,21 +1,20 @@
|
|||||||
// @ts-check
|
// @ts-check
|
||||||
import { defaultRequestInit, fetchWithRetry } from './fetch-retry';
|
|
||||||
|
|
||||||
import { fetchRemoteTextAndReadByLine } from './fetch-text-by-line';
|
import { fetchRemoteTextAndReadByLine } from './fetch-text-by-line';
|
||||||
import { NetworkFilter } from '@cliqz/adblocker';
|
import { NetworkFilter } from '@cliqz/adblocker';
|
||||||
import { processLine } from './process-line';
|
import { processLine } from './process-line';
|
||||||
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
||||||
import type { PublicSuffixList } from 'gorhill-publicsuffixlist';
|
import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
|
||||||
|
|
||||||
import { traceAsync } from './trace-runner';
|
import { traceAsync } from './trace-runner';
|
||||||
import picocolors from 'picocolors';
|
import picocolors from 'picocolors';
|
||||||
import { normalizeDomain } from './normalize-domain';
|
import { normalizeDomain } from './normalize-domain';
|
||||||
|
import { fetchAssets } from './fetch-assets';
|
||||||
|
|
||||||
const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null
|
const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null
|
||||||
let foundDebugDomain = false;
|
let foundDebugDomain = false;
|
||||||
|
|
||||||
const warnOnceUrl = new Set<string>();
|
const warnOnceUrl = new Set<string>();
|
||||||
const warnOnce = (url: string, isWhite: boolean, ...message: any[]) => {
|
const warnOnce = (url: string, isWhite: boolean, ...message: string[]) => {
|
||||||
const key = `${url}${isWhite ? 'white' : 'black'}`;
|
const key = `${url}${isWhite ? 'white' : 'black'}`;
|
||||||
if (warnOnceUrl.has(key)) {
|
if (warnOnceUrl.has(key)) {
|
||||||
return;
|
return;
|
||||||
@ -54,7 +53,7 @@ export function processHosts(hostsUrl: string, includeAllSubDomain = false, skip
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const [, domain] = line.split(/\s/);
|
const domain = line.split(/\s/)[1];
|
||||||
if (!domain) {
|
if (!domain) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -185,7 +184,9 @@ export async function processFilterRules(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const R_KNOWN_NOT_NETWORK_FILTER_PATTERN = /[#%&=~]/;
|
const R_KNOWN_NOT_NETWORK_FILTER_PATTERN = /[#%&=~]/;
|
||||||
const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder)/;
|
const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder|\$cname)/;
|
||||||
|
// cname exceptional filter can not be parsed by NetworkFilter
|
||||||
|
// Surge / Clash can't handle CNAME either, so we just ignore them
|
||||||
|
|
||||||
function parse($line: string, gorhill: PublicSuffixList): null | [hostname: string, flag: ParseType] {
|
function parse($line: string, gorhill: PublicSuffixList): null | [hostname: string, flag: ParseType] {
|
||||||
if (
|
if (
|
||||||
@ -213,15 +214,15 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const firstChar = line[0];
|
const firstCharCode = line[0].charCodeAt(0);
|
||||||
const lastChar = line[len - 1];
|
const lastCharCode = line[len - 1].charCodeAt(0);
|
||||||
|
|
||||||
if (
|
if (
|
||||||
firstChar === '/'
|
firstCharCode === 47 // 47 `/`
|
||||||
// ends with
|
// ends with
|
||||||
|| lastChar === '.' // || line.endsWith('.')
|
|| lastCharCode === 46 // 46 `.`, line.endsWith('.')
|
||||||
|| lastChar === '-' // || line.endsWith('-')
|
|| lastCharCode === 45 // 45 `-`, line.endsWith('-')
|
||||||
|| lastChar === '_' // || line.endsWith('_')
|
|| lastCharCode === 95 // 95 `_`, line.endsWith('_')
|
||||||
// special modifier
|
// special modifier
|
||||||
|| R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2.test(line)
|
|| R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2.test(line)
|
||||||
// || line.includes('$popup')
|
// || line.includes('$popup')
|
||||||
@ -238,6 +239,8 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
|
|||||||
const filter = NetworkFilter.parse(line);
|
const filter = NetworkFilter.parse(line);
|
||||||
if (filter) {
|
if (filter) {
|
||||||
if (
|
if (
|
||||||
|
// filter.isCosmeticFilter() // always false
|
||||||
|
// filter.isNetworkFilter() // always true
|
||||||
filter.isElemHide()
|
filter.isElemHide()
|
||||||
|| filter.isGenericHide()
|
|| filter.isGenericHide()
|
||||||
|| filter.isSpecificHide()
|
|| filter.isSpecificHide()
|
||||||
@ -253,8 +256,7 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
|
|||||||
|
|
||||||
if (
|
if (
|
||||||
filter.hostname // filter.hasHostname() // must have
|
filter.hostname // filter.hasHostname() // must have
|
||||||
&& filter.isPlain()
|
&& filter.isPlain() // isPlain() === !isRegex()
|
||||||
// && (!filter.isRegex()) // isPlain() === !isRegex()
|
|
||||||
&& (!filter.isFullRegex())
|
&& (!filter.isFullRegex())
|
||||||
) {
|
) {
|
||||||
const hostname = normalizeDomain(filter.hostname);
|
const hostname = normalizeDomain(filter.hostname);
|
||||||
@ -286,95 +288,106 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Past this point, the line could not be parsed by the cliqz NetworkFilter
|
||||||
* abnormal filter that can not be parsed by NetworkFilter
|
// We now need to "salvage" the line as much as possible
|
||||||
*/
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* From now on, we are mostly facing non-standard domain rules (some are regex-like)
|
||||||
|
* We first skip third-party and frame rules, as Surge / Clash can't handle them
|
||||||
|
*
|
||||||
|
* `.sharecounter.$third-party`
|
||||||
|
* `.bbelements.com^$third-party`
|
||||||
|
* `://o0e.ru^$third-party`
|
||||||
|
* `.1.1.1.l80.js^$third-party`
|
||||||
|
*/
|
||||||
if (line.includes('$third-party') || line.includes('$frame')) {
|
if (line.includes('$third-party') || line.includes('$frame')) {
|
||||||
/*
|
|
||||||
* `.bbelements.com^$third-party`
|
|
||||||
* `://o0e.ru^$third-party`
|
|
||||||
*/
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @example line.endsWith('^') */
|
/** @example line.endsWith('^') */
|
||||||
const linedEndsWithCaret = lastChar === '^';
|
const linedEndsWithCaret = lastCharCode === 94; // lastChar === '^';
|
||||||
/** @example line.endsWith('^|') */
|
/** @example line.endsWith('^|') */
|
||||||
const lineEndsWithCaretVerticalBar = lastChar === '|' && line[len - 2] === '^';
|
const lineEndsWithCaretVerticalBar = (lastCharCode === 124 /** lastChar === '|' */) && line[len - 2] === '^';
|
||||||
/** @example line.endsWith('^') || line.endsWith('^|') */
|
/** @example line.endsWith('^') || line.endsWith('^|') */
|
||||||
const lineEndsWithCaretOrCaretVerticalBar = linedEndsWithCaret || lineEndsWithCaretVerticalBar;
|
const lineEndsWithCaretOrCaretVerticalBar = linedEndsWithCaret || lineEndsWithCaretVerticalBar;
|
||||||
|
|
||||||
// whitelist (exception)
|
// whitelist (exception)
|
||||||
if (firstChar === '@' && line[1] === '@') {
|
if (
|
||||||
/**
|
firstCharCode === 64 // 64 `@`
|
||||||
* cname exceptional filter can not be parsed by NetworkFilter
|
&& line[1] === '@'
|
||||||
*
|
) {
|
||||||
* `@@||m.faz.net^$cname`
|
|
||||||
*
|
|
||||||
* Surge / Clash can't handle CNAME either, so we just ignore them
|
|
||||||
*/
|
|
||||||
if (line.endsWith('$cname')) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Some "malformed" regex-based filters can not be parsed by NetworkFilter
|
* Some "malformed" regex-based filters can not be parsed by NetworkFilter
|
||||||
* "$genericblock`" is also not supported by NetworkFilter
|
* "$genericblock" is also not supported by NetworkFilter, see:
|
||||||
|
* https://github.com/ghostery/adblocker/blob/62caf7786ba10ef03beffecd8cd4eec111bcd5ec/packages/adblocker/test/parsing.test.ts#L950
|
||||||
*
|
*
|
||||||
* `@@||cmechina.net^$genericblock`
|
* `@@||cmechina.net^$genericblock`
|
||||||
* `@@|ftp.bmp.ovh^|`
|
* `@@|ftp.bmp.ovh^|`
|
||||||
* `@@|adsterra.com^|`
|
* `@@|adsterra.com^|`
|
||||||
|
* `@@.atlassian.net$document`
|
||||||
|
* `@@||ad.alimama.com^$genericblock`
|
||||||
*/
|
*/
|
||||||
if (
|
|
||||||
(
|
|
||||||
// line.startsWith('@@|')
|
|
||||||
line[2] === '|'
|
|
||||||
// line.startsWith('@@.')
|
|
||||||
|| line[2] === '.'
|
|
||||||
/**
|
|
||||||
* line.startsWith('@@://')
|
|
||||||
*
|
|
||||||
* `@@://googleadservices.com^|`
|
|
||||||
* `@@://www.googleadservices.com^|`
|
|
||||||
*/
|
|
||||||
|| (line[2] === ':' && line[3] === '/' && line[4] === '/')
|
|
||||||
)
|
|
||||||
&& (
|
|
||||||
lineEndsWithCaretOrCaretVerticalBar
|
|
||||||
|| line.endsWith('$genericblock')
|
|
||||||
|| line.endsWith('$document')
|
|
||||||
)
|
|
||||||
) {
|
|
||||||
const _domain = line
|
|
||||||
.replace('@@||', '')
|
|
||||||
.replace('@@://', '')
|
|
||||||
.replace('@@|', '')
|
|
||||||
.replace('@@.', '')
|
|
||||||
.replace('^|', '')
|
|
||||||
.replace('^$genericblock', '')
|
|
||||||
.replace('$genericblock', '')
|
|
||||||
.replace('^$document', '')
|
|
||||||
.replace('$document', '')
|
|
||||||
.replaceAll('^', '')
|
|
||||||
.trim();
|
|
||||||
|
|
||||||
const domain = normalizeDomain(_domain);
|
let sliceStart = 0;
|
||||||
|
let sliceEnd: number | undefined;
|
||||||
|
|
||||||
|
// line.startsWith('@@|') || line.startsWith('@@.')
|
||||||
|
if (line[2] === '|' || line[2] === '.') {
|
||||||
|
sliceStart = 3;
|
||||||
|
// line.startsWith('@@||')
|
||||||
|
if (line[3] === '|') {
|
||||||
|
sliceStart = 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* line.startsWith('@@://')
|
||||||
|
*
|
||||||
|
* `@@://googleadservices.com^|`
|
||||||
|
* `@@://www.googleadservices.com^|`
|
||||||
|
*/
|
||||||
|
if (line[2] === ':' && line[3] === '/' && line[4] === '/') {
|
||||||
|
sliceStart = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lineEndsWithCaretOrCaretVerticalBar) {
|
||||||
|
sliceEnd = -2;
|
||||||
|
} else if (line.endsWith('$genericblock')) {
|
||||||
|
sliceEnd = -13;
|
||||||
|
if (line[len - 14] === '^') { // line.endsWith('^$genericblock')
|
||||||
|
sliceEnd = -14;
|
||||||
|
}
|
||||||
|
} else if (line.endsWith('$document')) {
|
||||||
|
sliceEnd = -9;
|
||||||
|
if (line[len - 10] === '^') { // line.endsWith('^$document')
|
||||||
|
sliceEnd = -10;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sliceStart !== 0 || sliceEnd !== undefined) {
|
||||||
|
const sliced = line.slice(sliceStart, sliceEnd);
|
||||||
|
const domain = normalizeDomain(sliced);
|
||||||
if (domain) {
|
if (domain) {
|
||||||
return [domain, ParseType.WhiteIncludeSubdomain];
|
return [domain, ParseType.WhiteIncludeSubdomain];
|
||||||
}
|
}
|
||||||
|
|
||||||
return [
|
return [
|
||||||
`[parse-filter E0001] (white) invalid domain: ${_domain}`,
|
`[parse-filter E0001] (white) invalid domain: ${JSON.stringify({
|
||||||
|
line, sliced, sliceStart, sliceEnd
|
||||||
|
})}`,
|
||||||
ParseType.ErrorMessage
|
ParseType.ErrorMessage
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return [
|
||||||
|
`[parse-filter E0006] (white) failed to parse: ${JSON.stringify({
|
||||||
|
line, sliceStart, sliceEnd
|
||||||
|
})}`,
|
||||||
|
ParseType.ErrorMessage
|
||||||
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (firstChar === '|') {
|
if (firstCharCode === 124) { // 124 `|`
|
||||||
const lineEndswithCname = line.endsWith('$cname');
|
if (lineEndsWithCaretOrCaretVerticalBar) {
|
||||||
|
|
||||||
if (lineEndsWithCaretOrCaretVerticalBar || lineEndswithCname) {
|
|
||||||
/**
|
/**
|
||||||
* Some malformed filters can not be parsed by NetworkFilter:
|
* Some malformed filters can not be parsed by NetworkFilter:
|
||||||
*
|
*
|
||||||
@ -387,12 +400,11 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
|
|||||||
const includeAllSubDomain = line[1] === '|';
|
const includeAllSubDomain = line[1] === '|';
|
||||||
|
|
||||||
const sliceStart = includeAllSubDomain ? 2 : 1;
|
const sliceStart = includeAllSubDomain ? 2 : 1;
|
||||||
const sliceEnd = lastChar === '^'
|
const sliceEnd = lastCharCode === 94 // lastChar === '^'
|
||||||
? -1
|
? -1
|
||||||
: lineEndsWithCaretOrCaretVerticalBar
|
: (lineEndsWithCaretVerticalBar
|
||||||
? -2
|
? -2
|
||||||
// eslint-disable-next-line sukka/unicorn/no-nested-ternary -- speed
|
: undefined);
|
||||||
: (lineEndswithCname ? -6 : 0);
|
|
||||||
|
|
||||||
const _domain = line
|
const _domain = line
|
||||||
.slice(sliceStart, sliceEnd) // we already make sure line startsWith "|"
|
.slice(sliceStart, sliceEnd) // we already make sure line startsWith "|"
|
||||||
@ -410,7 +422,7 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const lineStartsWithSingleDot = firstChar === '.';
|
const lineStartsWithSingleDot = firstCharCode === 46; // 46 `.`
|
||||||
if (
|
if (
|
||||||
lineStartsWithSingleDot
|
lineStartsWithSingleDot
|
||||||
&& lineEndsWithCaretOrCaretVerticalBar
|
&& lineEndsWithCaretOrCaretVerticalBar
|
||||||
@ -489,7 +501,10 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
|
|||||||
* `-logging.nextmedia.com`
|
* `-logging.nextmedia.com`
|
||||||
* `_social_tracking.js^`
|
* `_social_tracking.js^`
|
||||||
*/
|
*/
|
||||||
if (firstChar !== '|' && lastChar === '^') {
|
if (
|
||||||
|
firstCharCode !== 124 // 124 `|`
|
||||||
|
&& lastCharCode === 94 // 94 `^`
|
||||||
|
) {
|
||||||
const _domain = line.slice(0, -1);
|
const _domain = line.slice(0, -1);
|
||||||
|
|
||||||
const suffix = gorhill.getPublicSuffix(_domain);
|
const suffix = gorhill.getPublicSuffix(_domain);
|
||||||
@ -553,63 +568,3 @@ function parse($line: string, gorhill: PublicSuffixList): null | [hostname: stri
|
|||||||
ParseType.ErrorMessage
|
ParseType.ErrorMessage
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
class CustomAbortError extends Error {
|
|
||||||
public readonly name = 'AbortError';
|
|
||||||
public readonly digest = 'AbortError';
|
|
||||||
}
|
|
||||||
|
|
||||||
const sleepWithAbort = (ms: number, signal: AbortSignal) => new Promise<void>((resolve, reject) => {
|
|
||||||
signal.throwIfAborted();
|
|
||||||
signal.addEventListener('abort', stop);
|
|
||||||
Bun.sleep(ms).then(done).catch(doReject);
|
|
||||||
|
|
||||||
function done() {
|
|
||||||
signal.removeEventListener('abort', stop);
|
|
||||||
resolve();
|
|
||||||
}
|
|
||||||
function stop(this: AbortSignal) {
|
|
||||||
reject(this.reason);
|
|
||||||
}
|
|
||||||
function doReject(reason: unknown) {
|
|
||||||
signal.removeEventListener('abort', stop);
|
|
||||||
reject(reason);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
async function fetchAssets(url: string, fallbackUrls: string[] | readonly string[]) {
|
|
||||||
const controller = new AbortController();
|
|
||||||
|
|
||||||
const fetchMainPromise = fetchWithRetry(url, { signal: controller.signal, ...defaultRequestInit })
|
|
||||||
.then(r => r.text())
|
|
||||||
.then(text => {
|
|
||||||
console.log(picocolors.gray('[fetch finish]'), picocolors.gray(url));
|
|
||||||
controller.abort();
|
|
||||||
return text;
|
|
||||||
});
|
|
||||||
const createFetchFallbackPromise = async (url: string, index: number) => {
|
|
||||||
// Most assets can be downloaded within 250ms. To avoid wasting bandwidth, we will wait for 350ms before downloading from the fallback URL.
|
|
||||||
try {
|
|
||||||
await sleepWithAbort(300 + (index + 1) * 20, controller.signal);
|
|
||||||
} catch {
|
|
||||||
console.log(picocolors.gray('[fetch cancelled early]'), picocolors.gray(url));
|
|
||||||
throw new CustomAbortError();
|
|
||||||
}
|
|
||||||
if (controller.signal.aborted) {
|
|
||||||
console.log(picocolors.gray('[fetch cancelled]'), picocolors.gray(url));
|
|
||||||
throw new CustomAbortError();
|
|
||||||
}
|
|
||||||
const res = await fetchWithRetry(url, { signal: controller.signal, ...defaultRequestInit });
|
|
||||||
const text = await res.text();
|
|
||||||
controller.abort();
|
|
||||||
return text;
|
|
||||||
};
|
|
||||||
|
|
||||||
return Promise.any([
|
|
||||||
fetchMainPromise,
|
|
||||||
...fallbackUrls.map(createFetchFallbackPromise)
|
|
||||||
]).catch(e => {
|
|
||||||
console.log(`Download Rule for [${url}] failed`);
|
|
||||||
throw e;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
import type { PublicSuffixList } from 'gorhill-publicsuffixlist';
|
import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
|
||||||
import { createCachedGorhillGetDomain } from './cached-tld-parse';
|
import { createCachedGorhillGetDomain } from './cached-tld-parse';
|
||||||
|
|
||||||
const compare = (a: string | null, b: string | null) => {
|
const compare = (a: string | null, b: string | null) => {
|
||||||
|
|||||||
2
Build/mod.d.ts
vendored
2
Build/mod.d.ts
vendored
@ -1,4 +1,4 @@
|
|||||||
declare module 'gorhill-publicsuffixlist' {
|
declare module '@gorhill/publicsuffixlist' {
|
||||||
type Selfie =
|
type Selfie =
|
||||||
| string
|
| string
|
||||||
| {
|
| {
|
||||||
|
|||||||
@ -15,13 +15,13 @@
|
|||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@cliqz/adblocker": "^1.26.12",
|
"@cliqz/adblocker": "^1.26.12",
|
||||||
|
"@gorhill/publicsuffixlist": "^3.0.1",
|
||||||
"@sukka/listdir": "^0.3.1",
|
"@sukka/listdir": "^0.3.1",
|
||||||
"async-retry": "^1.3.3",
|
"async-retry": "^1.3.3",
|
||||||
"async-sema": "^3.1.1",
|
"async-sema": "^3.1.1",
|
||||||
"ci-info": "^4.0.0",
|
"ci-info": "^4.0.0",
|
||||||
"csv-parse": "^5.5.3",
|
"csv-parse": "^5.5.3",
|
||||||
"fast-cidr-tools": "^0.2.2",
|
"fast-cidr-tools": "^0.2.2",
|
||||||
"gorhill-publicsuffixlist": "github:gorhill/publicsuffixlist.js",
|
|
||||||
"mnemonist": "^0.39.6",
|
"mnemonist": "^0.39.6",
|
||||||
"path-scurry": "^1.10.1",
|
"path-scurry": "^1.10.1",
|
||||||
"picocolors": "^1.0.0",
|
"picocolors": "^1.0.0",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user