From af8cce4f45c2be220d5a73456dac2f109d81882f Mon Sep 17 00:00:00 2001 From: SukkaW Date: Mon, 22 Jan 2024 12:09:08 +0800 Subject: [PATCH] Update Reject Hosts / Add mirror support for Hosts Source --- Build/build-common.ts | 52 +++++++++++++++++---------------- Build/build-reject-domainset.ts | 10 ++++--- Build/lib/parse-filter.ts | 31 ++++++++++++++++---- Build/lib/reject-data-source.ts | 37 +++++++++++++++-------- Build/lib/trace-runner.ts | 8 +---- Build/trace/index.ts | 2 +- Source/non_ip/reject.conf | 5 ++-- 7 files changed, 88 insertions(+), 57 deletions(-) diff --git a/Build/build-common.ts b/Build/build-common.ts index 91530951..770615ea 100644 --- a/Build/build-common.ts +++ b/Build/build-common.ts @@ -91,34 +91,36 @@ const processFile = (span: Span, sourcePath: string) => { }); }; -async function transformDomainset(parentSpan: Span, sourcePath: string, relativePath: string) { - const span = parentSpan.traceChild(`transform domainset: ${path.basename(sourcePath, path.extname(sourcePath))}`); +function transformDomainset(parentSpan: Span, sourcePath: string, relativePath: string) { + return parentSpan + .traceChild(`transform domainset: ${path.basename(sourcePath, path.extname(sourcePath))}`) + .traceAsyncFn(async (span) => { + const res = await processFile(span, sourcePath); + if (!res) return; - const res = await processFile(span, sourcePath); - if (!res) return; + const [title, descriptions, lines] = res; - const [title, descriptions, lines] = res; + const deduped = domainDeduper(lines); + const description = [ + ...SHARED_DESCRIPTION, + ...( + descriptions.length + ? ['', ...descriptions] + : [] + ) + ]; - const deduped = domainDeduper(lines); - const description = [ - ...SHARED_DESCRIPTION, - ...( - descriptions.length - ? ['', ...descriptions] - : [] - ) - ]; - - return span.traceAsyncFn(() => createRuleset( - span, - title, - description, - new Date(), - deduped, - 'domainset', - path.resolve(outputSurgeDir, relativePath), - path.resolve(outputClashDir, `${relativePath.slice(0, -path.extname(relativePath).length)}.txt`) - )); + return createRuleset( + span, + title, + description, + new Date(), + deduped, + 'domainset', + path.resolve(outputSurgeDir, relativePath), + path.resolve(outputClashDir, `${relativePath.slice(0, -path.extname(relativePath).length)}.txt`) + ); + }); } /** diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts index 36b71060..4a747091 100644 --- a/Build/build-reject-domainset.ts +++ b/Build/build-reject-domainset.ts @@ -27,14 +27,15 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { const domainSets = new Set(); - let shouldStop = false; // Parse from AdGuard Filters - await span + const shouldStop = await span .traceChild('download and process hosts / adblock filter rules') .traceAsyncFn(async (childSpan) => { + // eslint-disable-next-line sukka/no-single-return -- not single return + let shouldStop = false; await Promise.all([ // Parse from remote hosts & domain lists - ...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetHelpers.add(domainSets, hosts))), + ...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2], entry[3]).then(hosts => SetHelpers.add(domainSets, hosts))), ...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetHelpers.add(domainSets, hosts))), @@ -44,6 +45,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { : processFilterRules(childSpan, input[0], input[1], input[2]) ).then(({ white, black, foundDebugDomain }) => { if (foundDebugDomain) { + // eslint-disable-next-line sukka/no-single-return -- not single return shouldStop = true; // we should not break here, as we want to see full matches from all data source } @@ -65,7 +67,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => { setAddFromArray(domainSets, await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf'))); }) ]); - + // eslint-disable-next-line sukka/no-single-return -- not single return return shouldStop; }); diff --git a/Build/lib/parse-filter.ts b/Build/lib/parse-filter.ts index 06aeb011..340db2e4 100644 --- a/Build/lib/parse-filter.ts +++ b/Build/lib/parse-filter.ts @@ -44,25 +44,25 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl } )); } -export function processHosts(span: Span, hostsUrl: string, includeAllSubDomain = false, ttl: number | null = null) { - return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn(() => fsCache.apply( +export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) { + return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn((childSpan) => fsCache.apply( hostsUrl, async () => { const domainSets = new Set(); - for await (const l of await fetchRemoteTextByLine(hostsUrl)) { + const lineCb = (l: string) => { const line = processLine(l); if (!line) { - continue; + return; } const _domain = line.split(/\s/)[1]?.trim(); if (!_domain) { - continue; + return; } const domain = normalizeDomain(_domain); if (!domain) { - continue; + return; } if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) { console.warn(picocolors.red(hostsUrl), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND))); @@ -70,6 +70,25 @@ export function processHosts(span: Span, hostsUrl: string, includeAllSubDomain = } domainSets.add(includeAllSubDomain ? `.${domain}` : domain); + }; + + if (mirrors == null || mirrors.length === 0) { + for await (const l of await fetchRemoteTextByLine(hostsUrl)) { + lineCb(l); + } + } else { + // Avoid event loop starvation, so we wait for a macrotask before we start fetching. + await Promise.resolve(); + + const filterRules = await childSpan.traceChild('download hosts').traceAsyncFn(() => { + return fetchAssets(hostsUrl, mirrors).then(text => text.split('\n')); + }); + + childSpan.traceChild('parse hosts').traceSyncFn(() => { + for (let i = 0, len = filterRules.length; i < len; i++) { + lineCb(filterRules[i]); + } + }); } console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size)); diff --git a/Build/lib/reject-data-source.ts b/Build/lib/reject-data-source.ts index e6ada558..e9913f78 100644 --- a/Build/lib/reject-data-source.ts +++ b/Build/lib/reject-data-source.ts @@ -1,21 +1,34 @@ import { TTL } from './cache-filesystem'; -export const HOSTS = [ - ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, TTL.THREE_HOURS()], - ['https://someonewhocares.org/hosts/hosts', true, TTL.THREE_HOURS()], +type HostsSource = [main: string, mirrors: string[] | null, includeAllSubDomain: boolean, ttl: number]; + +export const HOSTS: HostsSource[] = [ + [ + 'https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', + ['https://raw.githubusercontent.com/uBlockOrigin/uAssets/master/thirdparties/pgl.yoyo.org/as/serverlist'], + true, + TTL.THREE_HOURS() + ], + ['https://someonewhocares.org/hosts/hosts', null, true, TTL.THREE_HOURS()], // no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl - ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', true, TTL.THREE_DAYS()], + ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', null, true, TTL.THREE_DAYS()], // have not been updated for more than a year, so we set a 14 days cache ttl - ['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', true, TTL.TWO_WEEKS()], - ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false, TTL.THREE_DAYS()], - ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', false, TTL.THREE_DAYS()], + ['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', null, true, TTL.TWO_WEEKS()], + ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', null, false, TTL.THREE_DAYS()], + ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', null, false, TTL.THREE_DAYS()], // ad-wars is not actively maintained, so we set a 7 days cache ttl - ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false, TTL.ONE_WEEK()], - ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true, TTL.THREE_HOURS()], + ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.ONE_WEEK()], + ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', null, true, TTL.THREE_HOURS()], // Curben's UrlHaus Malicious URL Blocklist - // 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt', - // 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt', - ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, TTL.THREE_HOURS()] + [ + 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', + [ + 'https://urlhaus-filter.pages.dev/urlhaus-filter-hosts.txt', + 'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-hosts.txt' + ], + true, + TTL.THREE_HOURS() + ] // Curben's Phishing URL Blocklist // Covered by lib/get-phishing-domains.ts // 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt' diff --git a/Build/lib/trace-runner.ts b/Build/lib/trace-runner.ts index 3bfe71f1..d1eed852 100644 --- a/Build/lib/trace-runner.ts +++ b/Build/lib/trace-runner.ts @@ -9,7 +9,7 @@ export function traceSync(prefix: string, fn: () => T, timeFormatter: Formatt console.log(`${timeFormatter(`[${((end - start) / 1e6).toFixed(3)}ms]`)} ${prefix}`); return result; } -traceSync.skip = (_prefix: string, fn: () => T): T => fn(); +// traceSync.skip = (_prefix: string, fn: () => T): T => fn(); export const traceAsync = async (prefix: string, fn: () => Promise, timeFormatter: Formatter = picocolors.blue): Promise => { const start = Bun.nanoseconds(); @@ -18,9 +18,3 @@ export const traceAsync = async (prefix: string, fn: () => Promise, timeFo console.log(`${timeFormatter(`[${((end - start) / 1e6).toFixed(3)}ms]`)} ${prefix}`); return result; }; - -export interface TaskResult { - readonly start: number, - readonly end: number, - readonly taskName: string -} diff --git a/Build/trace/index.ts b/Build/trace/index.ts index 89cee4ed..0f505ced 100644 --- a/Build/trace/index.ts +++ b/Build/trace/index.ts @@ -52,7 +52,7 @@ export const createSpan = (name: string, parentTraceResult?: TraceResult): Span const stop = (time?: number) => { if (status === SPAN_STATUS_END) { - throw new Error('span already stopped'); + throw new Error(`span already stopped: ${name}`); } const end = time ?? Bun.nanoseconds(); diff --git a/Source/non_ip/reject.conf b/Source/non_ip/reject.conf index 06adfc30..5ee9d01f 100644 --- a/Source/non_ip/reject.conf +++ b/Source/non_ip/reject.conf @@ -108,9 +108,7 @@ DOMAIN-KEYWORD,adjust. DOMAIN-KEYWORD,appsflyer DOMAIN-KEYWORD,dnserror DOMAIN-KEYWORD,marketing.net -AND,((DOMAIN-KEYWORD,f-log), (DOMAIN-SUFFIX,grammarly.io)) DOMAIN,stun.smartgslb.com -AND,((DOMAIN-SUFFIX,prod.hosts.ooklaserver.net), (DOMAIN-KEYWORD,.ad.)) DOMAIN-KEYWORD,_vmind.qqvideo.tc.qq.com DOMAIN-KEYWORD,-logging.nextmedia.com @@ -120,7 +118,10 @@ DOMAIN-KEYWORD,.engage.3m. # -telemetry.officeapps.live.com DOMAIN-KEYWORD,telemetry.officeapps.live.com DOMAIN-KEYWORD,-launches.appsflyersdk.com +DOMAIN-KEYWORD,-s2s.sensic.net +AND,((DOMAIN-KEYWORD,f-log), (DOMAIN-SUFFIX,grammarly.io)) +AND,((DOMAIN-SUFFIX,prod.hosts.ooklaserver.net), (DOMAIN-KEYWORD,.ad.)) AND,((DOMAIN-KEYWORD,genuine), (DOMAIN-KEYWORD,autodesk)) # Important: Force add the following domains without whitelisting