From 5e0780af35367079cac79c19ba9d66f4193ae8d2 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Sat, 18 Jan 2025 03:17:38 +0800 Subject: [PATCH] Perf: preload all reject data source --- Build/build-reject-domainset.ts | 37 ++++++------ Build/lib/parse-filter.test.ts | 16 +----- Build/lib/parse-filter/filters.ts | 96 +++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 32 deletions(-) diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts index cf35fad4..55603c33 100644 --- a/Build/build-reject-domainset.ts +++ b/Build/build-reject-domainset.ts @@ -4,7 +4,7 @@ import process from 'node:process'; import { processHostsWithPreload } from './lib/parse-filter/hosts'; import { processDomainListsWithPreload } from './lib/parse-filter/domainlists'; -import { processFilterRules } from './lib/parse-filter/filters'; +import { processFilterRulesWithPreload } from './lib/parse-filter/filters'; import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_WHITELIST } from './constants/reject-data-source'; import { compareAndWriteFile } from './lib/create-file'; @@ -33,6 +33,9 @@ const hostsDownloads = HOSTS.map(entry => processHostsWithPreload(...entry)); const hostsExtraDownloads = HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry)); const domainListsDownloads = DOMAIN_LISTS.map(entry => processDomainListsWithPreload(...entry)); const domainListsExtraDownloads = DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry)); +const adguardFiltersDownloads = ADGUARD_FILTERS.map(entry => processFilterRulesWithPreload(...entry)); +const adguardFiltersExtraDownloads = ADGUARD_FILTERS_EXTRA.map(entry => processFilterRulesWithPreload(...entry)); +const adguardFiltersWhitelistsDownloads = ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRulesWithPreload(...entry)); export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => { const rejectBaseDescription = [ @@ -81,24 +84,24 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as domainListsDownloads.map(task => task(childSpan).then(appendArrayToRejectOutput)), domainListsExtraDownloads.map(task => task(childSpan).then(appendArrayToRejectExtraOutput)), - ADGUARD_FILTERS.map( - entry => processFilterRules(childSpan, ...entry) - .then(({ white, black }) => { - addArrayElementsToSet(filterRuleWhitelistDomainSets, white); - appendArrayToRejectOutput(black); - }) + adguardFiltersDownloads.map( + task => task(childSpan).then(({ white, black }) => { + addArrayElementsToSet(filterRuleWhitelistDomainSets, white); + appendArrayToRejectOutput(black); + }) ), - ADGUARD_FILTERS_EXTRA.map( - entry => processFilterRules(childSpan, ...entry) - .then(({ white, black }) => { - addArrayElementsToSet(filterRuleWhitelistDomainSets, white); - appendArrayToRejectExtraOutput(black); - }) + adguardFiltersExtraDownloads.map( + task => task(childSpan).then(({ white, black }) => { + addArrayElementsToSet(filterRuleWhitelistDomainSets, white); + appendArrayToRejectExtraOutput(black); + }) + ), + adguardFiltersWhitelistsDownloads.map( + task => task(childSpan).then(({ white, black }) => { + addArrayElementsToSet(filterRuleWhitelistDomainSets, white); + addArrayElementsToSet(filterRuleWhitelistDomainSets, black); + }) ), - ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRules(childSpan, ...entry).then(({ white, black }) => { - addArrayElementsToSet(filterRuleWhitelistDomainSets, white); - addArrayElementsToSet(filterRuleWhitelistDomainSets, black); - })), getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput), readLocalRejectDomainsetPromise.then(appendArrayToRejectOutput), readLocalRejectDomainsetPromise.then(appendArrayToRejectExtraOutput), diff --git a/Build/lib/parse-filter.test.ts b/Build/lib/parse-filter.test.ts index 043571bb..e112bd0b 100644 --- a/Build/lib/parse-filter.test.ts +++ b/Build/lib/parse-filter.test.ts @@ -1,11 +1,7 @@ import { describe, it } from 'mocha'; -import { parse, processFilterRules } from './parse-filter/filters'; +import { parse } from './parse-filter/filters'; import type { ParseType } from './parse-filter/filters'; -import { createCacheKey } from './cache-filesystem'; -import { createSpan } from '../trace'; - -const cacheKey = createCacheKey(__filename); describe('parse', () => { const MUTABLE_PARSE_LINE_RESULT: [string, ParseType] = ['', 1000]; @@ -14,13 +10,3 @@ describe('parse', () => { console.log(parse('||top.mail.ru^$badfilter', MUTABLE_PARSE_LINE_RESULT, false)); }); }); - -describe.skip('processFilterRules', () => { - it('https://filters.adtidy.org/extension/ublock/filters/18_optimized.txt', () => { - console.log(processFilterRules( - createSpan('noop'), - cacheKey('https://filters.adtidy.org/extension/ublock/filters/18_optimized.txt'), - [] - )); - }); -}); diff --git a/Build/lib/parse-filter/filters.ts b/Build/lib/parse-filter/filters.ts index 132ab06d..b2db3acc 100644 --- a/Build/lib/parse-filter/filters.ts +++ b/Build/lib/parse-filter/filters.ts @@ -20,6 +20,102 @@ const enum ParseType { export { type ParseType }; +export function processFilterRulesWithPreload( + filterRulesUrl: string, + fallbackUrls?: string[] | null, + allowThirdParty = false +) { + const downloadPromise = fetchAssets(filterRulesUrl, fallbackUrls); + + return (span: Span) => span.traceChildAsync<{ white: string[], black: string[] }>(`process filter rules: ${filterRulesUrl}`, async (span) => { + const text = await span.traceChildPromise('download', downloadPromise); + + const whitelistDomainSets = new Set(); + const blacklistDomainSets = new Set(); + + const warningMessages: string[] = []; + + const MUTABLE_PARSE_LINE_RESULT: [string, ParseType] = ['', ParseType.NotParsed]; + /** + * @param {string} line + */ + const lineCb = (line: string) => { + const result = parse(line, MUTABLE_PARSE_LINE_RESULT, allowThirdParty); + const flag = result[1]; + + if (flag === ParseType.NotParsed) { + throw new Error(`Didn't parse line: ${line}`); + } + if (flag === ParseType.Null) { + return; + } + + const hostname = result[0]; + + if (flag === ParseType.WhiteIncludeSubdomain || flag === ParseType.WhiteAbsolute) { + onWhiteFound(hostname, filterRulesUrl); + } else { + onBlackFound(hostname, filterRulesUrl); + } + + switch (flag) { + case ParseType.WhiteIncludeSubdomain: + if (hostname[0] === '.') { + whitelistDomainSets.add(hostname); + } else { + whitelistDomainSets.add(`.${hostname}`); + } + break; + case ParseType.WhiteAbsolute: + whitelistDomainSets.add(hostname); + break; + case ParseType.BlackIncludeSubdomain: + if (hostname[0] === '.') { + blacklistDomainSets.add(hostname); + } else { + blacklistDomainSets.add(`.${hostname}`); + } + break; + case ParseType.BlackAbsolute: + blacklistDomainSets.add(hostname); + break; + case ParseType.ErrorMessage: + warningMessages.push(hostname); + break; + default: + break; + } + }; + + const filterRules = text.split('\n'); + + span.traceChild('parse adguard filter').traceSyncFn(() => { + for (let i = 0, len = filterRules.length; i < len; i++) { + lineCb(filterRules[i]); + } + }); + + for (let i = 0, len = warningMessages.length; i < len; i++) { + console.warn( + picocolors.yellow(warningMessages[i]), + picocolors.gray(picocolors.underline(filterRulesUrl)) + ); + } + + console.log( + picocolors.gray('[process filter]'), + picocolors.gray(filterRulesUrl), + picocolors.gray(`white: ${whitelistDomainSets.size}`), + picocolors.gray(`black: ${blacklistDomainSets.size}`) + ); + + return { + white: Array.from(whitelistDomainSets), + black: Array.from(blacklistDomainSets) + }; + }); +} + export async function processFilterRules( parentSpan: Span, filterRulesUrl: string,