Perf: preload all reject data sources

This commit is contained in:
SukkaW 2025-01-18 03:17:38 +08:00
parent 4cedc81b6c
commit 5e0780af35
3 changed files with 117 additions and 32 deletions

View File

@ -4,7 +4,7 @@ import process from 'node:process';
import { processHostsWithPreload } from './lib/parse-filter/hosts';
import { processDomainListsWithPreload } from './lib/parse-filter/domainlists';
import { processFilterRules } from './lib/parse-filter/filters';
import { processFilterRulesWithPreload } from './lib/parse-filter/filters';
import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_WHITELIST } from './constants/reject-data-source';
import { compareAndWriteFile } from './lib/create-file';
@ -33,6 +33,9 @@ const hostsDownloads = HOSTS.map(entry => processHostsWithPreload(...entry));
// Deferred tasks for every remote list, built at module scope before the task body runs.
// Each array element is a function that is later invoked with a span (`task(childSpan)`).
// NOTE(review): processFilterRulesWithPreload begins its fetch as soon as it is called;
// the hosts / domain-list helpers presumably do the same — confirm in their modules.

// Hosts-format sources (extra set).
const hostsExtraDownloads = HOSTS_EXTRA.map(
  (source) => processHostsWithPreload(...source)
);

// Plain domain-list sources.
const domainListsDownloads = DOMAIN_LISTS.map(
  (source) => processDomainListsWithPreload(...source)
);
const domainListsExtraDownloads = DOMAIN_LISTS_EXTRA.map(
  (source) => processDomainListsWithPreload(...source)
);

// AdGuard-style filter sources: base, extra, and whitelist-only.
const adguardFiltersDownloads = ADGUARD_FILTERS.map(
  (source) => processFilterRulesWithPreload(...source)
);
const adguardFiltersExtraDownloads = ADGUARD_FILTERS_EXTRA.map(
  (source) => processFilterRulesWithPreload(...source)
);
const adguardFiltersWhitelistsDownloads = ADGUARD_FILTERS_WHITELIST.map(
  (source) => processFilterRulesWithPreload(...source)
);
export const buildRejectDomainSet = task(require.main === module, __filename)(async (span) => {
const rejectBaseDescription = [
@ -81,24 +84,24 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
domainListsDownloads.map(task => task(childSpan).then(appendArrayToRejectOutput)),
domainListsExtraDownloads.map(task => task(childSpan).then(appendArrayToRejectExtraOutput)),
ADGUARD_FILTERS.map(
entry => processFilterRules(childSpan, ...entry)
.then(({ white, black }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
appendArrayToRejectOutput(black);
})
adguardFiltersDownloads.map(
task => task(childSpan).then(({ white, black }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
appendArrayToRejectOutput(black);
})
),
ADGUARD_FILTERS_EXTRA.map(
entry => processFilterRules(childSpan, ...entry)
.then(({ white, black }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
appendArrayToRejectExtraOutput(black);
})
adguardFiltersExtraDownloads.map(
task => task(childSpan).then(({ white, black }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
appendArrayToRejectExtraOutput(black);
})
),
adguardFiltersWhitelistsDownloads.map(
task => task(childSpan).then(({ white, black }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
addArrayElementsToSet(filterRuleWhitelistDomainSets, black);
})
),
ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRules(childSpan, ...entry).then(({ white, black }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
addArrayElementsToSet(filterRuleWhitelistDomainSets, black);
})),
getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput),
readLocalRejectDomainsetPromise.then(appendArrayToRejectOutput),
readLocalRejectDomainsetPromise.then(appendArrayToRejectExtraOutput),

View File

@ -1,11 +1,7 @@
import { describe, it } from 'mocha';
import { parse, processFilterRules } from './parse-filter/filters';
import { parse } from './parse-filter/filters';
import type { ParseType } from './parse-filter/filters';
import { createCacheKey } from './cache-filesystem';
import { createSpan } from '../trace';
const cacheKey = createCacheKey(__filename);
describe('parse', () => {
const MUTABLE_PARSE_LINE_RESULT: [string, ParseType] = ['', 1000];
@ -14,13 +10,3 @@ describe('parse', () => {
console.log(parse('||top.mail.ru^$badfilter', MUTABLE_PARSE_LINE_RESULT, false));
});
});
// Manual smoke test for processFilterRules; kept under `describe.skip` so it does not run by default.
describe.skip('processFilterRules', () => {
  it('https://filters.adtidy.org/extension/ublock/filters/18_optimized.txt', async () => {
    // processFilterRules is async: await it so the resolved value (not a pending
    // Promise) is printed, and so a rejection fails this test instead of escaping it.
    // NOTE(review): the second argument is the `filterRulesUrl` parameter, yet a cache key
    // is passed here — confirm this is intended.
    const result = await processFilterRules(
      createSpan('noop'),
      cacheKey('https://filters.adtidy.org/extension/ublock/filters/18_optimized.txt'),
      []
    );
    console.log(result);
  });
});

View File

@ -20,6 +20,102 @@ const enum ParseType {
export { type ParseType };
/**
 * Starts downloading a filter list immediately and returns a deferred processor for it.
 *
 * The fetch is issued when this function is called, not when the returned callback
 * runs, so callers can start every download up front and do the parsing later.
 *
 * @param filterRulesUrl - URL of the filter list; also used in the trace name and in log output.
 * @param fallbackUrls - Forwarded unchanged to `fetchAssets` as its second argument
 *   (presumably mirror URLs — confirm against `fetchAssets`).
 * @param allowThirdParty - Forwarded to `parse` for every line. Defaults to `false`.
 * @returns A function that, given a parent span, awaits the download, parses every line
 *   and resolves with the de-duplicated `white` and `black` domain arrays. The returned
 *   promise rejects if the download failed or if `parse` leaves a line flagged as
 *   `ParseType.NotParsed`.
 */
export function processFilterRulesWithPreload(
  filterRulesUrl: string,
  fallbackUrls?: string[] | null,
  allowThirdParty = false
) {
  const downloadPromise = fetchAssets(filterRulesUrl, fallbackUrls);
  // The download can settle before the returned callback is ever invoked. Mark the
  // promise as handled so an early failure is not reported as an unhandled rejection;
  // the error is still rethrown by the `await` inside the callback below.
  downloadPromise.catch(() => { /* surfaced when the preloaded promise is awaited */ });

  return (span: Span) => span.traceChildAsync<{ white: string[], black: string[] }>(`process filter rules: ${filterRulesUrl}`, async (span) => {
    const text = await span.traceChildPromise('download', downloadPromise);

    const whitelistDomainSets = new Set<string>();
    const blacklistDomainSets = new Set<string>();
    const warningMessages: string[] = [];

    // Reused result tuple handed to `parse` for every line: [payload, flag].
    const MUTABLE_PARSE_LINE_RESULT: [string, ParseType] = ['', ParseType.NotParsed];

    // Parses a single filter line and records its outcome in the sets / warnings above.
    const lineCb = (line: string) => {
      const result = parse(line, MUTABLE_PARSE_LINE_RESULT, allowThirdParty);
      const flag = result[1];

      if (flag === ParseType.NotParsed) {
        throw new Error(`Didn't parse line: ${line}`);
      }
      if (flag === ParseType.Null) {
        return;
      }

      const hostname = result[0];

      // Every flag that is not one of the two white flags is reported through onBlackFound.
      if (flag === ParseType.WhiteIncludeSubdomain || flag === ParseType.WhiteAbsolute) {
        onWhiteFound(hostname, filterRulesUrl);
      } else {
        onBlackFound(hostname, filterRulesUrl);
      }

      switch (flag) {
        case ParseType.WhiteIncludeSubdomain:
          // "Include subdomain" entries are stored with a leading dot.
          if (hostname[0] === '.') {
            whitelistDomainSets.add(hostname);
          } else {
            whitelistDomainSets.add(`.${hostname}`);
          }
          break;
        case ParseType.WhiteAbsolute:
          whitelistDomainSets.add(hostname);
          break;
        case ParseType.BlackIncludeSubdomain:
          if (hostname[0] === '.') {
            blacklistDomainSets.add(hostname);
          } else {
            blacklistDomainSets.add(`.${hostname}`);
          }
          break;
        case ParseType.BlackAbsolute:
          blacklistDomainSets.add(hostname);
          break;
        case ParseType.ErrorMessage:
          // For this flag the first tuple slot carries the message text rather than a hostname.
          warningMessages.push(hostname);
          break;
        default:
          break;
      }
    };

    const filterRules = text.split('\n');

    span.traceChild('parse adguard filter').traceSyncFn(() => {
      for (let i = 0, len = filterRules.length; i < len; i++) {
        lineCb(filterRules[i]);
      }
    });

    // Warnings are printed only after the whole list has been parsed.
    for (let i = 0, len = warningMessages.length; i < len; i++) {
      console.warn(
        picocolors.yellow(warningMessages[i]),
        picocolors.gray(picocolors.underline(filterRulesUrl))
      );
    }

    console.log(
      picocolors.gray('[process filter]'),
      picocolors.gray(filterRulesUrl),
      picocolors.gray(`white: ${whitelistDomainSets.size}`),
      picocolors.gray(`black: ${blacklistDomainSets.size}`)
    );

    return {
      white: Array.from(whitelistDomainSets),
      black: Array.from(blacklistDomainSets)
    };
  });
}
export async function processFilterRules(
parentSpan: Span,
filterRulesUrl: string,