Separate DOMAIN/DOMAIN-SUFFIX when parsing AdGuard filters
Some checks are pending
Build / Build (push) Waiting to run
Build / Deploy to Cloudflare Pages (push) Blocked by required conditions
Build / Deploy to GitHub and GitLab (push) Blocked by required conditions

This commit is contained in:
SukkaW 2025-01-21 00:24:07 +08:00
parent 1ce322a71c
commit b378a4e87c
4 changed files with 39 additions and 135 deletions

View File

@ -85,21 +85,29 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
domainListsExtraDownloads.map(task => task(childSpan).then(appendArrayToRejectExtraOutput)), domainListsExtraDownloads.map(task => task(childSpan).then(appendArrayToRejectExtraOutput)),
adguardFiltersDownloads.map( adguardFiltersDownloads.map(
task => task(childSpan).then(({ white, black }) => { task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, white); addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains);
appendArrayToRejectOutput(black); addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix);
rejectOutput.bulkAddDomain(blackDomains);
rejectOutput.bulkAddDomainSuffix(blackDomainSuffixes);
}) })
), ),
adguardFiltersExtraDownloads.map( adguardFiltersExtraDownloads.map(
task => task(childSpan).then(({ white, black }) => { task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, white); addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains);
appendArrayToRejectExtraOutput(black); addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix);
rejectOutput.bulkAddDomain(blackDomains);
rejectOutput.bulkAddDomainSuffix(blackDomainSuffixes);
}) })
), ),
adguardFiltersWhitelistsDownloads.map( adguardFiltersWhitelistsDownloads.map(
task => task(childSpan).then(({ white, black }) => { task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, white); addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains);
addArrayElementsToSet(filterRuleWhitelistDomainSets, black); addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix);
addArrayElementsToSet(filterRuleWhitelistDomainSets, blackDomains);
addArrayElementsToSet(filterRuleWhitelistDomainSets, blackDomainSuffixes, suffix => '.' + suffix);
}) })
), ),
getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput), getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput),

View File

@ -27,11 +27,14 @@ export function processFilterRulesWithPreload(
) { ) {
const downloadPromise = fetchAssets(filterRulesUrl, fallbackUrls); const downloadPromise = fetchAssets(filterRulesUrl, fallbackUrls);
return (span: Span) => span.traceChildAsync<{ white: string[], black: string[] }>(`process filter rules: ${filterRulesUrl}`, async (span) => { return (span: Span) => span.traceChildAsync<Record<'whiteDomains' | 'whiteDomainSuffixes' | 'blackDomains' | 'blackDomainSuffixes', string[]>>(`process filter rules: ${filterRulesUrl}`, async (span) => {
const text = await span.traceChildPromise('download', downloadPromise); const text = await span.traceChildPromise('download', downloadPromise);
const whitelistDomainSets = new Set<string>(); const whiteDomains = new Set<string>();
const blacklistDomainSets = new Set<string>(); const whiteDomainSuffixes = new Set<string>();
const blackDomains = new Set<string>();
const blackDomainSuffixes = new Set<string>();
const warningMessages: string[] = []; const warningMessages: string[] = [];
@ -60,24 +63,16 @@ export function processFilterRulesWithPreload(
switch (flag) { switch (flag) {
case ParseType.WhiteIncludeSubdomain: case ParseType.WhiteIncludeSubdomain:
if (hostname[0] === '.') { whiteDomainSuffixes.add(hostname);
whitelistDomainSets.add(hostname);
} else {
whitelistDomainSets.add(`.${hostname}`);
}
break; break;
case ParseType.WhiteAbsolute: case ParseType.WhiteAbsolute:
whitelistDomainSets.add(hostname); whiteDomains.add(hostname);
break; break;
case ParseType.BlackIncludeSubdomain: case ParseType.BlackIncludeSubdomain:
if (hostname[0] === '.') { blackDomainSuffixes.add(hostname);
blacklistDomainSets.add(hostname);
} else {
blacklistDomainSets.add(`.${hostname}`);
}
break; break;
case ParseType.BlackAbsolute: case ParseType.BlackAbsolute:
blacklistDomainSets.add(hostname); blackDomains.add(hostname);
break; break;
case ParseType.ErrorMessage: case ParseType.ErrorMessage:
warningMessages.push(hostname); warningMessages.push(hostname);
@ -105,118 +100,19 @@ export function processFilterRulesWithPreload(
console.log( console.log(
picocolors.gray('[process filter]'), picocolors.gray('[process filter]'),
picocolors.gray(filterRulesUrl), picocolors.gray(filterRulesUrl),
picocolors.gray(`white: ${whitelistDomainSets.size}`), picocolors.gray(`white: ${whiteDomains.size + whiteDomainSuffixes.size}`),
picocolors.gray(`black: ${blacklistDomainSets.size}`) picocolors.gray(`black: ${blackDomains.size + blackDomainSuffixes.size}`)
); );
return { return {
white: Array.from(whitelistDomainSets), whiteDomains: Array.from(whiteDomains),
black: Array.from(blacklistDomainSets) whiteDomainSuffixes: Array.from(whiteDomainSuffixes),
blackDomains: Array.from(blackDomains),
blackDomainSuffixes: Array.from(blackDomainSuffixes)
}; };
}); });
} }
/**
 * Download a filter list and sort its rules into whitelist and blacklist hostnames.
 *
 * The list is fetched with `fetchAssets(filterRulesUrl, fallbackUrls)`, split on `'\n'`,
 * and each line is handed to `parse()`. Hostnames flagged as "include subdomain" are
 * stored with a leading dot; hostnames flagged as "absolute" are stored as returned by
 * `parse()`. Each set is de-duplicated before being turned into an array.
 *
 * Side effects: every parsed hostname is reported to `onWhiteFound` / `onBlackFound`,
 * parser warnings are printed with `console.warn`, and a summary line with the final
 * counts is printed with `console.log`.
 *
 * @param parentSpan - Span under which the "process filter rules" child span is created.
 * @param filterRulesUrl - URL of the filter list; also used in span names and log output.
 * @param fallbackUrls - Passed through to `fetchAssets` unchanged.
 * @param includeThirdParty - Passed through to `parse()` unchanged (presumably controls
 *   whether third-party rules are accepted; confirm against `parse`).
 * @returns The collected whitelist and blacklist entries.
 * @throws Error when `parse()` reports `ParseType.NotParsed` for a line.
 */
export async function processFilterRules(
  parentSpan: Span,
  filterRulesUrl: string,
  fallbackUrls?: string[] | null,
  includeThirdParty = false
): Promise<{ white: string[], black: string[] }> {
  // Include-subdomain entries always carry exactly one leading dot.
  const withLeadingDot = (hostname: string) => (hostname[0] === '.' ? hostname : `.${hostname}`);

  const { white, black, warnings } = await parentSpan
    .traceChild(`process filter rules: ${filterRulesUrl}`)
    .traceAsyncFn(async (span) => {
      const body = await span.traceChildAsync('download', () => fetchAssets(filterRulesUrl, fallbackUrls));

      const allowed = new Set<string>();
      const blocked = new Set<string>();
      const warnings: string[] = [];

      // One tuple shared by every parse() call (presumably filled in place by parse()).
      const parseBuffer: [string, ParseType] = ['', ParseType.NotParsed];

      const handleLine = (line: string) => {
        const parsed = parse(line, parseBuffer, includeThirdParty);
        const kind = parsed[1];

        if (kind === ParseType.NotParsed) {
          throw new Error(`Didn't parse line: ${line}`);
        }
        if (kind === ParseType.Null) {
          return;
        }

        const hostname = parsed[0];

        // Everything that is not a whitelist hit is reported as a blacklist hit,
        // including ParseType.ErrorMessage results.
        const isWhite = kind === ParseType.WhiteIncludeSubdomain || kind === ParseType.WhiteAbsolute;
        if (isWhite) {
          onWhiteFound(hostname, filterRulesUrl);
        } else {
          onBlackFound(hostname, filterRulesUrl);
        }

        switch (kind) {
          case ParseType.WhiteIncludeSubdomain:
            allowed.add(withLeadingDot(hostname));
            break;
          case ParseType.WhiteAbsolute:
            allowed.add(hostname);
            break;
          case ParseType.BlackIncludeSubdomain:
            blocked.add(withLeadingDot(hostname));
            break;
          case ParseType.BlackAbsolute:
            blocked.add(hostname);
            break;
          case ParseType.ErrorMessage:
            // For this flag the "hostname" slot carries the warning text.
            warnings.push(hostname);
            break;
          default:
            break;
        }
      };

      const lines = text_lines(body);
      span.traceChild('parse adguard filter').traceSyncFn(() => {
        for (const line of lines) {
          handleLine(line);
        }
      });

      return {
        white: Array.from(allowed),
        black: Array.from(blocked),
        warnings
      };

      // Split the downloaded text into individual rule lines.
      function text_lines(text: string): string[] {
        return text.split('\n');
      }
    });

  for (const message of warnings) {
    console.warn(
      picocolors.yellow(message),
      picocolors.gray(picocolors.underline(filterRulesUrl))
    );
  }

  console.log(
    picocolors.gray('[process filter]'),
    picocolors.gray(filterRulesUrl),
    picocolors.gray(`white: ${white.length}`),
    picocolors.gray(`black: ${black.length}`)
  );

  return { white, black };
}
// const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder|\$cname)/; // const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder|\$cname)/;
// cname exceptional filter can not be parsed by NetworkFilter // cname exceptional filter can not be parsed by NetworkFilter
// Surge / Clash can't handle CNAME either, so we just ignore them // Surge / Clash can't handle CNAME either, so we just ignore them

View File

@ -31,7 +31,7 @@
"fast-cidr-tools": "^0.3.1", "fast-cidr-tools": "^0.3.1",
"fast-fifo": "^1.3.2", "fast-fifo": "^1.3.2",
"fdir": "^6.4.3", "fdir": "^6.4.3",
"foxts": "^1.1.6", "foxts": "^1.1.7",
"hash-wasm": "^4.12.0", "hash-wasm": "^4.12.0",
"json-stringify-pretty-compact": "^3.0.0", "json-stringify-pretty-compact": "^3.0.0",
"picocolors": "^1.1.1", "picocolors": "^1.1.1",

10
pnpm-lock.yaml generated
View File

@ -53,8 +53,8 @@ importers:
specifier: ^6.4.3 specifier: ^6.4.3
version: 6.4.3(picomatch@4.0.2) version: 6.4.3(picomatch@4.0.2)
foxts: foxts:
specifier: ^1.1.6 specifier: ^1.1.7
version: 1.1.6 version: 1.1.7
hash-wasm: hash-wasm:
specifier: ^4.12.0 specifier: ^4.12.0
version: 4.12.0 version: 4.12.0
@ -1136,8 +1136,8 @@ packages:
resolution: {integrity: sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==} resolution: {integrity: sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==}
engines: {node: '>= 6'} engines: {node: '>= 6'}
foxts@1.1.6: foxts@1.1.7:
resolution: {integrity: sha512-O2UR/MDLo0w4igcFHwLn2KyXUD84P6bE3U4OpVsxvcYrWLFvvDO8zKLBS/o++tFJTCq7p/3USR48E8/dF2vAAQ==} resolution: {integrity: sha512-Pw7S1yI20GY8gfj6RXt9usRE5TdQ/lgAqpy2EaWKUVNARC+jW0hxx/MQH8xkNlT3NSpt0X1P99CJTEvh3kVdUQ==}
fs-constants@1.0.0: fs-constants@1.0.0:
resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==} resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==}
@ -2934,7 +2934,7 @@ snapshots:
combined-stream: 1.0.8 combined-stream: 1.0.8
mime-types: 2.1.35 mime-types: 2.1.35
foxts@1.1.6: {} foxts@1.1.7: {}
fs-constants@1.0.0: {} fs-constants@1.0.0: {}