Separate DOMAIN/DOMAIN-SUFFIX when parsing AdGuard filters
Some checks are pending
Build / Build (push) Waiting to run
Build / Deploy to Cloudflare Pages (push) Blocked by required conditions
Build / Deploy to GitHub and GitLab (push) Blocked by required conditions

This commit is contained in:
SukkaW 2025-01-21 00:24:07 +08:00
parent 1ce322a71c
commit b378a4e87c
4 changed files with 39 additions and 135 deletions

View File

@ -85,21 +85,29 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
domainListsExtraDownloads.map(task => task(childSpan).then(appendArrayToRejectExtraOutput)),
adguardFiltersDownloads.map(
task => task(childSpan).then(({ white, black }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
appendArrayToRejectOutput(black);
task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains);
addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix);
rejectOutput.bulkAddDomain(blackDomains);
rejectOutput.bulkAddDomainSuffix(blackDomainSuffixes);
})
),
adguardFiltersExtraDownloads.map(
task => task(childSpan).then(({ white, black }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
appendArrayToRejectExtraOutput(black);
task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains);
addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix);
rejectOutput.bulkAddDomain(blackDomains);
rejectOutput.bulkAddDomainSuffix(blackDomainSuffixes);
})
),
adguardFiltersWhitelistsDownloads.map(
task => task(childSpan).then(({ white, black }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
addArrayElementsToSet(filterRuleWhitelistDomainSets, black);
task => task(childSpan).then(({ whiteDomains, whiteDomainSuffixes, blackDomains, blackDomainSuffixes }) => {
addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomains);
addArrayElementsToSet(filterRuleWhitelistDomainSets, whiteDomainSuffixes, suffix => '.' + suffix);
addArrayElementsToSet(filterRuleWhitelistDomainSets, blackDomains);
addArrayElementsToSet(filterRuleWhitelistDomainSets, blackDomainSuffixes, suffix => '.' + suffix);
})
),
getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput),

View File

@ -27,11 +27,14 @@ export function processFilterRulesWithPreload(
) {
const downloadPromise = fetchAssets(filterRulesUrl, fallbackUrls);
return (span: Span) => span.traceChildAsync<{ white: string[], black: string[] }>(`process filter rules: ${filterRulesUrl}`, async (span) => {
return (span: Span) => span.traceChildAsync<Record<'whiteDomains' | 'whiteDomainSuffixes' | 'blackDomains' | 'blackDomainSuffixes', string[]>>(`process filter rules: ${filterRulesUrl}`, async (span) => {
const text = await span.traceChildPromise('download', downloadPromise);
const whitelistDomainSets = new Set<string>();
const blacklistDomainSets = new Set<string>();
const whiteDomains = new Set<string>();
const whiteDomainSuffixes = new Set<string>();
const blackDomains = new Set<string>();
const blackDomainSuffixes = new Set<string>();
const warningMessages: string[] = [];
@ -60,24 +63,16 @@ export function processFilterRulesWithPreload(
switch (flag) {
case ParseType.WhiteIncludeSubdomain:
if (hostname[0] === '.') {
whitelistDomainSets.add(hostname);
} else {
whitelistDomainSets.add(`.${hostname}`);
}
whiteDomainSuffixes.add(hostname);
break;
case ParseType.WhiteAbsolute:
whitelistDomainSets.add(hostname);
whiteDomains.add(hostname);
break;
case ParseType.BlackIncludeSubdomain:
if (hostname[0] === '.') {
blacklistDomainSets.add(hostname);
} else {
blacklistDomainSets.add(`.${hostname}`);
}
blackDomainSuffixes.add(hostname);
break;
case ParseType.BlackAbsolute:
blacklistDomainSets.add(hostname);
blackDomains.add(hostname);
break;
case ParseType.ErrorMessage:
warningMessages.push(hostname);
@ -105,118 +100,19 @@ export function processFilterRulesWithPreload(
console.log(
picocolors.gray('[process filter]'),
picocolors.gray(filterRulesUrl),
picocolors.gray(`white: ${whitelistDomainSets.size}`),
picocolors.gray(`black: ${blacklistDomainSets.size}`)
picocolors.gray(`white: ${whiteDomains.size + whiteDomainSuffixes.size}`),
picocolors.gray(`black: ${blackDomains.size + blackDomainSuffixes.size}`)
);
return {
white: Array.from(whitelistDomainSets),
black: Array.from(blacklistDomainSets)
whiteDomains: Array.from(whiteDomains),
whiteDomainSuffixes: Array.from(whiteDomainSuffixes),
blackDomains: Array.from(blackDomains),
blackDomainSuffixes: Array.from(blackDomainSuffixes)
};
});
}
/**
 * Downloads a filter list and sorts every parsed hostname into a whitelist
 * or a blacklist.
 *
 * "Include subdomain" entries are stored with a leading dot; "absolute"
 * entries are stored verbatim. Entries flagged `ParseType.ErrorMessage` are
 * collected and printed as warnings once the traced work has finished,
 * followed by a one-line summary of how many entries were gathered.
 *
 * @param parentSpan - Span under which the download and parse steps are traced.
 * @param filterRulesUrl - URL of the filter list; also shown in log output.
 * @param fallbackUrls - Forwarded to `fetchAssets` together with the primary URL.
 * @param includeThirdParty - Forwarded to `parse` unchanged; defaults to `false`.
 * @returns The de-duplicated whitelist and blacklist entries.
 * @throws Error when a line is still flagged `ParseType.NotParsed` after `parse`.
 */
export async function processFilterRules(
  parentSpan: Span,
  filterRulesUrl: string,
  fallbackUrls?: string[] | null,
  includeThirdParty = false
): Promise<{ white: string[], black: string[] }> {
  const rootSpan = parentSpan.traceChild(`process filter rules: ${filterRulesUrl}`);

  const [white, black, warningMessages] = await rootSpan.traceAsyncFn(async (span) => {
    const body = await span.traceChildAsync('download', () => fetchAssets(filterRulesUrl, fallbackUrls));

    const allowed = new Set<string>();
    const blocked = new Set<string>();
    const warnings: string[] = [];

    // A single tuple is handed to every parse() call instead of allocating
    // one per line (presumably parse() writes its result into it).
    const sharedParseResult: [string, ParseType] = ['', ParseType.NotParsed];

    // Suffix-style entries are always stored with exactly one leading dot.
    const withLeadingDot = (hostname: string) => (hostname[0] === '.' ? hostname : `.${hostname}`);

    const handleLine = (line: string) => {
      const result = parse(line, sharedParseResult, includeThirdParty);
      const flag = result[1];

      if (flag === ParseType.NotParsed) {
        throw new Error(`Didn't parse line: ${line}`);
      }
      if (flag === ParseType.Null) {
        return;
      }

      const hostname = result[0];

      // Every flag that is not a whitelist flag (including error messages)
      // is reported through the blacklist hook.
      const isWhite = flag === ParseType.WhiteIncludeSubdomain || flag === ParseType.WhiteAbsolute;
      if (isWhite) {
        onWhiteFound(hostname, filterRulesUrl);
      } else {
        onBlackFound(hostname, filterRulesUrl);
      }

      if (flag === ParseType.WhiteIncludeSubdomain) {
        allowed.add(withLeadingDot(hostname));
      } else if (flag === ParseType.WhiteAbsolute) {
        allowed.add(hostname);
      } else if (flag === ParseType.BlackIncludeSubdomain) {
        blocked.add(withLeadingDot(hostname));
      } else if (flag === ParseType.BlackAbsolute) {
        blocked.add(hostname);
      } else if (flag === ParseType.ErrorMessage) {
        warnings.push(hostname);
      }
    };

    // Split before opening the child span so only parsing is traced.
    const lines = body.split('\n');
    span.traceChild('parse adguard filter').traceSyncFn(() => {
      for (const line of lines) {
        handleLine(line);
      }
    });

    return [[...allowed], [...blocked], warnings] as const;
  });

  for (const message of warningMessages) {
    console.warn(
      picocolors.yellow(message),
      picocolors.gray(picocolors.underline(filterRulesUrl))
    );
  }

  console.log(
    picocolors.gray('[process filter]'),
    picocolors.gray(filterRulesUrl),
    picocolors.gray(`white: ${white.length}`),
    picocolors.gray(`black: ${black.length}`)
  );

  return { white, black };
}
// const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder|\$cname)/;
// CNAME exception filters cannot be parsed by NetworkFilter.
// Surge / Clash can't handle CNAME either, so we just ignore them.

View File

@ -31,7 +31,7 @@
"fast-cidr-tools": "^0.3.1",
"fast-fifo": "^1.3.2",
"fdir": "^6.4.3",
"foxts": "^1.1.6",
"foxts": "^1.1.7",
"hash-wasm": "^4.12.0",
"json-stringify-pretty-compact": "^3.0.0",
"picocolors": "^1.1.1",

10
pnpm-lock.yaml generated
View File

@ -53,8 +53,8 @@ importers:
specifier: ^6.4.3
version: 6.4.3(picomatch@4.0.2)
foxts:
specifier: ^1.1.6
version: 1.1.6
specifier: ^1.1.7
version: 1.1.7
hash-wasm:
specifier: ^4.12.0
version: 4.12.0
@ -1136,8 +1136,8 @@ packages:
resolution: {integrity: sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==}
engines: {node: '>= 6'}
foxts@1.1.6:
resolution: {integrity: sha512-O2UR/MDLo0w4igcFHwLn2KyXUD84P6bE3U4OpVsxvcYrWLFvvDO8zKLBS/o++tFJTCq7p/3USR48E8/dF2vAAQ==}
foxts@1.1.7:
resolution: {integrity: sha512-Pw7S1yI20GY8gfj6RXt9usRE5TdQ/lgAqpy2EaWKUVNARC+jW0hxx/MQH8xkNlT3NSpt0X1P99CJTEvh3kVdUQ==}
fs-constants@1.0.0:
resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==}
@ -2934,7 +2934,7 @@ snapshots:
combined-stream: 1.0.8
mime-types: 2.1.35
foxts@1.1.6: {}
foxts@1.1.7: {}
fs-constants@1.0.0: {}