mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Refactor: separate modules
This commit is contained in:
parent
eca2949062
commit
29410eb1c3
@ -2,7 +2,9 @@
|
||||
import path from 'node:path';
|
||||
import process from 'node:process';
|
||||
|
||||
import { processHosts, processFilterRules, processDomainLists } from './lib/parse-filter';
|
||||
import { processHosts } from './lib/parse-filter/hosts';
|
||||
import { processDomainLists } from './lib/parse-filter/domainlists';
|
||||
import { processFilterRules } from './lib/parse-filter/filters';
|
||||
|
||||
import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_WHITELIST } from './constants/reject-data-source';
|
||||
import { compareAndWriteFile } from './lib/create-file';
|
||||
@ -18,6 +20,7 @@ import { addArrayElementsToSet } from 'foxts/add-array-elements-to-set';
|
||||
import { appendArrayInPlace } from './lib/append-array-in-place';
|
||||
import { OUTPUT_INTERNAL_DIR, SOURCE_DIR } from './constants/dir';
|
||||
import { DomainsetOutput } from './lib/create-file';
|
||||
import { foundDebugDomain } from './lib/parse-filter/shared';
|
||||
|
||||
const readLocalRejectDomainsetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/reject_sukka.conf'));
|
||||
const readLocalRejectExtraDomainsetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/reject_sukka_extra.conf'));
|
||||
@ -63,65 +66,49 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
|
||||
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
|
||||
|
||||
// Parse from AdGuard Filters
|
||||
const shouldStop = await span
|
||||
await span
|
||||
.traceChild('download and process hosts / adblock filter rules')
|
||||
.traceAsyncFn(async (childSpan) => {
|
||||
// eslint-disable-next-line sukka/no-single-return -- not single return
|
||||
let shouldStop = false;
|
||||
await Promise.all([
|
||||
// Parse from remote hosts & domain lists
|
||||
HOSTS.map(entry => processHosts(childSpan, ...entry).then(appendArrayToRejectOutput)),
|
||||
HOSTS_EXTRA.map(entry => processHosts(childSpan, ...entry).then(appendArrayToRejectExtraOutput)),
|
||||
.traceAsyncFn((childSpan) => Promise.all([
|
||||
// Parse from remote hosts & domain lists
|
||||
HOSTS.map(entry => processHosts(childSpan, ...entry).then(appendArrayToRejectOutput)),
|
||||
HOSTS_EXTRA.map(entry => processHosts(childSpan, ...entry).then(appendArrayToRejectExtraOutput)),
|
||||
|
||||
DOMAIN_LISTS.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToRejectOutput)),
|
||||
DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToRejectExtraOutput)),
|
||||
DOMAIN_LISTS.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToRejectOutput)),
|
||||
DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(childSpan, ...entry).then(appendArrayToRejectExtraOutput)),
|
||||
|
||||
ADGUARD_FILTERS.map(
|
||||
entry => processFilterRules(childSpan, ...entry)
|
||||
.then(({ white, black, foundDebugDomain }) => {
|
||||
if (foundDebugDomain) {
|
||||
// eslint-disable-next-line sukka/no-single-return -- not single return
|
||||
shouldStop = true;
|
||||
// we should not break here, as we want to see full matches from all data source
|
||||
}
|
||||
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
|
||||
appendArrayToRejectOutput(black);
|
||||
})
|
||||
),
|
||||
ADGUARD_FILTERS_EXTRA.map(
|
||||
entry => processFilterRules(childSpan, ...entry)
|
||||
.then(({ white, black, foundDebugDomain }) => {
|
||||
if (foundDebugDomain) {
|
||||
// eslint-disable-next-line sukka/no-single-return -- not single return
|
||||
shouldStop = true;
|
||||
// we should not break here, as we want to see full matches from all data source
|
||||
}
|
||||
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
|
||||
appendArrayToRejectExtraOutput(black);
|
||||
})
|
||||
),
|
||||
ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRules(childSpan, ...entry).then(({ white, black }) => {
|
||||
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
|
||||
addArrayElementsToSet(filterRuleWhitelistDomainSets, black);
|
||||
})),
|
||||
getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput),
|
||||
readLocalRejectDomainsetPromise.then(appendArrayToRejectOutput),
|
||||
readLocalRejectDomainsetPromise.then(appendArrayToRejectExtraOutput),
|
||||
readLocalRejectExtraDomainsetPromise.then(appendArrayToRejectExtraOutput),
|
||||
// Dedupe domainSets
|
||||
// span.traceChildAsync('collect black keywords/suffixes', async () =>
|
||||
/**
|
||||
ADGUARD_FILTERS.map(
|
||||
entry => processFilterRules(childSpan, ...entry)
|
||||
.then(({ white, black }) => {
|
||||
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
|
||||
appendArrayToRejectOutput(black);
|
||||
})
|
||||
),
|
||||
ADGUARD_FILTERS_EXTRA.map(
|
||||
entry => processFilterRules(childSpan, ...entry)
|
||||
.then(({ white, black }) => {
|
||||
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
|
||||
appendArrayToRejectExtraOutput(black);
|
||||
})
|
||||
),
|
||||
ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRules(childSpan, ...entry).then(({ white, black }) => {
|
||||
addArrayElementsToSet(filterRuleWhitelistDomainSets, white);
|
||||
addArrayElementsToSet(filterRuleWhitelistDomainSets, black);
|
||||
})),
|
||||
getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput),
|
||||
readLocalRejectDomainsetPromise.then(appendArrayToRejectOutput),
|
||||
readLocalRejectDomainsetPromise.then(appendArrayToRejectExtraOutput),
|
||||
readLocalRejectExtraDomainsetPromise.then(appendArrayToRejectExtraOutput),
|
||||
// Dedupe domainSets
|
||||
// span.traceChildAsync('collect black keywords/suffixes', async () =>
|
||||
/**
|
||||
* Collect DOMAIN, DOMAIN-SUFFIX, and DOMAIN-KEYWORD from non_ip/reject.conf for deduplication
|
||||
* DOMAIN-WILDCARD is not really useful for deduplication, it is only included in AdGuardHome output
|
||||
*/
|
||||
rejectOutput.addFromRuleset(readLocalRejectRulesetPromise),
|
||||
rejectExtraOutput.addFromRuleset(readLocalRejectRulesetPromise)
|
||||
].flat());
|
||||
// eslint-disable-next-line sukka/no-single-return -- not single return
|
||||
return shouldStop;
|
||||
});
|
||||
rejectOutput.addFromRuleset(readLocalRejectRulesetPromise),
|
||||
rejectExtraOutput.addFromRuleset(readLocalRejectRulesetPromise)
|
||||
].flat()));
|
||||
|
||||
if (shouldStop) {
|
||||
if (foundDebugDomain.value) {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,6 @@
|
||||
import { processDomainLists, processHosts } from './parse-filter';
|
||||
import { processHosts } from './parse-filter/hosts';
|
||||
import { processDomainLists } from './parse-filter/domainlists';
|
||||
|
||||
import * as tldts from 'tldts-experimental';
|
||||
|
||||
import { dummySpan, printTraceResult } from '../trace';
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
import { describe, it } from 'mocha';
|
||||
|
||||
import { parse, processFilterRules } from './parse-filter';
|
||||
import type { ParseType } from './parse-filter';
|
||||
import { parse, processFilterRules } from './parse-filter/filters';
|
||||
import type { ParseType } from './parse-filter/filters';
|
||||
import { createCacheKey } from './cache-filesystem';
|
||||
import { createSpan } from '../trace';
|
||||
|
||||
@ -20,8 +20,7 @@ describe.skip('processFilterRules', () => {
|
||||
console.log(processFilterRules(
|
||||
createSpan('noop'),
|
||||
cacheKey('https://filters.adtidy.org/extension/ublock/filters/18_optimized.txt'),
|
||||
[],
|
||||
7_200_000
|
||||
[]
|
||||
));
|
||||
});
|
||||
});
|
||||
|
||||
51
Build/lib/parse-filter/domainlists.ts
Normal file
51
Build/lib/parse-filter/domainlists.ts
Normal file
@ -0,0 +1,51 @@
|
||||
import picocolors from 'picocolors';
|
||||
import { normalizeDomain } from '../normalize-domain';
|
||||
import { processLine } from '../process-line';
|
||||
import { onBlackFound } from './shared';
|
||||
import { fetchAssetsWithout304 } from '../fetch-assets';
|
||||
import type { Span } from '../../trace';
|
||||
|
||||
function domainListLineCb(l: string, set: string[], includeAllSubDomain: boolean, meta: string) {
|
||||
let line = processLine(l);
|
||||
if (!line) return;
|
||||
line = line.toLowerCase();
|
||||
|
||||
const domain = normalizeDomain(line);
|
||||
if (!domain) return;
|
||||
if (domain !== line) {
|
||||
console.log(
|
||||
picocolors.red('[process domain list]'),
|
||||
picocolors.gray(`line: ${line}`),
|
||||
picocolors.gray(`domain: ${domain}`),
|
||||
picocolors.gray(meta)
|
||||
);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
onBlackFound(domain, meta);
|
||||
|
||||
set.push(includeAllSubDomain ? `.${line}` : line);
|
||||
}
|
||||
|
||||
export function processDomainLists(
|
||||
span: Span,
|
||||
domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false
|
||||
) {
|
||||
return span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => {
|
||||
const text = await span.traceChildAsync(`process domainlist: ${domainListsUrl}`, () => fetchAssetsWithout304(
|
||||
domainListsUrl,
|
||||
mirrors
|
||||
));
|
||||
const domainSets: string[] = [];
|
||||
const filterRules = text.split('\n');
|
||||
|
||||
span.traceChildSync('parse domain list', () => {
|
||||
for (let i = 0, len = filterRules.length; i < len; i++) {
|
||||
domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl);
|
||||
}
|
||||
});
|
||||
|
||||
return domainSets;
|
||||
});
|
||||
}
|
||||
@ -1,121 +1,12 @@
|
||||
import { NetworkFilter } from '@ghostery/adblocker';
|
||||
import { processLine } from './process-line';
|
||||
import tldts from 'tldts-experimental';
|
||||
|
||||
import picocolors from 'picocolors';
|
||||
import { normalizeDomain } from './normalize-domain';
|
||||
import type { Span } from '../trace';
|
||||
import type { Span } from '../../trace';
|
||||
import { fetchAssetsWithout304 } from '../fetch-assets';
|
||||
import { onBlackFound, onWhiteFound } from './shared';
|
||||
import { createRetrieKeywordFilter as createKeywordFilter } from 'foxts/retrie';
|
||||
import { looseTldtsOpt } from '../constants/loose-tldts-opt';
|
||||
import { DEBUG_DOMAIN_TO_FIND } from '../constants/reject-data-source';
|
||||
import { noop } from 'foxts/noop';
|
||||
import { fetchAssetsWithout304 } from './fetch-assets';
|
||||
|
||||
let foundDebugDomain = false;
|
||||
|
||||
const onBlackFound = DEBUG_DOMAIN_TO_FIND
|
||||
? (line: string, meta: string) => {
|
||||
if (line.includes(DEBUG_DOMAIN_TO_FIND!)) {
|
||||
console.warn(picocolors.red(meta), '(black)', line.replaceAll(DEBUG_DOMAIN_TO_FIND!, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
|
||||
foundDebugDomain = true;
|
||||
}
|
||||
}
|
||||
: noop;
|
||||
|
||||
const onWhiteFound = DEBUG_DOMAIN_TO_FIND
|
||||
? (line: string, meta: string) => {
|
||||
if (line.includes(DEBUG_DOMAIN_TO_FIND!)) {
|
||||
console.warn(picocolors.red(meta), '(white)', line.replaceAll(DEBUG_DOMAIN_TO_FIND!, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
|
||||
foundDebugDomain = true;
|
||||
}
|
||||
}
|
||||
: noop;
|
||||
|
||||
function domainListLineCb(l: string, set: string[], includeAllSubDomain: boolean, meta: string) {
|
||||
let line = processLine(l);
|
||||
if (!line) return;
|
||||
line = line.toLowerCase();
|
||||
|
||||
const domain = normalizeDomain(line);
|
||||
if (!domain) return;
|
||||
if (domain !== line) {
|
||||
console.log(
|
||||
picocolors.red('[process domain list]'),
|
||||
picocolors.gray(`line: ${line}`),
|
||||
picocolors.gray(`domain: ${domain}`),
|
||||
picocolors.gray(meta)
|
||||
);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
onBlackFound(domain, meta);
|
||||
|
||||
set.push(includeAllSubDomain ? `.${line}` : line);
|
||||
}
|
||||
|
||||
export function processDomainLists(
|
||||
span: Span,
|
||||
domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false
|
||||
) {
|
||||
return span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => {
|
||||
const text = await span.traceChildAsync(`process domainlist: ${domainListsUrl}`, () => fetchAssetsWithout304(
|
||||
domainListsUrl,
|
||||
mirrors
|
||||
));
|
||||
const domainSets: string[] = [];
|
||||
const filterRules = text.split('\n');
|
||||
|
||||
span.traceChildSync('parse domain list', () => {
|
||||
for (let i = 0, len = filterRules.length; i < len; i++) {
|
||||
domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl);
|
||||
}
|
||||
});
|
||||
|
||||
return domainSets;
|
||||
});
|
||||
}
|
||||
|
||||
function hostsLineCb(l: string, set: string[], includeAllSubDomain: boolean, meta: string) {
|
||||
const line = processLine(l);
|
||||
if (!line) {
|
||||
return;
|
||||
}
|
||||
|
||||
const _domain = line.split(/\s/)[1]?.trim();
|
||||
if (!_domain) {
|
||||
return;
|
||||
}
|
||||
const domain = normalizeDomain(_domain);
|
||||
if (!domain) {
|
||||
return;
|
||||
}
|
||||
|
||||
onBlackFound(domain, meta);
|
||||
|
||||
set.push(includeAllSubDomain ? `.${domain}` : domain);
|
||||
}
|
||||
|
||||
export function processHosts(
|
||||
span: Span,
|
||||
hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false
|
||||
) {
|
||||
return span.traceChildAsync(`process hosts: ${hostsUrl}`, async (span) => {
|
||||
const text = await span.traceChild('download').traceAsyncFn(() => fetchAssetsWithout304(hostsUrl, mirrors));
|
||||
|
||||
const domainSets: string[] = [];
|
||||
|
||||
const filterRules = text.split('\n');
|
||||
|
||||
span.traceChild('parse hosts').traceSyncFn(() => {
|
||||
for (let i = 0, len = filterRules.length; i < len; i++) {
|
||||
hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl);
|
||||
}
|
||||
});
|
||||
|
||||
return domainSets;
|
||||
});
|
||||
}
|
||||
import { normalizeDomain } from '../normalize-domain';
|
||||
import { looseTldtsOpt } from '../../constants/loose-tldts-opt';
|
||||
import tldts from 'tldts-experimental';
|
||||
import { NetworkFilter } from '@ghostery/adblocker';
|
||||
|
||||
const enum ParseType {
|
||||
WhiteIncludeSubdomain = 0,
|
||||
@ -134,7 +25,7 @@ export async function processFilterRules(
|
||||
filterRulesUrl: string,
|
||||
fallbackUrls?: string[] | null,
|
||||
allowThirdParty = false
|
||||
): Promise<{ white: string[], black: string[], foundDebugDomain: boolean }> {
|
||||
): Promise<{ white: string[], black: string[] }> {
|
||||
const [white, black, warningMessages] = await parentSpan.traceChild(`process filter rules: ${filterRulesUrl}`).traceAsyncFn(async (span) => {
|
||||
const text = await fetchAssetsWithout304(filterRulesUrl, fallbackUrls);
|
||||
|
||||
@ -226,8 +117,7 @@ export async function processFilterRules(
|
||||
|
||||
return {
|
||||
white,
|
||||
black,
|
||||
foundDebugDomain
|
||||
black
|
||||
};
|
||||
}
|
||||
|
||||
46
Build/lib/parse-filter/hosts.ts
Normal file
46
Build/lib/parse-filter/hosts.ts
Normal file
@ -0,0 +1,46 @@
|
||||
import type { Span } from '../../trace';
|
||||
import { fetchAssetsWithout304 } from '../fetch-assets';
|
||||
import { normalizeDomain } from '../normalize-domain';
|
||||
import { processLine } from '../process-line';
|
||||
import { onBlackFound } from './shared';
|
||||
|
||||
function hostsLineCb(l: string, set: string[], includeAllSubDomain: boolean, meta: string) {
|
||||
const line = processLine(l);
|
||||
if (!line) {
|
||||
return;
|
||||
}
|
||||
|
||||
const _domain = line.split(/\s/)[1]?.trim();
|
||||
if (!_domain) {
|
||||
return;
|
||||
}
|
||||
const domain = normalizeDomain(_domain);
|
||||
if (!domain) {
|
||||
return;
|
||||
}
|
||||
|
||||
onBlackFound(domain, meta);
|
||||
|
||||
set.push(includeAllSubDomain ? `.${domain}` : domain);
|
||||
}
|
||||
|
||||
export function processHosts(
|
||||
span: Span,
|
||||
hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false
|
||||
) {
|
||||
return span.traceChildAsync(`process hosts: ${hostsUrl}`, async (span) => {
|
||||
const text = await span.traceChild('download').traceAsyncFn(() => fetchAssetsWithout304(hostsUrl, mirrors));
|
||||
|
||||
const domainSets: string[] = [];
|
||||
|
||||
const filterRules = text.split('\n');
|
||||
|
||||
span.traceChild('parse hosts').traceSyncFn(() => {
|
||||
for (let i = 0, len = filterRules.length; i < len; i++) {
|
||||
hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl);
|
||||
}
|
||||
});
|
||||
|
||||
return domainSets;
|
||||
});
|
||||
}
|
||||
23
Build/lib/parse-filter/shared.ts
Normal file
23
Build/lib/parse-filter/shared.ts
Normal file
@ -0,0 +1,23 @@
|
||||
import picocolors from 'picocolors';
|
||||
import { DEBUG_DOMAIN_TO_FIND } from '../../constants/reject-data-source';
|
||||
import { noop } from 'foxts/noop';
|
||||
|
||||
export const foundDebugDomain = { value: false };
|
||||
|
||||
export const onBlackFound = DEBUG_DOMAIN_TO_FIND
|
||||
? (line: string, meta: string) => {
|
||||
if (line.includes(DEBUG_DOMAIN_TO_FIND!)) {
|
||||
console.warn(picocolors.red(meta), '(black)', line.replaceAll(DEBUG_DOMAIN_TO_FIND!, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
|
||||
foundDebugDomain.value = true;
|
||||
}
|
||||
}
|
||||
: noop;
|
||||
|
||||
export const onWhiteFound = DEBUG_DOMAIN_TO_FIND
|
||||
? (line: string, meta: string) => {
|
||||
if (line.includes(DEBUG_DOMAIN_TO_FIND!)) {
|
||||
console.warn(picocolors.red(meta), '(white)', line.replaceAll(DEBUG_DOMAIN_TO_FIND!, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
|
||||
foundDebugDomain.value = true;
|
||||
}
|
||||
}
|
||||
: noop;
|
||||
Loading…
x
Reference in New Issue
Block a user