mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Update Reject Infra & Data Source
This commit is contained in:
parent
c158217dda
commit
0d1fddcb81
@ -4,7 +4,7 @@ import process from 'node:process';
|
||||
|
||||
import { processHosts, processFilterRules, processDomainLists } from './lib/parse-filter';
|
||||
|
||||
import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA } from './constants/reject-data-source';
|
||||
import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS, HOSTS_EXTRA, DOMAIN_LISTS_EXTRA, ADGUARD_FILTERS_EXTRA, PHISHING_DOMAIN_LISTS_EXTRA, PHISHING_HOSTS_EXTRA, ADGUARD_FILTERS_WHITELIST } from './constants/reject-data-source';
|
||||
import { compareAndWriteFile } from './lib/create-file';
|
||||
import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
|
||||
import { task } from './trace';
|
||||
@ -44,7 +44,8 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
|
||||
...HOSTS_EXTRA.map(host => ` - ${host[0]}`),
|
||||
...DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`),
|
||||
...ADGUARD_FILTERS_EXTRA.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`),
|
||||
...PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`)
|
||||
...PHISHING_DOMAIN_LISTS_EXTRA.map(domainList => ` - ${domainList[0]}`),
|
||||
...PHISHING_HOSTS_EXTRA.map(host => ` - ${host[0]}`)
|
||||
]);
|
||||
|
||||
const appendArrayToRejectOutput = rejectOutput.addFromDomainset.bind(rejectOutput);
|
||||
@ -91,18 +92,25 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
|
||||
appendArrayToRejectExtraOutput(black);
|
||||
})
|
||||
),
|
||||
|
||||
([
|
||||
'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exceptions.txt',
|
||||
'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exclusions.txt'
|
||||
].map(
|
||||
input => processFilterRules(childSpan, input).then(({ white, black }) => {
|
||||
setAddFromArray(filterRuleWhitelistDomainSets, white);
|
||||
setAddFromArray(filterRuleWhitelistDomainSets, black);
|
||||
})
|
||||
)),
|
||||
ADGUARD_FILTERS_WHITELIST.map(entry => processFilterRules(childSpan, ...entry).then(({ white, black }) => {
|
||||
setAddFromArray(filterRuleWhitelistDomainSets, white);
|
||||
setAddFromArray(filterRuleWhitelistDomainSets, black);
|
||||
})),
|
||||
getPhishingDomains(childSpan).then(appendArrayToRejectExtraOutput),
|
||||
readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/reject_sukka.conf')).then(appendArrayToRejectOutput)
|
||||
readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/reject_sukka.conf')).then(appendArrayToRejectOutput),
|
||||
// Dedupe domainSets
|
||||
span.traceChildAsync('collect black keywords/suffixes', async () => {
|
||||
/** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */
|
||||
for await (const line of readFileByLine(path.resolve(__dirname, '../Source/non_ip/reject.conf'))) {
|
||||
const [type, value] = line.split(',');
|
||||
if (type === 'DOMAIN-KEYWORD') {
|
||||
rejectOutput.addDomainKeyword(value); // Add for later deduplication
|
||||
rejectExtraOutput.addDomainKeyword(value); // Add for later deduplication
|
||||
} else if (type === 'DOMAIN-SUFFIX') {
|
||||
filterRuleWhitelistDomainSets.add('.' + value);
|
||||
}
|
||||
}
|
||||
})
|
||||
].flat());
|
||||
// eslint-disable-next-line sukka/no-single-return -- not single return
|
||||
return shouldStop;
|
||||
@ -112,22 +120,6 @@ export const buildRejectDomainSet = task(require.main === module, __filename)(as
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Dedupe domainSets
|
||||
await span.traceChildAsync('collect black keywords/suffixes', async () => {
|
||||
/** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */
|
||||
for await (const line of readFileByLine(path.resolve(__dirname, '../Source/non_ip/reject.conf'))) {
|
||||
const [type, value] = line.split(',');
|
||||
|
||||
if (type === 'DOMAIN-KEYWORD') {
|
||||
rejectOutput.addDomainKeyword(value); // Add for later deduplication
|
||||
rejectExtraOutput.addDomainKeyword(value); // Add for later deduplication
|
||||
} else if (type === 'DOMAIN-SUFFIX') {
|
||||
rejectOutput.whitelistDomain('.' + value); // Add for later deduplication
|
||||
rejectExtraOutput.whitelistDomain('.' + value); // Add for later deduplication
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
await Promise.all([
|
||||
rejectOutput.done(),
|
||||
rejectExtraOutput.done()
|
||||
|
||||
@ -3,14 +3,12 @@ import { TTL } from '../lib/cache-filesystem';
|
||||
type HostsSource = [main: string, mirrors: string[] | null, includeAllSubDomain: boolean, ttl: number];
|
||||
|
||||
export const HOSTS: HostsSource[] = [
|
||||
// no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl
|
||||
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', null, true, TTL.THREE_DAYS()],
|
||||
// have not been updated for more than a year, so we set a 14 days cache ttl
|
||||
['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', null, true, TTL.TWO_WEEKS()],
|
||||
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', null, false, TTL.ONE_WEEK()],
|
||||
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', null, false, TTL.ONE_WEEK()],
|
||||
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', null, true, TTL.THREE_HOURS()],
|
||||
['https://raw.githubusercontent.com/durablenapkin/block/refs/heads/master/tvstream.txt', null, true, TTL.THREE_HOURS()]
|
||||
['https://raw.githubusercontent.com/durablenapkin/block/master/tvstream.txt', null, true, TTL.THREE_HOURS()]
|
||||
];
|
||||
|
||||
export const HOSTS_EXTRA: HostsSource[] = [
|
||||
@ -24,7 +22,7 @@ export const HOSTS_EXTRA: HostsSource[] = [
|
||||
// Dan Pollock's hosts file, 0.0.0.0 version is 30 KiB smaller
|
||||
['https://someonewhocares.org/hosts/zero/hosts', null, true, TTL.THREE_HOURS()],
|
||||
// ad-wars is not actively maintained, so we set a 7 days cache ttl
|
||||
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.ONE_WEEK()]
|
||||
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.TWO_WEEKS()]
|
||||
];
|
||||
|
||||
export const DOMAIN_LISTS: HostsSource[] = [
|
||||
@ -86,9 +84,12 @@ export const PHISHING_DOMAIN_LISTS_EXTRA: HostsSource[] = [
|
||||
'https://phishing.army/download/phishing_army_blocklist.txt',
|
||||
[],
|
||||
true, TTL.THREE_HOURS()
|
||||
],
|
||||
]
|
||||
];
|
||||
|
||||
export const PHISHING_HOSTS_EXTRA: HostsSource[] = [
|
||||
[
|
||||
'https://raw.githubusercontent.com/durablenapkin/scamblocklist/refs/heads/master/hosts.txt',
|
||||
'https://raw.githubusercontent.com/durablenapkin/scamblocklist/master/hosts.txt',
|
||||
[],
|
||||
true, TTL.TWLVE_HOURS()
|
||||
]
|
||||
@ -97,14 +98,16 @@ export const PHISHING_DOMAIN_LISTS_EXTRA: HostsSource[] = [
|
||||
type AdGuardFilterSource = [main: string, mirrors: string[] | null, ttl: number, allowThirdParty?: boolean];
|
||||
|
||||
export const ADGUARD_FILTERS: AdGuardFilterSource[] = [
|
||||
// no coin list adguard list is more maintained than its hosts
|
||||
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/nocoin.txt', [], TTL.TWO_WEEKS()],
|
||||
// EasyList
|
||||
[
|
||||
'https://easylist.to/easylist/easylist.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easylist.txt',
|
||||
'https://ublockorigin.pages.dev/thirdparties/easylist.txt',
|
||||
'https://easylist-downloads.adblockplus.org/easylist.txt',
|
||||
'https://secure.fanboy.co.nz/easylist.txt',
|
||||
'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easylist.txt',
|
||||
'https://ublockorigin.pages.dev/thirdparties/easylist.txt',
|
||||
'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easylist.txt'
|
||||
],
|
||||
TTL.TWLVE_HOURS()
|
||||
@ -113,11 +116,11 @@ export const ADGUARD_FILTERS: AdGuardFilterSource[] = [
|
||||
[
|
||||
'https://easylist.to/easylist/easyprivacy.txt',
|
||||
[
|
||||
'https://secure.fanboy.co.nz/easyprivacy.txt',
|
||||
'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easyprivacy.txt',
|
||||
'https://easylist-downloads.adblockplus.org/easyprivacy.txt',
|
||||
'https://secure.fanboy.co.nz/easyprivacy.txt',
|
||||
'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easyprivacy.txt',
|
||||
'https://ublockorigin.pages.dev/thirdparties/easyprivacy.txt'
|
||||
'https://ublockorigin.pages.dev/thirdparties/easyprivacy.txt',
|
||||
'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easyprivacy.txt'
|
||||
],
|
||||
TTL.TWLVE_HOURS()
|
||||
],
|
||||
@ -150,23 +153,46 @@ export const ADGUARD_FILTERS: AdGuardFilterSource[] = [
|
||||
],
|
||||
TTL.TEN_DAYS()
|
||||
],
|
||||
// Brave First Party & First Party CNAME
|
||||
['https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty.txt', null, TTL.ONE_DAY()]
|
||||
// uBlock Origin Unbreak
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.min.txt',
|
||||
[
|
||||
'https://ublockorigin.pages.dev/filters/unbreak.min.txt'
|
||||
],
|
||||
TTL.THREE_HOURS()
|
||||
]
|
||||
];
|
||||
|
||||
/**
 * AdGuard filter sources whose parsed rules are used as a whitelist.
 *
 * Every entry follows the `AdGuardFilterSource` tuple layout
 * `[main, mirrors, ttl, allowThirdParty?]`. The entries below each supply a
 * primary URL, a single mirror URL and a `TTL.THREE_HOURS()` ttl, and leave
 * `allowThirdParty` unset.
 *
 * `buildRejectDomainSet` spreads each entry into `processFilterRules` and adds
 * BOTH the resulting `white` and `black` lists to the filter-rule whitelist
 * domain set.
 */
export const ADGUARD_FILTERS_WHITELIST: AdGuardFilterSource[] = [
|
||||
// AdGuardSDNSFilter `exceptions.txt` (primary URL + raw.githubusercontent.com mirror)
[
|
||||
'https://adguardteam.github.io/AdGuardSDNSFilter/Filters/exceptions.txt',
|
||||
[
|
||||
'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exceptions.txt'
|
||||
],
|
||||
TTL.THREE_HOURS()
|
||||
],
|
||||
// AdGuardSDNSFilter `exclusions.txt` (primary URL + raw.githubusercontent.com mirror)
[
|
||||
'https://adguardteam.github.io/AdGuardSDNSFilter/Filters/exclusions.txt',
|
||||
[
|
||||
'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exclusions.txt'
|
||||
],
|
||||
TTL.THREE_HOURS()
|
||||
]
|
||||
];
|
||||
|
||||
export const ADGUARD_FILTERS_EXTRA: AdGuardFilterSource[] = [
|
||||
// AdGuard Annoyances filter
|
||||
['https://filters.adtidy.org/android/filters/14_optimized.txt', null, TTL.THREE_HOURS(), true],
|
||||
// AdGuard Cookie Notices
|
||||
['https://filters.adtidy.org/extension/ublock/filters/18_optimized.txt', null, TTL.THREE_HOURS(), true],
|
||||
// EasyList Germany filter
|
||||
[
|
||||
'https://easylist.to/easylistgermany/easylistgermany.txt',
|
||||
[
|
||||
'https://easylist-downloads.adblockplus.org/easylistgermany.txt'
|
||||
],
|
||||
TTL.TWLVE_HOURS()
|
||||
],
|
||||
['https://filters.adtidy.org/extension/ublock/filters/14_optimized.txt', null, TTL.THREE_HOURS(), true],
|
||||
// AdGuard Cookie Notices, included in Annoyances filter
|
||||
// ['https://filters.adtidy.org/extension/ublock/filters/18_optimized.txt', null, TTL.THREE_HOURS(), true],
|
||||
// EasyList Germany filter, not even included in extra for now
|
||||
// [
|
||||
// 'https://easylist.to/easylistgermany/easylistgermany.txt',
|
||||
// [
|
||||
// 'https://easylist-downloads.adblockplus.org/easylistgermany.txt'
|
||||
// ],
|
||||
// TTL.TWLVE_HOURS()
|
||||
// ],
|
||||
// AdGuard Japanese filter
|
||||
['https://filters.adtidy.org/extension/ublock/filters/7_optimized.txt', null, TTL.THREE_HOURS()],
|
||||
// uBlock Origin Filter List
|
||||
@ -177,8 +203,8 @@ export const ADGUARD_FILTERS_EXTRA: AdGuardFilterSource[] = [
|
||||
],
|
||||
TTL.THREE_HOURS()
|
||||
],
|
||||
// AdGuard Popup Overlay
|
||||
['https://filters.adtidy.org/extension/ublock/filters/19_optimized.txt', null, TTL.THREE_HOURS(), true],
|
||||
// AdGuard Popup Overlay - included in Annoyances filter
|
||||
// ['https://filters.adtidy.org/extension/ublock/filters/19_optimized.txt', null, TTL.THREE_HOURS(), true],
|
||||
// AdGuard Mobile Banner
|
||||
// almost all generic rule
|
||||
// ['https://filters.adtidy.org/extension/ublock/filters/20_optimized.txt', null, TTL.THREE_HOURS()],
|
||||
@ -205,14 +231,6 @@ export const ADGUARD_FILTERS_EXTRA: AdGuardFilterSource[] = [
|
||||
// 'https://ublockorigin.pages.dev/filters/resource-abuse.txt'
|
||||
// ]
|
||||
// ],
|
||||
// uBlock Origin Unbreak
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.min.txt',
|
||||
[
|
||||
'https://ublockorigin.pages.dev/filters/unbreak.min.txt'
|
||||
],
|
||||
TTL.THREE_HOURS()
|
||||
],
|
||||
// uBlock Origin Annoyances
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/annoyances.min.txt',
|
||||
@ -229,6 +247,8 @@ export const ADGUARD_FILTERS_EXTRA: AdGuardFilterSource[] = [
|
||||
],
|
||||
TTL.THREE_HOURS()
|
||||
],
|
||||
// Dandelion Sprout's Annoyances
|
||||
['https://filters.adtidy.org/extension/ublock/filters/250_optimized.txt', null, TTL.THREE_HOURS(), true],
|
||||
// EasyList - Newsletters
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easylist-newsletters.txt',
|
||||
@ -253,6 +273,12 @@ export const ADGUARD_FILTERS_EXTRA: AdGuardFilterSource[] = [
|
||||
'https://secure.fanboy.co.nz/fanboy-cookiemonster_ubo.txt'
|
||||
],
|
||||
TTL.TWLVE_HOURS()
|
||||
],
|
||||
// Bypass Paywall Cleaner
|
||||
[
|
||||
'https://gitflic.ru/project/magnolia1234/bypass-paywalls-clean-filters/blob/raw?file=bpc-paywall-filter.txt',
|
||||
[],
|
||||
TTL.ONE_DAY()
|
||||
]
|
||||
];
|
||||
|
||||
@ -269,6 +295,7 @@ export const PREDEFINED_WHITELIST = [
|
||||
'.ip6-allhosts',
|
||||
'.mcastprefix',
|
||||
'.skk.moe',
|
||||
'.cdn.cloudflare.net', // Surge/Clash doesn't support CNAME
|
||||
'analytics.google.com',
|
||||
'.cloud.answerhub.com',
|
||||
'ae01.alicdn.com',
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
import { processDomainLists } from './parse-filter';
|
||||
import { processDomainLists, processHosts } from './parse-filter';
|
||||
import * as tldts from 'tldts-experimental';
|
||||
|
||||
import { dummySpan } from '../trace';
|
||||
import type { Span } from '../trace';
|
||||
import { appendArrayInPlaceCurried } from './append-array-in-place';
|
||||
import { PHISHING_DOMAIN_LISTS_EXTRA } from '../constants/reject-data-source';
|
||||
import { PHISHING_DOMAIN_LISTS_EXTRA, PHISHING_HOSTS_EXTRA } from '../constants/reject-data-source';
|
||||
import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt';
|
||||
import picocolors from 'picocolors';
|
||||
import createKeywordFilter from './aho-corasick';
|
||||
@ -162,6 +162,8 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
||||
|
||||
(await Promise.all(PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry, cacheKey))))
|
||||
.forEach(appendArrayInPlaceCurried(domainArr));
|
||||
(await Promise.all(PHISHING_HOSTS_EXTRA.map(entry => processHosts(curSpan, ...entry, cacheKey))))
|
||||
.forEach(appendArrayInPlaceCurried(domainArr));
|
||||
|
||||
return domainArr;
|
||||
});
|
||||
|
||||
@ -45,7 +45,11 @@ const domainListLineCb = (l: string, set: string[], includeAllSubDomain: boolean
|
||||
|
||||
const cacheKey = createCacheKey(__filename);
|
||||
|
||||
export function processDomainLists(span: Span, domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null, extraCacheKey: (input: string) => string = identity) {
|
||||
export function processDomainLists(
|
||||
span: Span,
|
||||
domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false,
|
||||
ttl: number | null = null, extraCacheKey: (input: string) => string = identity
|
||||
) {
|
||||
return span.traceChild(`process domainlist: ${domainListsUrl}`).traceAsyncFn((childSpan) => fsFetchCache.apply(
|
||||
extraCacheKey(cacheKey(domainListsUrl)),
|
||||
async () => {
|
||||
@ -100,9 +104,13 @@ const hostsLineCb = (l: string, set: string[], includeAllSubDomain: boolean, met
|
||||
set.push(includeAllSubDomain ? `.${domain}` : domain);
|
||||
};
|
||||
|
||||
export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) {
|
||||
export function processHosts(
|
||||
span: Span,
|
||||
hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false,
|
||||
ttl: number | null = null, extraCacheKey: (input: string) => string = identity
|
||||
) {
|
||||
return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn((childSpan) => fsFetchCache.apply(
|
||||
cacheKey(hostsUrl),
|
||||
extraCacheKey(cacheKey(hostsUrl)),
|
||||
async () => {
|
||||
const domainSets: string[] = [];
|
||||
|
||||
|
||||
@ -214,8 +214,8 @@ export class RulesetOutput extends RuleOutput<Preprocessed> {
|
||||
}
|
||||
}
|
||||
|
||||
console.error(picocolors.bold('Parsed Failed'));
|
||||
if (parsedFailures.length > 0) {
|
||||
console.error(picocolors.bold('Parsed Failed'));
|
||||
console.table(parsedFailures);
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user