mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 17:20:35 +08:00
Chore: prefer domain list
This commit is contained in:
parent
6b0151be29
commit
e4429a62ee
@ -36,14 +36,14 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
|||||||
let shouldStop = false;
|
let shouldStop = false;
|
||||||
await Promise.all([
|
await Promise.all([
|
||||||
// Parse from remote hosts & domain lists
|
// Parse from remote hosts & domain lists
|
||||||
...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2], entry[3]).then(setAddFromArrayCurried(domainSets))),
|
...HOSTS.map(entry => processHosts(childSpan, ...entry).then(setAddFromArrayCurried(domainSets))),
|
||||||
|
|
||||||
...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(setAddFromArrayCurried(domainSets))),
|
...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, ...entry).then(setAddFromArrayCurried(domainSets))),
|
||||||
|
|
||||||
...ADGUARD_FILTERS.map(input => (
|
...ADGUARD_FILTERS.map(input => (
|
||||||
typeof input === 'string'
|
typeof input === 'string'
|
||||||
? processFilterRules(childSpan, input)
|
? processFilterRules(childSpan, input)
|
||||||
: processFilterRules(childSpan, input[0], input[1], input[2])
|
: processFilterRules(childSpan, ...input)
|
||||||
).then(({ white, black, foundDebugDomain }) => {
|
).then(({ white, black, foundDebugDomain }) => {
|
||||||
if (foundDebugDomain) {
|
if (foundDebugDomain) {
|
||||||
// eslint-disable-next-line sukka/no-single-return -- not single return
|
// eslint-disable-next-line sukka/no-single-return -- not single return
|
||||||
|
|||||||
@ -19,3 +19,5 @@ export function appendArrayInPlace<T>(dest: T[], source: T[]) {
|
|||||||
}
|
}
|
||||||
return dest;
|
return dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Curried form of `appendArrayInPlace`: binds the destination array first and
 * returns a one-argument function that appends a source array into it.
 *
 * Handy as a callback, e.g. `arrays.forEach(appendArrayInPlaceCurried(dest))`.
 *
 * @param dest - the array that receives the appended elements
 * @returns a function taking `source` that forwards to
 *          `appendArrayInPlace(dest, source)` and returns its result
 */
export function appendArrayInPlaceCurried<T>(dest: T[]) {
  const appendInto = (source: T[]) => appendArrayInPlace(dest, source);
  return appendInto;
}
|
||||||
|
|||||||
@ -4,7 +4,8 @@ import { getSubdomain, getPublicSuffix } from 'tldts-experimental';
|
|||||||
import { TTL } from './cache-filesystem';
|
import { TTL } from './cache-filesystem';
|
||||||
|
|
||||||
import type { Span } from '../trace';
|
import type { Span } from '../trace';
|
||||||
import { appendArrayInPlace } from './append-array-in-place';
|
import { appendArrayInPlace, appendArrayInPlaceCurried } from './append-array-in-place';
|
||||||
|
import { PHISHING_DOMAIN_LISTS } from './reject-data-source';
|
||||||
|
|
||||||
const BLACK_TLD = new Set([
|
const BLACK_TLD = new Set([
|
||||||
'accountant',
|
'accountant',
|
||||||
@ -101,12 +102,10 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
|||||||
const gorhill = await getGorhillPublicSuffixPromise();
|
const gorhill = await getGorhillPublicSuffixPromise();
|
||||||
|
|
||||||
const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => {
|
const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => {
|
||||||
const [domainSet, domainSet2] = await Promise.all([
|
const domainSet: string[] = [];
|
||||||
processDomainLists(curSpan, 'https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt', true, TTL.THREE_HOURS()),
|
|
||||||
processDomainLists(curSpan, 'https://phishing.army/download/phishing_army_blocklist.txt', true, TTL.THREE_HOURS())
|
|
||||||
]);
|
|
||||||
|
|
||||||
appendArrayInPlace(domainSet, domainSet2);
|
(await Promise.all(PHISHING_DOMAIN_LISTS.map(entry => processDomainLists(curSpan, ...entry))))
|
||||||
|
.forEach(appendArrayInPlaceCurried(domainSet));
|
||||||
|
|
||||||
return domainSet;
|
return domainSet;
|
||||||
});
|
});
|
||||||
|
|||||||
@ -16,24 +16,41 @@ const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null
|
|||||||
let foundDebugDomain = false;
|
let foundDebugDomain = false;
|
||||||
const temporaryBypass = DEBUG_DOMAIN_TO_FIND !== null;
|
const temporaryBypass = DEBUG_DOMAIN_TO_FIND !== null;
|
||||||
|
|
||||||
export function processDomainLists(span: Span, domainListsUrl: string, includeAllSubDomain = false, ttl: number | null = null) {
|
const domainListLineCb = (l: string, set: string[], includeAllSubDomain: boolean, meta: string) => {
|
||||||
return span.traceChild(`process domainlist: ${domainListsUrl}`).traceAsyncFn(() => fsFetchCache.apply(
|
let line = processLine(l);
|
||||||
|
if (!line) return;
|
||||||
|
|
||||||
|
line = normalizeDomain(line);
|
||||||
|
if (!line) return;
|
||||||
|
|
||||||
|
if (DEBUG_DOMAIN_TO_FIND && line.includes(DEBUG_DOMAIN_TO_FIND)) {
|
||||||
|
console.warn(picocolors.red(meta), '(black)', line.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
|
||||||
|
foundDebugDomain = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
set.push(includeAllSubDomain ? `.${line}` : line);
|
||||||
|
};
|
||||||
|
|
||||||
|
export function processDomainLists(span: Span, domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) {
|
||||||
|
return span.traceChild(`process domainlist: ${domainListsUrl}`).traceAsyncFn((childSpan) => fsFetchCache.apply(
|
||||||
domainListsUrl,
|
domainListsUrl,
|
||||||
async () => {
|
async () => {
|
||||||
const domainSets: string[] = [];
|
const domainSets: string[] = [];
|
||||||
|
|
||||||
for await (const line of await fetchRemoteTextByLine(domainListsUrl)) {
|
if (mirrors == null || mirrors.length === 0) {
|
||||||
let domainToAdd = processLine(line);
|
for await (const l of await fetchRemoteTextByLine(domainListsUrl)) {
|
||||||
if (!domainToAdd) continue;
|
domainListLineCb(l, domainSets, includeAllSubDomain, domainListsUrl);
|
||||||
domainToAdd = normalizeDomain(domainToAdd);
|
|
||||||
if (!domainToAdd) continue;
|
|
||||||
|
|
||||||
if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) {
|
|
||||||
console.warn(picocolors.red(domainListsUrl), '(black)', domainToAdd.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
|
|
||||||
foundDebugDomain = true;
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
const filterRules = await childSpan
|
||||||
|
.traceChild('download domain list')
|
||||||
|
.traceAsyncFn(() => fetchAssets(domainListsUrl, mirrors).then(text => text.split('\n')));
|
||||||
|
|
||||||
domainSets.push(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
|
childSpan.traceChild('parse domain list').traceSyncFn(() => {
|
||||||
|
for (let i = 0, len = filterRules.length; i < len; i++) {
|
||||||
|
domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl);
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return domainSets;
|
return domainSets;
|
||||||
|
|||||||
@ -19,46 +19,63 @@ export const HOSTS: HostsSource[] = [
|
|||||||
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', null, false, TTL.THREE_DAYS()],
|
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', null, false, TTL.THREE_DAYS()],
|
||||||
// ad-wars is not actively maintained, so we set a 7 days cache ttl
|
// ad-wars is not actively maintained, so we set a 7 days cache ttl
|
||||||
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.ONE_WEEK()],
|
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.ONE_WEEK()],
|
||||||
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', null, true, TTL.THREE_HOURS()],
|
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', null, true, TTL.THREE_HOURS()]
|
||||||
// Curben's UrlHaus Malicious URL Blocklist
|
|
||||||
[
|
|
||||||
'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt',
|
|
||||||
[
|
|
||||||
'https://urlhaus-filter.pages.dev/urlhaus-filter-hosts.txt',
|
|
||||||
'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-hosts.txt'
|
|
||||||
],
|
|
||||||
true,
|
|
||||||
TTL.THREE_HOURS()
|
|
||||||
]
|
|
||||||
// Curben's Phishing URL Blocklist
|
|
||||||
// Covered by lib/get-phishing-domains.ts
|
|
||||||
// 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt'
|
|
||||||
// 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
|
|
||||||
// ['https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true],
|
|
||||||
] as const;
|
] as const;
|
||||||
|
|
||||||
export const DOMAIN_LISTS = [
|
export const DOMAIN_LISTS: HostsSource[] = [
|
||||||
// CoinBlockerList
|
// CoinBlockerList
|
||||||
// Although the hosts file is still actively maintained, the hosts_browser file is not updated since 2021-07, so we set a 14 days cache ttl
|
// Although the hosts file is still actively maintained, the hosts_browser file is not updated since 2021-07, so we set a 14 days cache ttl
|
||||||
['https://zerodot1.gitlab.io/CoinBlockerLists/list_browser.txt', true, TTL.TWO_WEEKS()],
|
['https://zerodot1.gitlab.io/CoinBlockerLists/list_browser.txt', [], true, TTL.TWO_WEEKS()],
|
||||||
// BarbBlock
|
// BarbBlock
|
||||||
// The barbblock list has never been updated since 2019-05, so we set a 14 days cache ttl
|
// The barbblock list has never been updated since 2019-05, so we set a 14 days cache ttl
|
||||||
['https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', true, TTL.TWO_WEEKS()],
|
['https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', [], true, TTL.TWO_WEEKS()],
|
||||||
// DigitalSide Threat-Intel - OSINT Hub
|
// DigitalSide Threat-Intel - OSINT Hub
|
||||||
// Update once per day
|
// Update once per day
|
||||||
['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, TTL.ONE_DAY()],
|
['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', [], true, TTL.ONE_DAY()],
|
||||||
// Curben's PUP Domains Blocklist
|
// Curben's PUP Domains Blocklist
|
||||||
// 'https://curbengh.github.io/pup-filter/pup-filter-agh.txt'
|
// 'https://curbengh.github.io/pup-filter/pup-filter-agh.txt'
|
||||||
// 'https://pup-filter.pages.dev/pup-filter-agh.txt'
|
// 'https://pup-filter.pages.dev/pup-filter-agh.txt'
|
||||||
// The PUP filter has paused the update since 2023-05, so we set a 14 days cache ttl
|
// The PUP filter has paused the update since 2023-05, so we set a 14 days cache ttl
|
||||||
['https://curbengh.github.io/pup-filter/pup-filter-domains.txt', true, TTL.TWO_WEEKS()],
|
[
|
||||||
|
'https://curbengh.github.io/pup-filter/pup-filter-domains.txt',
|
||||||
|
[
|
||||||
|
'https://pup-filter.pages.dev/pup-filter-domains.txt',
|
||||||
|
'https://malware-filter.gitlab.io/pup-filter/pup-filter-domains.txt'
|
||||||
|
],
|
||||||
|
true, TTL.TWO_WEEKS()
|
||||||
|
],
|
||||||
|
// Curben's UrlHaus Malicious URL Blocklist
|
||||||
|
[
|
||||||
|
'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-domains.txt',
|
||||||
|
[
|
||||||
|
'https://urlhaus-filter.pages.dev/urlhaus-filter-domains.txt',
|
||||||
|
'https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-domains.txt'
|
||||||
|
],
|
||||||
|
true, TTL.THREE_HOURS()
|
||||||
|
],
|
||||||
// AdGuard CNAME Filter Combined
|
// AdGuard CNAME Filter Combined
|
||||||
// Update on a 7 days basis, so we add a 3 hours cache ttl
|
// Update on a 7 days basis, so we add a 3 days cache ttl
|
||||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, TTL.THREE_DAYS()],
|
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', [], true, TTL.THREE_DAYS()],
|
||||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers_justdomains.txt', true, TTL.THREE_DAYS()],
|
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers_justdomains.txt', [], true, TTL.THREE_DAYS()],
|
||||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_clickthroughs_justdomains.txt', true, TTL.THREE_DAYS()],
|
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_clickthroughs_justdomains.txt', [], true, TTL.THREE_DAYS()],
|
||||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_microsites_justdomains.txt', true, TTL.THREE_DAYS()],
|
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_microsites_justdomains.txt', [], true, TTL.THREE_DAYS()],
|
||||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers_justdomains.txt', true, TTL.THREE_DAYS()]
|
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers_justdomains.txt', [], true, TTL.THREE_DAYS()]
|
||||||
|
] as const;
|
||||||
|
|
||||||
|
export const PHISHING_DOMAIN_LISTS: [HostsSource, HostsSource] = [
|
||||||
|
[
|
||||||
|
'https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt',
|
||||||
|
[
|
||||||
|
'https://phishing-filter.pages.dev/phishing-filter-domains.txt',
|
||||||
|
'https://malware-filter.gitlab.io/malware-filter/phishing-filter-domains.txt'
|
||||||
|
],
|
||||||
|
true, TTL.THREE_HOURS()
|
||||||
|
],
|
||||||
|
[
|
||||||
|
'https://phishing.army/download/phishing_army_blocklist.txt',
|
||||||
|
[],
|
||||||
|
true, TTL.THREE_HOURS()
|
||||||
|
]
|
||||||
] as const;
|
] as const;
|
||||||
|
|
||||||
type AdGuardFilterSource = string | [main: string, mirrors: string[] | null, ttl: number];
|
type AdGuardFilterSource = string | [main: string, mirrors: string[] | null, ttl: number];
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user