mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 09:10:35 +08:00
Fix: force normalize reject domains
This commit is contained in:
parent
937d3002de
commit
53fc370774
@ -32,12 +32,12 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
|
|||||||
const [gorhill] = await Promise.all([
|
const [gorhill] = await Promise.all([
|
||||||
getGorhillPublicSuffixPromise(),
|
getGorhillPublicSuffixPromise(),
|
||||||
// Parse from remote hosts & domain lists
|
// Parse from remote hosts & domain lists
|
||||||
...HOSTS.map(entry => processHosts(entry[0], entry[1], entry[2], entry[3]).then(hosts => {
|
...HOSTS.map(entry => processHosts(entry[0], entry[1], entry[2]).then(hosts => {
|
||||||
hosts.forEach(host => {
|
hosts.forEach(host => {
|
||||||
domainSets.add(host);
|
domainSets.add(host);
|
||||||
});
|
});
|
||||||
})),
|
})),
|
||||||
...DOMAIN_LISTS.map(entry => processDomainLists(entry[0], entry[1], entry[2], entry[3])),
|
...DOMAIN_LISTS.map(entry => processDomainLists(entry[0], entry[1], entry[2])),
|
||||||
...ADGUARD_FILTERS.map(input => {
|
...ADGUARD_FILTERS.map(input => {
|
||||||
const promise = typeof input === 'string'
|
const promise = typeof input === 'string'
|
||||||
? processFilterRules(input)
|
? processFilterRules(input)
|
||||||
@ -154,7 +154,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
|
|||||||
dudupedDominArray.reduce<Record<string, number>>((acc, cur) => {
|
dudupedDominArray.reduce<Record<string, number>>((acc, cur) => {
|
||||||
const suffix = tldts.getDomain(cur, { allowPrivateDomains: false, detectIp: false, validateHostname: false });
|
const suffix = tldts.getDomain(cur, { allowPrivateDomains: false, detectIp: false, validateHostname: false });
|
||||||
if (suffix) {
|
if (suffix) {
|
||||||
acc[suffix] = (acc[suffix] ?? 0) + 1;
|
acc[suffix] = (acc[suffix] || 0) + 1;
|
||||||
}
|
}
|
||||||
return acc;
|
return acc;
|
||||||
}, {})
|
}, {})
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import { createTrie } from './trie';
|
|||||||
import { createCachedGorhillGetDomain } from './cached-tld-parse';
|
import { createCachedGorhillGetDomain } from './cached-tld-parse';
|
||||||
import { processLine } from './process-line';
|
import { processLine } from './process-line';
|
||||||
import { TTL } from './cache-filesystem';
|
import { TTL } from './cache-filesystem';
|
||||||
|
import { isCI } from 'ci-info';
|
||||||
|
|
||||||
const WHITELIST_DOMAIN = new Set([
|
const WHITELIST_DOMAIN = new Set([
|
||||||
'w3s.link',
|
'w3s.link',
|
||||||
@ -85,11 +86,13 @@ const BLACK_TLD = new Set([
|
|||||||
|
|
||||||
export const getPhishingDomains = () => traceAsync('get phishing domains', async () => {
|
export const getPhishingDomains = () => traceAsync('get phishing domains', async () => {
|
||||||
const [domainSet, domainSet2, gorhill] = await Promise.all([
|
const [domainSet, domainSet2, gorhill] = await Promise.all([
|
||||||
processDomainLists('https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt', true, false, TTL.THREE_HOURS()),
|
processDomainLists('https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt', true, TTL.THREE_HOURS()),
|
||||||
processDomainLists('https://phishing.army/download/phishing_army_blocklist.txt', true, true, TTL.THREE_HOURS()),
|
isCI
|
||||||
|
? processDomainLists('https://phishing.army/download/phishing_army_blocklist.txt', true, TTL.THREE_HOURS())
|
||||||
|
: null,
|
||||||
getGorhillPublicSuffixPromise()
|
getGorhillPublicSuffixPromise()
|
||||||
]);
|
]);
|
||||||
domainSet2.forEach((domain) => domainSet.add(domain));
|
domainSet2?.forEach((domain) => domainSet.add(domain));
|
||||||
|
|
||||||
traceSync.skip('* whitelisting phishing domains', () => {
|
traceSync.skip('* whitelisting phishing domains', () => {
|
||||||
const trieForRemovingWhiteListed = createTrie(domainSet);
|
const trieForRemovingWhiteListed = createTrie(domainSet);
|
||||||
|
|||||||
@ -11,10 +11,10 @@ import { normalizeDomain } from './normalize-domain';
|
|||||||
import { fetchAssets } from './fetch-assets';
|
import { fetchAssets } from './fetch-assets';
|
||||||
import { deserializeSet, fsCache, serializeSet } from './cache-filesystem';
|
import { deserializeSet, fsCache, serializeSet } from './cache-filesystem';
|
||||||
|
|
||||||
const DEBUG_DOMAIN_TO_FIND: string | null = '.j3.4z0vc.chileinsumos.cl'; // example.com | null
|
const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null
|
||||||
let foundDebugDomain = false;
|
let foundDebugDomain = false;
|
||||||
|
|
||||||
export function processDomainLists(domainListsUrl: string, includeAllSubDomain = false, skipDomainCheck = false, ttl: number | null = null) {
|
export function processDomainLists(domainListsUrl: string, includeAllSubDomain = false, ttl: number | null = null) {
|
||||||
return traceAsync(`- processDomainLists: ${domainListsUrl}`, () => fsCache.apply(
|
return traceAsync(`- processDomainLists: ${domainListsUrl}`, () => fsCache.apply(
|
||||||
domainListsUrl,
|
domainListsUrl,
|
||||||
async () => {
|
async () => {
|
||||||
@ -23,11 +23,7 @@ export function processDomainLists(domainListsUrl: string, includeAllSubDomain =
|
|||||||
for await (const line of await fetchRemoteTextByLine(domainListsUrl)) {
|
for await (const line of await fetchRemoteTextByLine(domainListsUrl)) {
|
||||||
let domainToAdd = processLine(line);
|
let domainToAdd = processLine(line);
|
||||||
if (!domainToAdd) continue;
|
if (!domainToAdd) continue;
|
||||||
|
domainToAdd = normalizeDomain(domainToAdd);
|
||||||
if (!skipDomainCheck) {
|
|
||||||
domainToAdd = normalizeDomain(domainToAdd);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!domainToAdd) continue;
|
if (!domainToAdd) continue;
|
||||||
|
|
||||||
if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) {
|
if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) {
|
||||||
@ -48,7 +44,7 @@ export function processDomainLists(domainListsUrl: string, includeAllSubDomain =
|
|||||||
}
|
}
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
export function processHosts(hostsUrl: string, includeAllSubDomain = false, skipDomainCheck = false, ttl: number | null = null) {
|
export function processHosts(hostsUrl: string, includeAllSubDomain = false, ttl: number | null = null) {
|
||||||
return traceAsync(`- processHosts: ${hostsUrl}`, () => fsCache.apply(
|
return traceAsync(`- processHosts: ${hostsUrl}`, () => fsCache.apply(
|
||||||
hostsUrl,
|
hostsUrl,
|
||||||
async () => {
|
async () => {
|
||||||
@ -71,10 +67,12 @@ export function processHosts(hostsUrl: string, includeAllSubDomain = false, skip
|
|||||||
foundDebugDomain = true;
|
foundDebugDomain = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const domainToAdd = skipDomainCheck ? _domain : normalizeDomain(_domain);
|
const domainToAdd = normalizeDomain(_domain);
|
||||||
if (domainToAdd) {
|
if (!domainToAdd) {
|
||||||
domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size));
|
console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size));
|
||||||
|
|||||||
@ -1,21 +1,21 @@
|
|||||||
import { TTL } from './cache-filesystem';
|
import { TTL } from './cache-filesystem';
|
||||||
|
|
||||||
export const HOSTS = [
|
export const HOSTS = [
|
||||||
['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, false, TTL.THREE_HOURS()],
|
['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, TTL.THREE_HOURS()],
|
||||||
['https://someonewhocares.org/hosts/hosts', true, false, TTL.THREE_HOURS()],
|
['https://someonewhocares.org/hosts/hosts', true, TTL.THREE_HOURS()],
|
||||||
// no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl
|
// no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl
|
||||||
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false, false, TTL.THREE_DAYS()],
|
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false, TTL.THREE_DAYS()],
|
||||||
// have not been updated for more than a year, so we set a 14 days cache ttl
|
// have not been updated for more than a year, so we set a 14 days cache ttl
|
||||||
['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', true, false, TTL.TWO_WEEKS()],
|
['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', true, TTL.TWO_WEEKS()],
|
||||||
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false, false, TTL.THREE_DAYS()],
|
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false, TTL.THREE_DAYS()],
|
||||||
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', false, false, TTL.THREE_DAYS()],
|
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', false, TTL.THREE_DAYS()],
|
||||||
// ad-wars is not actively maintained, so we set a 7 days cache ttl
|
// ad-wars is not actively maintained, so we set a 7 days cache ttl
|
||||||
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false, false, TTL.ONE_WEEK()],
|
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false, TTL.ONE_WEEK()],
|
||||||
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true, false, TTL.THREE_HOURS()],
|
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true, TTL.THREE_HOURS()],
|
||||||
// Curben's UrlHaus Malicious URL Blocklist
|
// Curben's UrlHaus Malicious URL Blocklist
|
||||||
// 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
|
// 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
|
||||||
// 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
|
// 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
|
||||||
['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true, TTL.THREE_HOURS()],
|
['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, TTL.THREE_HOURS()],
|
||||||
// Curben's Phishing URL Blocklist
|
// Curben's Phishing URL Blocklist
|
||||||
// Covered by lib/get-phishing-domains.ts
|
// Covered by lib/get-phishing-domains.ts
|
||||||
// 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt'
|
// 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt'
|
||||||
@ -25,26 +25,26 @@ export const HOSTS = [
|
|||||||
// 'https://curbengh.github.io/pup-filter/pup-filter-agh.txt'
|
// 'https://curbengh.github.io/pup-filter/pup-filter-agh.txt'
|
||||||
// 'https://pup-filter.pages.dev/pup-filter-agh.txt'
|
// 'https://pup-filter.pages.dev/pup-filter-agh.txt'
|
||||||
// The PUP filter has paused the update since 2023-05, so we set a 14 days cache ttl
|
// The PUP filter has paused the update since 2023-05, so we set a 14 days cache ttl
|
||||||
['https://curbengh.github.io/pup-filter/pup-filter-hosts.txt', true, true, TTL.TWO_WEEKS()]
|
['https://curbengh.github.io/pup-filter/pup-filter-hosts.txt', true, TTL.TWO_WEEKS()]
|
||||||
] as const;
|
] as const;
|
||||||
|
|
||||||
export const DOMAIN_LISTS = [
|
export const DOMAIN_LISTS = [
|
||||||
// CoinBlockerList
|
// CoinBlockerList
|
||||||
// Although the hosts file is still actively maintained, the hosts_browser file is not updated since 2021-07, so we set a 14 days cache ttl
|
// Although the hosts file is still actively maintained, the hosts_browser file is not updated since 2021-07, so we set a 14 days cache ttl
|
||||||
['https://zerodot1.gitlab.io/CoinBlockerLists/list_browser.txt', true, true, TTL.TWO_WEEKS()],
|
['https://zerodot1.gitlab.io/CoinBlockerLists/list_browser.txt', true, TTL.TWO_WEEKS()],
|
||||||
// BarbBlock
|
// BarbBlock
|
||||||
// The barbblock list has never been updated since 2019-05, so we set a 14 days cache ttl
|
// The barbblock list has never been updated since 2019-05, so we set a 14 days cache ttl
|
||||||
['https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', true, true, TTL.TWO_WEEKS()],
|
['https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', true, TTL.TWO_WEEKS()],
|
||||||
// DigitalSide Threat-Intel - OSINT Hub
|
// DigitalSide Threat-Intel - OSINT Hub
|
||||||
// Update once per day
|
// Update once per day
|
||||||
['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, true, TTL.ONE_DAY()],
|
['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, TTL.ONE_DAY()],
|
||||||
// AdGuard CNAME Filter Combined
|
// AdGuard CNAME Filter Combined
|
||||||
// Update on a 7 days basis, so we set a 3 days cache ttl
|
// Update on a 7 days basis, so we set a 3 days cache ttl
|
||||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, true, TTL.THREE_DAYS()],
|
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, TTL.THREE_DAYS()],
|
||||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers_justdomains.txt', true, true, TTL.THREE_DAYS()],
|
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers_justdomains.txt', true, TTL.THREE_DAYS()],
|
||||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_clickthroughs_justdomains.txt', true, true, TTL.THREE_DAYS()],
|
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_clickthroughs_justdomains.txt', true, TTL.THREE_DAYS()],
|
||||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_microsites_justdomains.txt', true, true, TTL.THREE_DAYS()],
|
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_microsites_justdomains.txt', true, TTL.THREE_DAYS()],
|
||||||
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers_justdomains.txt', true, true, TTL.THREE_DAYS()]
|
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers_justdomains.txt', true, TTL.THREE_DAYS()]
|
||||||
] as const;
|
] as const;
|
||||||
|
|
||||||
export const ADGUARD_FILTERS = [
|
export const ADGUARD_FILTERS = [
|
||||||
|
|||||||
@ -34,8 +34,9 @@
|
|||||||
"@eslint-sukka/node": "4.1.10-beta.2",
|
"@eslint-sukka/node": "4.1.10-beta.2",
|
||||||
"@eslint-sukka/ts": "4.1.10-beta.2",
|
"@eslint-sukka/ts": "4.1.10-beta.2",
|
||||||
"@types/async-retry": "^1.4.8",
|
"@types/async-retry": "^1.4.8",
|
||||||
|
"@types/bun": "^1.0.0",
|
||||||
"@types/tar-stream": "^3.1.3",
|
"@types/tar-stream": "^3.1.3",
|
||||||
"bun-types": "^1.0.18-1",
|
"bun-types": "^1.0.21",
|
||||||
"eslint": "^8.56.0",
|
"eslint": "^8.56.0",
|
||||||
"eslint-config-sukka": "4.1.10-beta.2",
|
"eslint-config-sukka": "4.1.10-beta.2",
|
||||||
"eslint-formatter-sukka": "4.1.9",
|
"eslint-formatter-sukka": "4.1.9",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user