diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts index 2c53c11d..d109c1ab 100644 --- a/Build/build-reject-domainset.ts +++ b/Build/build-reject-domainset.ts @@ -32,12 +32,12 @@ export const buildRejectDomainSet = task(import.meta.path, async () => { const [gorhill] = await Promise.all([ getGorhillPublicSuffixPromise(), // Parse from remote hosts & domain lists - ...HOSTS.map(entry => processHosts(entry[0], entry[1], entry[2], entry[3]).then(hosts => { + ...HOSTS.map(entry => processHosts(entry[0], entry[1], entry[2]).then(hosts => { hosts.forEach(host => { domainSets.add(host); }); })), - ...DOMAIN_LISTS.map(entry => processDomainLists(entry[0], entry[1], entry[2], entry[3])), + ...DOMAIN_LISTS.map(entry => processDomainLists(entry[0], entry[1], entry[2])), ...ADGUARD_FILTERS.map(input => { const promise = typeof input === 'string' ? processFilterRules(input) @@ -154,7 +154,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => { dudupedDominArray.reduce>((acc, cur) => { const suffix = tldts.getDomain(cur, { allowPrivateDomains: false, detectIp: false, validateHostname: false }); if (suffix) { - acc[suffix] = (acc[suffix] ?? 0) + 1; + acc[suffix] = (acc[suffix] || 0) + 1; } return acc; }, {}) diff --git a/Build/lib/get-phishing-domains.ts b/Build/lib/get-phishing-domains.ts index db0a0209..3ca9a54d 100644 --- a/Build/lib/get-phishing-domains.ts +++ b/Build/lib/get-phishing-domains.ts @@ -6,6 +6,7 @@ import { createTrie } from './trie'; import { createCachedGorhillGetDomain } from './cached-tld-parse'; import { processLine } from './process-line'; import { TTL } from './cache-filesystem'; +import { isCI } from 'ci-info'; const WHITELIST_DOMAIN = new Set([ 'w3s.link', @@ -85,11 +86,13 @@ const BLACK_TLD = new Set([ export const getPhishingDomains = () => traceAsync('get phishing domains', async () => { const [domainSet, domainSet2, gorhill] = await Promise.all([ - processDomainLists('https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt', true, false, TTL.THREE_HOURS()), - processDomainLists('https://phishing.army/download/phishing_army_blocklist.txt', true, true, TTL.THREE_HOURS()), + processDomainLists('https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt', true, TTL.THREE_HOURS()), + isCI + ? processDomainLists('https://phishing.army/download/phishing_army_blocklist.txt', true, TTL.THREE_HOURS()) + : null, getGorhillPublicSuffixPromise() ]); - domainSet2.forEach((domain) => domainSet.add(domain)); + domainSet2?.forEach((domain) => domainSet.add(domain)); traceSync.skip('* whitelisting phishing domains', () => { const trieForRemovingWhiteListed = createTrie(domainSet); diff --git a/Build/lib/parse-filter.ts b/Build/lib/parse-filter.ts index ee828a83..74e4bc63 100644 --- a/Build/lib/parse-filter.ts +++ b/Build/lib/parse-filter.ts @@ -11,10 +11,10 @@ import { normalizeDomain } from './normalize-domain'; import { fetchAssets } from './fetch-assets'; import { deserializeSet, fsCache, serializeSet } from './cache-filesystem'; -const DEBUG_DOMAIN_TO_FIND: string | null = '.j3.4z0vc.chileinsumos.cl'; // example.com | null +const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null let foundDebugDomain = false; -export function processDomainLists(domainListsUrl: string, includeAllSubDomain = false, skipDomainCheck = false, ttl: number | null = null) { +export function processDomainLists(domainListsUrl: string, includeAllSubDomain = false, ttl: number | null = null) { return traceAsync(`- processDomainLists: ${domainListsUrl}`, () => fsCache.apply( domainListsUrl, async () => { @@ -23,11 +23,7 @@ export function processDomainLists(domainListsUrl: string, includeAllSubDomain = for await (const line of await fetchRemoteTextByLine(domainListsUrl)) { let domainToAdd = processLine(line); if (!domainToAdd) continue; - - if (!skipDomainCheck) { - domainToAdd = normalizeDomain(domainToAdd); - } - + domainToAdd = normalizeDomain(domainToAdd); if (!domainToAdd) continue; if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) { @@ -48,7 +44,7 @@ export function processDomainLists(domainListsUrl: string, includeAllSubDomain = } )); } -export function processHosts(hostsUrl: string, includeAllSubDomain = false, skipDomainCheck = false, ttl: number | null = null) { +export function processHosts(hostsUrl: string, includeAllSubDomain = false, ttl: number | null = null) { return traceAsync(`- processHosts: ${hostsUrl}`, () => fsCache.apply( hostsUrl, async () => { @@ -71,10 +67,12 @@ export function processHosts(hostsUrl: string, includeAllSubDomain = false, skip foundDebugDomain = true; } - const domainToAdd = skipDomainCheck ? _domain : normalizeDomain(_domain); - if (domainToAdd) { - domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd); + const domainToAdd = normalizeDomain(_domain); + if (!domainToAdd) { + continue; } + + domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd); } console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size)); diff --git a/Build/lib/reject-data-source.ts b/Build/lib/reject-data-source.ts index 95f0df52..03687295 100644 --- a/Build/lib/reject-data-source.ts +++ b/Build/lib/reject-data-source.ts @@ -1,21 +1,21 @@ import { TTL } from './cache-filesystem'; export const HOSTS = [ - ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, false, TTL.THREE_HOURS()], - ['https://someonewhocares.org/hosts/hosts', true, false, TTL.THREE_HOURS()], + ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, TTL.THREE_HOURS()], + ['https://someonewhocares.org/hosts/hosts', true, TTL.THREE_HOURS()], // no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl - ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false, false, TTL.THREE_DAYS()], + ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false, TTL.THREE_DAYS()], // have not been updated for more than a year, so we set a 14 days cache ttl - ['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', true, false, TTL.TWO_WEEKS()], - ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false, false, TTL.THREE_DAYS()], - ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', false, false, TTL.THREE_DAYS()], + ['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', true, TTL.TWO_WEEKS()], + ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false, TTL.THREE_DAYS()], + ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', false, TTL.THREE_DAYS()], // ad-wars is not actively maintained, so we set a 7 days cache ttl - ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false, false, TTL.ONE_WEEK()], - ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true, false, TTL.THREE_HOURS()], + ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false, TTL.ONE_WEEK()], + ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true, TTL.THREE_HOURS()], // Curben's UrlHaus Malicious URL Blocklist // 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt', // 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt', - ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true, TTL.THREE_HOURS()], + ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, TTL.THREE_HOURS()], // Curben's Phishing URL Blocklist // Covered by lib/get-phishing-domains.ts // 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt' @@ -25,26 +25,26 @@ export const HOSTS = [ // 'https://curbengh.github.io/pup-filter/pup-filter-agh.txt' // 'https://pup-filter.pages.dev/pup-filter-agh.txt' // The PUP filter has paused the update since 2023-05, so we set a 14 days cache ttl - ['https://curbengh.github.io/pup-filter/pup-filter-hosts.txt', true, true, TTL.TWO_WEEKS()] + ['https://curbengh.github.io/pup-filter/pup-filter-hosts.txt', true, TTL.TWO_WEEKS()] ] as const; export const DOMAIN_LISTS = [ // CoinBlockerList // Although the hosts file is still actively maintained, the hosts_browser file is not updated since 2021-07, so we set a 14 days cache ttl - ['https://zerodot1.gitlab.io/CoinBlockerLists/list_browser.txt', true, true, TTL.TWO_WEEKS()], + ['https://zerodot1.gitlab.io/CoinBlockerLists/list_browser.txt', true, TTL.TWO_WEEKS()], // BarbBlock // The barbblock list has never been updated since 2019-05, so we set a 14 days cache ttl - ['https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', true, true, TTL.TWO_WEEKS()], + ['https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', true, TTL.TWO_WEEKS()], // DigitalSide Threat-Intel - OSINT Hub // Update once per day - ['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, true, TTL.ONE_DAY()], + ['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, TTL.ONE_DAY()], // AdGuard CNAME Filter Combined // Update on a 7 days basis, so we add a 3 hours cache ttl - ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, true, TTL.THREE_DAYS()], - ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers_justdomains.txt', true, true, TTL.THREE_DAYS()], - ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_clickthroughs_justdomains.txt', true, true, TTL.THREE_DAYS()], - ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_microsites_justdomains.txt', true, true, TTL.THREE_DAYS()], - ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers_justdomains.txt', true, true, TTL.THREE_DAYS()] + ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, TTL.THREE_DAYS()], + ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers_justdomains.txt', true, TTL.THREE_DAYS()], + ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_clickthroughs_justdomains.txt', true, TTL.THREE_DAYS()], + ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_microsites_justdomains.txt', true, TTL.THREE_DAYS()], + ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers_justdomains.txt', true, TTL.THREE_DAYS()] ] as const; export const ADGUARD_FILTERS = [ diff --git a/bun.lockb b/bun.lockb index 8dba67e4..b31b682b 100755 Binary files a/bun.lockb and b/bun.lockb differ diff --git a/package.json b/package.json index d6a4a65b..b6727dc6 100644 --- a/package.json +++ b/package.json @@ -34,8 +34,9 @@ "@eslint-sukka/node": "4.1.10-beta.2", "@eslint-sukka/ts": "4.1.10-beta.2", "@types/async-retry": "^1.4.8", + "@types/bun": "^1.0.0", "@types/tar-stream": "^3.1.3", - "bun-types": "^1.0.18-1", + "bun-types": "^1.0.21", "eslint": "^8.56.0", "eslint-config-sukka": "4.1.10-beta.2", "eslint-formatter-sukka": "4.1.9",