diff --git a/Build/constants/reject-data-source.ts b/Build/constants/reject-data-source.ts index 902bb251..96b24efd 100644 --- a/Build/constants/reject-data-source.ts +++ b/Build/constants/reject-data-source.ts @@ -1,15 +1,13 @@ -import { TTL } from '../lib/cache-filesystem'; - export const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null -type HostsSource = [main: string, mirrors: string[] | null, includeAllSubDomain: boolean, ttl: number]; +type HostsSource = [main: string, mirrors: string[] | null, includeAllSubDomain: boolean]; export const HOSTS: HostsSource[] = [ // have not been updated for more than a year, so we set a 14 days cache ttl - ['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', null, true, TTL.TWO_WEEKS()], - ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', null, false, TTL.ONE_WEEK()], - ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', null, false, TTL.ONE_WEEK()], - ['https://raw.githubusercontent.com/durablenapkin/block/master/tvstream.txt', null, true, TTL.THREE_HOURS()] + ['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', null, true], + ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', null, false], + ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', null, false], + ['https://raw.githubusercontent.com/durablenapkin/block/master/tvstream.txt', null, true] ]; export const HOSTS_EXTRA: HostsSource[] = [ @@ -17,18 +15,18 @@ export const HOSTS_EXTRA: HostsSource[] = [ [ 'https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', ['https://raw.githubusercontent.com/uBlockOrigin/uAssets/master/thirdparties/pgl.yoyo.org/as/serverlist'], - true, - TTL.THREE_HOURS() + true + ], // Dan Pollock's hosts file, 0.0.0.0 version is 30 KiB smaller [ 'https://someonewhocares.org/hosts/zero/hosts', ['https://proxy.cdn.skk.moe/?https://someonewhocares.org/hosts/zero/hosts'], - true, - TTL.THREE_HOURS() + true + ], // ad-wars is not actively maintained, so we set a 7 days cache ttl - ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.TWO_WEEKS()] + ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false] ]; export const DOMAIN_LISTS: HostsSource[] = [ @@ -40,7 +38,7 @@ export const DOMAIN_LISTS: HostsSource[] = [ // 'https://proxy.cdn.skk.moe/?https://zerodot1.gitlab.io/CoinBlockerLists/list_browser.txt' // ], // true, - // TTL.TWO_WEEKS() + // // ] ]; @@ -51,7 +49,7 @@ export const DOMAIN_LISTS_EXTRA: HostsSource[] = [ // 'https://zerodot1.gitlab.io/CoinBlockerLists/list.txt', // ['https://proxy.cdn.skk.moe/?https://zerodot1.gitlab.io/CoinBlockerLists/list.txt'], // true, - // TTL.TWO_WEEKS() + // // ], // BarbBlock @@ -59,19 +57,19 @@ export const DOMAIN_LISTS_EXTRA: HostsSource[] = [ [ 'https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', ['https://raw.githubusercontent.com/paulgb/BarbBlock/refs/heads/main/blacklists/domain-list.txt'], - true, - TTL.TWO_WEEKS() + true + ], // DigitalSide Threat-Intel - OSINT Hub // Update once per day - ['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', [], true, TTL.ONE_DAY()], + ['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', [], true], // AdGuard CNAME Filter Combined // Update on a 7 days basis, so we add a 3 hours cache ttl - ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', [], true, TTL.THREE_DAYS()], - ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers_justdomains.txt', [], true, TTL.THREE_DAYS()], - ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_clickthroughs_justdomains.txt', [], true, TTL.THREE_DAYS()], - ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_microsites_justdomains.txt', [], true, TTL.THREE_DAYS()], - // ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers_justdomains.txt', [], true, TTL.THREE_DAYS()], + ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', [], true], + ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers_justdomains.txt', [], true], + ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_clickthroughs_justdomains.txt', [], true], + ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_microsites_justdomains.txt', [], true], + // ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers_justdomains.txt', [], true], // Curben's PUP Domains Blocklist // The PUP filter has paused the update since 2023-05, so we set a 14 days cache ttl, and move it to extra // [ @@ -83,7 +81,7 @@ export const DOMAIN_LISTS_EXTRA: HostsSource[] = [ // // 'https://curbengh.github.io/pup-filter/pup-filter-domains.txt', // // 'https://malware-filter.pages.dev/pup-filter-domains.txt' // ], - // true, TTL.TWO_WEEKS() + // true // ], // Curben's UrlHaus Malicious URL Blocklist [ @@ -94,15 +92,15 @@ export const DOMAIN_LISTS_EXTRA: HostsSource[] = [ 'https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-domains.txt', 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-domains.txt' ], - true, TTL.THREE_HOURS() + true ], // Spam404 // Not actively maintained, let's use a 10 days cache ttl - ['https://raw.githubusercontent.com/Spam404/lists/master/main-blacklist.txt', [], true, TTL.TEN_DAYS()] + ['https://raw.githubusercontent.com/Spam404/lists/master/main-blacklist.txt', [], true] ]; export const PHISHING_HOSTS_EXTRA: HostsSource[] = [ - ['https://raw.githubusercontent.com/durablenapkin/scamblocklist/master/hosts.txt', [], true, TTL.THREE_DAYS()] + ['https://raw.githubusercontent.com/durablenapkin/scamblocklist/master/hosts.txt', [], true] ]; export const PHISHING_DOMAIN_LISTS_EXTRA: HostsSource[] = [ @@ -114,20 +112,20 @@ export const PHISHING_DOMAIN_LISTS_EXTRA: HostsSource[] = [ 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-domains.txt', 'https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt' ], - true, TTL.THREE_HOURS() + true ], [ 'https://phishing.army/download/phishing_army_blocklist.txt', [], - true, TTL.THREE_HOURS() + true ] ]; -type AdGuardFilterSource = [main: string, mirrors: string[] | null, ttl: number, allowThirdParty?: boolean]; +type AdGuardFilterSource = [main: string, mirrors: string[] | null, allowThirdParty?: boolean]; export const ADGUARD_FILTERS: AdGuardFilterSource[] = [ // no coin list adguard list is more maintained than its hosts - ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/nocoin.txt', [], TTL.TWO_WEEKS()], + ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/nocoin.txt', []], // EasyList [ 'https://easylist.to/easylist/easylist.txt', @@ -138,8 +136,8 @@ export const ADGUARD_FILTERS: AdGuardFilterSource[] = [ 'https://ublockorigin.pages.dev/thirdparties/easylist.txt', 'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easylist.txt', 'https://filters.adtidy.org/extension/ublock/filters/101_optimized.txt' - ], - TTL.TWLVE_HOURS() + ] + ], // EasyPrivacy [ @@ -151,8 +149,8 @@ export const ADGUARD_FILTERS: AdGuardFilterSource[] = [ 'https://ublockorigin.pages.dev/thirdparties/easyprivacy.txt', 'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easyprivacy.txt', 'https://filters.adtidy.org/extension/ublock/filters/118_optimized.txt' - ], - TTL.TWLVE_HOURS() + ] + ], // AdGuard DNS Filter [ @@ -160,52 +158,52 @@ export const ADGUARD_FILTERS: AdGuardFilterSource[] = [ [ 'https://filters.adtidy.org/extension/ublock/filters/15_optimized.txt', 'https://adguardteam.github.io/HostlistsRegistry/assets/filter_1.txt' - ], - TTL.TWLVE_HOURS() + ] + ], // AdGuard Base Filter [ 'https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt', - ['https://proxy.cdn.skk.moe/?https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt'], - TTL.THREE_HOURS() + ['https://proxy.cdn.skk.moe/?https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt'] + ], // AdGuard Mobile AD [ 'https://filters.adtidy.org/extension/ublock/filters/11_optimized.txt', - ['https://proxy.cdn.skk.moe/?https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt'], - TTL.THREE_HOURS() + ['https://proxy.cdn.skk.moe/?https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt'] + ], // AdGuard Tracking Protection [ 'https://filters.adtidy.org/extension/ublock/filters/3_optimized.txt', - ['https://proxy.cdn.skk.moe/?https://filters.adtidy.org/extension/ublock/filters/3_optimized.txt'], - TTL.THREE_HOURS() + ['https://proxy.cdn.skk.moe/?https://filters.adtidy.org/extension/ublock/filters/3_optimized.txt'] + ], // AdGuard Chinese filter (EasyList China + AdGuard Chinese filter) [ 'https://filters.adtidy.org/extension/ublock/filters/224_optimized.txt', - ['https://proxy.cdn.skk.moe/?https://filters.adtidy.org/extension/ublock/filters/224_optimized.txt'], - TTL.THREE_HOURS() + ['https://proxy.cdn.skk.moe/?https://filters.adtidy.org/extension/ublock/filters/224_optimized.txt'] + ], // GameConsoleAdblockList // Update almost once per 1 to 3 months, let's set a 10 days cache ttl - ['https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt', null, TTL.TEN_DAYS()], + ['https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt', null], // PiHoleBlocklist // Update almost once per 3 months, let's set a 10 days cache ttl [ 'https://perflyst.github.io/PiHoleBlocklist/SmartTV-AGH.txt', [ 'https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/SmartTV-AGH.txt' - ], - TTL.TEN_DAYS() + ] + ], // uBlock Origin Unbreak [ 'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.min.txt', [ 'https://ublockorigin.pages.dev/filters/unbreak.min.txt' - ], - TTL.THREE_HOURS() + ] + ] ]; @@ -214,15 +212,15 @@ export const ADGUARD_FILTERS_WHITELIST: AdGuardFilterSource[] = [ 'https://adguardteam.github.io/AdGuardSDNSFilter/Filters/exceptions.txt', [ 'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exceptions.txt' - ], - TTL.THREE_HOURS() + ] + ], [ 'https://adguardteam.github.io/AdGuardSDNSFilter/Filters/exclusions.txt', [ 'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exclusions.txt' - ], - TTL.THREE_HOURS() + ] + ] ]; @@ -231,53 +229,52 @@ export const ADGUARD_FILTERS_EXTRA: AdGuardFilterSource[] = [ [ 'https://filters.adtidy.org/extension/ublock/filters/14_optimized.txt', ['https://proxy.cdn.skk.moe/?https://filters.adtidy.org/extension/ublock/filters/14_optimized.txt'], - TTL.THREE_HOURS(), true ], // AdGuard Cookie Notices, included in Annoyances filter - // ['https://filters.adtidy.org/extension/ublock/filters/18_optimized.txt', null, TTL.THREE_HOURS(), true], + // ['https://filters.adtidy.org/extension/ublock/filters/18_optimized.txt', null, true], // EasyList Germany filter, not even included in extra for now // [ // 'https://easylist.to/easylistgermany/easylistgermany.txt', // [ // 'https://easylist-downloads.adblockplus.org/easylistgermany.txt' // ], - // TTL.TWLVE_HOURS() + // // ], // AdGuard Japanese filter [ 'https://filters.adtidy.org/extension/ublock/filters/7_optimized.txt', - ['https://proxy.cdn.skk.moe/?https://filters.adtidy.org/extension/ublock/filters/7_optimized.txt'], - TTL.THREE_HOURS() + ['https://proxy.cdn.skk.moe/?https://filters.adtidy.org/extension/ublock/filters/7_optimized.txt'] + ], // uBlock Origin Filter List [ 'https://ublockorigin.github.io/uAssetsCDN/filters/filters.min.txt', [ 'https://ublockorigin.pages.dev/filters/filters.min.txt' - ], - TTL.THREE_HOURS() + ] + ], // AdGuard Popup Overlay - included in Annoyances filter - // ['https://filters.adtidy.org/extension/ublock/filters/19_optimized.txt', null, TTL.THREE_HOURS(), true], + // ['https://filters.adtidy.org/extension/ublock/filters/19_optimized.txt', null, true], // AdGuard Mobile Banner // almost all generic rule - // ['https://filters.adtidy.org/extension/ublock/filters/20_optimized.txt', null, TTL.THREE_HOURS()], + // ['https://filters.adtidy.org/extension/ublock/filters/20_optimized.txt', null], // uBlock Origin Badware Risk List [ 'https://ublockorigin.github.io/uAssetsCDN/filters/badware.min.txt', [ 'https://ublockorigin.pages.dev/filters/badware.min.txt' - ], - TTL.THREE_HOURS() + ] + ], // uBlock Origin Privacy List [ 'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.min.txt', [ 'https://ublockorigin.pages.dev/filters/privacy.min.txt' - ], - TTL.THREE_HOURS() + ] + ], // uBlock Origin Resource Abuse: merged in uBlock Origin Privacy List // [ @@ -287,26 +284,26 @@ export const ADGUARD_FILTERS_EXTRA: AdGuardFilterSource[] = [ // uBlock Origin Annoyances (the un-merged of Fanboy Annoyances List) [ 'https://ublockorigin.github.io/uAssetsCDN/filters/annoyances.min.txt', - ['https://ublockorigin.pages.dev/filters/annoyances.min.txt'], - TTL.THREE_HOURS() + ['https://ublockorigin.pages.dev/filters/annoyances.min.txt'] + ], // EasyList Annoyances [ 'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easylist-annoyances.txt', - ['https://ublockorigin.pages.dev/thirdparties/easylist-annoyances.txt'], - TTL.THREE_HOURS() + ['https://ublockorigin.pages.dev/thirdparties/easylist-annoyances.txt'] + ], // EasyList - Newsletters [ 'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easylist-newsletters.txt', - ['https://ublockorigin.pages.dev/thirdparties/easylist-newsletters.txt'], - TTL.THREE_HOURS() + ['https://ublockorigin.pages.dev/thirdparties/easylist-newsletters.txt'] + ], // EasyList - Notifications [ 'https://ublockorigin.github.io/uAssets/thirdparties/easylist-notifications.txt', - ['https://ublockorigin.pages.dev/thirdparties/easylist-notifications.txt'], - TTL.THREE_HOURS() + ['https://ublockorigin.pages.dev/thirdparties/easylist-notifications.txt'] + ], // Fanboy Cookie Monster (EasyList Cookie List) [ @@ -314,14 +311,13 @@ export const ADGUARD_FILTERS_EXTRA: AdGuardFilterSource[] = [ [ 'https://ublockorigin.pages.dev/thirdparties/easylist-cookies.txt', 'https://secure.fanboy.co.nz/fanboy-cookiemonster_ubo.txt' - ], - TTL.TWLVE_HOURS() + ] + ], // Dandelion Sprout's Annoyances [ 'https://filters.adtidy.org/extension/ublock/filters/250_optimized.txt', ['https://proxy.cdn.skk.moe/?https://filters.adtidy.org/extension/ublock/filters/250_optimized.txt'], - TTL.THREE_HOURS(), true ], // Adblock Warning Removal List @@ -330,7 +326,6 @@ export const ADGUARD_FILTERS_EXTRA: AdGuardFilterSource[] = [ [ 'https://filters.adtidy.org/extension/ublock/filters/207_optimized.txt' ], - TTL.TWLVE_HOURS(), true ] ]; diff --git a/Build/lib/get-phishing-domains.ts b/Build/lib/get-phishing-domains.ts index f5b617dc..989dec96 100644 --- a/Build/lib/get-phishing-domains.ts +++ b/Build/lib/get-phishing-domains.ts @@ -8,7 +8,7 @@ import { DEBUG_DOMAIN_TO_FIND, PHISHING_DOMAIN_LISTS_EXTRA, PHISHING_HOSTS_EXTRA import { loosTldOptWithPrivateDomains } from '../constants/loose-tldts-opt'; import picocolors from 'picocolors'; import { createRetrieKeywordFilter as createKeywordFilter } from 'foxts/retrie'; -import { createCacheKey, deserializeArray, serializeArray } from './cache-filesystem'; +import { deserializeArray, serializeArray } from './cache-filesystem'; import { cache } from './fs-memo'; import { isCI } from 'ci-info'; @@ -205,16 +205,14 @@ const processPhihsingDomains = cache(function processPhihsingDomains(domainArr: temporaryBypass: !isCI || DEBUG_DOMAIN_TO_FIND !== null }); -const cacheKey = createCacheKey(__filename); - export function getPhishingDomains(parentSpan: Span) { return parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => { const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => { const domainArr: string[] = []; await Promise.all([ - ...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry, cacheKey)), - ...PHISHING_HOSTS_EXTRA.map(entry => processHosts(curSpan, ...entry, cacheKey)) + ...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainLists(curSpan, ...entry)), + ...PHISHING_HOSTS_EXTRA.map(entry => processHosts(curSpan, ...entry)) ]).then(domainGroups => domainGroups.forEach(appendArrayInPlaceCurried(domainArr))); return domainArr;