From fe3c5d95c1853b648b6c2671fd9358fd311e4a0a Mon Sep 17 00:00:00 2001 From: SukkaW Date: Sat, 19 Apr 2025 20:47:46 +0800 Subject: [PATCH] Remove Outdated/Dead Domains --- Build/tools-dedupe-src.ts | 52 +++++++++++++++++----------- Source/domainset/cdn.conf | 10 +----- Source/domainset/reject.conf | 55 ------------------------------ Source/domainset/reject_extra.conf | 10 ------ Source/non_ip/reject.conf | 52 ++++++++++++++++++++++++++++ 5 files changed, 85 insertions(+), 94 deletions(-) diff --git a/Build/tools-dedupe-src.ts b/Build/tools-dedupe-src.ts index 3d6e6b25..6feab81f 100644 --- a/Build/tools-dedupe-src.ts +++ b/Build/tools-dedupe-src.ts @@ -3,9 +3,18 @@ import path from 'node:path'; import fsp from 'node:fs/promises'; import { SOURCE_DIR } from './constants/dir'; import { readFileByLine } from './lib/fetch-text-by-line'; +import { processLine } from './lib/process-line'; -const WHITELIST: string[] = ['packages.argotunnel.com', 'compass-ssl.xbox.com', 'static.agilebits.com', 'ntp.api.bz', 'softwareupdate.vmware.com', 'ftp.apache.org', 'ftp.cuhk.edu.hk', 'apache.belnet.be', 'mirrors.viethosting.com', 'apache.01link.hk', 'artfiles.org.org', 'mirror.synyx.de', 'apache.mediamirrors.org', 'wwwftp.ciril.fr', 'mirror.dkd.de', 'apache.javapipe.com', 'ftp.heikorichter.name', 'apache.panu.it', 'mirrors.supportex.net', 'apache.forsale.plus', 'apache.spinellicreations.com', 'ftp.itu.edu.tr', 'mirror1.spango.com', 'apache.oshte.net', 'mirrors.koehn.com', 'apache.dattatec.com', 'download.nextag.com', 'mirror.jre655.com', 'mirror.kiu.ac.ug', 'apache.cp.if.ua', 'mirrors.sorengard.com', 'ftp.igh.cnrs.fr', 'mirrors.hostingromania.ro', 'mirror.bhoovd.com', 'download.xs4all.nl', 'cpan.panu.it', 'cpan.nctu.edu.tw', 'mirror.serverbeheren.nl', 'cpan.llarian.net', 'cpan.etla.org', 'mirrors.syringanetworks.net', 'mirror.met.hu', 'cpan.cs.uu.nl', 'mirror.teklinks.com', 'mirror.rasanegar.com', 'ctan.kako-dev.de', 'ctan.ijs.si', 'mirrors.chevalier.io', 'mirror.yongbok.net', '1-mirrors.in.sahilister.net', '2-mirrors.in.sahilister.net', 'cc.uoc.gr', 'mirror.sergal.org', 'mirrors.mi.ras.ru', 'ctan.cs.uu.nl', 'mirrors.tripadvisor.com', 'gnu.spinellicreations.com', 'ftp.neowiz.com', 'mirror.rackdc.com', 'mirror.veriportal.com', 'ftp.pbone.net', 'downloader.cursor.sh', 'redrockdigimark.commirror', 'nimiq.by', 'aaxdetect.com', 'ctan.epst-tlemcen.dz', 'udahce.com', 'rs-staticart.ybcdn.net', 'doumpaq.com', 'c.medialytics.com', 'keybut.com', 'adserver.ubiyoo.com', 'kaspa-classic.com', 'minafacil.com', 'jiandanpool.com', 'xn--blockchan-n5a.com', 'alphax.pro', 'crypto-pool.online', 'bbqpool.org', 'nyxcoin.org', 'lpool.name', 'tsfpool.xyz', 'ltcmaster.xyz', '8282.space', 'myminingpool.uk', 'binance.live', 'mining.garden', 'scaleway.ovh', 'atpool.party', 'nimiq.by', 'binance.directory', 'onyx.run', 'lucky-pool.co.uk', 'ra7.xyz']; +const ENFORCED_WHITELIST = [ + 'hola.sk', + 'hola.org', + 'iadmatapk.nosdn.127.net', + 'httpdns.bilivideo.com', + 'httpdns-v6.gslb.yy.com', + 'twemoji.maxcdn.com' +]; +const WHITELIST: string[] = ['ton.local.twitter.com', 'prod.msocdn.com', 'twemoji.maxcdn.com', 'img.urlnode.com', 'ipfsgate.com', 'googleplay.pro', 'iadmatapk.nosdn.127.net', 'hola-shopping.com', 'brdtest.co', 'mynextphone.io', 'hola.hk', 'holashop.org', 'hola.sk', 'hola.com.sg', 'c.medialytics.com', 'adstats.mgc-games.com', 'search.mgc-games.com', 'kissdoujin.com', 'newminersage.com', 'trossmining.de', 'hashncash.net', 'microsolt.ru', 'moneropool.ru', 'hashforcash.us', 'bitcoinn.biz', 'webmining.co', 'lamba.top', 'httpdns.bilivideo.com', 'httpdns-v6.gslb.yy.com', 'k-cdn.depot.dev', 'li-cdn.com']; (async () => { const files = await new Fdir() .withFullPaths() @@ -19,37 +28,32 @@ const WHITELIST: string[] = ['packages.argotunnel.com', 'compass-ssl.xbox.com', .crawl(SOURCE_DIR) .withPromise(); - await Promise.all(files.map(dedupeFile)); + const whitelist = WHITELIST.filter((item) => ENFORCED_WHITELIST.every((whitelistItem) => !isDomainSuffix(whitelistItem, item))); + + await Promise.all(files.map(file => dedupeFile(file, whitelist))); })(); -async function dedupeFile(file: string) { +async function dedupeFile(file: string, whitelist: string[]) { const set = new Set(); const result: string[] = []; - for await (const line of readFileByLine(file)) { - if (line.length === 0) { - result.push(line); - continue; - } - if (line[0] === '#') { - result.push(line); + for await (const l of readFileByLine(file)) { + const line = processLine(l); + if (!line) { + if (l.startsWith('# $ skip_dedupe_src')) { + return; + } + + result.push(l); continue; } + if (set.has(line)) { continue; } // We can't use a trie here since we need to keep the order - if (WHITELIST.some((item) => { - if (item.length > line.length) { - return false; - } - - return ( - item === line // exact match - || line.endsWith('.' + item) // the whitelist is considered as a domain-suffix - ); - })) { + if (whitelist.some((item) => isDomainSuffix(item, line))) { continue; } @@ -59,3 +63,11 @@ async function dedupeFile(file: string) { return fsp.writeFile(file, result.join('\n') + '\n'); } + +function isDomainSuffix(suffixRule: string, domain: string) { + if (suffixRule.length > domain.length + 1) { + return false; + } + + return suffixRule === domain || domain.endsWith('.' + suffixRule); +} diff --git a/Source/domainset/cdn.conf b/Source/domainset/cdn.conf index f6a05f0c..de536337 100644 --- a/Source/domainset/cdn.conf +++ b/Source/domainset/cdn.conf @@ -361,7 +361,6 @@ cdnstatic.tencentcs.com cdn.cms-twdigitalassets.com fonts.twitter.com # Twitter Corp Network leaked to sourcemap -ton.local.twitter.com # Header: Server: AmazonS3 platform.twitter.com # fix twitter @@ -550,7 +549,6 @@ cdn.prod.ext.web.purview.azure.com res-geo.cdn.office.net # statics.teams.cdn.office.net # china mainland cdn static2.sharepointonline.com -prod.msocdn.com spoprod-a.akamaihd.net img-prod-cms-rt-microsoft-com.akamaized.net prod-streaming-video-msn-com.akamaized.net @@ -897,7 +895,6 @@ s.imgflip.com .datocms-assets.com images.pexels.com image.nmb.best -img.urlnode.com pomf2.lain.la img.xhacgn.com img.m.mw @@ -1198,6 +1195,7 @@ cdn.seondf.com app.brightback.com .udify.app flo.uri.sh +.bigcontent.io cdn.c1.amplience.net .media.amplience.net .static.amplience.net @@ -1499,7 +1497,6 @@ ipfs.d.tube .ipfs.xoqq.ch .ipfs.anonymize.com .ipfs.scalaproject.io -.search.ipfsgate.com .ipfs.decoo.io .ipfs.uploads.nu .ipfs1.pixura.io @@ -3186,7 +3183,6 @@ static.bytepan.com js.recurly.com ni-assets.azureedge.net js.short.io -js.braintreegateway.com assets.braintreegateway.com images.web3.storage api.web3.storage @@ -3869,8 +3865,6 @@ cdn.hinative.com img.atwiki.jp .imgikzy.com .imgikuncdn.com -.bigcontent.io -.media.amplience.net images.tokopedia.net assets.tokopedia.net asset.chase.com @@ -4027,7 +4021,6 @@ cdn.thenewstack.io cdn.wikiwiki.jp static.pingcap.com p.depot.dev -k-cdn.depot.dev s.slideme.org media.telanganatoday.com image.telanganatoday.com @@ -4278,7 +4271,6 @@ dz2cdn1.dzone.com newfold.scene7.com mta.newfold.com stbff.newfold.com -.li-cdn.com static.buydomains.com static.registration.bluehost.com cdn.one.store diff --git a/Source/domainset/reject.conf b/Source/domainset/reject.conf index ccf1eef6..a3ca7e28 100644 --- a/Source/domainset/reject.conf +++ b/Source/domainset/reject.conf @@ -304,7 +304,6 @@ inst.360safe.com .ipv4dns.com .ipxx.pro .dkonto.pl -.googleplay.pro .printondemandmerchandise.com .thebitmeister.com .tggame.xyz @@ -533,7 +532,6 @@ xy-log.tagtic.cn .adroll.com .media.net .app-ads-services.com -.ad.10010.com ad.caiyunapp.com ad.huajiao.com ad.hzyoka.com @@ -2594,56 +2592,6 @@ switch.cup.com.cn .pub.tom.com .discovery.tom.com -# brightdata (luminati) SDK -.l-err.biz -.luminati-china.biz -.luminati-china.co -.amazonaws.com -.binaryoptionz.com -.holacdn.com -.hola-shopping.com -.lum-bext.com -.luminatinet.com -.lum-lpm.com -.lumtest.com -.proxy-review.com -.svd-cdn.com -.thecodeil.com -.tukif.com -.whatismyippro.com -.zon-networks.com -.whoisit.co -.hola.hk -.lum-api.io -.lum-cn.io -.lum-ext.io -.luminati.io -.luminati-china.io -.lum-int.io -.lum-sdk.io -.mynextphone.io -.topvpn.io -.l-agent.me -.luminatichina.net -.hola.org -.holashop.org -.h-vpn.org -.hola.com.sg -.hola.sk -.proxyway.com -.optmd.com -.adincube.com -.brdtest.co -.l-cdn.com -.earnapp.com -.brightvpn.com -.brightinitiative.com -.brightdata.com -.bright-sdk.com -.brdtest.com -# .hola.ph # expired -.hola.ph - # EasyPrivacy Migration .click.signaturemarket.co .link.clubmanagergame.com @@ -2813,7 +2761,6 @@ bu2.duba.com bu1.duba.com c.bing.com .cdn.creative.medialytics.com -c.medialytics.com .adadapted.com .nearbyad.com @@ -2878,8 +2825,6 @@ adstat.izuiyou.com wkrd.tingyun.com ma-adx.ctrip.com vapi.tiandi.com -adstats.mgc-games.com -search.mgc-games.com t.adbxb.com e-ad-monitor.huya.com ana.masky.biddingx.com diff --git a/Source/domainset/reject_extra.conf b/Source/domainset/reject_extra.conf index eb188230..fb729474 100644 --- a/Source/domainset/reject_extra.conf +++ b/Source/domainset/reject_extra.conf @@ -1078,7 +1078,6 @@ nimiq.terorie.com .jixiangrong.com .jquery-js.com .jscoinminer.com -.kissdoujin.com .kmdmonster.com .kredsexplorer.com .laserveradedomaina.com @@ -1246,7 +1245,6 @@ nimiq.terorie.com .skralg.com .12finance.com .terorie.com -.newminersage.com .litecointools.com .stitthappens.com .tgservers.com @@ -1354,7 +1352,6 @@ nimiq.terorie.com .object.de .xmrpool.de .rebootcamp.de -.trossmining.de .trustaproiam.de .trusteverything.de .testserverino.de @@ -1492,7 +1489,6 @@ nimiq.terorie.com .voidr.net .neuropool.net .richpool.net -.hashncash.net .phpcoin.net .blockdiggers.net .fastblocks.net @@ -1855,8 +1851,6 @@ nimiq.terorie.com .rupoolproject.ru .xmr5b.ru .mainpool.ru -.microsolt.ru -.moneropool.ru .progaming-cheats.ru .ru-poolbe.ru .statpipe.ru @@ -1893,7 +1887,6 @@ nimiq.terorie.com .whitewalr.us .squishycoin.us .freakhouse.us -.hashforcash.us .vaporumpool.us .poolbe.us .scryptpool.us @@ -2177,7 +2170,6 @@ nimiq.terorie.com .advisorinvest.biz .ashour.biz .atlantistrade.biz -.bitcoinn.biz .bitday.biz .mpool.biz .egopastor.biz @@ -2304,7 +2296,6 @@ nimiq.terorie.com .lightminer.co .mmsubtitles.co .monitoringservice.co -.webmining.co .ufocoin.co .abcpool.co .btc.top @@ -2322,7 +2313,6 @@ nimiq.terorie.com .coolpool.top .chia-apool.top .speedpool.top -.lamba.top .nitsche.top .qukuai.top .uralscoin.info diff --git a/Source/non_ip/reject.conf b/Source/non_ip/reject.conf index 80bd1339..9d7ae98c 100644 --- a/Source/non_ip/reject.conf +++ b/Source/non_ip/reject.conf @@ -1,5 +1,6 @@ # $ meta_title Sukka's Ruleset - Reject Domains # $ meta_description The ruleset supports AD blocking, tracking protection, privacy protection, anti-phishing, anti-mining +# $ skip_dedupe_src enforce some blocking to reduce file size DOMAIN,this_rule_set_is_made_by_sukkaw.skk.moe @@ -157,6 +158,57 @@ DOMAIN-SUFFIX,esgltc.com DOMAIN-SUFFIX,kimo.tw DOMAIN-SUFFIX,lambda-ix.net +# >> brightdata (luminati) SDK +# Though most of domains are expired, still including them to reduce the file size +DOMAIN-SUFFIX,l-err.biz +DOMAIN-SUFFIX,luminati-china.biz +DOMAIN-SUFFIX,luminati-china.co +DOMAIN-SUFFIX,amazonaws.com +DOMAIN-SUFFIX,binaryoptionz.com +DOMAIN-SUFFIX,holacdn.com +DOMAIN-SUFFIX,hola-shopping.com +DOMAIN-SUFFIX,lum-bext.com +DOMAIN-SUFFIX,luminatinet.com +DOMAIN-SUFFIX,lum-lpm.com +DOMAIN-SUFFIX,lumtest.com +DOMAIN-SUFFIX,proxy-review.com +DOMAIN-SUFFIX,svd-cdn.com +DOMAIN-SUFFIX,thecodeil.com +DOMAIN-SUFFIX,tukif.com +DOMAIN-SUFFIX,whatismyippro.com +DOMAIN-SUFFIX,zon-networks.com +DOMAIN-SUFFIX,whoisit.co +DOMAIN-SUFFIX,hola.hk +DOMAIN-SUFFIX,lum-api.io +DOMAIN-SUFFIX,lum-cn.io +DOMAIN-SUFFIX,lum-ext.io +DOMAIN-SUFFIX,luminati.io +DOMAIN-SUFFIX,luminati-china.io +DOMAIN-SUFFIX,lum-int.io +DOMAIN-SUFFIX,lum-sdk.io +DOMAIN-SUFFIX,mynextphone.io +DOMAIN-SUFFIX,topvpn.io +DOMAIN-SUFFIX,l-agent.me +DOMAIN-SUFFIX,luminatichina.net +DOMAIN-SUFFIX,hola.org +DOMAIN-SUFFIX,holashop.org +DOMAIN-SUFFIX,h-vpn.org +DOMAIN-SUFFIX,hola.com.sg +DOMAIN-SUFFIX,hola.sk +DOMAIN-SUFFIX,proxyway.com +DOMAIN-SUFFIX,optmd.com +DOMAIN-SUFFIX,adincube.com +DOMAIN-SUFFIX,brdtest.co +DOMAIN-SUFFIX,l-cdn.com +DOMAIN-SUFFIX,earnapp.com +DOMAIN-SUFFIX,brightvpn.com +DOMAIN-SUFFIX,brightinitiative.com +DOMAIN-SUFFIX,brightdata.com +DOMAIN-SUFFIX,bright-sdk.com +DOMAIN-SUFFIX,brdtest.com +# .hola.ph # expired +DOMAIN-SUFFIX,hola.ph + # >> Google # DOMAIN-KEYWORD,adsense # unblocks adsense.google.com DOMAIN-KEYWORD,adwords