From ff60e86a773bb45ce805dc10fdeced3bf9715d09 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Mon, 11 Aug 2025 22:01:15 +0800 Subject: [PATCH] Remove dead/outdated/duplicated entries --- Build/tools-dedupe-src.ts | 64 ++++++++++++++++-------------- Source/domainset/cdn.conf | 2 - Source/domainset/download.conf | 2 - Source/domainset/reject.conf | 10 ----- Source/domainset/reject_extra.conf | 37 ----------------- 5 files changed, 34 insertions(+), 81 deletions(-) diff --git a/Build/tools-dedupe-src.ts b/Build/tools-dedupe-src.ts index 12732454..0365d372 100644 --- a/Build/tools-dedupe-src.ts +++ b/Build/tools-dedupe-src.ts @@ -4,7 +4,8 @@ import fsp from 'node:fs/promises'; import { SOURCE_DIR } from './constants/dir'; import { readFileByLine } from './lib/fetch-text-by-line'; import { processLine } from './lib/process-line'; -import { HostnameSmolTrie } from './lib/trie'; +import { HostnameSmolTrie, HostnameTrie } from './lib/trie'; +import { task } from './trace'; const ENFORCED_WHITELIST = [ 'hola.sk', @@ -20,10 +21,10 @@ const ENFORCED_WHITELIST = [ 'samsungqbe.com' ]; -const WHITELIST: string[] = ['.lightspeedmining.com', 'samsungqbe.com', '.zbeos.com', '.holashop.org', '.jdie.pl', '.sponsor.printondemandagency.com', '.bmcm.pw', '.vplay.life', '.hola.hk', '.peopleland.net', '.120bit.com', '.tekyboycrypto.xyz', '.rocketpool.pro', '.cryptoloot.pro', '.weminerpool.site', '.timg135.top', '.binance.associates', '.lafermedumineur.fr', '.goldencoin.online', '.hola.sk', '.hola.com.sg', '.acashtech.com', '.bitoreum.org', '.mixpools.org', '.decapool.net', '.taichicoin.org', '.luxxeeu.com']; +const WHITELIST: string[] = ['.dxdhd.com', '.tokto-motion.net', '.hola-shopping.com', '.luxxeeu.com', '.newzgames.com', '.hola.com.sg', 'pengtu.cc', '.cdn-js-query.com', 'samsungcloudsolution.net', 'samsungcloudsolution.com', 'static.estebull.com', '.drawservant.com', '.enjoy7plains.xyz', '.zmfindyourhalf.top', '.mineblocks.eu', '.cointaft.com', '.chain-pool.com', '.lamby-crypto.com', '.grftpool.com', '.onebtcplace.com', '.pepecore.com', '.punchsub.net', '.imzlabs.net', '.datapaw.net', '.smpool.net', '.yetimining.net', '.igrid.org', '.50centfreedom.us', '.cyg2016.xyz', '.easypool.xyz', '.arhash.xyz', '.enviromint.xyz', '.pool.space', '.anomp.cc', '.bitconnectpool.co', '.cryptopool.space', '.automatix.to', '.coolmine.to', '.coolpool.to', '.dpool.to', '.template-download.to', '.aurum7.to', '.sunpool.to', '.speedpool.to', '.cfcnet.to', '.pool.do', '.pool.bit34.com', '.eos.zhizhu.to', '.mubicdn.com', 'cdn.fastmediaing.com', '.webinfcdn.com', '.aosikaimage.com']; -(async () => { - const files = await new Fdir() +task(require.main === module, __filename)(async (span) => { + const files = await span.traceChildAsync('crawl thru all files', () => new Fdir() .withFullPaths() .filter((filepath, isDirectory) => { if (isDirectory) return true; @@ -33,55 +34,58 @@ const WHITELIST: string[] = ['.lightspeedmining.com', 'samsungqbe.com', '.zbeos. return extname !== '.js' && extname !== '.ts'; }) .crawl(SOURCE_DIR) - .withPromise(); + .withPromise()); - const whiteTrie = new HostnameSmolTrie(WHITELIST); - ENFORCED_WHITELIST.forEach((item) => whiteTrie.whitelist(item)); - const whitelist = whiteTrie.dump(); + const whiteTrie = span.traceChildSync('build whitelist trie', () => { + const trie = new HostnameSmolTrie(WHITELIST); + ENFORCED_WHITELIST.forEach((item) => trie.whitelist(item)); + return trie; + }); - await Promise.all(files.map(file => dedupeFile(file, whitelist))); -})(); + await Promise.all(files.map(file => span.traceChildAsync('dedupe ' + file, () => dedupeFile(file, whiteTrie)))); +}); -async function dedupeFile(file: string, whitelist: string[]) { - const set = new Set(); +async function dedupeFile(file: string, whitelist: HostnameSmolTrie) { const result: string[] = []; + const trie = new HostnameTrie(); + for await (const l of readFileByLine(file)) { const line = processLine(l); + if (!line) { if (l.startsWith('# $ skip_dedupe_src')) { return; } - result.push(l); + result.push(l); // keep all comments and blank lines continue; } - if (set.has(line)) { - continue; + if (trie.has(line)) { + continue; // drop duplicate } - // We can't use a trie here since we need to keep the order - if (whitelist.some((whiteItem) => isDomainSuffix(whiteItem, line))) { - continue; + if (whitelist.has(line)) { + continue; // drop whitelisted items } - set.add(line); + trie.add(line); result.push(line); } return fsp.writeFile(file, result.join('\n') + '\n'); } -function isDomainSuffix(whiteItem: string, incomingItem: string) { - const whiteIncludeDomain = whiteItem[0] === '.'; - whiteItem = whiteItem[0] === '.' ? whiteItem.slice(1) : whiteItem; +// function isDomainSuffix(whiteItem: string, incomingItem: string) { +// const whiteIncludeDomain = whiteItem[0] === '.'; +// whiteItem = whiteItem[0] === '.' ? whiteItem.slice(1) : whiteItem; - if (whiteItem === incomingItem) { - return true; // as long as exact match, we don't care if subdomain is included or not - } - if (whiteIncludeDomain) { - return incomingItem.endsWith('.' + whiteItem); - } - return false; -} +// if (whiteItem === incomingItem) { +// return true; // as long as exact match, we don't care if subdomain is included or not +// } +// if (whiteIncludeDomain) { +// return incomingItem.endsWith('.' + whiteItem); +// } +// return false; +// } diff --git a/Source/domainset/cdn.conf b/Source/domainset/cdn.conf index 9c0a8b93..c28ad371 100644 --- a/Source/domainset/cdn.conf +++ b/Source/domainset/cdn.conf @@ -4290,7 +4290,6 @@ cdn1.techbang.com cdn2.techbang.com cdn0-i.techbang.com s3.sitepoint.com -assets.getpocket.com hips.hearstapps.com media.newyorker.com .pocket-image-cache.com @@ -4470,7 +4469,6 @@ cdn-img3.iporntv.net .mxmcdn.net .myspacecdn.com media.assettype.com -gadgets360cdn.com amp.akamaized.net d.newsweek.com g.newsweek.com diff --git a/Source/domainset/download.conf b/Source/domainset/download.conf index 047efee7..5bcf915e 100644 --- a/Source/domainset/download.conf +++ b/Source/domainset/download.conf @@ -758,7 +758,6 @@ mirror.netcologne.de cpan.noris.de ftp.hosteurope.de mirror.bibleonline.ru -mirrors.dotsrc.org ftp.rediris.es osl.ugr.es cpan.ip-connect.vn.ua @@ -906,7 +905,6 @@ mirrors.iu13.net mirror.leitecastro.com mirrors.xmissions.com kr.freebsd.org -nic.funet.fi mirror.ossplanet.net repository.su lysator.liu.se diff --git a/Source/domainset/reject.conf b/Source/domainset/reject.conf index b427989c..92d9b1c2 100644 --- a/Source/domainset/reject.conf +++ b/Source/domainset/reject.conf @@ -112,7 +112,6 @@ bad.third-party.site .oastify.com # Looks like public cdn, in fact location.href hijacker .js-query.com -.cdn-js-query.com # Network Util Tools Scam .clashforwindows.net .clashforwindows.org @@ -1433,8 +1432,6 @@ ceuswatcab01.blob.core.windows.net ac3.msn.com ads.eu.msn.com ads.msn.com -ads1.msn.com -ads2.msn.com adsyndication.msn.com analytics.msn.com c.msn.com @@ -1920,7 +1917,6 @@ show.look.360.cn mclean.lato.cloud.360safe.com mvconf.lato.cloud.360safe.com mclean.cloud.360safe.com -mvconf.cloud.360safe.com mclean.uk.cloud.360safe.com mvconf.uk.cloud.360safe.com aicleaner.shouji.360.cn @@ -2084,7 +2080,6 @@ union.gdtimg.com # v2.gdt.qq.com # win.gdt.qq.com pgdt.gtimg.cn -pgdt.ugdtimg.com pingma.qq.com sngmta.qq.com splashqqlive.gtimg.com @@ -2108,7 +2103,6 @@ wxsnsdythumb.wxs.qq.com adsmind.gdtimg.com adsmind.ugdtimg.com qzs.gdtimg.com -qzs.qq.com rmonitor.qq.com sdk.e.qq.com sdkconfig.video.qq.com @@ -2660,7 +2654,6 @@ biz.live.xunlei.com ct.niu.xunlei.com mou.niu.xunlei.com scene.vip.xunlei.com -advertpay.vip.xunlei.com static.m.sjzhushou.com etl.xlmc.sandai.net @@ -2707,7 +2700,6 @@ mqtt.zhihu.com .monsetting.toutiao.com .mon.zijieapi.com .ad.zijieapi.com -log.snssdk.com toblog.ctobsnssdk.com frontier-aweme-hl.snssdk.com @@ -2784,7 +2776,6 @@ adm.10jqka.com.cn stat.10jqka.com.cn # >> UC -applog.uc.cn applog-perf.uc.cn applog.ucdns.uc.cn gjapplog.uc.cn @@ -3285,7 +3276,6 @@ ad.where.com ftpcontent.worldnow.com ads.saymedia.com adcontent.saymedia.com -static.estebull.com go.vrvm.com c.vrvm.com .phluant.com diff --git a/Source/domainset/reject_extra.conf b/Source/domainset/reject_extra.conf index 7436684f..4f2ed2b3 100644 --- a/Source/domainset/reject_extra.conf +++ b/Source/domainset/reject_extra.conf @@ -163,7 +163,6 @@ nimiq.terorie.com .cosmosjackson.com .decoroustitle.com .decoycreation.com -.drawservant.com .energeticexample.com .evanescentedge.com .farethief.com @@ -321,7 +320,6 @@ nimiq.terorie.com .crydconnect.com .danceview.ru .easycucina.net -.enjoy7plains.xyz .exploreshops.net .findallgainssurvey.top .findallgainsurvey.top @@ -355,7 +353,6 @@ nimiq.terorie.com .uservalidate.xyz .wiki-review.net .worldsguide.net -.zmfindyourhalf.top .zmprofitsurvey.top # >> Migrate from CoinBlockerLists @@ -410,7 +407,6 @@ nimiq.terorie.com .guugll.eu .hashgate.eu .poolbe.eu -.mineblocks.eu .minergalaxy.eu .xmrpool.eu .multicoin.eu @@ -912,7 +908,6 @@ nimiq.terorie.com .swinemine.com .apple-bitcoin.com .coinminex.com -.cointaft.com .aprilcoin.com .virdpool.com .arbitracoin.com @@ -1119,7 +1114,6 @@ nimiq.terorie.com .statdynamic.com .poolflare.com .maxeter.com -.chain-pool.com .coleganet.com .iqmining.com .chekazpools.com @@ -1256,9 +1250,7 @@ nimiq.terorie.com .fsocietychain.com .gainprox.com .kattcoin.com -.lamby-crypto.com .nikitonium.com -.pepecore.com .pinchpool.com .redblockcoin.com .sevabit.com @@ -1295,7 +1287,6 @@ nimiq.terorie.com .monxpool.com .gpugold.com .greenchiapool.com -.grftpool.com .grosscrypto.com .haopool.com .raspi-ninja.com @@ -1308,7 +1299,6 @@ nimiq.terorie.com .upxpool.com .xmrminerpro.com .stakeunited.com -.onebtcplace.com .papoto.com .infamylists.com .inhive.com @@ -1749,7 +1739,6 @@ nimiq.terorie.com .cpu-pool.net .cryptotab.net .notallmine.net -.datapaw.net .minershq.net .deltapool.net .deepbit.net @@ -1772,7 +1761,6 @@ nimiq.terorie.com .gay-hotvideo.net .hashhorse.net .nourpool.net -.imzlabs.net .kinohabr.net .kisshentai.net .lyncoin.net @@ -1781,13 +1769,10 @@ nimiq.terorie.com .monero-miner.net .mycoinwallet.net .nimiqtest.net -.punchsub.net .roastedvolt.net -.smpool.net .sweetbook.net .vcrypt.net .vidfile.net -.yetimining.net .yobit.net .serverpower.net .haqo.net @@ -1979,7 +1964,6 @@ nimiq.terorie.com .globalpool.org .oddpools.org .luckpool.org -.igrid.org .keepool.org .kosmoplovci.org .kroma.org @@ -2076,7 +2060,6 @@ nimiq.terorie.com .1ds.us .p2poolmining.us .multipool.us -.50centfreedom.us .binance.us .acmining.us .gridcoin.us @@ -2094,11 +2077,9 @@ nimiq.terorie.com .fungibly.xyz .cryptopool.xyz .ionize.xyz -.easypool.xyz .blockcrushers.xyz .deipool.xyz .cojin.xyz -.arhash.xyz .p2p-spb.xyz .pecadol.xyz .303365.xyz @@ -2118,7 +2099,6 @@ nimiq.terorie.com .etcoin.xyz .avero.xyz .bitcoin-rebooted.xyz -.enviromint.xyz .blockify.xyz .newpool.xyz .capung.xyz @@ -2127,7 +2107,6 @@ nimiq.terorie.com .crpool.xyz .cryptominers.xyz .cryptopine.xyz -.cyg2016.xyz .xazab.xyz .elphyrecoin.xyz .flyhash.xyz @@ -2322,7 +2301,6 @@ nimiq.terorie.com .suprnova.cc .trustpool.cc .reactor.cc -.anomp.cc .dpool.cc .minero.cc .smartcash.cc @@ -2372,7 +2350,6 @@ nimiq.terorie.com .quickpool.tech .sia.tech .freecontent.date -.cryptopool.space .coinminer.space .hashing.space .goodzen.space @@ -2385,7 +2362,6 @@ nimiq.terorie.com .infinium.space .mineradnow.space .ukkey3.space -.pool.space .blocx.space .cryptomorons.space .blockhunters.space @@ -2459,7 +2435,6 @@ nimiq.terorie.com .vectorium.co .azakus.co .bitconnect.co -.bitconnectpool.co .onepool.co .hashcoin.co .okcash.co @@ -2725,12 +2700,7 @@ nimiq.terorie.com .hash.green .btc.to .hashrate.to -.aurum7.to -.automatix.to .xmr.to -.coolmine.to -.coolpool.to -.dpool.to .estream.to .foxx.to .planet.to @@ -2739,12 +2709,8 @@ nimiq.terorie.com .miningpool.to .piratebay.to .rig.to -.speedpool.to .streamplay.to -.sunpool.to -.template-download.to .more.to -.cfcnet.to .darkco.in .10xbitco.in .freico.in @@ -2885,7 +2851,6 @@ nimiq.terorie.com .zona.pl .worldcoin.global .fireants.global -.pool.do .datasecu.download .jqwww.download .mine.bz @@ -3132,7 +3097,6 @@ kingsminer.ddnsking.com .pepperscorecoin.wixsite.com .plugin.brfiles.com .pool.4i7i.com -.pool.bit34.com .pool.groupfabric.com .pool.paprikaex.com .pool.stalwartbucks.com @@ -3435,7 +3399,6 @@ d3iz6lralvg77g.cloudfront.net .evolution-project.go.ro .max.csrss.website .monero.us.to -.eos.zhizhu.to .a45.bulehero.in .a46.bulehero.in .a88.bulehero.in