Remove dead/outdated/duplicated entries

This commit is contained in:
SukkaW 2025-08-11 22:01:15 +08:00
parent 59812f38e5
commit ff60e86a77
5 changed files with 34 additions and 81 deletions

View File

@ -4,7 +4,8 @@ import fsp from 'node:fs/promises';
import { SOURCE_DIR } from './constants/dir'; import { SOURCE_DIR } from './constants/dir';
import { readFileByLine } from './lib/fetch-text-by-line'; import { readFileByLine } from './lib/fetch-text-by-line';
import { processLine } from './lib/process-line'; import { processLine } from './lib/process-line';
import { HostnameSmolTrie } from './lib/trie'; import { HostnameSmolTrie, HostnameTrie } from './lib/trie';
import { task } from './trace';
const ENFORCED_WHITELIST = [ const ENFORCED_WHITELIST = [
'hola.sk', 'hola.sk',
@ -20,10 +21,10 @@ const ENFORCED_WHITELIST = [
'samsungqbe.com' 'samsungqbe.com'
]; ];
const WHITELIST: string[] = ['.lightspeedmining.com', 'samsungqbe.com', '.zbeos.com', '.holashop.org', '.jdie.pl', '.sponsor.printondemandagency.com', '.bmcm.pw', '.vplay.life', '.hola.hk', '.peopleland.net', '.120bit.com', '.tekyboycrypto.xyz', '.rocketpool.pro', '.cryptoloot.pro', '.weminerpool.site', '.timg135.top', '.binance.associates', '.lafermedumineur.fr', '.goldencoin.online', '.hola.sk', '.hola.com.sg', '.acashtech.com', '.bitoreum.org', '.mixpools.org', '.decapool.net', '.taichicoin.org', '.luxxeeu.com']; const WHITELIST: string[] = ['.dxdhd.com', '.tokto-motion.net', '.hola-shopping.com', '.luxxeeu.com', '.newzgames.com', '.hola.com.sg', 'pengtu.cc', '.cdn-js-query.com', 'samsungcloudsolution.net', 'samsungcloudsolution.com', 'static.estebull.com', '.drawservant.com', '.enjoy7plains.xyz', '.zmfindyourhalf.top', '.mineblocks.eu', '.cointaft.com', '.chain-pool.com', '.lamby-crypto.com', '.grftpool.com', '.onebtcplace.com', '.pepecore.com', '.punchsub.net', '.imzlabs.net', '.datapaw.net', '.smpool.net', '.yetimining.net', '.igrid.org', '.50centfreedom.us', '.cyg2016.xyz', '.easypool.xyz', '.arhash.xyz', '.enviromint.xyz', '.pool.space', '.anomp.cc', '.bitconnectpool.co', '.cryptopool.space', '.automatix.to', '.coolmine.to', '.coolpool.to', '.dpool.to', '.template-download.to', '.aurum7.to', '.sunpool.to', '.speedpool.to', '.cfcnet.to', '.pool.do', '.pool.bit34.com', '.eos.zhizhu.to', '.mubicdn.com', 'cdn.fastmediaing.com', '.webinfcdn.com', '.aosikaimage.com'];
(async () => { task(require.main === module, __filename)(async (span) => {
const files = await new Fdir() const files = await span.traceChildAsync('crawl thru all files', () => new Fdir()
.withFullPaths() .withFullPaths()
.filter((filepath, isDirectory) => { .filter((filepath, isDirectory) => {
if (isDirectory) return true; if (isDirectory) return true;
@ -33,55 +34,58 @@ const WHITELIST: string[] = ['.lightspeedmining.com', 'samsungqbe.com', '.zbeos.
return extname !== '.js' && extname !== '.ts'; return extname !== '.js' && extname !== '.ts';
}) })
.crawl(SOURCE_DIR) .crawl(SOURCE_DIR)
.withPromise(); .withPromise());
const whiteTrie = new HostnameSmolTrie(WHITELIST); const whiteTrie = span.traceChildSync('build whitelist trie', () => {
ENFORCED_WHITELIST.forEach((item) => whiteTrie.whitelist(item)); const trie = new HostnameSmolTrie(WHITELIST);
const whitelist = whiteTrie.dump(); ENFORCED_WHITELIST.forEach((item) => trie.whitelist(item));
return trie;
});
await Promise.all(files.map(file => dedupeFile(file, whitelist))); await Promise.all(files.map(file => span.traceChildAsync('dedupe ' + file, () => dedupeFile(file, whiteTrie))));
})(); });
async function dedupeFile(file: string, whitelist: string[]) { async function dedupeFile(file: string, whitelist: HostnameSmolTrie) {
const set = new Set<string>();
const result: string[] = []; const result: string[] = [];
const trie = new HostnameTrie();
for await (const l of readFileByLine(file)) { for await (const l of readFileByLine(file)) {
const line = processLine(l); const line = processLine(l);
if (!line) { if (!line) {
if (l.startsWith('# $ skip_dedupe_src')) { if (l.startsWith('# $ skip_dedupe_src')) {
return; return;
} }
result.push(l); result.push(l); // keep all comments and blank lines
continue; continue;
} }
if (set.has(line)) { if (trie.has(line)) {
continue; continue; // drop duplicate
} }
// We can't use a trie here since we need to keep the order if (whitelist.has(line)) {
if (whitelist.some((whiteItem) => isDomainSuffix(whiteItem, line))) { continue; // drop whitelisted items
continue;
} }
set.add(line); trie.add(line);
result.push(line); result.push(line);
} }
return fsp.writeFile(file, result.join('\n') + '\n'); return fsp.writeFile(file, result.join('\n') + '\n');
} }
function isDomainSuffix(whiteItem: string, incomingItem: string) { // function isDomainSuffix(whiteItem: string, incomingItem: string) {
const whiteIncludeDomain = whiteItem[0] === '.'; // const whiteIncludeDomain = whiteItem[0] === '.';
whiteItem = whiteItem[0] === '.' ? whiteItem.slice(1) : whiteItem; // whiteItem = whiteItem[0] === '.' ? whiteItem.slice(1) : whiteItem;
if (whiteItem === incomingItem) { // if (whiteItem === incomingItem) {
return true; // as long as exact match, we don't care if subdomain is included or not // return true; // as long as exact match, we don't care if subdomain is included or not
} // }
if (whiteIncludeDomain) { // if (whiteIncludeDomain) {
return incomingItem.endsWith('.' + whiteItem); // return incomingItem.endsWith('.' + whiteItem);
} // }
return false; // return false;
} // }

View File

@ -4290,7 +4290,6 @@ cdn1.techbang.com
cdn2.techbang.com cdn2.techbang.com
cdn0-i.techbang.com cdn0-i.techbang.com
s3.sitepoint.com s3.sitepoint.com
assets.getpocket.com
hips.hearstapps.com hips.hearstapps.com
media.newyorker.com media.newyorker.com
.pocket-image-cache.com .pocket-image-cache.com
@ -4470,7 +4469,6 @@ cdn-img3.iporntv.net
.mxmcdn.net .mxmcdn.net
.myspacecdn.com .myspacecdn.com
media.assettype.com media.assettype.com
gadgets360cdn.com
amp.akamaized.net amp.akamaized.net
d.newsweek.com d.newsweek.com
g.newsweek.com g.newsweek.com

View File

@ -758,7 +758,6 @@ mirror.netcologne.de
cpan.noris.de cpan.noris.de
ftp.hosteurope.de ftp.hosteurope.de
mirror.bibleonline.ru mirror.bibleonline.ru
mirrors.dotsrc.org
ftp.rediris.es ftp.rediris.es
osl.ugr.es osl.ugr.es
cpan.ip-connect.vn.ua cpan.ip-connect.vn.ua
@ -906,7 +905,6 @@ mirrors.iu13.net
mirror.leitecastro.com mirror.leitecastro.com
mirrors.xmissions.com mirrors.xmissions.com
kr.freebsd.org kr.freebsd.org
nic.funet.fi
mirror.ossplanet.net mirror.ossplanet.net
repository.su repository.su
lysator.liu.se lysator.liu.se

View File

@ -112,7 +112,6 @@ bad.third-party.site
.oastify.com .oastify.com
# Looks like public cdn, in fact location.href hijacker # Looks like public cdn, in fact location.href hijacker
.js-query.com .js-query.com
.cdn-js-query.com
# Network Util Tools Scam # Network Util Tools Scam
.clashforwindows.net .clashforwindows.net
.clashforwindows.org .clashforwindows.org
@ -1433,8 +1432,6 @@ ceuswatcab01.blob.core.windows.net
ac3.msn.com ac3.msn.com
ads.eu.msn.com ads.eu.msn.com
ads.msn.com ads.msn.com
ads1.msn.com
ads2.msn.com
adsyndication.msn.com adsyndication.msn.com
analytics.msn.com analytics.msn.com
c.msn.com c.msn.com
@ -1920,7 +1917,6 @@ show.look.360.cn
mclean.lato.cloud.360safe.com mclean.lato.cloud.360safe.com
mvconf.lato.cloud.360safe.com mvconf.lato.cloud.360safe.com
mclean.cloud.360safe.com mclean.cloud.360safe.com
mvconf.cloud.360safe.com
mclean.uk.cloud.360safe.com mclean.uk.cloud.360safe.com
mvconf.uk.cloud.360safe.com mvconf.uk.cloud.360safe.com
aicleaner.shouji.360.cn aicleaner.shouji.360.cn
@ -2084,7 +2080,6 @@ union.gdtimg.com
# v2.gdt.qq.com # v2.gdt.qq.com
# win.gdt.qq.com # win.gdt.qq.com
pgdt.gtimg.cn pgdt.gtimg.cn
pgdt.ugdtimg.com
pingma.qq.com pingma.qq.com
sngmta.qq.com sngmta.qq.com
splashqqlive.gtimg.com splashqqlive.gtimg.com
@ -2108,7 +2103,6 @@ wxsnsdythumb.wxs.qq.com
adsmind.gdtimg.com adsmind.gdtimg.com
adsmind.ugdtimg.com adsmind.ugdtimg.com
qzs.gdtimg.com qzs.gdtimg.com
qzs.qq.com
rmonitor.qq.com rmonitor.qq.com
sdk.e.qq.com sdk.e.qq.com
sdkconfig.video.qq.com sdkconfig.video.qq.com
@ -2660,7 +2654,6 @@ biz.live.xunlei.com
ct.niu.xunlei.com ct.niu.xunlei.com
mou.niu.xunlei.com mou.niu.xunlei.com
scene.vip.xunlei.com scene.vip.xunlei.com
advertpay.vip.xunlei.com
static.m.sjzhushou.com static.m.sjzhushou.com
etl.xlmc.sandai.net etl.xlmc.sandai.net
@ -2707,7 +2700,6 @@ mqtt.zhihu.com
.monsetting.toutiao.com .monsetting.toutiao.com
.mon.zijieapi.com .mon.zijieapi.com
.ad.zijieapi.com .ad.zijieapi.com
log.snssdk.com
toblog.ctobsnssdk.com toblog.ctobsnssdk.com
frontier-aweme-hl.snssdk.com frontier-aweme-hl.snssdk.com
@ -2784,7 +2776,6 @@ adm.10jqka.com.cn
stat.10jqka.com.cn stat.10jqka.com.cn
# >> UC # >> UC
applog.uc.cn
applog-perf.uc.cn applog-perf.uc.cn
applog.ucdns.uc.cn applog.ucdns.uc.cn
gjapplog.uc.cn gjapplog.uc.cn
@ -3285,7 +3276,6 @@ ad.where.com
ftpcontent.worldnow.com ftpcontent.worldnow.com
ads.saymedia.com ads.saymedia.com
adcontent.saymedia.com adcontent.saymedia.com
static.estebull.com
go.vrvm.com go.vrvm.com
c.vrvm.com c.vrvm.com
.phluant.com .phluant.com

View File

@ -163,7 +163,6 @@ nimiq.terorie.com
.cosmosjackson.com .cosmosjackson.com
.decoroustitle.com .decoroustitle.com
.decoycreation.com .decoycreation.com
.drawservant.com
.energeticexample.com .energeticexample.com
.evanescentedge.com .evanescentedge.com
.farethief.com .farethief.com
@ -321,7 +320,6 @@ nimiq.terorie.com
.crydconnect.com .crydconnect.com
.danceview.ru .danceview.ru
.easycucina.net .easycucina.net
.enjoy7plains.xyz
.exploreshops.net .exploreshops.net
.findallgainssurvey.top .findallgainssurvey.top
.findallgainsurvey.top .findallgainsurvey.top
@ -355,7 +353,6 @@ nimiq.terorie.com
.uservalidate.xyz .uservalidate.xyz
.wiki-review.net .wiki-review.net
.worldsguide.net .worldsguide.net
.zmfindyourhalf.top
.zmprofitsurvey.top .zmprofitsurvey.top
# >> Migrate from CoinBlockerLists # >> Migrate from CoinBlockerLists
@ -410,7 +407,6 @@ nimiq.terorie.com
.guugll.eu .guugll.eu
.hashgate.eu .hashgate.eu
.poolbe.eu .poolbe.eu
.mineblocks.eu
.minergalaxy.eu .minergalaxy.eu
.xmrpool.eu .xmrpool.eu
.multicoin.eu .multicoin.eu
@ -912,7 +908,6 @@ nimiq.terorie.com
.swinemine.com .swinemine.com
.apple-bitcoin.com .apple-bitcoin.com
.coinminex.com .coinminex.com
.cointaft.com
.aprilcoin.com .aprilcoin.com
.virdpool.com .virdpool.com
.arbitracoin.com .arbitracoin.com
@ -1119,7 +1114,6 @@ nimiq.terorie.com
.statdynamic.com .statdynamic.com
.poolflare.com .poolflare.com
.maxeter.com .maxeter.com
.chain-pool.com
.coleganet.com .coleganet.com
.iqmining.com .iqmining.com
.chekazpools.com .chekazpools.com
@ -1256,9 +1250,7 @@ nimiq.terorie.com
.fsocietychain.com .fsocietychain.com
.gainprox.com .gainprox.com
.kattcoin.com .kattcoin.com
.lamby-crypto.com
.nikitonium.com .nikitonium.com
.pepecore.com
.pinchpool.com .pinchpool.com
.redblockcoin.com .redblockcoin.com
.sevabit.com .sevabit.com
@ -1295,7 +1287,6 @@ nimiq.terorie.com
.monxpool.com .monxpool.com
.gpugold.com .gpugold.com
.greenchiapool.com .greenchiapool.com
.grftpool.com
.grosscrypto.com .grosscrypto.com
.haopool.com .haopool.com
.raspi-ninja.com .raspi-ninja.com
@ -1308,7 +1299,6 @@ nimiq.terorie.com
.upxpool.com .upxpool.com
.xmrminerpro.com .xmrminerpro.com
.stakeunited.com .stakeunited.com
.onebtcplace.com
.papoto.com .papoto.com
.infamylists.com .infamylists.com
.inhive.com .inhive.com
@ -1749,7 +1739,6 @@ nimiq.terorie.com
.cpu-pool.net .cpu-pool.net
.cryptotab.net .cryptotab.net
.notallmine.net .notallmine.net
.datapaw.net
.minershq.net .minershq.net
.deltapool.net .deltapool.net
.deepbit.net .deepbit.net
@ -1772,7 +1761,6 @@ nimiq.terorie.com
.gay-hotvideo.net .gay-hotvideo.net
.hashhorse.net .hashhorse.net
.nourpool.net .nourpool.net
.imzlabs.net
.kinohabr.net .kinohabr.net
.kisshentai.net .kisshentai.net
.lyncoin.net .lyncoin.net
@ -1781,13 +1769,10 @@ nimiq.terorie.com
.monero-miner.net .monero-miner.net
.mycoinwallet.net .mycoinwallet.net
.nimiqtest.net .nimiqtest.net
.punchsub.net
.roastedvolt.net .roastedvolt.net
.smpool.net
.sweetbook.net .sweetbook.net
.vcrypt.net .vcrypt.net
.vidfile.net .vidfile.net
.yetimining.net
.yobit.net .yobit.net
.serverpower.net .serverpower.net
.haqo.net .haqo.net
@ -1979,7 +1964,6 @@ nimiq.terorie.com
.globalpool.org .globalpool.org
.oddpools.org .oddpools.org
.luckpool.org .luckpool.org
.igrid.org
.keepool.org .keepool.org
.kosmoplovci.org .kosmoplovci.org
.kroma.org .kroma.org
@ -2076,7 +2060,6 @@ nimiq.terorie.com
.1ds.us .1ds.us
.p2poolmining.us .p2poolmining.us
.multipool.us .multipool.us
.50centfreedom.us
.binance.us .binance.us
.acmining.us .acmining.us
.gridcoin.us .gridcoin.us
@ -2094,11 +2077,9 @@ nimiq.terorie.com
.fungibly.xyz .fungibly.xyz
.cryptopool.xyz .cryptopool.xyz
.ionize.xyz .ionize.xyz
.easypool.xyz
.blockcrushers.xyz .blockcrushers.xyz
.deipool.xyz .deipool.xyz
.cojin.xyz .cojin.xyz
.arhash.xyz
.p2p-spb.xyz .p2p-spb.xyz
.pecadol.xyz .pecadol.xyz
.303365.xyz .303365.xyz
@ -2118,7 +2099,6 @@ nimiq.terorie.com
.etcoin.xyz .etcoin.xyz
.avero.xyz .avero.xyz
.bitcoin-rebooted.xyz .bitcoin-rebooted.xyz
.enviromint.xyz
.blockify.xyz .blockify.xyz
.newpool.xyz .newpool.xyz
.capung.xyz .capung.xyz
@ -2127,7 +2107,6 @@ nimiq.terorie.com
.crpool.xyz .crpool.xyz
.cryptominers.xyz .cryptominers.xyz
.cryptopine.xyz .cryptopine.xyz
.cyg2016.xyz
.xazab.xyz .xazab.xyz
.elphyrecoin.xyz .elphyrecoin.xyz
.flyhash.xyz .flyhash.xyz
@ -2322,7 +2301,6 @@ nimiq.terorie.com
.suprnova.cc .suprnova.cc
.trustpool.cc .trustpool.cc
.reactor.cc .reactor.cc
.anomp.cc
.dpool.cc .dpool.cc
.minero.cc .minero.cc
.smartcash.cc .smartcash.cc
@ -2372,7 +2350,6 @@ nimiq.terorie.com
.quickpool.tech .quickpool.tech
.sia.tech .sia.tech
.freecontent.date .freecontent.date
.cryptopool.space
.coinminer.space .coinminer.space
.hashing.space .hashing.space
.goodzen.space .goodzen.space
@ -2385,7 +2362,6 @@ nimiq.terorie.com
.infinium.space .infinium.space
.mineradnow.space .mineradnow.space
.ukkey3.space .ukkey3.space
.pool.space
.blocx.space .blocx.space
.cryptomorons.space .cryptomorons.space
.blockhunters.space .blockhunters.space
@ -2459,7 +2435,6 @@ nimiq.terorie.com
.vectorium.co .vectorium.co
.azakus.co .azakus.co
.bitconnect.co .bitconnect.co
.bitconnectpool.co
.onepool.co .onepool.co
.hashcoin.co .hashcoin.co
.okcash.co .okcash.co
@ -2725,12 +2700,7 @@ nimiq.terorie.com
.hash.green .hash.green
.btc.to .btc.to
.hashrate.to .hashrate.to
.aurum7.to
.automatix.to
.xmr.to .xmr.to
.coolmine.to
.coolpool.to
.dpool.to
.estream.to .estream.to
.foxx.to .foxx.to
.planet.to .planet.to
@ -2739,12 +2709,8 @@ nimiq.terorie.com
.miningpool.to .miningpool.to
.piratebay.to .piratebay.to
.rig.to .rig.to
.speedpool.to
.streamplay.to .streamplay.to
.sunpool.to
.template-download.to
.more.to .more.to
.cfcnet.to
.darkco.in .darkco.in
.10xbitco.in .10xbitco.in
.freico.in .freico.in
@ -2885,7 +2851,6 @@ nimiq.terorie.com
.zona.pl .zona.pl
.worldcoin.global .worldcoin.global
.fireants.global .fireants.global
.pool.do
.datasecu.download .datasecu.download
.jqwww.download .jqwww.download
.mine.bz .mine.bz
@ -3132,7 +3097,6 @@ kingsminer.ddnsking.com
.pepperscorecoin.wixsite.com .pepperscorecoin.wixsite.com
.plugin.brfiles.com .plugin.brfiles.com
.pool.4i7i.com .pool.4i7i.com
.pool.bit34.com
.pool.groupfabric.com .pool.groupfabric.com
.pool.paprikaex.com .pool.paprikaex.com
.pool.stalwartbucks.com .pool.stalwartbucks.com
@ -3435,7 +3399,6 @@ d3iz6lralvg77g.cloudfront.net
.evolution-project.go.ro .evolution-project.go.ro
.max.csrss.website .max.csrss.website
.monero.us.to .monero.us.to
.eos.zhizhu.to
.a45.bulehero.in .a45.bulehero.in
.a46.bulehero.in .a46.bulehero.in
.a88.bulehero.in .a88.bulehero.in