Remove dead/outdated/duplicated entries

This commit is contained in:
SukkaW 2025-08-11 22:01:15 +08:00
parent 59812f38e5
commit ff60e86a77
5 changed files with 34 additions and 81 deletions

View File

@ -4,7 +4,8 @@ import fsp from 'node:fs/promises';
import { SOURCE_DIR } from './constants/dir';
import { readFileByLine } from './lib/fetch-text-by-line';
import { processLine } from './lib/process-line';
import { HostnameSmolTrie } from './lib/trie';
import { HostnameSmolTrie, HostnameTrie } from './lib/trie';
import { task } from './trace';
const ENFORCED_WHITELIST = [
'hola.sk',
@ -20,10 +21,10 @@ const ENFORCED_WHITELIST = [
'samsungqbe.com'
];
const WHITELIST: string[] = ['.lightspeedmining.com', 'samsungqbe.com', '.zbeos.com', '.holashop.org', '.jdie.pl', '.sponsor.printondemandagency.com', '.bmcm.pw', '.vplay.life', '.hola.hk', '.peopleland.net', '.120bit.com', '.tekyboycrypto.xyz', '.rocketpool.pro', '.cryptoloot.pro', '.weminerpool.site', '.timg135.top', '.binance.associates', '.lafermedumineur.fr', '.goldencoin.online', '.hola.sk', '.hola.com.sg', '.acashtech.com', '.bitoreum.org', '.mixpools.org', '.decapool.net', '.taichicoin.org', '.luxxeeu.com'];
const WHITELIST: string[] = ['.dxdhd.com', '.tokto-motion.net', '.hola-shopping.com', '.luxxeeu.com', '.newzgames.com', '.hola.com.sg', 'pengtu.cc', '.cdn-js-query.com', 'samsungcloudsolution.net', 'samsungcloudsolution.com', 'static.estebull.com', '.drawservant.com', '.enjoy7plains.xyz', '.zmfindyourhalf.top', '.mineblocks.eu', '.cointaft.com', '.chain-pool.com', '.lamby-crypto.com', '.grftpool.com', '.onebtcplace.com', '.pepecore.com', '.punchsub.net', '.imzlabs.net', '.datapaw.net', '.smpool.net', '.yetimining.net', '.igrid.org', '.50centfreedom.us', '.cyg2016.xyz', '.easypool.xyz', '.arhash.xyz', '.enviromint.xyz', '.pool.space', '.anomp.cc', '.bitconnectpool.co', '.cryptopool.space', '.automatix.to', '.coolmine.to', '.coolpool.to', '.dpool.to', '.template-download.to', '.aurum7.to', '.sunpool.to', '.speedpool.to', '.cfcnet.to', '.pool.do', '.pool.bit34.com', '.eos.zhizhu.to', '.mubicdn.com', 'cdn.fastmediaing.com', '.webinfcdn.com', '.aosikaimage.com'];
(async () => {
const files = await new Fdir()
task(require.main === module, __filename)(async (span) => {
const files = await span.traceChildAsync('crawl thru all files', () => new Fdir()
.withFullPaths()
.filter((filepath, isDirectory) => {
if (isDirectory) return true;
@ -33,55 +34,58 @@ const WHITELIST: string[] = ['.lightspeedmining.com', 'samsungqbe.com', '.zbeos.
return extname !== '.js' && extname !== '.ts';
})
.crawl(SOURCE_DIR)
.withPromise();
.withPromise());
const whiteTrie = new HostnameSmolTrie(WHITELIST);
ENFORCED_WHITELIST.forEach((item) => whiteTrie.whitelist(item));
const whitelist = whiteTrie.dump();
const whiteTrie = span.traceChildSync('build whitelist trie', () => {
const trie = new HostnameSmolTrie(WHITELIST);
ENFORCED_WHITELIST.forEach((item) => trie.whitelist(item));
return trie;
});
await Promise.all(files.map(file => dedupeFile(file, whitelist)));
})();
await Promise.all(files.map(file => span.traceChildAsync('dedupe ' + file, () => dedupeFile(file, whiteTrie))));
});
async function dedupeFile(file: string, whitelist: string[]) {
const set = new Set<string>();
async function dedupeFile(file: string, whitelist: HostnameSmolTrie) {
const result: string[] = [];
const trie = new HostnameTrie();
for await (const l of readFileByLine(file)) {
const line = processLine(l);
if (!line) {
if (l.startsWith('# $ skip_dedupe_src')) {
return;
}
result.push(l);
result.push(l); // keep all comments and blank lines
continue;
}
if (set.has(line)) {
continue;
if (trie.has(line)) {
continue; // drop duplicate
}
// We can't use a trie here since we need to keep the order
if (whitelist.some((whiteItem) => isDomainSuffix(whiteItem, line))) {
continue;
if (whitelist.has(line)) {
continue; // drop whitelisted items
}
set.add(line);
trie.add(line);
result.push(line);
}
return fsp.writeFile(file, result.join('\n') + '\n');
}
function isDomainSuffix(whiteItem: string, incomingItem: string) {
const whiteIncludeDomain = whiteItem[0] === '.';
whiteItem = whiteItem[0] === '.' ? whiteItem.slice(1) : whiteItem;
// function isDomainSuffix(whiteItem: string, incomingItem: string) {
// const whiteIncludeDomain = whiteItem[0] === '.';
// whiteItem = whiteItem[0] === '.' ? whiteItem.slice(1) : whiteItem;
if (whiteItem === incomingItem) {
return true; // as long as exact match, we don't care if subdomain is included or not
}
if (whiteIncludeDomain) {
return incomingItem.endsWith('.' + whiteItem);
}
return false;
}
// if (whiteItem === incomingItem) {
// return true; // as long as exact match, we don't care if subdomain is included or not
// }
// if (whiteIncludeDomain) {
// return incomingItem.endsWith('.' + whiteItem);
// }
// return false;
// }

View File

@ -4290,7 +4290,6 @@ cdn1.techbang.com
cdn2.techbang.com
cdn0-i.techbang.com
s3.sitepoint.com
assets.getpocket.com
hips.hearstapps.com
media.newyorker.com
.pocket-image-cache.com
@ -4470,7 +4469,6 @@ cdn-img3.iporntv.net
.mxmcdn.net
.myspacecdn.com
media.assettype.com
gadgets360cdn.com
amp.akamaized.net
d.newsweek.com
g.newsweek.com

View File

@ -758,7 +758,6 @@ mirror.netcologne.de
cpan.noris.de
ftp.hosteurope.de
mirror.bibleonline.ru
mirrors.dotsrc.org
ftp.rediris.es
osl.ugr.es
cpan.ip-connect.vn.ua
@ -906,7 +905,6 @@ mirrors.iu13.net
mirror.leitecastro.com
mirrors.xmissions.com
kr.freebsd.org
nic.funet.fi
mirror.ossplanet.net
repository.su
lysator.liu.se

View File

@ -112,7 +112,6 @@ bad.third-party.site
.oastify.com
# Looks like public cdn, in fact location.href hijacker
.js-query.com
.cdn-js-query.com
# Network Util Tools Scam
.clashforwindows.net
.clashforwindows.org
@ -1433,8 +1432,6 @@ ceuswatcab01.blob.core.windows.net
ac3.msn.com
ads.eu.msn.com
ads.msn.com
ads1.msn.com
ads2.msn.com
adsyndication.msn.com
analytics.msn.com
c.msn.com
@ -1920,7 +1917,6 @@ show.look.360.cn
mclean.lato.cloud.360safe.com
mvconf.lato.cloud.360safe.com
mclean.cloud.360safe.com
mvconf.cloud.360safe.com
mclean.uk.cloud.360safe.com
mvconf.uk.cloud.360safe.com
aicleaner.shouji.360.cn
@ -2084,7 +2080,6 @@ union.gdtimg.com
# v2.gdt.qq.com
# win.gdt.qq.com
pgdt.gtimg.cn
pgdt.ugdtimg.com
pingma.qq.com
sngmta.qq.com
splashqqlive.gtimg.com
@ -2108,7 +2103,6 @@ wxsnsdythumb.wxs.qq.com
adsmind.gdtimg.com
adsmind.ugdtimg.com
qzs.gdtimg.com
qzs.qq.com
rmonitor.qq.com
sdk.e.qq.com
sdkconfig.video.qq.com
@ -2660,7 +2654,6 @@ biz.live.xunlei.com
ct.niu.xunlei.com
mou.niu.xunlei.com
scene.vip.xunlei.com
advertpay.vip.xunlei.com
static.m.sjzhushou.com
etl.xlmc.sandai.net
@ -2707,7 +2700,6 @@ mqtt.zhihu.com
.monsetting.toutiao.com
.mon.zijieapi.com
.ad.zijieapi.com
log.snssdk.com
toblog.ctobsnssdk.com
frontier-aweme-hl.snssdk.com
@ -2784,7 +2776,6 @@ adm.10jqka.com.cn
stat.10jqka.com.cn
# >> UC
applog.uc.cn
applog-perf.uc.cn
applog.ucdns.uc.cn
gjapplog.uc.cn
@ -3285,7 +3276,6 @@ ad.where.com
ftpcontent.worldnow.com
ads.saymedia.com
adcontent.saymedia.com
static.estebull.com
go.vrvm.com
c.vrvm.com
.phluant.com

View File

@ -163,7 +163,6 @@ nimiq.terorie.com
.cosmosjackson.com
.decoroustitle.com
.decoycreation.com
.drawservant.com
.energeticexample.com
.evanescentedge.com
.farethief.com
@ -321,7 +320,6 @@ nimiq.terorie.com
.crydconnect.com
.danceview.ru
.easycucina.net
.enjoy7plains.xyz
.exploreshops.net
.findallgainssurvey.top
.findallgainsurvey.top
@ -355,7 +353,6 @@ nimiq.terorie.com
.uservalidate.xyz
.wiki-review.net
.worldsguide.net
.zmfindyourhalf.top
.zmprofitsurvey.top
# >> Migrate from CoinBlockerLists
@ -410,7 +407,6 @@ nimiq.terorie.com
.guugll.eu
.hashgate.eu
.poolbe.eu
.mineblocks.eu
.minergalaxy.eu
.xmrpool.eu
.multicoin.eu
@ -912,7 +908,6 @@ nimiq.terorie.com
.swinemine.com
.apple-bitcoin.com
.coinminex.com
.cointaft.com
.aprilcoin.com
.virdpool.com
.arbitracoin.com
@ -1119,7 +1114,6 @@ nimiq.terorie.com
.statdynamic.com
.poolflare.com
.maxeter.com
.chain-pool.com
.coleganet.com
.iqmining.com
.chekazpools.com
@ -1256,9 +1250,7 @@ nimiq.terorie.com
.fsocietychain.com
.gainprox.com
.kattcoin.com
.lamby-crypto.com
.nikitonium.com
.pepecore.com
.pinchpool.com
.redblockcoin.com
.sevabit.com
@ -1295,7 +1287,6 @@ nimiq.terorie.com
.monxpool.com
.gpugold.com
.greenchiapool.com
.grftpool.com
.grosscrypto.com
.haopool.com
.raspi-ninja.com
@ -1308,7 +1299,6 @@ nimiq.terorie.com
.upxpool.com
.xmrminerpro.com
.stakeunited.com
.onebtcplace.com
.papoto.com
.infamylists.com
.inhive.com
@ -1749,7 +1739,6 @@ nimiq.terorie.com
.cpu-pool.net
.cryptotab.net
.notallmine.net
.datapaw.net
.minershq.net
.deltapool.net
.deepbit.net
@ -1772,7 +1761,6 @@ nimiq.terorie.com
.gay-hotvideo.net
.hashhorse.net
.nourpool.net
.imzlabs.net
.kinohabr.net
.kisshentai.net
.lyncoin.net
@ -1781,13 +1769,10 @@ nimiq.terorie.com
.monero-miner.net
.mycoinwallet.net
.nimiqtest.net
.punchsub.net
.roastedvolt.net
.smpool.net
.sweetbook.net
.vcrypt.net
.vidfile.net
.yetimining.net
.yobit.net
.serverpower.net
.haqo.net
@ -1979,7 +1964,6 @@ nimiq.terorie.com
.globalpool.org
.oddpools.org
.luckpool.org
.igrid.org
.keepool.org
.kosmoplovci.org
.kroma.org
@ -2076,7 +2060,6 @@ nimiq.terorie.com
.1ds.us
.p2poolmining.us
.multipool.us
.50centfreedom.us
.binance.us
.acmining.us
.gridcoin.us
@ -2094,11 +2077,9 @@ nimiq.terorie.com
.fungibly.xyz
.cryptopool.xyz
.ionize.xyz
.easypool.xyz
.blockcrushers.xyz
.deipool.xyz
.cojin.xyz
.arhash.xyz
.p2p-spb.xyz
.pecadol.xyz
.303365.xyz
@ -2118,7 +2099,6 @@ nimiq.terorie.com
.etcoin.xyz
.avero.xyz
.bitcoin-rebooted.xyz
.enviromint.xyz
.blockify.xyz
.newpool.xyz
.capung.xyz
@ -2127,7 +2107,6 @@ nimiq.terorie.com
.crpool.xyz
.cryptominers.xyz
.cryptopine.xyz
.cyg2016.xyz
.xazab.xyz
.elphyrecoin.xyz
.flyhash.xyz
@ -2322,7 +2301,6 @@ nimiq.terorie.com
.suprnova.cc
.trustpool.cc
.reactor.cc
.anomp.cc
.dpool.cc
.minero.cc
.smartcash.cc
@ -2372,7 +2350,6 @@ nimiq.terorie.com
.quickpool.tech
.sia.tech
.freecontent.date
.cryptopool.space
.coinminer.space
.hashing.space
.goodzen.space
@ -2385,7 +2362,6 @@ nimiq.terorie.com
.infinium.space
.mineradnow.space
.ukkey3.space
.pool.space
.blocx.space
.cryptomorons.space
.blockhunters.space
@ -2459,7 +2435,6 @@ nimiq.terorie.com
.vectorium.co
.azakus.co
.bitconnect.co
.bitconnectpool.co
.onepool.co
.hashcoin.co
.okcash.co
@ -2725,12 +2700,7 @@ nimiq.terorie.com
.hash.green
.btc.to
.hashrate.to
.aurum7.to
.automatix.to
.xmr.to
.coolmine.to
.coolpool.to
.dpool.to
.estream.to
.foxx.to
.planet.to
@ -2739,12 +2709,8 @@ nimiq.terorie.com
.miningpool.to
.piratebay.to
.rig.to
.speedpool.to
.streamplay.to
.sunpool.to
.template-download.to
.more.to
.cfcnet.to
.darkco.in
.10xbitco.in
.freico.in
@ -2885,7 +2851,6 @@ nimiq.terorie.com
.zona.pl
.worldcoin.global
.fireants.global
.pool.do
.datasecu.download
.jqwww.download
.mine.bz
@ -3132,7 +3097,6 @@ kingsminer.ddnsking.com
.pepperscorecoin.wixsite.com
.plugin.brfiles.com
.pool.4i7i.com
.pool.bit34.com
.pool.groupfabric.com
.pool.paprikaex.com
.pool.stalwartbucks.com
@ -3435,7 +3399,6 @@ d3iz6lralvg77g.cloudfront.net
.evolution-project.go.ro
.max.csrss.website
.monero.us.to
.eos.zhizhu.to
.a45.bulehero.in
.a46.bulehero.in
.a88.bulehero.in