diff --git a/Build/build-reject-domainset.js b/Build/build-reject-domainset.js index a7c91db9..2dae8b80 100644 --- a/Build/build-reject-domainset.js +++ b/Build/build-reject-domainset.js @@ -3,11 +3,13 @@ const { promises: fsPromises } = require('fs'); const { resolve: pathResolve } = require('path'); let cliProgress; +let Piscina; try { + Piscina = require('piscina'); cliProgress = require('cli-progress'); } catch (e) { console.log('Dependencies not found'); - console.log('"npm i cli-progress" then try again!'); + console.log('"npm i cli-progress piscina" then try again!'); console.error(e); process.exit(1); @@ -54,7 +56,11 @@ async function processFilterRules(filterRulesUrl) { } /** @type Set */ - const whitelistDomainSets = new Set(['localhost', 'analytics.google.com']); + const whitelistDomainSets = new Set([ + 'localhost', + 'analytics.google.com', + 'msa.cdn.mediaset.net' // Added manually using DOMAIN-KEYWORDS + ]); /** @type Set */ const blacklistDomainSets = new Set(); @@ -75,10 +81,28 @@ async function processFilterRules(filterRulesUrl) { return; } - if (line.startsWith('@@||') && line.endsWith('^')) { - whitelistDomainSets.add(`${line.replaceAll('@@||', '').replaceAll('^', '')}`.trim()); - } else if (line.startsWith('||') && line.endsWith('^')) { - blacklistDomainSets.add(`${line.replaceAll('||', '').replaceAll('^', '')}`.trim()); + if (line.startsWith('@@||') + && ( + line.endsWith('^') + || line.endsWith('^|') + ) + ) { + whitelistDomainSets.add(`${line.replaceAll('@@||', '').replaceAll('^|', '').replaceAll('^', '')}`.trim()); + } else if ( + line.startsWith('||') + && ( + line.endsWith('^') + || line.endsWith('^|') + ) + ) { + blacklistDomainSets.add(`.${line.replaceAll('||', '').replaceAll('^|', '').replaceAll('^', '')}`.trim()); + } else if (line.startsWith('://') + && ( + line.endsWith('^') + || line.endsWith('^|') + ) + ) { + blacklistDomainSets.add(`${line.replaceAll('://', '').replaceAll('^|', '').replaceAll('^', '')}`.trim()); } }); @@ -132,8 +156,6 @@ async function processFilterRules(filterRulesUrl) { // Parse from AdGuard Filters /** @type Set */ const filterRuleWhitelistDomainSets = new Set(); - /** @type Set */ - const filterRuleBlacklistDomainSets = new Set(); (await Promise.all([ processFilterRules('https://easylist.to/easylist/easylist.txt'), processFilterRules('https://adguardteam.github.io/AdGuardSDNSFilter/Filters/filter.txt'), @@ -143,15 +165,10 @@ async function processFilterRules(filterRulesUrl) { processFilterRules('https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_224_Chinese/filter.txt') ])).forEach(({ white, black }) => { white.forEach(i => filterRuleWhitelistDomainSets.add(i)); - black.forEach(i => filterRuleBlacklistDomainSets.add(i)); + black.forEach(i => domainSets.add(i)); }); - for (const black of filterRuleBlacklistDomainSets) { - domainSets.add(`.${black}`); - } - - console.log(`Import ${filterRuleBlacklistDomainSets.size} black rules from adguard filters!`); - console.log(`Import ${filterRuleWhitelistDomainSets.size} white rules from adguard filters!`); + console.log(`Import rules from adguard filters!`); // Read DOMAIN Keyword const domainKeywordsSet = new Set(); @@ -168,55 +185,29 @@ async function processFilterRules(filterRulesUrl) { // Dedupe domainSets console.log(`Start deduping!`); - const bar2 = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic); - bar2.start(domainSets.size, 0); + const piscina = new Piscina({ + filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js') + }); - for (const domain of domainSets) { - bar2.increment(); + const res = await Promise.all([ + piscina.run({ keywords: domainKeywordsSet, input: domainSets }, { name: 'dedupeKeywords' }), + piscina.run({ whiteList: filterRuleWhitelistDomainSets, input: domainSets }, { name: 'whitelisted' }), + ...sliceIntoChunks(Array.from(domainSets), 5000).map(chunk => piscina.run({ input: chunk, fullSet: domainSets }, { name: 'dedupe' })) + ]); - let shouldContinue = false; - - for (const white of filterRuleWhitelistDomainSets) { - if (domain.includes(white) || white.includes(domain)) { - domainSets.delete(domain); - shouldContinue = true; - break; - } - } - - if (shouldContinue) { - continue; - } - - for (const keyword of domainKeywordsSet) { - if (domain.includes(keyword) || keyword.includes(domain)) { - domainSets.delete(domain); - shouldContinue = true; - break; - } - } - - if (shouldContinue) { - continue; - } - - for (const domain2 of domainSets) { - if ( - domain2.startsWith('.') - && domain2 !== domain - && ( - domain.endsWith(domain2) - || `.${domain}` === domain2 - ) - ) { - domainSets.delete(domain); - break; - } - } - } - - bar2.stop(); + res.forEach(set => { + set.forEach(i => domainSets.delete(i)); + }); return fsPromises.writeFile(pathResolve(__dirname, '../List/domainset/reject.conf'), `${[...domainSets].join('\n')}\n`); })(); + +function sliceIntoChunks(arr, chunkSize) { + const res = []; + for (let i = 0; i < arr.length; i += chunkSize) { + const chunk = arr.slice(i, i + chunkSize); + res.push(chunk); + } + return res; +} diff --git a/Build/worker/build-reject-domainset-worker.js b/Build/worker/build-reject-domainset-worker.js new file mode 100644 index 00000000..a6ed809d --- /dev/null +++ b/Build/worker/build-reject-domainset-worker.js @@ -0,0 +1,51 @@ +exports.dedupe = ({ fullSet, input }) => { + const output = new Set(); + + for (const domain of input) { + for (const domain2 of fullSet) { + if ( + domain2.startsWith('.') + && domain2 !== domain + && ( + domain.endsWith(domain2) + || `.${domain}` === domain2 + ) + ) { + output.add(domain); + break; + } + } + } + + return output; +}; + +exports.whitelisted = ({ whiteList, input }) => { + const output = new Set(); + + for (const domain of input) { + for (const white of whiteList) { + if (domain.includes(white) || white.includes(domain)) { + output.add(domain); + break; + } + } + } + + return output; +}; + +exports.dedupeKeywords = ({ keywords, input }) => { + const output = new Set(); + + for (const domain of input) { + for (const keyword of keywords) { + if (domain.includes(keyword) || keyword.includes(domain)) { + output.add(domain); + break; + } + } + } + + return output; +} diff --git a/List/domainset/reject.conf b/List/domainset/reject.conf index 374f4e67..92268dff 100644 --- a/List/domainset/reject.conf +++ b/List/domainset/reject.conf @@ -12,7 +12,6 @@ .24log.de .24pm-affiliation.com .2mdn.net -.2o7.net .2znp09oa.com .30ads.com .3337723.com @@ -262,7 +261,6 @@ .adjug.com .adjuggler.com .adjuggler.yourdictionary.com -.adjust.com .adjustnetwork.com .adk2.co .adk2.com @@ -889,7 +887,6 @@ .ambiguousquilt.com .ambitiousagreement.com .americash.com -.amplitude.com .amung.us .analytics-production.hapyak.com .analytics.adpost.org @@ -935,7 +932,6 @@ .apolloprogram.io .app-analytics.snapchat.com .appboycdn.com -.appsflyer.com .aps.hearstnp.com .apsalar.com .apture.com @@ -981,7 +977,6 @@ .awakebird.com .awempire.com .awesomeagreement.com -.awin1.com .awstrack.me .awzbijw.com .axiomaticalley.com @@ -1397,7 +1392,6 @@ .crsspxl.com .crta.dailymail.co.uk .crtv.mate1.com -.crwdcntrl.net .crypto-loot.org .ctnetwork.hu .cubics.com @@ -1490,7 +1484,6 @@ .dotmetrics.net .doubleclick.com .doubleclick.de -.doubleclick.net .doublepimp.com .doubleverify.com .doubtfulrainstorm.com @@ -1786,7 +1779,6 @@ .harrenmedia.com .harrenmedianetwork.com .havamedia.net -.hb.afl.rakuten.co.jp .hbb.afl.rakuten.co.jp .hdscout.com .heap.com @@ -1959,7 +1951,6 @@ .jaizouji.com .jauchuwa.net .jcount.com -.jdoqocy.com .jinkads.de .joetec.net .js-agent.newrelic.com @@ -2209,7 +2200,6 @@ .mlsend.com .mltrk.io .mmismm.com -.mmstat.com .mmtro.com .moartraffic.com .moat.com @@ -2363,7 +2353,6 @@ .onenetworkdirect.com .onestat.com .onestatfree.com -.online-metrix.net .online.miarroba.com .onlinecash.com .onlinecashmethod.com @@ -2382,7 +2371,6 @@ .openx.angelsgroup.org.uk .openx.cairo360.com .openx.kgmedia.eu -.openx.net .openx.skinet.cz .openx.smcaen.fr .openx2.kytary.cz @@ -2933,7 +2921,6 @@ .stat.zenon.net .stat24.com .stat24.meta.ua -.statcounter.com .statdynamic.com .staticads.btopenworld.com .statistik-gallup.net @@ -3041,7 +3028,6 @@ .tealeaf.com .tealium.cbsnews.com .tealium.com -.tealiumiq.com .tedioustooth.com .teenrevenue.com .teenyvolcano.com @@ -3170,7 +3156,6 @@ .trackmysales.com .trackuhub.com .tradeadexchange.com -.tradedoubler.com .trading-rtbg.com .traffic-exchange.com .traffic-redirecting.com @@ -3998,7 +3983,6 @@ cache.soloth.com c-adash.m.taobao.com cal.meizu.com calopenupdate.comm.miui.com -cas.pxl.ace.advertising.com catalog.video.msn.com c.baidu.com cb.alimama.cn @@ -4037,7 +4021,6 @@ clicklog.moviebox.baofeng.net click.mz.simba.taobao.com click.qianqian.com clicks.beap.bc.yahoo.com -click.simba.taobao.com click.tianyaui.com click.tz.simba.taobao.com click.uve.mobile.sina.cn @@ -4619,7 +4602,6 @@ mazu.3g.qq.com m.bailingjiankang.com m.breezily168.com mb.yidianzixun.com -mclick.simba.taobao.com m.clkservice.youdao.com m.clubske.com mcore.vcgame.cn @@ -4836,7 +4818,6 @@ pindao.huoban.taobao.com ping.acc.sogou.com pinghot.qq.com ping.pinyin.sogou.com -pixel.advertising.com pix.impdesk.com pixiu.shangshufang.ksosoft.com plat.videojj.com @@ -4863,7 +4844,6 @@ psfq.gou.sogou.com ps.haohaowan8.com p.store.qq.com publish-pic-cpu.baidu.com -pub.pxl.ace.advertising.com pub.se.360.cn puds.test.uae.uc.cn pull.push.sogou.com @@ -4948,7 +4928,6 @@ rs.haohaowan8.com rtas.videocc.net rt.funshion.net rtlog.vidown.cn -rub.pxl.ace.advertising.com s.051352.com s.0594529.com s1.cmfu.com @@ -5638,18 +5617,6 @@ iotapi.adups.com ph5t.adups.com push5.adups.com f.aduwant.com -pxl.ace.advertising.com -ace-tag.advertising.com -cmp.advertising.com -cdn.cmp.advertising.com -o.cmp.advertising.com -sp.advertising.com -tag.sp.advertising.com -ssp.advertising.com -prod-m-node-1111.ssp.advertising.com -prod-m-node-1211.ssp.advertising.com -prod-m-node-2111.ssp.advertising.com -prod-m-node-3111.ssp.advertising.com adview.pl d.adx.io s.adx.io @@ -6720,7 +6687,6 @@ cdn.madkudu.com track.mads.ph acc.magixite.com api.magmamobile.com -ad.mail.ru assets.mantisadnetwork.com ecs.mantisadnetwork.com resize.mantisadnetwork.com @@ -14744,7 +14710,6 @@ file.apicvn.com .imaarracon.co .imageadvantage.net .imagineposition.com -.imasdk.googleapis.com .imbetan.info .imcpqxmtlapagng.com .imediarevenue.com @@ -22925,7 +22890,6 @@ file.apicvn.com .ad.outsidehub.com .ad.reklamport.com .ad.smartmediarep.com -.adaptv.advertising.com .ads-sg.tiktok.com .ads.audio.thisisdax.com .ads.sportradar.com @@ -23405,6 +23369,10 @@ file.apicvn.com .x0r.urlgalleries.net .yotta.scrolller.com .ytre9jk.txxx.com +.arkfacialdaybreak.com +.ligninenchant.com +.ouphoarg.com +.hearthatdistant.top .aggingleag.one .oweqas.xyz .gyrivehmic.com @@ -26370,7 +26338,6 @@ file.apicvn.com .adready.com .adreadytractions.com .adrecord.com -.adrelayer.com .adresellers.com .adrise.de .adro.co @@ -26680,7 +26647,6 @@ file.apicvn.com .cuelinks.com .currentlyobsessed.me .cybmas.com -.dable.io .datawrkz.com .dating-service.net .datinggold.com @@ -26699,7 +26665,6 @@ file.apicvn.com .dochase.com .dollarsponsor.com .domainadvertising.com -.dotomi.com .dreamaquarium.com .dt00.net .dt07.net @@ -27226,6 +27191,7 @@ file.apicvn.com .sndkorea.co.kr .sni.ps .snigelweb.com +.snowads.net .so-excited.com .soagitet.net .soalouve.com @@ -27305,7 +27271,6 @@ file.apicvn.com .tapnative.com .tardangro.com .targeterra.info -.targetspot.com .tattomedia.com .tbaffiliate.com .tcadops.ca @@ -27519,7 +27484,6 @@ file.apicvn.com .api.ads.watchabc.go.com .ad.prismamediadigital.com .socdm.com -.appodeal.com .a.320981a9244924ef86ebdbb9eb877e9f21ce83f1e3cc89b2c5e7c3ff.com .a.76674bdad304297eda3d325f449f6f49.com .geoloc.yospace.com @@ -27600,6 +27564,7 @@ file.apicvn.com .restartad.com .appgiftwall.oss-us-west-1.aliyuncs.com .appgift.sinaapp.com +.ad.mail.ru .servedby.o2.co.uk .amillionads.com .mcoreads.com @@ -28015,6 +27980,7 @@ file.apicvn.com .n152adserv.com .rg.yottos.com .weboohost.com +.mon.snssdk.com .log-hl.snssdk.com .ib.snssdk.com .xlog.snssdk.com @@ -28118,7 +28084,6 @@ file.apicvn.com .kissmetrics.io .counter.kikibobo.top .c1.politexpert.ru -.logrocket.com .ipmeta.io .datasign.co .analytics.daasrv.net @@ -28702,7 +28667,6 @@ file.apicvn.com .pfx.sma-clsystem.info .tangankanan.net .topiz.ru -.hit.interia.pl .socialhoney.co .trk.olnl.net .ustat.pro @@ -28744,7 +28708,6 @@ file.apicvn.com .plugin.management .back.marketing .nsaudience.pl -.rutarget.ru .adx.com.ru .visitor-analytics.io .giraff.io @@ -29511,7 +29474,6 @@ file.apicvn.com .gallupnet.fi .gbotvisit.com .geistm.com -.gemius.pl .gemtrackers.com .generaltracking.de .genieesspv.jp @@ -29811,7 +29773,6 @@ file.apicvn.com .logaholic.com .logcounter.com .logdy.com -.logentries.com .loger.ru .lognormal.net .logxp.ru @@ -30073,8 +30034,6 @@ file.apicvn.com .pclicks.com .pcspeedup.com .peakcounter.dk -.peerius.com -.perfdrive.com .performax.cz .performtracking.com .perion.com @@ -30329,7 +30288,6 @@ file.apicvn.com .shrinktheweb.com .siftscience.com .signup-way.com -.silverpop.com .silverpush.co .simplereach.com .simplycast.us @@ -31435,6 +31393,8 @@ file.apicvn.com .um-public-panel-prod.s3.amazonaws.com .collection-endpoint-prod.herokuapp.com .collection-endpoint-staging.herokuapp.com +.log.appdriver.jp +.gtm.udemy.com .t.clic2buy.com .blue.fox8.com .astat.nikkei.co.jp @@ -32623,7 +32583,6 @@ file.apicvn.com .analytics.archive.org .analytics.artirix.com .analytics.aweber.com -.analytics.edgekey.net .analytics.edgesuite.net .analytics.electro-com.ru .analytics.fairfax.com.au @@ -33013,6 +32972,14 @@ file.apicvn.com .int.vlancaa.site .int.vlancaa.fun .tok.vaicore.xyz +.aff-report.lbesecapi.com +.aff-policy.lbesecapi.com +.psi.lbesecapi.com +.report-ps.meettech.net +.f-log-mobile.grammarly.io +.f-log-mobile-ios.grammarly.io +.ada-tr.staff-start.com +.analytics.liveact-vault.com .api.statsig.com .logs-01.loggly.com .analytics.300624.com @@ -33255,7 +33222,6 @@ file.apicvn.com .api.crittercism.com .api.gimbal.com .api.sec.miui.com -.appmetrica.yandex.com .beacon.carfax.com .beacon.shazam.com .beha.ksmobile.com @@ -33602,7 +33568,6 @@ file.apicvn.com .geotargetly-1a441.appspot.com .geotargetly.co .getambassador.com -.getblueshift.com .getpublica.com .getviously.com .gglscr.online @@ -33752,7 +33717,6 @@ file.apicvn.com .mynsystems.com .mytrack.pro .n74s9.com -.nakanohito.jp .natero.com .ndf81.com .nejmqianyan.cn @@ -33886,7 +33850,6 @@ file.apicvn.com .segmetrics.io .selectmedia.asia .sellebrity.com -.sellpoint.net .semantic-finder.com .seon.io .seondnsresolve.com @@ -33973,7 +33936,6 @@ file.apicvn.com .trackonomics.net .traffer.net .trafficfuel.com -.trafficguard.ai .trafficroots.com .transactionale.com .traq.li @@ -34770,7 +34732,6 @@ file.apicvn.com .events.split.io .events.yourcx.io .eventtracker.videostrip.com -.exacttarget.com .fcmatch.google.com .fcmatch.youtube.com .filament-stats.herokuapp.com @@ -35383,7 +35344,6 @@ file.apicvn.com .adplan7.com .adpon.jp .af-z.jp -.afi-b.com .aid-ad.jp .amoad.com .analyticsip.net @@ -35394,7 +35354,6 @@ file.apicvn.com .d2-apps.net .deteql.net .e-click.jp -.ebis.ne.jp .ec-concier.com .ec-optimizer.com .eltex.co.jp @@ -35405,7 +35364,6 @@ file.apicvn.com .gmossp-sp.jp .gsspcln.jp .gyro-n.com -.i2i.jp .iid-network.jp .iogous.com .kaizenplatform.net @@ -36011,7 +35969,6 @@ file.apicvn.com .hzucji.com .0iqiehoa35.ru .inforuss.site -.ouphoarg.com .dfffpyu8fhawcnd.ru .dfnetwork.link .fodorw.com @@ -37187,7 +37144,6 @@ file.apicvn.com .adward.ru .adwidgets.ru .adwile.net -.adwolf.ru .adxgeeks.com .adylalahb.ru .aerontre.com @@ -40527,7 +40483,6 @@ file.apicvn.com .aff.jskyservices.com .a-i-ad.com .managedweb.net -.thench.net .bannerfarm.aolp.jp .koukoku.red .amaprop.net @@ -42356,6 +42311,7 @@ file.apicvn.com .wfgo5vjs6cp0cdjx7bms3cn9lof2p01xho0pxgm1fd2fah3j5d8qd7x1d6eu.me .govguangxi.cn .w8sg6.xyz +.98765432100.site .00-gov.cn .000123456789.site .0024aaaa.com diff --git a/List/non_ip/reject.conf b/List/non_ip/reject.conf index b4b8f854..6f581072 100644 --- a/List/non_ip/reject.conf +++ b/List/non_ip/reject.conf @@ -34,6 +34,8 @@ DOMAIN-KEYWORD,adsyndication DOMAIN-KEYWORD,bahoom,REJECT DOMAIN,daisydiskapp.com,REJECT +AND, ((DOMAIN-SUFFIX,msa.cdn.mediaset.net), (DOMAIN-KEYWORD,adv0)) + # >> Tencent Lemon PROCESS-NAME,Tencent Lemon,REJECT