Update Rules & Tools

This commit is contained in:
SukkaW 2021-11-23 02:19:48 +08:00
parent 369dc87e92
commit 936adaf2c6
4 changed files with 123 additions and 123 deletions

View File

@ -3,11 +3,13 @@ const { promises: fsPromises } = require('fs');
const { resolve: pathResolve } = require('path');
let cliProgress;
let Piscina;
try {
Piscina = require('piscina');
cliProgress = require('cli-progress');
} catch (e) {
console.log('Dependencies not found');
console.log('"npm i cli-progress" then try again!');
console.log('"npm i cli-progress piscina" then try again!');
console.error(e);
process.exit(1);
@ -54,7 +56,11 @@ async function processFilterRules(filterRulesUrl) {
}
/** @type Set<string> */
const whitelistDomainSets = new Set(['localhost', 'analytics.google.com']);
const whitelistDomainSets = new Set([
'localhost',
'analytics.google.com',
'msa.cdn.mediaset.net' // Added manually using DOMAIN-KEYWORDS
]);
/** @type Set<string> */
const blacklistDomainSets = new Set();
@ -75,10 +81,28 @@ async function processFilterRules(filterRulesUrl) {
return;
}
if (line.startsWith('@@||') && line.endsWith('^')) {
whitelistDomainSets.add(`${line.replaceAll('@@||', '').replaceAll('^', '')}`.trim());
} else if (line.startsWith('||') && line.endsWith('^')) {
blacklistDomainSets.add(`${line.replaceAll('||', '').replaceAll('^', '')}`.trim());
if (line.startsWith('@@||')
&& (
line.endsWith('^')
|| line.endsWith('^|')
)
) {
whitelistDomainSets.add(`${line.replaceAll('@@||', '').replaceAll('^|', '').replaceAll('^', '')}`.trim());
} else if (
line.startsWith('||')
&& (
line.endsWith('^')
|| line.endsWith('^|')
)
) {
blacklistDomainSets.add(`.${line.replaceAll('||', '').replaceAll('^|', '').replaceAll('^', '')}`.trim());
} else if (line.startsWith('://')
&& (
line.endsWith('^')
|| line.endsWith('^|')
)
) {
blacklistDomainSets.add(`${line.replaceAll('://', '').replaceAll('^|', '').replaceAll('^', '')}`.trim());
}
});
@ -132,8 +156,6 @@ async function processFilterRules(filterRulesUrl) {
// Parse from AdGuard Filters
/** @type Set<string> */
const filterRuleWhitelistDomainSets = new Set();
/** @type Set<string> */
const filterRuleBlacklistDomainSets = new Set();
(await Promise.all([
processFilterRules('https://easylist.to/easylist/easylist.txt'),
processFilterRules('https://adguardteam.github.io/AdGuardSDNSFilter/Filters/filter.txt'),
@ -143,15 +165,10 @@ async function processFilterRules(filterRulesUrl) {
processFilterRules('https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_224_Chinese/filter.txt')
])).forEach(({ white, black }) => {
white.forEach(i => filterRuleWhitelistDomainSets.add(i));
black.forEach(i => filterRuleBlacklistDomainSets.add(i));
black.forEach(i => domainSets.add(i));
});
for (const black of filterRuleBlacklistDomainSets) {
domainSets.add(`.${black}`);
}
console.log(`Import ${filterRuleBlacklistDomainSets.size} black rules from adguard filters!`);
console.log(`Import ${filterRuleWhitelistDomainSets.size} white rules from adguard filters!`);
console.log(`Import rules from adguard filters!`);
// Read DOMAIN Keyword
const domainKeywordsSet = new Set();
@ -168,55 +185,29 @@ async function processFilterRules(filterRulesUrl) {
// Dedupe domainSets
console.log(`Start deduping!`);
const bar2 = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
bar2.start(domainSets.size, 0);
const piscina = new Piscina({
filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js')
});
for (const domain of domainSets) {
bar2.increment();
const res = await Promise.all([
piscina.run({ keywords: domainKeywordsSet, input: domainSets }, { name: 'dedupeKeywords' }),
piscina.run({ whiteList: filterRuleWhitelistDomainSets, input: domainSets }, { name: 'whitelisted' }),
...sliceIntoChunks(Array.from(domainSets), 5000).map(chunk => piscina.run({ input: chunk, fullSet: domainSets }, { name: 'dedupe' }))
]);
let shouldContinue = false;
for (const white of filterRuleWhitelistDomainSets) {
if (domain.includes(white) || white.includes(domain)) {
domainSets.delete(domain);
shouldContinue = true;
break;
}
}
if (shouldContinue) {
continue;
}
for (const keyword of domainKeywordsSet) {
if (domain.includes(keyword) || keyword.includes(domain)) {
domainSets.delete(domain);
shouldContinue = true;
break;
}
}
if (shouldContinue) {
continue;
}
for (const domain2 of domainSets) {
if (
domain2.startsWith('.')
&& domain2 !== domain
&& (
domain.endsWith(domain2)
|| `.${domain}` === domain2
)
) {
domainSets.delete(domain);
break;
}
}
}
bar2.stop();
res.forEach(set => {
set.forEach(i => domainSets.delete(i));
});
return fsPromises.writeFile(pathResolve(__dirname, '../List/domainset/reject.conf'), `${[...domainSets].join('\n')}\n`);
})();
/**
 * Split an array into consecutive chunks of at most `chunkSize` elements.
 *
 * @param {Array} arr - Source array; it is read with `slice` and never modified.
 * @param {number} chunkSize - Maximum number of elements per chunk; must be greater than 0.
 * @returns {Array[]} The chunks in original order. The last chunk may be shorter,
 *   and an empty `arr` yields an empty array.
 * @throws {RangeError} If `chunkSize` is not a number greater than 0.
 */
function sliceIntoChunks(arr, chunkSize) {
  // A zero, negative, NaN or missing step never advances the cursor below:
  // the loop would spin forever (or return a bogus `[[]]`), so fail loudly instead.
  if (!(chunkSize > 0)) {
    throw new RangeError(`chunkSize must be greater than 0, got ${String(chunkSize)}`);
  }
  const res = [];
  for (let i = 0; i < arr.length; i += chunkSize) {
    res.push(arr.slice(i, i + chunkSize));
  }
  return res;
}

View File

@ -0,0 +1,51 @@
exports.dedupe = ({ fullSet, input }) => {
const output = new Set();
for (const domain of input) {
for (const domain2 of fullSet) {
if (
domain2.startsWith('.')
&& domain2 !== domain
&& (
domain.endsWith(domain2)
|| `.${domain}` === domain2
)
) {
output.add(domain);
break;
}
}
}
return output;
};
exports.whitelisted = ({ whiteList, input }) => {
const output = new Set();
for (const domain of input) {
for (const white of whiteList) {
if (domain.includes(white) || white.includes(domain)) {
output.add(domain);
break;
}
}
}
return output;
};
exports.dedupeKeywords = ({ keywords, input }) => {
const output = new Set();
for (const domain of input) {
for (const keyword of keywords) {
if (domain.includes(keyword) || keyword.includes(domain)) {
output.add(domain);
break;
}
}
}
return output;
}

View File

@ -12,7 +12,6 @@
.24log.de
.24pm-affiliation.com
.2mdn.net
.2o7.net
.2znp09oa.com
.30ads.com
.3337723.com
@ -262,7 +261,6 @@
.adjug.com
.adjuggler.com
.adjuggler.yourdictionary.com
.adjust.com
.adjustnetwork.com
.adk2.co
.adk2.com
@ -889,7 +887,6 @@
.ambiguousquilt.com
.ambitiousagreement.com
.americash.com
.amplitude.com
.amung.us
.analytics-production.hapyak.com
.analytics.adpost.org
@ -935,7 +932,6 @@
.apolloprogram.io
.app-analytics.snapchat.com
.appboycdn.com
.appsflyer.com
.aps.hearstnp.com
.apsalar.com
.apture.com
@ -981,7 +977,6 @@
.awakebird.com
.awempire.com
.awesomeagreement.com
.awin1.com
.awstrack.me
.awzbijw.com
.axiomaticalley.com
@ -1397,7 +1392,6 @@
.crsspxl.com
.crta.dailymail.co.uk
.crtv.mate1.com
.crwdcntrl.net
.crypto-loot.org
.ctnetwork.hu
.cubics.com
@ -1490,7 +1484,6 @@
.dotmetrics.net
.doubleclick.com
.doubleclick.de
.doubleclick.net
.doublepimp.com
.doubleverify.com
.doubtfulrainstorm.com
@ -1786,7 +1779,6 @@
.harrenmedia.com
.harrenmedianetwork.com
.havamedia.net
.hb.afl.rakuten.co.jp
.hbb.afl.rakuten.co.jp
.hdscout.com
.heap.com
@ -1959,7 +1951,6 @@
.jaizouji.com
.jauchuwa.net
.jcount.com
.jdoqocy.com
.jinkads.de
.joetec.net
.js-agent.newrelic.com
@ -2209,7 +2200,6 @@
.mlsend.com
.mltrk.io
.mmismm.com
.mmstat.com
.mmtro.com
.moartraffic.com
.moat.com
@ -2363,7 +2353,6 @@
.onenetworkdirect.com
.onestat.com
.onestatfree.com
.online-metrix.net
.online.miarroba.com
.onlinecash.com
.onlinecashmethod.com
@ -2382,7 +2371,6 @@
.openx.angelsgroup.org.uk
.openx.cairo360.com
.openx.kgmedia.eu
.openx.net
.openx.skinet.cz
.openx.smcaen.fr
.openx2.kytary.cz
@ -2933,7 +2921,6 @@
.stat.zenon.net
.stat24.com
.stat24.meta.ua
.statcounter.com
.statdynamic.com
.staticads.btopenworld.com
.statistik-gallup.net
@ -3041,7 +3028,6 @@
.tealeaf.com
.tealium.cbsnews.com
.tealium.com
.tealiumiq.com
.tedioustooth.com
.teenrevenue.com
.teenyvolcano.com
@ -3170,7 +3156,6 @@
.trackmysales.com
.trackuhub.com
.tradeadexchange.com
.tradedoubler.com
.trading-rtbg.com
.traffic-exchange.com
.traffic-redirecting.com
@ -3998,7 +3983,6 @@ cache.soloth.com
c-adash.m.taobao.com
cal.meizu.com
calopenupdate.comm.miui.com
cas.pxl.ace.advertising.com
catalog.video.msn.com
c.baidu.com
cb.alimama.cn
@ -4037,7 +4021,6 @@ clicklog.moviebox.baofeng.net
click.mz.simba.taobao.com
click.qianqian.com
clicks.beap.bc.yahoo.com
click.simba.taobao.com
click.tianyaui.com
click.tz.simba.taobao.com
click.uve.mobile.sina.cn
@ -4619,7 +4602,6 @@ mazu.3g.qq.com
m.bailingjiankang.com
m.breezily168.com
mb.yidianzixun.com
mclick.simba.taobao.com
m.clkservice.youdao.com
m.clubske.com
mcore.vcgame.cn
@ -4836,7 +4818,6 @@ pindao.huoban.taobao.com
ping.acc.sogou.com
pinghot.qq.com
ping.pinyin.sogou.com
pixel.advertising.com
pix.impdesk.com
pixiu.shangshufang.ksosoft.com
plat.videojj.com
@ -4863,7 +4844,6 @@ psfq.gou.sogou.com
ps.haohaowan8.com
p.store.qq.com
publish-pic-cpu.baidu.com
pub.pxl.ace.advertising.com
pub.se.360.cn
puds.test.uae.uc.cn
pull.push.sogou.com
@ -4948,7 +4928,6 @@ rs.haohaowan8.com
rtas.videocc.net
rt.funshion.net
rtlog.vidown.cn
rub.pxl.ace.advertising.com
s.051352.com
s.0594529.com
s1.cmfu.com
@ -5638,18 +5617,6 @@ iotapi.adups.com
ph5t.adups.com
push5.adups.com
f.aduwant.com
pxl.ace.advertising.com
ace-tag.advertising.com
cmp.advertising.com
cdn.cmp.advertising.com
o.cmp.advertising.com
sp.advertising.com
tag.sp.advertising.com
ssp.advertising.com
prod-m-node-1111.ssp.advertising.com
prod-m-node-1211.ssp.advertising.com
prod-m-node-2111.ssp.advertising.com
prod-m-node-3111.ssp.advertising.com
adview.pl
d.adx.io
s.adx.io
@ -6720,7 +6687,6 @@ cdn.madkudu.com
track.mads.ph
acc.magixite.com
api.magmamobile.com
ad.mail.ru
assets.mantisadnetwork.com
ecs.mantisadnetwork.com
resize.mantisadnetwork.com
@ -14744,7 +14710,6 @@ file.apicvn.com
.imaarracon.co
.imageadvantage.net
.imagineposition.com
.imasdk.googleapis.com
.imbetan.info
.imcpqxmtlapagng.com
.imediarevenue.com
@ -22925,7 +22890,6 @@ file.apicvn.com
.ad.outsidehub.com
.ad.reklamport.com
.ad.smartmediarep.com
.adaptv.advertising.com
.ads-sg.tiktok.com
.ads.audio.thisisdax.com
.ads.sportradar.com
@ -23405,6 +23369,10 @@ file.apicvn.com
.x0r.urlgalleries.net
.yotta.scrolller.com
.ytre9jk.txxx.com
.arkfacialdaybreak.com
.ligninenchant.com
.ouphoarg.com
.hearthatdistant.top
.aggingleag.one
.oweqas.xyz
.gyrivehmic.com
@ -26370,7 +26338,6 @@ file.apicvn.com
.adready.com
.adreadytractions.com
.adrecord.com
.adrelayer.com
.adresellers.com
.adrise.de
.adro.co
@ -26680,7 +26647,6 @@ file.apicvn.com
.cuelinks.com
.currentlyobsessed.me
.cybmas.com
.dable.io
.datawrkz.com
.dating-service.net
.datinggold.com
@ -26699,7 +26665,6 @@ file.apicvn.com
.dochase.com
.dollarsponsor.com
.domainadvertising.com
.dotomi.com
.dreamaquarium.com
.dt00.net
.dt07.net
@ -27226,6 +27191,7 @@ file.apicvn.com
.sndkorea.co.kr
.sni.ps
.snigelweb.com
.snowads.net
.so-excited.com
.soagitet.net
.soalouve.com
@ -27305,7 +27271,6 @@ file.apicvn.com
.tapnative.com
.tardangro.com
.targeterra.info
.targetspot.com
.tattomedia.com
.tbaffiliate.com
.tcadops.ca
@ -27519,7 +27484,6 @@ file.apicvn.com
.api.ads.watchabc.go.com
.ad.prismamediadigital.com
.socdm.com
.appodeal.com
.a.320981a9244924ef86ebdbb9eb877e9f21ce83f1e3cc89b2c5e7c3ff.com
.a.76674bdad304297eda3d325f449f6f49.com
.geoloc.yospace.com
@ -27600,6 +27564,7 @@ file.apicvn.com
.restartad.com
.appgiftwall.oss-us-west-1.aliyuncs.com
.appgift.sinaapp.com
.ad.mail.ru
.servedby.o2.co.uk
.amillionads.com
.mcoreads.com
@ -28015,6 +27980,7 @@ file.apicvn.com
.n152adserv.com
.rg.yottos.com
.weboohost.com
.mon.snssdk.com
.log-hl.snssdk.com
.ib.snssdk.com
.xlog.snssdk.com
@ -28118,7 +28084,6 @@ file.apicvn.com
.kissmetrics.io
.counter.kikibobo.top
.c1.politexpert.ru
.logrocket.com
.ipmeta.io
.datasign.co
.analytics.daasrv.net
@ -28702,7 +28667,6 @@ file.apicvn.com
.pfx.sma-clsystem.info
.tangankanan.net
.topiz.ru
.hit.interia.pl
.socialhoney.co
.trk.olnl.net
.ustat.pro
@ -28744,7 +28708,6 @@ file.apicvn.com
.plugin.management
.back.marketing
.nsaudience.pl
.rutarget.ru
.adx.com.ru
.visitor-analytics.io
.giraff.io
@ -29511,7 +29474,6 @@ file.apicvn.com
.gallupnet.fi
.gbotvisit.com
.geistm.com
.gemius.pl
.gemtrackers.com
.generaltracking.de
.genieesspv.jp
@ -29811,7 +29773,6 @@ file.apicvn.com
.logaholic.com
.logcounter.com
.logdy.com
.logentries.com
.loger.ru
.lognormal.net
.logxp.ru
@ -30073,8 +30034,6 @@ file.apicvn.com
.pclicks.com
.pcspeedup.com
.peakcounter.dk
.peerius.com
.perfdrive.com
.performax.cz
.performtracking.com
.perion.com
@ -30329,7 +30288,6 @@ file.apicvn.com
.shrinktheweb.com
.siftscience.com
.signup-way.com
.silverpop.com
.silverpush.co
.simplereach.com
.simplycast.us
@ -31435,6 +31393,8 @@ file.apicvn.com
.um-public-panel-prod.s3.amazonaws.com
.collection-endpoint-prod.herokuapp.com
.collection-endpoint-staging.herokuapp.com
.log.appdriver.jp
.gtm.udemy.com
.t.clic2buy.com
.blue.fox8.com
.astat.nikkei.co.jp
@ -32623,7 +32583,6 @@ file.apicvn.com
.analytics.archive.org
.analytics.artirix.com
.analytics.aweber.com
.analytics.edgekey.net
.analytics.edgesuite.net
.analytics.electro-com.ru
.analytics.fairfax.com.au
@ -33013,6 +32972,14 @@ file.apicvn.com
.int.vlancaa.site
.int.vlancaa.fun
.tok.vaicore.xyz
.aff-report.lbesecapi.com
.aff-policy.lbesecapi.com
.psi.lbesecapi.com
.report-ps.meettech.net
.f-log-mobile.grammarly.io
.f-log-mobile-ios.grammarly.io
.ada-tr.staff-start.com
.analytics.liveact-vault.com
.api.statsig.com
.logs-01.loggly.com
.analytics.300624.com
@ -33255,7 +33222,6 @@ file.apicvn.com
.api.crittercism.com
.api.gimbal.com
.api.sec.miui.com
.appmetrica.yandex.com
.beacon.carfax.com
.beacon.shazam.com
.beha.ksmobile.com
@ -33602,7 +33568,6 @@ file.apicvn.com
.geotargetly-1a441.appspot.com
.geotargetly.co
.getambassador.com
.getblueshift.com
.getpublica.com
.getviously.com
.gglscr.online
@ -33752,7 +33717,6 @@ file.apicvn.com
.mynsystems.com
.mytrack.pro
.n74s9.com
.nakanohito.jp
.natero.com
.ndf81.com
.nejmqianyan.cn
@ -33886,7 +33850,6 @@ file.apicvn.com
.segmetrics.io
.selectmedia.asia
.sellebrity.com
.sellpoint.net
.semantic-finder.com
.seon.io
.seondnsresolve.com
@ -33973,7 +33936,6 @@ file.apicvn.com
.trackonomics.net
.traffer.net
.trafficfuel.com
.trafficguard.ai
.trafficroots.com
.transactionale.com
.traq.li
@ -34770,7 +34732,6 @@ file.apicvn.com
.events.split.io
.events.yourcx.io
.eventtracker.videostrip.com
.exacttarget.com
.fcmatch.google.com
.fcmatch.youtube.com
.filament-stats.herokuapp.com
@ -35383,7 +35344,6 @@ file.apicvn.com
.adplan7.com
.adpon.jp
.af-z.jp
.afi-b.com
.aid-ad.jp
.amoad.com
.analyticsip.net
@ -35394,7 +35354,6 @@ file.apicvn.com
.d2-apps.net
.deteql.net
.e-click.jp
.ebis.ne.jp
.ec-concier.com
.ec-optimizer.com
.eltex.co.jp
@ -35405,7 +35364,6 @@ file.apicvn.com
.gmossp-sp.jp
.gsspcln.jp
.gyro-n.com
.i2i.jp
.iid-network.jp
.iogous.com
.kaizenplatform.net
@ -36011,7 +35969,6 @@ file.apicvn.com
.hzucji.com
.0iqiehoa35.ru
.inforuss.site
.ouphoarg.com
.dfffpyu8fhawcnd.ru
.dfnetwork.link
.fodorw.com
@ -37187,7 +37144,6 @@ file.apicvn.com
.adward.ru
.adwidgets.ru
.adwile.net
.adwolf.ru
.adxgeeks.com
.adylalahb.ru
.aerontre.com
@ -40527,7 +40483,6 @@ file.apicvn.com
.aff.jskyservices.com
.a-i-ad.com
.managedweb.net
.thench.net
.bannerfarm.aolp.jp
.koukoku.red
.amaprop.net
@ -42356,6 +42311,7 @@ file.apicvn.com
.wfgo5vjs6cp0cdjx7bms3cn9lof2p01xho0pxgm1fd2fah3j5d8qd7x1d6eu.me
.govguangxi.cn
.w8sg6.xyz
.98765432100.site
.00-gov.cn
.000123456789.site
.0024aaaa.com

View File

@ -34,6 +34,8 @@ DOMAIN-KEYWORD,adsyndication
DOMAIN-KEYWORD,bahoom,REJECT
DOMAIN,daisydiskapp.com,REJECT
AND, ((DOMAIN-SUFFIX,msa.cdn.mediaset.net), (DOMAIN-KEYWORD,adv0))
# >> Tencent Lemon
PROCESS-NAME,Tencent Lemon,REJECT