Perf: replace psl with tldts (it is blazing fast!)

This commit is contained in:
SukkaW 2022-11-02 01:23:36 +08:00
parent 2ba69aa167
commit b69f4a75c1
5 changed files with 259 additions and 58 deletions

View File

@ -1,4 +1,4 @@
const psl = require('psl');
const tldts = require('tldts');
const { processFilterRules } = require('./lib/parse-filter.js');
const fs = require('fs');
const path = require('path');
@ -55,19 +55,20 @@ const BLACK_TLD = Array.from(new Set([
const domain = line.charCodeAt(0) === 46 ? line.slice(1) : line;
if (line.length > 25) {
const parsed = psl.parse(domain);
const parsed = tldts.parse(domain, { allowPrivateDomains: true });
if (parsed.input === parsed.tld) {
if (parsed.isIp || domain === parsed.publicSuffix) {
continue;
}
const apexDomain = parsed.domain
const apexDomain = parsed.domain;
if (apexDomain) {
if (WHITELIST_DOMAIN.has(apexDomain)) {
continue;
}
if (WHITELIST_DOMAIN.has(apexDomain)) {
continue;
domainCountMap[apexDomain] ||= 0;
domainCountMap[apexDomain] += 1;
}
domainCountMap[apexDomain] ||= 0;
domainCountMap[apexDomain] += 1;
}
}

View File

@ -1,9 +1,15 @@
const psl = require('psl');
const tldts = require('tldts');
const picocolors = require('picocolors');
const fs = require('fs');
const path = require('path');
// Domains that are not public suffixes themselves: only their wildcard
// children (e.g. `*.linodeobjects.com`) are public suffixes.
// The DOMAIN-SUFFIX check in this file consults this set so that these
// domains do not trigger the "is not in public suffix list!" warning.
const SPECIAL_SUFFIXES = new Set([
'linodeobjects.com', // only *.linodeobjects.com is a public suffix
'vultrobjects.com', // only *.vultrobjects.com is a public suffix
'dweb.link' // only *.dweb.link is a public suffix
]);
(async () => {
const domainSetContent = await fs.promises.readFile(
path.resolve(__dirname, '../List/domainset/cdn.conf'),
@ -22,9 +28,14 @@ const path = require('path');
}
const domain = line.charCodeAt(0) === 46 ? line.slice(1) : line;
const parsed = psl.parse(domain);
const parsed = tldts.parse(domain, { allowPrivateDomains: true });
if (parsed.listed && parsed.input === parsed.tld) {
if (
(
parsed.isPrivate
|| parsed.isIcann
) && domain === parsed.publicSuffix
) {
console.error('Domain', picocolors.yellow(domain), picocolors.red('is in public suffix list!'));
}
}
@ -47,10 +58,12 @@ const path = require('path');
if (line.startsWith('DOMAIN-SUFFIX')) {
const domain = line.slice(14);
const parsed = psl.parse(domain);
const parsed = tldts.parse(domain, { allowPrivateDomains: true });
if (parsed.input !== parsed.tld) {
console.error('Domain', picocolors.yellow(domain), picocolors.green('is not in public suffix list!'));
if (domain !== parsed.publicSuffix) {
if (!SPECIAL_SUFFIXES.has(domain)) {
console.error('Domain', picocolors.yellow(domain), picocolors.green('is not in public suffix list!'));
}
}
}
}

View File

@ -72,6 +72,7 @@
.uanwtt.top
.ufrico.top
.uyylmo.top
.vclotq.top
.vlfhkj.top
.vodlvi.top
.vplhow.top
@ -106,10 +107,8 @@
.bfycgt.top
.blfqub.top
.bsqus.top
.buylcl.top
.ccqtxf.top
.cexa.top
.chjjv.top
.ctxbc.top
.dgddug.top
.dgjps.top
@ -121,14 +120,12 @@
.efupod.top
.ejtgs.top
.etwhg.top
.ewnd.top
.ffbjc.top
.fhfe.top
.ftbmjq.top
.gggdv.top
.gwumv.top
.hoaaip.top
.hxfse.top
.igevsy.top
.iljwpo.top
.ilzi.top
@ -179,20 +176,24 @@
.xzzuxm.top
.yaivdx.top
.ytlvjq.top
.buylcl.top
.cbdunw.top
.ceemly.top
.chjjv.top
.dnjrv.top
.eddqf.top
.ennuyv.top
.ewnd.top
.ewpfb.top
.furrqn.top
.hxfse.top
.kabpze.top
.kaxvx.top
.kfrbvn.top
.vmazao.top
.xgyufl.top
.ymklef.top
.yfpyin.top
.ymklef.top
.adbfjw.top
.ampzgd.top
.arokdo.top
@ -288,8 +289,8 @@
.zkddix.top
.zkqaun.top
.znnrhz.top
.square.site
.aezkgj.top
.awlcqy.top
.bmzcgy.top
.bpadlv.top
.caiknr.top
@ -322,7 +323,6 @@
.pmukqz.top
.punfaw.top
.pvnoai.top
.qeptvv.top
.qhumzl.top
.qoepuz.top
.qptcsd.top
@ -350,6 +350,7 @@
.zlxcfy.top
.acebmv.top
.aoxxdp.top
.awlcqy.top
.bayvlb.top
.bcvwpq.top
.bluxbq.top
@ -381,12 +382,12 @@
.lgjkee.top
.nfrtli.top
.nwgjza.top
.qeptvv.top
.yrwtop.top
.zkspkj.top
.auomwo.top
.vcdolz.top
.xzdxdl.top
.anieqb.top
.auomwo.top
.bhijai.top
.bhrzfm.top
.cildah.top
@ -416,6 +417,7 @@
.ybbisq.top
.yvfoys.top
.zqiqby.top
.anieqb.top
.cbzvio.top
.dzhunz.top
.ewqvvs.top
@ -437,7 +439,6 @@
.cbvb.top
.cuch.top
.dpnhyz.top
.dwre.top
.eenkw.top
.epmx.top
.gruc.top
@ -453,7 +454,6 @@
.rvjb.top
.snqhs.top
.tptvw.top
.tvxp.top
.ujse.top
.uqfj.top
.uvwg.top
@ -464,6 +464,7 @@
.wxwmg.top
.xufd.top
.yztkng.top
.dwre.top
.fwudv.top
.hedn.top
.lvotdn.top
@ -472,6 +473,7 @@
.qimjwm.top
.qkat.top
.qmug.top
.tvxp.top
.vkvp.top
.wdvw.top
.mxacw.top
@ -525,6 +527,8 @@
.yxfzox.top
.zqplva.top
.zwjolc.top
.lssopdie.icu
.namkdi.icu
.jhusd.icu
.ahibtgj.top
.ahidtgr.top
@ -548,7 +552,6 @@
.hwnnp.top
.jgeta.top
.kcywyn.top
.khvj.top
.kmpmmk.top
.knbau.top
.mdzxfl.top
@ -617,17 +620,21 @@
.jhikxq.top
.kpmvp.top
.kzrdoe.top
.oghtgf.top
.sagt.top
.txuhj.top
.acch.top
.khvj.top
.oghtgf.top
.rfba.top
.tkqjzf.top
.htstx.top
.nmkxf.top
.lzuzr.top
.gsyeidt.icu
.kaosid.icu
.gtxdifu.icu
.jssjue.icu
.msjdud.icu
.xzkuf.icu
.mjsudio.icu
.agvdnl.top
.aqapd.top
@ -635,11 +642,9 @@
.awkxmn.top
.bazujq.top
.bmqbn.top
.bqbtt.top
.cfxyt.top
.czcit.top
.djkew.top
.dskeb.top
.dxjkmy.top
.fepbf.top
.gesnaf.top
@ -652,7 +657,6 @@
.mjsipu.top
.nqkhfx.top
.oajotw.top
.owkbzp.top
.pzijie.top
.qbutp.top
.rascd.top
@ -673,7 +677,10 @@
.xvrd.top
.yxuku.top
.arujr.top
.bqbtt.top
.dskeb.top
.krteb.top
.owkbzp.top
.kxrjb.top
.qthrf.top
.tpdave.top
@ -683,6 +690,8 @@
.nishilaji.icu
.b5jsued.icu
.csndax.icu
.recordd.icu
.v3454.icu
.bfhju.icu
.baidu123.icu
.nsjdyues.icu
@ -695,9 +704,40 @@
.lsjdhi.icu
.lsloposl.icu
.vardir.icu
.tiafedf.icu
.4k2dci.icu
.687cem.icu
.ansklx.top
.ao0dkn.icu
.drkild.icu
.rmgomo.top
.sawjbk.top
.witojr.top
.wmygyx.top
.284nxw.icu
.bucyvo.top
.inkblog.ink
.nv9dtw.icu
.q0s2sm.icu
.xd1bpn.icu
.dbhlbm.top
.j77o38.icu
.mjdnpt.top
.nrhpkc.icu
.nu57wl.icu
.zoyqhl.top
.ajxdpg.top
.hmziti.top
.pe14pc.icu
.wsojbg.icu
.7a4s96.icu
.eahbrm.top
.sthtuh.top
.zovh96.icu
.vqeyuo.top
.krjqpe.top
.kvrhdp.top
.6c7bza.icu
.zhwpnq.top
.1345578.xyz
.2ebz23.icu
@ -722,7 +762,6 @@
.ebmedia.icu
.ebqzws.top
.ekqclm.top
.evcinc.top
.fhalul.top
.fjmcxh.top
.fph186.icu
@ -748,10 +787,8 @@
.lcacsa.top
.lnuxfx.top
.mbruzw.top
.mivflz.top
.mwsxcn.top
.mzbqid.top
.nljqmk.top
.noqaub.icu
.onazqm.top
.opyxhy.top
@ -768,7 +805,7 @@
.sblr001.xyz
.sblr009.xyz
.slbggp.top
.tbktyg.top
.syzyzz.ltd
.tdenvd.top
.tvvghs.top
.txbrht.top
@ -779,12 +816,10 @@
.v8vbdk.icu
.vkzvur.top
.voezvc.top
.vr0rx.icu
.vspuhg.top
.vxcgax.top
.wcidvq.top
.xgyaef.top
.ykjclz.top
.zlsrwe.top
.6nc5p5.icu
.ajkqym.top
@ -792,6 +827,7 @@
.axcgpp.top
.bokqmt.top
.cyqggi.top
.dchain.top
.dkvvuy.top
.dtyweh.top
.dzerdf.top
@ -802,9 +838,11 @@
.k43e5.icu
.lgn09m.icu
.lmhau8.icu
.metabrain.group
.nbdgwr.top
.nfffte.top
.njhpgs.top
.nljqmk.top
.ohmatb.top
.ozmubm.top
.q04nkf.icu
@ -812,13 +850,17 @@
.qmjded.top
.qogdsq.top
.qqwfas.top
.realme.cool
.sblr005.xyz
.sblr007.xyz
.sgrxpw.top
.tbktyg.top
.tmwhe.icu
.txqqzs.top
.tzwpfn.top
.vr0rx.icu
.yjefsg.top
.ykjclz.top
.znegsr.top
.zxj4025.xyz
.3997884.icu
@ -829,10 +871,11 @@
.ia4myc.icu
.ijowbb.top
.jhuejd.top
.jrignr.top
.kpygtz.top
.kqstye.top
.kxdnjf.top
.mfts.ltd
.mzrkkc.top
.ncatge.top
.pctmqj.top
.sd2ed.icu
@ -844,11 +887,14 @@
.2uz8yl.icu
.3667884.icu
.5xvtv1.icu
.evcinc.top
.ixk0s.icu
.lurojy.top
.mivflz.top
.rataxm.top
.ruesxv.top
.vtcgqk.top
.jrignr.top
.tgytgc.top
.ptohrn.top
.0nwxs.icu
@ -871,20 +917,23 @@
.2h4j6m.icu
.axuxfr.top
.dkfrva.top
.kgetic.top
.kulxbm.top
.nfygxz.top
.oepcxt.top
.oinm8.icu
.pibfhc.top
.pojxbh.top
.puwcxz.top
.qmzrde.top
.suqluj.top
.syzzhh.top
.tkdwso.top
.whfpks.top
.xmdjsc.top
.yo4xv.icu
.ywcjpn.top
.z4zj8.icu
.3488754.icu
.bkcuph.top
.finamv.top
.foj40c.icu
@ -892,6 +941,8 @@
.ndiuxo.top
.odiuwn.top
.ohkgcg.top
.pgofap.top
.rgpivj.top
.rgxcoc.top
.syd58.icu
.6b3qxy.icu
@ -901,6 +952,7 @@
.dpbjba.top
.jdjubw.top
.kmflyc.top
.ktyebr.top
.o5irm9.icu
.sgl2h3.icu
.yjdcrw.top
@ -915,15 +967,16 @@
.u3xxum.icu
.uvoeb.top
.6ltgw.icu
.77ma30.icu
.kjtxzz.top
.mugnqj.top
.suqluj.top
.svgsxn.top
.w10auy.icu
.0h539n.icu
.cjkin.top
.gbbxdr.top
.kfssyc.top
.qfi2r.icu
.ratasc.top
.rjfbqi.top
.yomghd.top
@ -940,8 +993,10 @@
.adorui.top
.hggluj.top
.kpdmbs.top
.nnicqa.top
.nolj49.icu
.okywvf.top
.qfi2r.icu
.rqdauw.top
.zynepg.top
.0s0oy.icu
@ -949,7 +1004,6 @@
.h80drs.icu
.jkuxmm.top
.lbvhv.icu
.nqrkii.top
.pubxgn.top
.sdqqgx.top
.1ncp3l.icu
@ -958,38 +1012,100 @@
.kvmh9r.icu
.ncligz.top
.ssjeoe.top
.2t1jl0.icu
.4sji2i.icu
.76-u-kkj18.xyz
.cyzemc.top
.dlfylt.top
.irllcd.top
.nqrkii.top
.nxuiqz.top
.ybvasb.top
.2t1jl0.icu
.bgjjoi.top
.gt0hs9.icu
.lxpinv.top
.phpump.top
.sde4rq.icu
.ki6lhg.icu
.zhhula.top
.jdftpe.top
.yzkznk.top
.qhnqwt.top
.ecabki.top
.ewnywn.top
.ylibav.top
.8sy2b.icu
.ehlcnb.top
.aeqmrz.top
.alcuth.top
.azrbwq.top
.bbjaoj.top
.bkosbt.top
.blzdnb.top
.byurlt.top
.cdkcfp.top
.darlqk.top
.dkhbub.top
.dmkqui.top
.dpzuhj.top
.ebczvy.top
.fczquz.top
.febcbf.top
.gkzemp.top
.hafygf.top
.hxesyj.top
.ibzlwr.top
.iusmdw.top
.jkonat.top
.kexpaf.top
.krumxv.top
.mgfvys.top
.mjbfjn.top
.nscsrw.top
.nxjsgk.top
.onjoto.top
.orxkxr.top
.pmineb.top
.rcmrcm.top
.rcwtyz.top
.rfpuac.top
.rmzndb.top
.rpgjkv.top
.shenem.top
.sxpzjv.top
.tnhwua.top
.ttdwdq.top
.tyjlpa.top
.uaatqg.top
.urbhth.top
.wclddi.top
.wjxjkl.top
.xkmdct.top
.xsnhjn.top
.zqtwoe.top
.bvuhlb.top
.elhicw.top
.eucvbc.top
.fhqemq.top
.jryuds.top
.neidee.top
.rvpepb.top
.xwitwq.top
.dkmimw.top
.nhewlh.top
.gtqklv.top
.hhxulg.top
.mxeanv.top
.pxzjwk.top
.qlmenm.top
.qwuchl.top
.nphgoc.top
.yyzded.top
.ejvocy.top
.mingrunfuzhuang05.xyz
.yhuvzv.top
.ytwbnq.top
.yyzded.top
.gntozh.top
.kutlzl.top
.wcpjfd.top
.xqpwkk.top
.xzlnlm.top
.citcfi.top
@ -997,29 +1113,50 @@
.glhdpz.top
.gxdhfa.top
.dgqqxx.top
.neidee.top
.xaexmo.top
.1112458.xyz
.demtjo.top
.hixcny.top
.voadax.top
.adlxbh.top
.jubsfg.top
.pxnctk.top
.iaklie.top
.pugxbo.top
.rhwdsq.top
.vrbvro.top
.oiknia.top
.dfdnsx.top
.gzmvuo.top
.oipful.top
.pxbkkk.top
.sftqjs.top
.kwoneq.top
.abluwk.top
.bvdadv.top
.tikoig.top
.usivdv.top
.xgtofb.top
.yifpgd.top
.zmlyhs.top
.fvkhnr.top
.qexccc.top
.afohvv.top
.zrbfzw.top
.bfsngf.top
.epwnsr.top
.hpbzkc.top
.ohzctx.top
.pysitg.top
.ilfbdb.top
.gdmdxy.top
.mingrunfuzhuang05.xyz
.nlujo.top
.xgjt.pro
.nkjfmc.top
.yphurc.top
.5577778.xyz
.exqkjx.top
.aempvw.top
.aggfpx.top
@ -1054,7 +1191,6 @@
.iyazyw.top
.jgyxok.top
.jsbwdy.top
.jwluwe.top
.kbxlax.top
.kjpzvx.top
.kzfgvw.top
@ -1069,7 +1205,6 @@
.rzvflv.top
.shxump.top
.sjfmje.top
.stkfns.top
.tndado.top
.tqmagv.top
.tvwump.top
@ -1093,6 +1228,7 @@
.gdxsmh.top
.hviljm.top
.jfysaw.top
.jwluwe.top
.ktmuae.top
.nbmlys.top
.ruqdog.top
@ -1106,62 +1242,104 @@
.ovnrqc.top
.pxibhf.top
.slruwb.top
.stkfns.top
.xdfglk.top
.xqajbk.top
.yjzswj.top
.crgcfw.top
.gymkux.top
.iysyln.top
.sllzad.top
.yjzswj.top
.zbpbhe.top
.bkmtxa.top
.qqoehr.top
.cknvrx.top
.pkwrdl.top
.azrsog.top
.nofalw.top
.svfocp.top
.oajqfn.top
.tvbcmo.top
.jsmfgc.top
.6f1mf.icu
.fpaksix.icu
.lasdas.icu
.lciskdu.icu
.lskoposi.icu
.lsoidike.icu
.lsqpoisue.icu
.lssncjud.icu
.msjhdyue.icu
.msjudi.icu
.olkijsd.icu
.pilidu.icu
.qhjkof.icu
.tyroess.icu
.xmsjude.icu
.xskieus.icu
.d3skoc332.icu
.gocdr.icu
.hajsued.icu
.hsyueiod.icu
.lijhdy.icu
.lsklsope.icu
.lsshkly.icu
.lsxoisd.icu
.lxiskjde.icu
.tsyudasd.icu
.vajdui.icu
.xmsjdhf.icu
.yianskd.icu
.bahusue.icu
.bvhjjd.icu
.dayuhao.icu
.ksudie.icu
.laiisode.icu
.lsposie.icu
.maediof.icu
.msodik.icu
.nsyheuks.icu
.qiehao.icu
.sinnod.icu
.tashpitum.icu
.usideq.icu
.ckfbs.icu
.harginsu.icu
.lsggppe.icu
.lshosie.icu
.ma4k6.icu
.msjdyf.icu
.nanish.icu
.nhduio.icu
.plaiksd.icu
.xlsoida.icu
.xzsywte.icu
.kvdxks.top
.isvuiy.top
.lianges.icu
.lsgsda.icu
.lsoiujds.icu
.mangesd.icu
.ncjhdyri.icu
.paddki.icu
.0e0db.icu
.huaxianone.icu
.losijdue.icu
.likijs.icu
.mpdskdf.icu
.tablegsh.icu
.trsas.icu
.xuyhsjd.icu
.lsoieuy.icu
.toutiaohao.icu
.asdaeer.icu
.asjdied.icu
.hliurkd.icu
.jpisi.icu
.kaosid.icu
.lijhdjsu.icu
.losijdue.icu
.xiuydw.icu
.lsasdfg.icu
.09vz0h.icu
.0a67vu.icu
.0bj313.icu
@ -1296,4 +1474,6 @@
.zobca6.icu
.cpnqor.shop
.dwbjs.top
.qslbjc.top
.qslbjc.top
.jfcxph.top
.lspxoiu.icu

View File

@ -78,8 +78,8 @@
"ci-info": "^3.5.0",
"picocolors": "^1.0.0",
"piscina": "^3.2.0",
"psl": "^1.9.0",
"table": "^6.8.0",
"tldts": "^5.7.97",
"undici": "5.11.0"
},
"devDependencies": {

19
pnpm-lock.yaml generated
View File

@ -6,8 +6,8 @@ specifiers:
ci-info: ^3.5.0
picocolors: ^1.0.0
piscina: ^3.2.0
psl: ^1.9.0
table: ^6.8.0
tldts: ^5.7.97
undici: 5.11.0
wireit: ^0.7.2
@ -17,8 +17,8 @@ dependencies:
ci-info: 3.5.0
picocolors: 1.0.0
piscina: 3.2.0
psl: 1.9.0
table: 6.8.0
tldts: 5.7.97
undici: 5.11.0
devDependencies:
@ -353,10 +353,6 @@ packages:
signal-exit: 3.0.7
dev: true
/psl/1.9.0:
resolution: {integrity: sha512-E/ZsdU4HLs/68gYzgGTkMicWTLPdAftJLfJFlLUAAKZGkStNU72sZjT66SnMDVOfOWY/YAoiD7Jxa9iHvngcag==}
dev: false
/punycode/2.1.1:
resolution: {integrity: sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==}
engines: {node: '>=6'}
@ -444,6 +440,17 @@ packages:
strip-ansi: 6.0.1
dev: false
/tldts-core/5.7.97:
resolution: {integrity: sha512-qxcuOSdlpee8rjH0gxbV26iONL91Foqe5nB/Gv/4Dh14cAX4DDhGGhpJnXbNP/7nxa8Nmvwx8k9/Wvr9IhUJRA==}
dev: false
/tldts/5.7.97:
resolution: {integrity: sha512-0A9BMNpg2RyzmOkEQRTRVfHUr1j7e2RxL2SgH/E/rLq//Fl8TNQ6D8NloCV66wOt3NeTzONrCDYFhBlN+SGRGg==}
hasBin: true
dependencies:
tldts-core: 5.7.97
dev: false
/to-regex-range/5.0.1:
resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==}
engines: {node: '>=8.0'}