Update CDN & Phishing & AI Hosts

This commit is contained in:
SukkaW 2023-12-04 10:41:08 +08:00
parent da7c6764c5
commit 7e13ae7a24
4 changed files with 82 additions and 39 deletions

View File

@ -1,4 +1,4 @@
import { processHosts } from './lib/parse-filter';
import { processDomainLists, processHosts } from './lib/parse-filter';
import path from 'path';
import { createRuleset } from './lib/create-file';
import { processLine } from './lib/process-line';
@ -21,52 +21,69 @@ const WHITELIST_DOMAIN = new Set([
'notion.site'
]);
// Set of blacklisted domain suffixes. Entries are plain TLDs ('xyz', 'top') as well as
// multi-label suffixes ('za.com', 'sa.com', 'com.cn', 'rf.gd').
// NOTE(review): how this set is consumed is not visible in this hunk — presumably it is
// used when evaluating candidate phishing domains in buildPhishingDomainSet; confirm.
// NOTE(review): many entries are listed twice (e.g. 'cf', 'click', 'cool', 'xyz'). A Set
// stores each value once, so the repeats do not change its contents, but it looks like
// the unsorted and the alphabetised versions of the list are interleaved here — confirm
// against the actual file.
// NOTE(review): the second 'com.cn' entry (directly before 'online') has no trailing
// comma, which is a syntax error as written; likely the same interleaving artifact.
const BLACK_TLD = new Set([
'xyz',
'top',
'win',
'vip',
'site',
'space',
'online',
'icu',
'fun',
'shop',
'cool',
'cyou',
'id',
'pro',
'za.com',
'sa.com',
'ltd',
'group',
'rest',
'tech',
'link',
'ink',
'autos',
'bar',
'tokyo',
'tk',
'cf',
'gq',
'ga',
'ml',
'biz',
'bond',
'business',
'buzz',
'cc',
'cn',
'codes',
'cf',
'cfd',
'click',
'cloud',
'club',
'click',
'cfd',
'cn',
'codes',
'com.cn',
'cool',
'cyou',
'fit',
'fun',
'ga',
'gd',
'gq',
'group',
'host',
'icu',
'id',
'info',
'ink',
'life',
'live',
'link',
'ltd',
'ml',
'mobi',
'buzz',
'one',
'com.cn'
'online',
'pro',
'pl',
'pw',
'rest',
'rf.gd',
'sa.com',
'sbs',
'shop',
'site',
'space',
'store',
'tech',
'tk',
'tokyo',
'top',
'vip',
'vn',
'website',
'win',
'xyz',
'za.com'
]);
export const buildPhishingDomainSet = task(import.meta.path, async () => {
const [domainSet, gorhill] = await Promise.all([
processHosts('https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true),
// processDomainLists('https://phishing.army/download/phishing_army_blocklist.txt', true),
// processFilterRules(
// 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
// [
@ -78,6 +95,8 @@ export const buildPhishingDomainSet = task(import.meta.path, async () => {
getGorhillPublicSuffixPromise()
]);
// _domainSet2.forEach(i => domainSet.add(i));
traceSync('* whitelist', () => {
const trieForRemovingWhiteListed = createTrie(domainSet);
WHITELIST_DOMAIN.forEach(white => {

View File

@ -37,7 +37,7 @@ const normalizeDomain = (domain: string) => {
return null;
};
export async function processDomainLists(domainListsUrl: string | URL) {
export async function processDomainLists(domainListsUrl: string | URL, includeAllSubDomain = false) {
if (typeof domainListsUrl === 'string') {
domainListsUrl = new URL(domainListsUrl);
}
@ -55,7 +55,11 @@ export async function processDomainLists(domainListsUrl: string | URL) {
foundDebugDomain = true;
}
domainSets.add(domainToAdd);
if (includeAllSubDomain) {
domainSets.add(`.${domainToAdd}`);
} else {
domainSets.add(domainToAdd);
}
}
return domainSets;

View File

@ -169,8 +169,7 @@ packages-cf.termux.dev
# Homebrew
formulae.brew.sh
# crates.io
crates.io
static.crates.io
.crates.io
# PHP
.getcomposer.org
.packagist.org
@ -182,6 +181,7 @@ cdn.sheetjs.com
repo.nextdns.io
oss-binaries.phusionpassenger.com
release.runcloud.io
curl.se
# >> WordPress CDN
s0.wp.com
@ -995,6 +995,10 @@ static.maxmind.com
download.maxmind.com
# roblox
.rbxcdn.com
# EA.com (Origin)
cdn.mcr.ea.com
pl.ea.com
media.contentapi.ea.com
# LottieFiles
assets0.lottiefiles.com
assets1.lottiefiles.com
@ -1042,6 +1046,8 @@ static.tumblr.com
.media.tumblr.com
# SoundCloud
.sndcdn.com
# SoundCloud Widget
w.soundcloud.com
# Vercel
assets.vercel.com
image.ship.vercel.com
@ -1211,6 +1217,8 @@ static.grammarly.com
# OpenAI
.oaistatic.com
.oaiusercontent.com
# Claude AI
s-cdn.anthropic.com
# Manhuagui
i.hamreus.com
us.hamreus.com

12
Source/non_ip/ai.conf Normal file
View File

@ -0,0 +1,12 @@
# $ meta_title Sukka's Ruleset - AIGC Domains
# $ meta_description This file contains domains of AIGC services, including OpenAI, Anthropic (Claude), xAI and Perplexity.
DOMAIN-SUFFIX,openai.com
DOMAIN-SUFFIX,oaistatic.com
DOMAIN-SUFFIX,oaiusercontent.com
DOMAIN-SUFFIX,ai.com
DOMAIN-SUFFIX,x.ai
DOMAIN-SUFFIX,openaiapi-site.azureedge.net
DOMAIN-SUFFIX,perplexity.ai
DOMAIN-SUFFIX,anthropic.com
DOMAIN-SUFFIX,claude.ai