Fix: enable domain check for some reject data sources

This commit is contained in:
SukkaW 2024-01-07 01:05:20 +08:00
parent 61b88c5807
commit ca169b9db5
6 changed files with 32 additions and 19 deletions

View File

@ -37,7 +37,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
domainSets.add(host); domainSets.add(host);
}); });
})), })),
...DOMAIN_LISTS.map(entry => processDomainLists(entry[0], entry[1], entry[2])), ...DOMAIN_LISTS.map(entry => processDomainLists(entry[0], entry[1], entry[2], entry[3])),
...ADGUARD_FILTERS.map(input => { ...ADGUARD_FILTERS.map(input => {
const promise = typeof input === 'string' const promise = typeof input === 'string'
? processFilterRules(input) ? processFilterRules(input)

View File

@ -3,6 +3,7 @@ import { createCache } from './cache-apply';
import type { PublicSuffixList } from '@gorhill/publicsuffixlist'; import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
const cache = createCache('cached-tld-parse', true); const cache = createCache('cached-tld-parse', true);
const cache2 = createCache('cached-tld-parse2', true);
const sharedConfig = { allowPrivateDomains: true }; const sharedConfig = { allowPrivateDomains: true };
const sharedConfig2 = { allowPrivateDomains: true, detectIp: false }; const sharedConfig2 = { allowPrivateDomains: true, detectIp: false };
@ -10,7 +11,7 @@ const sharedConfig2 = { allowPrivateDomains: true, detectIp: false };
/** { allowPrivateDomains: true } */ /** { allowPrivateDomains: true } */
export const parse = (domain: string) => cache.sync(domain, () => tldts.parse(domain, sharedConfig)); export const parse = (domain: string) => cache.sync(domain, () => tldts.parse(domain, sharedConfig));
/** { allowPrivateDomains: true, detectIp: false } */ /** { allowPrivateDomains: true, detectIp: false } */
export const parse2 = (domain: string) => cache.sync(domain, () => tldts.parse(domain, sharedConfig2)); export const parse2 = (domain: string) => cache2.sync(domain, () => tldts.parse(domain, sharedConfig2));
let gothillGetDomainCache: ReturnType<typeof createCache> | null = null; let gothillGetDomainCache: ReturnType<typeof createCache> | null = null;
export const createCachedGorhillGetDomain = (gorhill: PublicSuffixList) => { export const createCachedGorhillGetDomain = (gorhill: PublicSuffixList) => {

View File

@ -85,8 +85,8 @@ const BLACK_TLD = new Set([
export const getPhishingDomains = () => traceAsync('get phishing domains', async () => { export const getPhishingDomains = () => traceAsync('get phishing domains', async () => {
const [domainSet, domainSet2, gorhill] = await Promise.all([ const [domainSet, domainSet2, gorhill] = await Promise.all([
processHosts('https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true, TTL.THREE_HOURS()), processDomainLists('https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt', true, false, TTL.THREE_HOURS()),
processDomainLists('https://phishing.army/download/phishing_army_blocklist.txt', true, TTL.THREE_HOURS()), processDomainLists('https://phishing.army/download/phishing_army_blocklist.txt', true, true, TTL.THREE_HOURS()),
getGorhillPublicSuffixPromise() getGorhillPublicSuffixPromise()
]); ]);
domainSet2.forEach((domain) => domainSet.add(domain)); domainSet2.forEach((domain) => domainSet.add(domain));

View File

@ -5,11 +5,14 @@ export const normalizeDomain = (domain: string) => {
if (isProbablyIpv4(domain)) return null; if (isProbablyIpv4(domain)) return null;
const parsed = tldts.parse2(domain); const parsed = tldts.parse2(domain);
if (parsed.isIp) return null; // if (parsed.isIp) return null;
if (!parsed.hostname) return null;
if (!parsed.isIcann && !parsed.isPrivate) return null; if (!parsed.isIcann && !parsed.isPrivate) return null;
const h = parsed.hostname; let h = parsed.hostname;
if (!h) return null; if (h[0] === '.') h = h.slice(1);
if (h.endsWith('.')) h = h.slice(0, -1);
return h[0] === '.' ? h.slice(1) : h; if (h) return h;
return null;
}; };

View File

@ -11,17 +11,23 @@ import { normalizeDomain } from './normalize-domain';
import { fetchAssets } from './fetch-assets'; import { fetchAssets } from './fetch-assets';
import { deserializeSet, fsCache, serializeSet } from './cache-filesystem'; import { deserializeSet, fsCache, serializeSet } from './cache-filesystem';
const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null const DEBUG_DOMAIN_TO_FIND: string | null = '.j3.4z0vc.chileinsumos.cl'; // example.com | null
let foundDebugDomain = false; let foundDebugDomain = false;
export function processDomainLists(domainListsUrl: string, includeAllSubDomain = false, ttl: number | null = null) { export function processDomainLists(domainListsUrl: string, includeAllSubDomain = false, skipDomainCheck = false, ttl: number | null = null) {
return traceAsync(`- processDomainLists: ${domainListsUrl}`, () => fsCache.apply( return traceAsync(`- processDomainLists: ${domainListsUrl}`, () => fsCache.apply(
domainListsUrl, domainListsUrl,
async () => { async () => {
const domainSets = new Set<string>(); const domainSets = new Set<string>();
for await (const line of await fetchRemoteTextByLine(domainListsUrl)) { for await (const line of await fetchRemoteTextByLine(domainListsUrl)) {
const domainToAdd = processLine(line); let domainToAdd = processLine(line);
if (!domainToAdd) continue;
if (!skipDomainCheck) {
domainToAdd = normalizeDomain(domainToAdd);
}
if (!domainToAdd) continue; if (!domainToAdd) continue;
if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) { if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) {
@ -123,6 +129,9 @@ export async function processFilterRules(
const flag = result[1]; const flag = result[1];
const hostname = result[0]; const hostname = result[0];
// if (hostname.endsWith('.')) {
// hostname = hostname.slice(0, -1);
// }
if (DEBUG_DOMAIN_TO_FIND) { if (DEBUG_DOMAIN_TO_FIND) {
if (hostname.includes(DEBUG_DOMAIN_TO_FIND)) { if (hostname.includes(DEBUG_DOMAIN_TO_FIND)) {

View File

@ -31,20 +31,20 @@ export const HOSTS = [
export const DOMAIN_LISTS = [ export const DOMAIN_LISTS = [
// CoinBlockerList // CoinBlockerList
// Although the hosts file is still actively maintained, the hosts_browser file has not been updated since 2021-07, so we set a 14-day cache TTL // Although the hosts file is still actively maintained, the hosts_browser file has not been updated since 2021-07, so we set a 14-day cache TTL
['https://zerodot1.gitlab.io/CoinBlockerLists/list_browser.txt', true, TTL.TWO_WEEKS()], ['https://zerodot1.gitlab.io/CoinBlockerLists/list_browser.txt', true, true, TTL.TWO_WEEKS()],
// BarbBlock // BarbBlock
// The barbblock list has not been updated since 2019-05, so we set a 14-day cache TTL // The barbblock list has not been updated since 2019-05, so we set a 14-day cache TTL
['https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', true, TTL.TWO_WEEKS()], ['https://paulgb.github.io/BarbBlock/blacklists/domain-list.txt', true, true, TTL.TWO_WEEKS()],
// DigitalSide Threat-Intel - OSINT Hub // DigitalSide Threat-Intel - OSINT Hub
// Update once per day // Update once per day
['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, TTL.ONE_DAY()], ['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, true, TTL.ONE_DAY()],
// AdGuard CNAME Filter Combined // AdGuard CNAME Filter Combined
// Updated on a 7-day basis, so we use a 3-day cache TTL // Updated on a 7-day basis, so we use a 3-day cache TTL
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, TTL.THREE_DAYS()], ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, true, TTL.THREE_DAYS()],
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers_justdomains.txt', true, TTL.THREE_DAYS()], ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers_justdomains.txt', true, true, TTL.THREE_DAYS()],
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_clickthroughs_justdomains.txt', true, TTL.THREE_DAYS()], ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_clickthroughs_justdomains.txt', true, true, TTL.THREE_DAYS()],
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_microsites_justdomains.txt', true, TTL.THREE_DAYS()], ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_microsites_justdomains.txt', true, true, TTL.THREE_DAYS()],
['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers_justdomains.txt', true, TTL.THREE_DAYS()] ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers_justdomains.txt', true, true, TTL.THREE_DAYS()]
] as const; ] as const;
export const ADGUARD_FILTERS = [ export const ADGUARD_FILTERS = [