Add new phishing feed / speed up domains sort

This commit is contained in:
SukkaW
2023-12-12 17:10:55 +08:00
parent e56f601fbc
commit e970006445
8 changed files with 72 additions and 89 deletions

View File

@@ -1,17 +1,16 @@
// @ts-check
import fsp from 'fs/promises';
import path from 'path';
import { processHosts, processFilterRules } from './lib/parse-filter';
import { processHosts, processFilterRules, processDomainLists } from './lib/parse-filter';
import { createTrie } from './lib/trie';
import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } from './lib/reject-data-source';
import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST, DOMAIN_LISTS } from './lib/reject-data-source';
import { createRuleset, compareAndWriteFile } from './lib/create-file';
import { processLine } from './lib/process-line';
import { domainDeduper } from './lib/domain-deduper';
import createKeywordFilter from './lib/aho-corasick';
import { readFileByLine } from './lib/fetch-text-by-line';
import { createDomainSorter } from './lib/stable-sort-domain';
import { sortDomains } from './lib/stable-sort-domain';
import { traceSync, task, traceAsync } from './lib/trace-runner';
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
import * as tldts from 'tldts';
@@ -38,6 +37,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
domainSets.add(host);
});
})),
...DOMAIN_LISTS.map(entry => processDomainLists(entry[0], entry[1])),
...ADGUARD_FILTERS.map(input => {
const promise = typeof input === 'string'
? processFilterRules(input)
@@ -144,14 +144,15 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
// Dedupe domainSets
const dudupedDominArray = traceSync('* Dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
console.log(`Deduped ${previousSize - dudupedDominArray.length} rules!`);
console.log(`Deduped ${previousSize - dudupedDominArray.length} rules from covered subdomain!`);
console.log(`Final size ${dudupedDominArray.length}`);
// Create reject stats
const rejectDomainsStats: Array<[string, number]> = traceSync(
'* Collect reject domain stats',
() => Object.entries(
dudupedDominArray.reduce<Record<string, number>>((acc, cur) => {
const suffix = tldts.getDomain(cur, { allowPrivateDomains: false, detectIp: false });
const suffix = tldts.getDomain(cur, { allowPrivateDomains: false, detectIp: false, validateHostname: false });
if (suffix) {
acc[suffix] = (acc[suffix] ?? 0) + 1;
}
@@ -174,7 +175,10 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
'',
'Build from:',
...HOSTS.map(host => ` - ${host[0]}`),
...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
...DOMAIN_LISTS.map(domainList => ` - ${domainList[0]}`),
...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`),
' - https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt',
' - https://phishing.army/download/phishing_army_blocklist.txt'
];
return Promise.all([
@@ -182,7 +186,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
'Sukka\'s Ruleset - Reject Base',
description,
new Date(),
traceSync('* Sort reject domainset', () => dudupedDominArray.sort(createDomainSorter(gorhill))),
traceSync('* Sort reject domainset', () => sortDomains(dudupedDominArray, gorhill)),
'domainset',
path.resolve(import.meta.dir, '../List/domainset/reject.conf'),
path.resolve(import.meta.dir, '../Clash/domainset/reject.txt')