mirror of
https://github.com/SukkaW/Surge.git
synced 2026-01-29 01:51:52 +08:00
Add new phishing feed / speed up domains sort
This commit is contained in:
@@ -3,7 +3,7 @@ import path from 'path';
|
|||||||
import * as tldts from 'tldts';
|
import * as tldts from 'tldts';
|
||||||
import { processLine } from './lib/process-line';
|
import { processLine } from './lib/process-line';
|
||||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
import { readFileByLine } from './lib/fetch-text-by-line';
|
||||||
import { createDomainSorter } from './lib/stable-sort-domain';
|
import { sortDomains } from './lib/stable-sort-domain';
|
||||||
import { task } from './lib/trace-runner';
|
import { task } from './lib/trace-runner';
|
||||||
import { compareAndWriteFile } from './lib/create-file';
|
import { compareAndWriteFile } from './lib/create-file';
|
||||||
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
|
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
|
||||||
@@ -58,8 +58,8 @@ export const buildInternalCDNDomains = task(import.meta.path, async () => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const [domainSorter] = await Promise.all([
|
const [gorhill] = await Promise.all([
|
||||||
getGorhillPublicSuffixPromise().then(createDomainSorter),
|
getGorhillPublicSuffixPromise(),
|
||||||
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf')),
|
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf')),
|
||||||
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf')),
|
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf')),
|
||||||
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global_plus.conf')),
|
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global_plus.conf')),
|
||||||
@@ -74,7 +74,7 @@ export const buildInternalCDNDomains = task(import.meta.path, async () => {
|
|||||||
|
|
||||||
return compareAndWriteFile(
|
return compareAndWriteFile(
|
||||||
[
|
[
|
||||||
...Array.from(set).sort(domainSorter).map(i => `SUFFIX,${i}`),
|
...sortDomains(Array.from(set), gorhill).map(i => `SUFFIX,${i}`),
|
||||||
...Array.from(keywords).sort().map(i => `REGEX,${i}`)
|
...Array.from(keywords).sort().map(i => `REGEX,${i}`)
|
||||||
],
|
],
|
||||||
path.resolve(import.meta.dir, '../List/internal/cdn.txt')
|
path.resolve(import.meta.dir, '../List/internal/cdn.txt')
|
||||||
|
|||||||
@@ -1,17 +1,16 @@
|
|||||||
// @ts-check
|
// @ts-check
|
||||||
import fsp from 'fs/promises';
|
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
|
|
||||||
import { processHosts, processFilterRules } from './lib/parse-filter';
|
import { processHosts, processFilterRules, processDomainLists } from './lib/parse-filter';
|
||||||
import { createTrie } from './lib/trie';
|
import { createTrie } from './lib/trie';
|
||||||
|
|
||||||
import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } from './lib/reject-data-source';
|
import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST, DOMAIN_LISTS } from './lib/reject-data-source';
|
||||||
import { createRuleset, compareAndWriteFile } from './lib/create-file';
|
import { createRuleset, compareAndWriteFile } from './lib/create-file';
|
||||||
import { processLine } from './lib/process-line';
|
import { processLine } from './lib/process-line';
|
||||||
import { domainDeduper } from './lib/domain-deduper';
|
import { domainDeduper } from './lib/domain-deduper';
|
||||||
import createKeywordFilter from './lib/aho-corasick';
|
import createKeywordFilter from './lib/aho-corasick';
|
||||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
import { readFileByLine } from './lib/fetch-text-by-line';
|
||||||
import { createDomainSorter } from './lib/stable-sort-domain';
|
import { sortDomains } from './lib/stable-sort-domain';
|
||||||
import { traceSync, task, traceAsync } from './lib/trace-runner';
|
import { traceSync, task, traceAsync } from './lib/trace-runner';
|
||||||
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
|
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
|
||||||
import * as tldts from 'tldts';
|
import * as tldts from 'tldts';
|
||||||
@@ -38,6 +37,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
|
|||||||
domainSets.add(host);
|
domainSets.add(host);
|
||||||
});
|
});
|
||||||
})),
|
})),
|
||||||
|
...DOMAIN_LISTS.map(entry => processDomainLists(entry[0], entry[1])),
|
||||||
...ADGUARD_FILTERS.map(input => {
|
...ADGUARD_FILTERS.map(input => {
|
||||||
const promise = typeof input === 'string'
|
const promise = typeof input === 'string'
|
||||||
? processFilterRules(input)
|
? processFilterRules(input)
|
||||||
@@ -144,14 +144,15 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
|
|||||||
|
|
||||||
// Dedupe domainSets
|
// Dedupe domainSets
|
||||||
const dudupedDominArray = traceSync('* Dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
|
const dudupedDominArray = traceSync('* Dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
|
||||||
console.log(`Deduped ${previousSize - dudupedDominArray.length} rules!`);
|
console.log(`Deduped ${previousSize - dudupedDominArray.length} rules from covered subdomain!`);
|
||||||
|
console.log(`Final size ${dudupedDominArray.length}`);
|
||||||
|
|
||||||
// Create reject stats
|
// Create reject stats
|
||||||
const rejectDomainsStats: Array<[string, number]> = traceSync(
|
const rejectDomainsStats: Array<[string, number]> = traceSync(
|
||||||
'* Collect reject domain stats',
|
'* Collect reject domain stats',
|
||||||
() => Object.entries(
|
() => Object.entries(
|
||||||
dudupedDominArray.reduce<Record<string, number>>((acc, cur) => {
|
dudupedDominArray.reduce<Record<string, number>>((acc, cur) => {
|
||||||
const suffix = tldts.getDomain(cur, { allowPrivateDomains: false, detectIp: false });
|
const suffix = tldts.getDomain(cur, { allowPrivateDomains: false, detectIp: false, validateHostname: false });
|
||||||
if (suffix) {
|
if (suffix) {
|
||||||
acc[suffix] = (acc[suffix] ?? 0) + 1;
|
acc[suffix] = (acc[suffix] ?? 0) + 1;
|
||||||
}
|
}
|
||||||
@@ -174,7 +175,10 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
|
|||||||
'',
|
'',
|
||||||
'Build from:',
|
'Build from:',
|
||||||
...HOSTS.map(host => ` - ${host[0]}`),
|
...HOSTS.map(host => ` - ${host[0]}`),
|
||||||
...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
|
...DOMAIN_LISTS.map(domainList => ` - ${domainList[0]}`),
|
||||||
|
...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`),
|
||||||
|
' - https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt',
|
||||||
|
' - https://phishing.army/download/phishing_army_blocklist.txt'
|
||||||
];
|
];
|
||||||
|
|
||||||
return Promise.all([
|
return Promise.all([
|
||||||
@@ -182,7 +186,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
|
|||||||
'Sukka\'s Ruleset - Reject Base',
|
'Sukka\'s Ruleset - Reject Base',
|
||||||
description,
|
description,
|
||||||
new Date(),
|
new Date(),
|
||||||
traceSync('* Sort reject domainset', () => dudupedDominArray.sort(createDomainSorter(gorhill))),
|
traceSync('* Sort reject domainset', () => sortDomains(dudupedDominArray, gorhill)),
|
||||||
'domainset',
|
'domainset',
|
||||||
path.resolve(import.meta.dir, '../List/domainset/reject.conf'),
|
path.resolve(import.meta.dir, '../List/domainset/reject.conf'),
|
||||||
path.resolve(import.meta.dir, '../Clash/domainset/reject.txt')
|
path.resolve(import.meta.dir, '../Clash/domainset/reject.txt')
|
||||||
|
|||||||
@@ -1,13 +1,14 @@
|
|||||||
import { domainDeduper } from './lib/domain-deduper';
|
import { domainDeduper } from './lib/domain-deduper';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
import { createRuleset } from './lib/create-file';
|
import { createRuleset } from './lib/create-file';
|
||||||
import domainSorter from './lib/stable-sort-domain';
|
import { sortDomains } from './lib/stable-sort-domain';
|
||||||
|
|
||||||
import { Sema } from 'async-sema';
|
import { Sema } from 'async-sema';
|
||||||
import * as tldts from 'tldts';
|
import * as tldts from 'tldts';
|
||||||
import { task } from './lib/trace-runner';
|
import { task } from './lib/trace-runner';
|
||||||
import { fetchWithRetry } from './lib/fetch-retry';
|
import { fetchWithRetry } from './lib/fetch-retry';
|
||||||
import { SHARED_DESCRIPTION } from './lib/constants';
|
import { SHARED_DESCRIPTION } from './lib/constants';
|
||||||
|
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
|
||||||
|
|
||||||
const s = new Sema(3);
|
const s = new Sema(3);
|
||||||
|
|
||||||
@@ -140,7 +141,9 @@ export const buildSpeedtestDomainSet = task(import.meta.path, async () => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const deduped = domainDeduper(Array.from(domains)).sort(domainSorter);
|
const gorhill = await getGorhillPublicSuffixPromise();
|
||||||
|
const deduped = sortDomains(domainDeduper(Array.from(domains)), gorhill);
|
||||||
|
|
||||||
const description = [
|
const description = [
|
||||||
...SHARED_DESCRIPTION,
|
...SHARED_DESCRIPTION,
|
||||||
'',
|
'',
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import fsp from 'fs/promises';
|
import fsp from 'fs/promises';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
||||||
import { processHosts } from './parse-filter';
|
import { processDomainLists, processHosts } from './parse-filter';
|
||||||
import { traceAsync, traceSync } from './trace-runner';
|
import { traceAsync, traceSync } from './trace-runner';
|
||||||
import * as tldts from 'tldts';
|
import * as tldts from 'tldts';
|
||||||
import { createTrie } from './trie';
|
import { createTrie } from './trie';
|
||||||
@@ -33,7 +33,12 @@ const BLACK_TLD = new Set([
|
|||||||
'club',
|
'club',
|
||||||
'cn',
|
'cn',
|
||||||
'codes',
|
'codes',
|
||||||
|
'co.uk',
|
||||||
|
'co.in',
|
||||||
|
'com.br',
|
||||||
'com.cn',
|
'com.cn',
|
||||||
|
'com.pl',
|
||||||
|
'com.vn',
|
||||||
'cool',
|
'cool',
|
||||||
'cyou',
|
'cyou',
|
||||||
'fit',
|
'fit',
|
||||||
@@ -53,6 +58,7 @@ const BLACK_TLD = new Set([
|
|||||||
'ltd',
|
'ltd',
|
||||||
'ml',
|
'ml',
|
||||||
'mobi',
|
'mobi',
|
||||||
|
'net.pl',
|
||||||
'one',
|
'one',
|
||||||
'online',
|
'online',
|
||||||
'pro',
|
'pro',
|
||||||
@@ -79,19 +85,12 @@ const BLACK_TLD = new Set([
|
|||||||
]);
|
]);
|
||||||
|
|
||||||
export const getPhishingDomains = () => traceAsync('get phishing domains', async () => {
|
export const getPhishingDomains = () => traceAsync('get phishing domains', async () => {
|
||||||
const [domainSet, gorhill] = await Promise.all([
|
const [domainSet, domainSet2, gorhill] = await Promise.all([
|
||||||
processHosts('https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true),
|
processHosts('https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true),
|
||||||
// processDomainLists('https://phishing.army/download/phishing_army_blocklist.txt', true),
|
processDomainLists('https://phishing.army/download/phishing_army_blocklist.txt', true),
|
||||||
// processFilterRules(
|
|
||||||
// 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
|
|
||||||
// [
|
|
||||||
// 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
|
|
||||||
// // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
|
|
||||||
// // 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
|
|
||||||
// ]
|
|
||||||
// ),
|
|
||||||
getGorhillPublicSuffixPromise()
|
getGorhillPublicSuffixPromise()
|
||||||
]);
|
]);
|
||||||
|
domainSet2.forEach((domain) => domainSet.add(domain));
|
||||||
|
|
||||||
traceSync.skip('* whitelisting phishing domains', () => {
|
traceSync.skip('* whitelisting phishing domains', () => {
|
||||||
const trieForRemovingWhiteListed = createTrie(domainSet);
|
const trieForRemovingWhiteListed = createTrie(domainSet);
|
||||||
|
|||||||
@@ -37,27 +37,27 @@ const normalizeDomain = (domain: string) => {
|
|||||||
return h[0] === '.' ? h.slice(1) : h;
|
return h[0] === '.' ? h.slice(1) : h;
|
||||||
};
|
};
|
||||||
|
|
||||||
export async function processDomainLists(domainListsUrl: string, includeAllSubDomain = false) {
|
export function processDomainLists(domainListsUrl: string, includeAllSubDomain = false) {
|
||||||
const domainSets = new Set<string>();
|
return traceAsync(`- processDomainLists: ${domainListsUrl}`, async () => {
|
||||||
|
const domainSets = new Set<string>();
|
||||||
|
|
||||||
for await (const line of await fetchRemoteTextAndReadByLine(domainListsUrl)) {
|
for await (const line of await fetchRemoteTextAndReadByLine(domainListsUrl)) {
|
||||||
const domainToAdd = processLine(line);
|
const domainToAdd = processLine(line);
|
||||||
if (!domainToAdd) {
|
if (!domainToAdd) continue;
|
||||||
continue;
|
|
||||||
|
if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) {
|
||||||
|
warnOnce(domainListsUrl, false, DEBUG_DOMAIN_TO_FIND);
|
||||||
|
foundDebugDomain = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) {
|
return domainSets;
|
||||||
warnOnce(domainListsUrl, false, DEBUG_DOMAIN_TO_FIND);
|
});
|
||||||
foundDebugDomain = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
|
|
||||||
}
|
|
||||||
|
|
||||||
return domainSets;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function processHosts(hostsUrl: string, includeAllSubDomain = false, skipDomainCheck = false) {
|
export function processHosts(hostsUrl: string, includeAllSubDomain = false, skipDomainCheck = false) {
|
||||||
return traceAsync(`- processHosts: ${hostsUrl}`, async () => {
|
return traceAsync(`- processHosts: ${hostsUrl}`, async () => {
|
||||||
const domainSets = new Set<string>();
|
const domainSets = new Set<string>();
|
||||||
|
|
||||||
|
|||||||
@@ -26,6 +26,11 @@ export const HOSTS = [
|
|||||||
['https://paulgb.github.io/BarbBlock/blacklists/hosts-file.txt', true, true]
|
['https://paulgb.github.io/BarbBlock/blacklists/hosts-file.txt', true, true]
|
||||||
] as const;
|
] as const;
|
||||||
|
|
||||||
|
export const DOMAIN_LISTS = [
|
||||||
|
// DigitalSide Threat-Intel - OSINT Hub
|
||||||
|
['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true]
|
||||||
|
] as const;
|
||||||
|
|
||||||
export const ADGUARD_FILTERS = [
|
export const ADGUARD_FILTERS = [
|
||||||
// EasyList
|
// EasyList
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -1,13 +0,0 @@
|
|||||||
import domainSorter from './stable-sort-domain';
|
|
||||||
// eslint-disable-next-line import/no-unresolved -- fuck eslint-import
|
|
||||||
import { describe, it, expect } from 'bun:test';
|
|
||||||
|
|
||||||
describe('stable-sort-domain', () => {
|
|
||||||
it('.ks.cn, .tag.unclaimedproperty.ks.gov', () => {
|
|
||||||
expect(domainSorter('.ks.cn', '.tag.unclaimedproperty.ks.gov')).toBe(-1);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('.fgnzdb.xyz, .hub.fghtem.com', () => {
|
|
||||||
expect(domainSorter('.fgnzdb.xyz', '.hub.fghtem.com')).toBe(1);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
@@ -10,18 +10,16 @@ const compare = (a: string | null, b: string | null) => {
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (a.length !== b.length) {
|
const aLen = a.length;
|
||||||
const r = a.length - b.length;
|
const r = aLen - b.length;
|
||||||
if (r > 0) {
|
if (r > 0) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (r < 0) {
|
if (r < 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (let i = 0; i < a.length; i++) {
|
for (let i = 0; i < aLen; i++) {
|
||||||
if (b[i] == null) {
|
if (b[i] == null) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@@ -35,34 +33,21 @@ const compare = (a: string | null, b: string | null) => {
|
|||||||
return 0;
|
return 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
const createDomainSorter = (gorhill: PublicSuffixList | null = null) => {
|
export const sortDomains = (inputs: string[], gorhill: PublicSuffixList) => {
|
||||||
if (gorhill) {
|
const getDomain = createCachedGorhillGetDomain(gorhill);
|
||||||
const getDomain = createCachedGorhillGetDomain(gorhill);
|
const domains = inputs.reduce<Record<string, string>>((acc, cur) => {
|
||||||
|
acc[cur] ||= getDomain(cur);
|
||||||
|
return acc;
|
||||||
|
}, {});
|
||||||
|
|
||||||
return (a: string, b: string) => {
|
const sorter = (a: string, b: string) => {
|
||||||
if (a === b) return 0;
|
|
||||||
|
|
||||||
const aDomain = getDomain(a);
|
|
||||||
const bDomain = getDomain(b);
|
|
||||||
|
|
||||||
const resultDomain = compare(aDomain, bDomain);
|
|
||||||
return resultDomain !== 0 ? resultDomain : compare(a, b);
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-var-requires -- fuck
|
|
||||||
const tldts = require('./cached-tld-parse');
|
|
||||||
|
|
||||||
return (a: string, b: string) => {
|
|
||||||
if (a === b) return 0;
|
if (a === b) return 0;
|
||||||
|
|
||||||
const aDomain = tldts.parse(a).domain;
|
const aDomain = domains[a];
|
||||||
const bDomain = tldts.parse(b).domain;
|
const bDomain = domains[b];
|
||||||
|
|
||||||
const resultDomain = compare(aDomain, bDomain);
|
return compare(aDomain, bDomain) || compare(a, b);
|
||||||
return resultDomain !== 0 ? resultDomain : compare(a, b);
|
|
||||||
};
|
};
|
||||||
};
|
|
||||||
|
|
||||||
export default createDomainSorter();
|
return inputs.sort(sorter);
|
||||||
export { createDomainSorter };
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user