mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Chore: simplify build infra / remove reject_phishing
This commit is contained in:
parent
42e9b4310f
commit
1928c052a9
@ -2,7 +2,7 @@
|
||||
import path from 'path';
|
||||
import { isIPv4, isIPv6 } from 'net';
|
||||
import { createRuleset } from './lib/create-file';
|
||||
import { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } from './lib/fetch-remote-text-by-line';
|
||||
import { fetchRemoteTextAndReadByLine, readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { processLine } from './lib/process-line';
|
||||
import { task } from './lib/trace-runner';
|
||||
import { SHARED_DESCRIPTION } from './lib/constants';
|
||||
@ -10,7 +10,7 @@ import { SHARED_DESCRIPTION } from './lib/constants';
|
||||
const getBogusNxDomainIPs = async () => {
|
||||
/** @type {string[]} */
|
||||
const result = [];
|
||||
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')) {
|
||||
for await (const line of await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')) {
|
||||
if (line.startsWith('bogus-nxdomain=')) {
|
||||
const ip = line.slice(15).trim();
|
||||
if (isIPv4(ip)) {
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import path from 'path';
|
||||
import { createRuleset } from './lib/create-file';
|
||||
import { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } from './lib/fetch-remote-text-by-line';
|
||||
import { fetchRemoteTextAndReadByLine, readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { createTrie } from './lib/trie';
|
||||
import { task } from './lib/trace-runner';
|
||||
import { processLine } from './lib/process-line';
|
||||
@ -19,7 +19,7 @@ const getS3OSSDomains = async (): Promise<Set<string>> => {
|
||||
}
|
||||
} else {
|
||||
console.log('public_suffix_list.dat not found, fetch directly from remote.');
|
||||
for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://publicsuffix.org/list/public_suffix_list.dat')) {
|
||||
for await (const line of await fetchRemoteTextAndReadByLine('https://publicsuffix.org/list/public_suffix_list.dat')) {
|
||||
trie.add(line);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { fetchRemoteTextAndCreateReadlineInterface } from './lib/fetch-remote-text-by-line';
|
||||
import { fetchRemoteTextAndReadByLine } from './lib/fetch-text-by-line';
|
||||
import { resolve as pathResolve } from 'path';
|
||||
import { compareAndWriteFile, withBannerArray } from './lib/create-file';
|
||||
import { processLineFromReadline } from './lib/process-line';
|
||||
@ -17,7 +17,7 @@ const INCLUDE_CIDRS = [
|
||||
];
|
||||
|
||||
export const buildChnCidr = task(import.meta.path, async () => {
|
||||
const cidr = await processLineFromReadline(await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt'));
|
||||
const cidr = await processLineFromReadline(await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt'));
|
||||
const filteredCidr = exclude([...cidr, ...INCLUDE_CIDRS], EXCLUDE_CIDRS, true);
|
||||
|
||||
// Can not use SHARED_DESCRIPTION here as different license
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
import * as path from 'path';
|
||||
import { PathScurry } from 'path-scurry';
|
||||
import { readFileByLine } from './lib/fetch-remote-text-by-line';
|
||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { processLine } from './lib/process-line';
|
||||
import { createRuleset } from './lib/create-file';
|
||||
import { domainDeduper } from './lib/domain-deduper';
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
// @ts-check
|
||||
import path from 'path';
|
||||
import { DOMESTICS } from '../Source/non_ip/domestic';
|
||||
import { readFileByLine } from './lib/fetch-remote-text-by-line';
|
||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { processLineFromReadline } from './lib/process-line';
|
||||
import { compareAndWriteFile, createRuleset } from './lib/create-file';
|
||||
import { task } from './lib/trace-runner';
|
||||
|
||||
@ -2,7 +2,7 @@ import fsp from 'fs/promises';
|
||||
import path from 'path';
|
||||
import * as tldts from 'tldts';
|
||||
import { processLine } from './lib/process-line';
|
||||
import { readFileByLine } from './lib/fetch-remote-text-by-line';
|
||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { createDomainSorter } from './lib/stable-sort-domain';
|
||||
import { task } from './lib/trace-runner';
|
||||
import { compareAndWriteFile } from './lib/create-file';
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { fetchRemoteTextAndCreateReadlineInterface } from './lib/fetch-remote-text-by-line';
|
||||
import { fetchRemoteTextAndReadByLine } from './lib/fetch-text-by-line';
|
||||
import { processLineFromReadline } from './lib/process-line';
|
||||
import path from 'path';
|
||||
import fsp from 'fs/promises';
|
||||
@ -26,7 +26,7 @@ const RESERVED_IPV4_CIDR = [
|
||||
|
||||
export const buildInternalReverseChnCIDR = task(import.meta.path, async () => {
|
||||
const [cidr] = await Promise.all([
|
||||
processLineFromReadline(await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')),
|
||||
processLineFromReadline(await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')),
|
||||
fsp.mkdir(path.resolve(import.meta.dir, '../List/internal'), { recursive: true })
|
||||
]);
|
||||
|
||||
|
||||
@ -10,12 +10,13 @@ import { createRuleset, compareAndWriteFile } from './lib/create-file';
|
||||
import { processLine } from './lib/process-line';
|
||||
import { domainDeduper } from './lib/domain-deduper';
|
||||
import createKeywordFilter from './lib/aho-corasick';
|
||||
import { readFileByLine } from './lib/fetch-remote-text-by-line';
|
||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { createDomainSorter } from './lib/stable-sort-domain';
|
||||
import { traceSync, task } from './lib/trace-runner';
|
||||
import { traceSync, task, traceAsync } from './lib/trace-runner';
|
||||
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
|
||||
import * as tldts from 'tldts';
|
||||
import { SHARED_DESCRIPTION } from './lib/constants';
|
||||
import { getPhishingDomains } from './lib/get-phishing-domains';
|
||||
|
||||
/** Whitelists */
|
||||
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
|
||||
@ -29,8 +30,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
|
||||
const domainSets = new Set<string>();
|
||||
|
||||
// Parse from AdGuard Filters
|
||||
console.time('* Download and process Hosts / AdBlock Filter Rules');
|
||||
|
||||
const [gorhill, shouldStop] = await traceAsync('* Download and process Hosts / AdBlock Filter Rules', async () => {
|
||||
let shouldStop = false;
|
||||
|
||||
const [gorhill] = await Promise.all([
|
||||
@ -46,40 +46,38 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
|
||||
...ADGUARD_FILTERS.map(input => {
|
||||
const promise = typeof input === 'string'
|
||||
? processFilterRules(input)
|
||||
: processFilterRules(input[0], input[1] || undefined);
|
||||
: processFilterRules(input[0], input[1]);
|
||||
|
||||
return promise.then((i) => {
|
||||
if (i) {
|
||||
const { white, black, foundDebugDomain } = i;
|
||||
return promise.then(({ white, black, foundDebugDomain }) => {
|
||||
if (foundDebugDomain) {
|
||||
shouldStop = true;
|
||||
// we should not break here, as we want to see full matches from all data source
|
||||
}
|
||||
white.forEach(i => filterRuleWhitelistDomainSets.add(i));
|
||||
black.forEach(i => domainSets.add(i));
|
||||
} else {
|
||||
process.exitCode = 1;
|
||||
throw new Error('Failed to process AdGuard Filter Rules!');
|
||||
}
|
||||
});
|
||||
}),
|
||||
...([
|
||||
'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exceptions.txt',
|
||||
'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exclusions.txt'
|
||||
].map(input => processFilterRules(input).then((i) => {
|
||||
if (i) {
|
||||
const { white, black } = i;
|
||||
].map(input => processFilterRules(input).then(({ white, black }) => {
|
||||
white.forEach(i => {
|
||||
filterRuleWhitelistDomainSets.add(i);
|
||||
});
|
||||
black.forEach(i => {
|
||||
filterRuleWhitelistDomainSets.add(i);
|
||||
});
|
||||
} else {
|
||||
process.exitCode = 1;
|
||||
throw new Error('Failed to process AdGuard Filter Rules!');
|
||||
}))),
|
||||
getPhishingDomains().then(([purePhishingDomains, fullDomainSet]) => {
|
||||
fullDomainSet.forEach(host => {
|
||||
if (host) {
|
||||
domainSets.add(host);
|
||||
}
|
||||
})))
|
||||
});
|
||||
purePhishingDomains.forEach(suffix => {
|
||||
domainSets.add(`.${suffix}`);
|
||||
});
|
||||
})
|
||||
]);
|
||||
|
||||
// remove pre-defined enforced blacklist from whitelist
|
||||
@ -88,7 +86,8 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
|
||||
trie0.find(enforcedBlack).forEach(found => filterRuleWhitelistDomainSets.delete(found));
|
||||
});
|
||||
|
||||
console.timeEnd('* Download and process Hosts / AdBlock Filter Rules');
|
||||
return [gorhill, shouldStop] as const;
|
||||
});
|
||||
|
||||
if (shouldStop) {
|
||||
process.exit(1);
|
||||
@ -117,20 +116,11 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
|
||||
}
|
||||
}
|
||||
|
||||
for await (const line of readFileByLine(path.resolve(import.meta.dir, '../List/domainset/reject_phishing.conf'))) {
|
||||
const l = processLine(line);
|
||||
if (l?.[0] === '.') {
|
||||
domainSuffixSet.add(l.slice(1));
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`Import ${domainKeywordsSet.size} black keywords and ${domainSuffixSet.size} black suffixes!`);
|
||||
|
||||
previousSize = domainSets.size;
|
||||
// Dedupe domainSets
|
||||
console.log(`Start deduping from black keywords/suffixes! (${previousSize})`);
|
||||
console.time('* Dedupe from black keywords/suffixes');
|
||||
|
||||
traceSync('* Dedupe from black keywords/suffixes', () => {
|
||||
const trie1 = createTrie(domainSets);
|
||||
domainSuffixSet.forEach(suffix => {
|
||||
trie1.find(suffix, true).forEach(f => domainSets.delete(f));
|
||||
@ -160,14 +150,12 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
|
||||
domainSets.delete(domain);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
console.timeEnd('* Dedupe from black keywords/suffixes');
|
||||
console.log(`Deduped ${previousSize} - ${domainSets.size} = ${previousSize - domainSets.size} from black keywords and suffixes!`);
|
||||
|
||||
previousSize = domainSets.size;
|
||||
// Dedupe domainSets
|
||||
console.log(`Start deduping! (${previousSize})`);
|
||||
|
||||
const dudupedDominArray = traceSync('* Dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
|
||||
console.log(`Deduped ${previousSize - dudupedDominArray.length} rules!`);
|
||||
|
||||
|
||||
@ -92,7 +92,11 @@ export const buildSpeedtestDomainSet = task(import.meta.path, async () => {
|
||||
'speedtest.upp.com',
|
||||
'.fast.com',
|
||||
'speedtest.macpaw.com',
|
||||
'.netspeedtestmaster.com'
|
||||
'.netspeedtestmaster.com',
|
||||
// Google Search Result of "speedtest", powered by this
|
||||
'.measurement-lab.org',
|
||||
// Google Fiber legacy speedtest site (new fiber speedtest use speedtestcustom.com)
|
||||
'.speed.googlefiber.net'
|
||||
]);
|
||||
|
||||
const hostnameGroups = await Promise.all([
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
// @ts-check
|
||||
import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
|
||||
import { createReadlineInterfaceFromResponse } from './lib/fetch-remote-text-by-line';
|
||||
import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line';
|
||||
import path from 'path';
|
||||
import { isIPv4, isIPv6 } from 'net';
|
||||
import { processLine } from './lib/process-line';
|
||||
|
||||
@ -5,7 +5,7 @@ import path from 'path';
|
||||
import os from 'os';
|
||||
import { Readable } from 'stream';
|
||||
import { pipeline } from 'stream/promises';
|
||||
import { readFileByLine } from './lib/fetch-remote-text-by-line';
|
||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { isCI } from 'ci-info';
|
||||
import { task, traceAsync } from './lib/trace-runner';
|
||||
import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
|
||||
|
||||
@ -3,7 +3,6 @@ import { buildCommon } from './build-common';
|
||||
import { buildAntiBogusDomain } from './build-anti-bogus-domain';
|
||||
import { buildAppleCdn } from './build-apple-cdn';
|
||||
import { buildCdnConf } from './build-cdn-conf';
|
||||
import { buildPhishingDomainSet } from './build-phishing-domainset';
|
||||
import { buildRejectDomainSet } from './build-reject-domainset';
|
||||
import { buildTelegramCIDR } from './build-telegram-cidr';
|
||||
import { buildChnCidr } from './build-chn-cidr';
|
||||
@ -34,14 +33,9 @@ import { buildPublicHtml } from './build-public';
|
||||
downloadPreviousBuildPromise,
|
||||
downloadPublicSuffixListPromise
|
||||
]).then(() => buildCdnConf());
|
||||
const buildPhilishingDomainsetPromise = Promise.all([
|
||||
downloadPreviousBuildPromise,
|
||||
downloadPublicSuffixListPromise
|
||||
]).then(() => buildPhishingDomainSet());
|
||||
const buildRejectDomainSetPromise = Promise.all([
|
||||
downloadPreviousBuildPromise,
|
||||
downloadPublicSuffixListPromise,
|
||||
buildPhilishingDomainsetPromise
|
||||
downloadPublicSuffixListPromise
|
||||
]).then(() => buildRejectDomainSet());
|
||||
const buildTelegramCIDRPromise = downloadPreviousBuildPromise.then(() => buildTelegramCIDR());
|
||||
const buildChnCidrPromise = downloadPreviousBuildPromise.then(() => buildChnCidr());
|
||||
@ -77,7 +71,6 @@ import { buildPublicHtml } from './build-public';
|
||||
buildAntiBogusDomainPromise,
|
||||
buildAppleCdnPromise,
|
||||
buildCdnConfPromise,
|
||||
buildPhilishingDomainsetPromise,
|
||||
buildRejectDomainSetPromise,
|
||||
buildTelegramCIDRPromise,
|
||||
buildChnCidrPromise,
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
// @ts-check
|
||||
import { readFileByLine } from './fetch-remote-text-by-line';
|
||||
import { readFileByLine } from './fetch-text-by-line';
|
||||
import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash';
|
||||
import { traceAsync } from './trace-runner';
|
||||
|
||||
export async function compareAndWriteFile(linesA: string[], filePath: string) {
|
||||
let isEqual = true;
|
||||
@ -21,7 +22,7 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
|
||||
const lineA = linesA[index];
|
||||
index++;
|
||||
|
||||
if (typeof lineA !== 'string') {
|
||||
if (lineA == null) {
|
||||
// The file becomes smaller
|
||||
isEqual = false;
|
||||
break;
|
||||
@ -37,7 +38,7 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
|
||||
}
|
||||
}
|
||||
|
||||
if (index !== linesALen) {
|
||||
if (isEqual && index !== linesALen) {
|
||||
// The file becomes larger
|
||||
isEqual = false;
|
||||
}
|
||||
@ -48,13 +49,11 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`Writing ${filePath}...`);
|
||||
|
||||
const start = Bun.nanoseconds();
|
||||
|
||||
await traceAsync(`Writing ${filePath}`, async () => {
|
||||
if (linesALen < 10000) {
|
||||
await Bun.write(file, `${linesA.join('\n')}\n`);
|
||||
} else {
|
||||
return Bun.write(file, `${linesA.join('\n')}\n`);
|
||||
}
|
||||
|
||||
const writer = file.writer();
|
||||
|
||||
for (let i = 0; i < linesALen; i++) {
|
||||
@ -62,11 +61,9 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
|
||||
writer.write('\n');
|
||||
}
|
||||
|
||||
writer.flush();
|
||||
await writer.end();
|
||||
}
|
||||
|
||||
console.log(`Done writing ${filePath} in ${(Bun.nanoseconds() - start) / 1e6}ms`);
|
||||
await writer.flush();
|
||||
return writer.end();
|
||||
});
|
||||
}
|
||||
|
||||
export const withBannerArray = (title: string, description: string[], date: Date, content: string[]) => {
|
||||
|
||||
@ -83,6 +83,7 @@ function createFetchRetry($fetch: typeof fetch): typeof fetch {
|
||||
} catch (err: unknown) {
|
||||
if (err instanceof Error) {
|
||||
if (err.name === 'AbortError') {
|
||||
console.log('[fetch abort]', url.toString());
|
||||
return bail(err);
|
||||
}
|
||||
}
|
||||
|
||||
@ -78,6 +78,6 @@ export async function *createReadlineInterfaceFromResponse(resp: Response): Asyn
|
||||
}
|
||||
}
|
||||
|
||||
export function fetchRemoteTextAndCreateReadlineInterface(url: string | URL) {
|
||||
export function fetchRemoteTextAndReadByLine(url: string | URL) {
|
||||
return fetchWithRetry(url, defaultRequestInit).then(res => createReadlineInterfaceFromResponse(res as Response));
|
||||
}
|
||||
@ -1,14 +1,12 @@
|
||||
import { processDomainLists, processHosts } from './lib/parse-filter';
|
||||
import fsp from 'fs/promises';
|
||||
import path from 'path';
|
||||
import { createRuleset } from './lib/create-file';
|
||||
import { processLine } from './lib/process-line';
|
||||
import { createDomainSorter } from './lib/stable-sort-domain';
|
||||
import { traceSync, task } from './lib/trace-runner';
|
||||
import { createTrie } from './lib/trie';
|
||||
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
|
||||
import { createCachedGorhillGetDomain } from './lib/cached-tld-parse';
|
||||
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
||||
import { processHosts } from './parse-filter';
|
||||
import { traceAsync, traceSync } from './trace-runner';
|
||||
import * as tldts from 'tldts';
|
||||
import { SHARED_DESCRIPTION } from './lib/constants';
|
||||
import { createTrie } from './trie';
|
||||
import { createCachedGorhillGetDomain } from './cached-tld-parse';
|
||||
import { processLine } from './process-line';
|
||||
|
||||
const WHITELIST_DOMAIN = new Set([
|
||||
'w3s.link',
|
||||
@ -80,7 +78,7 @@ const BLACK_TLD = new Set([
|
||||
'za.com'
|
||||
]);
|
||||
|
||||
export const buildPhishingDomainSet = task(import.meta.path, async () => {
|
||||
export const getPhishingDomains = () => traceAsync('get phishing domains', async () => {
|
||||
const [domainSet, gorhill] = await Promise.all([
|
||||
processHosts('https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true),
|
||||
// processDomainLists('https://phishing.army/download/phishing_army_blocklist.txt', true),
|
||||
@ -92,25 +90,26 @@ export const buildPhishingDomainSet = task(import.meta.path, async () => {
|
||||
// // 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
|
||||
// ]
|
||||
// ),
|
||||
getGorhillPublicSuffixPromise()
|
||||
getGorhillPublicSuffixPromise(),
|
||||
// Remove old files
|
||||
fsp.rm(path.resolve(import.meta.dir, '../../List/domainset/reject_phishing.conf'), { force: true }),
|
||||
fsp.rm(path.resolve(import.meta.dir, '../../Clash/domainset/reject_phishing.txt'), { force: true })
|
||||
]);
|
||||
|
||||
// _domainSet2.forEach(i => domainSet.add(i));
|
||||
|
||||
traceSync('* whitelist', () => {
|
||||
traceSync.skip('* whitelisting phishing domains', () => {
|
||||
const trieForRemovingWhiteListed = createTrie(domainSet);
|
||||
WHITELIST_DOMAIN.forEach(white => {
|
||||
trieForRemovingWhiteListed.find(`.${white}`, false).forEach(f => domainSet.delete(f));
|
||||
if (trieForRemovingWhiteListed.has(white)) {
|
||||
// if (trieForRemovingWhiteListed.has(white)) {
|
||||
domainSet.delete(white);
|
||||
}
|
||||
// }
|
||||
});
|
||||
});
|
||||
|
||||
const domainCountMap: Record<string, number> = {};
|
||||
const getDomain = createCachedGorhillGetDomain(gorhill);
|
||||
|
||||
traceSync('* process domain set', () => {
|
||||
traceSync.skip('* process phishing domain set', () => {
|
||||
const domainArr = Array.from(domainSet);
|
||||
|
||||
for (let i = 0, len = domainArr.length; i < len; i++) {
|
||||
@ -171,36 +170,9 @@ export const buildPhishingDomainSet = task(import.meta.path, async () => {
|
||||
}
|
||||
});
|
||||
|
||||
const domainSorter = createDomainSorter(gorhill);
|
||||
const results = traceSync.skip('* get final phishing results', () => Object.entries(domainCountMap)
|
||||
.filter(([, count]) => count >= 5)
|
||||
.map(([apexDomain]) => apexDomain));
|
||||
|
||||
const results = traceSync('* get final results', () => Object.entries(domainCountMap)
|
||||
.reduce<string[]>((acc, [apexDomain, count]) => {
|
||||
if (count >= 5) {
|
||||
acc.push(`.${apexDomain}`);
|
||||
}
|
||||
return acc;
|
||||
}, [])
|
||||
.sort(domainSorter));
|
||||
|
||||
const description = [
|
||||
...SHARED_DESCRIPTION,
|
||||
'',
|
||||
'The domainset supports enhanced phishing protection',
|
||||
'Build from:',
|
||||
' - https://gitlab.com/malware-filter/phishing-filter'
|
||||
];
|
||||
|
||||
return Promise.all(createRuleset(
|
||||
'Sukka\'s Ruleset - Reject Phishing',
|
||||
description,
|
||||
new Date(),
|
||||
results,
|
||||
'domainset',
|
||||
path.resolve(import.meta.dir, '../List/domainset/reject_phishing.conf'),
|
||||
path.resolve(import.meta.dir, '../Clash/domainset/reject_phishing.txt')
|
||||
));
|
||||
return [results, domainSet] as const;
|
||||
});
|
||||
|
||||
if (import.meta.main) {
|
||||
buildPhishingDomainSet();
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
import { fetchRemoteTextAndCreateReadlineInterface } from './fetch-remote-text-by-line';
|
||||
import { fetchRemoteTextAndReadByLine } from './fetch-text-by-line';
|
||||
import { parse } from 'tldts';
|
||||
|
||||
const isDomainLoose = (domain: string): boolean => {
|
||||
@ -8,7 +8,7 @@ const isDomainLoose = (domain: string): boolean => {
|
||||
|
||||
export const parseFelixDnsmasq = async (url: string | URL): Promise<string[]> => {
|
||||
const res: string[] = [];
|
||||
for await (const line of await fetchRemoteTextAndCreateReadlineInterface(url)) {
|
||||
for await (const line of await fetchRemoteTextAndReadByLine(url)) {
|
||||
if (line.startsWith('server=/') && line.endsWith('/114.114.114.114')) {
|
||||
const domain = line.replace('server=/', '').replace('/114.114.114.114', '');
|
||||
if (isDomainLoose(domain)) {
|
||||
|
||||
@ -1,12 +1,13 @@
|
||||
// @ts-check
|
||||
import { defaultRequestInit, fetchWithRetry } from './fetch-retry';
|
||||
import * as tldts from './cached-tld-parse';
|
||||
import { fetchRemoteTextAndCreateReadlineInterface } from './fetch-remote-text-by-line';
|
||||
import { fetchRemoteTextAndReadByLine } from './fetch-text-by-line';
|
||||
import { NetworkFilter } from '@cliqz/adblocker';
|
||||
import { processLine } from './process-line';
|
||||
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
||||
import type { PublicSuffixList } from 'gorhill-publicsuffixlist';
|
||||
import { isProbablyIpv4 } from './is-fast-ip';
|
||||
import { traceAsync } from './trace-runner';
|
||||
|
||||
const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null
|
||||
let foundDebugDomain = false;
|
||||
@ -42,7 +43,7 @@ export async function processDomainLists(domainListsUrl: string | URL, includeAl
|
||||
|
||||
const domainSets = new Set<string>();
|
||||
|
||||
for await (const line of await fetchRemoteTextAndCreateReadlineInterface(domainListsUrl)) {
|
||||
for await (const line of await fetchRemoteTextAndReadByLine(domainListsUrl)) {
|
||||
const domainToAdd = processLine(line);
|
||||
if (!domainToAdd) {
|
||||
continue;
|
||||
@ -64,15 +65,14 @@ export async function processDomainLists(domainListsUrl: string | URL, includeAl
|
||||
}
|
||||
|
||||
export async function processHosts(hostsUrl: string | URL, includeAllSubDomain = false, skipDomainCheck = false) {
|
||||
console.time(`- processHosts: ${hostsUrl.toString()}`);
|
||||
|
||||
return traceAsync(`- processHosts: ${hostsUrl.toString()}`, async () => {
|
||||
if (typeof hostsUrl === 'string') {
|
||||
hostsUrl = new URL(hostsUrl);
|
||||
}
|
||||
|
||||
const domainSets = new Set<string>();
|
||||
|
||||
for await (const l of await fetchRemoteTextAndCreateReadlineInterface(hostsUrl)) {
|
||||
for await (const l of await fetchRemoteTextAndReadByLine(hostsUrl)) {
|
||||
const line = processLine(l);
|
||||
if (!line) {
|
||||
continue;
|
||||
@ -97,21 +97,18 @@ export async function processHosts(hostsUrl: string | URL, includeAllSubDomain =
|
||||
}
|
||||
}
|
||||
|
||||
console.timeEnd(` - processHosts: ${hostsUrl.toString()}`);
|
||||
|
||||
return domainSets;
|
||||
});
|
||||
}
|
||||
|
||||
export async function processFilterRules(
|
||||
filterRulesUrl: string | URL,
|
||||
fallbackUrls?: ReadonlyArray<string | URL> | undefined
|
||||
): Promise<{ white: Set<string>, black: Set<string>, foundDebugDomain: boolean }> {
|
||||
const runStart = Bun.nanoseconds();
|
||||
|
||||
const whitelistDomainSets = new Set<string>();
|
||||
const blacklistDomainSets = new Set<string>();
|
||||
|
||||
let downloadTime = 0;
|
||||
await traceAsync(`- processFilterRules: ${filterRulesUrl.toString()}`, async () => {
|
||||
const gorhill = await getGorhillPublicSuffixPromise();
|
||||
|
||||
/**
|
||||
@ -162,19 +159,13 @@ export async function processFilterRules(
|
||||
};
|
||||
|
||||
if (!fallbackUrls || fallbackUrls.length === 0) {
|
||||
downloadTime = 0;
|
||||
let last = Bun.nanoseconds();
|
||||
for await (const line of await fetchRemoteTextAndCreateReadlineInterface(filterRulesUrl)) {
|
||||
const now = Bun.nanoseconds();
|
||||
downloadTime += Bun.nanoseconds() - last;
|
||||
last = now;
|
||||
for await (const line of await fetchRemoteTextAndReadByLine(filterRulesUrl)) {
|
||||
// don't trim here
|
||||
lineCb(line);
|
||||
}
|
||||
} else {
|
||||
let filterRules;
|
||||
|
||||
const downloadStart = Bun.nanoseconds();
|
||||
try {
|
||||
const controller = new AbortController();
|
||||
|
||||
@ -185,6 +176,8 @@ export async function processFilterRules(
|
||||
const r = await fetchWithRetry(url, { signal: controller.signal, ...defaultRequestInit });
|
||||
const text = await r.text();
|
||||
|
||||
console.log('[fetch finish]', url.toString());
|
||||
|
||||
controller.abort();
|
||||
return text;
|
||||
})
|
||||
@ -194,15 +187,12 @@ export async function processFilterRules(
|
||||
console.log(`Download Rule for [${filterRulesUrl.toString()}] failed`);
|
||||
throw e;
|
||||
}
|
||||
downloadTime = Bun.nanoseconds() - downloadStart;
|
||||
|
||||
for (let i = 0, len = filterRules.length; i < len; i++) {
|
||||
lineCb(filterRules[i]);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(` ┬ processFilterRules (${filterRulesUrl.toString()}): ${((Bun.nanoseconds() - runStart) / 1e6).toFixed(3)}ms`);
|
||||
console.log(` └── download time: ${(downloadTime / 1e6).toFixed(3)}ms`);
|
||||
});
|
||||
|
||||
return {
|
||||
white: whitelistDomainSets,
|
||||
|
||||
@ -11,12 +11,13 @@ export const HOSTS = [
|
||||
// Curben's UrlHaus Malicious URL Blocklist
|
||||
// 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
|
||||
// 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
|
||||
// 'https://ublockorigin.github.io/uAssets/thirdparties/urlhaus-filter/urlhaus-filter-online.txt',
|
||||
// 'https://ublockorigin.github.io/uAssetsCDN/thirdparties/urlhaus-filter/urlhaus-filter-online.txt',
|
||||
['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true],
|
||||
// Curben's Phishing URL Blocklist
|
||||
// Covered by lib/get-phishing-domains.ts
|
||||
// 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt'
|
||||
// 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
|
||||
['https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true],
|
||||
// ['https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true],
|
||||
// Curben's PUP Domains Blocklist
|
||||
// 'https://curbengh.github.io/pup-filter/pup-filter-agh.txt'
|
||||
// 'https://pup-filter.pages.dev/pup-filter-agh.txt'
|
||||
@ -33,7 +34,7 @@ export const ADGUARD_FILTERS = [
|
||||
'https://easylist-downloads.adblockplus.org/easylist.txt',
|
||||
'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easylist.txt',
|
||||
'https://secure.fanboy.co.nz/easylist.txt',
|
||||
'https://ublockorigin.github.io/uAssets/thirdparties/easylist.txt',
|
||||
'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easylist.txt',
|
||||
'https://ublockorigin.pages.dev/thirdparties/easylist.txt'
|
||||
]
|
||||
],
|
||||
@ -44,7 +45,7 @@ export const ADGUARD_FILTERS = [
|
||||
'https://secure.fanboy.co.nz/easyprivacy.txt',
|
||||
'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easyprivacy.txt',
|
||||
'https://easylist-downloads.adblockplus.org/easyprivacy.txt',
|
||||
'https://ublockorigin.github.io/uAssets/thirdparties/easyprivacy.txt',
|
||||
'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easyprivacy.txt',
|
||||
'https://ublockorigin.pages.dev/thirdparties/easyprivacy.txt'
|
||||
]
|
||||
],
|
||||
@ -52,7 +53,7 @@ export const ADGUARD_FILTERS = [
|
||||
[
|
||||
'https://adguardteam.github.io/AdGuardSDNSFilter/Filters/filter.txt',
|
||||
[
|
||||
'https://filters.adtidy.org/extension/chromium/filters/15.txt'
|
||||
'https://filters.adtidy.org/extension/ublock/filters/15.txt'
|
||||
]
|
||||
],
|
||||
// AdGuard CNAME Filter Combined
|
||||
@ -62,42 +63,37 @@ export const ADGUARD_FILTERS = [
|
||||
'https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_microsites.txt',
|
||||
'https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers.txt',
|
||||
// uBlock Origin Filter List
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/filters.min.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/filters.min.txt',
|
||||
[
|
||||
'https://ublockorigin.pages.dev/filters/filters.min.txt'
|
||||
]
|
||||
],
|
||||
// uBlock Origin Badware Risk List
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/badware.min.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/badware.min.txt',
|
||||
[
|
||||
'https://ublockorigin.pages.dev/filters/badware.min.txt'
|
||||
]
|
||||
],
|
||||
// uBlock Origin Privacy List
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/privacy.min.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.min.txt',
|
||||
[
|
||||
'https://ublockorigin.pages.dev/filters/privacy.min.txt'
|
||||
]
|
||||
],
|
||||
// uBlock Origin Resource Abuse: merged in uBlock Origin Privacy List
|
||||
// [
|
||||
// 'https://ublockorigin.github.io/uAssets/filters/resource-abuse.txt',
|
||||
// [
|
||||
// 'https://ublockorigin.github.io/uAssetsCDN/filters/resource-abuse.txt',
|
||||
// [
|
||||
// 'https://ublockorigin.pages.dev/filters/resource-abuse.txt'
|
||||
// ]
|
||||
// ],
|
||||
// uBlock Origin Unbreak
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/unbreak.min.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.min.txt',
|
||||
[
|
||||
'https://ublockorigin.pages.dev/filters/unbreak.min.txt'
|
||||
]
|
||||
],
|
||||
|
||||
@ -6,4 +6,8 @@ describe('stable-sort-domain', () => {
|
||||
it('.ks.cn, .tag.unclaimedproperty.ks.gov', () => {
|
||||
expect(domainSorter('.ks.cn', '.tag.unclaimedproperty.ks.gov')).toBe(-1);
|
||||
});
|
||||
|
||||
it('.fgnzdb.xyz, .hub.fghtem.com', () => {
|
||||
expect(domainSorter('.fgnzdb.xyz', '.hub.fghtem.com')).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
@ -1,19 +1,21 @@
|
||||
import path from 'path';
|
||||
import picocolors from 'picocolors';
|
||||
|
||||
const traceSync = <T>(prefix: string, fn: () => T): T => {
|
||||
function traceSync<T>(prefix: string, fn: () => T): T {
|
||||
const start = Bun.nanoseconds();
|
||||
const result = fn();
|
||||
const end = Bun.nanoseconds();
|
||||
console.log(`${prefix}: ${((end - start) / 1e6).toFixed(3)}ms`);
|
||||
console.log(`${picocolors.gray(`[${((end - start) / 1e6).toFixed(3)}ms]`)} ${prefix}`);
|
||||
return result;
|
||||
};
|
||||
}
|
||||
traceSync.skip = <T>(prefix: string, fn: () => T): T => fn();
|
||||
export { traceSync };
|
||||
|
||||
const traceAsync = async <T>(prefix: string, fn: () => Promise<T>): Promise<T> => {
|
||||
const start = Bun.nanoseconds();
|
||||
const result = await fn();
|
||||
const end = Bun.nanoseconds();
|
||||
console.log(`${prefix}: ${((end - start) / 1e6).toFixed(3)}ms`);
|
||||
console.log(`${picocolors.gray(`[${((end - start) / 1e6).toFixed(3)}ms]`)} ${prefix}`);
|
||||
return result;
|
||||
};
|
||||
export { traceAsync };
|
||||
@ -31,7 +33,7 @@ const task = <T>(importMetaPath: string, fn: () => Promise<T>, customname: strin
|
||||
const start = Bun.nanoseconds();
|
||||
await fn();
|
||||
const end = Bun.nanoseconds();
|
||||
console.log(`✅ [${taskName}] Executed successfully: ${((end - start) / 1e6).toFixed(3)}ms`);
|
||||
console.log(`✅ [${taskName}] [${((end - start) / 1e6).toFixed(3)}ms] Executed successfully`);
|
||||
|
||||
return { start, end, taskName } as TaskResult;
|
||||
};
|
||||
|
||||
@ -4,7 +4,7 @@ import * as tldts from 'tldts'; // hit ratio way too low, dont cache
|
||||
import picocolors from 'picocolors';
|
||||
import path from 'path';
|
||||
import listDir from '@sukka/listdir';
|
||||
import { readFileByLine } from './lib/fetch-remote-text-by-line';
|
||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
||||
import { processLine } from './lib/process-line';
|
||||
import { task } from './lib/trace-runner';
|
||||
|
||||
|
||||
15
README.md
15
README.md
@ -19,16 +19,17 @@
|
||||
#### 广告拦截 / 隐私保护 / Malware 拦截 / Phishing 拦截
|
||||
|
||||
- 自动生成
|
||||
- 数据来源、白名单域名列表和生成方式,请参考 [`build-reject-domainset.js`](Build/build-reject-domainset.js)
|
||||
- 数据来源、白名单域名列表和生成方式,请参考 [`build-reject-domainset.ts`](Build/build-reject-domainset.ts)
|
||||
- 仅建议在 Surge for Mac 上使用,移动平台请使用专门的工具(如 AdGuard for Android/iOS)以获得更好的性能
|
||||
- 不能替代浏览器广告屏蔽扩展(如 uBlock Origin)
|
||||
|
||||
**Surge**
|
||||
|
||||
```ini
|
||||
RULE-SET,https://ruleset.skk.moe/List/non_ip/reject.conf,REJECT
|
||||
# Non IP
|
||||
DOMAIN-SET,https://ruleset.skk.moe/List/domainset/reject.conf,REJECT-TINYGIF
|
||||
DOMAIN-SET,https://ruleset.skk.moe/List/domainset/reject_phishing.conf,REJECT
|
||||
RULE-SET,https://ruleset.skk.moe/List/non_ip/reject.conf,REJECT
|
||||
# IP
|
||||
RULE-SET,https://ruleset.skk.moe/List/ip/reject.conf,REJECT-DROP
|
||||
```
|
||||
|
||||
@ -51,13 +52,6 @@ rule-providers:
|
||||
interval: 43200
|
||||
url: https://ruleset.skk.moe/Clash/domainset/reject.txt
|
||||
path: ./sukkaw_ruleset/reject_domainset.txt
|
||||
reject_phishing_domainset:
|
||||
type: http
|
||||
behavior: domain
|
||||
format: text
|
||||
interval: 43200
|
||||
url: https://ruleset.skk.moe/Clash/domainset/reject_phishing.txt
|
||||
path: ./sukkaw_ruleset/reject_phishing_domainset.txt
|
||||
reject_ip:
|
||||
type: http
|
||||
behavior: classical
|
||||
@ -70,7 +64,6 @@ rules:
|
||||
- RULE-SET,reject_non_ip,REJECT
|
||||
# WARNING! Using reject_domainset can cause Clash to run out of memory due to the insufficient Clash implementation.
|
||||
- RULE-SET,reject_domainset,REJECT
|
||||
- RULE-SET,reject_phishing_domainset,REJECT
|
||||
- RULE-SET,reject_ip,REJECT
|
||||
```
|
||||
|
||||
|
||||
@ -1,13 +0,0 @@
|
||||
diff --git a/index.js b/index.js
|
||||
index e5eeb353f6da8968d926cb4e502207912b6c3594..eb1370dc19030404e3def46ce13938d34a9e6214 100644
|
||||
--- a/index.js
|
||||
+++ b/index.js
|
||||
@@ -66,7 +66,7 @@ function setup(fetch) {
|
||||
return res;
|
||||
}
|
||||
} catch (err) {
|
||||
- if (err.type === 'aborted') {
|
||||
+ if (err.type === 'aborted' || err.name === 'AbortError') {
|
||||
return bail(err);
|
||||
}
|
||||
const clientError = isClientError(err);
|
||||
Loading…
x
Reference in New Issue
Block a user