diff --git a/Build/build-anti-bogus-domain.ts b/Build/build-anti-bogus-domain.ts
index b997eb4b..59886bde 100644
--- a/Build/build-anti-bogus-domain.ts
+++ b/Build/build-anti-bogus-domain.ts
@@ -2,7 +2,7 @@ import path from 'path';
 import { isIPv4, isIPv6 } from 'net';
 import { createRuleset } from './lib/create-file';
-import { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } from './lib/fetch-remote-text-by-line';
+import { fetchRemoteTextAndReadByLine, readFileByLine } from './lib/fetch-text-by-line';
 import { processLine } from './lib/process-line';
 import { task } from './lib/trace-runner';
 import { SHARED_DESCRIPTION } from './lib/constants';
@@ -10,7 +10,7 @@ import { SHARED_DESCRIPTION } from './lib/constants';
 const getBogusNxDomainIPs = async () => {
   /** @type {string[]} */
   const result = [];
-  for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')) {
+  for await (const line of await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')) {
     if (line.startsWith('bogus-nxdomain=')) {
       const ip = line.slice(15).trim();
       if (isIPv4(ip)) {
diff --git a/Build/build-cdn-conf.ts b/Build/build-cdn-conf.ts
index a30864e1..6c35521a 100644
--- a/Build/build-cdn-conf.ts
+++ b/Build/build-cdn-conf.ts
@@ -1,6 +1,6 @@
 import path from 'path';
 import { createRuleset } from './lib/create-file';
-import { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } from './lib/fetch-remote-text-by-line';
+import { fetchRemoteTextAndReadByLine, readFileByLine } from './lib/fetch-text-by-line';
 import { createTrie } from './lib/trie';
 import { task } from './lib/trace-runner';
 import { processLine } from './lib/process-line';
@@ -19,7 +19,7 @@ const getS3OSSDomains = async (): Promise<Set<string>> => {
     }
   } else {
     console.log('public_suffix_list.dat not found, fetch directly from remote.');
-    for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://publicsuffix.org/list/public_suffix_list.dat')) {
+    for await (const line of await fetchRemoteTextAndReadByLine('https://publicsuffix.org/list/public_suffix_list.dat')) {
       trie.add(line);
     }
   }
diff --git a/Build/build-chn-cidr.ts b/Build/build-chn-cidr.ts
index d37adde7..e4876c2a 100644
--- a/Build/build-chn-cidr.ts
+++ b/Build/build-chn-cidr.ts
@@ -1,4 +1,4 @@
-import { fetchRemoteTextAndCreateReadlineInterface } from './lib/fetch-remote-text-by-line';
+import { fetchRemoteTextAndReadByLine } from './lib/fetch-text-by-line';
 import { resolve as pathResolve } from 'path';
 import { compareAndWriteFile, withBannerArray } from './lib/create-file';
 import { processLineFromReadline } from './lib/process-line';
@@ -17,7 +17,7 @@ const INCLUDE_CIDRS = [
 ];
 
 export const buildChnCidr = task(import.meta.path, async () => {
-  const cidr = await processLineFromReadline(await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt'));
+  const cidr = await processLineFromReadline(await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt'));
 
   const filteredCidr = exclude([...cidr, ...INCLUDE_CIDRS], EXCLUDE_CIDRS, true);
 
   // Can not use SHARED_DESCRIPTION here as different license
diff --git a/Build/build-common.ts b/Build/build-common.ts
index b16571d4..4cc6a5f2 100644
--- a/Build/build-common.ts
+++ b/Build/build-common.ts
@@ -2,7 +2,7 @@ import * as path from 'path';
 import { PathScurry } from 'path-scurry';
-import { readFileByLine } from './lib/fetch-remote-text-by-line';
+import { readFileByLine } from './lib/fetch-text-by-line';
 import { processLine } from './lib/process-line';
 import { createRuleset } from './lib/create-file';
 import { domainDeduper } from './lib/domain-deduper';
diff --git a/Build/build-domestic-ruleset.ts b/Build/build-domestic-ruleset.ts
index 9dd25f0f..89d03f7a 100644
--- a/Build/build-domestic-ruleset.ts
+++ b/Build/build-domestic-ruleset.ts
@@ -1,7 +1,7 @@
 // @ts-check
 import path from 'path';
 import { DOMESTICS } from '../Source/non_ip/domestic';
-import { readFileByLine } from './lib/fetch-remote-text-by-line';
+import { readFileByLine } from './lib/fetch-text-by-line';
 import { processLineFromReadline } from './lib/process-line';
 import { compareAndWriteFile, createRuleset } from './lib/create-file';
 import { task } from './lib/trace-runner';
diff --git a/Build/build-internal-cdn-rules.ts b/Build/build-internal-cdn-rules.ts
index adb26e13..739609bc 100644
--- a/Build/build-internal-cdn-rules.ts
+++ b/Build/build-internal-cdn-rules.ts
@@ -2,7 +2,7 @@ import fsp from 'fs/promises';
 import path from 'path';
 import * as tldts from 'tldts';
 import { processLine } from './lib/process-line';
-import { readFileByLine } from './lib/fetch-remote-text-by-line';
+import { readFileByLine } from './lib/fetch-text-by-line';
 import { createDomainSorter } from './lib/stable-sort-domain';
 import { task } from './lib/trace-runner';
 import { compareAndWriteFile } from './lib/create-file';
diff --git a/Build/build-internal-reverse-chn-cidr.ts b/Build/build-internal-reverse-chn-cidr.ts
index c96c4228..c8081db9 100644
--- a/Build/build-internal-reverse-chn-cidr.ts
+++ b/Build/build-internal-reverse-chn-cidr.ts
@@ -1,4 +1,4 @@
-import { fetchRemoteTextAndCreateReadlineInterface } from './lib/fetch-remote-text-by-line';
+import { fetchRemoteTextAndReadByLine } from './lib/fetch-text-by-line';
 import { processLineFromReadline } from './lib/process-line';
 import path from 'path';
 import fsp from 'fs/promises';
@@ -26,7 +26,7 @@ const RESERVED_IPV4_CIDR = [
 
 export const buildInternalReverseChnCIDR = task(import.meta.path, async () => {
   const [cidr] = await Promise.all([
-    processLineFromReadline(await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')),
+    processLineFromReadline(await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')),
     fsp.mkdir(path.resolve(import.meta.dir, '../List/internal'), { recursive: true })
   ]);
diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts
index 0ddf98d1..54986879 100644
--- a/Build/build-reject-domainset.ts
+++ b/Build/build-reject-domainset.ts
@@ -10,12 +10,13 @@ import { createRuleset, compareAndWriteFile } from './lib/create-file';
 import { processLine } from './lib/process-line';
 import { domainDeduper } from './lib/domain-deduper';
 import createKeywordFilter from './lib/aho-corasick';
-import { readFileByLine } from './lib/fetch-remote-text-by-line';
+import { readFileByLine } from './lib/fetch-text-by-line';
 import { createDomainSorter } from './lib/stable-sort-domain';
-import { traceSync, task } from './lib/trace-runner';
+import { traceSync, task, traceAsync } from './lib/trace-runner';
 import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
 import * as tldts from 'tldts';
 import { SHARED_DESCRIPTION } from './lib/constants';
+import { getPhishingDomains } from './lib/get-phishing-domains';
 
 /** Whitelists */
 const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
@@ -29,67 +30,65 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
   const domainSets = new Set();
 
   // Parse from AdGuard Filters
-  console.time('* Download and process Hosts / AdBlock Filter Rules');
+  const [gorhill, shouldStop] = await traceAsync('* Download and process Hosts / AdBlock Filter Rules', async () => {
+    let shouldStop = false;
 
-  let shouldStop = false;
+    const [gorhill] = await Promise.all([
+      getGorhillPublicSuffixPromise(),
+      // Parse from remote hosts & domain lists
+      ...HOSTS.map(entry => processHosts(entry[0], entry[1]).then(hosts => {
+        hosts.forEach(host => {
+          if (host) {
+            domainSets.add(host);
+          }
+        });
+      })),
+      ...ADGUARD_FILTERS.map(input => {
+        const promise = typeof input === 'string'
+          ? processFilterRules(input)
+          : processFilterRules(input[0], input[1]);
 
-  const [gorhill] = await Promise.all([
-    getGorhillPublicSuffixPromise(),
-    // Parse from remote hosts & domain lists
-    ...HOSTS.map(entry => processHosts(entry[0], entry[1]).then(hosts => {
-      hosts.forEach(host => {
-        if (host) {
-          domainSets.add(host);
-        }
-      });
-    })),
-    ...ADGUARD_FILTERS.map(input => {
-      const promise = typeof input === 'string'
-        ? processFilterRules(input)
-        : processFilterRules(input[0], input[1] || undefined);
-
-      return promise.then((i) => {
-        if (i) {
-          const { white, black, foundDebugDomain } = i;
+        return promise.then(({ white, black, foundDebugDomain }) => {
           if (foundDebugDomain) {
             shouldStop = true;
             // we should not break here, as we want to see full matches from all data source
           }
           white.forEach(i => filterRuleWhitelistDomainSets.add(i));
           black.forEach(i => domainSets.add(i));
-        } else {
-          process.exitCode = 1;
-          throw new Error('Failed to process AdGuard Filter Rules!');
-        }
-      });
-    }),
-    ...([
-      'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exceptions.txt',
-      'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exclusions.txt'
-    ].map(input => processFilterRules(input).then((i) => {
-      if (i) {
-        const { white, black } = i;
+        });
+      }),
+      ...([
+        'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exceptions.txt',
+        'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exclusions.txt'
+      ].map(input => processFilterRules(input).then(({ white, black }) => {
         white.forEach(i => { filterRuleWhitelistDomainSets.add(i); });
         black.forEach(i => { filterRuleWhitelistDomainSets.add(i); });
-      } else {
-        process.exitCode = 1;
-        throw new Error('Failed to process AdGuard Filter Rules!');
-      }
-    })))
-  ]);
+      }))),
+      getPhishingDomains().then(([purePhishingDomains, fullDomainSet]) => {
+        fullDomainSet.forEach(host => {
+          if (host) {
+            domainSets.add(host);
+          }
+        });
+        purePhishingDomains.forEach(suffix => {
+          domainSets.add(`.${suffix}`);
+        });
+      })
+    ]);
 
-  // remove pre-defined enforced blacklist from whitelist
-  const trie0 = createTrie(filterRuleWhitelistDomainSets);
-  PREDEFINED_ENFORCED_BACKLIST.forEach(enforcedBlack => {
-    trie0.find(enforcedBlack).forEach(found => filterRuleWhitelistDomainSets.delete(found));
+    // remove pre-defined enforced blacklist from whitelist
+    const trie0 = createTrie(filterRuleWhitelistDomainSets);
+    PREDEFINED_ENFORCED_BACKLIST.forEach(enforcedBlack => {
+      trie0.find(enforcedBlack).forEach(found => filterRuleWhitelistDomainSets.delete(found));
+    });
+
+    return [gorhill, shouldStop] as const;
   });
 
-  console.timeEnd('* Download and process Hosts / AdBlock Filter Rules');
-
   if (shouldStop) {
     process.exit(1);
   }
@@ -117,57 +116,46 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
     }
   }
 
-  for await (const line of readFileByLine(path.resolve(import.meta.dir, '../List/domainset/reject_phishing.conf'))) {
-    const l = processLine(line);
-    if (l?.[0] === '.') {
-      domainSuffixSet.add(l.slice(1));
-    }
-  }
-
   console.log(`Import ${domainKeywordsSet.size} black keywords and ${domainSuffixSet.size} black suffixes!`);
 
   previousSize = domainSets.size;
   // Dedupe domainSets
-  console.log(`Start deduping from black keywords/suffixes! (${previousSize})`);
-  console.time('* Dedupe from black keywords/suffixes');
+  traceSync('* Dedupe from black keywords/suffixes', () => {
+    const trie1 = createTrie(domainSets);
+    domainSuffixSet.forEach(suffix => {
+      trie1.find(suffix, true).forEach(f => domainSets.delete(f));
+    });
+    filterRuleWhitelistDomainSets.forEach(suffix => {
+      trie1.find(suffix, true).forEach(f => domainSets.delete(f));
+    });
 
-  const trie1 = createTrie(domainSets);
-  domainSuffixSet.forEach(suffix => {
-    trie1.find(suffix, true).forEach(f => domainSets.delete(f));
-  });
-  filterRuleWhitelistDomainSets.forEach(suffix => {
-    trie1.find(suffix, true).forEach(f => domainSets.delete(f));
-  });
+    // remove pre-defined enforced blacklist from whitelist
+    const kwfilter = createKeywordFilter(domainKeywordsSet);
 
-  // remove pre-defined enforced blacklist from whitelist
-  const kwfilter = createKeywordFilter(domainKeywordsSet);
-
-  // Build whitelist trie, to handle case like removing `g.msn.com` due to white `.g.msn.com` (`@@||g.msn.com`)
-  const trieWhite = createTrie(filterRuleWhitelistDomainSets);
-  for (const domain of domainSets) {
-    if (domain[0] === '.') {
-      if (trieWhite.contains(domain)) {
+    // Build whitelist trie, to handle case like removing `g.msn.com` due to white `.g.msn.com` (`@@||g.msn.com`)
+    const trieWhite = createTrie(filterRuleWhitelistDomainSets);
+    for (const domain of domainSets) {
+      if (domain[0] === '.') {
+        if (trieWhite.contains(domain)) {
+          domainSets.delete(domain);
+          continue;
+        }
+      } else if (trieWhite.has(`.${domain}`)) {
         domainSets.delete(domain);
         continue;
       }
-    } else if (trieWhite.has(`.${domain}`)) {
-      domainSets.delete(domain);
-      continue;
-    }
-    // Remove keyword
-    if (kwfilter.search(domain)) {
-      domainSets.delete(domain);
+      // Remove keyword
+      if (kwfilter.search(domain)) {
+        domainSets.delete(domain);
+      }
     }
-  }
+  });
 
-  console.timeEnd('* Dedupe from black keywords/suffixes');
   console.log(`Deduped ${previousSize} - ${domainSets.size} = ${previousSize - domainSets.size} from black keywords and suffixes!`);
 
   previousSize = domainSets.size;
   // Dedupe domainSets
-  console.log(`Start deduping! (${previousSize})`);
-
   const dudupedDominArray = traceSync('* Dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets)));
 
   console.log(`Deduped ${previousSize - dudupedDominArray.length} rules!`);
diff --git a/Build/build-speedtest-domainset.ts b/Build/build-speedtest-domainset.ts
index 34618629..c1623068 100644
--- a/Build/build-speedtest-domainset.ts
+++ b/Build/build-speedtest-domainset.ts
@@ -92,7 +92,11 @@ export const buildSpeedtestDomainSet = task(import.meta.path, async () => {
     'speedtest.upp.com',
     '.fast.com',
     'speedtest.macpaw.com',
-    '.netspeedtestmaster.com'
+    '.netspeedtestmaster.com',
+    // Google Search Result of "speedtest", powered by this
+    '.measurement-lab.org',
+    // Google Fiber legacy speedtest site (new fiber speedtest use speedtestcustom.com)
+    '.speed.googlefiber.net'
   ]);
 
   const hostnameGroups = await Promise.all([
diff --git a/Build/build-telegram-cidr.ts b/Build/build-telegram-cidr.ts
index bf6e8fbf..d1539ad8 100644
--- a/Build/build-telegram-cidr.ts
+++ b/Build/build-telegram-cidr.ts
@@ -1,6 +1,6 @@
 // @ts-check
 import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
-import { createReadlineInterfaceFromResponse } from './lib/fetch-remote-text-by-line';
+import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line';
 import path from 'path';
 import { isIPv4, isIPv6 } from 'net';
 import { processLine } from './lib/process-line';
diff --git a/Build/download-previous-build.ts b/Build/download-previous-build.ts
index b90316aa..5b5d5a6d 100644
--- a/Build/download-previous-build.ts
+++ b/Build/download-previous-build.ts
@@ -5,7 +5,7 @@ import path from 'path';
 import os from 'os';
 import { Readable } from 'stream';
 import { pipeline } from 'stream/promises';
-import { readFileByLine } from './lib/fetch-remote-text-by-line';
+import { readFileByLine } from './lib/fetch-text-by-line';
 import { isCI } from 'ci-info';
 import { task, traceAsync } from './lib/trace-runner';
 import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
diff --git a/Build/index.ts b/Build/index.ts
index 5b5da248..aa6c91b5 100644
--- a/Build/index.ts
+++ b/Build/index.ts
@@ -3,7 +3,6 @@ import { buildCommon } from './build-common';
 import { buildAntiBogusDomain } from './build-anti-bogus-domain';
 import { buildAppleCdn } from './build-apple-cdn';
 import { buildCdnConf } from './build-cdn-conf';
-import { buildPhishingDomainSet } from './build-phishing-domainset';
 import { buildRejectDomainSet } from './build-reject-domainset';
 import { buildTelegramCIDR } from './build-telegram-cidr';
 import { buildChnCidr } from './build-chn-cidr';
@@ -34,14 +33,9 @@ import { buildPublicHtml } from './build-public';
     downloadPreviousBuildPromise,
     downloadPublicSuffixListPromise
   ]).then(() => buildCdnConf());
-  const buildPhilishingDomainsetPromise = Promise.all([
-    downloadPreviousBuildPromise,
-    downloadPublicSuffixListPromise
-  ]).then(() => buildPhishingDomainSet());
   const buildRejectDomainSetPromise = Promise.all([
     downloadPreviousBuildPromise,
-    downloadPublicSuffixListPromise,
-    buildPhilishingDomainsetPromise
+    downloadPublicSuffixListPromise
   ]).then(() => buildRejectDomainSet());
   const buildTelegramCIDRPromise = downloadPreviousBuildPromise.then(() => buildTelegramCIDR());
   const buildChnCidrPromise = downloadPreviousBuildPromise.then(() => buildChnCidr());
@@ -77,7 +71,6 @@ import { buildPublicHtml } from './build-public';
     buildAntiBogusDomainPromise,
     buildAppleCdnPromise,
     buildCdnConfPromise,
-    buildPhilishingDomainsetPromise,
     buildRejectDomainSetPromise,
     buildTelegramCIDRPromise,
     buildChnCidrPromise,
diff --git a/Build/lib/create-file.ts b/Build/lib/create-file.ts
index ebe864b5..a0199022 100644
--- a/Build/lib/create-file.ts
+++ b/Build/lib/create-file.ts
@@ -1,6 +1,7 @@
 // @ts-check
-import { readFileByLine } from './fetch-remote-text-by-line';
+import { readFileByLine } from './fetch-text-by-line';
 import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash';
+import { traceAsync } from './trace-runner';
 
 export async function compareAndWriteFile(linesA: string[], filePath: string) {
   let isEqual = true;
@@ -21,7 +22,7 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
       const lineA = linesA[index];
       index++;
 
-      if (typeof lineA !== 'string') {
+      if (lineA == null) {
        // The file becomes smaller
        isEqual = false;
        break;
@@ -37,7 +38,7 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
     }
   }
 
-  if (index !== linesALen) {
+  if (isEqual && index !== linesALen) {
     // The file becomes larger
     isEqual = false;
   }
@@ -48,13 +49,11 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
     return;
   }
 
-  console.log(`Writing ${filePath}...`);
+  await traceAsync(`Writing ${filePath}`, async () => {
+    if (linesALen < 10000) {
+      return Bun.write(file, `${linesA.join('\n')}\n`);
+    }
 
-  const start = Bun.nanoseconds();
-
-  if (linesALen < 10000) {
-    await Bun.write(file, `${linesA.join('\n')}\n`);
-  } else {
 
     const writer = file.writer();
 
     for (let i = 0; i < linesALen; i++) {
@@ -62,11 +61,9 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
       writer.write('\n');
     }
 
-    writer.flush();
-    await writer.end();
-  }
-
-  console.log(`Done writing ${filePath} in ${(Bun.nanoseconds() - start) / 1e6}ms`);
+    await writer.flush();
+    return writer.end();
+  });
 }
 
 export const withBannerArray = (title: string, description: string[], date: Date, content: string[]) => {
diff --git a/Build/lib/fetch-retry.ts b/Build/lib/fetch-retry.ts
index 8cb66166..296b51d8 100644
--- a/Build/lib/fetch-retry.ts
+++ b/Build/lib/fetch-retry.ts
@@ -83,6 +83,7 @@ function createFetchRetry($fetch: typeof fetch): typeof fetch {
       } catch (err: unknown) {
         if (err instanceof Error) {
           if (err.name === 'AbortError') {
+            console.log('[fetch abort]', url.toString());
             return bail(err);
           }
         }
diff --git a/Build/lib/fetch-remote-text-by-line.ts b/Build/lib/fetch-text-by-line.ts
similarity index 96%
rename from Build/lib/fetch-remote-text-by-line.ts
rename to Build/lib/fetch-text-by-line.ts
index 079fd794..c88cc9e0 100644
--- a/Build/lib/fetch-remote-text-by-line.ts
+++ b/Build/lib/fetch-text-by-line.ts
@@ -78,6 +78,6 @@ export async function *createReadlineInterfaceFromResponse(resp: Response): Asyn
   }
 }
 
-export function fetchRemoteTextAndCreateReadlineInterface(url: string | URL) {
+export function fetchRemoteTextAndReadByLine(url: string | URL) {
   return fetchWithRetry(url, defaultRequestInit).then(res => createReadlineInterfaceFromResponse(res as Response));
 }
diff --git a/Build/build-phishing-domainset.ts b/Build/lib/get-phishing-domains.ts
similarity index 67%
rename from Build/build-phishing-domainset.ts
rename to Build/lib/get-phishing-domains.ts
index f31f689e..1531b039 100644
--- a/Build/build-phishing-domainset.ts
+++ b/Build/lib/get-phishing-domains.ts
@@ -1,14 +1,12 @@
-import { processDomainLists, processHosts } from './lib/parse-filter';
+import fsp from 'fs/promises';
 import path from 'path';
-import { createRuleset } from './lib/create-file';
-import { processLine } from './lib/process-line';
-import { createDomainSorter } from './lib/stable-sort-domain';
-import { traceSync, task } from './lib/trace-runner';
-import { createTrie } from './lib/trie';
-import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
-import { createCachedGorhillGetDomain } from './lib/cached-tld-parse';
+import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
+import { processHosts } from './parse-filter';
+import { traceAsync, traceSync } from './trace-runner';
 import * as tldts from 'tldts';
-import { SHARED_DESCRIPTION } from './lib/constants';
+import { createTrie } from './trie';
+import { createCachedGorhillGetDomain } from './cached-tld-parse';
+import { processLine } from './process-line';
 
 const WHITELIST_DOMAIN = new Set([
   'w3s.link',
@@ -80,7 +78,7 @@ const BLACK_TLD = new Set([
   'za.com'
 ]);
 
-export const buildPhishingDomainSet = task(import.meta.path, async () => {
+export const getPhishingDomains = () => traceAsync('get phishing domains', async () => {
   const [domainSet, gorhill] = await Promise.all([
     processHosts('https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true),
     // processDomainLists('https://phishing.army/download/phishing_army_blocklist.txt', true),
@@ -92,25 +90,26 @@ export const buildPhishingDomainSet = task(import.meta.path, async () => {
     // //   'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
     // // ]
     // ),
-    getGorhillPublicSuffixPromise()
+    getGorhillPublicSuffixPromise(),
+    // Remove old files
+    fsp.rm(path.resolve(import.meta.dir, '../../List/domainset/reject_phishing.conf'), { force: true }),
+    fsp.rm(path.resolve(import.meta.dir, '../../Clash/domainset/reject_phishing.txt'), { force: true })
   ]);
 
-  // _domainSet2.forEach(i => domainSet.add(i));
-
-  traceSync('* whitelist', () => {
+  traceSync.skip('* whitelisting phishing domains', () => {
     const trieForRemovingWhiteListed = createTrie(domainSet);
     WHITELIST_DOMAIN.forEach(white => {
       trieForRemovingWhiteListed.find(`.${white}`, false).forEach(f => domainSet.delete(f));
-      if (trieForRemovingWhiteListed.has(white)) {
-        domainSet.delete(white);
-      }
+      // if (trieForRemovingWhiteListed.has(white)) {
+      domainSet.delete(white);
+      // }
     });
   });
 
   const domainCountMap: Record<string, number> = {};
   const getDomain = createCachedGorhillGetDomain(gorhill);
 
-  traceSync('* process domain set', () => {
+  traceSync.skip('* process phishing domain set', () => {
     const domainArr = Array.from(domainSet);
 
     for (let i = 0, len = domainArr.length; i < len; i++) {
@@ -171,36 +170,9 @@ export const buildPhishingDomainSet = task(import.meta.path, async () => {
     }
   });
 
-  const domainSorter = createDomainSorter(gorhill);
+  const results = traceSync.skip('* get final phishing results', () => Object.entries(domainCountMap)
+    .filter(([, count]) => count >= 5)
+    .map(([apexDomain]) => apexDomain));
 
-  const results = traceSync('* get final results', () => Object.entries(domainCountMap)
-    .reduce((acc, [apexDomain, count]) => {
-      if (count >= 5) {
-        acc.push(`.${apexDomain}`);
-      }
-      return acc;
-    }, [])
-    .sort(domainSorter));
-
-  const description = [
-    ...SHARED_DESCRIPTION,
-    '',
-    'The domainset supports enhanced phishing protection',
-    'Build from:',
-    ' - https://gitlab.com/malware-filter/phishing-filter'
-  ];
-
-  return Promise.all(createRuleset(
-    'Sukka\'s Ruleset - Reject Phishing',
-    description,
-    new Date(),
-    results,
-    'domainset',
-    path.resolve(import.meta.dir, '../List/domainset/reject_phishing.conf'),
-    path.resolve(import.meta.dir, '../Clash/domainset/reject_phishing.txt')
-  ));
+  return [results, domainSet] as const;
 });
-
-if (import.meta.main) {
-  buildPhishingDomainSet();
-}
diff --git a/Build/lib/parse-dnsmasq.ts b/Build/lib/parse-dnsmasq.ts
index 18d772c6..2f711e87 100644
--- a/Build/lib/parse-dnsmasq.ts
+++ b/Build/lib/parse-dnsmasq.ts
@@ -1,4 +1,4 @@
-import { fetchRemoteTextAndCreateReadlineInterface } from './fetch-remote-text-by-line';
+import { fetchRemoteTextAndReadByLine } from './fetch-text-by-line';
 import { parse } from 'tldts';
 
 const isDomainLoose = (domain: string): boolean => {
@@ -8,7 +8,7 @@ const isDomainLoose = (domain: string): boolean => {
 
 export const parseFelixDnsmasq = async (url: string | URL): Promise<string[]> => {
   const res: string[] = [];
-  for await (const line of await fetchRemoteTextAndCreateReadlineInterface(url)) {
+  for await (const line of await fetchRemoteTextAndReadByLine(url)) {
     if (line.startsWith('server=/') && line.endsWith('/114.114.114.114')) {
       const domain = line.replace('server=/', '').replace('/114.114.114.114', '');
       if (isDomainLoose(domain)) {
diff --git a/Build/lib/parse-filter.ts b/Build/lib/parse-filter.ts
index d5b3afec..ecc1f11b 100644
--- a/Build/lib/parse-filter.ts
+++ b/Build/lib/parse-filter.ts
@@ -1,12 +1,13 @@
 // @ts-check
 import { defaultRequestInit, fetchWithRetry } from './fetch-retry';
 import * as tldts from './cached-tld-parse';
-import { fetchRemoteTextAndCreateReadlineInterface } from './fetch-remote-text-by-line';
+import { fetchRemoteTextAndReadByLine } from './fetch-text-by-line';
 import { NetworkFilter } from '@cliqz/adblocker';
 import { processLine } from './process-line';
 import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
 import type { PublicSuffixList } from 'gorhill-publicsuffixlist';
 import { isProbablyIpv4 } from './is-fast-ip';
+import { traceAsync } from './trace-runner';
 
 const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null
 let foundDebugDomain = false;
@@ -42,7 +43,7 @@ export async function processDomainLists(domainListsUrl: string | URL, includeAl
   const domainSets = new Set();
 
-  for await (const line of await fetchRemoteTextAndCreateReadlineInterface(domainListsUrl)) {
+  for await (const line of await fetchRemoteTextAndReadByLine(domainListsUrl)) {
     const domainToAdd = processLine(line);
     if (!domainToAdd) {
       continue;
     }
@@ -64,145 +65,134 @@ export async function processDomainLists(domainListsUrl: string | URL, includeAl
 }
 
 export async function processHosts(hostsUrl: string | URL, includeAllSubDomain = false, skipDomainCheck = false) {
-  console.time(`- processHosts: ${hostsUrl.toString()}`);
-
-  if (typeof hostsUrl === 'string') {
-    hostsUrl = new URL(hostsUrl);
-  }
-
-  const domainSets = new Set();
-
-  for await (const l of await fetchRemoteTextAndCreateReadlineInterface(hostsUrl)) {
-    const line = processLine(l);
-    if (!line) {
-      continue;
+  return traceAsync(`- processHosts: ${hostsUrl.toString()}`, async () => {
+    if (typeof hostsUrl === 'string') {
+      hostsUrl = new URL(hostsUrl);
     }
-    const [, ...domains] = line.split(' ');
-    const _domain = domains.join(' ').trim();
+    const domainSets = new Set();
 
-    if (DEBUG_DOMAIN_TO_FIND && _domain.includes(DEBUG_DOMAIN_TO_FIND)) {
-      warnOnce(hostsUrl.href, false, DEBUG_DOMAIN_TO_FIND);
-      foundDebugDomain = true;
-    }
+    for await (const l of await fetchRemoteTextAndReadByLine(hostsUrl)) {
+      const line = processLine(l);
+      if (!line) {
+        continue;
+      }
-    const domain = skipDomainCheck ? _domain : normalizeDomain(_domain);
+      const [, ...domains] = line.split(' ');
+      const _domain = domains.join(' ').trim();
 
-    if (domain) {
-      if (includeAllSubDomain) {
-        domainSets.add(`.${domain}`);
-      } else {
-        domainSets.add(domain);
+      if (DEBUG_DOMAIN_TO_FIND && _domain.includes(DEBUG_DOMAIN_TO_FIND)) {
+        warnOnce(hostsUrl.href, false, DEBUG_DOMAIN_TO_FIND);
+        foundDebugDomain = true;
+      }
+
+      const domain = skipDomainCheck ? _domain : normalizeDomain(_domain);
+
+      if (domain) {
+        if (includeAllSubDomain) {
+          domainSets.add(`.${domain}`);
+        } else {
+          domainSets.add(domain);
+        }
       }
     }
-  }
 
-  console.timeEnd(` - processHosts: ${hostsUrl.toString()}`);
-
-  return domainSets;
+    return domainSets;
+  });
 }
 
 export async function processFilterRules(
   filterRulesUrl: string | URL,
   fallbackUrls?: ReadonlyArray<string | URL> | undefined
 ): Promise<{ white: Set<string>, black: Set<string>, foundDebugDomain: boolean }> {
-  const runStart = Bun.nanoseconds();
-
   const whitelistDomainSets = new Set();
   const blacklistDomainSets = new Set();
-  let downloadTime = 0;
-  const gorhill = await getGorhillPublicSuffixPromise();
+  await traceAsync(`- processFilterRules: ${filterRulesUrl.toString()}`, async () => {
+    const gorhill = await getGorhillPublicSuffixPromise();
 
   /**
    * @param {string} line
    */
-  const lineCb = (line: string) => {
-    const result = parse(line, gorhill);
-    if (!result) {
-      return;
-    }
+    const lineCb = (line: string) => {
+      const result = parse(line, gorhill);
+      if (!result) {
+        return;
+      }
 
-    const flag = result[1];
-    const hostname = result[0];
+      const flag = result[1];
+      const hostname = result[0];
 
-    if (DEBUG_DOMAIN_TO_FIND) {
-      if (hostname.includes(DEBUG_DOMAIN_TO_FIND)) {
-        warnOnce(filterRulesUrl.toString(), flag === 0 || flag === -1, DEBUG_DOMAIN_TO_FIND);
-        foundDebugDomain = true;
+      if (DEBUG_DOMAIN_TO_FIND) {
+        if (hostname.includes(DEBUG_DOMAIN_TO_FIND)) {
+          warnOnce(filterRulesUrl.toString(), flag === 0 || flag === -1, DEBUG_DOMAIN_TO_FIND);
+          foundDebugDomain = true;
 
-        console.log({ result, flag });
+          console.log({ result, flag });
+        }
+      }
+
+      switch (flag) {
+        case 0:
+          if (hostname[0] !== '.') {
+            whitelistDomainSets.add(`.${hostname}`);
+          } else {
+            whitelistDomainSets.add(hostname);
+          }
+          break;
+        case -1:
+          whitelistDomainSets.add(hostname);
+          break;
+        case 1:
+          blacklistDomainSets.add(hostname);
+          break;
+        case 2:
+          if (hostname[0] !== '.') {
+            blacklistDomainSets.add(`.${hostname}`);
+          } else {
+            blacklistDomainSets.add(hostname);
+          }
+          break;
+        default:
+          throw new Error(`Unknown flag: ${flag as any}`);
+      }
+    };
+
+    if (!fallbackUrls || fallbackUrls.length === 0) {
+      for await (const line of await fetchRemoteTextAndReadByLine(filterRulesUrl)) {
+        // don't trim here
+        lineCb(line);
+      }
+    } else {
+      let filterRules;
+
+      try {
+        const controller = new AbortController();
+
+        /** @type string[] */
+        filterRules = (
+          await Promise.any(
+            [filterRulesUrl, ...fallbackUrls].map(async url => {
+              const r = await fetchWithRetry(url, { signal: controller.signal, ...defaultRequestInit });
+              const text = await r.text();
+
+              console.log('[fetch finish]', url.toString());
+
+              controller.abort();
+              return text;
+            })
+          )
+        ).split('\n');
+      } catch (e) {
+        console.log(`Download Rule for [${filterRulesUrl.toString()}] failed`);
+        throw e;
+      }
+
+      for (let i = 0, len = filterRules.length; i < len; i++) {
+        lineCb(filterRules[i]);
       }
     }
-
-    switch (flag) {
-      case 0:
-        if (hostname[0] !== '.') {
-          whitelistDomainSets.add(`.${hostname}`);
-        } else {
-          whitelistDomainSets.add(hostname);
-        }
-        break;
-      case -1:
-        whitelistDomainSets.add(hostname);
-        break;
-      case 1:
-        blacklistDomainSets.add(hostname);
-        break;
-      case 2:
-        if (hostname[0] !== '.') {
-          blacklistDomainSets.add(`.${hostname}`);
-        } else {
-          blacklistDomainSets.add(hostname);
-        }
-        break;
-      default:
-        throw new Error(`Unknown flag: ${flag as any}`);
-    }
-  };
-
-  if (!fallbackUrls || fallbackUrls.length === 0) {
-    downloadTime = 0;
-    let last = Bun.nanoseconds();
-    for await (const line of await fetchRemoteTextAndCreateReadlineInterface(filterRulesUrl)) {
-      const now = Bun.nanoseconds();
-      downloadTime += Bun.nanoseconds() - last;
-      last = now;
-      // don't trim here
-      lineCb(line);
-    }
-  } else {
-    let filterRules;
-
-    const downloadStart = Bun.nanoseconds();
-    try {
-      const controller = new AbortController();
-
-      /** @type string[] */
-      filterRules = (
-        await Promise.any(
-          [filterRulesUrl, ...fallbackUrls].map(async url => {
-            const r = await fetchWithRetry(url, { signal: controller.signal, ...defaultRequestInit });
-            const text = await r.text();
-
-            controller.abort();
-            return text;
-          })
-        )
-      ).split('\n');
-    } catch (e) {
-      console.log(`Download Rule for [${filterRulesUrl.toString()}] failed`);
-      throw e;
-    }
-    downloadTime = Bun.nanoseconds() - downloadStart;
-
-    for (let i = 0, len = filterRules.length; i < len; i++) {
-      lineCb(filterRules[i]);
-    }
-  }
-
-  console.log(` ┬ processFilterRules (${filterRulesUrl.toString()}): ${((Bun.nanoseconds() - runStart) / 1e6).toFixed(3)}ms`);
-  console.log(` └── download time: ${(downloadTime / 1e6).toFixed(3)}ms`);
+  });
 
   return {
     white: whitelistDomainSets,
diff --git a/Build/lib/reject-data-source.ts b/Build/lib/reject-data-source.ts
index c96cfdb6..9742735d 100644
--- a/Build/lib/reject-data-source.ts
+++ b/Build/lib/reject-data-source.ts
@@ -11,12 +11,13 @@ export const HOSTS = [
   // Curben's UrlHaus Malicious URL Blocklist
   // 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
   // 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
-  // 'https://ublockorigin.github.io/uAssets/thirdparties/urlhaus-filter/urlhaus-filter-online.txt',
+  // 'https://ublockorigin.github.io/uAssetsCDN/thirdparties/urlhaus-filter/urlhaus-filter-online.txt',
   ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true],
   // Curben's Phishing URL Blocklist
+  // Covered by lib/get-phishing-domains.ts
   // 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt'
   // 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
-  ['https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true],
+  // ['https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true],
   // Curben's PUP Domains Blocklist
   // 'https://curbengh.github.io/pup-filter/pup-filter-agh.txt'
   // 'https://pup-filter.pages.dev/pup-filter-agh.txt'
@@ -33,7 +34,7 @@ export const ADGUARD_FILTERS = [
     'https://easylist-downloads.adblockplus.org/easylist.txt',
     'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easylist.txt',
     'https://secure.fanboy.co.nz/easylist.txt',
-    'https://ublockorigin.github.io/uAssets/thirdparties/easylist.txt',
+    'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easylist.txt',
     'https://ublockorigin.pages.dev/thirdparties/easylist.txt'
     ]
   ],
@@ -44,7 +45,7 @@ export const ADGUARD_FILTERS = [
     'https://secure.fanboy.co.nz/easyprivacy.txt',
     'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easyprivacy.txt',
     'https://easylist-downloads.adblockplus.org/easyprivacy.txt',
-    'https://ublockorigin.github.io/uAssets/thirdparties/easyprivacy.txt',
+    'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easyprivacy.txt',
     'https://ublockorigin.pages.dev/thirdparties/easyprivacy.txt'
     ]
   ],
@@ -52,7 +53,7 @@ export const ADGUARD_FILTERS = [
   [
     'https://adguardteam.github.io/AdGuardSDNSFilter/Filters/filter.txt',
     [
-      'https://filters.adtidy.org/extension/chromium/filters/15.txt'
+      'https://filters.adtidy.org/extension/ublock/filters/15.txt'
     ]
   ],
   // AdGuard CNAME Filter Combined
@@ -63,41 +64,36 @@ export const ADGUARD_FILTERS = [
   'https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers.txt',
   // uBlock Origin Filter List
   [
-    'https://ublockorigin.github.io/uAssets/filters/filters.min.txt',
+    'https://ublockorigin.github.io/uAssetsCDN/filters/filters.min.txt',
     [
-      'https://ublockorigin.github.io/uAssetsCDN/filters/filters.min.txt',
       'https://ublockorigin.pages.dev/filters/filters.min.txt'
     ]
   ],
   // uBlock Origin Badware Risk List
   [
-    'https://ublockorigin.github.io/uAssets/filters/badware.min.txt',
+    'https://ublockorigin.github.io/uAssetsCDN/filters/badware.min.txt',
     [
-      'https://ublockorigin.github.io/uAssetsCDN/filters/badware.min.txt',
       'https://ublockorigin.pages.dev/filters/badware.min.txt'
     ]
   ],
   // uBlock Origin Privacy List
   [
-    'https://ublockorigin.github.io/uAssets/filters/privacy.min.txt',
+    'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.min.txt',
     [
-      'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.min.txt',
       'https://ublockorigin.pages.dev/filters/privacy.min.txt'
     ]
   ],
   // uBlock Origin Resource Abuse: merged in uBlock Origin Privacy List
   // [
-  //   'https://ublockorigin.github.io/uAssets/filters/resource-abuse.txt',
+  //   'https://ublockorigin.github.io/uAssetsCDN/filters/resource-abuse.txt',
   //   [
-  //     'https://ublockorigin.github.io/uAssetsCDN/filters/resource-abuse.txt',
  //     'https://ublockorigin.pages.dev/filters/resource-abuse.txt'
  //   ]
  // ],
   // uBlock Origin Unbreak
   [
-    'https://ublockorigin.github.io/uAssets/filters/unbreak.min.txt',
+    'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.min.txt',
     [
-      'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.min.txt',
       'https://ublockorigin.pages.dev/filters/unbreak.min.txt'
     ]
   ],
diff --git a/Build/lib/stable-sort-domain.test.ts b/Build/lib/stable-sort-domain.test.ts
index de3a79c7..ca18b30c 100644
--- a/Build/lib/stable-sort-domain.test.ts
+++ b/Build/lib/stable-sort-domain.test.ts
@@ -6,4 +6,8 @@ describe('stable-sort-domain', () => {
   it('.ks.cn, .tag.unclaimedproperty.ks.gov', () => {
     expect(domainSorter('.ks.cn', '.tag.unclaimedproperty.ks.gov')).toBe(-1);
   });
+
+  it('.fgnzdb.xyz, .hub.fghtem.com', () => {
+    expect(domainSorter('.fgnzdb.xyz', '.hub.fghtem.com')).toBe(1);
+  });
 });
diff --git a/Build/lib/trace-runner.ts b/Build/lib/trace-runner.ts
index 860c73bc..edbbb124 100644
--- a/Build/lib/trace-runner.ts
+++ b/Build/lib/trace-runner.ts
@@ -1,19 +1,21 @@
 import path from 'path';
+import picocolors from 'picocolors';
 
-const traceSync = <T>(prefix: string, fn: () => T): T => {
+function traceSync<T>(prefix: string, fn: () => T): T {
   const start = Bun.nanoseconds();
   const result = fn();
   const end = Bun.nanoseconds();
-  console.log(`${prefix}: ${((end - start) / 1e6).toFixed(3)}ms`);
+  console.log(`${picocolors.gray(`[${((end - start) / 1e6).toFixed(3)}ms]`)} ${prefix}`);
   return result;
-};
+}
+traceSync.skip = <T>(prefix: string, fn: () => T): T => fn();
 export { traceSync };
 
 const traceAsync = async <T>(prefix: string, fn: () => Promise<T>): Promise<T> => {
   const start = Bun.nanoseconds();
   const result = await fn();
   const end = Bun.nanoseconds();
-  console.log(`${prefix}: ${((end - start) / 1e6).toFixed(3)}ms`);
+  console.log(`${picocolors.gray(`[${((end - start) / 1e6).toFixed(3)}ms]`)} ${prefix}`);
   return result;
 };
 export { traceAsync };
@@ -31,7 +33,7 @@ const task = (importMetaPath: string, fn: () => Promise, customname: strin
     const start = Bun.nanoseconds();
     await fn();
     const end = Bun.nanoseconds();
-    console.log(`✅ [${taskName}] Executed successfully: ${((end - start) / 1e6).toFixed(3)}ms`);
+    console.log(`✅ [${taskName}] [${((end - start) / 1e6).toFixed(3)}ms] Executed successfully`);
 
     return { start, end, taskName } as TaskResult;
   };
diff --git a/Build/validate-domainset.ts b/Build/validate-domainset.ts
index b3dbadd4..df81cd7c 100644
--- a/Build/validate-domainset.ts
+++ b/Build/validate-domainset.ts
@@ -4,7 +4,7 @@ import * as tldts from 'tldts'; // hit ratio way too low, dont cache
 import picocolors from 'picocolors';
 import path from 'path';
 import listDir from '@sukka/listdir';
-import { readFileByLine } from './lib/fetch-remote-text-by-line';
+import { readFileByLine } from './lib/fetch-text-by-line';
 import { processLine } from './lib/process-line';
 import { task } from './lib/trace-runner';
diff --git a/README.md b/README.md
index b3560855..0f247c3f 100644
--- a/README.md
+++ b/README.md
@@ -19,16 +19,17 @@ #### Ad Blocking / Privacy Protection / Malware Blocking / Phishing Blocking
 - Automatically generated
-- For data sources, the domain whitelist, and how the set is built, see [`build-reject-domainset.js`](Build/build-reject-domainset.js)
+- For data sources, the domain whitelist, and how the set is built, see [`build-reject-domainset.ts`](Build/build-reject-domainset.ts)
 - Recommended only for Surge for Mac; on mobile platforms, use a dedicated tool (such as AdGuard for Android/iOS) for better performance
 - Not a replacement for in-browser ad-blocking extensions (such as uBlock Origin)
 
 **Surge**
 
 ```ini
-RULE-SET,https://ruleset.skk.moe/List/non_ip/reject.conf,REJECT
+# Non IP
 DOMAIN-SET,https://ruleset.skk.moe/List/domainset/reject.conf,REJECT-TINYGIF
-DOMAIN-SET,https://ruleset.skk.moe/List/domainset/reject_phishing.conf,REJECT
+RULE-SET,https://ruleset.skk.moe/List/non_ip/reject.conf,REJECT
+# IP
 RULE-SET,https://ruleset.skk.moe/List/ip/reject.conf,REJECT-DROP
 ```
@@ -51,13 +52,6 @@ rule-providers:
     interval: 43200
     url: https://ruleset.skk.moe/Clash/domainset/reject.txt
     path: ./sukkaw_ruleset/reject_domainset.txt
-  reject_phishing_domainset:
-    type: http
-    behavior: domain
-    format: text
-    interval: 43200
-    url: https://ruleset.skk.moe/Clash/domainset/reject_phishing.txt
-    path: ./sukkaw_ruleset/reject_phishing_domainset.txt
   reject_ip:
     type: http
     behavior: classical
@@ -70,7 +64,6 @@ rules:
   - RULE-SET,reject_non_ip,REJECT
   # WARNING! Using reject_domainset can cause Clash out of memory due to the insufficient Clash implementation.
   - RULE-SET,reject_domainset,REJECT
-  - RULE-SET,reject_phishing_domainset,REJECT
   - RULE-SET,reject_ip,REJECT
 ```
diff --git a/patches/@vercel__fetch-retry@5.1.3.patch b/patches/@vercel__fetch-retry@5.1.3.patch
deleted file mode 100644
index 3f3af7c0..00000000
--- a/patches/@vercel__fetch-retry@5.1.3.patch
+++ /dev/null
@@ -1,13 +0,0 @@
-diff --git a/index.js b/index.js
-index e5eeb353f6da8968d926cb4e502207912b6c3594..eb1370dc19030404e3def46ce13938d34a9e6214 100644
---- a/index.js
-+++ b/index.js
-@@ -66,7 +66,7 @@ function setup(fetch) {
-        return res;
-      }
-    } catch (err) {
--      if (err.type === 'aborted') {
-+      if (err.type === 'aborted' || err.name === 'AbortError') {
-        return bail(err);
-      }
-      const clientError = isClientError(err);
\ No newline at end of file