diff --git a/Build/build-internal-cdn-rules.ts b/Build/build-internal-cdn-rules.ts
index 8e783aae..8ac268f5 100644
--- a/Build/build-internal-cdn-rules.ts
+++ b/Build/build-internal-cdn-rules.ts
@@ -1,38 +1,18 @@
 import path from 'path';
-import * as tldts from 'tldts';
 import { processLine } from './lib/process-line';
 import { readFileByLine } from './lib/fetch-text-by-line';
 import { sortDomains } from './lib/stable-sort-domain';
 import { task } from './lib/trace-runner';
 import { compareAndWriteFile } from './lib/create-file';
 import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
-// const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse');
 
 const escapeRegExp = (string = '') => string.replaceAll(/[$()*+.?[\\\]^{|}]/g, '\\$&');
 
-const addApexDomain = (input: string, set: Set<string>) => {
-  // We are including the private domains themselves
-  const d = tldts.getDomain(input, { allowPrivateDomains: false });
-  if (d) {
-    set.add(d);
-  }
-};
-
 const processLocalDomainSet = async (domainSetPath: string, set: Set<string>) => {
-  for await (const line of readFileByLine(domainSetPath)) {
-    // console.log({ line });
-
-    const parsed = tldts.parse(line, { allowPrivateDomains: true, detectIp: false });
-    if (parsed.isIp) continue;
-    if (parsed.isIcann || parsed.isPrivate) {
-      if (parsed.domain) {
-        set.add(parsed.domain);
-      }
-      continue;
-    }
-
-    if (processLine(line)) {
-      console.warn('[drop line from domainset]', line);
+  for await (const l of readFileByLine(domainSetPath)) {
+    const line = processLine(l);
+    if (line) {
+      set.add(line[0] === '.' ? line.slice(1) : line);
     }
   }
 };
@@ -40,12 +20,12 @@ const processLocalDomainSet = async (domainSetPath: string, set: Set<string>) =>
 const processLocalRuleSet = async (ruleSetPath: string, set: Set<string>, keywords: Set<string>) => {
   for await (const line of readFileByLine(ruleSetPath)) {
     if (line.startsWith('DOMAIN-SUFFIX,')) {
-      addApexDomain(line.replace('DOMAIN-SUFFIX,', ''), set);
+      set.add(line.replace('DOMAIN-SUFFIX,', ''));
     } else if (line.startsWith('DOMAIN,')) {
-      addApexDomain(line.replace('DOMAIN,', ''), set);
+      set.add(line.replace('DOMAIN,', ''));
     } else if (line.startsWith('DOMAIN-KEYWORD')) {
       keywords.add(escapeRegExp(line.replace('DOMAIN-KEYWORD,', '')));
-    } else if (line.startsWith('USER-AGENT,') || line.startsWith('PROCESS-NAME,') || line.startsWith('URL-REGEX,')) {
+    } else if (line.includes('USER-AGENT,') || line.includes('PROCESS-NAME,') || line.includes('URL-REGEX,')) {
       // do nothing
     } else if (processLine(line)) {
       console.warn('[drop line from ruleset]', line);
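Note on the build-internal-cdn-rules.ts change above: instead of reducing every entry to its apex domain via tldts, the new code keeps domainset entries as-is and only strips the leading `.` that marks an "include all subdomains" entry. A minimal, self-contained sketch of that handling — `toProcessedLine` is a hypothetical stand-in for `processLine()`, which (judging from its usage here) returns `null` for comments/blank lines and the usable rule text otherwise:

```ts
// Hypothetical stand-in for processLine(): null for comments/blanks,
// the trimmed rule text otherwise.
const toProcessedLine = (raw: string): string | null => {
  const trimmed = raw.trim();
  return trimmed === '' || trimmed.startsWith('#') ? null : trimmed;
};

const set = new Set<string>();
for (const raw of ['# comment', '.cdn.example.com', 'static.example.com']) {
  const line = toProcessedLine(raw);
  if (line) {
    // A leading "." marks a suffix entry; both forms are stored without it.
    set.add(line[0] === '.' ? line.slice(1) : line);
  }
}
console.log([...set]); // [ 'cdn.example.com', 'static.example.com' ]
```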
diff --git a/Build/build-reject-domainset.ts b/Build/build-reject-domainset.ts
index 8853e89c..428f9b56 100644
--- a/Build/build-reject-domainset.ts
+++ b/Build/build-reject-domainset.ts
@@ -23,8 +23,6 @@ import { setAddFromArray } from './lib/set-add-from-array';
 export const buildRejectDomainSet = task(import.meta.path, async () => {
   /** Whitelists */
   const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
-  const domainKeywordsSet = new Set<string>();
-  const domainSuffixSet = new Set<string>();
 
   const domainSets = new Set<string>();
 
@@ -95,21 +93,23 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
   let previousSize = domainSets.size;
   console.log(`Import ${previousSize} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
 
-  for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/reject.conf'))) {
-    const [type, keyword] = line.split(',');
-
-    if (type === 'DOMAIN-KEYWORD') {
-      domainKeywordsSet.add(keyword.trim());
-    } else if (type === 'DOMAIN-SUFFIX') {
-      domainSuffixSet.add(keyword.trim());
-    }
-  }
-
-  console.log(`Import ${domainKeywordsSet.size} black keywords and ${domainSuffixSet.size} black suffixes!`);
-
-  previousSize = domainSets.size;
   // Dedupe domainSets
-  traceSync('* Dedupe from black keywords/suffixes', () => {
+  await traceAsync('* Dedupe from black keywords/suffixes', async () => {
+    /** Collect DOMAIN-SUFFIX from non_ip/reject.conf for deduplication */
+    const domainSuffixSet = new Set<string>();
+    /** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */
+    const domainKeywordsSet = new Set<string>();
+
+    for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/reject.conf'))) {
+      const [type, keyword] = line.split(',');
+
+      if (type === 'DOMAIN-KEYWORD') {
+        domainKeywordsSet.add(keyword.trim());
+      } else if (type === 'DOMAIN-SUFFIX') {
+        domainSuffixSet.add(keyword.trim());
+      }
+    }
+
     const trie1 = createTrie(domainSets);
 
     domainSuffixSet.forEach(suffix => {
diff --git a/Build/build-speedtest-domainset.ts b/Build/build-speedtest-domainset.ts
index 2f4a5f67..fdb2aecf 100644
--- a/Build/build-speedtest-domainset.ts
+++ b/Build/build-speedtest-domainset.ts
@@ -24,7 +24,7 @@ const latestTopUserAgentsPromise = fsCache.apply(
   {
     serializer: serializeArray,
     deserializer: deserializeArray,
-    ttl: TTL.ONE_DAY()
+    ttl: TTL.THREE_DAYS()
   }
 );
diff --git a/Build/download-previous-build.ts b/Build/download-previous-build.ts
index b56a223b..4bf39473 100644
--- a/Build/download-previous-build.ts
+++ b/Build/download-previous-build.ts
@@ -1,7 +1,6 @@
-import fs from 'fs';
-import fsp from 'fs/promises';
+import { existsSync, createWriteStream } from 'fs';
+import { mkdir } from 'fs/promises';
 import path from 'path';
-import { Readable } from 'stream';
 import { pipeline } from 'stream/promises';
 import { readFileByLine } from './lib/fetch-text-by-line';
 import { isCI } from 'ci-info';
@@ -9,6 +8,7 @@ import { task } from './lib/trace-runner';
 import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
 import tarStream from 'tar-stream';
 import zlib from 'zlib';
+import { Readable } from 'stream';
 
 const IS_READING_BUILD_OUTPUT = 1 << 2;
 const ALL_FILES_EXISTS = 1 << 3;
@@ -31,7 +31,7 @@ export const downloadPreviousBuild = task(import.meta.path, async () => {
       if (!isCI) {
         // Bun.file().exists() doesn't check directory
-        if (!fs.existsSync(path.join(import.meta.dir, '..', line))) {
+        if (!existsSync(path.join(import.meta.dir, '..', line))) {
           flag = flag & ~ALL_FILES_EXISTS;
         }
       }
@@ -57,7 +57,7 @@ export const downloadPreviousBuild = task(import.meta.path, async () => {
     const extract = tarStream.extract();
     const gunzip = zlib.createGunzip();
     pipeline(
-      resp.body as any,
+      Readable.fromWeb(resp.body) as any,
       gunzip,
       extract
     );
@@ -78,10 +78,10 @@ export const downloadPreviousBuild = task(import.meta.path, async () => {
       const relativeEntryPath = entry.header.name.replace(pathPrefix, '');
       const targetPath = path.join(import.meta.dir, '..', relativeEntryPath);
 
-      await fsp.mkdir(path.dirname(targetPath), { recursive: true });
+      await mkdir(path.dirname(targetPath), { recursive: true });
       await pipeline(
         entry as any,
-        fs.createWriteStream(targetPath)
+        createWriteStream(targetPath)
       );
     }
   });
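The download-previous-build.ts hunks above swap the raw `resp.body` for `Readable.fromWeb(resp.body)`: `fetch()` returns a WHATWG `ReadableStream`, while `zlib.createGunzip()` and tar-stream expect Node streams. A self-contained sketch of the same bridging, assuming tar-stream v3 (whose `extract()` is async-iterable); `url` here is a placeholder, not the repo's actual tarball location:

```ts
import { Readable } from 'stream';
import { pipeline } from 'stream/promises';
import zlib from 'zlib';
import tarStream from 'tar-stream';

async function listTarballEntries(url: string) {
  const resp = await fetch(url);
  if (!resp.body) throw new Error('Empty response body');

  const extract = tarStream.extract();
  // Not awaited here on purpose: the entries must be consumed below,
  // otherwise the tar stream stalls. Readable.fromWeb() converts the
  // WHATWG stream into a Node Readable for gunzip/tar.
  pipeline(Readable.fromWeb(resp.body) as any, zlib.createGunzip(), extract)
    .catch((e) => extract.destroy(e));

  for await (const entry of extract) {
    console.log(entry.header.name);
    entry.resume(); // drain the entry so extraction can continue
  }
}
```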
diff --git a/Build/download-publicsuffixlist.ts b/Build/download-publicsuffixlist.ts
index 150189fb..7a1c9446 100644
--- a/Build/download-publicsuffixlist.ts
+++ b/Build/download-publicsuffixlist.ts
@@ -3,18 +3,16 @@ import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
 import { createMemoizedPromise } from './lib/memo-promise';
 import { traceAsync } from './lib/trace-runner';
 
-export const getPublicSuffixListTextPromise = createMemoizedPromise(
-  () => traceAsync(
-    'obtain public_suffix_list',
-    () => fsCache.apply(
-      'https://publicsuffix.org/list/public_suffix_list.dat',
-      () => fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit).then(r => r.text()),
-      {
-        // https://github.com/publicsuffix/list/blob/master/.github/workflows/tld-update.yml
-        // Though the action runs every 24 hours, the IANA list is updated every 7 days.
-        // So a 3 day TTL should be enough.
-        ttl: TTL.THREE_DAYS()
-      }
-    )
+export const getPublicSuffixListTextPromise = createMemoizedPromise(() => traceAsync(
+  'obtain public_suffix_list',
+  () => fsCache.apply(
+    'https://publicsuffix.org/list/public_suffix_list.dat',
+    () => fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit).then(r => r.text()),
+    {
+      // https://github.com/publicsuffix/list/blob/master/.github/workflows/tld-update.yml
+      // Though the action runs every 24 hours, the IANA list is updated every 7 days.
+      // So a 3 day TTL should be enough.
+      ttl: TTL.THREE_DAYS()
+    }
   )
-);
+));
diff --git a/Build/lib/cache-filesystem.ts b/Build/lib/cache-filesystem.ts
index bed3cbd8..fb0b02a1 100644
--- a/Build/lib/cache-filesystem.ts
+++ b/Build/lib/cache-filesystem.ts
@@ -2,8 +2,9 @@ import { Database } from 'bun:sqlite';
 import os from 'os';
 import path from 'path';
-import fs from 'fs';
+import { mkdirSync } from 'fs';
 import picocolors from 'picocolors';
+import { traceSync } from './trace-runner';
 
 const identity = (x: any) => x;
 
@@ -64,7 +65,7 @@ export class Cache {
   constructor({ cachePath = path.join(os.tmpdir() || '/tmp', 'hdc'), tbd }: CacheOptions = {}) {
     this.cachePath = cachePath;
-    fs.mkdirSync(this.cachePath, { recursive: true });
+    mkdirSync(this.cachePath, { recursive: true });
 
     if (tbd != null) this.tbd = tbd;
 
     const db = new Database(path.join(this.cachePath, 'cache.db'));
@@ -151,7 +152,7 @@ export class Cache {
   }
 }
 
-export const fsCache = new Cache({ cachePath: path.resolve(import.meta.dir, '../../.cache') });
+export const fsCache = traceSync('initializing filesystem cache', () => new Cache({ cachePath: path.resolve(import.meta.dir, '../../.cache') }));
 // process.on('exit', () => {
 //   fsCache.destroy();
 // });
diff --git a/Build/lib/cached-tld-parse.ts b/Build/lib/cached-tld-parse.ts
index 27b2892b..ef3ac47a 100644
--- a/Build/lib/cached-tld-parse.ts
+++ b/Build/lib/cached-tld-parse.ts
@@ -12,6 +12,7 @@ const sharedConfig2 = { allowPrivateDomains: true, detectIp: false };
 export const parse = (domain: string) => cache.sync(domain, () => tldts.parse(domain, sharedConfig));
 /** { allowPrivateDomains: true, detectIp: false } */
 export const parse2 = (domain: string) => cache2.sync(domain, () => tldts.parse(domain, sharedConfig2));
+export const parseWithoutDetectIp = parse2;
 
 let gothillGetDomainCache: ReturnType<typeof createCache> | null = null;
 export const createCachedGorhillGetDomain = (gorhill: PublicSuffixList) => {
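For context on the download-publicsuffixlist.ts reflow above: `createMemoizedPromise` is what lets every consumer of the public suffix list share a single fetch. A minimal sketch of the semantics assumed from its usage here — not the actual implementation in `./lib/memo-promise`:

```ts
// Assumed semantics: run the factory once, hand every caller the same promise.
const createMemoizedPromise = <T>(factory: () => Promise<T>): (() => Promise<T>) => {
  let p: Promise<T> | null = null;
  return () => (p ??= factory());
};

const getPSL = createMemoizedPromise(
  () => fetch('https://publicsuffix.org/list/public_suffix_list.dat').then(r => r.text())
);
// Concurrent callers share one network request; later callers reuse the promise.
const [a, b] = await Promise.all([getPSL(), getPSL()]);
```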
diff --git a/Build/lib/create-file.ts b/Build/lib/create-file.ts
index eec909ea..6fd14e14 100644
--- a/Build/lib/create-file.ts
+++ b/Build/lib/create-file.ts
@@ -18,7 +18,7 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
     isEqual = false;
   } else {
     isEqual = await traceAsync(
-      picocolors.gray(`Comparing ${filePath}`),
+      picocolors.gray(`comparing ${filePath}`),
       async () => {
         let index = 0;
 
@@ -62,11 +62,11 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
   }
 
   if (isEqual) {
-    console.log(picocolors.gray(`Same Content, bail out writing: ${filePath}`));
+    console.log(picocolors.dim(`same content, bail out writing: ${filePath}`));
     return;
   }
 
-  await traceAsync(picocolors.gray(`Writing ${filePath}`), async () => {
+  await traceAsync(picocolors.gray(`writing ${filePath}`), async () => {
     if (linesALen < 10000) {
       return Bun.write(file, `${linesA.join('\n')}\n`);
     }
diff --git a/Build/lib/fetch-text-by-line.ts b/Build/lib/fetch-text-by-line.ts
index 48dae086..093eb3e6 100644
--- a/Build/lib/fetch-text-by-line.ts
+++ b/Build/lib/fetch-text-by-line.ts
@@ -3,34 +3,34 @@ import { fetchWithRetry, defaultRequestInit } from './fetch-retry';
 import { TextLineStream } from './text-line-transform-stream';
 import { PolyfillTextDecoderStream } from './text-decoder-stream';
 
-function createTextLineStreamFromStreamSource(stream: ReadableStream<Uint8Array>) {
-  return stream
-    .pipeThrough(new PolyfillTextDecoderStream())
-    .pipeThrough(new TextLineStream());
-}
-
-// const decoder = new TextDecoder('utf-8');
-// async function *createTextLineAsyncGeneratorFromStreamSource(stream: ReadableStream<Uint8Array>): AsyncGenerator<string> {
-//   let buf = '';
-
-//   for await (const chunk of stream) {
-//     const chunkStr = decoder.decode(chunk).replaceAll('\r\n', '\n');
-//     for (let i = 0, len = chunkStr.length; i < len; i++) {
-//       const char = chunkStr[i];
-//       if (char === '\n') {
-//         yield buf;
-//         buf = '';
-//       } else {
-//         buf += char;
-//       }
-//     }
-//   }
-
-//   if (buf) {
-//     yield buf;
-//   }
-// }
+// function createTextLineStreamFromStreamSource(stream: ReadableStream<Uint8Array>) {
+//   return stream
+//     .pipeThrough(new PolyfillTextDecoderStream())
+//     .pipeThrough(new TextLineStream());
+// }
 
+const decoder = new TextDecoder('utf-8');
+async function *createTextLineAsyncGeneratorFromStreamSource(stream: ReadableStream<Uint8Array>): AsyncGenerator<string> {
+  let buf = '';
+
+  for await (const chunk of stream) {
+    const chunkStr = decoder.decode(chunk).replaceAll('\r\n', '\n');
+    for (let i = 0, len = chunkStr.length; i < len; i++) {
+      const char = chunkStr[i];
+      if (char === '\n') {
+        yield buf;
+        buf = '';
+      } else {
+        buf += char;
+      }
+    }
+  }
+
+  if (buf) {
+    yield buf;
+  }
+}
+
 export function readFileByLine(file: string | URL | BunFile) {
   if (typeof file === 'string') {
     file = Bun.file(file);
@@ -38,7 +38,7 @@ export function readFileByLine(file: string | URL | BunFile) {
     file = Bun.file(file);
   }
 
-  return createTextLineStreamFromStreamSource(file.stream());
+  return createTextLineAsyncGeneratorFromStreamSource(file.stream());
 }
 
 export function createReadlineInterfaceFromResponse(resp: Response) {
@@ -49,7 +49,7 @@ export function createReadlineInterfaceFromResponse(resp: Response) {
     throw new Error('Body has already been consumed.');
   }
 
-  return createTextLineStreamFromStreamSource(resp.body);
+  return createTextLineAsyncGeneratorFromStreamSource(resp.body);
 }
 
 export function fetchRemoteTextByLine(url: string | URL) {
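Usage sketch for the async generator that now backs `readFileByLine()` above: it buffers across chunk boundaries, so a line split between two chunks is still yielded whole, and the `replaceAll('\r\n', '\n')` pass normalizes CRLF input. This assumes `createTextLineAsyncGeneratorFromStreamSource` from the diff is in scope:

```ts
const encoder = new TextEncoder();
// Two chunks that split the line 'bar' in the middle.
const source = new ReadableStream<Uint8Array>({
  start(controller) {
    for (const chunk of ['foo\r\nba', 'r\nbaz']) controller.enqueue(encoder.encode(chunk));
    controller.close();
  }
});

for await (const line of createTextLineAsyncGeneratorFromStreamSource(source)) {
  console.log(line); // 'foo', then 'bar', then the trailing 'baz'
}
```

One caveat worth noting: `decoder.decode(chunk)` without `{ stream: true }` can mangle a multi-byte UTF-8 character that happens to straddle a chunk boundary (and a `\r\n` split across chunks leaves a stray `\r`); the ASCII rule files this reads are unaffected.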
diff --git a/Build/lib/parse-filter.ts b/Build/lib/parse-filter.ts
index c352c897..1c915334 100644
--- a/Build/lib/parse-filter.ts
+++ b/Build/lib/parse-filter.ts
@@ -56,23 +56,20 @@ export function processHosts(hostsUrl: string, includeAllSubDomain = false, ttl:
       continue;
     }
 
-    const domain = line.split(/\s/)[1];
+    const _domain = line.split(/\s/)[1]?.trim();
+    if (!_domain) {
+      continue;
+    }
+    const domain = normalizeDomain(_domain);
     if (!domain) {
       continue;
     }
-    const _domain = domain.trim();
-
-    if (DEBUG_DOMAIN_TO_FIND && _domain.includes(DEBUG_DOMAIN_TO_FIND)) {
-      console.warn(picocolors.red(hostsUrl), '(black)', _domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
+
+    if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
+      console.warn(picocolors.red(hostsUrl), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
       foundDebugDomain = true;
     }
 
-    const domainToAdd = normalizeDomain(_domain);
-    if (!domainToAdd) {
-      continue;
-    }
-
-    domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
+    domainSets.add(includeAllSubDomain ? `.${domain}` : domain);
   }
 
   console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size));
@@ -102,11 +99,11 @@ export async function processFilterRules(
   fallbackUrls?: readonly string[] | undefined | null,
   ttl: number | null = null
 ): Promise<{ white: string[], black: string[], foundDebugDomain: boolean }> {
-  const [white, black, warningMessages] = await traceAsync(`- processFilterRules: ${filterRulesUrl}`, () => fsCache.apply<[string[], string[], string[]]>(
+  const [white, black, warningMessages] = await traceAsync(`- processFilterRules: ${filterRulesUrl}`, () => fsCache.apply<Readonly<[
+    white: string[], black: string[], warningMessages: string[]
+  ]>>(
     filterRulesUrl,
     async () => {
       const whitelistDomainSets = new Set<string>();
diff --git a/Build/lib/reject-data-source.ts b/Build/lib/reject-data-source.ts
index 933a4f01..ec4767f2 100644
--- a/Build/lib/reject-data-source.ts
+++ b/Build/lib/reject-data-source.ts
@@ -4,7 +4,7 @@ export const HOSTS = [
   ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, TTL.THREE_HOURS()],
   ['https://someonewhocares.org/hosts/hosts', true, TTL.THREE_HOURS()],
   // no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl
-  ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false, TTL.THREE_DAYS()],
+  ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', true, TTL.THREE_DAYS()],
   // have not been updated for more than a year, so we set a 14 days cache ttl
   ['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', true, TTL.TWO_WEEKS()],
   ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false, TTL.THREE_DAYS()],
diff --git a/Source/non_ip/domestic.js b/Source/non_ip/domestic.js
index 5117417a..5da7dd01 100644
--- a/Source/non_ip/domestic.js
+++ b/Source/non_ip/domestic.js
@@ -12,6 +12,7 @@ module.exports.DOMESTICS = /** @type {const} */({
     'aliyun.com',
     'aliyuncs.com',
     'alikunlun.com',
+    'cdngslb.com',
     'alipay.com',
     'alipay.cn',
     'alipay.com.cn',
@@ -130,6 +131,18 @@ module.exports.DOMESTICS = /** @type {const} */({
       'bilibilipay.com'
     ]
   },
+  BILIBILI_ALI: {
+    dns: 'quic://223.5.5.5:853',
+    domains: [
+      'upos-sz-mirrorali.bilivideo.com'
+    ]
+  },
+  BILIBILI_BD: {
+    dns: '180.76.76.76',
+    domains: [
+      'upos-sz-mirrorbos.bilivideo.com'
+    ]
+  },
   XIAOMI: {
     dns: 'https://120.53.53.53/dns-query',
     domains: [
@@ -155,6 +168,8 @@ module.exports.DOMESTICS = /** @type {const} */({
       'toutiaoimg.cn',
       'toutiaostatic.com',
       'toutiaovod.com',
+      'toutiaocloud.com',
+      'toutiaopage.com',
       'feiliao.com',
       'iesdouyin.com',
       'pstatp.com',
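The parse-filter.ts hunk above reorders `processHosts` so the hostname field is extracted and normalized first, and both the debug check and the final add then operate on the same normalized string. A condensed sketch of the resulting flow — `normalizeDomainStub` is a rough, hypothetical stand-in; the real `normalizeDomain` is assumed to do much more (including rejecting IPs and invalid labels):

```ts
// Very rough stand-in: only checks charset and the presence of a dot.
const normalizeDomainStub = (d: string): string | null =>
  /^[a-z0-9.-]+$/i.test(d) && d.includes('.') ? d.toLowerCase() : null;

const domainSets = new Set<string>();
const line = '0.0.0.0 ads.example.com';

const _domain = line.split(/\s/)[1]?.trim(); // hostname field of a hosts entry
if (_domain) {
  const domain = normalizeDomainStub(_domain);
  if (domain) {
    domainSets.add(`.${domain}`); // the includeAllSubDomain form
  }
}
```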
diff --git a/Source/non_ip/my_reject.conf b/Source/non_ip/my_reject.conf
index a7275e62..93eb5dd3 100644
--- a/Source/non_ip/my_reject.conf
+++ b/Source/non_ip/my_reject.conf
@@ -30,6 +30,7 @@ PROCESS-NAME,LemonService
 DEST-PORT,7680
 
 # >> HTTPDNS
+# https://github.com/VirgilClyne/GetSomeFries/wiki/%F0%9F%9A%AB-HTTPDNS
 # Aliyun
 DOMAIN,httpdns-api.aliyuncs.com
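For illustration only: if a line like the `DOMAIN` rule above were fed through the simplified ruleset reader from build-internal-cdn-rules.ts (first diff in this patch), this is how it would land in the domain set, while a non-domain rule such as `DEST-PORT` would fall through to that reader's warn branch:

```ts
const set = new Set<string>();
const line = 'DOMAIN,httpdns-api.aliyuncs.com';
if (line.startsWith('DOMAIN-SUFFIX,')) {
  set.add(line.replace('DOMAIN-SUFFIX,', ''));
} else if (line.startsWith('DOMAIN,')) {
  set.add(line.replace('DOMAIN,', '')); // set now holds 'httpdns-api.aliyuncs.com'
}
```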