Minor changes

This commit is contained in:
SukkaW 2024-01-14 00:44:46 +08:00
parent 75c9e084a9
commit 897a505c32
13 changed files with 106 additions and 113 deletions

View File

@ -1,38 +1,18 @@
import path from 'path'; import path from 'path';
import * as tldts from 'tldts';
import { processLine } from './lib/process-line'; import { processLine } from './lib/process-line';
import { readFileByLine } from './lib/fetch-text-by-line'; import { readFileByLine } from './lib/fetch-text-by-line';
import { sortDomains } from './lib/stable-sort-domain'; import { sortDomains } from './lib/stable-sort-domain';
import { task } from './lib/trace-runner'; import { task } from './lib/trace-runner';
import { compareAndWriteFile } from './lib/create-file'; import { compareAndWriteFile } from './lib/create-file';
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix'; import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
// const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse');
const escapeRegExp = (string = '') => string.replaceAll(/[$()*+.?[\\\]^{|}]/g, '\\$&'); const escapeRegExp = (string = '') => string.replaceAll(/[$()*+.?[\\\]^{|}]/g, '\\$&');
/**
 * Reduce `input` to its apex domain via `tldts.getDomain` and, when one is
 * found, add it to `set`. Inputs for which no domain can be determined are
 * ignored and leave `set` untouched.
 *
 * `allowPrivateDomains` is deliberately `false`: the private suffixes
 * themselves are meant to end up in the set.
 * NOTE(review): presumably this means a host under a private suffix collapses
 * to that suffix's own domain — confirm against the tldts documentation.
 *
 * @param input - hostname (or rule payload) to resolve
 * @param set - collection that receives the resolved apex domain
 */
const addApexDomain = (input: string, set: Set<string>) => {
  const apex = tldts.getDomain(input, { allowPrivateDomains: false });
  if (!apex) {
    return;
  }
  set.add(apex);
};
const processLocalDomainSet = async (domainSetPath: string, set: Set<string>) => { const processLocalDomainSet = async (domainSetPath: string, set: Set<string>) => {
for await (const line of readFileByLine(domainSetPath)) { for await (const l of readFileByLine(domainSetPath)) {
// console.log({ line }); const line = processLine(l);
if (line) {
const parsed = tldts.parse(line, { allowPrivateDomains: true, detectIp: false }); set.add(line[0] === '.' ? line.slice(1) : line);
if (parsed.isIp) continue;
if (parsed.isIcann || parsed.isPrivate) {
if (parsed.domain) {
set.add(parsed.domain);
}
continue;
}
if (processLine(line)) {
console.warn('[drop line from domainset]', line);
} }
} }
}; };
@ -40,12 +20,12 @@ const processLocalDomainSet = async (domainSetPath: string, set: Set<string>) =>
const processLocalRuleSet = async (ruleSetPath: string, set: Set<string>, keywords: Set<string>) => { const processLocalRuleSet = async (ruleSetPath: string, set: Set<string>, keywords: Set<string>) => {
for await (const line of readFileByLine(ruleSetPath)) { for await (const line of readFileByLine(ruleSetPath)) {
if (line.startsWith('DOMAIN-SUFFIX,')) { if (line.startsWith('DOMAIN-SUFFIX,')) {
addApexDomain(line.replace('DOMAIN-SUFFIX,', ''), set); set.add(line.replace('DOMAIN-SUFFIX,', ''));
} else if (line.startsWith('DOMAIN,')) { } else if (line.startsWith('DOMAIN,')) {
addApexDomain(line.replace('DOMAIN,', ''), set); set.add(line.replace('DOMAIN,', ''));
} else if (line.startsWith('DOMAIN-KEYWORD')) { } else if (line.startsWith('DOMAIN-KEYWORD')) {
keywords.add(escapeRegExp(line.replace('DOMAIN-KEYWORD,', ''))); keywords.add(escapeRegExp(line.replace('DOMAIN-KEYWORD,', '')));
} else if (line.startsWith('USER-AGENT,') || line.startsWith('PROCESS-NAME,') || line.startsWith('URL-REGEX,')) { } else if (line.includes('USER-AGENT,') || line.includes('PROCESS-NAME,') || line.includes('URL-REGEX,')) {
// do nothing // do nothing
} else if (processLine(line)) { } else if (processLine(line)) {
console.warn('[drop line from ruleset]', line); console.warn('[drop line from ruleset]', line);

View File

@ -23,8 +23,6 @@ import { setAddFromArray } from './lib/set-add-from-array';
export const buildRejectDomainSet = task(import.meta.path, async () => { export const buildRejectDomainSet = task(import.meta.path, async () => {
/** Whitelists */ /** Whitelists */
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST); const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
const domainKeywordsSet = new Set<string>();
const domainSuffixSet = new Set<string>();
const domainSets = new Set<string>(); const domainSets = new Set<string>();
@ -95,21 +93,23 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
let previousSize = domainSets.size; let previousSize = domainSets.size;
console.log(`Import ${previousSize} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`); console.log(`Import ${previousSize} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/reject.conf'))) {
const [type, keyword] = line.split(',');
if (type === 'DOMAIN-KEYWORD') {
domainKeywordsSet.add(keyword.trim());
} else if (type === 'DOMAIN-SUFFIX') {
domainSuffixSet.add(keyword.trim());
}
}
console.log(`Import ${domainKeywordsSet.size} black keywords and ${domainSuffixSet.size} black suffixes!`);
previousSize = domainSets.size;
// Dedupe domainSets // Dedupe domainSets
traceSync('* Dedupe from black keywords/suffixes', () => { await traceAsync('* Dedupe from black keywords/suffixes', async () => {
/** Collect DOMAIN-SUFFIX from non_ip/reject.conf for deduplication */
const domainSuffixSet = new Set<string>();
/** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */
const domainKeywordsSet = new Set<string>();
for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/reject.conf'))) {
const [type, keyword] = line.split(',');
if (type === 'DOMAIN-KEYWORD') {
domainKeywordsSet.add(keyword.trim());
} else if (type === 'DOMAIN-SUFFIX') {
domainSuffixSet.add(keyword.trim());
}
}
const trie1 = createTrie(domainSets); const trie1 = createTrie(domainSets);
domainSuffixSet.forEach(suffix => { domainSuffixSet.forEach(suffix => {

View File

@ -24,7 +24,7 @@ const latestTopUserAgentsPromise = fsCache.apply(
{ {
serializer: serializeArray, serializer: serializeArray,
deserializer: deserializeArray, deserializer: deserializeArray,
ttl: TTL.ONE_DAY() ttl: TTL.THREE_DAYS()
} }
); );

View File

@ -1,7 +1,6 @@
import fs from 'fs'; import { existsSync, createWriteStream } from 'fs';
import fsp from 'fs/promises'; import { mkdir } from 'fs/promises';
import path from 'path'; import path from 'path';
import { Readable } from 'stream';
import { pipeline } from 'stream/promises'; import { pipeline } from 'stream/promises';
import { readFileByLine } from './lib/fetch-text-by-line'; import { readFileByLine } from './lib/fetch-text-by-line';
import { isCI } from 'ci-info'; import { isCI } from 'ci-info';
@ -9,6 +8,7 @@ import { task } from './lib/trace-runner';
import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry'; import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
import tarStream from 'tar-stream'; import tarStream from 'tar-stream';
import zlib from 'zlib'; import zlib from 'zlib';
import { Readable } from 'stream';
const IS_READING_BUILD_OUTPUT = 1 << 2; const IS_READING_BUILD_OUTPUT = 1 << 2;
const ALL_FILES_EXISTS = 1 << 3; const ALL_FILES_EXISTS = 1 << 3;
@ -31,7 +31,7 @@ export const downloadPreviousBuild = task(import.meta.path, async () => {
if (!isCI) { if (!isCI) {
// Bun.file().exists() doesn't check directory // Bun.file().exists() doesn't check directory
if (!fs.existsSync(path.join(import.meta.dir, '..', line))) { if (!existsSync(path.join(import.meta.dir, '..', line))) {
flag = flag & ~ALL_FILES_EXISTS; flag = flag & ~ALL_FILES_EXISTS;
} }
} }
@ -57,7 +57,7 @@ export const downloadPreviousBuild = task(import.meta.path, async () => {
const extract = tarStream.extract(); const extract = tarStream.extract();
const gunzip = zlib.createGunzip(); const gunzip = zlib.createGunzip();
pipeline( pipeline(
resp.body as any, Readable.fromWeb(resp.body) as any,
gunzip, gunzip,
extract extract
); );
@ -78,10 +78,10 @@ export const downloadPreviousBuild = task(import.meta.path, async () => {
const relativeEntryPath = entry.header.name.replace(pathPrefix, ''); const relativeEntryPath = entry.header.name.replace(pathPrefix, '');
const targetPath = path.join(import.meta.dir, '..', relativeEntryPath); const targetPath = path.join(import.meta.dir, '..', relativeEntryPath);
await fsp.mkdir(path.dirname(targetPath), { recursive: true }); await mkdir(path.dirname(targetPath), { recursive: true });
await pipeline( await pipeline(
entry as any, entry as any,
fs.createWriteStream(targetPath) createWriteStream(targetPath)
); );
} }
}); });

View File

@ -3,18 +3,16 @@ import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
import { createMemoizedPromise } from './lib/memo-promise'; import { createMemoizedPromise } from './lib/memo-promise';
import { traceAsync } from './lib/trace-runner'; import { traceAsync } from './lib/trace-runner';
export const getPublicSuffixListTextPromise = createMemoizedPromise( export const getPublicSuffixListTextPromise = createMemoizedPromise(() => traceAsync(
() => traceAsync( 'obtain public_suffix_list',
'obtain public_suffix_list', () => fsCache.apply(
() => fsCache.apply( 'https://publicsuffix.org/list/public_suffix_list.dat',
'https://publicsuffix.org/list/public_suffix_list.dat', () => fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit).then(r => r.text()),
() => fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit).then(r => r.text()), {
{ // https://github.com/publicsuffix/list/blob/master/.github/workflows/tld-update.yml
// https://github.com/publicsuffix/list/blob/master/.github/workflows/tld-update.yml // Though the action runs every 24 hours, the IANA list is updated every 7 days.
// Though the action runs every 24 hours, the IANA list is updated every 7 days. // So a 3 day TTL should be enough.
// So a 3 day TTL should be enough. ttl: TTL.THREE_DAYS()
ttl: TTL.THREE_DAYS() }
}
)
) )
); ));

View File

@ -2,8 +2,9 @@
import { Database } from 'bun:sqlite'; import { Database } from 'bun:sqlite';
import os from 'os'; import os from 'os';
import path from 'path'; import path from 'path';
import fs from 'fs'; import { mkdirSync } from 'fs';
import picocolors from 'picocolors'; import picocolors from 'picocolors';
import { traceSync } from './trace-runner';
const identity = (x: any) => x; const identity = (x: any) => x;
@ -64,7 +65,7 @@ export class Cache {
constructor({ cachePath = path.join(os.tmpdir() || '/tmp', 'hdc'), tbd }: CacheOptions = {}) { constructor({ cachePath = path.join(os.tmpdir() || '/tmp', 'hdc'), tbd }: CacheOptions = {}) {
this.cachePath = cachePath; this.cachePath = cachePath;
fs.mkdirSync(this.cachePath, { recursive: true }); mkdirSync(this.cachePath, { recursive: true });
if (tbd != null) this.tbd = tbd; if (tbd != null) this.tbd = tbd;
const db = new Database(path.join(this.cachePath, 'cache.db')); const db = new Database(path.join(this.cachePath, 'cache.db'));
@ -151,7 +152,7 @@ export class Cache {
} }
} }
export const fsCache = new Cache({ cachePath: path.resolve(import.meta.dir, '../../.cache') }); export const fsCache = traceSync('initializing filesystem cache', () => new Cache({ cachePath: path.resolve(import.meta.dir, '../../.cache') }));
// process.on('exit', () => { // process.on('exit', () => {
// fsCache.destroy(); // fsCache.destroy();
// }); // });

View File

@ -12,6 +12,7 @@ const sharedConfig2 = { allowPrivateDomains: true, detectIp: false };
export const parse = (domain: string) => cache.sync(domain, () => tldts.parse(domain, sharedConfig)); export const parse = (domain: string) => cache.sync(domain, () => tldts.parse(domain, sharedConfig));
/** { allowPrivateDomains: true, detectIp: false } */ /** { allowPrivateDomains: true, detectIp: false } */
export const parse2 = (domain: string) => cache2.sync(domain, () => tldts.parse(domain, sharedConfig2)); export const parse2 = (domain: string) => cache2.sync(domain, () => tldts.parse(domain, sharedConfig2));
export const parseWithoutDetectIp = parse2;
let gothillGetDomainCache: ReturnType<typeof createCache> | null = null; let gothillGetDomainCache: ReturnType<typeof createCache> | null = null;
export const createCachedGorhillGetDomain = (gorhill: PublicSuffixList) => { export const createCachedGorhillGetDomain = (gorhill: PublicSuffixList) => {

View File

@ -18,7 +18,7 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
isEqual = false; isEqual = false;
} else { } else {
isEqual = await traceAsync( isEqual = await traceAsync(
picocolors.gray(`Comparing ${filePath}`), picocolors.gray(`comparing ${filePath}`),
async () => { async () => {
let index = 0; let index = 0;
@ -62,11 +62,11 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
} }
if (isEqual) { if (isEqual) {
console.log(picocolors.gray(`Same Content, bail out writing: ${filePath}`)); console.log(picocolors.dim(`same content, bail out writing: ${filePath}`));
return; return;
} }
await traceAsync(picocolors.gray(`Writing ${filePath}`), async () => { await traceAsync(picocolors.gray(`writing ${filePath}`), async () => {
if (linesALen < 10000) { if (linesALen < 10000) {
return Bun.write(file, `${linesA.join('\n')}\n`); return Bun.write(file, `${linesA.join('\n')}\n`);
} }

View File

@ -3,34 +3,34 @@ import { fetchWithRetry, defaultRequestInit } from './fetch-retry';
import { TextLineStream } from './text-line-transform-stream'; import { TextLineStream } from './text-line-transform-stream';
import { PolyfillTextDecoderStream } from './text-decoder-stream'; import { PolyfillTextDecoderStream } from './text-decoder-stream';
function createTextLineStreamFromStreamSource(stream: ReadableStream<Uint8Array>) { // function createTextLineStreamFromStreamSource(stream: ReadableStream<Uint8Array>) {
return stream // return stream
.pipeThrough(new PolyfillTextDecoderStream()) // .pipeThrough(new PolyfillTextDecoderStream())
.pipeThrough(new TextLineStream()); // .pipeThrough(new TextLineStream());
}
// const decoder = new TextDecoder('utf-8');
// async function *createTextLineAsyncGeneratorFromStreamSource(stream: ReadableStream<Uint8Array>): AsyncGenerator<string> {
// let buf = '';
// for await (const chunk of stream) {
// const chunkStr = decoder.decode(chunk).replaceAll('\r\n', '\n');
// for (let i = 0, len = chunkStr.length; i < len; i++) {
// const char = chunkStr[i];
// if (char === '\n') {
// yield buf;
// buf = '';
// } else {
// buf += char;
// }
// }
// }
// if (buf) {
// yield buf;
// }
// } // }
const decoder = new TextDecoder('utf-8');
/**
 * Decode a byte stream as UTF-8 and yield it one line at a time.
 *
 * Lines are terminated by `\n`; a `\r` directly before the `\n` is treated as
 * part of the terminator and removed. Blank lines are yielded as empty
 * strings. Text after the final `\n` is yielded only when it is non-empty, so
 * a trailing newline does not produce an extra empty line.
 *
 * @param stream - source of raw bytes (e.g. a file or response body)
 * @returns an async generator producing each line without its terminator
 */
async function *createTextLineAsyncGeneratorFromStreamSource(stream: ReadableStream<Uint8Array>): AsyncGenerator<string> {
  // Streaming decode is stateful (it buffers incomplete multi-byte sequences),
  // so every invocation owns its decoder instead of sharing a module-level one;
  // otherwise concurrently consumed streams would corrupt each other.
  const streamDecoder = new TextDecoder('utf-8');
  // Text received so far that has not yet been terminated by a newline.
  let pending = '';

  for await (const chunk of stream) {
    // Everything already in `pending` is known to contain no '\n', so only the
    // freshly decoded part has to be scanned.
    const searchFrom = pending.length;
    // `stream: true` keeps a multi-byte character that is split across two
    // chunks intact instead of turning each half into U+FFFD.
    pending += streamDecoder.decode(chunk, { stream: true });

    let lineStart = 0;
    let newlineAt = pending.indexOf('\n', searchFrom);
    while (newlineAt !== -1) {
      // Checking for '\r' here (rather than replacing "\r\n" per chunk) also
      // handles a CRLF pair that straddles a chunk boundary.
      const lineEnd = newlineAt > lineStart && pending[newlineAt - 1] === '\r'
        ? newlineAt - 1
        : newlineAt;
      yield pending.slice(lineStart, lineEnd);

      lineStart = newlineAt + 1;
      newlineAt = pending.indexOf('\n', lineStart);
    }

    pending = pending.slice(lineStart);
  }

  // Flush the decoder: a truncated trailing byte sequence surfaces here.
  pending += streamDecoder.decode();
  if (pending) {
    yield pending;
  }
}
export function readFileByLine(file: string | URL | BunFile) { export function readFileByLine(file: string | URL | BunFile) {
if (typeof file === 'string') { if (typeof file === 'string') {
file = Bun.file(file); file = Bun.file(file);
@ -38,7 +38,7 @@ export function readFileByLine(file: string | URL | BunFile) {
file = Bun.file(file); file = Bun.file(file);
} }
return createTextLineStreamFromStreamSource(file.stream()); return createTextLineAsyncGeneratorFromStreamSource(file.stream());
} }
export function createReadlineInterfaceFromResponse(resp: Response) { export function createReadlineInterfaceFromResponse(resp: Response) {
@ -49,7 +49,7 @@ export function createReadlineInterfaceFromResponse(resp: Response) {
throw new Error('Body has already been consumed.'); throw new Error('Body has already been consumed.');
} }
return createTextLineStreamFromStreamSource(resp.body); return createTextLineAsyncGeneratorFromStreamSource(resp.body);
} }
export function fetchRemoteTextByLine(url: string | URL) { export function fetchRemoteTextByLine(url: string | URL) {

View File

@ -56,23 +56,20 @@ export function processHosts(hostsUrl: string, includeAllSubDomain = false, ttl:
continue; continue;
} }
const domain = line.split(/\s/)[1]; const _domain = line.split(/\s/)[1]?.trim();
if (!_domain) {
continue;
}
const domain = normalizeDomain(_domain);
if (!domain) { if (!domain) {
continue; continue;
} }
const _domain = domain.trim(); if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
console.warn(picocolors.red(hostsUrl), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
if (DEBUG_DOMAIN_TO_FIND && _domain.includes(DEBUG_DOMAIN_TO_FIND)) {
console.warn(picocolors.red(hostsUrl), '(black)', _domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
foundDebugDomain = true; foundDebugDomain = true;
} }
const domainToAdd = normalizeDomain(_domain); domainSets.add(includeAllSubDomain ? `.${domain}` : domain);
if (!domainToAdd) {
continue;
}
domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
} }
console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size)); console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size));
@ -102,11 +99,11 @@ export async function processFilterRules(
fallbackUrls?: readonly string[] | undefined | null, fallbackUrls?: readonly string[] | undefined | null,
ttl: number | null = null ttl: number | null = null
): Promise<{ white: string[], black: string[], foundDebugDomain: boolean }> { ): Promise<{ white: string[], black: string[], foundDebugDomain: boolean }> {
const [white, black, warningMessages] = await traceAsync(`- processFilterRules: ${filterRulesUrl}`, () => fsCache.apply<[ const [white, black, warningMessages] = await traceAsync(`- processFilterRules: ${filterRulesUrl}`, () => fsCache.apply<Readonly<[
white: string[], white: string[],
black: string[], black: string[],
warningMessages: string[] warningMessages: string[]
]>( ]>>(
filterRulesUrl, filterRulesUrl,
async () => { async () => {
const whitelistDomainSets = new Set<string>(); const whitelistDomainSets = new Set<string>();

View File

@ -4,7 +4,7 @@ export const HOSTS = [
['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, TTL.THREE_HOURS()], ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, TTL.THREE_HOURS()],
['https://someonewhocares.org/hosts/hosts', true, TTL.THREE_HOURS()], ['https://someonewhocares.org/hosts/hosts', true, TTL.THREE_HOURS()],
// no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl // no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false, TTL.THREE_DAYS()], ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', true, TTL.THREE_DAYS()],
// have not been updated for more than a year, so we set a 14 days cache ttl // have not been updated for more than a year, so we set a 14 days cache ttl
['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', true, TTL.TWO_WEEKS()], ['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', true, TTL.TWO_WEEKS()],
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false, TTL.THREE_DAYS()], ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false, TTL.THREE_DAYS()],

View File

@ -12,6 +12,7 @@ module.exports.DOMESTICS = /** @type {const} */({
'aliyun.com', 'aliyun.com',
'aliyuncs.com', 'aliyuncs.com',
'alikunlun.com', 'alikunlun.com',
'cdngslb.com',
'alipay.com', 'alipay.com',
'alipay.cn', 'alipay.cn',
'alipay.com.cn', 'alipay.com.cn',
@ -130,6 +131,18 @@ module.exports.DOMESTICS = /** @type {const} */({
'bilibilipay.com' 'bilibilipay.com'
] ]
}, },
BILIBILI_ALI: {
dns: 'quic://223.5.5.5:853',
domains: [
'upos-sz-mirrorali.bilivideo.com'
]
},
BILIBILI_BD: {
dns: '180.76.76.76',
domains: [
'upos-sz-mirrorbos.bilivideo.com'
]
},
XIAOMI: { XIAOMI: {
dns: 'https://120.53.53.53/dns-query', dns: 'https://120.53.53.53/dns-query',
domains: [ domains: [
@ -155,6 +168,8 @@ module.exports.DOMESTICS = /** @type {const} */({
'toutiaoimg.cn', 'toutiaoimg.cn',
'toutiaostatic.com', 'toutiaostatic.com',
'toutiaovod.com', 'toutiaovod.com',
'toutiaocloud.com',
'toutiaopage.com',
'feiliao.com', 'feiliao.com',
'iesdouyin.com', 'iesdouyin.com',
'pstatp.com', 'pstatp.com',

View File

@ -30,6 +30,7 @@ PROCESS-NAME,LemonService
DEST-PORT,7680 DEST-PORT,7680
# >> HTTPDNS # >> HTTPDNS
# https://github.com/VirgilClyne/GetSomeFries/wiki/%F0%9F%9A%AB-HTTPDNS
# Aliyun # Aliyun
DOMAIN,httpdns-api.aliyuncs.com DOMAIN,httpdns-api.aliyuncs.com