Minor changes

SukkaW 2024-01-14 00:44:46 +08:00
parent 75c9e084a9
commit 897a505c32
13 changed files with 106 additions and 113 deletions

View File

@ -1,38 +1,18 @@
import path from 'path';
import * as tldts from 'tldts';
import { processLine } from './lib/process-line';
import { readFileByLine } from './lib/fetch-text-by-line';
import { sortDomains } from './lib/stable-sort-domain';
import { task } from './lib/trace-runner';
import { compareAndWriteFile } from './lib/create-file';
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
// const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse');
const escapeRegExp = (string = '') => string.replaceAll(/[$()*+.?[\\\]^{|}]/g, '\\$&');
const addApexDomain = (input: string, set: Set<string>) => {
// We are including the private domains themselves
const d = tldts.getDomain(input, { allowPrivateDomains: false });
if (d) {
set.add(d);
}
};
const processLocalDomainSet = async (domainSetPath: string, set: Set<string>) => {
for await (const line of readFileByLine(domainSetPath)) {
// console.log({ line });
const parsed = tldts.parse(line, { allowPrivateDomains: true, detectIp: false });
if (parsed.isIp) continue;
if (parsed.isIcann || parsed.isPrivate) {
if (parsed.domain) {
set.add(parsed.domain);
}
continue;
}
if (processLine(line)) {
console.warn('[drop line from domainset]', line);
for await (const l of readFileByLine(domainSetPath)) {
const line = processLine(l);
if (line) {
set.add(line[0] === '.' ? line.slice(1) : line);
}
}
};
@ -40,12 +20,12 @@ const processLocalDomainSet = async (domainSetPath: string, set: Set<string>) =>
const processLocalRuleSet = async (ruleSetPath: string, set: Set<string>, keywords: Set<string>) => {
for await (const line of readFileByLine(ruleSetPath)) {
if (line.startsWith('DOMAIN-SUFFIX,')) {
addApexDomain(line.replace('DOMAIN-SUFFIX,', ''), set);
set.add(line.replace('DOMAIN-SUFFIX,', ''));
} else if (line.startsWith('DOMAIN,')) {
addApexDomain(line.replace('DOMAIN,', ''), set);
set.add(line.replace('DOMAIN,', ''));
} else if (line.startsWith('DOMAIN-KEYWORD')) {
keywords.add(escapeRegExp(line.replace('DOMAIN-KEYWORD,', '')));
} else if (line.startsWith('USER-AGENT,') || line.startsWith('PROCESS-NAME,') || line.startsWith('URL-REGEX,')) {
} else if (line.includes('USER-AGENT,') || line.includes('PROCESS-NAME,') || line.includes('URL-REGEX,')) {
// do nothing
} else if (processLine(line)) {
console.warn('[drop line from ruleset]', line);
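
With the tldts/addApexDomain pass dropped, the domainset branch now trusts its own source files and only strips the leading dot from suffix-form entries. A minimal sketch of the new behavior, assuming processLine returns the meaningful line or null for blanks and comments (its actual contract lives in ./lib/process-line):

// hypothetical stand-in for ./lib/process-line's processLine
const processLineSketch = (raw: string): string | null => {
  const line = raw.trim();
  if (line.length === 0 || line.startsWith('#')) return null; // drop blanks and comments
  return line;
};

const set = new Set<string>();
for (const l of ['.example.com', 'cdn.example.net', '# a comment']) {
  const line = processLineSketch(l);
  if (line) {
    // suffix-form ".example.com" is stored as "example.com"
    set.add(line[0] === '.' ? line.slice(1) : line);
  }
}
console.log(set); // Set(2) { "example.com", "cdn.example.net" }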

View File

@ -23,8 +23,6 @@ import { setAddFromArray } from './lib/set-add-from-array';
export const buildRejectDomainSet = task(import.meta.path, async () => {
/** Whitelists */
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
const domainKeywordsSet = new Set<string>();
const domainSuffixSet = new Set<string>();
const domainSets = new Set<string>();
@ -95,21 +93,23 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
let previousSize = domainSets.size;
console.log(`Import ${previousSize} rules from Hosts / AdBlock Filter Rules & reject_sukka.conf!`);
for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/reject.conf'))) {
const [type, keyword] = line.split(',');
if (type === 'DOMAIN-KEYWORD') {
domainKeywordsSet.add(keyword.trim());
} else if (type === 'DOMAIN-SUFFIX') {
domainSuffixSet.add(keyword.trim());
}
}
console.log(`Import ${domainKeywordsSet.size} black keywords and ${domainSuffixSet.size} black suffixes!`);
previousSize = domainSets.size;
// Dedupe domainSets
traceSync('* Dedupe from black keywords/suffixes', () => {
await traceAsync('* Dedupe from black keywords/suffixes', async () => {
/** Collect DOMAIN-SUFFIX from non_ip/reject.conf for deduplication */
const domainSuffixSet = new Set<string>();
/** Collect DOMAIN-KEYWORD from non_ip/reject.conf for deduplication */
const domainKeywordsSet = new Set<string>();
for await (const line of readFileByLine(path.resolve(import.meta.dir, '../Source/non_ip/reject.conf'))) {
const [type, keyword] = line.split(',');
if (type === 'DOMAIN-KEYWORD') {
domainKeywordsSet.add(keyword.trim());
} else if (type === 'DOMAIN-SUFFIX') {
domainSuffixSet.add(keyword.trim());
}
}
const trie1 = createTrie(domainSets);
domainSuffixSet.forEach(suffix => {
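
Moving the reject.conf scan inside the traced block attributes its file I/O to the dedupe step, and since the callback now awaits readFileByLine, the synchronous traceSync no longer fits. A plausible shape for traceAsync, assuming it simply times the wrapped async block (the real helper is in ./lib/trace-runner):

const traceAsync = async <T>(label: string, fn: () => Promise<T>): Promise<T> => {
  const start = performance.now();
  try {
    return await fn();
  } finally {
    console.log(label, `${(performance.now() - start).toFixed(0)}ms`);
  }
};

// usage mirroring the hunk above
await traceAsync('* Dedupe from black keywords/suffixes', async () => {
  // read reject.conf, fill domainSuffixSet / domainKeywordsSet, then dedupe
});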

View File

@ -24,7 +24,7 @@ const latestTopUserAgentsPromise = fsCache.apply(
{
serializer: serializeArray,
deserializer: deserializeArray,
ttl: TTL.ONE_DAY()
ttl: TTL.THREE_DAYS()
}
);
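
The TTL bump from ONE_DAY to THREE_DAYS only matters in light of fsCache.apply's contract. A Map-backed sketch of that contract as inferred from this call site (an assumption; the real cache is SQLite-backed, per lib/cache-filesystem further down):

const store = new Map<string, { value: string, expiresAt: number }>();

async function applySketch<T>(
  key: string,
  fetcher: () => Promise<T>,
  opt: {
    ttl: number,
    serializer?: (v: T) => string,
    deserializer?: (s: string) => T
  }
): Promise<T> {
  const hit = store.get(key);
  if (hit && hit.expiresAt > Date.now()) {
    // cache hit: deserialize the stored string
    return (opt.deserializer ?? JSON.parse)(hit.value);
  }
  // cache miss (or expired): run the fetcher and store the serialized result
  const value = await fetcher();
  store.set(key, {
    value: (opt.serializer ?? JSON.stringify)(value),
    expiresAt: Date.now() + opt.ttl
  });
  return value;
}

// usage mirroring the hunk above: an array cached for three days
const THREE_DAYS = 3 * 24 * 60 * 60 * 1000;
const agents = await applySketch('top-user-agents', async () => ['Mozilla/5.0'], {
  ttl: THREE_DAYS,
  serializer: arr => arr.join('\n'),
  deserializer: s => s.split('\n')
});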

View File

@ -1,7 +1,6 @@
import fs from 'fs';
import fsp from 'fs/promises';
import { existsSync, createWriteStream } from 'fs';
import { mkdir } from 'fs/promises';
import path from 'path';
import { Readable } from 'stream';
import { pipeline } from 'stream/promises';
import { readFileByLine } from './lib/fetch-text-by-line';
import { isCI } from 'ci-info';
@ -9,6 +8,7 @@ import { task } from './lib/trace-runner';
import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
import tarStream from 'tar-stream';
import zlib from 'zlib';
import { Readable } from 'stream';
const IS_READING_BUILD_OUTPUT = 1 << 2;
const ALL_FILES_EXISTS = 1 << 3;
@ -31,7 +31,7 @@ export const downloadPreviousBuild = task(import.meta.path, async () => {
if (!isCI) {
// Bun.file().exists() doesn't work for directories
if (!fs.existsSync(path.join(import.meta.dir, '..', line))) {
if (!existsSync(path.join(import.meta.dir, '..', line))) {
flag = flag & ~ALL_FILES_EXISTS;
}
}
@ -57,7 +57,7 @@ export const downloadPreviousBuild = task(import.meta.path, async () => {
const extract = tarStream.extract();
const gunzip = zlib.createGunzip();
pipeline(
resp.body as any,
Readable.fromWeb(resp.body) as any,
gunzip,
extract
);
@ -78,10 +78,10 @@ export const downloadPreviousBuild = task(import.meta.path, async () => {
const relativeEntryPath = entry.header.name.replace(pathPrefix, '');
const targetPath = path.join(import.meta.dir, '..', relativeEntryPath);
await fsp.mkdir(path.dirname(targetPath), { recursive: true });
await mkdir(path.dirname(targetPath), { recursive: true });
await pipeline(
entry as any,
fs.createWriteStream(targetPath)
createWriteStream(targetPath)
);
}
});
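
fetch() hands back a WHATWG ReadableStream, while pipeline from stream/promises expects Node streams; Readable.fromWeb (implemented by both Node and Bun) bridges the two, with the as any cast papering over the web-stream type mismatch, as in the hunk above. A self-contained sketch with an illustrative URL:

import { Readable } from 'stream';
import { pipeline } from 'stream/promises';
import zlib from 'zlib';
import tarStream from 'tar-stream';

const resp = await fetch('https://example.com/previous-build.tgz'); // illustrative URL
if (!resp.body) throw new Error('empty body');

const extract = tarStream.extract();
// not awaited yet, so the tar entries can be consumed below
const done = pipeline(Readable.fromWeb(resp.body as any), zlib.createGunzip(), extract);

for await (const entry of extract) {
  console.log(entry.header.name);
  entry.resume(); // drain the entry so tar-stream can advance to the next one
}
await done;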

View File

@ -3,18 +3,16 @@ import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
import { createMemoizedPromise } from './lib/memo-promise';
import { traceAsync } from './lib/trace-runner';
export const getPublicSuffixListTextPromise = createMemoizedPromise(
() => traceAsync(
'obtain public_suffix_list',
() => fsCache.apply(
'https://publicsuffix.org/list/public_suffix_list.dat',
() => fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit).then(r => r.text()),
{
// https://github.com/publicsuffix/list/blob/master/.github/workflows/tld-update.yml
// Though the action runs every 24 hours, the IANA list is updated every 7 days.
// So a 3 day TTL should be enough.
ttl: TTL.THREE_DAYS()
}
)
export const getPublicSuffixListTextPromise = createMemoizedPromise(() => traceAsync(
'obtain public_suffix_list',
() => fsCache.apply(
'https://publicsuffix.org/list/public_suffix_list.dat',
() => fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit).then(r => r.text()),
{
// https://github.com/publicsuffix/list/blob/master/.github/workflows/tld-update.yml
// Though the action runs every 24 hours, the IANA list is updated every 7 days.
// So a 3 day TTL should be enough.
ttl: TTL.THREE_DAYS()
}
)
);
));
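
The hunk above is mostly re-indentation, but the helper it reformats is worth spelling out. A plausible shape for createMemoizedPromise, assuming it caches the in-flight promise so concurrent callers share a single fetch (the real helper is in ./lib/memo-promise):

const createMemoizedPromise = <T>(fn: () => Promise<T>): (() => Promise<T>) => {
  let cached: Promise<T> | null = null;
  return () => (cached ??= fn()); // first call kicks off fn; later calls reuse the same promise
};

// every caller shares one network request
const getOnce = createMemoizedPromise(() => fetch('https://example.com').then(r => r.text()));
await Promise.all([getOnce(), getOnce()]);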

View File

@ -2,8 +2,9 @@
import { Database } from 'bun:sqlite';
import os from 'os';
import path from 'path';
import fs from 'fs';
import { mkdirSync } from 'fs';
import picocolors from 'picocolors';
import { traceSync } from './trace-runner';
const identity = (x: any) => x;
@ -64,7 +65,7 @@ export class Cache {
constructor({ cachePath = path.join(os.tmpdir() || '/tmp', 'hdc'), tbd }: CacheOptions = {}) {
this.cachePath = cachePath;
fs.mkdirSync(this.cachePath, { recursive: true });
mkdirSync(this.cachePath, { recursive: true });
if (tbd != null) this.tbd = tbd;
const db = new Database(path.join(this.cachePath, 'cache.db'));
@ -151,7 +152,7 @@ export class Cache {
}
}
export const fsCache = new Cache({ cachePath: path.resolve(import.meta.dir, '../../.cache') });
export const fsCache = traceSync('initializing filesystem cache', () => new Cache({ cachePath: path.resolve(import.meta.dir, '../../.cache') }));
// process.on('exit', () => {
// fsCache.destroy();
// });

View File

@ -12,6 +12,7 @@ const sharedConfig2 = { allowPrivateDomains: true, detectIp: false };
export const parse = (domain: string) => cache.sync(domain, () => tldts.parse(domain, sharedConfig));
/** { allowPrivateDomains: true, detectIp: false } */
export const parse2 = (domain: string) => cache2.sync(domain, () => tldts.parse(domain, sharedConfig2));
export const parseWithoutDetectIp = parse2;
let gothillGetDomainCache: ReturnType<typeof createCache> | null = null;
export const createCachedGorhillGetDomain = (gorhill: PublicSuffixList) => {

View File

@ -18,7 +18,7 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
isEqual = false;
} else {
isEqual = await traceAsync(
picocolors.gray(`Comparing ${filePath}`),
picocolors.gray(`comparing ${filePath}`),
async () => {
let index = 0;
@ -62,11 +62,11 @@ export async function compareAndWriteFile(linesA: string[], filePath: string) {
}
if (isEqual) {
console.log(picocolors.gray(`Same Content, bail out writing: ${filePath}`));
console.log(picocolors.dim(`same content, bail out writing: ${filePath}`));
return;
}
await traceAsync(picocolors.gray(`Writing ${filePath}`), async () => {
await traceAsync(picocolors.gray(`writing ${filePath}`), async () => {
if (linesALen < 10000) {
return Bun.write(file, `${linesA.join('\n')}\n`);
}

View File

@ -3,34 +3,34 @@ import { fetchWithRetry, defaultRequestInit } from './fetch-retry';
import { TextLineStream } from './text-line-transform-stream';
import { PolyfillTextDecoderStream } from './text-decoder-stream';
function createTextLineStreamFromStreamSource(stream: ReadableStream<Uint8Array>) {
return stream
.pipeThrough(new PolyfillTextDecoderStream())
.pipeThrough(new TextLineStream());
}
// const decoder = new TextDecoder('utf-8');
// async function *createTextLineAsyncGeneratorFromStreamSource(stream: ReadableStream<Uint8Array>): AsyncGenerator<string> {
// let buf = '';
// for await (const chunk of stream) {
// const chunkStr = decoder.decode(chunk).replaceAll('\r\n', '\n');
// for (let i = 0, len = chunkStr.length; i < len; i++) {
// const char = chunkStr[i];
// if (char === '\n') {
// yield buf;
// buf = '';
// } else {
// buf += char;
// }
// }
// }
// if (buf) {
// yield buf;
// }
// function createTextLineStreamFromStreamSource(stream: ReadableStream<Uint8Array>) {
// return stream
// .pipeThrough(new PolyfillTextDecoderStream())
// .pipeThrough(new TextLineStream());
// }
const decoder = new TextDecoder('utf-8');
async function *createTextLineAsyncGeneratorFromStreamSource(stream: ReadableStream<Uint8Array>): AsyncGenerator<string> {
let buf = '';
for await (const chunk of stream) {
const chunkStr = decoder.decode(chunk).replaceAll('\r\n', '\n');
for (let i = 0, len = chunkStr.length; i < len; i++) {
const char = chunkStr[i];
if (char === '\n') {
yield buf;
buf = '';
} else {
buf += char;
}
}
}
if (buf) {
yield buf;
}
}
export function readFileByLine(file: string | URL | BunFile) {
if (typeof file === 'string') {
file = Bun.file(file);
@ -38,7 +38,7 @@ export function readFileByLine(file: string | URL | BunFile) {
file = Bun.file(file);
}
return createTextLineStreamFromStreamSource(file.stream());
return createTextLineAsyncGeneratorFromStreamSource(file.stream());
}
export function createReadlineInterfaceFromResponse(resp: Response) {
@ -49,7 +49,7 @@ export function createReadlineInterfaceFromResponse(resp: Response) {
throw new Error('Body has already been consumed.');
}
return createTextLineStreamFromStreamSource(resp.body);
return createTextLineAsyncGeneratorFromStreamSource(resp.body);
}
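
Swapping the TextLineStream pipeline for a hand-rolled async generator leaves every call site untouched, since both are consumed with for await. A usage sketch (the file path is illustrative):

import { readFileByLine } from './lib/fetch-text-by-line';

for await (const line of readFileByLine('/etc/hosts')) {
  if (line.startsWith('#')) continue; // skip comments
  console.log(line);
}

The generator buffers partial lines across chunk boundaries and flushes the trailing buffer at the end; one caveat is that decoder.decode(chunk) without { stream: true } can mis-decode a multibyte UTF-8 sequence split across chunks, which only matters for non-ASCII input.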
export function fetchRemoteTextByLine(url: string | URL) {

View File

@ -56,23 +56,20 @@ export function processHosts(hostsUrl: string, includeAllSubDomain = false, ttl:
continue;
}
const domain = line.split(/\s/)[1];
const _domain = line.split(/\s/)[1]?.trim();
if (!_domain) {
continue;
}
const domain = normalizeDomain(_domain);
if (!domain) {
continue;
}
const _domain = domain.trim();
if (DEBUG_DOMAIN_TO_FIND && _domain.includes(DEBUG_DOMAIN_TO_FIND)) {
console.warn(picocolors.red(hostsUrl), '(black)', _domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
console.warn(picocolors.red(hostsUrl), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
foundDebugDomain = true;
}
const domainToAdd = normalizeDomain(_domain);
if (!domainToAdd) {
continue;
}
domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
domainSets.add(includeAllSubDomain ? `.${domain}` : domain);
}
console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size));
@ -102,11 +99,11 @@ export async function processFilterRules(
fallbackUrls?: readonly string[] | undefined | null,
ttl: number | null = null
): Promise<{ white: string[], black: string[], foundDebugDomain: boolean }> {
const [white, black, warningMessages] = await traceAsync(`- processFilterRules: ${filterRulesUrl}`, () => fsCache.apply<[
const [white, black, warningMessages] = await traceAsync(`- processFilterRules: ${filterRulesUrl}`, () => fsCache.apply<Readonly<[
white: string[],
black: string[],
warningMessages: string[]
]>(
]>>(
filterRulesUrl,
async () => {
const whitelistDomainSets = new Set<string>();
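
The only typing change in this hunk is the new Readonly<[...]> wrapper around the cached tuple. A small illustration of what it buys (names are illustrative):

type FilterResult = Readonly<[white: string[], black: string[], warningMessages: string[]]>;

const result: FilterResult = [['allowed.example'], ['blocked.example'], []];
const [white, black] = result; // labeled elements destructure as usual
// result[0] = [];             // type error: the tuple slots themselves are readonly
white.push('another.example'); // still allowed: Readonly is shallow, so inner arrays stay mutable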

View File

@ -4,7 +4,7 @@ export const HOSTS = [
['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, TTL.THREE_HOURS()],
['https://someonewhocares.org/hosts/hosts', true, TTL.THREE_HOURS()],
// the nocoin list is no longer actively maintained, but it updated daily while it was, so we set a 3-day cache TTL
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false, TTL.THREE_DAYS()],
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', true, TTL.THREE_DAYS()],
// has not been updated for more than a year, so we set a 14-day cache TTL
['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', true, TTL.TWO_WEEKS()],
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false, TTL.THREE_DAYS()],

View File

@ -12,6 +12,7 @@ module.exports.DOMESTICS = /** @type {const} */({
'aliyun.com',
'aliyuncs.com',
'alikunlun.com',
'cdngslb.com',
'alipay.com',
'alipay.cn',
'alipay.com.cn',
@ -130,6 +131,18 @@ module.exports.DOMESTICS = /** @type {const} */({
'bilibilipay.com'
]
},
BILIBILI_ALI: {
dns: 'quic://223.5.5.5:853',
domains: [
'upos-sz-mirrorali.bilivideo.com'
]
},
BILIBILI_BD: {
dns: '180.76.76.76',
domains: [
'upos-sz-mirrorbos.bilivideo.com'
]
},
XIAOMI: {
dns: 'https://120.53.53.53/dns-query',
domains: [
@ -155,6 +168,8 @@ module.exports.DOMESTICS = /** @type {const} */({
'toutiaoimg.cn',
'toutiaostatic.com',
'toutiaovod.com',
'toutiaocloud.com',
'toutiaopage.com',
'feiliao.com',
'iesdouyin.com',
'pstatp.com',

View File

@ -30,6 +30,7 @@ PROCESS-NAME,LemonService
DEST-PORT,7680
# >> HTTPDNS
# https://github.com/VirgilClyne/GetSomeFries/wiki/%F0%9F%9A%AB-HTTPDNS
# Aliyun
DOMAIN,httpdns-api.aliyuncs.com