Chore/CI: use fs cache to save bandwidth

SukkaW 2023-12-23 04:27:35 +08:00
parent 7fbd4a570f
commit 230ac3eb18
21 changed files with 358 additions and 206 deletions

View File

@@ -15,6 +15,12 @@ jobs:
         uses: actions/checkout@v4
         with:
           persist-credentials: false
+      - name: Cache cache.db
+        uses: actions/cache@v3
+        with:
+          path: .cache
+          key: ${{ runner.os }}-v1
       - uses: oven-sh/setup-bun@v1
         with:
           bun-version: latest

.gitignore
View File

@@ -2,6 +2,7 @@
 node_modules
 .clinic
 .wireit
+.cache
 public
 # $ build output

View File

@@ -1,7 +1,7 @@
 // @ts-check
 import path from 'path';
 import { createRuleset } from './lib/create-file';
-import { fetchRemoteTextAndReadByLine, readFileByLine } from './lib/fetch-text-by-line';
+import { fetchRemoteTextByLine, readFileByLine } from './lib/fetch-text-by-line';
 import { processLine } from './lib/process-line';
 import { task } from './lib/trace-runner';
 import { SHARED_DESCRIPTION } from './lib/constants';
@@ -9,7 +9,7 @@ import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
 const getBogusNxDomainIPs = async () => {
   const result: string[] = [];
-  for await (const line of await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')) {
+  for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')) {
     if (line && line.startsWith('bogus-nxdomain=')) {
       const ip = line.slice(15).trim();
       if (isProbablyIpv4(ip)) {

View File

@@ -1,27 +1,15 @@
 import path from 'path';
 import { createRuleset } from './lib/create-file';
-import { fetchRemoteTextAndReadByLine, readFileByLine } from './lib/fetch-text-by-line';
+import { readFileByLine } from './lib/fetch-text-by-line';
 import { createTrie } from './lib/trie';
 import { task } from './lib/trace-runner';
 import { processLine } from './lib/process-line';
 import { SHARED_DESCRIPTION } from './lib/constants';
+import { getPublicSuffixListTextPromise } from './download-publicsuffixlist';
 
-const publicSuffixPath: string = path.resolve(import.meta.dir, '../node_modules/.cache/public_suffix_list_dat.txt');
-
 const getS3OSSDomains = async (): Promise<Set<string>> => {
   const trie = createTrie();
 
-  const publicSuffixFile = Bun.file(publicSuffixPath);
-  if (await publicSuffixFile.exists()) {
-    for await (const line of readFileByLine(publicSuffixFile)) {
-      trie.add(line);
-    }
-  } else {
-    console.log('public_suffix_list.dat not found, fetch directly from remote.');
-    for await (const line of await fetchRemoteTextAndReadByLine('https://publicsuffix.org/list/public_suffix_list.dat')) {
-      trie.add(line);
-    }
-  }
+  for await (const line of (await getPublicSuffixListTextPromise()).split('\n')) {
+    trie.add(line);
+  }
 
   /**

View File

@@ -1,4 +1,4 @@
-import { fetchRemoteTextAndReadByLine } from './lib/fetch-text-by-line';
+import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
 import { resolve as pathResolve } from 'path';
 import { compareAndWriteFile, withBannerArray } from './lib/create-file';
 import { processLineFromReadline } from './lib/process-line';
@@ -21,7 +21,7 @@ const INCLUDE_CIDRS = [
 export const getChnCidrPromise = createMemoizedPromise(async () => {
   const cidr = await traceAsync(
     picocolors.gray('download chnroutes2'),
-    async () => processLineFromReadline(await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')),
+    async () => processLineFromReadline(await fetchRemoteTextByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')),
     picocolors.gray
   );
   return traceSync(

View File

@@ -1,4 +1,4 @@
-import { fetchRemoteTextAndReadByLine } from './lib/fetch-text-by-line';
+import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
 import { processLineFromReadline } from './lib/process-line';
 import path from 'path';
 import { task } from './lib/trace-runner';
@@ -26,7 +26,7 @@ const RESERVED_IPV4_CIDR = [
 ];
 export const buildInternalReverseChnCIDR = task(import.meta.path, async () => {
-  const cidr = await processLineFromReadline(await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt'));
+  const cidr = await processLineFromReadline(await fetchRemoteTextByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt'));
   const reversedCidr = merge(
     exclude(

View File

@@ -1,7 +1,7 @@
 import path from 'path';
 import { task, traceAsync } from './lib/trace-runner';
 import { createRuleset } from './lib/create-file';
-import { fetchRemoteTextAndReadByLine } from './lib/fetch-text-by-line';
+import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
 import { createTrie } from './lib/trie';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { createMemoizedPromise } from './lib/memo-promise';
@@ -22,7 +22,7 @@ const BLACKLIST = [
 export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
   const set = await traceAsync('fetch accelerated-domains.china.conf', async () => {
     const trie = createTrie();
-    for await (const line of await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
+    for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
       if (line.startsWith('server=/') && line.endsWith('/114.114.114.114')) {
         const domain = line.slice(8, -16);
         trie.add(domain);

View File

@@ -32,16 +32,16 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
   const [gorhill] = await Promise.all([
     getGorhillPublicSuffixPromise(),
     // Parse from remote hosts & domain lists
-    ...HOSTS.map(entry => processHosts(entry[0], entry[1]).then(hosts => {
+    ...HOSTS.map(entry => processHosts(entry[0], entry[1], entry[2], entry[3]).then(hosts => {
       hosts.forEach(host => {
         domainSets.add(host);
       });
     })),
-    ...DOMAIN_LISTS.map(entry => processDomainLists(entry[0], entry[1])),
+    ...DOMAIN_LISTS.map(entry => processDomainLists(entry[0], entry[1], entry[2])),
     ...ADGUARD_FILTERS.map(input => {
       const promise = typeof input === 'string'
         ? processFilterRules(input)
-        : processFilterRules(input[0], input[1]);
+        : processFilterRules(input[0], input[1], input[2]);
       return promise.then(({ white, black, foundDebugDomain }) => {
         if (foundDebugDomain) {

View File

@@ -21,9 +21,8 @@ const querySpeedtestApi = async (keyword: string): Promise<Array<string | null>>
     s.acquire()
   ]))[0];
+  const randomUserAgent = topUserAgents[Math.floor(Math.random() * topUserAgents.length)];
   try {
-    const randomUserAgent = topUserAgents[Math.floor(Math.random() * topUserAgents.length)];
     const key = `fetch speedtest endpoints: ${keyword}`;
     console.time(key);
@@ -47,6 +46,7 @@ const querySpeedtestApi = async (keyword: string): Promise<Array<string | null>>
     }
     const json = await res.json() as Array<{ url: string }>;
     s.release();
     console.timeEnd(key);

View File

@@ -1,7 +1,6 @@
 import fs from 'fs';
 import fsp from 'fs/promises';
 import path from 'path';
-import os from 'os';
 import { Readable } from 'stream';
 import { pipeline } from 'stream/promises';
 import { readFileByLine } from './lib/fetch-text-by-line';
@@ -85,16 +84,6 @@ export const downloadPreviousBuild = task(import.meta.path, async () => {
   );
 });
 
-export const downloadPublicSuffixList = task(import.meta.path, async () => {
-  const publicSuffixPath = path.resolve(import.meta.dir, '../node_modules/.cache/public_suffix_list_dat.txt');
-  const resp = await fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit);
-  return Bun.write(publicSuffixPath, resp as Response);
-}, 'download-publicsuffixlist');
-
 if (import.meta.main) {
-  Promise.all([
-    downloadPreviousBuild(),
-    downloadPublicSuffixList()
-  ]);
+  downloadPreviousBuild();
 }

View File

@@ -0,0 +1,10 @@
+import { fsCache } from './lib/cache-filesystem';
+import { defaultRequestInit, fetchWithRetry } from './lib/fetch-retry';
+import { createMemoizedPromise } from './lib/memo-promise';
+import { traceAsync } from './lib/trace-runner';
+
+export const getPublicSuffixListTextPromise = createMemoizedPromise(() => traceAsync('obtain public_suffix_list', () => fsCache.apply(
+  'public_suffix_list.dat',
+  () => fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit).then(r => r.text()),
+  { ttl: 24 * 60 * 60 * 1000 }
+)));
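
Downstream build tasks simply await the memoized promise; since it is created once per process, concurrent tasks share a single download (or a single cache-db read). A minimal consumption sketch follows — the comment/blank-line filtering is illustrative only; the actual build scripts feed the raw lines into a trie or into gorhill directly:

import { getPublicSuffixListTextPromise } from './download-publicsuffixlist';

// Hypothetical helper: turn the cached public_suffix_list.dat text into
// a clean list of suffix entries.
const getPublicSuffixDomains = async (): Promise<string[]> => {
  const text = await getPublicSuffixListTextPromise();
  return text
    .split('\n')
    .map(line => line.trim())
    // drop blank lines and "//" comments used by the PSL format
    .filter(line => line.length > 0 && !line.startsWith('//'));
};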

View File

@@ -1,4 +1,4 @@
-import { downloadPreviousBuild, downloadPublicSuffixList } from './download-previous-build';
+import { downloadPreviousBuild } from './download-previous-build';
 import { buildCommon } from './build-common';
 import { buildAntiBogusDomain } from './build-anti-bogus-domain';
 import { buildAppleCdn } from './build-apple-cdn';
@@ -33,23 +33,15 @@ import type { TaskResult } from './lib/trace-runner';
 // const buildInternalReverseChnCIDRWorker = new Worker(new URL('./workers/build-internal-reverse-chn-cidr-worker.ts', import.meta.url));
 const downloadPreviousBuildPromise = downloadPreviousBuild();
-const downloadPublicSuffixListPromise = downloadPublicSuffixList();
 const buildCommonPromise = downloadPreviousBuildPromise.then(() => buildCommon());
 const buildAntiBogusDomainPromise = downloadPreviousBuildPromise.then(() => buildAntiBogusDomain());
 const buildAppleCdnPromise = downloadPreviousBuildPromise.then(() => buildAppleCdn());
-const buildCdnConfPromise = Promise.all([
-  downloadPreviousBuildPromise,
-  downloadPublicSuffixListPromise
-]).then(() => buildCdnConf());
-const buildRejectDomainSetPromise = Promise.all([
-  downloadPreviousBuildPromise,
-  downloadPublicSuffixListPromise
-]).then(() => buildRejectDomainSet());
+const buildCdnConfPromise = downloadPreviousBuildPromise.then(() => buildCdnConf());
+const buildRejectDomainSetPromise = downloadPreviousBuildPromise.then(() => buildRejectDomainSet());
 const buildTelegramCIDRPromise = downloadPreviousBuildPromise.then(() => buildTelegramCIDR());
 const buildChnCidrPromise = downloadPreviousBuildPromise.then(() => buildChnCidr());
 const buildSpeedtestDomainSetPromise = downloadPreviousBuildPromise.then(() => buildSpeedtestDomainSet());
 const buildInternalCDNDomainsPromise = Promise.all([
-  downloadPublicSuffixListPromise,
   buildCommonPromise,
   buildCdnConfPromise
 ]).then(() => buildInternalCDNDomains());
@@ -84,7 +76,6 @@ import type { TaskResult } from './lib/trace-runner';
   const stats = await Promise.all([
     downloadPreviousBuildPromise,
-    downloadPublicSuffixListPromise,
     buildCommonPromise,
     buildAntiBogusDomainPromise,
     buildAppleCdnPromise,

View File

@@ -0,0 +1,131 @@
+// eslint-disable-next-line import/no-unresolved -- bun built-in module
+import { Database } from 'bun:sqlite';
+import os from 'os';
+import path from 'path';
+import fs from 'fs';
+import picocolors from 'picocolors';
+
+const identity = (x: any) => x;
+
+// eslint-disable-next-line sukka-ts/no-const-enum -- bun is smart, right?
+const enum CacheStatus {
+  Hit = 'hit',
+  Stale = 'stale',
+  Miss = 'miss'
+}
+
+export interface CacheOptions {
+  cachePath?: string,
+  tbd?: number
+}
+
+interface CacheApplyNonStringOption<T> {
+  ttl?: number | null,
+  serializer: (value: T) => string,
+  deserializer: (cached: string) => T,
+  temporaryBypass?: boolean
+}
+
+interface CacheApplyStringOption {
+  ttl?: number | null,
+  temporaryBypass?: boolean
+}
+
+type CacheApplyOption<T> = T extends string ? CacheApplyStringOption : CacheApplyNonStringOption<T>;
+
+export class Cache {
+  db: Database;
+  tbd = 60 * 1000; // time before deletion
+  cachePath: string;
+
+  constructor({ cachePath = path.join(os.tmpdir() || '/tmp', 'hdc'), tbd }: CacheOptions = {}) {
+    this.cachePath = cachePath;
+    fs.mkdirSync(this.cachePath, { recursive: true });
+    if (tbd != null) this.tbd = tbd;
+
+    const db = new Database(path.join(this.cachePath, 'cache.db'));
+
+    db.exec('PRAGMA journal_mode = WAL');
+
+    db.prepare('CREATE TABLE IF NOT EXISTS cache (key TEXT PRIMARY KEY, value TEXT, ttl REAL NOT NULL);').run();
+    db.prepare('CREATE INDEX IF NOT EXISTS cache_ttl ON cache (ttl);').run();
+
+    // perform purge on startup
+    // ttl + tbd < now => ttl < now - tbd
+    const now = Date.now() - this.tbd;
+    db.prepare('DELETE FROM cache WHERE ttl < ?').run(now);
+
+    this.db = db;
+  }
+
+  set(key: string, value: string, ttl = 60 * 1000): void {
+    const insert = this.db.prepare(
+      'INSERT INTO cache (key, value, ttl) VALUES ($key, $value, $valid) ON CONFLICT(key) DO UPDATE SET value = $value, ttl = $valid'
+    );
+
+    insert.run({
+      $key: key,
+      $value: value,
+      $valid: Date.now() + ttl
+    });
+  }
+
+  get(key: string, defaultValue?: string): string | undefined {
+    const rv = this.db.prepare<{ value: string }, string>(
+      'SELECT value FROM cache WHERE key = ?'
+    ).get(key);
+
+    if (!rv) return defaultValue;
+    return rv.value;
+  }
+
+  has(key: string): CacheStatus {
+    const now = Date.now();
+    const rv = this.db.prepare<{ ttl: number }, string>('SELECT ttl FROM cache WHERE key = ?').get(key);
+
+    return !rv ? CacheStatus.Miss : (rv.ttl > now ? CacheStatus.Hit : CacheStatus.Stale);
+  }
+
+  del(key: string): void {
+    this.db.prepare('DELETE FROM cache WHERE key = ?').run(key);
+  }
+
+  async apply<T>(
+    key: string,
+    fn: () => Promise<T>,
+    opt: CacheApplyOption<T>
+  ): Promise<T> {
+    const { ttl, temporaryBypass } = opt;
+
+    if (temporaryBypass) {
+      return fn();
+    }
+    if (ttl === null) {
+      this.del(key);
+      return fn();
+    }
+
+    const cached = this.get(key);
+    let value: T;
+    if (cached == null) {
+      console.log(picocolors.yellow('[cache] miss'), picocolors.gray(key));
+      value = await fn();
+      const serializer = 'serializer' in opt ? opt.serializer : identity;
+      this.set(key, serializer(value), ttl);
+    } else {
+      console.log(picocolors.green('[cache] hit'), picocolors.gray(key));
+      const deserializer = 'deserializer' in opt ? opt.deserializer : identity;
+      value = deserializer(cached);
+    }
+    return value;
+  }
+}
+
+export const fsCache = new Cache({ cachePath: path.resolve(import.meta.dir, '../../.cache') });
+
+const separator = String.fromCharCode(0);
+export const serializeSet = (set: Set<string>) => Array.from(set).join(separator);
+export const deserializeSet = (str: string) => new Set(str.split(separator));
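
For reference, a minimal usage sketch of the new fsCache.apply helper, mirroring how processDomainLists consumes it further down in this commit. The helper name, URL, and TTL are placeholders, and the './lib/...' import paths assume the sketch sits next to the build scripts:

import { fsCache, serializeSet, deserializeSet } from './lib/cache-filesystem';
import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
import { processLine } from './lib/process-line';

// Hypothetical helper: download a domain list at most once every 6 hours,
// otherwise serve it from .cache/cache.db. Set<string> is not a string,
// so a serializer/deserializer pair is required.
const getCachedDomainList = (url: string) => fsCache.apply(
  url,                                    // the URL doubles as the cache key
  async () => {
    const domains = new Set<string>();
    for await (const line of await fetchRemoteTextByLine(url)) {
      const domain = processLine(line);   // strips comments and blank lines
      if (domain) domains.add(domain);
    }
    return domains;                       // only runs on cache miss or expiry
  },
  {
    ttl: 6 * 60 * 60 * 1000,              // entry becomes stale after 6 hours
    serializer: serializeSet,             // Set<string> -> NUL-separated string
    deserializer: deserializeSet          // NUL-separated string -> Set<string>
  }
);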

View File

@@ -1,5 +1,7 @@
 import type { BunFile } from 'bun';
 import { fetchWithRetry, defaultRequestInit } from './fetch-retry';
+import { fsCache } from './cache-filesystem';
+import picocolors from 'picocolors';
 // import { TextLineStream } from './text-line-transform-stream';
 // import { PolyfillTextDecoderStream } from './text-decoder-stream';
@@ -78,6 +80,6 @@ export async function *createReadlineInterfaceFromResponse(resp: Response): Asyn
   }
 }
 
-export function fetchRemoteTextAndReadByLine(url: string | URL) {
+export function fetchRemoteTextByLine(url: string | URL) {
   return fetchWithRetry(url, defaultRequestInit).then(res => createReadlineInterfaceFromResponse(res as Response));
 }

View File

@@ -1,23 +1,13 @@
 import { toASCII } from 'punycode';
-import path from 'path';
 import { traceAsync } from './trace-runner';
-import { defaultRequestInit, fetchWithRetry } from './fetch-retry';
 import { createMemoizedPromise } from './memo-promise';
+import { getPublicSuffixListTextPromise } from '../download-publicsuffixlist';
 
-const publicSuffixPath = path.resolve(import.meta.dir, '../../node_modules/.cache/public_suffix_list_dat.txt');
-
-const getGorhillPublicSuffix = () => traceAsync('create gorhill public suffix instance', async () => {
+export const getGorhillPublicSuffixPromise = createMemoizedPromise(() => traceAsync('create gorhill public suffix instance', async () => {
   const customFetch = (url: string | URL) => Promise.resolve(Bun.file(url));
-  const publicSuffixFile = Bun.file(publicSuffixPath);
   const [publicSuffixListDat, { default: gorhill }] = await Promise.all([
-    await publicSuffixFile.exists()
-      ? publicSuffixFile.text()
-      : fetchWithRetry('https://publicsuffix.org/list/public_suffix_list.dat', defaultRequestInit).then(r => {
-        console.log('public_suffix_list.dat not found, fetch directly from remote.');
-        return r.text();
-      }),
+    getPublicSuffixListTextPromise(),
     import('@gorhill/publicsuffixlist')
   ]);
@@ -25,6 +15,4 @@ const getGorhillPublicSuffix = () => traceAsync('create gorhill public suffix in
   await gorhill.enableWASM({ customFetch });
   return gorhill;
-});
-
-export const getGorhillPublicSuffixPromise = createMemoizedPromise(getGorhillPublicSuffix);
+}));

View File

@@ -1,4 +1,4 @@
-import { fetchRemoteTextAndReadByLine } from './fetch-text-by-line';
+import { fetchRemoteTextByLine } from './fetch-text-by-line';
 import { parse } from 'tldts';
 
 const isDomainLoose = (domain: string): boolean => {
@@ -8,7 +8,7 @@ const isDomainLoose = (domain: string): boolean => {
 export const parseFelixDnsmasq = async (url: string | URL): Promise<string[]> => {
   const res: string[] = [];
-  for await (const line of await fetchRemoteTextAndReadByLine(url)) {
+  for await (const line of await fetchRemoteTextByLine(url)) {
     if (line.startsWith('server=/') && line.endsWith('/114.114.114.114')) {
       const domain = line.replace('server=/', '').replace('/114.114.114.114', '');
       if (isDomainLoose(domain)) {

View File

@@ -1,5 +1,5 @@
 // @ts-check
-import { fetchRemoteTextAndReadByLine } from './fetch-text-by-line';
+import { fetchRemoteTextByLine } from './fetch-text-by-line';
 import { NetworkFilter } from '@cliqz/adblocker';
 import { processLine } from './process-line';
 import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
@@ -9,61 +9,79 @@ import { traceAsync } from './trace-runner';
 import picocolors from 'picocolors';
 import { normalizeDomain } from './normalize-domain';
 import { fetchAssets } from './fetch-assets';
+import { deserializeSet, fsCache, serializeSet } from './cache-filesystem';
 
 const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null
 let foundDebugDomain = false;
 
-export function processDomainLists(domainListsUrl: string, includeAllSubDomain = false) {
-  return traceAsync(`- processDomainLists: ${domainListsUrl}`, async () => {
-    const domainSets = new Set<string>();
-
-    for await (const line of await fetchRemoteTextAndReadByLine(domainListsUrl)) {
-      const domainToAdd = processLine(line);
-      if (!domainToAdd) continue;
-
-      if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) {
-        console.warn(picocolors.red(domainListsUrl), '(black)', picocolors.bold(DEBUG_DOMAIN_TO_FIND));
-        foundDebugDomain = true;
-      }
-      domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
-    }
-    return domainSets;
-  });
-}
-
-export function processHosts(hostsUrl: string, includeAllSubDomain = false, skipDomainCheck = false) {
-  return traceAsync(`- processHosts: ${hostsUrl}`, async () => {
-    const domainSets = new Set<string>();
-
-    for await (const l of await fetchRemoteTextAndReadByLine(hostsUrl)) {
-      const line = processLine(l);
-      if (!line) {
-        continue;
-      }
-      const domain = line.split(/\s/)[1];
-      if (!domain) {
-        continue;
-      }
-      const _domain = domain.trim();
-
-      if (DEBUG_DOMAIN_TO_FIND && _domain.includes(DEBUG_DOMAIN_TO_FIND)) {
-        console.warn(picocolors.red(hostsUrl), '(black)', picocolors.bold(DEBUG_DOMAIN_TO_FIND));
-        foundDebugDomain = true;
-      }
-
-      const domainToAdd = skipDomainCheck ? _domain : normalizeDomain(_domain);
-      if (domainToAdd) {
-        domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
-      }
-    }
-
-    console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size));
-
-    return domainSets;
-  });
+export function processDomainLists(domainListsUrl: string, includeAllSubDomain = false, ttl: number | null = null) {
+  return traceAsync(`- processDomainLists: ${domainListsUrl}`, () => fsCache.apply(
+    domainListsUrl,
+    async () => {
+      const domainSets = new Set<string>();
+
+      for await (const line of await fetchRemoteTextByLine(domainListsUrl)) {
+        const domainToAdd = processLine(line);
+        if (!domainToAdd) continue;
+
+        if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) {
+          console.warn(picocolors.red(domainListsUrl), '(black)', picocolors.bold(DEBUG_DOMAIN_TO_FIND));
+          foundDebugDomain = true;
+        }
+        domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
+      }
+      return domainSets;
+    },
+    {
+      ttl,
+      temporaryBypass: DEBUG_DOMAIN_TO_FIND !== null,
+      serializer: serializeSet,
+      deserializer: deserializeSet
+    }
+  ));
+}
+
+export function processHosts(hostsUrl: string, includeAllSubDomain = false, skipDomainCheck = false, ttl: number | null = null) {
+  return traceAsync(`- processHosts: ${hostsUrl}`, () => fsCache.apply(
+    hostsUrl,
+    async () => {
+      const domainSets = new Set<string>();
+
+      for await (const l of await fetchRemoteTextByLine(hostsUrl)) {
+        const line = processLine(l);
+        if (!line) {
+          continue;
+        }
+        const domain = line.split(/\s/)[1];
+        if (!domain) {
+          continue;
+        }
+        const _domain = domain.trim();
+
+        if (DEBUG_DOMAIN_TO_FIND && _domain.includes(DEBUG_DOMAIN_TO_FIND)) {
+          console.warn(picocolors.red(hostsUrl), '(black)', picocolors.bold(DEBUG_DOMAIN_TO_FIND));
+          foundDebugDomain = true;
+        }
+
+        const domainToAdd = skipDomainCheck ? _domain : normalizeDomain(_domain);
+        if (domainToAdd) {
+          domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
+        }
+      }
+
+      console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size));
+
+      return domainSets;
+    },
+    {
+      ttl,
+      temporaryBypass: DEBUG_DOMAIN_TO_FIND !== null,
+      serializer: serializeSet,
+      deserializer: deserializeSet
+    }
+  ));
 }
 
 // eslint-disable-next-line sukka-ts/no-const-enum -- bun bundler is smart, maybe?
@@ -77,90 +95,111 @@ const enum ParseType {
 export async function processFilterRules(
   filterRulesUrl: string,
-  fallbackUrls?: readonly string[] | undefined
-): Promise<{ white: Set<string>, black: Set<string>, foundDebugDomain: boolean }> {
-  const whitelistDomainSets = new Set<string>();
-  const blacklistDomainSets = new Set<string>();
-  const warningMessages: string[] = [];
-
-  await traceAsync(`- processFilterRules: ${filterRulesUrl}`, async () => {
-    const gorhill = await getGorhillPublicSuffixPromise();
-
-    /**
-     * @param {string} line
-     */
-    const lineCb = (line: string) => {
-      const result = parse(line, gorhill);
-      if (!result) {
-        return;
-      }
-
-      const flag = result[1];
-      const hostname = result[0];
-
-      if (DEBUG_DOMAIN_TO_FIND) {
-        if (hostname.includes(DEBUG_DOMAIN_TO_FIND)) {
-          console.warn(
-            picocolors.red(filterRulesUrl),
-            flag === ParseType.WhiteIncludeSubdomain || flag === ParseType.WhiteAbsolute
-              ? '(white)'
-              : '(black)',
-            picocolors.bold(DEBUG_DOMAIN_TO_FIND)
-          );
-          foundDebugDomain = true;
-        }
-      }
-
-      switch (flag) {
-        case ParseType.WhiteIncludeSubdomain:
-          if (hostname[0] !== '.') {
-            whitelistDomainSets.add(`.${hostname}`);
-          } else {
-            whitelistDomainSets.add(hostname);
-          }
-          break;
-        case ParseType.WhiteAbsolute:
-          whitelistDomainSets.add(hostname);
-          break;
-        case ParseType.BlackAbsolute:
-          blacklistDomainSets.add(hostname);
-          break;
-        case ParseType.BlackIncludeSubdomain:
-          if (hostname[0] !== '.') {
-            blacklistDomainSets.add(`.${hostname}`);
-          } else {
-            blacklistDomainSets.add(hostname);
-          }
-          break;
-        case ParseType.ErrorMessage:
-          warningMessages.push(hostname);
-          break;
-        default:
-          break;
-      }
-    };
-
-    if (!fallbackUrls || fallbackUrls.length === 0) {
-      for await (const line of await fetchRemoteTextAndReadByLine(filterRulesUrl)) {
-        // don't trim here
-        lineCb(line);
-      }
-    } else {
-      const filterRules = (await traceAsync(
-        picocolors.gray(`- download ${filterRulesUrl}`),
-        () => fetchAssets(filterRulesUrl, fallbackUrls),
-        picocolors.gray
-      )).split('\n');
-
-      const key = picocolors.gray(`- parse adguard filter ${filterRulesUrl}`);
-      console.time(key);
-      for (let i = 0, len = filterRules.length; i < len; i++) {
-        lineCb(filterRules[i]);
-      }
-      console.timeEnd(key);
-    }
-  });
+  fallbackUrls?: readonly string[] | undefined | null,
+  ttl: number | null = null
+): Promise<{ white: string[], black: string[], foundDebugDomain: boolean }> {
+  const [white, black, warningMessages] = await traceAsync(`- processFilterRules: ${filterRulesUrl}`, () => fsCache.apply<[
+    white: string[],
+    black: string[],
+    warningMessages: string[]
+  ]>(
+    filterRulesUrl,
+    async () => {
+      const whitelistDomainSets = new Set<string>();
+      const blacklistDomainSets = new Set<string>();
+      const warningMessages: string[] = [];
+
+      const gorhill = await getGorhillPublicSuffixPromise();
+
+      /**
+       * @param {string} line
+       */
+      const lineCb = (line: string) => {
+        const result = parse(line, gorhill);
+        if (!result) {
+          return;
+        }
+
+        const flag = result[1];
+        const hostname = result[0];
+
+        if (DEBUG_DOMAIN_TO_FIND) {
+          if (hostname.includes(DEBUG_DOMAIN_TO_FIND)) {
+            console.warn(
+              picocolors.red(filterRulesUrl),
+              flag === ParseType.WhiteIncludeSubdomain || flag === ParseType.WhiteAbsolute
+                ? '(white)'
+                : '(black)',
+              picocolors.bold(DEBUG_DOMAIN_TO_FIND)
+            );
+            foundDebugDomain = true;
+          }
+        }
+
+        switch (flag) {
+          case ParseType.WhiteIncludeSubdomain:
+            if (hostname[0] !== '.') {
+              whitelistDomainSets.add(`.${hostname}`);
+            } else {
+              whitelistDomainSets.add(hostname);
+            }
+            break;
+          case ParseType.WhiteAbsolute:
+            whitelistDomainSets.add(hostname);
+            break;
+          case ParseType.BlackAbsolute:
+            blacklistDomainSets.add(hostname);
+            break;
+          case ParseType.BlackIncludeSubdomain:
+            if (hostname[0] !== '.') {
+              blacklistDomainSets.add(`.${hostname}`);
+            } else {
+              blacklistDomainSets.add(hostname);
+            }
+            break;
+          case ParseType.ErrorMessage:
+            warningMessages.push(hostname);
+            break;
+          default:
+            break;
+        }
+      };
+
+      // TODO-SUKKA: add cache here
+      if (!fallbackUrls || fallbackUrls.length === 0) {
+        for await (const line of await fetchRemoteTextByLine(filterRulesUrl)) {
+          // don't trim here
+          lineCb(line);
+        }
+      } else {
+        const filterRules = (await traceAsync(
+          picocolors.gray(`- download ${filterRulesUrl}`),
+          () => fetchAssets(filterRulesUrl, fallbackUrls),
+          picocolors.gray
+        )).split('\n');
+
+        const key = picocolors.gray(`- parse adguard filter ${filterRulesUrl}`);
+        console.time(key);
+        for (let i = 0, len = filterRules.length; i < len; i++) {
+          lineCb(filterRules[i]);
+        }
+        console.timeEnd(key);
+      }
+
+      return [
+        Array.from(whitelistDomainSets),
+        Array.from(blacklistDomainSets),
+        warningMessages
+      ];
+    },
+    {
+      ttl,
+      temporaryBypass: DEBUG_DOMAIN_TO_FIND !== null,
+      serializer: JSON.stringify,
+      deserializer: JSON.parse
+    }
+  ));
 
   warningMessages.forEach(msg => {
     console.warn(
@@ -172,13 +211,13 @@ export async function processFilterRules(
   console.log(
     picocolors.gray('[process filter]'),
     picocolors.gray(filterRulesUrl),
-    picocolors.gray(`white: ${whitelistDomainSets.size}`),
-    picocolors.gray(`black: ${blacklistDomainSets.size}`)
+    picocolors.gray(`white: ${white.length}`),
+    picocolors.gray(`black: ${black.length}`)
   );
 
   return {
-    white: whitelistDomainSets,
-    black: blacklistDomainSets,
+    white,
+    black,
     foundDebugDomain
   };
 }

View File

@@ -4,7 +4,7 @@ export const processLine = (line: string): string | null => {
   }
 
   const trimmed: string = line.trim();
-  if (trimmed === '') {
+  if (trimmed.length === 0) {
     return null;
   }

View File

@@ -7,11 +7,11 @@ export const HOSTS = [
   ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false],
   ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true],
   // CoinBlockerList
-  ['https://zerodot1.gitlab.io/CoinBlockerLists/hosts_browser', true, true],
+  // Although the hosts file is still actively maintained, the hosts_browser file is not updated since 2021-07, so we set a 10 days cache ttl
+  ['https://zerodot1.gitlab.io/CoinBlockerLists/hosts_browser', true, true, 10 * 24 * 60 * 60 * 1000],
   // Curben's UrlHaus Malicious URL Blocklist
   // 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
   // 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
-  // 'https://ublockorigin.github.io/uAssetsCDN/thirdparties/urlhaus-filter/urlhaus-filter-online.txt',
   ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true],
   // Curben's Phishing URL Blocklist
   // Covered by lib/get-phishing-domains.ts
@@ -21,14 +21,24 @@ export const HOSTS = [
   // Curben's PUP Domains Blocklist
   // 'https://curbengh.github.io/pup-filter/pup-filter-agh.txt'
   // 'https://pup-filter.pages.dev/pup-filter-agh.txt'
-  ['https://curbengh.github.io/pup-filter/pup-filter-hosts.txt', true, true],
+  // The PUP filter has paused the update since 2023-05, so we set a 7 days cache ttl
+  ['https://curbengh.github.io/pup-filter/pup-filter-hosts.txt', true, true, 7 * 24 * 60 * 60 * 1000],
   // BarbBlock
-  ['https://paulgb.github.io/BarbBlock/blacklists/hosts-file.txt', true, true]
+  // The barbblock list has never been updated since 2019-05, so we set a 10 days cache ttl
+  ['https://paulgb.github.io/BarbBlock/blacklists/hosts-file.txt', true, true, 10 * 24 * 60 * 60 * 1000]
 ] as const;
 
 export const DOMAIN_LISTS = [
   // DigitalSide Threat-Intel - OSINT Hub
-  ['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true]
+  ['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true],
+  // AdGuard CNAME Filter Combined
+  // Update on a 7 days basis, so we add a 36 hours cache ttl
+  ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, 36 * 60 * 60 * 1000],
+  ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers_justdomains.txt', true, 36 * 60 * 60 * 1000],
+  ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_clickthroughs_justdomains.txt', true, 36 * 60 * 60 * 1000],
+  ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_microsites_justdomains.txt', true, 36 * 60 * 60 * 1000],
+  ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers_justdomains.txt', true, 36 * 60 * 60 * 1000]
 ] as const;
 
 export const ADGUARD_FILTERS = [
@@ -41,7 +51,8 @@ export const ADGUARD_FILTERS = [
       'https://secure.fanboy.co.nz/easylist.txt',
       'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easylist.txt',
       'https://ublockorigin.pages.dev/thirdparties/easylist.txt'
-    ]
+    ],
+    12 * 60 * 60 * 1000
   ],
   // EasyPrivacy
   [
@@ -52,7 +63,8 @@ export const ADGUARD_FILTERS = [
      'https://easylist-downloads.adblockplus.org/easyprivacy.txt',
      'https://ublockorigin.github.io/uAssetsCDN/thirdparties/easyprivacy.txt',
      'https://ublockorigin.pages.dev/thirdparties/easyprivacy.txt'
-    ]
+    ],
+    12 * 60 * 60 * 1000
   ],
   // AdGuard DNS Filter
   [
@@ -62,12 +74,6 @@ export const ADGUARD_FILTERS = [
       'https://adguardteam.github.io/HostlistsRegistry/assets/filter_1.txt'
     ]
   ],
-  // AdGuard CNAME Filter Combined
-  'https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads.txt',
-  'https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_trackers.txt',
-  'https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_clickthroughs.txt',
-  'https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_microsites.txt',
-  'https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_mail_trackers.txt',
   // uBlock Origin Filter List
   [
     'https://ublockorigin.github.io/uAssetsCDN/filters/filters.min.txt',

View File

@@ -1,4 +1,4 @@
-import { fetchRemoteTextAndReadByLine, readFileByLine } from './lib/fetch-text-by-line';
+import { fetchRemoteTextByLine, readFileByLine } from './lib/fetch-text-by-line';
 import { Readable } from 'stream';
 import { parse } from 'csv-parse';
 import { createTrie } from './lib/trie';
@@ -7,7 +7,7 @@ import { processLine } from './lib/process-line';
 export const parseDomesticList = async () => {
   const set = new Set<string>();
-  for await (const line of await fetchRemoteTextAndReadByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
+  for await (const line of await fetchRemoteTextByLine('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf')) {
     if (line.startsWith('server=/') && line.endsWith('/114.114.114.114')) {
       const domain = line.slice(8, -16);
       set.add(domain);

View File

@@ -2266,3 +2266,4 @@ ocecdn.oraclecloud.com
 assets.humix.com
 .nelreports.net
 static.mediafire.com
+player.louisvuitton.com