Update Reject Hosts / Add mirror support for Hosts Source

This commit is contained in:
SukkaW
2024-01-22 12:09:08 +08:00
parent 41b2f543f8
commit af8cce4f45
7 changed files with 88 additions and 57 deletions

View File

@@ -44,25 +44,25 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl
}
));
}
export function processHosts(span: Span, hostsUrl: string, includeAllSubDomain = false, ttl: number | null = null) {
return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn(() => fsCache.apply(
export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) {
return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn((childSpan) => fsCache.apply(
hostsUrl,
async () => {
const domainSets = new Set<string>();
for await (const l of await fetchRemoteTextByLine(hostsUrl)) {
const lineCb = (l: string) => {
const line = processLine(l);
if (!line) {
continue;
return;
}
const _domain = line.split(/\s/)[1]?.trim();
if (!_domain) {
continue;
return;
}
const domain = normalizeDomain(_domain);
if (!domain) {
continue;
return;
}
if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
console.warn(picocolors.red(hostsUrl), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
@@ -70,6 +70,25 @@ export function processHosts(span: Span, hostsUrl: string, includeAllSubDomain =
}
domainSets.add(includeAllSubDomain ? `.${domain}` : domain);
};
if (mirrors == null || mirrors.length === 0) {
for await (const l of await fetchRemoteTextByLine(hostsUrl)) {
lineCb(l);
}
} else {
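// Yield once before we start fetching. NOTE: `await Promise.resolve()` only defers by a microtask (not a macrotask), so pending timers and I/O callbacks do not get a chance to run here.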
await Promise.resolve();
const filterRules = await childSpan.traceChild('download hosts').traceAsyncFn(() => {
return fetchAssets(hostsUrl, mirrors).then(text => text.split('\n'));
});
childSpan.traceChild('parse hosts').traceSyncFn(() => {
for (let i = 0, len = filterRules.length; i < len; i++) {
lineCb(filterRules[i]);
}
});
}
console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size));

View File

@@ -1,21 +1,34 @@
import { TTL } from './cache-filesystem';
export const HOSTS = [
['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, TTL.THREE_HOURS()],
['https://someonewhocares.org/hosts/hosts', true, TTL.THREE_HOURS()],
type HostsSource = [main: string, mirrors: string[] | null, includeAllSubDomain: boolean, ttl: number];
export const HOSTS: HostsSource[] = [
[
'https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext',
['https://raw.githubusercontent.com/uBlockOrigin/uAssets/master/thirdparties/pgl.yoyo.org/as/serverlist'],
true,
TTL.THREE_HOURS()
],
['https://someonewhocares.org/hosts/hosts', null, true, TTL.THREE_HOURS()],
// The NoCoin list is not actively maintained, but it updates daily whenever it is being maintained, so we set a 3-day cache TTL
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', true, TTL.THREE_DAYS()],
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', null, true, TTL.THREE_DAYS()],
// Has not been updated for more than a year, so we set a 14-day cache TTL
['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', true, TTL.TWO_WEEKS()],
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false, TTL.THREE_DAYS()],
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', false, TTL.THREE_DAYS()],
['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', null, true, TTL.TWO_WEEKS()],
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', null, false, TTL.THREE_DAYS()],
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', null, false, TTL.THREE_DAYS()],
// ad-wars is not actively maintained, so we set a 7-day cache TTL
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false, TTL.ONE_WEEK()],
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true, TTL.THREE_HOURS()],
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.ONE_WEEK()],
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', null, true, TTL.THREE_HOURS()],
// Curben's UrlHaus Malicious URL Blocklist
// 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
// 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, TTL.THREE_HOURS()]
[
'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt',
[
'https://urlhaus-filter.pages.dev/urlhaus-filter-hosts.txt',
'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-hosts.txt'
],
true,
TTL.THREE_HOURS()
]
// Curben's Phishing URL Blocklist
// Covered by lib/get-phishing-domains.ts
// 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt'

View File

@@ -9,7 +9,7 @@ export function traceSync<T>(prefix: string, fn: () => T, timeFormatter: Formatt
console.log(`${timeFormatter(`[${((end - start) / 1e6).toFixed(3)}ms]`)} ${prefix}`);
return result;
}
traceSync.skip = <T>(_prefix: string, fn: () => T): T => fn();
// traceSync.skip = <T>(_prefix: string, fn: () => T): T => fn();
export const traceAsync = async <T>(prefix: string, fn: () => Promise<T>, timeFormatter: Formatter = picocolors.blue): Promise<T> => {
const start = Bun.nanoseconds();
@@ -18,9 +18,3 @@ export const traceAsync = async <T>(prefix: string, fn: () => Promise<T>, timeFo
console.log(`${timeFormatter(`[${((end - start) / 1e6).toFixed(3)}ms]`)} ${prefix}`);
return result;
};
/**
 * Immutable record pairing a task name with two numeric marks.
 *
 * NOTE(review): the units of `start` and `end` are not established by this
 * declaration. The trace helpers in this file take their timings from
 * `Bun.nanoseconds()`, so nanoseconds is likely — confirm against the producer.
 */
export interface TaskResult {
  /** Numeric mark for the beginning of the task (presumably a timestamp — confirm). */
  readonly start: number;
  /** Numeric mark for the end of the task (presumably a timestamp — confirm). */
  readonly end: number;
  /** Name identifying the task this record belongs to. */
  readonly taskName: string;
}