Update Reject Hosts / Add mirror support for Hosts Source

Repository: https://github.com/SukkaW/Surge.git (mirror)
commit af8cce4f45 (parent 41b2f543f8)
@@ -91,34 +91,36 @@ const processFile = (span: Span, sourcePath: string) => {
   });
 };
 
-async function transformDomainset(parentSpan: Span, sourcePath: string, relativePath: string) {
-  const span = parentSpan.traceChild(`transform domainset: ${path.basename(sourcePath, path.extname(sourcePath))}`);
+function transformDomainset(parentSpan: Span, sourcePath: string, relativePath: string) {
+  return parentSpan
+    .traceChild(`transform domainset: ${path.basename(sourcePath, path.extname(sourcePath))}`)
+    .traceAsyncFn(async (span) => {
+      const res = await processFile(span, sourcePath);
+      if (!res) return;
 
-  const res = await processFile(span, sourcePath);
-  if (!res) return;
+      const [title, descriptions, lines] = res;
 
-  const [title, descriptions, lines] = res;
+      const deduped = domainDeduper(lines);
+      const description = [
+        ...SHARED_DESCRIPTION,
+        ...(
+          descriptions.length
+            ? ['', ...descriptions]
+            : []
+        )
+      ];
 
-  const deduped = domainDeduper(lines);
-  const description = [
-    ...SHARED_DESCRIPTION,
-    ...(
-      descriptions.length
-        ? ['', ...descriptions]
-        : []
-    )
-  ];
-
-  return span.traceAsyncFn(() => createRuleset(
-    span,
-    title,
-    description,
-    new Date(),
-    deduped,
-    'domainset',
-    path.resolve(outputSurgeDir, relativePath),
-    path.resolve(outputClashDir, `${relativePath.slice(0, -path.extname(relativePath).length)}.txt`)
-  ));
+      return createRuleset(
+        span,
+        title,
+        description,
+        new Date(),
+        deduped,
+        'domainset',
+        path.resolve(outputSurgeDir, relativePath),
+        path.resolve(outputClashDir, `${relativePath.slice(0, -path.extname(relativePath).length)}.txt`)
+      );
+    });
 }
 
 /**
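The hunk above folds span creation, timing, and the function body into a single parentSpan.traceChild(...).traceAsyncFn(...) chain, so the traced callback both receives the child span and produces the value that transformDomainset returns. A minimal sketch of that wrapper pattern, assuming a far smaller Span shape than the repository's real tracing utilities:

// Minimal sketch of the traceChild().traceAsyncFn() pattern used above.
// MiniSpan is a stand-in; the real Span in this repository has more features.
interface MiniSpan {
  traceChild: (name: string) => MiniSpan;
  traceAsyncFn: <T>(fn: (span: MiniSpan) => Promise<T>) => Promise<T>;
}

function createMiniSpan(name: string): MiniSpan {
  const span: MiniSpan = {
    traceChild: (childName) => createMiniSpan(`${name} > ${childName}`),
    traceAsyncFn: async <T>(fn: (s: MiniSpan) => Promise<T>): Promise<T> => {
      const start = performance.now();
      try {
        // the callback receives the span so nested work can keep tracing,
        // and whatever it returns becomes the result of the traced call
        return await fn(span);
      } finally {
        console.log(`[${(performance.now() - start).toFixed(3)}ms] ${name}`);
      }
    }
  };
  return span;
}

// Usage mirrors the new transformDomainset shape.
async function demo(parentSpan: MiniSpan) {
  return parentSpan
    .traceChild('transform domainset: example')
    .traceAsyncFn(async (span) => {
      const lines = await Promise.resolve(['example.com', '.example.org']);
      return span.traceChild('dedupe').traceAsyncFn(async () => new Set(lines).size);
    });
}

demo(createMiniSpan('root')).then(console.log);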
@@ -27,14 +27,15 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
 
   const domainSets = new Set<string>();
 
-  let shouldStop = false;
   // Parse from AdGuard Filters
-  await span
+  const shouldStop = await span
     .traceChild('download and process hosts / adblock filter rules')
     .traceAsyncFn(async (childSpan) => {
+      // eslint-disable-next-line sukka/no-single-return -- not single return
+      let shouldStop = false;
       await Promise.all([
         // Parse from remote hosts & domain lists
-        ...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetHelpers.add(domainSets, hosts))),
+        ...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2], entry[3]).then(hosts => SetHelpers.add(domainSets, hosts))),
 
         ...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetHelpers.add(domainSets, hosts))),
 
@@ -44,6 +45,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
             : processFilterRules(childSpan, input[0], input[1], input[2])
           ).then(({ white, black, foundDebugDomain }) => {
             if (foundDebugDomain) {
+              // eslint-disable-next-line sukka/no-single-return -- not single return
               shouldStop = true;
               // we should not break here, as we want to see full matches from all data source
             }
@@ -65,7 +67,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
           setAddFromArray(domainSets, await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf')));
         })
       ]);
 
+      // eslint-disable-next-line sukka/no-single-return -- not single return
+      return shouldStop;
     });
 
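The hunks above replace the outer let shouldStop with a flag that lives inside the traced callback and is returned from it, so the caller only sees the awaited boolean. A condensed sketch of that shape, where fetchOne is a hypothetical stand-in for the per-source work done by processHosts and processFilterRules:

// Sketch: run every source concurrently, but surface a single boolean to the caller.
async function downloadAndProcess(
  sources: string[],
  fetchOne: (url: string) => Promise<boolean>
): Promise<boolean> {
  let shouldStop = false;

  await Promise.all(sources.map(async (url) => {
    const foundDebugDomain = await fetchOne(url);
    if (foundDebugDomain) {
      // do not bail out early: every source should still report its matches
      shouldStop = true;
    }
  }));

  return shouldStop;
}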
@@ -44,25 +44,25 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl
     }
   ));
 }
-export function processHosts(span: Span, hostsUrl: string, includeAllSubDomain = false, ttl: number | null = null) {
-  return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn(() => fsCache.apply(
+export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) {
+  return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn((childSpan) => fsCache.apply(
     hostsUrl,
     async () => {
       const domainSets = new Set<string>();
 
-      for await (const l of await fetchRemoteTextByLine(hostsUrl)) {
+      const lineCb = (l: string) => {
         const line = processLine(l);
         if (!line) {
-          continue;
+          return;
         }
 
         const _domain = line.split(/\s/)[1]?.trim();
         if (!_domain) {
-          continue;
+          return;
         }
         const domain = normalizeDomain(_domain);
         if (!domain) {
-          continue;
+          return;
         }
         if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
           console.warn(picocolors.red(hostsUrl), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
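This hunk extracts the old for-await loop body into a lineCb callback, so the same per-line parsing can serve two ingestion paths: the line-by-line stream from fetchRemoteTextByLine and the pre-downloaded array of lines used by the mirror branch in the next hunk. A generic sketch of that refactor, with placeholder parsing instead of the real hosts-file handling:

// Sketch: one line handler, two ways of feeding it.
function createLineHandler(sink: Set<string>) {
  return (line: string) => {
    const trimmed = line.trim();
    if (trimmed === '' || trimmed.startsWith('#')) return; // skip blanks and comments
    sink.add(trimmed);
  };
}

async function ingest(stream: AsyncIterable<string> | null, downloaded: string[] | null) {
  const results = new Set<string>();
  const lineCb = createLineHandler(results);

  if (stream) {
    // streaming path: handle each line as it arrives
    for await (const line of stream) lineCb(line);
  } else if (downloaded) {
    // mirror path: the whole file is already in memory as an array of lines
    for (let i = 0, len = downloaded.length; i < len; i++) lineCb(downloaded[i]);
  }
  return results;
}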
@@ -70,6 +70,25 @@ export function processHosts(span: Span, hostsUrl: string, includeAllSubDomain =
         }
 
         domainSets.add(includeAllSubDomain ? `.${domain}` : domain);
-      }
+      };
+
+      if (mirrors == null || mirrors.length === 0) {
+        for await (const l of await fetchRemoteTextByLine(hostsUrl)) {
+          lineCb(l);
+        }
+      } else {
+        // Avoid event loop starvation, so we wait for a macrotask before we start fetching.
+        await Promise.resolve();
+
+        const filterRules = await childSpan.traceChild('download hosts').traceAsyncFn(() => {
+          return fetchAssets(hostsUrl, mirrors).then(text => text.split('\n'));
+        });
+
+        childSpan.traceChild('parse hosts').traceSyncFn(() => {
+          for (let i = 0, len = filterRules.length; i < len; i++) {
+            lineCb(filterRules[i]);
+          }
+        });
+      }
 
       console.log(picocolors.gray('[process hosts]'), picocolors.gray(hostsUrl), picocolors.gray(domainSets.size));
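fetchAssets(hostsUrl, mirrors) is called here but not defined in this diff, so its exact behaviour is not shown. Purely for illustration, one plausible shape for a primary-plus-mirrors download is sketched below; the name fetchWithMirrors, the sequential fallback order, and the 30-second timeout are assumptions, not the repository's API:

// Hypothetical sketch of a "try the primary, then fall back to mirrors" fetch.
// The real fetchAssets may behave differently (for example, racing all URLs).
async function fetchWithMirrors(primary: string, mirrors: string[]): Promise<string> {
  let lastError: unknown;

  for (const url of [primary, ...mirrors]) {
    try {
      const res = await fetch(url, { signal: AbortSignal.timeout(30_000) });
      if (!res.ok) throw new Error(`HTTP ${res.status} from ${url}`);
      return await res.text();
    } catch (e) {
      lastError = e; // remember the failure and try the next mirror
    }
  }

  throw new Error(`all mirrors failed for ${primary}`, { cause: lastError });
}

Splitting the downloaded text on newlines, as the diff above does, then feeds the same lineCb used by the streaming path.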
@@ -1,21 +1,34 @@
 import { TTL } from './cache-filesystem';
 
-export const HOSTS = [
-  ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', true, TTL.THREE_HOURS()],
-  ['https://someonewhocares.org/hosts/hosts', true, TTL.THREE_HOURS()],
+type HostsSource = [main: string, mirrors: string[] | null, includeAllSubDomain: boolean, ttl: number];
+
+export const HOSTS: HostsSource[] = [
+  [
+    'https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext',
+    ['https://raw.githubusercontent.com/uBlockOrigin/uAssets/master/thirdparties/pgl.yoyo.org/as/serverlist'],
+    true,
+    TTL.THREE_HOURS()
+  ],
+  ['https://someonewhocares.org/hosts/hosts', null, true, TTL.THREE_HOURS()],
   // no coin list is not actively maintained, but it updates daily when being maintained, so we set a 3 days cache ttl
-  ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', true, TTL.THREE_DAYS()],
+  ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', null, true, TTL.THREE_DAYS()],
   // have not been updated for more than a year, so we set a 14 days cache ttl
-  ['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', true, TTL.TWO_WEEKS()],
-  ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false, TTL.THREE_DAYS()],
-  ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', false, TTL.THREE_DAYS()],
+  ['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', null, true, TTL.TWO_WEEKS()],
+  ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', null, false, TTL.THREE_DAYS()],
+  ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Huawei-AdBlock.txt', null, false, TTL.THREE_DAYS()],
   // ad-wars is not actively maintained, so we set a 7 days cache ttl
-  ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false, TTL.ONE_WEEK()],
-  ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true, TTL.THREE_HOURS()],
+  ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', null, false, TTL.ONE_WEEK()],
+  ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', null, true, TTL.THREE_HOURS()],
   // Curben's UrlHaus Malicious URL Blocklist
-  // 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
-  // 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
-  ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, TTL.THREE_HOURS()]
+  [
+    'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt',
+    [
+      'https://urlhaus-filter.pages.dev/urlhaus-filter-hosts.txt',
+      'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-hosts.txt'
+    ],
+    true,
+    TTL.THREE_HOURS()
+  ]
   // Curben's Phishing URL Blocklist
   // Covered by lib/get-phishing-domains.ts
   // 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt'
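Each HostsSource tuple is consumed positionally at the call site shown earlier, processHosts(childSpan, entry[0], entry[1], entry[2], entry[3]). The sketch below only restates that mapping; the describeSource helper and the numeric TTL value are illustrative assumptions, not code from the repository:

// The labeled tuple from the diff above, repeated for reference.
type HostsSource = [main: string, mirrors: string[] | null, includeAllSubDomain: boolean, ttl: number];

// Illustrative only: show how the positional fields line up with the new
// processHosts(span, hostsUrl, mirrors, includeAllSubDomain, ttl) signature.
function describeSource(entry: HostsSource): string {
  const [main, mirrors, includeAllSubDomain, ttl] = entry;
  const mirrorCount = mirrors === null ? 0 : mirrors.length;
  return `${main} (mirrors: ${mirrorCount}, include subdomains: ${includeAllSubDomain}, ttl: ${ttl})`;
}

const example: HostsSource = [
  'https://someonewhocares.org/hosts/hosts',
  null,
  true,
  3 * 60 * 60 * 1000 // stand-in for TTL.THREE_HOURS(); the actual value and unit are assumptions
];

console.log(describeSource(example));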
@@ -9,7 +9,7 @@ export function traceSync<T>(prefix: string, fn: () => T, timeFormatter: Formatt
   console.log(`${timeFormatter(`[${((end - start) / 1e6).toFixed(3)}ms]`)} ${prefix}`);
   return result;
 }
-traceSync.skip = <T>(_prefix: string, fn: () => T): T => fn();
+// traceSync.skip = <T>(_prefix: string, fn: () => T): T => fn();
 
 export const traceAsync = async <T>(prefix: string, fn: () => Promise<T>, timeFormatter: Formatter = picocolors.blue): Promise<T> => {
   const start = Bun.nanoseconds();
@@ -18,9 +18,3 @@ export const traceAsync = async <T>(prefix: string, fn: () => Promise<T>, timeFo
   console.log(`${timeFormatter(`[${((end - start) / 1e6).toFixed(3)}ms]`)} ${prefix}`);
   return result;
 };
-
-export interface TaskResult {
-  readonly start: number,
-  readonly end: number,
-  readonly taskName: string
-}
@@ -52,7 +52,7 @@ export const createSpan = (name: string, parentTraceResult?: TraceResult): Span
 
   const stop = (time?: number) => {
     if (status === SPAN_STATUS_END) {
-      throw new Error('span already stopped');
+      throw new Error(`span already stopped: ${name}`);
     }
     const end = time ?? Bun.nanoseconds();
 
@@ -108,9 +108,7 @@ DOMAIN-KEYWORD,adjust.
 DOMAIN-KEYWORD,appsflyer
 DOMAIN-KEYWORD,dnserror
 DOMAIN-KEYWORD,marketing.net
-AND,((DOMAIN-KEYWORD,f-log), (DOMAIN-SUFFIX,grammarly.io))
 DOMAIN,stun.smartgslb.com
-AND,((DOMAIN-SUFFIX,prod.hosts.ooklaserver.net), (DOMAIN-KEYWORD,.ad.))
 
 DOMAIN-KEYWORD,_vmind.qqvideo.tc.qq.com
 DOMAIN-KEYWORD,-logging.nextmedia.com
@@ -120,7 +118,10 @@ DOMAIN-KEYWORD,.engage.3m.
 # -telemetry.officeapps.live.com
 DOMAIN-KEYWORD,telemetry.officeapps.live.com
 DOMAIN-KEYWORD,-launches.appsflyersdk.com
 DOMAIN-KEYWORD,-s2s.sensic.net
 
+AND,((DOMAIN-KEYWORD,f-log), (DOMAIN-SUFFIX,grammarly.io))
+AND,((DOMAIN-SUFFIX,prod.hosts.ooklaserver.net), (DOMAIN-KEYWORD,.ad.))
+AND,((DOMAIN-KEYWORD,genuine), (DOMAIN-KEYWORD,autodesk))
 
 # Important: Force add the following domains without whitelisting