Perf: prefer hosts over AdBlock syntax

This commit is contained in:
SukkaW 2023-11-29 22:14:57 +08:00
parent 07b3951d6c
commit 4cda4df451
3 changed files with 50 additions and 41 deletions

View File

@@ -1,4 +1,4 @@
import { processFilterRules } from './lib/parse-filter';
import { processFilterRules, processHosts } from './lib/parse-filter';
import path from 'path';
import { createRuleset } from './lib/create-file';
import { processLine } from './lib/process-line';
@@ -65,15 +65,16 @@ const BLACK_TLD = new Set([
]);
export const buildPhishingDomainSet = task(import.meta.path, async () => {
const [{ black: domainSet }, gorhill] = await Promise.all([
processFilterRules(
'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
[
'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
]
),
const [domainSet, gorhill] = await Promise.all([
processHosts('https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true),
// processFilterRules(
// 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
// [
// 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
// // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// // 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
// ]
// ),
getGorhillPublicSuffixPromise()
]);

View File

@@ -62,7 +62,7 @@ export async function processDomainLists(domainListsUrl: string | URL) {
return domainSets;
}
export async function processHosts(hostsUrl: string | URL, includeAllSubDomain = false) {
export async function processHosts(hostsUrl: string | URL, includeAllSubDomain = false, skipDomainCheck = false) {
console.time(`- processHosts: ${hostsUrl}`);
if (typeof hostsUrl === 'string') {
@@ -85,7 +85,7 @@ export async function processHosts(hostsUrl: string | URL, includeAllSubDomain =
foundDebugDomain = true;
}
const domain = normalizeDomain(_domain);
const domain = skipDomainCheck ? _domain : normalizeDomain(_domain);
if (domain) {
if (includeAllSubDomain) {
domainSets.add(`.${domain}`);

View File

@@ -1,11 +1,19 @@
export const HOSTS: [string, boolean][] = [
export const HOSTS = [
// ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', false],
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false],
['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', false],
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false],
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false],
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', false]
];
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', false],
// Curben's UrlHaus Malicious URL Blocklist
['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true],
// Curben's Phishing URL Blocklist
['https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true],
// Curben's PUP Domains Blocklist
['https://curbengh.github.io/pup-filter/pup-filter-hosts.txt', true, true],
// BarbBlock
['https://paulgb.github.io/BarbBlock/blacklists/hosts-file.txt', true, true]
] as const;
export const ADGUARD_FILTERS = [
// EasyList
@@ -103,33 +111,33 @@ export const ADGUARD_FILTERS = [
]
],
// Curben's UrlHaus Malicious URL Blocklist
[
'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
[
'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
'https://ublockorigin.github.io/uAssets/thirdparties/urlhaus-filter/urlhaus-filter-online.txt',
]
],
// [
// 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
// [
// 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
// // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// // 'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
// 'https://ublockorigin.github.io/uAssets/thirdparties/urlhaus-filter/urlhaus-filter-online.txt',
// ]
// ],
// Curben's Phishing URL Blocklist
[
'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
[
'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
]
],
// [
// 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
// [
// 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
// // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// // 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
// ]
// ],
// Curben's PUP Domains Blocklist
[
'https://curbengh.github.io/pup-filter/pup-filter-agh.txt',
[
'https://pup-filter.pages.dev/pup-filter-agh.txt'
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/malware-filter/pup-filter-agh.txt'
]
],
// [
// 'https://curbengh.github.io/pup-filter/pup-filter-agh.txt',
// [
// 'https://pup-filter.pages.dev/pup-filter-agh.txt'
// // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// // 'https://malware-filter.gitlab.io/malware-filter/pup-filter-agh.txt'
// ]
// ],
// GameConsoleAdblockList
'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt',
// PiHoleBlocklist
@@ -142,7 +150,7 @@ export const ADGUARD_FILTERS = [
// Spam404
'https://raw.githubusercontent.com/Spam404/lists/master/adblock-list.txt',
// BarbBlock
'https://paulgb.github.io/BarbBlock/blacklists/ublock-origin.txt',
// 'https://paulgb.github.io/BarbBlock/blacklists/ublock-origin.txt',
// Brave First Party & First Party CNAME
'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty.txt'
] as const;