Perf: prefer hosts over AdBlock syntax

This commit is contained in:
SukkaW 2023-11-29 22:14:57 +08:00
parent 07b3951d6c
commit 4cda4df451
3 changed files with 50 additions and 41 deletions

View File

@ -1,4 +1,4 @@
import { processFilterRules } from './lib/parse-filter'; import { processFilterRules, processHosts } from './lib/parse-filter';
import path from 'path'; import path from 'path';
import { createRuleset } from './lib/create-file'; import { createRuleset } from './lib/create-file';
import { processLine } from './lib/process-line'; import { processLine } from './lib/process-line';
@ -65,15 +65,16 @@ const BLACK_TLD = new Set([
]); ]);
export const buildPhishingDomainSet = task(import.meta.path, async () => { export const buildPhishingDomainSet = task(import.meta.path, async () => {
const [{ black: domainSet }, gorhill] = await Promise.all([ const [domainSet, gorhill] = await Promise.all([
processFilterRules( processHosts('https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true),
'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt', // processFilterRules(
[ // 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
'https://phishing-filter.pages.dev/phishing-filter-agh.txt' // [
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while // 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
// 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt' // // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
] // // 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
), // ]
// ),
getGorhillPublicSuffixPromise() getGorhillPublicSuffixPromise()
]); ]);

View File

@ -62,7 +62,7 @@ export async function processDomainLists(domainListsUrl: string | URL) {
return domainSets; return domainSets;
} }
export async function processHosts(hostsUrl: string | URL, includeAllSubDomain = false) { export async function processHosts(hostsUrl: string | URL, includeAllSubDomain = false, skipDomainCheck = false) {
console.time(`- processHosts: ${hostsUrl}`); console.time(`- processHosts: ${hostsUrl}`);
if (typeof hostsUrl === 'string') { if (typeof hostsUrl === 'string') {
@ -85,7 +85,7 @@ export async function processHosts(hostsUrl: string | URL, includeAllSubDomain =
foundDebugDomain = true; foundDebugDomain = true;
} }
const domain = normalizeDomain(_domain); const domain = skipDomainCheck ? _domain : normalizeDomain(_domain);
if (domain) { if (domain) {
if (includeAllSubDomain) { if (includeAllSubDomain) {
domainSets.add(`.${domain}`); domainSets.add(`.${domain}`);

View File

@ -1,11 +1,19 @@
export const HOSTS: [string, boolean][] = [ export const HOSTS = [
// ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', false], // ['https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=0&mimetype=plaintext', false],
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false], ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', false],
['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', false], ['https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt', false],
['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false], ['https://raw.githubusercontent.com/jerryn70/GoodbyeAds/master/Extension/GoodbyeAds-Xiaomi-Extension.txt', false],
['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false], ['https://raw.githubusercontent.com/jdlingyu/ad-wars/master/hosts', false],
['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', false] ['https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', false],
]; // Curben's UrlHaus Malicious URL Blocklist
['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, true],
// Curben's Phishing URL Blocklist
['https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true],
// Curben's PUP Domains Blocklist
['https://curbengh.github.io/pup-filter/pup-filter-hosts.txt', true, true],
// BarbBlock
['https://paulgb.github.io/BarbBlock/blacklists/hosts-file.txt', true, true]
] as const;
export const ADGUARD_FILTERS = [ export const ADGUARD_FILTERS = [
// EasyList // EasyList
@ -103,33 +111,33 @@ export const ADGUARD_FILTERS = [
] ]
], ],
// Curben's UrlHaus Malicious URL Blocklist // Curben's UrlHaus Malicious URL Blocklist
[ // [
'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt', // 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
[ // [
'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt', // 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while // // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt', // // 'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
'https://ublockorigin.github.io/uAssets/thirdparties/urlhaus-filter/urlhaus-filter-online.txt', // 'https://ublockorigin.github.io/uAssets/thirdparties/urlhaus-filter/urlhaus-filter-online.txt',
] // ]
], // ],
// Curben's Phishing URL Blocklist // Curben's Phishing URL Blocklist
[ // [
'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt', // 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
[ // [
'https://phishing-filter.pages.dev/phishing-filter-agh.txt' // 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while // // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt' // // 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
] // ]
], // ],
// Curben's PUP Domains Blocklist // Curben's PUP Domains Blocklist
[ // [
'https://curbengh.github.io/pup-filter/pup-filter-agh.txt', // 'https://curbengh.github.io/pup-filter/pup-filter-agh.txt',
[ // [
'https://pup-filter.pages.dev/pup-filter-agh.txt' // 'https://pup-filter.pages.dev/pup-filter-agh.txt'
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while // // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/malware-filter/pup-filter-agh.txt' // // 'https://malware-filter.gitlab.io/malware-filter/pup-filter-agh.txt'
] // ]
], // ],
// GameConsoleAdblockList // GameConsoleAdblockList
'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt', 'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt',
// PiHoleBlocklist // PiHoleBlocklist
@ -142,7 +150,7 @@ export const ADGUARD_FILTERS = [
// Spam404 // Spam404
'https://raw.githubusercontent.com/Spam404/lists/master/adblock-list.txt', 'https://raw.githubusercontent.com/Spam404/lists/master/adblock-list.txt',
// BarbBlock // BarbBlock
'https://paulgb.github.io/BarbBlock/blacklists/ublock-origin.txt', // 'https://paulgb.github.io/BarbBlock/blacklists/ublock-origin.txt',
// Brave First Party & First Party CNAME // Brave First Party & First Party CNAME
'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty.txt' 'https://raw.githubusercontent.com/brave/adblock-lists/master/brave-lists/brave-firstparty.txt'
] as const; ] as const;