mirror of
https://github.com/SukkaW/Surge.git
synced 2026-01-29 01:51:52 +08:00
Drop gorhill publicsuffixlist
This commit is contained in:
@@ -1,24 +0,0 @@
|
||||
import fsp from 'fs/promises';
|
||||
import { toASCII } from 'punycode/punycode';
|
||||
import { createMemoizedPromise } from './memo-promise';
|
||||
import { getPublicSuffixListTextPromise } from './download-publicsuffixlist';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
// TODO: node undici fetch doesn't support file URL reading yet
|
||||
/**
 * Stand-in fetcher for `file:` URLs.
 *
 * Converts the URL to a filesystem path, reads the whole file, and hands the
 * bytes back wrapped in a `Blob`.
 *
 * @param url - URL to read; it is passed to `fileURLToPath`, so it is expected to be a `file:` URL.
 * @returns A `Blob` holding the file contents (typed as `any` for the consumer).
 */
const customFetch = async (url: URL) => {
  const contents = await fsp.readFile(fileURLToPath(url));
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- the consumer's expected type is declared elsewhere
  return new Blob([contents]) as any;
};
|
||||
|
||||
/**
 * Memoized loader for the gorhill public suffix list instance.
 *
 * Kicks off the public suffix list text download and the dynamic import of
 * `@gorhill/publicsuffixlist` together, parses the list text (with `toASCII`
 * as the second argument to `parse`), enables the WASM backend using
 * `customFetch`, and resolves with the module's default export.
 */
export const getGorhillPublicSuffixPromise = createMemoizedPromise(async () => {
  // Start both tasks before awaiting so they run concurrently.
  const pendingListText = getPublicSuffixListTextPromise();
  const pendingModule = import('@gorhill/publicsuffixlist');

  const [listText, gorhillModule] = await Promise.all([pendingListText, pendingModule]);
  const psl = gorhillModule.default;

  psl.parse(listText, toASCII);
  await psl.enableWASM({ customFetch });

  return psl;
});
|
||||
@@ -3,13 +3,8 @@
|
||||
* because `hostname` is already guaranteed to be a valid hostname!
|
||||
*/
|
||||
export function isProbablyIpv4(hostname: string): boolean {
|
||||
// Cannot be shorter than 1.1.1.1
|
||||
if (hostname.length < 7) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Cannot be longer than: 255.255.255.255
|
||||
if (hostname.length > 15) {
|
||||
// Cannot be shorter than 1.1.1.1 or longer than 255.255.255.255
|
||||
if (hostname.length < 7 || hostname.length > 15) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
import { fetchRemoteTextByLine } from './fetch-text-by-line';
|
||||
import { NetworkFilter } from '@cliqz/adblocker';
|
||||
import { processLine } from './process-line';
|
||||
import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
|
||||
import tldts from 'tldts-experimental';
|
||||
|
||||
import picocolors from 'picocolors';
|
||||
import { normalizeDomain } from './normalize-domain';
|
||||
@@ -10,7 +10,6 @@ import { fetchAssets } from './fetch-assets';
|
||||
import { deserializeArray, fsFetchCache, serializeArray } from './cache-filesystem';
|
||||
import type { Span } from '../trace';
|
||||
import createKeywordFilter from './aho-corasick';
|
||||
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
||||
|
||||
const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null
|
||||
let foundDebugDomain = false;
|
||||
@@ -147,14 +146,12 @@ export async function processFilterRules(
|
||||
|
||||
const warningMessages: string[] = [];
|
||||
|
||||
const gorhill = await span.traceChild('get gorhill').tracePromise(getGorhillPublicSuffixPromise());
|
||||
|
||||
const MUTABLE_PARSE_LINE_RESULT: [string, ParseType] = ['', 1000];
|
||||
/**
|
||||
* @param {string} line
|
||||
*/
|
||||
const lineCb = (line: string) => {
|
||||
const result = parse(line, gorhill, MUTABLE_PARSE_LINE_RESULT);
|
||||
const result = parse(line, MUTABLE_PARSE_LINE_RESULT);
|
||||
const flag = result[1];
|
||||
|
||||
if (flag === ParseType.Null) {
|
||||
@@ -282,7 +279,7 @@ const kwfilter = createKeywordFilter([
|
||||
'$cname'
|
||||
]);
|
||||
|
||||
function parse($line: string, gorhill: PublicSuffixList, result: [string, ParseType]): [hostname: string, flag: ParseType] {
|
||||
function parse($line: string, result: [string, ParseType]): [hostname: string, flag: ParseType] {
|
||||
if (
|
||||
// doesn't include
|
||||
!$line.includes('.') // rule with out dot can not be a domain
|
||||
@@ -557,8 +554,8 @@ function parse($line: string, gorhill: PublicSuffixList, result: [string, ParseT
|
||||
: (lineEndsWithCaretVerticalBar ? -2 : undefined) // replace('^|', '')
|
||||
);
|
||||
|
||||
const suffix = gorhill.getPublicSuffix(sliced);
|
||||
if (!gorhill.suffixInPSL(suffix)) {
|
||||
const suffix = tldts.getPublicSuffix(sliced);
|
||||
if (!suffix) {
|
||||
// This excludes domain-like resources like `1.1.4.514.js`
|
||||
result[1] = ParseType.Null;
|
||||
return result;
|
||||
@@ -632,8 +629,8 @@ function parse($line: string, gorhill: PublicSuffixList, result: [string, ParseT
|
||||
) {
|
||||
const _domain = line.slice(0, -1);
|
||||
|
||||
const suffix = gorhill.getPublicSuffix(_domain);
|
||||
if (!suffix || !gorhill.suffixInPSL(suffix)) {
|
||||
const suffix = tldts.getPublicSuffix(_domain);
|
||||
if (!suffix) {
|
||||
// This excludes domain-like resources like `_social_tracking.js^`
|
||||
result[1] = ParseType.Null;
|
||||
return result;
|
||||
@@ -688,7 +685,7 @@ function parse($line: string, gorhill: PublicSuffixList, result: [string, ParseT
|
||||
sliceEnd = -9;
|
||||
}
|
||||
const sliced = (sliceStart !== 0 || sliceEnd !== undefined) ? line.slice(sliceStart, sliceEnd) : line;
|
||||
const suffix = gorhill.getPublicSuffix(sliced);
|
||||
const suffix = tldts.getPublicSuffix(sliced);
|
||||
/**
|
||||
* Fast exclude definitely not domain-like resource
|
||||
*
|
||||
@@ -697,7 +694,7 @@ function parse($line: string, gorhill: PublicSuffixList, result: [string, ParseT
|
||||
* `-cpm-ads.$badfilter`, suffix is `$badfilter`,
|
||||
* `portal.librus.pl$$advertisement-module`, suffix is `pl$$advertisement-module`
|
||||
*/
|
||||
if (!suffix || !gorhill.suffixInPSL(suffix)) {
|
||||
if (!suffix) {
|
||||
// This excludes domain-like resources like `.gatracking.js`, `.beacon.min.js` and `.cookielaw.js`
|
||||
result[1] = ParseType.Null;
|
||||
return result;
|
||||
|
||||
@@ -5,12 +5,10 @@ import { bench, group, run } from 'mitata';
|
||||
|
||||
import * as tldts from 'tldts';
|
||||
import * as tldtsExperimental from 'tldts-experimental';
|
||||
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
||||
|
||||
(async () => {
|
||||
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
|
||||
|
||||
const gorhill = await getGorhillPublicSuffixPromise();
|
||||
const tldtsOpt: Parameters<typeof tldts.getDomain>[1] = {
|
||||
allowPrivateDomains: false,
|
||||
extractHostname: false,
|
||||
@@ -21,18 +19,6 @@ import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
||||
|
||||
(['getDomain', 'getPublicSuffix', 'getSubdomain', 'parse'] as const).forEach(methodName => {
|
||||
group(methodName, () => {
|
||||
if (methodName in gorhill) {
|
||||
bench('gorhill', () => {
|
||||
for (let i = 0, len = data.length; i < len; i++) {
|
||||
const line = data[i];
|
||||
const safeGorhillLine = line[0] === '.' ? line.slice(1) : line;
|
||||
|
||||
// @ts-expect-error -- type guarded
|
||||
gorhill[methodName](safeGorhillLine);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
bench('tldts', () => {
|
||||
for (let i = 0, len = data.length; i < len; i++) {
|
||||
tldts[methodName](data[i], tldtsOpt);
|
||||
|
||||
Reference in New Issue
Block a user