Drop gorhill publicsuffixlist

This commit is contained in:
SukkaW 2024-08-02 17:02:41 +08:00
parent ba66abe750
commit 3d3abb8b50
8 changed files with 12 additions and 103 deletions

View File

@ -1,24 +0,0 @@
import fsp from 'fs/promises';
import { toASCII } from 'punycode/punycode';
import { createMemoizedPromise } from './memo-promise';
import { getPublicSuffixListTextPromise } from './download-publicsuffixlist';
import { fileURLToPath } from 'url';
// TODO: Node's undici-based fetch doesn't support reading file: URLs yet
/**
 * Stand-in for `fetch` that handles `file:` URLs: converts the URL to a
 * filesystem path, reads that file, and returns its bytes wrapped in a Blob.
 * It is handed to gorhill's `enableWASM` as the `customFetch` option.
 *
 * @param url - a `file:` URL; `fileURLToPath` throws for any other scheme,
 *   which surfaces here as a rejected promise
 * @returns a Blob holding the file's contents
 */
const customFetch = async (url: URL): Promise<Blob> => {
  const filePath = fileURLToPath(url);
  const file = await fsp.readFile(filePath);
  // Typed as Blob (no `as any`) so the callback is checked against the
  // `(url: URL) => Promise<Blob>` shape that `enableWASM` declares.
  return new Blob([file]);
};
/**
 * Resolves to the default export of `@gorhill/publicsuffixlist` once it has
 * parsed the public suffix list text and been asked to enable its WASM mode.
 *
 * Wrapped in `createMemoizedPromise` (presumably so the setup runs once and
 * the same promise is shared by every caller; confirm in ./memo-promise).
 */
export const getGorhillPublicSuffixPromise = createMemoizedPromise(async () => {
  // Start both tasks before awaiting either so they proceed concurrently.
  const pslTextTask = getPublicSuffixListTextPromise();
  const gorhillModuleTask = import('@gorhill/publicsuffixlist');
  const [pslText, gorhillModule] = await Promise.all([pslTextTask, gorhillModuleTask]);

  const psl = gorhillModule.default;
  // Hostnames are normalised with punycode's toASCII while parsing the list.
  psl.parse(pslText, toASCII);
  // The WASM binary is loaded through customFetch; the boolean result is ignored.
  await psl.enableWASM({ customFetch });
  return psl;
});

View File

@ -3,13 +3,8 @@
* because `hostname` is already guaranteed to be a valid hostname!
*/
export function isProbablyIpv4(hostname: string): boolean {
// Cannot be shorter than 1.1.1.1
if (hostname.length < 7) {
return false;
}
// Cannot be longer than: 255.255.255.255
if (hostname.length > 15) {
// Cannot be shorter than 1.1.1.1 or longer than 255.255.255.255
if (hostname.length < 7 || hostname.length > 15) {
return false;
}

View File

@ -2,7 +2,7 @@
import { fetchRemoteTextByLine } from './fetch-text-by-line';
import { NetworkFilter } from '@cliqz/adblocker';
import { processLine } from './process-line';
import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
import tldts from 'tldts-experimental';
import picocolors from 'picocolors';
import { normalizeDomain } from './normalize-domain';
@ -10,7 +10,6 @@ import { fetchAssets } from './fetch-assets';
import { deserializeArray, fsFetchCache, serializeArray } from './cache-filesystem';
import type { Span } from '../trace';
import createKeywordFilter from './aho-corasick';
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
const DEBUG_DOMAIN_TO_FIND: string | null = null; // example.com | null
let foundDebugDomain = false;
@ -147,14 +146,12 @@ export async function processFilterRules(
const warningMessages: string[] = [];
const gorhill = await span.traceChild('get gorhill').tracePromise(getGorhillPublicSuffixPromise());
const MUTABLE_PARSE_LINE_RESULT: [string, ParseType] = ['', 1000];
/**
* @param {string} line
*/
const lineCb = (line: string) => {
const result = parse(line, gorhill, MUTABLE_PARSE_LINE_RESULT);
const result = parse(line, MUTABLE_PARSE_LINE_RESULT);
const flag = result[1];
if (flag === ParseType.Null) {
@ -282,7 +279,7 @@ const kwfilter = createKeywordFilter([
'$cname'
]);
function parse($line: string, gorhill: PublicSuffixList, result: [string, ParseType]): [hostname: string, flag: ParseType] {
function parse($line: string, result: [string, ParseType]): [hostname: string, flag: ParseType] {
if (
// doesn't include
!$line.includes('.') // rule with out dot can not be a domain
@ -557,8 +554,8 @@ function parse($line: string, gorhill: PublicSuffixList, result: [string, ParseT
: (lineEndsWithCaretVerticalBar ? -2 : undefined) // replace('^|', '')
);
const suffix = gorhill.getPublicSuffix(sliced);
if (!gorhill.suffixInPSL(suffix)) {
const suffix = tldts.getPublicSuffix(sliced);
if (!suffix) {
// This excludes domain-like resources such as `1.1.4.514.js`
result[1] = ParseType.Null;
return result;
@ -632,8 +629,8 @@ function parse($line: string, gorhill: PublicSuffixList, result: [string, ParseT
) {
const _domain = line.slice(0, -1);
const suffix = gorhill.getPublicSuffix(_domain);
if (!suffix || !gorhill.suffixInPSL(suffix)) {
const suffix = tldts.getPublicSuffix(_domain);
if (!suffix) {
// This excludes domain-like resources such as `_social_tracking.js^`
result[1] = ParseType.Null;
return result;
@ -688,7 +685,7 @@ function parse($line: string, gorhill: PublicSuffixList, result: [string, ParseT
sliceEnd = -9;
}
const sliced = (sliceStart !== 0 || sliceEnd !== undefined) ? line.slice(sliceStart, sliceEnd) : line;
const suffix = gorhill.getPublicSuffix(sliced);
const suffix = tldts.getPublicSuffix(sliced);
/**
* Fast exclude definitely not domain-like resource
*
@ -697,7 +694,7 @@ function parse($line: string, gorhill: PublicSuffixList, result: [string, ParseT
* `-cpm-ads.$badfilter`, suffix is `$badfilter`,
* `portal.librus.pl$$advertisement-module`, suffix is `pl$$advertisement-module`
*/
if (!suffix || !gorhill.suffixInPSL(suffix)) {
if (!suffix) {
// This excludes domain-like resources such as `.gatracking.js`, `.beacon.min.js` and `.cookielaw.js`
result[1] = ParseType.Null;
return result;

View File

@ -5,12 +5,10 @@ import { bench, group, run } from 'mitata';
import * as tldts from 'tldts';
import * as tldtsExperimental from 'tldts-experimental';
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
(async () => {
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
const gorhill = await getGorhillPublicSuffixPromise();
const tldtsOpt: Parameters<typeof tldts.getDomain>[1] = {
allowPrivateDomains: false,
extractHostname: false,
@ -21,18 +19,6 @@ import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
(['getDomain', 'getPublicSuffix', 'getSubdomain', 'parse'] as const).forEach(methodName => {
group(methodName, () => {
if (methodName in gorhill) {
bench('gorhill', () => {
for (let i = 0, len = data.length; i < len; i++) {
const line = data[i];
const safeGorhillLine = line[0] === '.' ? line.slice(1) : line;
// @ts-expect-error -- type guarded
gorhill[methodName](safeGorhillLine);
}
});
}
bench('tldts', () => {
for (let i = 0, len = data.length; i < len; i++) {
tldts[methodName](data[i], tldtsOpt);

36
Build/mod.d.ts vendored
View File

@ -1,36 +0,0 @@
/**
 * Ambient type declarations for the `@gorhill/publicsuffixlist` package.
 * Hand-written typings: the shapes below are what this project declares,
 * not something verified against the package at compile time.
 */
declare module '@gorhill/publicsuffixlist' {
/**
 * Serialized form returned by `toSelfie` and accepted by `fromSelfie`:
 * either a string or an object carrying `magic` and `buf32`.
 */
type Selfie =
| string
| {
magic: number,
buf32: number[]
};
/** Optional helper accepted by `fromSelfie`. */
interface Decoder {
decode: (bufferStr: string, buffer: ArrayBuffer) => void,
decodeSize: (bufferStr: string) => number
}
/** Optional helper accepted by `toSelfie`. */
interface Encoder {
encode: (buffer: ArrayBuffer, length: number) => string
}
/** Shape of the package's default export. */
export interface PublicSuffixList {
version: string,
/**
 * Loads list text; `toAscii` is a string-to-string converter supplied by
 * the caller (presumably used to punycode-encode entries; confirm upstream).
 */
parse(text: string, toAscii: (input: string) => string): void,
// NOTE(review): presumably returns the public suffix of `hostname`; confirm upstream.
getPublicSuffix(hostname: string): string,
// NOTE(review): presumably returns the registrable domain of `hostname`; confirm upstream.
getDomain(hostname: string): string,
// NOTE(review): presumably reports whether the given suffix is in the parsed list; confirm upstream.
suffixInPSL(hostname: string): boolean,
toSelfie(encoder?: null | Encoder): Selfie,
fromSelfie(selfie: Selfie, decoder?: null | Decoder): boolean,
/**
 * `customFetch`, when given, takes a URL and resolves to a Blob.
 * NOTE(review): the resolved boolean presumably signals whether WASM was enabled; confirm upstream.
 */
enableWASM(options?: {
customFetch?: null | ((url: URL) => Promise<Blob>)
}): Promise<boolean>,
disableWASM(): Promise<boolean>
}
const psl: PublicSuffixList;
export default psl;
}

View File

@ -541,6 +541,7 @@ DOMAIN-SUFFIX,pconline.com.cn
DOMAIN-SUFFIX,peiluyou.com
DOMAIN-SUFFIX,php.cn
DOMAIN-SUFFIX,pingan.com
DOMAIN-SUFFIX,pingwest.com
DOMAIN-SUFFIX,pplive.com
DOMAIN-SUFFIX,pps.tv
DOMAIN-SUFFIX,ppsimg.com

View File

@ -21,7 +21,6 @@
"license": "ISC",
"dependencies": {
"@cliqz/adblocker": "^1.30.0",
"@gorhill/publicsuffixlist": "3.0.1",
"async-retry": "^1.3.3",
"async-sema": "^3.1.1",
"better-sqlite3": "^11.1.2",

9
pnpm-lock.yaml generated
View File

@ -14,9 +14,6 @@ importers:
'@cliqz/adblocker':
specifier: ^1.30.0
version: 1.30.0
'@gorhill/publicsuffixlist':
specifier: 3.0.1
version: 3.0.1
async-retry:
specifier: ^1.3.3
version: 1.3.3
@ -183,10 +180,6 @@ packages:
resolution: {integrity: sha512-BsWiH1yFGjXXS2yvrf5LyuoSIIbPrGUWob917o+BTKuZ7qJdxX8aJLRxs1fS9n6r7vESrq1OUqb68dANcFXuQQ==}
engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0}
'@gorhill/publicsuffixlist@3.0.1':
resolution: {integrity: sha512-TJ3mLuEQ54BVmKejpU1AFuPU/qk8WJEszlVW6WQyLVKlC3Ot4K3OzNljFa1hH0sssmXS6tvii0fWxosUkH7byA==}
engines: {node: '>=14.0.0', npm: '>=6.14.4'}
'@humanwhocodes/module-importer@1.0.1':
resolution: {integrity: sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA==}
engines: {node: '>=12.22'}
@ -1633,8 +1626,6 @@ snapshots:
'@eslint/object-schema@2.1.4': {}
'@gorhill/publicsuffixlist@3.0.1': {}
'@humanwhocodes/module-importer@1.0.1': {}
'@humanwhocodes/retry@0.3.0': {}