diff --git a/Build/constants/loose-tldts-opt.ts b/Build/constants/loose-tldts-opt.ts index 882757f7..ee764c39 100644 --- a/Build/constants/loose-tldts-opt.ts +++ b/Build/constants/loose-tldts-opt.ts @@ -12,3 +12,8 @@ export const loosTldOptWithPrivateDomains: Parameters ...looseTldtsOpt, allowPrivateDomains: true }; + +export const normalizeTldtsOpt: Parameters[1] = { + allowPrivateDomains: true + // detectIp: true +}; diff --git a/Build/lib/normalize-domain.ts b/Build/lib/normalize-domain.ts index d09642a2..6da49dbc 100644 --- a/Build/lib/normalize-domain.ts +++ b/Build/lib/normalize-domain.ts @@ -1,26 +1,32 @@ // https://github.com/remusao/tldts/issues/2121 // import tldts from 'tldts-experimental'; import tldts from 'tldts'; -export const normalizeDomain = (domain: string) => { - if (!domain) return null; +import { normalizeTldtsOpt } from '../constants/loose-tldts-opt'; + +type TldTsParsed = ReturnType; + +export const normalizeDomain = (domain: string, parsed: TldTsParsed | null = null) => { + if (domain.length === 0) return null; + + parsed ??= tldts.parse(domain, normalizeTldtsOpt); - const parsed = tldts.parse(domain, { allowPrivateDomains: true, allowIcannDomains: true, detectIp: true }); if (parsed.isIp) return null; - if (!parsed.hostname) return null; + + let h = parsed.hostname; + if (h === null) return null; // Private invalid domain (things like .tor, .dn42, etc) if (!parsed.isIcann && !parsed.isPrivate) return null; - let h = parsed.hostname; - - let sliceStart: number | undefined; - let sliceEnd: number | undefined; + let sliceStart = 0; + let sliceEnd = 0; if (h[0] === '.') sliceStart = 1; - if (h.endsWith('.')) sliceEnd = -1; + // eslint-disable-next-line sukka/string/prefer-string-starts-ends-with -- performance + if (h[h.length - 1] === '.') sliceEnd = -1; - if (sliceStart !== undefined || sliceEnd !== undefined) { + if (sliceStart !== 0 || sliceEnd !== 0) { h = h.slice(sliceStart, sliceEnd); } - return h || null; + return h.length > 0 ? h : null; }; diff --git a/Build/lib/parse-filter.test.ts b/Build/lib/parse-filter.test.ts index 337b615e..13ac1879 100644 --- a/Build/lib/parse-filter.test.ts +++ b/Build/lib/parse-filter.test.ts @@ -1,12 +1,20 @@ import { describe, it } from 'mocha'; -import { processFilterRules } from './parse-filter'; +import { parse, processFilterRules, type ParseType } from './parse-filter'; import { createCacheKey } from './cache-filesystem'; import { createSpan } from '../trace'; const cacheKey = createCacheKey(__filename); -describe('processFilterRules', () => { +describe('parse', () => { + const MUTABLE_PARSE_LINE_RESULT: [string, ParseType] = ['', 1000]; + + it('||top.mail.ru^$badfilter', () => { + console.log(parse('||top.mail.ru^$badfilter', MUTABLE_PARSE_LINE_RESULT)); + }); +}); + +describe.skip('processFilterRules', () => { it('https://filters.adtidy.org/extension/ublock/filters/18_optimized.txt', () => { console.log(processFilterRules( createSpan('noop'), diff --git a/Build/lib/parse-filter.ts b/Build/lib/parse-filter.ts index d680ef93..a833b83f 100644 --- a/Build/lib/parse-filter.ts +++ b/Build/lib/parse-filter.ts @@ -143,6 +143,8 @@ const enum ParseType { Null = 1000 } +export { type ParseType }; + export async function processFilterRules( parentSpan: Span, filterRulesUrl: string, @@ -289,10 +291,12 @@ const kwfilter = createKeywordFilter([ '$popup', '$removeparam', '$popunder', - '$cname' + '$cname', + // some bad syntax + '^popup' ]); -function parse($line: string, result: [string, ParseType]): [hostname: string, flag: ParseType] { +export function parse($line: string, result: [string, ParseType]): [hostname: string, flag: ParseType] { if ( // doesn't include !$line.includes('.') // rule with out dot can not be a domain @@ -685,6 +689,7 @@ function parse($line: string, result: [string, ParseType]): [hostname: string, f */ let sliceStart = 0; let sliceEnd: number | undefined; + if (lineStartsWithSingleDot) { sliceStart = 1; } @@ -696,28 +701,17 @@ function parse($line: string, result: [string, ParseType]): [hostname: string, f line.endsWith('$document') ) { sliceEnd = -9; + } else if (line.endsWith('$badfilter')) { + sliceEnd = -10; } const sliced = (sliceStart !== 0 || sliceEnd !== undefined) ? line.slice(sliceStart, sliceEnd) : line; - const suffix = tldts.getPublicSuffix(sliced, looseTldtsOpt); - /** - * Fast exclude definitely not domain-like resource - * - * `.gatracking.js`, suffix is `js`, - * `.ads.css`, suffix is `css`, - * `-cpm-ads.$badfilter`, suffix is `$badfilter`, - * `portal.librus.pl$$advertisement-module`, suffix is `pl$$advertisement-module` - */ - if (!suffix) { - // This exclude domain-like resource like `.gatracking.js`, `.beacon.min.js` and `.cookielaw.js` - result[1] = ParseType.Null; - return result; - } const tryNormalizeDomain = normalizeDomain(sliced); if (tryNormalizeDomain === sliced) { // the entire rule is domain result[0] = sliced; result[1] = ParseType.BlackIncludeSubdomain; + return result; }