diff --git a/Build/build-cdn-conf.js b/Build/build-cdn-conf.js index e3966bd3..03fd7cc7 100644 --- a/Build/build-cdn-conf.js +++ b/Build/build-cdn-conf.js @@ -2,7 +2,7 @@ const path = require('path'); const { createRuleset } = require('./lib/create-file'); const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('./lib/fetch-remote-text-by-line'); -const Trie = require('./lib/trie'); +const createTrie = require('./lib/trie'); const { task } = require('./lib/trace-runner'); const fs = require('fs'); const { processLine } = require('./lib/process-line'); @@ -10,7 +10,7 @@ const { processLine } = require('./lib/process-line'); const publicSuffixPath = path.resolve(__dirname, '../node_modules/.cache/public_suffix_list_dat.txt'); const getS3OSSDomains = async () => { - const trie = new Trie(); + const trie = createTrie(); if (fs.existsSync(publicSuffixPath)) { for await (const line of readFileByLine(publicSuffixPath)) { diff --git a/Build/build-phishing-domainset.js b/Build/build-phishing-domainset.js index e0f28aac..3868a30f 100644 --- a/Build/build-phishing-domainset.js +++ b/Build/build-phishing-domainset.js @@ -5,7 +5,7 @@ const { createRuleset } = require('./lib/create-file'); const { processLine } = require('./lib/process-line.js'); const { createDomainSorter } = require('./lib/stable-sort-domain'); const { traceSync, task } = require('./lib/trace-runner.js'); -const Trie = require('./lib/trie.js'); +const createTrie = require('./lib/trie.js'); const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix.js'); const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse.js'); const tldts = require('tldts'); @@ -79,7 +79,7 @@ const buildPhishingDomainSet = task(__filename, async () => { ]); traceSync('* whitelist', () => { - const trieForRemovingWhiteListed = Trie.from(domainSet); + const trieForRemovingWhiteListed = createTrie(domainSet); WHITELIST_DOMAIN.forEach(white => { trieForRemovingWhiteListed.find(`.${white}`, false).forEach(f => domainSet.delete(f)); if (trieForRemovingWhiteListed.has(white)) { diff --git a/Build/build-reject-domainset.js b/Build/build-reject-domainset.js index becdc875..c12a0921 100644 --- a/Build/build-reject-domainset.js +++ b/Build/build-reject-domainset.js @@ -3,7 +3,7 @@ const fse = require('fs-extra'); const { resolve: pathResolve } = require('path'); const { processHosts, processFilterRules } = require('./lib/parse-filter'); -const Trie = require('./lib/trie'); +const createTrie = require('./lib/trie'); const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source'); const { createRuleset, compareAndWriteFile } = require('./lib/create-file'); @@ -44,8 +44,8 @@ const buildRejectDomainSet = task(__filename, async () => { })), ...ADGUARD_FILTERS.map(input => { const promise = typeof input === 'string' - ? processFilterRules(input, undefined, false) - : processFilterRules(input[0], input[1] || undefined, input[2] ?? false); + ? 
processFilterRules(input, undefined) + : processFilterRules(input[0], input[1] || undefined); return promise.then((i) => { if (i) { @@ -82,7 +82,7 @@ const buildRejectDomainSet = task(__filename, async () => { ]); // remove pre-defined enforced blacklist from whitelist - const trie0 = Trie.from(filterRuleWhitelistDomainSets); + const trie0 = createTrie(filterRuleWhitelistDomainSets); PREDEFINED_ENFORCED_BACKLIST.forEach(enforcedBlack => { trie0.find(enforcedBlack).forEach(found => filterRuleWhitelistDomainSets.delete(found)); }); @@ -131,7 +131,7 @@ const buildRejectDomainSet = task(__filename, async () => { console.log(`Start deduping from black keywords/suffixes! (${previousSize})`); console.time('* Dedupe from black keywords/suffixes'); - const trie1 = Trie.from(domainSets); + const trie1 = createTrie(domainSets); domainSuffixSet.forEach(suffix => { trie1.find(suffix, true).forEach(f => domainSets.delete(f)); }); @@ -143,7 +143,7 @@ const buildRejectDomainSet = task(__filename, async () => { const kwfilter = createKeywordFilter(domainKeywordsSet); // Build whitelist trie, to handle case like removing `g.msn.com` due to white `.g.msn.com` (`@@||g.msn.com`) - const trieWhite = Trie.from(filterRuleWhitelistDomainSets); + const trieWhite = createTrie(filterRuleWhitelistDomainSets); for (const domain of domainSets) { if (domain[0] === '.') { if (trieWhite.contains(domain)) { diff --git a/Build/lib/domain-deduper.js b/Build/lib/domain-deduper.js index 4b24234d..595b9e7e 100644 --- a/Build/lib/domain-deduper.js +++ b/Build/lib/domain-deduper.js @@ -1,11 +1,11 @@ // @ts-check -const Trie = require('./trie'); +const createTrie = require('./trie'); /** * @param {string[]} inputDomains */ const domainDeduper = (inputDomains) => { - const trie = Trie.from(inputDomains); + const trie = createTrie(inputDomains); const sets = new Set(inputDomains); for (let j = 0, len = inputDomains.length; j < len; j++) { diff --git a/Build/lib/parse-filter.js b/Build/lib/parse-filter.js index cd08d48a..cbbb2ffa 100644 --- a/Build/lib/parse-filter.js +++ b/Build/lib/parse-filter.js @@ -1,6 +1,6 @@ // @ts-check const { fetchWithRetry } = require('./fetch-retry'); -const tldts = require('tldts'); +const tldts = require('./cached-tld-parse'); const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-text-by-line'); const { NetworkFilter } = require('@cliqz/adblocker'); const { processLine } = require('./process-line'); @@ -113,7 +113,7 @@ async function processHosts(hostsUrl, includeAllSubDomain = false) { * @param {readonly (string | URL)[] | undefined} [fallbackUrls] * @returns {Promise<{ white: Set, black: Set, foundDebugDomain: boolean }>} */ -async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdParties = false) { +async function processFilterRules(filterRulesUrl, fallbackUrls) { const runStart = performance.now(); /** @type Set */ @@ -148,7 +148,7 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart const gorhill = await getGorhillPublicSuffixPromise(); const lineCb = (line) => { - const result = parse(line, includeThirdParties, gorhill); + const result = parse(line, gorhill); if (result) { const flag = result[1]; const hostname = result[0]; @@ -180,12 +180,15 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart }; if (!fallbackUrls || fallbackUrls.length === 0) { - const downloadStart = performance.now(); + downloadTime = 0; + let last = performance.now(); for await (const line of await 
fetchRemoteTextAndCreateReadlineInterface(filterRulesUrl)) { + const now = performance.now(); + downloadTime += performance.now() - last; + last = now; // don't trim here lineCb(line); } - downloadTime = performance.now() - downloadStart; } else { let filterRules; @@ -229,11 +232,10 @@ const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder) /** * @param {string} $line - * @param {boolean} includeThirdParties * @param {import('gorhill-publicsuffixlist').default} gorhill - * @returns {null | [string, 0 | 1 | 2 | -1]} - 0 white include subdomain, 1 black abosulte, 2 black include subdomain, -1 white + * @returns {null | [hostname: string, flag: 0 | 1 | 2 | -1]} - 0 white include subdomain, 1 black abosulte, 2 black include subdomain, -1 white */ -function parse($line, includeThirdParties, gorhill) { +function parse($line, gorhill) { if ( // doesn't include !$line.includes('.') // rule with out dot can not be a domain @@ -297,7 +299,7 @@ function parse($line, includeThirdParties, gorhill) { if ( filter.hostname // filter.hasHostname() // must have && filter.isPlain() - && (!filter.isRegex()) + // && (!filter.isRegex()) // isPlain() === !isRegex() && (!filter.isFullRegex()) ) { if (!gorhill.getDomain(filter.hostname)) { @@ -307,22 +309,28 @@ function parse($line, includeThirdParties, gorhill) { if (!hostname) { return null; } + + // console.log({ + // '||': filter.isHostnameAnchor(), + // '|': filter.isLeftAnchor(), + // '|https://': !filter.isHostnameAnchor() && (filter.fromHttps() || filter.fromHttp()) + // }); + const isIncludeAllSubDomain = filter.isHostnameAnchor(); + if (filter.isException() || filter.isBadFilter()) { - return [hostname, 0]; + return [hostname, isIncludeAllSubDomain ? 0 : -1]; } const _1p = filter.firstParty(); const _3p = filter.thirdParty(); - if (_1p === _3p) { - return [hostname, 2]; - } - if (_3p) { - if (includeThirdParties) { - return [hostname, 2]; + + if (_1p) { + if (_1p === _3p) { + return [hostname, isIncludeAllSubDomain ? 2 : 1]; } return null; } - if (_1p) { + if (_3p) { return null; } } @@ -340,10 +348,12 @@ function parse($line, includeThirdParties, gorhill) { return null; } - const lineEndsWithCaretOrCaretVerticalBar = ( - lastChar === '^' - || (lastChar === '|' && line[len - 2] === '^') - ); + /* eslint-disable no-nested-ternary -- speed */ + + const linedEndsWithCaret = lastChar === '^'; + const lineEndsWithCaretVerticalBar = lastChar === '|' && line[len - 2] === '^'; + + const lineEndsWithCaretOrCaretVerticalBar = linedEndsWithCaret || lineEndsWithCaretVerticalBar; // whitelist (exception) if (firstChar === '@' && line[1] === '@') { @@ -397,13 +407,7 @@ function parse($line, includeThirdParties, gorhill) { } } - if ( - firstChar === '|' && line[1] === '|' - && ( - lineEndsWithCaretOrCaretVerticalBar - || line.endsWith('$cname') - ) - ) { + if (firstChar === '|' && (lineEndsWithCaretOrCaretVerticalBar || line.endsWith('$cname'))) { /** * Some malformed filters can not be parsed by NetworkFilter: * @@ -411,17 +415,26 @@ function parse($line, includeThirdParties, gorhill) { * `||solutions.|pages.indigovision.com^` * `||vystar..0rg@client.iebetanialaargentina.edu.co^` */ + + const includeAllSubDomain = line[1] === '|'; + + const sliceStart = includeAllSubDomain ? 2 : 1; + const sliceEnd = lastChar === '^' + ? -1 + : lineEndsWithCaretOrCaretVerticalBar + ? -2 + : line.endsWith('$cname') + ? 
-6 + : 0; + const _domain = line // .replace('||', '') - .slice(2) // we already make sure line startsWith || - .replace('^|', '') - .replace('$cname', '') - .replaceAll('^', '') + .slice(sliceStart, sliceEnd) // we already make sure line startsWith || .trim(); const domain = normalizeDomain(_domain); if (domain) { - return [domain, 2]; + return [domain, includeAllSubDomain ? 2 : 1]; } console.warn(' * [parse-filter E0002] (black) invalid domain:', _domain); @@ -439,7 +452,14 @@ function parse($line, includeThirdParties, gorhill) { * `.wap.x4399.com^` */ const _domain = line - .slice(1) // remove prefix dot + .slice( + 1, + linedEndsWithCaret + ? -1 + : lineEndsWithCaretVerticalBar + ? -2 + : 0 + ) // remove prefix dot .replace('^|', '') .replaceAll('^', '') .trim(); @@ -503,6 +523,13 @@ function parse($line, includeThirdParties, gorhill) { */ if (firstChar !== '|' && lastChar === '^') { const _domain = line.slice(0, -1); + + const suffix = gorhill.getPublicSuffix(_domain); + if (!suffix || !gorhill.suffixInPSL(suffix)) { + // This exclude domain-like resource like `_social_tracking.js^` + return null; + } + const domain = normalizeDomain(_domain); if (domain) { return [domain, 1]; @@ -540,6 +567,7 @@ function parse($line, includeThirdParties, gorhill) { } return null; + /* eslint-enable no-nested-ternary */ } module.exports.processDomainLists = processDomainLists; diff --git a/Build/lib/reject-data-source.js b/Build/lib/reject-data-source.js index d0d79920..5a4b4be6 100644 --- a/Build/lib/reject-data-source.js +++ b/Build/lib/reject-data-source.js @@ -17,8 +17,7 @@ const ADGUARD_FILTERS = /** @type {const} */([ 'https://easylist-downloads.adblockplus.org/easylist.txt', 'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easylist.txt', 'https://secure.fanboy.co.nz/easylist.txt' - ], - false + ] ], // Easy Privacy [ @@ -27,8 +26,7 @@ const ADGUARD_FILTERS = /** @type {const} */([ 'https://secure.fanboy.co.nz/easyprivacy.txt', 'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easyprivacy.txt', 'https://easylist-downloads.adblockplus.org/easyprivacy.txt' - ], - false + ] ], // AdGuard DNS Filter [ @@ -48,40 +46,35 @@ const ADGUARD_FILTERS = /** @type {const} */([ [ 'https://ublockorigin.github.io/uAssetsCDN/filters/filters.txt', 'https://ublockorigin.pages.dev/filters/filters.txt' - ], - false + ] ], [ 'https://ublockorigin.github.io/uAssets/filters/filters-2020.txt', [ 'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2020.txt', 'https://ublockorigin.pages.dev/filters/filters-2020.txt' - ], - false + ] ], [ 'https://ublockorigin.github.io/uAssets/filters/filters-2021.txt', [ 'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2021.txt', 'https://ublockorigin.pages.dev/filters/filters-2021.txt' - ], - false + ] ], [ 'https://ublockorigin.github.io/uAssets/filters/filters-2022.txt', [ 'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2022.txt', 'https://ublockorigin.pages.dev/filters/filters-2022.txt' - ], - false + ] ], [ 'https://ublockorigin.github.io/uAssets/filters/filters-2023.txt', [ 'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2023.txt', 'https://ublockorigin.pages.dev/filters/filters-2023.txt' - ], - false + ] ], // uBlock Origin Badware Risk List [ @@ -89,8 +82,7 @@ const ADGUARD_FILTERS = /** @type {const} */([ [ 'https://ublockorigin.github.io/uAssetsCDN/filters/badware.txt', 'https://ublockorigin.pages.dev/filters/badware.txt' - ], - false + ] ], // uBlock Origin Privacy List [ @@ -98,8 +90,7 @@ const ADGUARD_FILTERS = 
/** @type {const} */([ [ 'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.txt', 'https://ublockorigin.pages.dev/filters/privacy.txt' - ], - false + ] ], // uBlock Origin Resource Abuse [ @@ -107,8 +98,7 @@ const ADGUARD_FILTERS = /** @type {const} */([ [ 'https://ublockorigin.github.io/uAssetsCDN/filters/resource-abuse.txt', 'https://ublockorigin.pages.dev/filters/resource-abuse.txt' - ], - false + ] ], // uBlock Origin Unbreak [ @@ -116,8 +106,7 @@ const ADGUARD_FILTERS = /** @type {const} */([ [ 'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.txt', 'https://ublockorigin.pages.dev/filters/unbreak.txt' - ], - false + ] ], // AdGuard Base Filter 'https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt', @@ -136,8 +125,7 @@ const ADGUARD_FILTERS = /** @type {const} */([ 'https://easylist.to/easylistgermany/easylistgermany.txt', [ 'https://easylist-downloads.adblockplus.org/easylistgermany.txt' - ], - false + ] ], // Curben's UrlHaus Malicious URL Blocklist [ @@ -146,8 +134,7 @@ const ADGUARD_FILTERS = /** @type {const} */([ 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt' // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while // 'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt' - ], - false + ] ], // Curben's Phishing URL Blocklist [ @@ -156,8 +143,7 @@ const ADGUARD_FILTERS = /** @type {const} */([ 'https://phishing-filter.pages.dev/phishing-filter-agh.txt' // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while // 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt' - ], - false + ] ], // Curben's PUP Domains Blocklist [ @@ -166,8 +152,7 @@ const ADGUARD_FILTERS = /** @type {const} */([ 'https://pup-filter.pages.dev/pup-filter-agh.txt' // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while // 'https://malware-filter.gitlab.io/malware-filter/pup-filter-agh.txt' - ], - false + ] ], // GameConsoleAdblockList 'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt', diff --git a/Build/lib/trie.js b/Build/lib/trie.js index c5da0579..492652b4 100644 --- a/Build/lib/trie.js +++ b/Build/lib/trie.js @@ -4,38 +4,39 @@ const SENTINEL = String.fromCharCode(0); -class Trie { - size = 0; - root = {}; +/** + * @param {string[] | Set} [from] + */ +const createTrie = (from) => { + let size = 0; + const root = {}; /** * Method used to add the given prefix to the trie. * * @param {string} suffix - Prefix to follow. - * @return {Trie} */ - add(suffix) { - let node = this.root; + const add = (suffix) => { + let node = root; let token; - for (let i = suffix.length - 1; i >= 0; i--) { token = suffix[i]; - - node = node[token] || (node[token] = {}); + node[token] ||= {}; + node = node[token]; } // Do we need to increase size? - if (!(SENTINEL in node)) this.size++; + if (!(SENTINEL in node)) { + size++; + } node[SENTINEL] = true; - - return this; - } + }; /** * @param {string} suffix */ - contains(suffix) { - let node = this.root; + const contains = (suffix) => { + let node = root; let token; for (let i = suffix.length - 1; i >= 0; i--) { @@ -47,8 +48,7 @@ class Trie { } return true; - } - + }; /** * Method used to retrieve every item in the trie with the given prefix. 
* @@ -56,8 +56,8 @@ class Trie { * @param {boolean} [includeEqualWithSuffix] * @return {string[]} */ - find(suffix, includeEqualWithSuffix = true) { - let node = this.root; + const find = (suffix, includeEqualWithSuffix = true) => { + let node = root; const matches = []; let token; @@ -99,48 +99,7 @@ class Trie { } return matches; - } - - toJSON() { - return this.root; - } - - /** - * Method used to clear the trie. - * - * @return {void} - */ - // clear() { - // // Properties - // this.root = {}; - // this.size = 0; - // } - - /** - * Method used to update the value of the given prefix in the trie. - * - * @param {string|array} prefix - Prefix to follow. - * @param {(oldValue: any | undefined) => any} updateFunction - Update value visitor callback. - * @return {Trie} - */ - // update(prefix, updateFunction) { - // let node = this.root; - // let token; - - // for (let i = 0, l = prefix.length; i < l; i++) { - // token = prefix[i]; - - // node = node[token] || (node[token] = {}); - // } - - // // Do we need to increase size? - // if (!(SENTINEL in node)) - // this.size++; - - // node[SENTINEL] = updateFunction(node[SENTINEL]); - - // return this; - // } + }; /** * Method used to delete a prefix from the trie. @@ -148,8 +107,8 @@ class Trie { * @param {string} suffix - Prefix to delete. * @return {boolean} */ - delete(suffix) { - let node = this.root; + const remove = (suffix) => { + let node = root; let toPrune = null; let tokenToPrune = null; let parent; @@ -179,7 +138,7 @@ class Trie { if (!(SENTINEL in node)) return false; - this.size--; + size--; if (toPrune) { delete toPrune[tokenToPrune]; @@ -188,7 +147,7 @@ class Trie { } return true; - } + }; /** * Method used to assert whether the given prefix exists in the Trie. @@ -196,8 +155,8 @@ class Trie { * @param {string} suffix - Prefix to check. * @return {boolean} */ - has(suffix) { - let node = this.root; + const has = (suffix) => { + let node = root; let token; for (let i = suffix.length - 1; i >= 0; i--) { @@ -210,86 +169,288 @@ class Trie { } return SENTINEL in node; - } - - /** - * @return {string[]} - */ - dump() { - const node = this.root; - const nodeStack = []; - const prefixStack = []; - // Resolving initial prefix - const prefix = ''; - - nodeStack.push(node); - prefixStack.push(prefix); - - /** @type {string[]} */ - const results = []; - - let currentNode; - let currentPrefix; - let hasValue = false; - let k; - - while (nodeStack.length) { - currentNode = nodeStack.pop(); - currentPrefix = prefixStack.pop(); - - // eslint-disable-next-line guard-for-in -- plain object - for (k in currentNode) { - if (k === SENTINEL) { - hasValue = true; - continue; - } - - nodeStack.push(currentNode[k]); - prefixStack.push(k + currentPrefix); - } - - if (hasValue) results.push(currentPrefix); - } - - return results; - } - - /** - * Convenience known methods. - */ - // inspect() { - // const proxy = new Set(); - - // const iterator = this.prefixes(); - // let step; - - // while ((step = iterator.next(), !step.done)) - // proxy.add(step.value); - - // // Trick so that node displays the name of the constructor - // Object.defineProperty(proxy, 'constructor', { - // value: Trie, - // enumerable: false - // }); - - // return proxy; - // } - /** - * Static .from function taking an arbitrary iterable & converting it into - * a trie. - * - * @param {string[] | Set} iterable - Target iterable. 
- * @return {Trie} - */ - static from = iterable => { - const trie = new Trie(); - iterable.forEach(i => trie.add(i)); - return trie; }; -} + + if (from) { + from.forEach(add); + } + + return { + add, + contains, + find, + remove, + delete: remove, + has, + get size() { + return size; + } + }; +}; + +// class Trie { +// size = 0; +// root = {}; + +// /** +// * @param {string} suffix +// */ +// contains(suffix) { +// let node = this.root; +// let token; + +// for (let i = suffix.length - 1; i >= 0; i--) { +// token = suffix[i]; + +// node = node[token]; + +// if (node == null) return false; +// } + +// return true; +// } + +// /** +// * Method used to retrieve every item in the trie with the given prefix. +// * +// * @param {string} suffix - Prefix to query. +// * @param {boolean} [includeEqualWithSuffix] +// * @return {string[]} +// */ +// find(suffix, includeEqualWithSuffix = true) { +// let node = this.root; +// const matches = []; +// let token; + +// for (let i = suffix.length - 1; i >= 0; i--) { +// token = suffix[i]; + +// node = node[token]; + +// if (node == null) return matches; +// } + +// // Performing DFS from prefix +// const nodeStack = [node]; + +// const suffixStack = [suffix]; +// let k; + +// let $suffix = suffix; + +// while (nodeStack.length) { +// $suffix = suffixStack.pop(); +// node = nodeStack.pop(); + +// // eslint-disable-next-line guard-for-in -- plain object +// for (k in node) { +// if (k === SENTINEL) { +// if (includeEqualWithSuffix) { +// matches.push($suffix); +// } else if ($suffix !== suffix) { +// matches.push($suffix); +// } + +// continue; +// } + +// nodeStack.push(node[k]); +// suffixStack.push(k + $suffix); +// } +// } + +// return matches; +// } + +// // toJSON() { +// // return this.root; +// // } + +// /** +// * Method used to clear the trie. +// * +// * @return {void} +// */ +// // clear() { +// // // Properties +// // this.root = {}; +// // this.size = 0; +// // } + +// /** +// * Method used to update the value of the given prefix in the trie. +// * +// * @param {string|array} prefix - Prefix to follow. +// * @param {(oldValue: any | undefined) => any} updateFunction - Update value visitor callback. +// * @return {Trie} +// */ +// // update(prefix, updateFunction) { +// // let node = this.root; +// // let token; + +// // for (let i = 0, l = prefix.length; i < l; i++) { +// // token = prefix[i]; + +// // node = node[token] || (node[token] = {}); +// // } + +// // // Do we need to increase size? +// // if (!(SENTINEL in node)) +// // this.size++; + +// // node[SENTINEL] = updateFunction(node[SENTINEL]); + +// // return this; +// // } + +// /** +// * Method used to delete a prefix from the trie. +// * +// * @param {string} suffix - Prefix to delete. 
+// * @return {boolean} +// */ +// delete(suffix) { +// let node = this.root; +// let toPrune = null; +// let tokenToPrune = null; +// let parent; +// let token; + +// for (let i = suffix.length - 1; i >= 0; i--) { +// token = suffix[i]; +// parent = node; +// node = node[token]; + +// // Prefix does not exist +// if (typeof node === 'undefined') { +// return false; +// } + +// // Keeping track of a potential branch to prune +// if (toPrune !== null) { +// if (Object.keys(node).length > 1) { +// toPrune = null; +// tokenToPrune = null; +// } +// } else if (Object.keys(node).length < 2) { +// toPrune = parent; +// tokenToPrune = token; +// } +// } + +// if (!(SENTINEL in node)) return false; + +// this.size--; + +// if (toPrune) { +// delete toPrune[tokenToPrune]; +// } else { +// delete node[SENTINEL]; +// } + +// return true; +// } + +// /** +// * Method used to assert whether the given prefix exists in the Trie. +// * +// * @param {string} suffix - Prefix to check. +// * @return {boolean} +// */ +// has(suffix) { +// let node = this.root; +// let token; + +// for (let i = suffix.length - 1; i >= 0; i--) { +// token = suffix[i]; +// node = node[token]; + +// if (typeof node === 'undefined') { +// return false; +// } +// } + +// return SENTINEL in node; +// } + +// /** +// * @return {string[]} +// */ +// // dump() { +// // const node = this.root; +// // const nodeStack = []; +// // const prefixStack = []; +// // // Resolving initial prefix +// // const prefix = ''; + +// // nodeStack.push(node); +// // prefixStack.push(prefix); + +// // /** @type {string[]} */ +// // const results = []; + +// // let currentNode; +// // let currentPrefix; +// // let hasValue = false; +// // let k; + +// // while (nodeStack.length) { +// // currentNode = nodeStack.pop(); +// // currentPrefix = prefixStack.pop(); + +// // // eslint-disable-next-line guard-for-in -- plain object +// // for (k in currentNode) { +// // if (k === SENTINEL) { +// // hasValue = true; +// // continue; +// // } + +// // nodeStack.push(currentNode[k]); +// // prefixStack.push(k + currentPrefix); +// // } + +// // if (hasValue) results.push(currentPrefix); +// // } + +// // return results; +// // } + +// /** +// * Convenience known methods. +// */ +// // inspect() { +// // const proxy = new Set(); + +// // const iterator = this.prefixes(); +// // let step; + +// // while ((step = iterator.next(), !step.done)) +// // proxy.add(step.value); + +// // // Trick so that node displays the name of the constructor +// // Object.defineProperty(proxy, 'constructor', { +// // value: Trie, +// // enumerable: false +// // }); + +// // return proxy; +// // } +// /** +// * Static .from function taking an arbitrary iterable & converting it into +// * a trie. +// * +// * @param {string[] | Set} iterable - Target iterable. +// * @return {Trie} +// */ +// static from = iterable => { +// const trie = new Trie(); +// iterable.forEach(i => trie.add(i)); +// return trie; +// }; +// } /** * Exporting. 
*/ module.exports.SENTINEL = SENTINEL; -module.exports = Trie; +module.exports = createTrie; diff --git a/Build/lib/trie.test.js b/Build/lib/trie.test.js index 6cdf3d4b..a550c288 100644 --- a/Build/lib/trie.test.js +++ b/Build/lib/trie.test.js @@ -1,12 +1,12 @@ require('chai').should(); -const Trie = require('./trie'); +const createTrie = require('./trie'); const assert = require('assert'); const { describe, it } = require('mocha'); describe('Trie', () => { it('should be possible to add items to a Trie.', () => { - const trie = new Trie(); + const trie = createTrie(); trie.add('sukka'); trie.add('ukka'); @@ -22,7 +22,7 @@ describe('Trie', () => { }); it('adding the same item several times should not increase size.', () => { - const trie = new Trie(); + const trie = createTrie(); trie.add('rat'); trie.add('erat'); @@ -33,21 +33,14 @@ describe('Trie', () => { }); it('should be possible to set the null sequence.', () => { - let trie = new Trie(); + const trie = createTrie(); trie.add(''); - trie.size.should.eq(1); trie.has('').should.eq(true); - - trie = new Trie(Array); - - trie.add([]); - trie.size.should.eq(1); - trie.has([]).should.eq(true); }); it('should be possible to delete items.', () => { - const trie = new Trie(); + const trie = createTrie(); trie.add('rat'); trie.add('rate'); @@ -64,16 +57,13 @@ describe('Trie', () => { trie.size.should.eq(2); assert.strictEqual(trie.delete('rate'), true); - assert.strictEqual(trie.size, 1); - assert.strictEqual(trie.delete('tar'), true); - assert.strictEqual(trie.size, 0); }); it('should be possible to check the existence of a sequence in the Trie.', () => { - const trie = new Trie(); + const trie = createTrie(); trie.add('romanesque'); @@ -83,7 +73,7 @@ describe('Trie', () => { }); it('should be possible to retrieve items matching the given prefix.', () => { - const trie = new Trie(); + const trie = createTrie(); trie.add('roman'); trie.add('esqueroman'); @@ -154,7 +144,7 @@ describe('Trie', () => { it('should be possible to create a trie from an arbitrary iterable.', () => { const words = ['roman', 'esqueroman']; - const trie = Trie.from(words); + const trie = createTrie(words); assert.strictEqual(trie.size, 2); assert.deepStrictEqual(trie.has('roman'), true); @@ -163,14 +153,14 @@ describe('Trie', () => { describe('surge domainset dedupe', () => { it('should not remove same entry', () => { - const trie = Trie.from(['.skk.moe', 'noc.one']); + const trie = createTrie(['.skk.moe', 'noc.one']); trie.find('.skk.moe').should.eql(['.skk.moe']); trie.find('noc.one').should.eql(['noc.one']); }); it('should remove subdomain', () => { - const trie = Trie.from(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']); + const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']); // trie.find('noc.one').should.eql(['www.noc.one']); trie.find('.skk.moe').should.eql(['image.cdn.skk.moe', 'blog.skk.moe']); // trie.find('sukkaw.net').should.eql(['cdn.sukkaw.net']); @@ -178,7 +168,7 @@ describe('surge domainset dedupe', () => { }); it('should not remove non-subdomain', () => { - const trie = Trie.from(['skk.moe', 'sukkaskk.moe']); + const trie = createTrie(['skk.moe', 'sukkaskk.moe']); trie.find('.skk.moe').should.eql([]); }); }); diff --git a/Source/non_ip/reject.conf b/Source/non_ip/reject.conf index 1ae39805..6a0eaf39 100644 --- a/Source/non_ip/reject.conf +++ b/Source/non_ip/reject.conf @@ -108,6 +108,8 @@ DOMAIN-KEYWORD,_vmind.qqvideo.tc.qq.com 
DOMAIN-KEYWORD,-logging.nextmedia.com DOMAIN-KEYWORD,-spiky.clevertap-prod.com DOMAIN-KEYWORD,.engage.3m. +DOMAIN-KEYWORD,telemetry.officeapps.live.com +DOMAIN-KEYWORD,-launches.appsflyersdk.com AND,((DOMAIN-KEYWORD,genuine), (DOMAIN-KEYWORD,autodesk))
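Notes (not part of the patch): a minimal usage sketch of the new `createTrie` factory exported by `Build/lib/trie.js`, assuming only the API surface visible in this diff — a closure-backed `size` getter, suffix-oriented `add`/`has`/`contains`/`find`, and `remove` with a `delete` alias. The entries and expected results mirror the cases in `Build/lib/trie.test.js`.

```js
// Sketch assumes it sits next to the build scripts in Build/,
// matching the require path they use.
const createTrie = require('./lib/trie');

// The factory replaces both `new Trie()` and `Trie.from(iterable)`:
// an optional array or Set is added entry by entry at creation time.
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']);

trie.add('skk.moe');
console.log(trie.size); // 6 -- `size` is a getter over a closure variable

// Characters are indexed from the end of each string, so `find` is a
// suffix query: every stored entry ending with '.skk.moe'.
console.log(trie.find('.skk.moe')); // ['image.cdn.skk.moe', 'blog.skk.moe']

// Pass `false` to exclude an entry equal to the queried suffix itself
// (how the whitelist pass in build-phishing-domainset.js uses it).
console.log(trie.find('skk.moe', false)); // subdomain entries only, not 'skk.moe'

// `has` requires an exact stored entry; `contains` only walks the suffix,
// i.e. "does any stored entry end with this string?".
console.log(trie.has('.skk.moe'));      // false
console.log(trie.contains('.skk.moe')); // true

trie.remove('cdn.sukkaw.net'); // `delete` is an alias of `remove`
console.log(trie.size);        // 5
```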
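On the call-site side, each `ADGUARD_FILTERS` entry loses its trailing `includeThirdParties` boolean: an entry is now either a bare URL string or a `[primaryUrl, mirrorUrls]` pair, and `processFilterRules` takes only two arguments. A short sketch of the shapes the dispatch in `build-reject-domainset.js` now expects (the URLs are placeholders):

```js
const { processFilterRules } = require('./lib/parse-filter');

// Both entry shapes remain valid after this patch; the third tuple element is gone.
const filters = [
  'https://example.com/just-a-filter.txt',
  [
    'https://primary.example.com/filter.txt',
    ['https://mirror-a.example.com/filter.txt', 'https://mirror-b.example.com/filter.txt']
  ]
];

const promises = filters.map(input => (
  typeof input === 'string'
    ? processFilterRules(input, undefined)                 // no fallback mirrors
    : processFilterRules(input[0], input[1] || undefined)  // primary URL plus mirrors
));
```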
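Finally, `parse()` now derives subdomain behaviour from the rule itself (`filter.isHostnameAnchor()` in the NetworkFilter path, `line[1] === '|'` in the manual `||domain^` path) instead of the removed `includeThirdParties` argument, so all four flag values in its return tuple are actually produced. The consumer below is a hypothetical sketch — the real `lineCb` handling is outside this hunk — using the leading-dot "include subdomains" convention seen elsewhere in these build scripts:

```js
// parse() returns null or [hostname, flag]:
//    0 -> whitelist, subdomains included     -1 -> whitelist, exact hostname only
//    2 -> blacklist, subdomains included      1 -> blacklist, exact hostname only
const handleParsed = (result, whiteSet, blackSet) => {
  if (result === null) return;
  const [hostname, flag] = result;
  switch (flag) {
    case 0: whiteSet.add(`.${hostname}`); break; // e.g. `@@||example.com^`
    case -1: whiteSet.add(hostname); break;      // exception without the `||` anchor
    case 2: blackSet.add(`.${hostname}`); break; // e.g. `||example.com^`
    case 1: blackSet.add(hostname); break;       // e.g. `example.com^` or `|https://example.com^`
    default: break;
  }
};
```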