Perf: make AdGuard rule parse faster

SukkaW 2023-09-18 11:40:17 +08:00
parent 89b8638883
commit 702ded53c5
9 changed files with 407 additions and 241 deletions
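
The core of the change: lib/trie.js no longer exports a Trie class. It now exports a createTrie factory that closes over its own size counter and root object, so call sites replace new Trie() and Trie.from(iterable) with a single createTrie(iterable) call, and processFilterRules drops its includeThirdParties flag. A minimal sketch (not part of the diff) of the call-site migration, with illustrative domains:

const createTrie = require('./lib/trie'); // path as required by the build scripts below

// before: const trie = new Trie(); const trie2 = Trie.from(['skk.moe', 'blog.skk.moe']);
// after:
const trie = createTrie();                              // empty trie
trie.add('skk.moe');
const trie2 = createTrie(['skk.moe', 'blog.skk.moe']);  // build straight from an array or Set
console.log(trie2.size);                                // 2
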

View File

@ -2,7 +2,7 @@
const path = require('path');
const { createRuleset } = require('./lib/create-file');
const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('./lib/fetch-remote-text-by-line');
const Trie = require('./lib/trie');
const createTrie = require('./lib/trie');
const { task } = require('./lib/trace-runner');
const fs = require('fs');
const { processLine } = require('./lib/process-line');
@ -10,7 +10,7 @@ const { processLine } = require('./lib/process-line');
const publicSuffixPath = path.resolve(__dirname, '../node_modules/.cache/public_suffix_list_dat.txt');
const getS3OSSDomains = async () => {
const trie = new Trie();
const trie = createTrie();
if (fs.existsSync(publicSuffixPath)) {
for await (const line of readFileByLine(publicSuffixPath)) {

View File

@ -5,7 +5,7 @@ const { createRuleset } = require('./lib/create-file');
const { processLine } = require('./lib/process-line.js');
const { createDomainSorter } = require('./lib/stable-sort-domain');
const { traceSync, task } = require('./lib/trace-runner.js');
const Trie = require('./lib/trie.js');
const createTrie = require('./lib/trie.js');
const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix.js');
const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse.js');
const tldts = require('tldts');
@ -79,7 +79,7 @@ const buildPhishingDomainSet = task(__filename, async () => {
]);
traceSync('* whitelist', () => {
const trieForRemovingWhiteListed = Trie.from(domainSet);
const trieForRemovingWhiteListed = createTrie(domainSet);
WHITELIST_DOMAIN.forEach(white => {
trieForRemovingWhiteListed.find(`.${white}`, false).forEach(f => domainSet.delete(f));
if (trieForRemovingWhiteListed.has(white)) {
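
For context on the two calls above: find('.' + white, false) asks the trie for every stored domain that ends with the given suffix while excluding an entry exactly equal to it (the second argument is includeEqualWithSuffix), and the separate has(white) branch handles the exact match. A small illustrative sketch, not taken from the repo's real data:

const createTrie = require('./lib/trie');

const domainSet = new Set(['skk.moe', 'blog.skk.moe', 'example.com']);
const trieForRemovingWhiteListed = createTrie(domainSet);

// remove entries covered by the whitelisted suffix, but not the bare domain itself
trieForRemovingWhiteListed.find('.skk.moe', false).forEach(f => domainSet.delete(f));
console.log([...domainSet]);                             // [ 'skk.moe', 'example.com' ]
console.log(trieForRemovingWhiteListed.has('skk.moe'));  // true -- handled by the has() branch
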

View File

@ -3,7 +3,7 @@ const fse = require('fs-extra');
const { resolve: pathResolve } = require('path');
const { processHosts, processFilterRules } = require('./lib/parse-filter');
const Trie = require('./lib/trie');
const createTrie = require('./lib/trie');
const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source');
const { createRuleset, compareAndWriteFile } = require('./lib/create-file');
@ -44,8 +44,8 @@ const buildRejectDomainSet = task(__filename, async () => {
})),
...ADGUARD_FILTERS.map(input => {
const promise = typeof input === 'string'
? processFilterRules(input, undefined, false)
: processFilterRules(input[0], input[1] || undefined, input[2] ?? false);
? processFilterRules(input, undefined)
: processFilterRules(input[0], input[1] || undefined);
return promise.then((i) => {
if (i) {
@ -82,7 +82,7 @@ const buildRejectDomainSet = task(__filename, async () => {
]);
// remove pre-defined enforced blacklist from whitelist
const trie0 = Trie.from(filterRuleWhitelistDomainSets);
const trie0 = createTrie(filterRuleWhitelistDomainSets);
PREDEFINED_ENFORCED_BACKLIST.forEach(enforcedBlack => {
trie0.find(enforcedBlack).forEach(found => filterRuleWhitelistDomainSets.delete(found));
});
@ -131,7 +131,7 @@ const buildRejectDomainSet = task(__filename, async () => {
console.log(`Start deduping from black keywords/suffixes! (${previousSize})`);
console.time('* Dedupe from black keywords/suffixes');
const trie1 = Trie.from(domainSets);
const trie1 = createTrie(domainSets);
domainSuffixSet.forEach(suffix => {
trie1.find(suffix, true).forEach(f => domainSets.delete(f));
});
@ -143,7 +143,7 @@ const buildRejectDomainSet = task(__filename, async () => {
const kwfilter = createKeywordFilter(domainKeywordsSet);
// Build whitelist trie, to handle case like removing `g.msn.com` due to white `.g.msn.com` (`@@||g.msn.com`)
const trieWhite = Trie.from(filterRuleWhitelistDomainSets);
const trieWhite = createTrie(filterRuleWhitelistDomainSets);
for (const domain of domainSets) {
if (domain[0] === '.') {
if (trieWhite.contains(domain)) {
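
The trieWhite.contains(domain) check above works because contains() only verifies that the queried string is a suffix of (or equal to) some stored entry, without requiring a terminal match, so a blocking entry such as .g.msn.com is caught by the whitelist entry .g.msn.com that an exception rule like @@||g.msn.com produces. A rough sketch with illustrative whitelist contents, not the real data:

const createTrie = require('./lib/trie');

// whitelist produced from exception rules, e.g. @@||g.msn.com -> '.g.msn.com'
const filterRuleWhitelistDomainSets = new Set(['.g.msn.com']);
const trieWhite = createTrie(filterRuleWhitelistDomainSets);

console.log(trieWhite.contains('.g.msn.com'));       // true  -> removed/skipped by the branch above
console.log(trieWhite.contains('.ads.example.com')); // false -> the blocking entry is kept
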

View File

@ -1,11 +1,11 @@
// @ts-check
const Trie = require('./trie');
const createTrie = require('./trie');
/**
* @param {string[]} inputDomains
*/
const domainDeduper = (inputDomains) => {
const trie = Trie.from(inputDomains);
const trie = createTrie(inputDomains);
const sets = new Set(inputDomains);
for (let j = 0, len = inputDomains.length; j < len; j++) {
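
The rest of the loop is cut off by this hunk, but the intent (also exercised by the "surge domainset dedupe" tests further down) is suffix dedupe: an entry that is a subdomain of a ".domain" wildcard in the same list is redundant. A hedged sketch of that pattern, assuming the elided loop body queries the trie roughly like this:

const createTrie = require('./lib/trie');

const inputDomains = ['.skk.moe', 'blog.skk.moe', 'image.cdn.skk.moe', 'sukkaskk.moe'];
const trie = createTrie(inputDomains);
const sets = new Set(inputDomains);

for (let j = 0, len = inputDomains.length; j < len; j++) {
  const domain = inputDomains[j];
  if (domain[0] !== '.') continue;                                // only ".domain" wildcards cover other entries
  trie.find(domain, false).forEach(found => sets.delete(found));  // drop entries already covered
}

console.log([...sets]); // [ '.skk.moe', 'sukkaskk.moe' ]
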

View File

@ -1,6 +1,6 @@
// @ts-check
const { fetchWithRetry } = require('./fetch-retry');
const tldts = require('tldts');
const tldts = require('./cached-tld-parse');
const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-text-by-line');
const { NetworkFilter } = require('@cliqz/adblocker');
const { processLine } = require('./process-line');
@ -113,7 +113,7 @@ async function processHosts(hostsUrl, includeAllSubDomain = false) {
* @param {readonly (string | URL)[] | undefined} [fallbackUrls]
* @returns {Promise<{ white: Set<string>, black: Set<string>, foundDebugDomain: boolean }>}
*/
async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdParties = false) {
async function processFilterRules(filterRulesUrl, fallbackUrls) {
const runStart = performance.now();
/** @type Set<string> */
@ -148,7 +148,7 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart
const gorhill = await getGorhillPublicSuffixPromise();
const lineCb = (line) => {
const result = parse(line, includeThirdParties, gorhill);
const result = parse(line, gorhill);
if (result) {
const flag = result[1];
const hostname = result[0];
@ -180,12 +180,15 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart
};
if (!fallbackUrls || fallbackUrls.length === 0) {
const downloadStart = performance.now();
downloadTime = 0;
let last = performance.now();
for await (const line of await fetchRemoteTextAndCreateReadlineInterface(filterRulesUrl)) {
const now = performance.now();
downloadTime += performance.now() - last;
last = now;
// don't trim here
lineCb(line);
}
downloadTime = performance.now() - downloadStart;
} else {
let filterRules;
@ -229,11 +232,10 @@ const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder)
/**
* @param {string} $line
* @param {boolean} includeThirdParties
* @param {import('gorhill-publicsuffixlist').default} gorhill
* @returns {null | [string, 0 | 1 | 2 | -1]} - 0 white include subdomain, 1 black absolute, 2 black include subdomain, -1 white
* @returns {null | [hostname: string, flag: 0 | 1 | 2 | -1]} - 0 white include subdomain, 1 black absolute, 2 black include subdomain, -1 white
*/
function parse($line, includeThirdParties, gorhill) {
function parse($line, gorhill) {
if (
// doesn't include
!$line.includes('.') // a rule without a dot cannot be a domain
@ -297,7 +299,7 @@ function parse($line, includeThirdParties, gorhill) {
if (
filter.hostname // filter.hasHostname() // must have
&& filter.isPlain()
&& (!filter.isRegex())
// && (!filter.isRegex()) // isPlain() === !isRegex()
&& (!filter.isFullRegex())
) {
if (!gorhill.getDomain(filter.hostname)) {
@ -307,22 +309,28 @@ function parse($line, includeThirdParties, gorhill) {
if (!hostname) {
return null;
}
// console.log({
// '||': filter.isHostnameAnchor(),
// '|': filter.isLeftAnchor(),
// '|https://': !filter.isHostnameAnchor() && (filter.fromHttps() || filter.fromHttp())
// });
const isIncludeAllSubDomain = filter.isHostnameAnchor();
if (filter.isException() || filter.isBadFilter()) {
return [hostname, 0];
return [hostname, isIncludeAllSubDomain ? 0 : -1];
}
const _1p = filter.firstParty();
const _3p = filter.thirdParty();
if (_1p === _3p) {
return [hostname, 2];
}
if (_3p) {
if (includeThirdParties) {
return [hostname, 2];
if (_1p) {
if (_1p === _3p) {
return [hostname, isIncludeAllSubDomain ? 2 : 1];
}
return null;
}
if (_1p) {
if (_3p) {
return null;
}
}
@ -340,10 +348,12 @@ function parse($line, includeThirdParties, gorhill) {
return null;
}
const lineEndsWithCaretOrCaretVerticalBar = (
lastChar === '^'
|| (lastChar === '|' && line[len - 2] === '^')
);
/* eslint-disable no-nested-ternary -- speed */
const linedEndsWithCaret = lastChar === '^';
const lineEndsWithCaretVerticalBar = lastChar === '|' && line[len - 2] === '^';
const lineEndsWithCaretOrCaretVerticalBar = linedEndsWithCaret || lineEndsWithCaretVerticalBar;
// whitelist (exception)
if (firstChar === '@' && line[1] === '@') {
@ -397,13 +407,7 @@ function parse($line, includeThirdParties, gorhill) {
}
}
if (
firstChar === '|' && line[1] === '|'
&& (
lineEndsWithCaretOrCaretVerticalBar
|| line.endsWith('$cname')
)
) {
if (firstChar === '|' && (lineEndsWithCaretOrCaretVerticalBar || line.endsWith('$cname'))) {
/**
* Some malformed filters can not be parsed by NetworkFilter:
*
@ -411,17 +415,26 @@ function parse($line, includeThirdParties, gorhill) {
* `||solutions.|pages.indigovision.com^`
* `||vystar..0rg@client.iebetanialaargentina.edu.co^`
*/
const includeAllSubDomain = line[1] === '|';
const sliceStart = includeAllSubDomain ? 2 : 1;
const sliceEnd = lastChar === '^'
? -1
: lineEndsWithCaretOrCaretVerticalBar
? -2
: line.endsWith('$cname')
? -6
: 0;
const _domain = line
// .replace('||', '')
.slice(2) // we already make sure line startsWith ||
.replace('^|', '')
.replace('$cname', '')
.replaceAll('^', '')
.slice(sliceStart, sliceEnd) // we already make sure line startsWith ||
.trim();
const domain = normalizeDomain(_domain);
if (domain) {
return [domain, 2];
return [domain, includeAllSubDomain ? 2 : 1];
}
console.warn(' * [parse-filter E0002] (black) invalid domain:', _domain);
@ -439,7 +452,14 @@ function parse($line, includeThirdParties, gorhill) {
* `.wap.x4399.com^`
*/
const _domain = line
.slice(1) // remove prefix dot
.slice(
1,
linedEndsWithCaret
? -1
: lineEndsWithCaretVerticalBar
? -2
: 0
) // remove prefix dot
.replace('^|', '')
.replaceAll('^', '')
.trim();
@ -503,6 +523,13 @@ function parse($line, includeThirdParties, gorhill) {
*/
if (firstChar !== '|' && lastChar === '^') {
const _domain = line.slice(0, -1);
const suffix = gorhill.getPublicSuffix(_domain);
if (!suffix || !gorhill.suffixInPSL(suffix)) {
// This exclude domain-like resource like `_social_tracking.js^`
return null;
}
const domain = normalizeDomain(_domain);
if (domain) {
return [domain, 1];
@ -540,6 +567,7 @@ function parse($line, includeThirdParties, gorhill) {
}
return null;
/* eslint-enable no-nested-ternary */
}
module.exports.processDomainLists = processDomainLists;
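
The main speed-up inside parse() above is replacing the chained replace()/replaceAll() calls with one computed slice(): the start index depends on whether the rule begins with "||" or a single "|", and the end index on whether it ends with "^", "^|" or "$cname". A standalone sketch of just that index math (it mirrors the diff but is not an import of the build code, and it skips the normalizeDomain/NetworkFilter handling):

// flag meanings from the JSDoc above: 2 = black, include subdomains; 1 = black, exact hostname
const extractBlackDomain = (line) => {
  const len = line.length;
  const lastChar = line[len - 1];
  const lineEndsWithCaret = lastChar === '^';                                      // ||example.com^
  const lineEndsWithCaretVerticalBar = lastChar === '|' && line[len - 2] === '^';  // ||example.com^|
  const includeAllSubDomain = line[1] === '|';                                     // "||" vs "|"
  const sliceStart = includeAllSubDomain ? 2 : 1;
  const sliceEnd = lineEndsWithCaret
    ? -1
    : lineEndsWithCaretVerticalBar
      ? -2
      : line.endsWith('$cname')
        ? -6
        : 0;
  return [line.slice(sliceStart, sliceEnd).trim(), includeAllSubDomain ? 2 : 1];
};

console.log(extractBlackDomain('||example.com^'));      // [ 'example.com', 2 ]
console.log(extractBlackDomain('||example.com^|'));     // [ 'example.com', 2 ]
console.log(extractBlackDomain('|example.com^'));       // [ 'example.com', 1 ]
console.log(extractBlackDomain('||example.com$cname')); // [ 'example.com', 2 ]
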

View File

@ -17,8 +17,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
'https://easylist-downloads.adblockplus.org/easylist.txt',
'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easylist.txt',
'https://secure.fanboy.co.nz/easylist.txt'
],
false
]
],
// Easy Privacy
[
@ -27,8 +26,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
'https://secure.fanboy.co.nz/easyprivacy.txt',
'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easyprivacy.txt',
'https://easylist-downloads.adblockplus.org/easyprivacy.txt'
],
false
]
],
// AdGuard DNS Filter
[
@ -48,40 +46,35 @@ const ADGUARD_FILTERS = /** @type {const} */([
[
'https://ublockorigin.github.io/uAssetsCDN/filters/filters.txt',
'https://ublockorigin.pages.dev/filters/filters.txt'
],
false
]
],
[
'https://ublockorigin.github.io/uAssets/filters/filters-2020.txt',
[
'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2020.txt',
'https://ublockorigin.pages.dev/filters/filters-2020.txt'
],
false
]
],
[
'https://ublockorigin.github.io/uAssets/filters/filters-2021.txt',
[
'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2021.txt',
'https://ublockorigin.pages.dev/filters/filters-2021.txt'
],
false
]
],
[
'https://ublockorigin.github.io/uAssets/filters/filters-2022.txt',
[
'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2022.txt',
'https://ublockorigin.pages.dev/filters/filters-2022.txt'
],
false
]
],
[
'https://ublockorigin.github.io/uAssets/filters/filters-2023.txt',
[
'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2023.txt',
'https://ublockorigin.pages.dev/filters/filters-2023.txt'
],
false
]
],
// uBlock Origin Badware Risk List
[
@ -89,8 +82,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
[
'https://ublockorigin.github.io/uAssetsCDN/filters/badware.txt',
'https://ublockorigin.pages.dev/filters/badware.txt'
],
false
]
],
// uBlock Origin Privacy List
[
@ -98,8 +90,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
[
'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.txt',
'https://ublockorigin.pages.dev/filters/privacy.txt'
],
false
]
],
// uBlock Origin Resource Abuse
[
@ -107,8 +98,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
[
'https://ublockorigin.github.io/uAssetsCDN/filters/resource-abuse.txt',
'https://ublockorigin.pages.dev/filters/resource-abuse.txt'
],
false
]
],
// uBlock Origin Unbreak
[
@ -116,8 +106,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
[
'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.txt',
'https://ublockorigin.pages.dev/filters/unbreak.txt'
],
false
]
],
// AdGuard Base Filter
'https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt',
@ -136,8 +125,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
'https://easylist.to/easylistgermany/easylistgermany.txt',
[
'https://easylist-downloads.adblockplus.org/easylistgermany.txt'
],
false
]
],
// Curben's UrlHaus Malicious URL Blocklist
[
@ -146,8 +134,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt'
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt'
],
false
]
],
// Curben's Phishing URL Blocklist
[
@ -156,8 +143,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
],
false
]
],
// Curben's PUP Domains Blocklist
[
@ -166,8 +152,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
'https://pup-filter.pages.dev/pup-filter-agh.txt'
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/malware-filter/pup-filter-agh.txt'
],
false
]
],
// GameConsoleAdblockList
'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt',
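
Since processFilterRules no longer takes an includeThirdParties argument, every ADGUARD_FILTERS entry drops its trailing false: an entry is now either a plain URL string or a [mainUrl, fallbackUrls] pair, which is exactly how build-reject-domainset.js destructures it above (input vs input[0]/input[1]). An excerpt-style sketch of the resulting shape, using two entries that appear in this diff:

// lib/reject-data-source.js after this commit (excerpt, not the full list)
const ADGUARD_FILTERS = /** @type {const} */ ([
  // plain string entry: single source, no mirrors
  'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt',
  // [mainUrl, fallbackUrls] entry: main source plus mirrors, no third element any more
  [
    'https://ublockorigin.github.io/uAssets/filters/filters-2020.txt',
    [
      'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2020.txt',
      'https://ublockorigin.pages.dev/filters/filters-2020.txt'
    ]
  ]
]);

module.exports.ADGUARD_FILTERS = ADGUARD_FILTERS;
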

View File

@ -4,38 +4,39 @@
const SENTINEL = String.fromCharCode(0);
class Trie {
size = 0;
root = {};
/**
* @param {string[] | Set<string>} [from]
*/
const createTrie = (from) => {
let size = 0;
const root = {};
/**
* Method used to add the given prefix to the trie.
*
* @param {string} suffix - Prefix to follow.
* @return {Trie}
*/
add(suffix) {
let node = this.root;
const add = (suffix) => {
let node = root;
let token;
for (let i = suffix.length - 1; i >= 0; i--) {
token = suffix[i];
node = node[token] || (node[token] = {});
node[token] ||= {};
node = node[token];
}
// Do we need to increase size?
if (!(SENTINEL in node)) this.size++;
if (!(SENTINEL in node)) {
size++;
}
node[SENTINEL] = true;
return this;
}
};
/**
* @param {string} suffix
*/
contains(suffix) {
let node = this.root;
const contains = (suffix) => {
let node = root;
let token;
for (let i = suffix.length - 1; i >= 0; i--) {
@ -47,8 +48,7 @@ class Trie {
}
return true;
}
};
/**
* Method used to retrieve every item in the trie with the given prefix.
*
@ -56,8 +56,8 @@ class Trie {
* @param {boolean} [includeEqualWithSuffix]
* @return {string[]}
*/
find(suffix, includeEqualWithSuffix = true) {
let node = this.root;
const find = (suffix, includeEqualWithSuffix = true) => {
let node = root;
const matches = [];
let token;
@ -99,48 +99,7 @@ class Trie {
}
return matches;
}
toJSON() {
return this.root;
}
/**
* Method used to clear the trie.
*
* @return {void}
*/
// clear() {
// // Properties
// this.root = {};
// this.size = 0;
// }
/**
* Method used to update the value of the given prefix in the trie.
*
* @param {string|array} prefix - Prefix to follow.
* @param {(oldValue: any | undefined) => any} updateFunction - Update value visitor callback.
* @return {Trie}
*/
// update(prefix, updateFunction) {
// let node = this.root;
// let token;
// for (let i = 0, l = prefix.length; i < l; i++) {
// token = prefix[i];
// node = node[token] || (node[token] = {});
// }
// // Do we need to increase size?
// if (!(SENTINEL in node))
// this.size++;
// node[SENTINEL] = updateFunction(node[SENTINEL]);
// return this;
// }
};
/**
* Method used to delete a prefix from the trie.
@ -148,8 +107,8 @@ class Trie {
* @param {string} suffix - Prefix to delete.
* @return {boolean}
*/
delete(suffix) {
let node = this.root;
const remove = (suffix) => {
let node = root;
let toPrune = null;
let tokenToPrune = null;
let parent;
@ -179,7 +138,7 @@ class Trie {
if (!(SENTINEL in node)) return false;
this.size--;
size--;
if (toPrune) {
delete toPrune[tokenToPrune];
@ -188,7 +147,7 @@ class Trie {
}
return true;
}
};
/**
* Method used to assert whether the given prefix exists in the Trie.
@ -196,8 +155,8 @@ class Trie {
* @param {string} suffix - Prefix to check.
* @return {boolean}
*/
has(suffix) {
let node = this.root;
const has = (suffix) => {
let node = root;
let token;
for (let i = suffix.length - 1; i >= 0; i--) {
@ -210,86 +169,288 @@ class Trie {
}
return SENTINEL in node;
}
/**
* @return {string[]}
*/
dump() {
const node = this.root;
const nodeStack = [];
const prefixStack = [];
// Resolving initial prefix
const prefix = '';
nodeStack.push(node);
prefixStack.push(prefix);
/** @type {string[]} */
const results = [];
let currentNode;
let currentPrefix;
let hasValue = false;
let k;
while (nodeStack.length) {
currentNode = nodeStack.pop();
currentPrefix = prefixStack.pop();
// eslint-disable-next-line guard-for-in -- plain object
for (k in currentNode) {
if (k === SENTINEL) {
hasValue = true;
continue;
}
nodeStack.push(currentNode[k]);
prefixStack.push(k + currentPrefix);
}
if (hasValue) results.push(currentPrefix);
}
return results;
}
/**
* Convenience known methods.
*/
// inspect() {
// const proxy = new Set();
// const iterator = this.prefixes();
// let step;
// while ((step = iterator.next(), !step.done))
// proxy.add(step.value);
// // Trick so that node displays the name of the constructor
// Object.defineProperty(proxy, 'constructor', {
// value: Trie,
// enumerable: false
// });
// return proxy;
// }
/**
* Static .from function taking an arbitrary iterable & converting it into
* a trie.
*
* @param {string[] | Set<string>} iterable - Target iterable.
* @return {Trie}
*/
static from = iterable => {
const trie = new Trie();
iterable.forEach(i => trie.add(i));
return trie;
};
}
if (from) {
from.forEach(add);
}
return {
add,
contains,
find,
remove,
delete: remove,
has,
get size() {
return size;
}
};
};
// class Trie {
// size = 0;
// root = {};
// /**
// * @param {string} suffix
// */
// contains(suffix) {
// let node = this.root;
// let token;
// for (let i = suffix.length - 1; i >= 0; i--) {
// token = suffix[i];
// node = node[token];
// if (node == null) return false;
// }
// return true;
// }
// /**
// * Method used to retrieve every item in the trie with the given prefix.
// *
// * @param {string} suffix - Prefix to query.
// * @param {boolean} [includeEqualWithSuffix]
// * @return {string[]}
// */
// find(suffix, includeEqualWithSuffix = true) {
// let node = this.root;
// const matches = [];
// let token;
// for (let i = suffix.length - 1; i >= 0; i--) {
// token = suffix[i];
// node = node[token];
// if (node == null) return matches;
// }
// // Performing DFS from prefix
// const nodeStack = [node];
// const suffixStack = [suffix];
// let k;
// let $suffix = suffix;
// while (nodeStack.length) {
// $suffix = suffixStack.pop();
// node = nodeStack.pop();
// // eslint-disable-next-line guard-for-in -- plain object
// for (k in node) {
// if (k === SENTINEL) {
// if (includeEqualWithSuffix) {
// matches.push($suffix);
// } else if ($suffix !== suffix) {
// matches.push($suffix);
// }
// continue;
// }
// nodeStack.push(node[k]);
// suffixStack.push(k + $suffix);
// }
// }
// return matches;
// }
// // toJSON() {
// // return this.root;
// // }
// /**
// * Method used to clear the trie.
// *
// * @return {void}
// */
// // clear() {
// // // Properties
// // this.root = {};
// // this.size = 0;
// // }
// /**
// * Method used to update the value of the given prefix in the trie.
// *
// * @param {string|array} prefix - Prefix to follow.
// * @param {(oldValue: any | undefined) => any} updateFunction - Update value visitor callback.
// * @return {Trie}
// */
// // update(prefix, updateFunction) {
// // let node = this.root;
// // let token;
// // for (let i = 0, l = prefix.length; i < l; i++) {
// // token = prefix[i];
// // node = node[token] || (node[token] = {});
// // }
// // // Do we need to increase size?
// // if (!(SENTINEL in node))
// // this.size++;
// // node[SENTINEL] = updateFunction(node[SENTINEL]);
// // return this;
// // }
// /**
// * Method used to delete a prefix from the trie.
// *
// * @param {string} suffix - Prefix to delete.
// * @return {boolean}
// */
// delete(suffix) {
// let node = this.root;
// let toPrune = null;
// let tokenToPrune = null;
// let parent;
// let token;
// for (let i = suffix.length - 1; i >= 0; i--) {
// token = suffix[i];
// parent = node;
// node = node[token];
// // Prefix does not exist
// if (typeof node === 'undefined') {
// return false;
// }
// // Keeping track of a potential branch to prune
// if (toPrune !== null) {
// if (Object.keys(node).length > 1) {
// toPrune = null;
// tokenToPrune = null;
// }
// } else if (Object.keys(node).length < 2) {
// toPrune = parent;
// tokenToPrune = token;
// }
// }
// if (!(SENTINEL in node)) return false;
// this.size--;
// if (toPrune) {
// delete toPrune[tokenToPrune];
// } else {
// delete node[SENTINEL];
// }
// return true;
// }
// /**
// * Method used to assert whether the given prefix exists in the Trie.
// *
// * @param {string} suffix - Prefix to check.
// * @return {boolean}
// */
// has(suffix) {
// let node = this.root;
// let token;
// for (let i = suffix.length - 1; i >= 0; i--) {
// token = suffix[i];
// node = node[token];
// if (typeof node === 'undefined') {
// return false;
// }
// }
// return SENTINEL in node;
// }
// /**
// * @return {string[]}
// */
// // dump() {
// // const node = this.root;
// // const nodeStack = [];
// // const prefixStack = [];
// // // Resolving initial prefix
// // const prefix = '';
// // nodeStack.push(node);
// // prefixStack.push(prefix);
// // /** @type {string[]} */
// // const results = [];
// // let currentNode;
// // let currentPrefix;
// // let hasValue = false;
// // let k;
// // while (nodeStack.length) {
// // currentNode = nodeStack.pop();
// // currentPrefix = prefixStack.pop();
// // // eslint-disable-next-line guard-for-in -- plain object
// // for (k in currentNode) {
// // if (k === SENTINEL) {
// // hasValue = true;
// // continue;
// // }
// // nodeStack.push(currentNode[k]);
// // prefixStack.push(k + currentPrefix);
// // }
// // if (hasValue) results.push(currentPrefix);
// // }
// // return results;
// // }
// /**
// * Convenience known methods.
// */
// // inspect() {
// // const proxy = new Set();
// // const iterator = this.prefixes();
// // let step;
// // while ((step = iterator.next(), !step.done))
// // proxy.add(step.value);
// // // Trick so that node displays the name of the constructor
// // Object.defineProperty(proxy, 'constructor', {
// // value: Trie,
// // enumerable: false
// // });
// // return proxy;
// // }
// /**
// * Static .from function taking an arbitrary iterable & converting it into
// * a trie.
// *
// * @param {string[] | Set<string>} iterable - Target iterable.
// * @return {Trie}
// */
// static from = iterable => {
// const trie = new Trie();
// iterable.forEach(i => trie.add(i));
// return trie;
// };
// }
/**
* Exporting.
*/
module.exports.SENTINEL = SENTINEL;
module.exports = Trie;
module.exports = createTrie;
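
Worth noting for readers of the tests below: add() walks each string from its last character to its first, so the structure is effectively a suffix trie keyed by reversed domains, and find('.skk.moe') can enumerate every stored subdomain with a single walk plus DFS. A tiny sketch assuming the factory exported above:

const createTrie = require('./trie'); // same relative path the test file below uses

const trie = createTrie(['skk.moe']);
trie.add('blog.skk.moe');

// both entries share the reversed path e -> o -> m -> . -> k -> k -> s,
// so a ".skk.moe" query walks straight to the shared node before branching
console.log(trie.size);                // 2
console.log(trie.has('blog.skk.moe')); // true
console.log(trie.find('.skk.moe'));    // [ 'blog.skk.moe' ]
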

View File

@ -1,12 +1,12 @@
require('chai').should();
const Trie = require('./trie');
const createTrie = require('./trie');
const assert = require('assert');
const { describe, it } = require('mocha');
describe('Trie', () => {
it('should be possible to add items to a Trie.', () => {
const trie = new Trie();
const trie = createTrie();
trie.add('sukka');
trie.add('ukka');
@ -22,7 +22,7 @@ describe('Trie', () => {
});
it('adding the same item several times should not increase size.', () => {
const trie = new Trie();
const trie = createTrie();
trie.add('rat');
trie.add('erat');
@ -33,21 +33,14 @@ describe('Trie', () => {
});
it('should be possible to set the null sequence.', () => {
let trie = new Trie();
const trie = createTrie();
trie.add('');
trie.size.should.eq(1);
trie.has('').should.eq(true);
trie = new Trie(Array);
trie.add([]);
trie.size.should.eq(1);
trie.has([]).should.eq(true);
});
it('should be possible to delete items.', () => {
const trie = new Trie();
const trie = createTrie();
trie.add('rat');
trie.add('rate');
@ -64,16 +57,13 @@ describe('Trie', () => {
trie.size.should.eq(2);
assert.strictEqual(trie.delete('rate'), true);
assert.strictEqual(trie.size, 1);
assert.strictEqual(trie.delete('tar'), true);
assert.strictEqual(trie.size, 0);
});
it('should be possible to check the existence of a sequence in the Trie.', () => {
const trie = new Trie();
const trie = createTrie();
trie.add('romanesque');
@ -83,7 +73,7 @@ describe('Trie', () => {
});
it('should be possible to retrieve items matching the given prefix.', () => {
const trie = new Trie();
const trie = createTrie();
trie.add('roman');
trie.add('esqueroman');
@ -154,7 +144,7 @@ describe('Trie', () => {
it('should be possible to create a trie from an arbitrary iterable.', () => {
const words = ['roman', 'esqueroman'];
const trie = Trie.from(words);
const trie = createTrie(words);
assert.strictEqual(trie.size, 2);
assert.deepStrictEqual(trie.has('roman'), true);
@ -163,14 +153,14 @@ describe('Trie', () => {
describe('surge domainset dedupe', () => {
it('should not remove same entry', () => {
const trie = Trie.from(['.skk.moe', 'noc.one']);
const trie = createTrie(['.skk.moe', 'noc.one']);
trie.find('.skk.moe').should.eql(['.skk.moe']);
trie.find('noc.one').should.eql(['noc.one']);
});
it('should remove subdomain', () => {
const trie = Trie.from(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']);
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']);
// trie.find('noc.one').should.eql(['www.noc.one']);
trie.find('.skk.moe').should.eql(['image.cdn.skk.moe', 'blog.skk.moe']);
// trie.find('sukkaw.net').should.eql(['cdn.sukkaw.net']);
@ -178,7 +168,7 @@ describe('surge domainset dedupe', () => {
});
it('should not remove non-subdomain', () => {
const trie = Trie.from(['skk.moe', 'sukkaskk.moe']);
const trie = createTrie(['skk.moe', 'sukkaskk.moe']);
trie.find('.skk.moe').should.eql([]);
});
});

View File

@ -108,6 +108,8 @@ DOMAIN-KEYWORD,_vmind.qqvideo.tc.qq.com
DOMAIN-KEYWORD,-logging.nextmedia.com
DOMAIN-KEYWORD,-spiky.clevertap-prod.com
DOMAIN-KEYWORD,.engage.3m.
DOMAIN-KEYWORD,telemetry.officeapps.live.com
DOMAIN-KEYWORD,-launches.appsflyersdk.com
AND,((DOMAIN-KEYWORD,genuine), (DOMAIN-KEYWORD,autodesk))