mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Perf: make AdGuard rule parse faster
This commit is contained in:
parent
89b8638883
commit
702ded53c5
@ -2,7 +2,7 @@
|
||||
const path = require('path');
|
||||
const { createRuleset } = require('./lib/create-file');
|
||||
const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('./lib/fetch-remote-text-by-line');
|
||||
const Trie = require('./lib/trie');
|
||||
const createTrie = require('./lib/trie');
|
||||
const { task } = require('./lib/trace-runner');
|
||||
const fs = require('fs');
|
||||
const { processLine } = require('./lib/process-line');
|
||||
@ -10,7 +10,7 @@ const { processLine } = require('./lib/process-line');
|
||||
const publicSuffixPath = path.resolve(__dirname, '../node_modules/.cache/public_suffix_list_dat.txt');
|
||||
|
||||
const getS3OSSDomains = async () => {
|
||||
const trie = new Trie();
|
||||
const trie = createTrie();
|
||||
|
||||
if (fs.existsSync(publicSuffixPath)) {
|
||||
for await (const line of readFileByLine(publicSuffixPath)) {
|
||||
|
||||
@ -5,7 +5,7 @@ const { createRuleset } = require('./lib/create-file');
|
||||
const { processLine } = require('./lib/process-line.js');
|
||||
const { createDomainSorter } = require('./lib/stable-sort-domain');
|
||||
const { traceSync, task } = require('./lib/trace-runner.js');
|
||||
const Trie = require('./lib/trie.js');
|
||||
const createTrie = require('./lib/trie.js');
|
||||
const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix.js');
|
||||
const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse.js');
|
||||
const tldts = require('tldts');
|
||||
@ -79,7 +79,7 @@ const buildPhishingDomainSet = task(__filename, async () => {
|
||||
]);
|
||||
|
||||
traceSync('* whitelist', () => {
|
||||
const trieForRemovingWhiteListed = Trie.from(domainSet);
|
||||
const trieForRemovingWhiteListed = createTrie(domainSet);
|
||||
WHITELIST_DOMAIN.forEach(white => {
|
||||
trieForRemovingWhiteListed.find(`.${white}`, false).forEach(f => domainSet.delete(f));
|
||||
if (trieForRemovingWhiteListed.has(white)) {
|
||||
|
||||
@ -3,7 +3,7 @@ const fse = require('fs-extra');
|
||||
const { resolve: pathResolve } = require('path');
|
||||
|
||||
const { processHosts, processFilterRules } = require('./lib/parse-filter');
|
||||
const Trie = require('./lib/trie');
|
||||
const createTrie = require('./lib/trie');
|
||||
|
||||
const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source');
|
||||
const { createRuleset, compareAndWriteFile } = require('./lib/create-file');
|
||||
@ -44,8 +44,8 @@ const buildRejectDomainSet = task(__filename, async () => {
|
||||
})),
|
||||
...ADGUARD_FILTERS.map(input => {
|
||||
const promise = typeof input === 'string'
|
||||
? processFilterRules(input, undefined, false)
|
||||
: processFilterRules(input[0], input[1] || undefined, input[2] ?? false);
|
||||
? processFilterRules(input, undefined)
|
||||
: processFilterRules(input[0], input[1] || undefined);
|
||||
|
||||
return promise.then((i) => {
|
||||
if (i) {
|
||||
@ -82,7 +82,7 @@ const buildRejectDomainSet = task(__filename, async () => {
|
||||
]);
|
||||
|
||||
// remove pre-defined enforced blacklist from whitelist
|
||||
const trie0 = Trie.from(filterRuleWhitelistDomainSets);
|
||||
const trie0 = createTrie(filterRuleWhitelistDomainSets);
|
||||
PREDEFINED_ENFORCED_BACKLIST.forEach(enforcedBlack => {
|
||||
trie0.find(enforcedBlack).forEach(found => filterRuleWhitelistDomainSets.delete(found));
|
||||
});
|
||||
@ -131,7 +131,7 @@ const buildRejectDomainSet = task(__filename, async () => {
|
||||
console.log(`Start deduping from black keywords/suffixes! (${previousSize})`);
|
||||
console.time('* Dedupe from black keywords/suffixes');
|
||||
|
||||
const trie1 = Trie.from(domainSets);
|
||||
const trie1 = createTrie(domainSets);
|
||||
domainSuffixSet.forEach(suffix => {
|
||||
trie1.find(suffix, true).forEach(f => domainSets.delete(f));
|
||||
});
|
||||
@ -143,7 +143,7 @@ const buildRejectDomainSet = task(__filename, async () => {
|
||||
const kwfilter = createKeywordFilter(domainKeywordsSet);
|
||||
|
||||
// Build whitelist trie, to handle case like removing `g.msn.com` due to white `.g.msn.com` (`@@||g.msn.com`)
|
||||
const trieWhite = Trie.from(filterRuleWhitelistDomainSets);
|
||||
const trieWhite = createTrie(filterRuleWhitelistDomainSets);
|
||||
for (const domain of domainSets) {
|
||||
if (domain[0] === '.') {
|
||||
if (trieWhite.contains(domain)) {
|
||||
|
||||
@ -1,11 +1,11 @@
|
||||
// @ts-check
|
||||
const Trie = require('./trie');
|
||||
const createTrie = require('./trie');
|
||||
|
||||
/**
|
||||
* @param {string[]} inputDomains
|
||||
*/
|
||||
const domainDeduper = (inputDomains) => {
|
||||
const trie = Trie.from(inputDomains);
|
||||
const trie = createTrie(inputDomains);
|
||||
const sets = new Set(inputDomains);
|
||||
|
||||
for (let j = 0, len = inputDomains.length; j < len; j++) {
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
// @ts-check
|
||||
const { fetchWithRetry } = require('./fetch-retry');
|
||||
const tldts = require('tldts');
|
||||
const tldts = require('./cached-tld-parse');
|
||||
const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-text-by-line');
|
||||
const { NetworkFilter } = require('@cliqz/adblocker');
|
||||
const { processLine } = require('./process-line');
|
||||
@ -113,7 +113,7 @@ async function processHosts(hostsUrl, includeAllSubDomain = false) {
|
||||
* @param {readonly (string | URL)[] | undefined} [fallbackUrls]
|
||||
* @returns {Promise<{ white: Set<string>, black: Set<string>, foundDebugDomain: boolean }>}
|
||||
*/
|
||||
async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdParties = false) {
|
||||
async function processFilterRules(filterRulesUrl, fallbackUrls) {
|
||||
const runStart = performance.now();
|
||||
|
||||
/** @type Set<string> */
|
||||
@ -148,7 +148,7 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart
|
||||
const gorhill = await getGorhillPublicSuffixPromise();
|
||||
|
||||
const lineCb = (line) => {
|
||||
const result = parse(line, includeThirdParties, gorhill);
|
||||
const result = parse(line, gorhill);
|
||||
if (result) {
|
||||
const flag = result[1];
|
||||
const hostname = result[0];
|
||||
@ -180,12 +180,15 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart
|
||||
};
|
||||
|
||||
if (!fallbackUrls || fallbackUrls.length === 0) {
|
||||
const downloadStart = performance.now();
|
||||
downloadTime = 0;
|
||||
let last = performance.now();
|
||||
for await (const line of await fetchRemoteTextAndCreateReadlineInterface(filterRulesUrl)) {
|
||||
const now = performance.now();
|
||||
downloadTime += performance.now() - last;
|
||||
last = now;
|
||||
// don't trim here
|
||||
lineCb(line);
|
||||
}
|
||||
downloadTime = performance.now() - downloadStart;
|
||||
} else {
|
||||
let filterRules;
|
||||
|
||||
@ -229,11 +232,10 @@ const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder)
|
||||
|
||||
/**
|
||||
* @param {string} $line
|
||||
* @param {boolean} includeThirdParties
|
||||
* @param {import('gorhill-publicsuffixlist').default} gorhill
|
||||
* @returns {null | [string, 0 | 1 | 2 | -1]} - 0 white include subdomain, 1 black absolute, 2 black include subdomain, -1 white
|
||||
* @returns {null | [hostname: string, flag: 0 | 1 | 2 | -1]} - 0 white include subdomain, 1 black absolute, 2 black include subdomain, -1 white
|
||||
*/
|
||||
function parse($line, includeThirdParties, gorhill) {
|
||||
function parse($line, gorhill) {
|
||||
if (
|
||||
// doesn't include
|
||||
!$line.includes('.') // rule with out dot can not be a domain
|
||||
@ -297,7 +299,7 @@ function parse($line, includeThirdParties, gorhill) {
|
||||
if (
|
||||
filter.hostname // filter.hasHostname() // must have
|
||||
&& filter.isPlain()
|
||||
&& (!filter.isRegex())
|
||||
// && (!filter.isRegex()) // isPlain() === !isRegex()
|
||||
&& (!filter.isFullRegex())
|
||||
) {
|
||||
if (!gorhill.getDomain(filter.hostname)) {
|
||||
@ -307,22 +309,28 @@ function parse($line, includeThirdParties, gorhill) {
|
||||
if (!hostname) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// console.log({
|
||||
// '||': filter.isHostnameAnchor(),
|
||||
// '|': filter.isLeftAnchor(),
|
||||
// '|https://': !filter.isHostnameAnchor() && (filter.fromHttps() || filter.fromHttp())
|
||||
// });
|
||||
const isIncludeAllSubDomain = filter.isHostnameAnchor();
|
||||
|
||||
if (filter.isException() || filter.isBadFilter()) {
|
||||
return [hostname, 0];
|
||||
return [hostname, isIncludeAllSubDomain ? 0 : -1];
|
||||
}
|
||||
|
||||
const _1p = filter.firstParty();
|
||||
const _3p = filter.thirdParty();
|
||||
if (_1p === _3p) {
|
||||
return [hostname, 2];
|
||||
}
|
||||
if (_3p) {
|
||||
if (includeThirdParties) {
|
||||
return [hostname, 2];
|
||||
|
||||
if (_1p) {
|
||||
if (_1p === _3p) {
|
||||
return [hostname, isIncludeAllSubDomain ? 2 : 1];
|
||||
}
|
||||
return null;
|
||||
}
|
||||
if (_1p) {
|
||||
if (_3p) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@ -340,10 +348,12 @@ function parse($line, includeThirdParties, gorhill) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const lineEndsWithCaretOrCaretVerticalBar = (
|
||||
lastChar === '^'
|
||||
|| (lastChar === '|' && line[len - 2] === '^')
|
||||
);
|
||||
/* eslint-disable no-nested-ternary -- speed */
|
||||
|
||||
const linedEndsWithCaret = lastChar === '^';
|
||||
const lineEndsWithCaretVerticalBar = lastChar === '|' && line[len - 2] === '^';
|
||||
|
||||
const lineEndsWithCaretOrCaretVerticalBar = linedEndsWithCaret || lineEndsWithCaretVerticalBar;
|
||||
|
||||
// whitelist (exception)
|
||||
if (firstChar === '@' && line[1] === '@') {
|
||||
@ -397,13 +407,7 @@ function parse($line, includeThirdParties, gorhill) {
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
firstChar === '|' && line[1] === '|'
|
||||
&& (
|
||||
lineEndsWithCaretOrCaretVerticalBar
|
||||
|| line.endsWith('$cname')
|
||||
)
|
||||
) {
|
||||
if (firstChar === '|' && (lineEndsWithCaretOrCaretVerticalBar || line.endsWith('$cname'))) {
|
||||
/**
|
||||
* Some malformed filters can not be parsed by NetworkFilter:
|
||||
*
|
||||
@ -411,17 +415,26 @@ function parse($line, includeThirdParties, gorhill) {
|
||||
* `||solutions.|pages.indigovision.com^`
|
||||
* `||vystar..0rg@client.iebetanialaargentina.edu.co^`
|
||||
*/
|
||||
|
||||
const includeAllSubDomain = line[1] === '|';
|
||||
|
||||
const sliceStart = includeAllSubDomain ? 2 : 1;
|
||||
const sliceEnd = lastChar === '^'
|
||||
? -1
|
||||
: lineEndsWithCaretOrCaretVerticalBar
|
||||
? -2
|
||||
: line.endsWith('$cname')
|
||||
? -6
|
||||
: 0;
|
||||
|
||||
const _domain = line
|
||||
// .replace('||', '')
|
||||
.slice(2) // we already make sure line startsWith ||
|
||||
.replace('^|', '')
|
||||
.replace('$cname', '')
|
||||
.replaceAll('^', '')
|
||||
.slice(sliceStart, sliceEnd) // we already make sure line startsWith ||
|
||||
.trim();
|
||||
|
||||
const domain = normalizeDomain(_domain);
|
||||
if (domain) {
|
||||
return [domain, 2];
|
||||
return [domain, includeAllSubDomain ? 2 : 1];
|
||||
}
|
||||
console.warn(' * [parse-filter E0002] (black) invalid domain:', _domain);
|
||||
|
||||
@ -439,7 +452,14 @@ function parse($line, includeThirdParties, gorhill) {
|
||||
* `.wap.x4399.com^`
|
||||
*/
|
||||
const _domain = line
|
||||
.slice(1) // remove prefix dot
|
||||
.slice(
|
||||
1,
|
||||
linedEndsWithCaret
|
||||
? -1
|
||||
: lineEndsWithCaretVerticalBar
|
||||
? -2
|
||||
: 0
|
||||
) // remove prefix dot
|
||||
.replace('^|', '')
|
||||
.replaceAll('^', '')
|
||||
.trim();
|
||||
@ -503,6 +523,13 @@ function parse($line, includeThirdParties, gorhill) {
|
||||
*/
|
||||
if (firstChar !== '|' && lastChar === '^') {
|
||||
const _domain = line.slice(0, -1);
|
||||
|
||||
const suffix = gorhill.getPublicSuffix(_domain);
|
||||
if (!suffix || !gorhill.suffixInPSL(suffix)) {
|
||||
// This excludes domain-like resources such as `_social_tracking.js^`
|
||||
return null;
|
||||
}
|
||||
|
||||
const domain = normalizeDomain(_domain);
|
||||
if (domain) {
|
||||
return [domain, 1];
|
||||
@ -540,6 +567,7 @@ function parse($line, includeThirdParties, gorhill) {
|
||||
}
|
||||
|
||||
return null;
|
||||
/* eslint-enable no-nested-ternary */
|
||||
}
|
||||
|
||||
module.exports.processDomainLists = processDomainLists;
|
||||
|
||||
@ -17,8 +17,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
|
||||
'https://easylist-downloads.adblockplus.org/easylist.txt',
|
||||
'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easylist.txt',
|
||||
'https://secure.fanboy.co.nz/easylist.txt'
|
||||
],
|
||||
false
|
||||
]
|
||||
],
|
||||
// Easy Privacy
|
||||
[
|
||||
@ -27,8 +26,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
|
||||
'https://secure.fanboy.co.nz/easyprivacy.txt',
|
||||
'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easyprivacy.txt',
|
||||
'https://easylist-downloads.adblockplus.org/easyprivacy.txt'
|
||||
],
|
||||
false
|
||||
]
|
||||
],
|
||||
// AdGuard DNS Filter
|
||||
[
|
||||
@ -48,40 +46,35 @@ const ADGUARD_FILTERS = /** @type {const} */([
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/filters.txt',
|
||||
'https://ublockorigin.pages.dev/filters/filters.txt'
|
||||
],
|
||||
false
|
||||
]
|
||||
],
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/filters-2020.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2020.txt',
|
||||
'https://ublockorigin.pages.dev/filters/filters-2020.txt'
|
||||
],
|
||||
false
|
||||
]
|
||||
],
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/filters-2021.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2021.txt',
|
||||
'https://ublockorigin.pages.dev/filters/filters-2021.txt'
|
||||
],
|
||||
false
|
||||
]
|
||||
],
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/filters-2022.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2022.txt',
|
||||
'https://ublockorigin.pages.dev/filters/filters-2022.txt'
|
||||
],
|
||||
false
|
||||
]
|
||||
],
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssets/filters/filters-2023.txt',
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2023.txt',
|
||||
'https://ublockorigin.pages.dev/filters/filters-2023.txt'
|
||||
],
|
||||
false
|
||||
]
|
||||
],
|
||||
// uBlock Origin Badware Risk List
|
||||
[
|
||||
@ -89,8 +82,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/badware.txt',
|
||||
'https://ublockorigin.pages.dev/filters/badware.txt'
|
||||
],
|
||||
false
|
||||
]
|
||||
],
|
||||
// uBlock Origin Privacy List
|
||||
[
|
||||
@ -98,8 +90,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.txt',
|
||||
'https://ublockorigin.pages.dev/filters/privacy.txt'
|
||||
],
|
||||
false
|
||||
]
|
||||
],
|
||||
// uBlock Origin Resource Abuse
|
||||
[
|
||||
@ -107,8 +98,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/resource-abuse.txt',
|
||||
'https://ublockorigin.pages.dev/filters/resource-abuse.txt'
|
||||
],
|
||||
false
|
||||
]
|
||||
],
|
||||
// uBlock Origin Unbreak
|
||||
[
|
||||
@ -116,8 +106,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
|
||||
[
|
||||
'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.txt',
|
||||
'https://ublockorigin.pages.dev/filters/unbreak.txt'
|
||||
],
|
||||
false
|
||||
]
|
||||
],
|
||||
// AdGuard Base Filter
|
||||
'https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt',
|
||||
@ -136,8 +125,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
|
||||
'https://easylist.to/easylistgermany/easylistgermany.txt',
|
||||
[
|
||||
'https://easylist-downloads.adblockplus.org/easylistgermany.txt'
|
||||
],
|
||||
false
|
||||
]
|
||||
],
|
||||
// Curben's UrlHaus Malicious URL Blocklist
|
||||
[
|
||||
@ -146,8 +134,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
|
||||
'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt'
|
||||
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
|
||||
// 'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt'
|
||||
],
|
||||
false
|
||||
]
|
||||
],
|
||||
// Curben's Phishing URL Blocklist
|
||||
[
|
||||
@ -156,8 +143,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
|
||||
'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
|
||||
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
|
||||
// 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
|
||||
],
|
||||
false
|
||||
]
|
||||
],
|
||||
// Curben's PUP Domains Blocklist
|
||||
[
|
||||
@ -166,8 +152,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
|
||||
'https://pup-filter.pages.dev/pup-filter-agh.txt'
|
||||
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
|
||||
// 'https://malware-filter.gitlab.io/malware-filter/pup-filter-agh.txt'
|
||||
],
|
||||
false
|
||||
]
|
||||
],
|
||||
// GameConsoleAdblockList
|
||||
'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt',
|
||||
|
||||
@ -4,38 +4,39 @@
|
||||
|
||||
const SENTINEL = String.fromCharCode(0);
|
||||
|
||||
class Trie {
|
||||
size = 0;
|
||||
root = {};
|
||||
/**
|
||||
* @param {string[] | Set<string>} [from]
|
||||
*/
|
||||
const createTrie = (from) => {
|
||||
let size = 0;
|
||||
const root = {};
|
||||
|
||||
/**
|
||||
* Method used to add the given prefix to the trie.
|
||||
*
|
||||
* @param {string} suffix - Prefix to follow.
|
||||
* @return {Trie}
|
||||
*/
|
||||
add(suffix) {
|
||||
let node = this.root;
|
||||
const add = (suffix) => {
|
||||
let node = root;
|
||||
let token;
|
||||
|
||||
for (let i = suffix.length - 1; i >= 0; i--) {
|
||||
token = suffix[i];
|
||||
|
||||
node = node[token] || (node[token] = {});
|
||||
node[token] ||= {};
|
||||
node = node[token];
|
||||
}
|
||||
|
||||
// Do we need to increase size?
|
||||
if (!(SENTINEL in node)) this.size++;
|
||||
if (!(SENTINEL in node)) {
|
||||
size++;
|
||||
}
|
||||
node[SENTINEL] = true;
|
||||
|
||||
return this;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @param {string} suffix
|
||||
*/
|
||||
contains(suffix) {
|
||||
let node = this.root;
|
||||
const contains = (suffix) => {
|
||||
let node = root;
|
||||
let token;
|
||||
|
||||
for (let i = suffix.length - 1; i >= 0; i--) {
|
||||
@ -47,8 +48,7 @@ class Trie {
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
};
|
||||
/**
|
||||
* Method used to retrieve every item in the trie with the given prefix.
|
||||
*
|
||||
@ -56,8 +56,8 @@ class Trie {
|
||||
* @param {boolean} [includeEqualWithSuffix]
|
||||
* @return {string[]}
|
||||
*/
|
||||
find(suffix, includeEqualWithSuffix = true) {
|
||||
let node = this.root;
|
||||
const find = (suffix, includeEqualWithSuffix = true) => {
|
||||
let node = root;
|
||||
const matches = [];
|
||||
let token;
|
||||
|
||||
@ -99,48 +99,7 @@ class Trie {
|
||||
}
|
||||
|
||||
return matches;
|
||||
}
|
||||
|
||||
toJSON() {
|
||||
return this.root;
|
||||
}
|
||||
|
||||
/**
|
||||
* Method used to clear the trie.
|
||||
*
|
||||
* @return {void}
|
||||
*/
|
||||
// clear() {
|
||||
// // Properties
|
||||
// this.root = {};
|
||||
// this.size = 0;
|
||||
// }
|
||||
|
||||
/**
|
||||
* Method used to update the value of the given prefix in the trie.
|
||||
*
|
||||
* @param {string|array} prefix - Prefix to follow.
|
||||
* @param {(oldValue: any | undefined) => any} updateFunction - Update value visitor callback.
|
||||
* @return {Trie}
|
||||
*/
|
||||
// update(prefix, updateFunction) {
|
||||
// let node = this.root;
|
||||
// let token;
|
||||
|
||||
// for (let i = 0, l = prefix.length; i < l; i++) {
|
||||
// token = prefix[i];
|
||||
|
||||
// node = node[token] || (node[token] = {});
|
||||
// }
|
||||
|
||||
// // Do we need to increase size?
|
||||
// if (!(SENTINEL in node))
|
||||
// this.size++;
|
||||
|
||||
// node[SENTINEL] = updateFunction(node[SENTINEL]);
|
||||
|
||||
// return this;
|
||||
// }
|
||||
};
|
||||
|
||||
/**
|
||||
* Method used to delete a prefix from the trie.
|
||||
@ -148,8 +107,8 @@ class Trie {
|
||||
* @param {string} suffix - Prefix to delete.
|
||||
* @return {boolean}
|
||||
*/
|
||||
delete(suffix) {
|
||||
let node = this.root;
|
||||
const remove = (suffix) => {
|
||||
let node = root;
|
||||
let toPrune = null;
|
||||
let tokenToPrune = null;
|
||||
let parent;
|
||||
@ -179,7 +138,7 @@ class Trie {
|
||||
|
||||
if (!(SENTINEL in node)) return false;
|
||||
|
||||
this.size--;
|
||||
size--;
|
||||
|
||||
if (toPrune) {
|
||||
delete toPrune[tokenToPrune];
|
||||
@ -188,7 +147,7 @@ class Trie {
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Method used to assert whether the given prefix exists in the Trie.
|
||||
@ -196,8 +155,8 @@ class Trie {
|
||||
* @param {string} suffix - Prefix to check.
|
||||
* @return {boolean}
|
||||
*/
|
||||
has(suffix) {
|
||||
let node = this.root;
|
||||
const has = (suffix) => {
|
||||
let node = root;
|
||||
let token;
|
||||
|
||||
for (let i = suffix.length - 1; i >= 0; i--) {
|
||||
@ -210,86 +169,288 @@ class Trie {
|
||||
}
|
||||
|
||||
return SENTINEL in node;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return {string[]}
|
||||
*/
|
||||
dump() {
|
||||
const node = this.root;
|
||||
const nodeStack = [];
|
||||
const prefixStack = [];
|
||||
// Resolving initial prefix
|
||||
const prefix = '';
|
||||
|
||||
nodeStack.push(node);
|
||||
prefixStack.push(prefix);
|
||||
|
||||
/** @type {string[]} */
|
||||
const results = [];
|
||||
|
||||
let currentNode;
|
||||
let currentPrefix;
|
||||
let hasValue = false;
|
||||
let k;
|
||||
|
||||
while (nodeStack.length) {
|
||||
currentNode = nodeStack.pop();
|
||||
currentPrefix = prefixStack.pop();
|
||||
|
||||
// eslint-disable-next-line guard-for-in -- plain object
|
||||
for (k in currentNode) {
|
||||
if (k === SENTINEL) {
|
||||
hasValue = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
nodeStack.push(currentNode[k]);
|
||||
prefixStack.push(k + currentPrefix);
|
||||
}
|
||||
|
||||
if (hasValue) results.push(currentPrefix);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience known methods.
|
||||
*/
|
||||
// inspect() {
|
||||
// const proxy = new Set();
|
||||
|
||||
// const iterator = this.prefixes();
|
||||
// let step;
|
||||
|
||||
// while ((step = iterator.next(), !step.done))
|
||||
// proxy.add(step.value);
|
||||
|
||||
// // Trick so that node displays the name of the constructor
|
||||
// Object.defineProperty(proxy, 'constructor', {
|
||||
// value: Trie,
|
||||
// enumerable: false
|
||||
// });
|
||||
|
||||
// return proxy;
|
||||
// }
|
||||
/**
|
||||
* Static .from function taking an arbitrary iterable & converting it into
|
||||
* a trie.
|
||||
*
|
||||
* @param {string[] | Set<string>} iterable - Target iterable.
|
||||
* @return {Trie}
|
||||
*/
|
||||
static from = iterable => {
|
||||
const trie = new Trie();
|
||||
iterable.forEach(i => trie.add(i));
|
||||
return trie;
|
||||
};
|
||||
}
|
||||
|
||||
if (from) {
|
||||
from.forEach(add);
|
||||
}
|
||||
|
||||
return {
|
||||
add,
|
||||
contains,
|
||||
find,
|
||||
remove,
|
||||
delete: remove,
|
||||
has,
|
||||
get size() {
|
||||
return size;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
// class Trie {
|
||||
// size = 0;
|
||||
// root = {};
|
||||
|
||||
// /**
|
||||
// * @param {string} suffix
|
||||
// */
|
||||
// contains(suffix) {
|
||||
// let node = this.root;
|
||||
// let token;
|
||||
|
||||
// for (let i = suffix.length - 1; i >= 0; i--) {
|
||||
// token = suffix[i];
|
||||
|
||||
// node = node[token];
|
||||
|
||||
// if (node == null) return false;
|
||||
// }
|
||||
|
||||
// return true;
|
||||
// }
|
||||
|
||||
// /**
|
||||
// * Method used to retrieve every item in the trie with the given prefix.
|
||||
// *
|
||||
// * @param {string} suffix - Prefix to query.
|
||||
// * @param {boolean} [includeEqualWithSuffix]
|
||||
// * @return {string[]}
|
||||
// */
|
||||
// find(suffix, includeEqualWithSuffix = true) {
|
||||
// let node = this.root;
|
||||
// const matches = [];
|
||||
// let token;
|
||||
|
||||
// for (let i = suffix.length - 1; i >= 0; i--) {
|
||||
// token = suffix[i];
|
||||
|
||||
// node = node[token];
|
||||
|
||||
// if (node == null) return matches;
|
||||
// }
|
||||
|
||||
// // Performing DFS from prefix
|
||||
// const nodeStack = [node];
|
||||
|
||||
// const suffixStack = [suffix];
|
||||
// let k;
|
||||
|
||||
// let $suffix = suffix;
|
||||
|
||||
// while (nodeStack.length) {
|
||||
// $suffix = suffixStack.pop();
|
||||
// node = nodeStack.pop();
|
||||
|
||||
// // eslint-disable-next-line guard-for-in -- plain object
|
||||
// for (k in node) {
|
||||
// if (k === SENTINEL) {
|
||||
// if (includeEqualWithSuffix) {
|
||||
// matches.push($suffix);
|
||||
// } else if ($suffix !== suffix) {
|
||||
// matches.push($suffix);
|
||||
// }
|
||||
|
||||
// continue;
|
||||
// }
|
||||
|
||||
// nodeStack.push(node[k]);
|
||||
// suffixStack.push(k + $suffix);
|
||||
// }
|
||||
// }
|
||||
|
||||
// return matches;
|
||||
// }
|
||||
|
||||
// // toJSON() {
|
||||
// // return this.root;
|
||||
// // }
|
||||
|
||||
// /**
|
||||
// * Method used to clear the trie.
|
||||
// *
|
||||
// * @return {void}
|
||||
// */
|
||||
// // clear() {
|
||||
// // // Properties
|
||||
// // this.root = {};
|
||||
// // this.size = 0;
|
||||
// // }
|
||||
|
||||
// /**
|
||||
// * Method used to update the value of the given prefix in the trie.
|
||||
// *
|
||||
// * @param {string|array} prefix - Prefix to follow.
|
||||
// * @param {(oldValue: any | undefined) => any} updateFunction - Update value visitor callback.
|
||||
// * @return {Trie}
|
||||
// */
|
||||
// // update(prefix, updateFunction) {
|
||||
// // let node = this.root;
|
||||
// // let token;
|
||||
|
||||
// // for (let i = 0, l = prefix.length; i < l; i++) {
|
||||
// // token = prefix[i];
|
||||
|
||||
// // node = node[token] || (node[token] = {});
|
||||
// // }
|
||||
|
||||
// // // Do we need to increase size?
|
||||
// // if (!(SENTINEL in node))
|
||||
// // this.size++;
|
||||
|
||||
// // node[SENTINEL] = updateFunction(node[SENTINEL]);
|
||||
|
||||
// // return this;
|
||||
// // }
|
||||
|
||||
// /**
|
||||
// * Method used to delete a prefix from the trie.
|
||||
// *
|
||||
// * @param {string} suffix - Prefix to delete.
|
||||
// * @return {boolean}
|
||||
// */
|
||||
// delete(suffix) {
|
||||
// let node = this.root;
|
||||
// let toPrune = null;
|
||||
// let tokenToPrune = null;
|
||||
// let parent;
|
||||
// let token;
|
||||
|
||||
// for (let i = suffix.length - 1; i >= 0; i--) {
|
||||
// token = suffix[i];
|
||||
// parent = node;
|
||||
// node = node[token];
|
||||
|
||||
// // Prefix does not exist
|
||||
// if (typeof node === 'undefined') {
|
||||
// return false;
|
||||
// }
|
||||
|
||||
// // Keeping track of a potential branch to prune
|
||||
// if (toPrune !== null) {
|
||||
// if (Object.keys(node).length > 1) {
|
||||
// toPrune = null;
|
||||
// tokenToPrune = null;
|
||||
// }
|
||||
// } else if (Object.keys(node).length < 2) {
|
||||
// toPrune = parent;
|
||||
// tokenToPrune = token;
|
||||
// }
|
||||
// }
|
||||
|
||||
// if (!(SENTINEL in node)) return false;
|
||||
|
||||
// this.size--;
|
||||
|
||||
// if (toPrune) {
|
||||
// delete toPrune[tokenToPrune];
|
||||
// } else {
|
||||
// delete node[SENTINEL];
|
||||
// }
|
||||
|
||||
// return true;
|
||||
// }
|
||||
|
||||
// /**
|
||||
// * Method used to assert whether the given prefix exists in the Trie.
|
||||
// *
|
||||
// * @param {string} suffix - Prefix to check.
|
||||
// * @return {boolean}
|
||||
// */
|
||||
// has(suffix) {
|
||||
// let node = this.root;
|
||||
// let token;
|
||||
|
||||
// for (let i = suffix.length - 1; i >= 0; i--) {
|
||||
// token = suffix[i];
|
||||
// node = node[token];
|
||||
|
||||
// if (typeof node === 'undefined') {
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
|
||||
// return SENTINEL in node;
|
||||
// }
|
||||
|
||||
// /**
|
||||
// * @return {string[]}
|
||||
// */
|
||||
// // dump() {
|
||||
// // const node = this.root;
|
||||
// // const nodeStack = [];
|
||||
// // const prefixStack = [];
|
||||
// // // Resolving initial prefix
|
||||
// // const prefix = '';
|
||||
|
||||
// // nodeStack.push(node);
|
||||
// // prefixStack.push(prefix);
|
||||
|
||||
// // /** @type {string[]} */
|
||||
// // const results = [];
|
||||
|
||||
// // let currentNode;
|
||||
// // let currentPrefix;
|
||||
// // let hasValue = false;
|
||||
// // let k;
|
||||
|
||||
// // while (nodeStack.length) {
|
||||
// // currentNode = nodeStack.pop();
|
||||
// // currentPrefix = prefixStack.pop();
|
||||
|
||||
// // // eslint-disable-next-line guard-for-in -- plain object
|
||||
// // for (k in currentNode) {
|
||||
// // if (k === SENTINEL) {
|
||||
// // hasValue = true;
|
||||
// // continue;
|
||||
// // }
|
||||
|
||||
// // nodeStack.push(currentNode[k]);
|
||||
// // prefixStack.push(k + currentPrefix);
|
||||
// // }
|
||||
|
||||
// // if (hasValue) results.push(currentPrefix);
|
||||
// // }
|
||||
|
||||
// // return results;
|
||||
// // }
|
||||
|
||||
// /**
|
||||
// * Convenience known methods.
|
||||
// */
|
||||
// // inspect() {
|
||||
// // const proxy = new Set();
|
||||
|
||||
// // const iterator = this.prefixes();
|
||||
// // let step;
|
||||
|
||||
// // while ((step = iterator.next(), !step.done))
|
||||
// // proxy.add(step.value);
|
||||
|
||||
// // // Trick so that node displays the name of the constructor
|
||||
// // Object.defineProperty(proxy, 'constructor', {
|
||||
// // value: Trie,
|
||||
// // enumerable: false
|
||||
// // });
|
||||
|
||||
// // return proxy;
|
||||
// // }
|
||||
// /**
|
||||
// * Static .from function taking an arbitrary iterable & converting it into
|
||||
// * a trie.
|
||||
// *
|
||||
// * @param {string[] | Set<string>} iterable - Target iterable.
|
||||
// * @return {Trie}
|
||||
// */
|
||||
// static from = iterable => {
|
||||
// const trie = new Trie();
|
||||
// iterable.forEach(i => trie.add(i));
|
||||
// return trie;
|
||||
// };
|
||||
// }
|
||||
|
||||
/**
|
||||
* Exporting.
|
||||
*/
|
||||
module.exports.SENTINEL = SENTINEL;
|
||||
module.exports = Trie;
|
||||
module.exports = createTrie;
|
||||
|
||||
@ -1,12 +1,12 @@
|
||||
require('chai').should();
|
||||
|
||||
const Trie = require('./trie');
|
||||
const createTrie = require('./trie');
|
||||
const assert = require('assert');
|
||||
const { describe, it } = require('mocha');
|
||||
|
||||
describe('Trie', () => {
|
||||
it('should be possible to add items to a Trie.', () => {
|
||||
const trie = new Trie();
|
||||
const trie = createTrie();
|
||||
|
||||
trie.add('sukka');
|
||||
trie.add('ukka');
|
||||
@ -22,7 +22,7 @@ describe('Trie', () => {
|
||||
});
|
||||
|
||||
it('adding the same item several times should not increase size.', () => {
|
||||
const trie = new Trie();
|
||||
const trie = createTrie();
|
||||
|
||||
trie.add('rat');
|
||||
trie.add('erat');
|
||||
@ -33,21 +33,14 @@ describe('Trie', () => {
|
||||
});
|
||||
|
||||
it('should be possible to set the null sequence.', () => {
|
||||
let trie = new Trie();
|
||||
const trie = createTrie();
|
||||
|
||||
trie.add('');
|
||||
trie.size.should.eq(1);
|
||||
trie.has('').should.eq(true);
|
||||
|
||||
trie = new Trie(Array);
|
||||
|
||||
trie.add([]);
|
||||
trie.size.should.eq(1);
|
||||
trie.has([]).should.eq(true);
|
||||
});
|
||||
|
||||
it('should be possible to delete items.', () => {
|
||||
const trie = new Trie();
|
||||
const trie = createTrie();
|
||||
|
||||
trie.add('rat');
|
||||
trie.add('rate');
|
||||
@ -64,16 +57,13 @@ describe('Trie', () => {
|
||||
trie.size.should.eq(2);
|
||||
|
||||
assert.strictEqual(trie.delete('rate'), true);
|
||||
|
||||
assert.strictEqual(trie.size, 1);
|
||||
|
||||
assert.strictEqual(trie.delete('tar'), true);
|
||||
|
||||
assert.strictEqual(trie.size, 0);
|
||||
});
|
||||
|
||||
it('should be possible to check the existence of a sequence in the Trie.', () => {
|
||||
const trie = new Trie();
|
||||
const trie = createTrie();
|
||||
|
||||
trie.add('romanesque');
|
||||
|
||||
@ -83,7 +73,7 @@ describe('Trie', () => {
|
||||
});
|
||||
|
||||
it('should be possible to retrieve items matching the given prefix.', () => {
|
||||
const trie = new Trie();
|
||||
const trie = createTrie();
|
||||
|
||||
trie.add('roman');
|
||||
trie.add('esqueroman');
|
||||
@ -154,7 +144,7 @@ describe('Trie', () => {
|
||||
it('should be possible to create a trie from an arbitrary iterable.', () => {
|
||||
const words = ['roman', 'esqueroman'];
|
||||
|
||||
const trie = Trie.from(words);
|
||||
const trie = createTrie(words);
|
||||
|
||||
assert.strictEqual(trie.size, 2);
|
||||
assert.deepStrictEqual(trie.has('roman'), true);
|
||||
@ -163,14 +153,14 @@ describe('Trie', () => {
|
||||
|
||||
describe('surge domainset dedupe', () => {
|
||||
it('should not remove same entry', () => {
|
||||
const trie = Trie.from(['.skk.moe', 'noc.one']);
|
||||
const trie = createTrie(['.skk.moe', 'noc.one']);
|
||||
|
||||
trie.find('.skk.moe').should.eql(['.skk.moe']);
|
||||
trie.find('noc.one').should.eql(['noc.one']);
|
||||
});
|
||||
|
||||
it('should remove subdomain', () => {
|
||||
const trie = Trie.from(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']);
|
||||
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']);
|
||||
// trie.find('noc.one').should.eql(['www.noc.one']);
|
||||
trie.find('.skk.moe').should.eql(['image.cdn.skk.moe', 'blog.skk.moe']);
|
||||
// trie.find('sukkaw.net').should.eql(['cdn.sukkaw.net']);
|
||||
@ -178,7 +168,7 @@ describe('surge domainset dedupe', () => {
|
||||
});
|
||||
|
||||
it('should not remove non-subdomain', () => {
|
||||
const trie = Trie.from(['skk.moe', 'sukkaskk.moe']);
|
||||
const trie = createTrie(['skk.moe', 'sukkaskk.moe']);
|
||||
trie.find('.skk.moe').should.eql([]);
|
||||
});
|
||||
});
|
||||
|
||||
@ -108,6 +108,8 @@ DOMAIN-KEYWORD,_vmind.qqvideo.tc.qq.com
|
||||
DOMAIN-KEYWORD,-logging.nextmedia.com
|
||||
DOMAIN-KEYWORD,-spiky.clevertap-prod.com
|
||||
DOMAIN-KEYWORD,.engage.3m.
|
||||
DOMAIN-KEYWORD,telemetry.officeapps.live.com
|
||||
DOMAIN-KEYWORD,-launches.appsflyersdk.com
|
||||
|
||||
AND,((DOMAIN-KEYWORD,genuine), (DOMAIN-KEYWORD,autodesk))
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user