Implement suffix (reversed) trie

This commit is contained in:
SukkaW
2023-07-07 14:18:27 +08:00
parent 762fed66fe
commit 9dd9e4aa05
4 changed files with 760 additions and 10 deletions

300
Build/lib/trie.js Normal file
View File

@@ -0,0 +1,300 @@
/**
* Suffix Trie based on Mnemonist Trie
*/
/**
 * Key that marks "a stored string ends at this node".
 *
 * It is the NUL character, so a stored string that itself contains NUL would
 * collide with the marker.
 */
const SENTINEL = String.fromCharCode(0);

/**
 * Suffix (reversed) trie.
 *
 * Every string is inserted character by character from its LAST character to
 * its first, so strings that share an ending share a path from the root.
 * Nodes are plain objects keyed by single characters; a node that terminates
 * a stored string additionally carries the SENTINEL key.
 *
 * The Mnemonist methods `clear`, `update`, `prefixes` and `inspect` are not
 * part of this port.
 */
class Trie {
  /** Number of distinct strings currently stored. */
  size = 0;

  /** Root node of the trie. */
  root = {};

  /**
   * Add the given string to the trie.
   *
   * @param {string} suffix - String to store.
   * @return {Trie} This trie, to allow chaining.
   */
  add(suffix) {
    let node = this.root;

    // Walk backwards so the end of the string sits closest to the root.
    for (let i = suffix.length - 1; i >= 0; i--) {
      const token = suffix[i];
      node = node[token] || (node[token] = {});
    }

    // Only count the string the first time it is inserted.
    if (!(SENTINEL in node)) this.size++;
    node[SENTINEL] = true;

    return this;
  }

  /**
   * Retrieve every stored string that ends with the given suffix.
   *
   * @param {string} suffix - Suffix to query.
   * @param {boolean} [includeEqualWithSuffix=true] - When false, a stored
   *   string that is exactly `suffix` is left out of the result.
   * @return {string[]} Matching strings, in depth-first traversal order.
   */
  find(suffix, includeEqualWithSuffix = true) {
    const matches = [];
    let node = this.root;

    // Descend to the node representing `suffix`; bail out if it is absent.
    for (let i = suffix.length - 1; i >= 0; i--) {
      node = node[suffix[i]];
      if (node == null) return matches;
    }

    // Depth-first traversal below that node. The two stacks are kept in
    // lockstep: each node is paired with the string spelled out so far.
    const nodeStack = [node];
    const suffixStack = [suffix];

    while (nodeStack.length) {
      const current = suffixStack.pop();
      node = nodeStack.pop();

      for (const k in node) {
        if (k === SENTINEL) {
          if (includeEqualWithSuffix || current !== suffix) {
            matches.push(current);
          }
          continue;
        }

        nodeStack.push(node[k]);
        // Children hold earlier characters, so they are prepended.
        suffixStack.push(k + current);
      }
    }

    return matches;
  }

  /**
   * Serialization hook used by JSON.stringify.
   *
   * @return {object} The root node itself (not a copy).
   */
  toJSON() {
    return this.root;
  }

  /**
   * Delete the given string from the trie.
   *
   * @param {string} suffix - String to delete.
   * @return {boolean} True when the string was stored and has been removed.
   */
  delete(suffix) {
    let node = this.root;
    let toPrune = null;
    let tokenToPrune = null;

    for (let i = suffix.length - 1; i >= 0; i--) {
      const token = suffix[i];
      const parent = node;
      node = node[token];

      // The string does not exist.
      if (typeof node === 'undefined') return false;

      // Track the topmost edge of a chain of single-key nodes: if the chain
      // reaches the end of the string, the whole branch can be dropped.
      if (toPrune !== null) {
        if (Object.keys(node).length > 1) {
          // The branch is shared with something else; forget the candidate.
          toPrune = null;
          tokenToPrune = null;
        }
      } else if (Object.keys(node).length < 2) {
        toPrune = parent;
        tokenToPrune = token;
      }
    }

    if (!(SENTINEL in node)) return false;

    this.size--;

    if (toPrune) {
      delete toPrune[tokenToPrune];
    } else {
      delete node[SENTINEL];
    }

    return true;
  }

  /**
   * Check whether the exact given string is stored in the trie.
   *
   * @param {string} suffix - String to check.
   * @return {boolean}
   */
  has(suffix) {
    let node = this.root;

    for (let i = suffix.length - 1; i >= 0; i--) {
      node = node[suffix[i]];
      if (typeof node === 'undefined') return false;
    }

    return SENTINEL in node;
  }

  /**
   * Build a trie from a collection of strings.
   *
   * Collections exposing `forEach` (arrays, Sets, ...) are consumed through
   * it, exactly as before; anything else is consumed through the iteration
   * protocol, so generators and other plain iterables work too.
   *
   * @param {Iterable<string>} iterable - Strings to insert.
   * @return {Trie}
   */
  static from = iterable => {
    const trie = new Trie();

    if (typeof iterable.forEach === 'function') {
      iterable.forEach(item => trie.add(item));
    } else {
      for (const item of iterable) trie.add(item);
    }

    return trie;
  };
}
/**
 * Exporting.
 *
 * The class is the module's export. SENTINEL is attached AFTER the class is
 * assigned: assigning `module.exports` replaces the exports object, so a
 * property set before that assignment would be lost.
 */
module.exports = Trie;
module.exports.SENTINEL = SENTINEL;

185
Build/lib/trie.test.js Normal file
View File

@@ -0,0 +1,185 @@
// Installs chai's `.should` getter used by the assertions below.
require('chai').should();
const Trie = require('./trie');
const assert = require('assert');
const SENTINEL = Trie.SENTINEL;
// Unit tests for the suffix trie: insertion, membership, deletion, suffix
// lookup and construction from a collection.
describe('Trie', () => {
  it('should be possible to add items to a Trie.', () => {
    const trie = new Trie();

    trie.add('sukka');
    trie.add('ukka');
    trie.add('akku');

    assert.strictEqual(trie.size, 3);
    assert.strictEqual(trie.has('sukka'), true);
    assert.strictEqual(trie.has('ukka'), true);
    assert.strictEqual(trie.has('akku'), true);
    assert.strictEqual(trie.has('noc'), false);
    assert.strictEqual(trie.has('suk'), false);
    assert.strictEqual(trie.has('sukkaw'), false);
  });

  it('adding the same item several times should not increase size.', () => {
    const trie = new Trie();

    trie.add('rat');
    trie.add('erat');
    trie.add('rat');

    assert.strictEqual(trie.size, 2);
    assert.strictEqual(trie.has('rat'), true);
  });

  it('should be possible to set the null sequence.', () => {
    const trie = new Trie();

    trie.add('');

    assert.strictEqual(trie.size, 1);
    assert.strictEqual(trie.has(''), true);
  });

  it('should be possible to delete items.', () => {
    const trie = new Trie();

    trie.add('rat');
    trie.add('rate');
    trie.add('tar');

    // Deleting something that was never stored is a no-op.
    assert.strictEqual(trie.delete(''), false);
    assert.strictEqual(trie.delete('hello'), false);

    assert.strictEqual(trie.delete('rat'), true);
    assert.strictEqual(trie.has('rat'), false);
    assert.strictEqual(trie.has('rate'), true);
    assert.strictEqual(trie.size, 2);

    assert.strictEqual(trie.delete('rate'), true);
    assert.strictEqual(trie.size, 1);
    assert.strictEqual(trie.delete('tar'), true);
    assert.strictEqual(trie.size, 0);
  });

  it('should be possible to check the existence of a sequence in the Trie.', () => {
    const trie = new Trie();

    trie.add('romanesque');

    assert.strictEqual(trie.has('romanesque'), true);
    assert.strictEqual(trie.has('roman'), false);
    assert.strictEqual(trie.has(''), false);
  });

  it('should be possible to retrieve items ending with the given suffix.', () => {
    const trie = new Trie();

    trie.add('roman');
    trie.add('esqueroman');
    trie.add('sesqueroman');
    trie.add('greek');

    assert.deepStrictEqual(trie.find('roman'), ['roman', 'esqueroman', 'sesqueroman']);
    assert.deepStrictEqual(trie.find('man'), ['roman', 'esqueroman', 'sesqueroman']);
    assert.deepStrictEqual(trie.find('esqueroman'), ['esqueroman', 'sesqueroman']);
    assert.deepStrictEqual(trie.find('eek'), ['greek']);
    assert.deepStrictEqual(trie.find('hello'), []);
    assert.deepStrictEqual(trie.find(''), ['greek', 'roman', 'esqueroman', 'sesqueroman']);
  });

  it('should be possible to exclude the queried suffix itself from the matches.', () => {
    const trie = new Trie();

    trie.add('roman');
    trie.add('esqueroman');
    trie.add('sesqueroman');

    assert.deepStrictEqual(trie.find('roman', false), ['esqueroman', 'sesqueroman']);
    // 'man' is not stored itself, so nothing is filtered out.
    assert.deepStrictEqual(trie.find('man', false), ['roman', 'esqueroman', 'sesqueroman']);
  });

  it('should be possible to create a trie from an arbitrary iterable.', () => {
    const words = ['roman', 'esqueroman'];
    const trie = Trie.from(words);

    assert.strictEqual(trie.size, 2);
    assert.strictEqual(trie.has('roman'), true);
  });
});
// Suffix lookups as used for domain-set deduplication: a query such as
// '.skk.moe' is expected to return the stored entries that end with it.
describe('surge domainset dedupe', () => {
  it('should not remove same entry', () => {
    const entries = ['.skk.moe', 'noc.one'];
    const domainTrie = Trie.from(entries);

    // Looking up an entry that is itself stored yields exactly that entry.
    for (const entry of entries) {
      domainTrie.find(entry).should.eql([entry]);
    }
  });

  it('should remove subdomain', () => {
    const entries = [
      'www.noc.one',
      'www.sukkaw.com',
      'blog.skk.moe',
      'image.cdn.skk.moe',
      'cdn.sukkaw.net'
    ];
    const domainTrie = Trie.from(entries);

    // Disabled upstream (bare-domain queries):
    // domainTrie.find('noc.one').should.eql(['www.noc.one']);
    // domainTrie.find('sukkaw.net').should.eql(['cdn.sukkaw.net']);
    const underSkkMoe = domainTrie.find('.skk.moe');
    underSkkMoe.should.eql(['image.cdn.skk.moe', 'blog.skk.moe']);

    const underSukkawCom = domainTrie.find('.sukkaw.com');
    underSukkawCom.should.eql(['www.sukkaw.com']);
  });

  it('should not remove non-subdomain', () => {
    const domainTrie = Trie.from(['skk.moe', 'sukkaskk.moe']);

    // Neither entry ends with '.skk.moe' (leading dot included).
    const found = domainTrie.find('.skk.moe');
    found.should.eql([]);
  });
});