Perf: make AdGuard rule parsing faster

This commit is contained in:
SukkaW 2023-09-18 11:40:17 +08:00
parent 89b8638883
commit 702ded53c5
9 changed files with 407 additions and 241 deletions

View File

@ -2,7 +2,7 @@
const path = require('path'); const path = require('path');
const { createRuleset } = require('./lib/create-file'); const { createRuleset } = require('./lib/create-file');
const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('./lib/fetch-remote-text-by-line'); const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('./lib/fetch-remote-text-by-line');
const Trie = require('./lib/trie'); const createTrie = require('./lib/trie');
const { task } = require('./lib/trace-runner'); const { task } = require('./lib/trace-runner');
const fs = require('fs'); const fs = require('fs');
const { processLine } = require('./lib/process-line'); const { processLine } = require('./lib/process-line');
@ -10,7 +10,7 @@ const { processLine } = require('./lib/process-line');
const publicSuffixPath = path.resolve(__dirname, '../node_modules/.cache/public_suffix_list_dat.txt'); const publicSuffixPath = path.resolve(__dirname, '../node_modules/.cache/public_suffix_list_dat.txt');
const getS3OSSDomains = async () => { const getS3OSSDomains = async () => {
const trie = new Trie(); const trie = createTrie();
if (fs.existsSync(publicSuffixPath)) { if (fs.existsSync(publicSuffixPath)) {
for await (const line of readFileByLine(publicSuffixPath)) { for await (const line of readFileByLine(publicSuffixPath)) {

View File

@ -5,7 +5,7 @@ const { createRuleset } = require('./lib/create-file');
const { processLine } = require('./lib/process-line.js'); const { processLine } = require('./lib/process-line.js');
const { createDomainSorter } = require('./lib/stable-sort-domain'); const { createDomainSorter } = require('./lib/stable-sort-domain');
const { traceSync, task } = require('./lib/trace-runner.js'); const { traceSync, task } = require('./lib/trace-runner.js');
const Trie = require('./lib/trie.js'); const createTrie = require('./lib/trie.js');
const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix.js'); const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix.js');
const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse.js'); const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse.js');
const tldts = require('tldts'); const tldts = require('tldts');
@ -79,7 +79,7 @@ const buildPhishingDomainSet = task(__filename, async () => {
]); ]);
traceSync('* whitelist', () => { traceSync('* whitelist', () => {
const trieForRemovingWhiteListed = Trie.from(domainSet); const trieForRemovingWhiteListed = createTrie(domainSet);
WHITELIST_DOMAIN.forEach(white => { WHITELIST_DOMAIN.forEach(white => {
trieForRemovingWhiteListed.find(`.${white}`, false).forEach(f => domainSet.delete(f)); trieForRemovingWhiteListed.find(`.${white}`, false).forEach(f => domainSet.delete(f));
if (trieForRemovingWhiteListed.has(white)) { if (trieForRemovingWhiteListed.has(white)) {

View File

@ -3,7 +3,7 @@ const fse = require('fs-extra');
const { resolve: pathResolve } = require('path'); const { resolve: pathResolve } = require('path');
const { processHosts, processFilterRules } = require('./lib/parse-filter'); const { processHosts, processFilterRules } = require('./lib/parse-filter');
const Trie = require('./lib/trie'); const createTrie = require('./lib/trie');
const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source'); const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source');
const { createRuleset, compareAndWriteFile } = require('./lib/create-file'); const { createRuleset, compareAndWriteFile } = require('./lib/create-file');
@ -44,8 +44,8 @@ const buildRejectDomainSet = task(__filename, async () => {
})), })),
...ADGUARD_FILTERS.map(input => { ...ADGUARD_FILTERS.map(input => {
const promise = typeof input === 'string' const promise = typeof input === 'string'
? processFilterRules(input, undefined, false) ? processFilterRules(input, undefined)
: processFilterRules(input[0], input[1] || undefined, input[2] ?? false); : processFilterRules(input[0], input[1] || undefined);
return promise.then((i) => { return promise.then((i) => {
if (i) { if (i) {
@ -82,7 +82,7 @@ const buildRejectDomainSet = task(__filename, async () => {
]); ]);
// remove pre-defined enforced blacklist from whitelist // remove pre-defined enforced blacklist from whitelist
const trie0 = Trie.from(filterRuleWhitelistDomainSets); const trie0 = createTrie(filterRuleWhitelistDomainSets);
PREDEFINED_ENFORCED_BACKLIST.forEach(enforcedBlack => { PREDEFINED_ENFORCED_BACKLIST.forEach(enforcedBlack => {
trie0.find(enforcedBlack).forEach(found => filterRuleWhitelistDomainSets.delete(found)); trie0.find(enforcedBlack).forEach(found => filterRuleWhitelistDomainSets.delete(found));
}); });
@ -131,7 +131,7 @@ const buildRejectDomainSet = task(__filename, async () => {
console.log(`Start deduping from black keywords/suffixes! (${previousSize})`); console.log(`Start deduping from black keywords/suffixes! (${previousSize})`);
console.time('* Dedupe from black keywords/suffixes'); console.time('* Dedupe from black keywords/suffixes');
const trie1 = Trie.from(domainSets); const trie1 = createTrie(domainSets);
domainSuffixSet.forEach(suffix => { domainSuffixSet.forEach(suffix => {
trie1.find(suffix, true).forEach(f => domainSets.delete(f)); trie1.find(suffix, true).forEach(f => domainSets.delete(f));
}); });
@ -143,7 +143,7 @@ const buildRejectDomainSet = task(__filename, async () => {
const kwfilter = createKeywordFilter(domainKeywordsSet); const kwfilter = createKeywordFilter(domainKeywordsSet);
// Build whitelist trie, to handle case like removing `g.msn.com` due to white `.g.msn.com` (`@@||g.msn.com`) // Build whitelist trie, to handle case like removing `g.msn.com` due to white `.g.msn.com` (`@@||g.msn.com`)
const trieWhite = Trie.from(filterRuleWhitelistDomainSets); const trieWhite = createTrie(filterRuleWhitelistDomainSets);
for (const domain of domainSets) { for (const domain of domainSets) {
if (domain[0] === '.') { if (domain[0] === '.') {
if (trieWhite.contains(domain)) { if (trieWhite.contains(domain)) {

View File

@ -1,11 +1,11 @@
// @ts-check // @ts-check
const Trie = require('./trie'); const createTrie = require('./trie');
/** /**
* @param {string[]} inputDomains * @param {string[]} inputDomains
*/ */
const domainDeduper = (inputDomains) => { const domainDeduper = (inputDomains) => {
const trie = Trie.from(inputDomains); const trie = createTrie(inputDomains);
const sets = new Set(inputDomains); const sets = new Set(inputDomains);
for (let j = 0, len = inputDomains.length; j < len; j++) { for (let j = 0, len = inputDomains.length; j < len; j++) {

View File

@ -1,6 +1,6 @@
// @ts-check // @ts-check
const { fetchWithRetry } = require('./fetch-retry'); const { fetchWithRetry } = require('./fetch-retry');
const tldts = require('tldts'); const tldts = require('./cached-tld-parse');
const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-text-by-line'); const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-text-by-line');
const { NetworkFilter } = require('@cliqz/adblocker'); const { NetworkFilter } = require('@cliqz/adblocker');
const { processLine } = require('./process-line'); const { processLine } = require('./process-line');
@ -113,7 +113,7 @@ async function processHosts(hostsUrl, includeAllSubDomain = false) {
* @param {readonly (string | URL)[] | undefined} [fallbackUrls] * @param {readonly (string | URL)[] | undefined} [fallbackUrls]
* @returns {Promise<{ white: Set<string>, black: Set<string>, foundDebugDomain: boolean }>} * @returns {Promise<{ white: Set<string>, black: Set<string>, foundDebugDomain: boolean }>}
*/ */
async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdParties = false) { async function processFilterRules(filterRulesUrl, fallbackUrls) {
const runStart = performance.now(); const runStart = performance.now();
/** @type Set<string> */ /** @type Set<string> */
@ -148,7 +148,7 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart
const gorhill = await getGorhillPublicSuffixPromise(); const gorhill = await getGorhillPublicSuffixPromise();
const lineCb = (line) => { const lineCb = (line) => {
const result = parse(line, includeThirdParties, gorhill); const result = parse(line, gorhill);
if (result) { if (result) {
const flag = result[1]; const flag = result[1];
const hostname = result[0]; const hostname = result[0];
@ -180,12 +180,15 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart
}; };
if (!fallbackUrls || fallbackUrls.length === 0) { if (!fallbackUrls || fallbackUrls.length === 0) {
const downloadStart = performance.now(); downloadTime = 0;
let last = performance.now();
for await (const line of await fetchRemoteTextAndCreateReadlineInterface(filterRulesUrl)) { for await (const line of await fetchRemoteTextAndCreateReadlineInterface(filterRulesUrl)) {
const now = performance.now();
downloadTime += performance.now() - last;
last = now;
// don't trim here // don't trim here
lineCb(line); lineCb(line);
} }
downloadTime = performance.now() - downloadStart;
} else { } else {
let filterRules; let filterRules;
@ -229,11 +232,10 @@ const R_KNOWN_NOT_NETWORK_FILTER_PATTERN_2 = /(\$popup|\$removeparam|\$popunder)
/** /**
* @param {string} $line * @param {string} $line
* @param {boolean} includeThirdParties
* @param {import('gorhill-publicsuffixlist').default} gorhill * @param {import('gorhill-publicsuffixlist').default} gorhill
* @returns {null | [string, 0 | 1 | 2 | -1]} - 0 white include subdomain, 1 black abosulte, 2 black include subdomain, -1 white * @returns {null | [hostname: string, flag: 0 | 1 | 2 | -1]} - 0 white include subdomain, 1 black abosulte, 2 black include subdomain, -1 white
*/ */
function parse($line, includeThirdParties, gorhill) { function parse($line, gorhill) {
if ( if (
// doesn't include // doesn't include
!$line.includes('.') // rule with out dot can not be a domain !$line.includes('.') // rule with out dot can not be a domain
@ -297,7 +299,7 @@ function parse($line, includeThirdParties, gorhill) {
if ( if (
filter.hostname // filter.hasHostname() // must have filter.hostname // filter.hasHostname() // must have
&& filter.isPlain() && filter.isPlain()
&& (!filter.isRegex()) // && (!filter.isRegex()) // isPlain() === !isRegex()
&& (!filter.isFullRegex()) && (!filter.isFullRegex())
) { ) {
if (!gorhill.getDomain(filter.hostname)) { if (!gorhill.getDomain(filter.hostname)) {
@ -307,22 +309,28 @@ function parse($line, includeThirdParties, gorhill) {
if (!hostname) { if (!hostname) {
return null; return null;
} }
// console.log({
// '||': filter.isHostnameAnchor(),
// '|': filter.isLeftAnchor(),
// '|https://': !filter.isHostnameAnchor() && (filter.fromHttps() || filter.fromHttp())
// });
const isIncludeAllSubDomain = filter.isHostnameAnchor();
if (filter.isException() || filter.isBadFilter()) { if (filter.isException() || filter.isBadFilter()) {
return [hostname, 0]; return [hostname, isIncludeAllSubDomain ? 0 : -1];
} }
const _1p = filter.firstParty(); const _1p = filter.firstParty();
const _3p = filter.thirdParty(); const _3p = filter.thirdParty();
if (_1p) {
if (_1p === _3p) { if (_1p === _3p) {
return [hostname, 2]; return [hostname, isIncludeAllSubDomain ? 2 : 1];
}
if (_3p) {
if (includeThirdParties) {
return [hostname, 2];
} }
return null; return null;
} }
if (_1p) { if (_3p) {
return null; return null;
} }
} }
@ -340,10 +348,12 @@ function parse($line, includeThirdParties, gorhill) {
return null; return null;
} }
const lineEndsWithCaretOrCaretVerticalBar = ( /* eslint-disable no-nested-ternary -- speed */
lastChar === '^'
|| (lastChar === '|' && line[len - 2] === '^') const linedEndsWithCaret = lastChar === '^';
); const lineEndsWithCaretVerticalBar = lastChar === '|' && line[len - 2] === '^';
const lineEndsWithCaretOrCaretVerticalBar = linedEndsWithCaret || lineEndsWithCaretVerticalBar;
// whitelist (exception) // whitelist (exception)
if (firstChar === '@' && line[1] === '@') { if (firstChar === '@' && line[1] === '@') {
@ -397,13 +407,7 @@ function parse($line, includeThirdParties, gorhill) {
} }
} }
if ( if (firstChar === '|' && (lineEndsWithCaretOrCaretVerticalBar || line.endsWith('$cname'))) {
firstChar === '|' && line[1] === '|'
&& (
lineEndsWithCaretOrCaretVerticalBar
|| line.endsWith('$cname')
)
) {
/** /**
* Some malformed filters can not be parsed by NetworkFilter: * Some malformed filters can not be parsed by NetworkFilter:
* *
@ -411,17 +415,26 @@ function parse($line, includeThirdParties, gorhill) {
* `||solutions.|pages.indigovision.com^` * `||solutions.|pages.indigovision.com^`
* `||vystar..0rg@client.iebetanialaargentina.edu.co^` * `||vystar..0rg@client.iebetanialaargentina.edu.co^`
*/ */
const includeAllSubDomain = line[1] === '|';
const sliceStart = includeAllSubDomain ? 2 : 1;
const sliceEnd = lastChar === '^'
? -1
: lineEndsWithCaretOrCaretVerticalBar
? -2
: line.endsWith('$cname')
? -6
: 0;
const _domain = line const _domain = line
// .replace('||', '') // .replace('||', '')
.slice(2) // we already make sure line startsWith || .slice(sliceStart, sliceEnd) // we already make sure line startsWith ||
.replace('^|', '')
.replace('$cname', '')
.replaceAll('^', '')
.trim(); .trim();
const domain = normalizeDomain(_domain); const domain = normalizeDomain(_domain);
if (domain) { if (domain) {
return [domain, 2]; return [domain, includeAllSubDomain ? 2 : 1];
} }
console.warn(' * [parse-filter E0002] (black) invalid domain:', _domain); console.warn(' * [parse-filter E0002] (black) invalid domain:', _domain);
@ -439,7 +452,14 @@ function parse($line, includeThirdParties, gorhill) {
* `.wap.x4399.com^` * `.wap.x4399.com^`
*/ */
const _domain = line const _domain = line
.slice(1) // remove prefix dot .slice(
1,
linedEndsWithCaret
? -1
: lineEndsWithCaretVerticalBar
? -2
: 0
) // remove prefix dot
.replace('^|', '') .replace('^|', '')
.replaceAll('^', '') .replaceAll('^', '')
.trim(); .trim();
@ -503,6 +523,13 @@ function parse($line, includeThirdParties, gorhill) {
*/ */
if (firstChar !== '|' && lastChar === '^') { if (firstChar !== '|' && lastChar === '^') {
const _domain = line.slice(0, -1); const _domain = line.slice(0, -1);
const suffix = gorhill.getPublicSuffix(_domain);
if (!suffix || !gorhill.suffixInPSL(suffix)) {
// This exclude domain-like resource like `_social_tracking.js^`
return null;
}
const domain = normalizeDomain(_domain); const domain = normalizeDomain(_domain);
if (domain) { if (domain) {
return [domain, 1]; return [domain, 1];
@ -540,6 +567,7 @@ function parse($line, includeThirdParties, gorhill) {
} }
return null; return null;
/* eslint-enable no-nested-ternary */
} }
module.exports.processDomainLists = processDomainLists; module.exports.processDomainLists = processDomainLists;

View File

@ -17,8 +17,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
'https://easylist-downloads.adblockplus.org/easylist.txt', 'https://easylist-downloads.adblockplus.org/easylist.txt',
'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easylist.txt', 'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easylist.txt',
'https://secure.fanboy.co.nz/easylist.txt' 'https://secure.fanboy.co.nz/easylist.txt'
], ]
false
], ],
// Easy Privacy // Easy Privacy
[ [
@ -27,8 +26,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
'https://secure.fanboy.co.nz/easyprivacy.txt', 'https://secure.fanboy.co.nz/easyprivacy.txt',
'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easyprivacy.txt', 'https://raw.githubusercontent.com/easylist/easylist/gh-pages/easyprivacy.txt',
'https://easylist-downloads.adblockplus.org/easyprivacy.txt' 'https://easylist-downloads.adblockplus.org/easyprivacy.txt'
], ]
false
], ],
// AdGuard DNS Filter // AdGuard DNS Filter
[ [
@ -48,40 +46,35 @@ const ADGUARD_FILTERS = /** @type {const} */([
[ [
'https://ublockorigin.github.io/uAssetsCDN/filters/filters.txt', 'https://ublockorigin.github.io/uAssetsCDN/filters/filters.txt',
'https://ublockorigin.pages.dev/filters/filters.txt' 'https://ublockorigin.pages.dev/filters/filters.txt'
], ]
false
], ],
[ [
'https://ublockorigin.github.io/uAssets/filters/filters-2020.txt', 'https://ublockorigin.github.io/uAssets/filters/filters-2020.txt',
[ [
'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2020.txt', 'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2020.txt',
'https://ublockorigin.pages.dev/filters/filters-2020.txt' 'https://ublockorigin.pages.dev/filters/filters-2020.txt'
], ]
false
], ],
[ [
'https://ublockorigin.github.io/uAssets/filters/filters-2021.txt', 'https://ublockorigin.github.io/uAssets/filters/filters-2021.txt',
[ [
'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2021.txt', 'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2021.txt',
'https://ublockorigin.pages.dev/filters/filters-2021.txt' 'https://ublockorigin.pages.dev/filters/filters-2021.txt'
], ]
false
], ],
[ [
'https://ublockorigin.github.io/uAssets/filters/filters-2022.txt', 'https://ublockorigin.github.io/uAssets/filters/filters-2022.txt',
[ [
'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2022.txt', 'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2022.txt',
'https://ublockorigin.pages.dev/filters/filters-2022.txt' 'https://ublockorigin.pages.dev/filters/filters-2022.txt'
], ]
false
], ],
[ [
'https://ublockorigin.github.io/uAssets/filters/filters-2023.txt', 'https://ublockorigin.github.io/uAssets/filters/filters-2023.txt',
[ [
'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2023.txt', 'https://ublockorigin.github.io/uAssetsCDN/filters/filters-2023.txt',
'https://ublockorigin.pages.dev/filters/filters-2023.txt' 'https://ublockorigin.pages.dev/filters/filters-2023.txt'
], ]
false
], ],
// uBlock Origin Badware Risk List // uBlock Origin Badware Risk List
[ [
@ -89,8 +82,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
[ [
'https://ublockorigin.github.io/uAssetsCDN/filters/badware.txt', 'https://ublockorigin.github.io/uAssetsCDN/filters/badware.txt',
'https://ublockorigin.pages.dev/filters/badware.txt' 'https://ublockorigin.pages.dev/filters/badware.txt'
], ]
false
], ],
// uBlock Origin Privacy List // uBlock Origin Privacy List
[ [
@ -98,8 +90,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
[ [
'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.txt', 'https://ublockorigin.github.io/uAssetsCDN/filters/privacy.txt',
'https://ublockorigin.pages.dev/filters/privacy.txt' 'https://ublockorigin.pages.dev/filters/privacy.txt'
], ]
false
], ],
// uBlock Origin Resource Abuse // uBlock Origin Resource Abuse
[ [
@ -107,8 +98,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
[ [
'https://ublockorigin.github.io/uAssetsCDN/filters/resource-abuse.txt', 'https://ublockorigin.github.io/uAssetsCDN/filters/resource-abuse.txt',
'https://ublockorigin.pages.dev/filters/resource-abuse.txt' 'https://ublockorigin.pages.dev/filters/resource-abuse.txt'
], ]
false
], ],
// uBlock Origin Unbreak // uBlock Origin Unbreak
[ [
@ -116,8 +106,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
[ [
'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.txt', 'https://ublockorigin.github.io/uAssetsCDN/filters/unbreak.txt',
'https://ublockorigin.pages.dev/filters/unbreak.txt' 'https://ublockorigin.pages.dev/filters/unbreak.txt'
], ]
false
], ],
// AdGuard Base Filter // AdGuard Base Filter
'https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt', 'https://filters.adtidy.org/extension/ublock/filters/2_without_easylist.txt',
@ -136,8 +125,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
'https://easylist.to/easylistgermany/easylistgermany.txt', 'https://easylist.to/easylistgermany/easylistgermany.txt',
[ [
'https://easylist-downloads.adblockplus.org/easylistgermany.txt' 'https://easylist-downloads.adblockplus.org/easylistgermany.txt'
], ]
false
], ],
// Curben's UrlHaus Malicious URL Blocklist // Curben's UrlHaus Malicious URL Blocklist
[ [
@ -146,8 +134,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt' 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt'
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt' // 'https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt'
], ]
false
], ],
// Curben's Phishing URL Blocklist // Curben's Phishing URL Blocklist
[ [
@ -156,8 +143,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
'https://phishing-filter.pages.dev/phishing-filter-agh.txt' 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt' // 'https://malware-filter.gitlab.io/malware-filter/phishing-filter-agh.txt'
], ]
false
], ],
// Curben's PUP Domains Blocklist // Curben's PUP Domains Blocklist
[ [
@ -166,8 +152,7 @@ const ADGUARD_FILTERS = /** @type {const} */([
'https://pup-filter.pages.dev/pup-filter-agh.txt' 'https://pup-filter.pages.dev/pup-filter-agh.txt'
// Prefer mirror, since malware-filter.gitlab.io has not been updated for a while // Prefer mirror, since malware-filter.gitlab.io has not been updated for a while
// 'https://malware-filter.gitlab.io/malware-filter/pup-filter-agh.txt' // 'https://malware-filter.gitlab.io/malware-filter/pup-filter-agh.txt'
], ]
false
], ],
// GameConsoleAdblockList // GameConsoleAdblockList
'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt', 'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt',

View File

@ -4,38 +4,39 @@
const SENTINEL = String.fromCharCode(0); const SENTINEL = String.fromCharCode(0);
class Trie { /**
size = 0; * @param {string[] | Set<string>} [from]
root = {}; */
const createTrie = (from) => {
let size = 0;
const root = {};
/** /**
* Method used to add the given prefix to the trie. * Method used to add the given prefix to the trie.
* *
* @param {string} suffix - Prefix to follow. * @param {string} suffix - Prefix to follow.
* @return {Trie}
*/ */
add(suffix) { const add = (suffix) => {
let node = this.root; let node = root;
let token; let token;
for (let i = suffix.length - 1; i >= 0; i--) { for (let i = suffix.length - 1; i >= 0; i--) {
token = suffix[i]; token = suffix[i];
node[token] ||= {};
node = node[token] || (node[token] = {}); node = node[token];
} }
// Do we need to increase size? // Do we need to increase size?
if (!(SENTINEL in node)) this.size++; if (!(SENTINEL in node)) {
node[SENTINEL] = true; size++;
return this;
} }
node[SENTINEL] = true;
};
/** /**
* @param {string} suffix * @param {string} suffix
*/ */
contains(suffix) { const contains = (suffix) => {
let node = this.root; let node = root;
let token; let token;
for (let i = suffix.length - 1; i >= 0; i--) { for (let i = suffix.length - 1; i >= 0; i--) {
@ -47,8 +48,7 @@ class Trie {
} }
return true; return true;
} };
/** /**
* Method used to retrieve every item in the trie with the given prefix. * Method used to retrieve every item in the trie with the given prefix.
* *
@ -56,8 +56,8 @@ class Trie {
* @param {boolean} [includeEqualWithSuffix] * @param {boolean} [includeEqualWithSuffix]
* @return {string[]} * @return {string[]}
*/ */
find(suffix, includeEqualWithSuffix = true) { const find = (suffix, includeEqualWithSuffix = true) => {
let node = this.root; let node = root;
const matches = []; const matches = [];
let token; let token;
@ -99,48 +99,7 @@ class Trie {
} }
return matches; return matches;
} };
toJSON() {
return this.root;
}
/**
* Method used to clear the trie.
*
* @return {void}
*/
// clear() {
// // Properties
// this.root = {};
// this.size = 0;
// }
/**
* Method used to update the value of the given prefix in the trie.
*
* @param {string|array} prefix - Prefix to follow.
* @param {(oldValue: any | undefined) => any} updateFunction - Update value visitor callback.
* @return {Trie}
*/
// update(prefix, updateFunction) {
// let node = this.root;
// let token;
// for (let i = 0, l = prefix.length; i < l; i++) {
// token = prefix[i];
// node = node[token] || (node[token] = {});
// }
// // Do we need to increase size?
// if (!(SENTINEL in node))
// this.size++;
// node[SENTINEL] = updateFunction(node[SENTINEL]);
// return this;
// }
/** /**
* Method used to delete a prefix from the trie. * Method used to delete a prefix from the trie.
@ -148,8 +107,8 @@ class Trie {
* @param {string} suffix - Prefix to delete. * @param {string} suffix - Prefix to delete.
* @return {boolean} * @return {boolean}
*/ */
delete(suffix) { const remove = (suffix) => {
let node = this.root; let node = root;
let toPrune = null; let toPrune = null;
let tokenToPrune = null; let tokenToPrune = null;
let parent; let parent;
@ -179,7 +138,7 @@ class Trie {
if (!(SENTINEL in node)) return false; if (!(SENTINEL in node)) return false;
this.size--; size--;
if (toPrune) { if (toPrune) {
delete toPrune[tokenToPrune]; delete toPrune[tokenToPrune];
@ -188,7 +147,7 @@ class Trie {
} }
return true; return true;
} };
/** /**
* Method used to assert whether the given prefix exists in the Trie. * Method used to assert whether the given prefix exists in the Trie.
@ -196,8 +155,8 @@ class Trie {
* @param {string} suffix - Prefix to check. * @param {string} suffix - Prefix to check.
* @return {boolean} * @return {boolean}
*/ */
has(suffix) { const has = (suffix) => {
let node = this.root; let node = root;
let token; let token;
for (let i = suffix.length - 1; i >= 0; i--) { for (let i = suffix.length - 1; i >= 0; i--) {
@ -210,86 +169,288 @@ class Trie {
} }
return SENTINEL in node; return SENTINEL in node;
}
/**
* @return {string[]}
*/
dump() {
const node = this.root;
const nodeStack = [];
const prefixStack = [];
// Resolving initial prefix
const prefix = '';
nodeStack.push(node);
prefixStack.push(prefix);
/** @type {string[]} */
const results = [];
let currentNode;
let currentPrefix;
let hasValue = false;
let k;
while (nodeStack.length) {
currentNode = nodeStack.pop();
currentPrefix = prefixStack.pop();
// eslint-disable-next-line guard-for-in -- plain object
for (k in currentNode) {
if (k === SENTINEL) {
hasValue = true;
continue;
}
nodeStack.push(currentNode[k]);
prefixStack.push(k + currentPrefix);
}
if (hasValue) results.push(currentPrefix);
}
return results;
}
/**
* Convenience known methods.
*/
// inspect() {
// const proxy = new Set();
// const iterator = this.prefixes();
// let step;
// while ((step = iterator.next(), !step.done))
// proxy.add(step.value);
// // Trick so that node displays the name of the constructor
// Object.defineProperty(proxy, 'constructor', {
// value: Trie,
// enumerable: false
// });
// return proxy;
// }
/**
* Static .from function taking an arbitrary iterable & converting it into
* a trie.
*
* @param {string[] | Set<string>} iterable - Target iterable.
* @return {Trie}
*/
static from = iterable => {
const trie = new Trie();
iterable.forEach(i => trie.add(i));
return trie;
}; };
}
if (from) {
from.forEach(add);
}
return {
add,
contains,
find,
remove,
delete: remove,
has,
get size() {
return size;
}
};
};
// class Trie {
// size = 0;
// root = {};
// /**
// * @param {string} suffix
// */
// contains(suffix) {
// let node = this.root;
// let token;
// for (let i = suffix.length - 1; i >= 0; i--) {
// token = suffix[i];
// node = node[token];
// if (node == null) return false;
// }
// return true;
// }
// /**
// * Method used to retrieve every item in the trie with the given prefix.
// *
// * @param {string} suffix - Prefix to query.
// * @param {boolean} [includeEqualWithSuffix]
// * @return {string[]}
// */
// find(suffix, includeEqualWithSuffix = true) {
// let node = this.root;
// const matches = [];
// let token;
// for (let i = suffix.length - 1; i >= 0; i--) {
// token = suffix[i];
// node = node[token];
// if (node == null) return matches;
// }
// // Performing DFS from prefix
// const nodeStack = [node];
// const suffixStack = [suffix];
// let k;
// let $suffix = suffix;
// while (nodeStack.length) {
// $suffix = suffixStack.pop();
// node = nodeStack.pop();
// // eslint-disable-next-line guard-for-in -- plain object
// for (k in node) {
// if (k === SENTINEL) {
// if (includeEqualWithSuffix) {
// matches.push($suffix);
// } else if ($suffix !== suffix) {
// matches.push($suffix);
// }
// continue;
// }
// nodeStack.push(node[k]);
// suffixStack.push(k + $suffix);
// }
// }
// return matches;
// }
// // toJSON() {
// // return this.root;
// // }
// /**
// * Method used to clear the trie.
// *
// * @return {void}
// */
// // clear() {
// // // Properties
// // this.root = {};
// // this.size = 0;
// // }
// /**
// * Method used to update the value of the given prefix in the trie.
// *
// * @param {string|array} prefix - Prefix to follow.
// * @param {(oldValue: any | undefined) => any} updateFunction - Update value visitor callback.
// * @return {Trie}
// */
// // update(prefix, updateFunction) {
// // let node = this.root;
// // let token;
// // for (let i = 0, l = prefix.length; i < l; i++) {
// // token = prefix[i];
// // node = node[token] || (node[token] = {});
// // }
// // // Do we need to increase size?
// // if (!(SENTINEL in node))
// // this.size++;
// // node[SENTINEL] = updateFunction(node[SENTINEL]);
// // return this;
// // }
// /**
// * Method used to delete a prefix from the trie.
// *
// * @param {string} suffix - Prefix to delete.
// * @return {boolean}
// */
// delete(suffix) {
// let node = this.root;
// let toPrune = null;
// let tokenToPrune = null;
// let parent;
// let token;
// for (let i = suffix.length - 1; i >= 0; i--) {
// token = suffix[i];
// parent = node;
// node = node[token];
// // Prefix does not exist
// if (typeof node === 'undefined') {
// return false;
// }
// // Keeping track of a potential branch to prune
// if (toPrune !== null) {
// if (Object.keys(node).length > 1) {
// toPrune = null;
// tokenToPrune = null;
// }
// } else if (Object.keys(node).length < 2) {
// toPrune = parent;
// tokenToPrune = token;
// }
// }
// if (!(SENTINEL in node)) return false;
// this.size--;
// if (toPrune) {
// delete toPrune[tokenToPrune];
// } else {
// delete node[SENTINEL];
// }
// return true;
// }
// /**
// * Method used to assert whether the given prefix exists in the Trie.
// *
// * @param {string} suffix - Prefix to check.
// * @return {boolean}
// */
// has(suffix) {
// let node = this.root;
// let token;
// for (let i = suffix.length - 1; i >= 0; i--) {
// token = suffix[i];
// node = node[token];
// if (typeof node === 'undefined') {
// return false;
// }
// }
// return SENTINEL in node;
// }
// /**
// * @return {string[]}
// */
// // dump() {
// // const node = this.root;
// // const nodeStack = [];
// // const prefixStack = [];
// // // Resolving initial prefix
// // const prefix = '';
// // nodeStack.push(node);
// // prefixStack.push(prefix);
// // /** @type {string[]} */
// // const results = [];
// // let currentNode;
// // let currentPrefix;
// // let hasValue = false;
// // let k;
// // while (nodeStack.length) {
// // currentNode = nodeStack.pop();
// // currentPrefix = prefixStack.pop();
// // // eslint-disable-next-line guard-for-in -- plain object
// // for (k in currentNode) {
// // if (k === SENTINEL) {
// // hasValue = true;
// // continue;
// // }
// // nodeStack.push(currentNode[k]);
// // prefixStack.push(k + currentPrefix);
// // }
// // if (hasValue) results.push(currentPrefix);
// // }
// // return results;
// // }
// /**
// * Convenience known methods.
// */
// // inspect() {
// // const proxy = new Set();
// // const iterator = this.prefixes();
// // let step;
// // while ((step = iterator.next(), !step.done))
// // proxy.add(step.value);
// // // Trick so that node displays the name of the constructor
// // Object.defineProperty(proxy, 'constructor', {
// // value: Trie,
// // enumerable: false
// // });
// // return proxy;
// // }
// /**
// * Static .from function taking an arbitrary iterable & converting it into
// * a trie.
// *
// * @param {string[] | Set<string>} iterable - Target iterable.
// * @return {Trie}
// */
// static from = iterable => {
// const trie = new Trie();
// iterable.forEach(i => trie.add(i));
// return trie;
// };
// }
/** /**
* Exporting. * Exporting.
*/ */
module.exports.SENTINEL = SENTINEL; module.exports.SENTINEL = SENTINEL;
module.exports = Trie; module.exports = createTrie;

View File

@ -1,12 +1,12 @@
require('chai').should(); require('chai').should();
const Trie = require('./trie'); const createTrie = require('./trie');
const assert = require('assert'); const assert = require('assert');
const { describe, it } = require('mocha'); const { describe, it } = require('mocha');
describe('Trie', () => { describe('Trie', () => {
it('should be possible to add items to a Trie.', () => { it('should be possible to add items to a Trie.', () => {
const trie = new Trie(); const trie = createTrie();
trie.add('sukka'); trie.add('sukka');
trie.add('ukka'); trie.add('ukka');
@ -22,7 +22,7 @@ describe('Trie', () => {
}); });
it('adding the same item several times should not increase size.', () => { it('adding the same item several times should not increase size.', () => {
const trie = new Trie(); const trie = createTrie();
trie.add('rat'); trie.add('rat');
trie.add('erat'); trie.add('erat');
@ -33,21 +33,14 @@ describe('Trie', () => {
}); });
it('should be possible to set the null sequence.', () => { it('should be possible to set the null sequence.', () => {
let trie = new Trie(); const trie = createTrie();
trie.add(''); trie.add('');
trie.size.should.eq(1);
trie.has('').should.eq(true); trie.has('').should.eq(true);
trie = new Trie(Array);
trie.add([]);
trie.size.should.eq(1);
trie.has([]).should.eq(true);
}); });
it('should be possible to delete items.', () => { it('should be possible to delete items.', () => {
const trie = new Trie(); const trie = createTrie();
trie.add('rat'); trie.add('rat');
trie.add('rate'); trie.add('rate');
@ -64,16 +57,13 @@ describe('Trie', () => {
trie.size.should.eq(2); trie.size.should.eq(2);
assert.strictEqual(trie.delete('rate'), true); assert.strictEqual(trie.delete('rate'), true);
assert.strictEqual(trie.size, 1); assert.strictEqual(trie.size, 1);
assert.strictEqual(trie.delete('tar'), true); assert.strictEqual(trie.delete('tar'), true);
assert.strictEqual(trie.size, 0); assert.strictEqual(trie.size, 0);
}); });
it('should be possible to check the existence of a sequence in the Trie.', () => { it('should be possible to check the existence of a sequence in the Trie.', () => {
const trie = new Trie(); const trie = createTrie();
trie.add('romanesque'); trie.add('romanesque');
@ -83,7 +73,7 @@ describe('Trie', () => {
}); });
it('should be possible to retrieve items matching the given prefix.', () => { it('should be possible to retrieve items matching the given prefix.', () => {
const trie = new Trie(); const trie = createTrie();
trie.add('roman'); trie.add('roman');
trie.add('esqueroman'); trie.add('esqueroman');
@ -154,7 +144,7 @@ describe('Trie', () => {
it('should be possible to create a trie from an arbitrary iterable.', () => { it('should be possible to create a trie from an arbitrary iterable.', () => {
const words = ['roman', 'esqueroman']; const words = ['roman', 'esqueroman'];
const trie = Trie.from(words); const trie = createTrie(words);
assert.strictEqual(trie.size, 2); assert.strictEqual(trie.size, 2);
assert.deepStrictEqual(trie.has('roman'), true); assert.deepStrictEqual(trie.has('roman'), true);
@ -163,14 +153,14 @@ describe('Trie', () => {
describe('surge domainset dedupe', () => { describe('surge domainset dedupe', () => {
it('should not remove same entry', () => { it('should not remove same entry', () => {
const trie = Trie.from(['.skk.moe', 'noc.one']); const trie = createTrie(['.skk.moe', 'noc.one']);
trie.find('.skk.moe').should.eql(['.skk.moe']); trie.find('.skk.moe').should.eql(['.skk.moe']);
trie.find('noc.one').should.eql(['noc.one']); trie.find('noc.one').should.eql(['noc.one']);
}); });
it('should remove subdomain', () => { it('should remove subdomain', () => {
const trie = Trie.from(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']); const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']);
// trie.find('noc.one').should.eql(['www.noc.one']); // trie.find('noc.one').should.eql(['www.noc.one']);
trie.find('.skk.moe').should.eql(['image.cdn.skk.moe', 'blog.skk.moe']); trie.find('.skk.moe').should.eql(['image.cdn.skk.moe', 'blog.skk.moe']);
// trie.find('sukkaw.net').should.eql(['cdn.sukkaw.net']); // trie.find('sukkaw.net').should.eql(['cdn.sukkaw.net']);
@ -178,7 +168,7 @@ describe('surge domainset dedupe', () => {
}); });
it('should not remove non-subdomain', () => { it('should not remove non-subdomain', () => {
const trie = Trie.from(['skk.moe', 'sukkaskk.moe']); const trie = createTrie(['skk.moe', 'sukkaskk.moe']);
trie.find('.skk.moe').should.eql([]); trie.find('.skk.moe').should.eql([]);
}); });
}); });

View File

@ -108,6 +108,8 @@ DOMAIN-KEYWORD,_vmind.qqvideo.tc.qq.com
DOMAIN-KEYWORD,-logging.nextmedia.com DOMAIN-KEYWORD,-logging.nextmedia.com
DOMAIN-KEYWORD,-spiky.clevertap-prod.com DOMAIN-KEYWORD,-spiky.clevertap-prod.com
DOMAIN-KEYWORD,.engage.3m. DOMAIN-KEYWORD,.engage.3m.
DOMAIN-KEYWORD,telemetry.officeapps.live.com
DOMAIN-KEYWORD,-launches.appsflyersdk.com
AND,((DOMAIN-KEYWORD,genuine), (DOMAIN-KEYWORD,autodesk)) AND,((DOMAIN-KEYWORD,genuine), (DOMAIN-KEYWORD,autodesk))