Perf: minor optimization here and there

Author: SukkaW
Date: 2024-01-11 11:56:15 +08:00
parent d06fa6975d
commit e8f3519479
11 changed files with 80 additions and 60 deletions

View File

@@ -73,9 +73,7 @@ const createKeywordFilter = (keys: string[] | Set<string>) => {
     }
   };

-  keys.forEach(k => {
-    put(k, k.length);
-  });
+  keys.forEach(k => put(k, k.length));

   build();

View File

@@ -34,10 +34,13 @@ interface CacheApplyStringOption {
 type CacheApplyOption<T> = T extends string ? CacheApplyStringOption : CacheApplyNonStringOption<T>;
 const randomInt = (min: number, max: number) => Math.floor(Math.random() * (max - min + 1)) + min;

+const ONE_HOUR = 60 * 60 * 1000;
+const ONE_DAY = 24 * ONE_HOUR;
+
 // Add some randomness to the cache ttl to avoid thundering herd
 export const TTL = {
   humanReadable(ttl: number) {
-    if (ttl >= 24 * 60 * 60 * 1000) {
+    if (ttl >= ONE_DAY) {
       return `${Math.round(ttl / 24 / 60 / 60 / 1000)}d`;
     }
     if (ttl >= 60 * 60 * 1000) {
@@ -45,13 +48,13 @@ export const TTL = {
     }
     return `${Math.round(ttl / 1000)}s`;
   },
-  THREE_HOURS: () => randomInt(1, 3) * 60 * 60 * 1000,
-  TWLVE_HOURS: () => randomInt(8, 12) * 60 * 60 * 1000,
-  ONE_DAY: () => randomInt(23, 25) * 60 * 60 * 1000,
-  THREE_DAYS: () => randomInt(1, 3) * 24 * 60 * 60 * 1000,
-  ONE_WEEK: () => randomInt(4, 7) * 24 * 60 * 60 * 1000,
-  TWO_WEEKS: () => randomInt(10, 14) * 24 * 60 * 60 * 1000,
-  TEN_DAYS: () => randomInt(7, 10) * 24 * 60 * 60 * 1000
+  THREE_HOURS: () => randomInt(1, 3) * ONE_HOUR,
+  TWLVE_HOURS: () => randomInt(8, 12) * ONE_HOUR,
+  ONE_DAY: () => randomInt(23, 25) * ONE_HOUR,
+  THREE_DAYS: () => randomInt(1, 3) * ONE_DAY,
+  ONE_WEEK: () => randomInt(4, 7) * ONE_DAY,
+  TEN_DAYS: () => randomInt(7, 10) * ONE_DAY,
+  TWO_WEEKS: () => randomInt(10, 14) * ONE_DAY
 };

 export class Cache {
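The refactor above only swaps the repeated 60 * 60 * 1000 / 24 * 60 * 60 * 1000 literals for the new ONE_HOUR / ONE_DAY constants; the jitter described by the comment ("add some randomness to the cache ttl to avoid thundering herd") is unchanged. A minimal standalone sketch of that jitter idea, using a hypothetical jitteredOneDay helper rather than the repo's TTL object:

```ts
// Sketch only: picking each TTL from a small random range spreads out cache
// expirations, so entries written at the same time do not all expire (and
// trigger refetches) together.
const randomInt = (min: number, max: number) => Math.floor(Math.random() * (max - min + 1)) + min;

const ONE_HOUR = 60 * 60 * 1000;

// Hypothetical helper: "roughly one day", anywhere between 23 and 25 hours.
const jitteredOneDay = () => randomInt(23, 25) * ONE_HOUR;

console.log(jitteredOneDay()); // 82800000, 86400000 or 90000000
```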

View File

@@ -6,13 +6,16 @@ export function domainDeduper(inputDomains: string[], toArray = true): string[]
   const trie = createTrie(inputDomains);
   const sets = new Set(inputDomains);

-  for (let j = 0, len = inputDomains.length; j < len; j++) {
-    const d = inputDomains[j];
+  for (let i = 0, len = inputDomains.length; i < len; i++) {
+    const d = inputDomains[i];

     if (d[0] !== '.') {
       continue;
     }

-    trie.find(d, false).forEach(f => sets.delete(f));
+    const found = trie.find(d, true);
+    for (let j = 0, len = found.length; j < len; j++) {
+      sets.delete(found[j]);
+    }

     const a: string = d.slice(1);
@@ -27,5 +30,3 @@ export function domainDeduper(inputDomains: string[], toArray = true): string[]
   return sets;
 }

 export default domainDeduper;
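For context, the loop above drops entries that are already covered by a leading-dot entry, using the trie for the suffix lookups. As a rough illustration (assuming .example.com is meant to cover both its subdomains and the bare apex, which this hunk does not show explicitly), the same dedupe could be written naively without the trie:

```ts
// Naive O(n^2) sketch; the commit's domainDeduper uses a trie for these lookups.
function naiveDomainDeduper(inputDomains: string[]): string[] {
  const sets = new Set(inputDomains);
  for (const d of inputDomains) {
    if (d[0] !== '.') continue; // only leading-dot entries act as wildcards
    for (const candidate of inputDomains) {
      // ".example.com" covers "a.example.com" (suffix match) and "example.com" (bare apex)
      if (candidate !== d && (candidate.endsWith(d) || candidate === d.slice(1))) {
        sets.delete(candidate);
      }
    }
  }
  return [...sets];
}

console.log(naiveDomainDeduper(['.example.com', 'a.example.com', 'example.com', 'other.org']));
// [".example.com", "other.org"]
```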

View File

@@ -8,6 +8,8 @@ import { processLine } from './process-line';
 import { TTL } from './cache-filesystem';
 import { isCI } from 'ci-info';
+import { add as SetAdd } from 'mnemonist/set';

 const WHITELIST_DOMAIN = new Set([
   'w3s.link',
   'dweb.link',
@@ -92,16 +94,19 @@ export const getPhishingDomains = () => traceAsync('get phishing domains', async
       : null,
     getGorhillPublicSuffixPromise()
   ]);

-  domainSet2?.forEach((domain) => domainSet.add(domain));
+  if (domainSet2) {
+    SetAdd(domainSet, domainSet2);
+  }

   traceSync.skip('* whitelisting phishing domains', () => {
     const trieForRemovingWhiteListed = createTrie(domainSet);
-    WHITELIST_DOMAIN.forEach(white => {
-      trieForRemovingWhiteListed.find(`.${white}`, false).forEach(f => domainSet.delete(f));
-      // if (trieForRemovingWhiteListed.has(white)) {
+    for (const white of WHITELIST_DOMAIN) {
+      const found = trieForRemovingWhiteListed.find(`.${white}`, false);
+      for (let i = 0, len = found.length; i < len; i++) {
+        domainSet.delete(found[i]);
+      }
       domainSet.delete(white);
-      // }
-    });
+    }
   });
   const domainCountMap: Record<string, number> = {};
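The hunk above replaces a per-element forEach copy with add from mnemonist/set (aliased to SetAdd here), which copies the items of its second argument into the first set in place. A minimal usage sketch with throwaway values:

```ts
import { add as SetAdd } from 'mnemonist/set';

const target = new Set(['a.com']);

// Copies every item of the second set into the first one, in place; this is
// what replaces the previous `domainSet2?.forEach((domain) => domainSet.add(domain))`.
SetAdd(target, new Set(['b.com', 'c.com']));

console.log(target.size); // 3
```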

View File

@@ -10,8 +10,16 @@ export const normalizeDomain = (domain: string) => {
   if (!parsed.isIcann && !parsed.isPrivate) return null;

   let h = parsed.hostname;
-  if (h[0] === '.') h = h.slice(1);
-  if (h.endsWith('.')) h = h.slice(0, -1);
+
+  let sliceStart = 0;
+  let sliceEnd = h.length;
+  if (h[0] === '.') sliceStart = 1;
+  if (h.endsWith('.')) sliceEnd = -1;
+  if (sliceStart !== 0 || sliceEnd !== h.length) {
+    h = h.slice(sliceStart, sliceEnd);
+  }

   if (h) return h;
   return null;
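The change above computes both trim decisions first and then slices at most once, instead of potentially allocating two intermediate strings with chained slice calls. The same logic as a standalone, hypothetically named helper:

```ts
// Hypothetical helper illustrating the single-slice trim from the hunk above:
// strip one leading dot and/or one trailing dot with at most one allocation.
function trimDots(h: string): string {
  let sliceStart = 0;
  let sliceEnd = h.length;
  if (h[0] === '.') sliceStart = 1;
  if (h.endsWith('.')) sliceEnd = -1; // slice(start, -1) drops the last character
  return sliceStart !== 0 || sliceEnd !== h.length ? h.slice(sliceStart, sliceEnd) : h;
}

console.log(trimDots('.example.com.')); // "example.com"
console.log(trimDots('example.com'));   // "example.com" (untouched, no slice)
```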

View File

@@ -208,12 +208,12 @@ export async function processFilterRules(
     }
   ));

-  warningMessages.forEach(msg => {
+  for (let i = 0, len = warningMessages.length; i < len; i++) {
     console.warn(
-      picocolors.yellow(msg),
+      picocolors.yellow(warningMessages[i]),
       picocolors.gray(picocolors.underline(filterRulesUrl))
     );
-  });
+  }

   console.log(
     picocolors.gray('[process filter]'),

View File

@@ -15,17 +15,12 @@ export const HOSTS = [
   // Curben's UrlHaus Malicious URL Blocklist
   // 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
   // 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
-  ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, TTL.THREE_HOURS()],
+  ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, TTL.THREE_HOURS()]
   // Curben's Phishing URL Blocklist
   // Covered by lib/get-phishing-domains.ts
   // 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt'
   // 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
   // ['https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true],
-  // Curben's PUP Domains Blocklist
-  // 'https://curbengh.github.io/pup-filter/pup-filter-agh.txt'
-  // 'https://pup-filter.pages.dev/pup-filter-agh.txt'
-  // The PUP filter has paused the update since 2023-05, so we set a 14 days cache ttl
-  ['https://curbengh.github.io/pup-filter/pup-filter-hosts.txt', true, TTL.TWO_WEEKS()]
 ] as const;

 export const DOMAIN_LISTS = [
@@ -38,6 +33,11 @@ export const DOMAIN_LISTS = [
   // DigitalSide Threat-Intel - OSINT Hub
   // Update once per day
   ['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, TTL.ONE_DAY()],
+  // Curben's PUP Domains Blocklist
+  // 'https://curbengh.github.io/pup-filter/pup-filter-agh.txt'
+  // 'https://pup-filter.pages.dev/pup-filter-agh.txt'
+  // The PUP filter has paused the update since 2023-05, so we set a 14 days cache ttl
+  ['https://curbengh.github.io/pup-filter/pup-filter-domains.txt', true, TTL.TWO_WEEKS()],
   // AdGuard CNAME Filter Combined
   // Update on a 7 days basis, so we add a 3 hours cache ttl
   ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, TTL.THREE_DAYS()],
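One property of these entries worth noting: the TTL helpers are called inside the array literal, so each source gets a single concrete jittered value (in milliseconds) when the module is evaluated, not a fresh value per download. A self-contained sketch with illustrative names:

```ts
// Sketch only; randomInt / TWO_WEEKS mirror the TTL helpers shown earlier in this commit.
const randomInt = (min: number, max: number) => Math.floor(Math.random() * (max - min + 1)) + min;
const ONE_DAY = 24 * 60 * 60 * 1000;
const TWO_WEEKS = () => randomInt(10, 14) * ONE_DAY;

const DEMO_LISTS = [
  // url, flag (meaning not visible in this diff), cache ttl in milliseconds
  ['https://curbengh.github.io/pup-filter/pup-filter-domains.txt', true, TWO_WEEKS()]
] as const;

console.log(DEMO_LISTS[0][2]); // e.g. 1036800000 (12 days), fixed at module load
```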

View File

@@ -0,0 +1,8 @@
+/**
+ * In-place adding of elements from an array to a set.
+ */
+export function setAddFromArray<T>(set: Set<T>, arr: T[]): void {
+  for (let i = 0, len = arr.length; i < len; i++) {
+    set.add(arr[i]);
+  }
+}
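This new helper mirrors the indexed, length-cached loops used elsewhere in the commit, presumably to avoid the callback overhead of arr.forEach(x => set.add(x)) on hot paths. Usage sketch, assuming the function above is in scope (the new file's path is not shown in this diff):

```ts
const domains = new Set(['a.com']);
setAddFromArray(domains, ['b.com', 'a.com', 'c.com']);

console.log(domains.size); // 3: Set semantics still deduplicate 'a.com'
```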