Mirror of https://github.com/SukkaW/Surge.git, synced 2025-12-12 09:10:35 +08:00

Perf: minor optimization here and there

Commit e8f3519479 (parent d06fa6975d)

@@ -32,7 +32,10 @@ export const getMicrosoftCdnRulesetPromise = createMemoizedPromise(async () => {
   });

   const trie2 = createTrie(set);
-  BLACKLIST.flatMap(domain => trie2.find(domain, true)).forEach(d => set.delete(d));
+  const black = BLACKLIST.flatMap(domain => trie2.find(domain, true));
+  for (let i = 0, len = black.length; i < len; i++) {
+    set.delete(black[i]);
+  }

   return Array.from(set).map(d => `DOMAIN-SUFFIX,${d}`).concat(WHITELIST);
 });
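
This first hunk sets the pattern for the whole commit: materialize the matches once, then delete them in an indexed for loop instead of chaining .forEach(). A minimal self-contained sketch of the two shapes being swapped, where findMatches is a hypothetical stand-in for the repo's trie2.find(domain, true):

// Hypothetical stand-in for trie2.find(domain, true), for illustration only.
const findMatches = (domain: string): string[] => [domain, `www.${domain}`];

// Before: every hit is deleted through a callback invocation.
const deleteViaForEach = (set: Set<string>, blacklist: string[]): void => {
  blacklist.flatMap(findMatches).forEach(d => set.delete(d));
};

// After: one flat array, indexed loop, length hoisted out of the loop condition.
const deleteViaLoop = (set: Set<string>, blacklist: string[]): void => {
  const black = blacklist.flatMap(findMatches);
  for (let i = 0, len = black.length; i < len; i++) {
    set.delete(black[i]);
  }
};

const set = new Set(['msn.com', 'www.msn.com', 'azure.com']);
deleteViaLoop(set, ['msn.com']);
console.log(set); // Set(1) { 'azure.com' }

Both shapes produce the same set; the loop form simply avoids the per-element callback dispatch on the hot path.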

@@ -17,6 +17,9 @@ import * as tldts from 'tldts';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { getPhishingDomains } from './lib/get-phishing-domains';
+
+import * as SetHelpers from 'mnemonist/set';
+import { setAddFromArray } from './lib/set-add-from-array';

 export const buildRejectDomainSet = task(import.meta.path, async () => {
   /** Whitelists */
   const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);

@@ -33,9 +36,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
     getGorhillPublicSuffixPromise(),
     // Parse from remote hosts & domain lists
     ...HOSTS.map(entry => processHosts(entry[0], entry[1], entry[2]).then(hosts => {
-      hosts.forEach(host => {
-        domainSets.add(host);
-      });
+      SetHelpers.add(domainSets, hosts);
     })),
     ...DOMAIN_LISTS.map(entry => processDomainLists(entry[0], entry[1], entry[2])),
     ...ADGUARD_FILTERS.map(input => {
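
SetHelpers.add is mnemonist's in-place set union: it mutates the first set, inserting every member of the second, which replaces the per-host forEach above. A minimal usage sketch, assuming mnemonist is installed (the domain values are illustrative):

import * as SetHelpers from 'mnemonist/set';

const domainSets = new Set(['example.com']);
const hosts = new Set(['ads.example.net', 'tracker.example.org']);

SetHelpers.add(domainSets, hosts); // in-place: domainSets gains both hosts
console.log(domainSets.size); // 3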

@@ -48,24 +49,20 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
           shouldStop = true;
           // we should not break here, as we want to see full matches from all data source
         }
-        white.forEach(i => filterRuleWhitelistDomainSets.add(i));
-        black.forEach(i => domainSets.add(i));
+        setAddFromArray(filterRuleWhitelistDomainSets, white);
+        setAddFromArray(domainSets, black);
       });
     }),
     ...([
       'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exceptions.txt',
       'https://raw.githubusercontent.com/AdguardTeam/AdGuardSDNSFilter/master/Filters/exclusions.txt'
     ].map(input => processFilterRules(input).then(({ white, black }) => {
-      white.forEach(i => filterRuleWhitelistDomainSets.add(i));
-      black.forEach(i => filterRuleWhitelistDomainSets.add(i));
+      setAddFromArray(filterRuleWhitelistDomainSets, white);
+      setAddFromArray(filterRuleWhitelistDomainSets, black);
     }))),
-    getPhishingDomains().then(([purePhishingDomains, fullDomainSet]) => {
-      fullDomainSet.forEach(host => {
-        if (host) {
-          domainSets.add(host);
-        }
-      });
-      purePhishingDomains.forEach(suffix => domainSets.add(`.${suffix}`));
+    getPhishingDomains().then(([purePhishingDomains, fullPhishingDomainSet]) => {
+      SetHelpers.add(domainSets, fullPhishingDomainSet);
+      setAddFromArray(domainSets, purePhishingDomains);
     }),
     (async () => {
       for await (const l of readFileByLine(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf'))) {

@@ -79,9 +76,14 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {

   // remove pre-defined enforced blacklist from whitelist
   const trie0 = createTrie(filterRuleWhitelistDomainSets);
-  PREDEFINED_ENFORCED_BACKLIST.forEach(enforcedBlack => {
-    trie0.find(enforcedBlack).forEach(found => filterRuleWhitelistDomainSets.delete(found));
-  });
+  for (let i = 0, len1 = PREDEFINED_ENFORCED_BACKLIST.length; i < len1; i++) {
+    const enforcedBlack = PREDEFINED_ENFORCED_BACKLIST[i];
+    const found = trie0.find(enforcedBlack);
+    for (let j = 0, len2 = found.length; j < len2; j++) {
+      filterRuleWhitelistDomainSets.delete(found[j]);
+    }
+  }

   return [gorhill, shouldStop] as const;
 });
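
trie0 indexes the whitelist so that each enforced-black lookup returns every whitelisted entry it covers. A rough functional equivalent without the trie, with illustrative values and a linear scan per domain (the whole point of trie0 is to avoid exactly this scan):

const PREDEFINED_ENFORCED_BACKLIST = ['example-ads.com']; // illustrative values
const whitelist = new Set(['safe.example.com', 'cdn.example-ads.com']);

for (let i = 0, len1 = PREDEFINED_ENFORCED_BACKLIST.length; i < len1; i++) {
  const enforcedBlack = PREDEFINED_ENFORCED_BACKLIST[i];
  // naive substitute for trie0.find(enforcedBlack): collect suffix matches first...
  const found: string[] = [];
  whitelist.forEach(w => {
    if (w === enforcedBlack || w.endsWith(`.${enforcedBlack}`)) found.push(w);
  });
  // ...then delete outside the scan, just as the hunk above does
  for (let j = 0, len2 = found.length; j < len2; j++) {
    whitelist.delete(found[j]);
  }
}

console.log(whitelist); // Set(1) { 'safe.example.com' }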

@@ -109,6 +111,7 @@ export const buildRejectDomainSet = task(import.meta.path, async () => {
   // Dedupe domainSets
   traceSync('* Dedupe from black keywords/suffixes', () => {
     const trie1 = createTrie(domainSets);
+
     domainSuffixSet.forEach(suffix => {
       trie1.find(suffix, true).forEach(f => domainSets.delete(f));
     });

@@ -73,9 +73,7 @@ const createKeywordFilter = (keys: string[] | Set<string>) => {
     }
   };

-  keys.forEach(k => {
-    put(k, k.length);
-  });
+  keys.forEach(k => put(k, k.length));

   build();


@@ -34,10 +34,13 @@ interface CacheApplyStringOption {
 type CacheApplyOption<T> = T extends string ? CacheApplyStringOption : CacheApplyNonStringOption<T>;

 const randomInt = (min: number, max: number) => Math.floor(Math.random() * (max - min + 1)) + min;
+
+const ONE_HOUR = 60 * 60 * 1000;
+const ONE_DAY = 24 * ONE_HOUR;
 // Add some randomness to the cache ttl to avoid thundering herd
 export const TTL = {
   humanReadable(ttl: number) {
-    if (ttl >= 24 * 60 * 60 * 1000) {
+    if (ttl >= ONE_DAY) {
       return `${Math.round(ttl / 24 / 60 / 60 / 1000)}d`;
     }
     if (ttl >= 60 * 60 * 1000) {

@@ -45,13 +48,13 @@ export const TTL = {
     }
     return `${Math.round(ttl / 1000)}s`;
   },
-  THREE_HOURS: () => randomInt(1, 3) * 60 * 60 * 1000,
-  TWLVE_HOURS: () => randomInt(8, 12) * 60 * 60 * 1000,
-  ONE_DAY: () => randomInt(23, 25) * 60 * 60 * 1000,
-  THREE_DAYS: () => randomInt(1, 3) * 24 * 60 * 60 * 1000,
-  ONE_WEEK: () => randomInt(4, 7) * 24 * 60 * 60 * 1000,
-  TWO_WEEKS: () => randomInt(10, 14) * 24 * 60 * 60 * 1000,
-  TEN_DAYS: () => randomInt(7, 10) * 24 * 60 * 60 * 1000
+  THREE_HOURS: () => randomInt(1, 3) * ONE_HOUR,
+  TWLVE_HOURS: () => randomInt(8, 12) * ONE_HOUR,
+  ONE_DAY: () => randomInt(23, 25) * ONE_HOUR,
+  THREE_DAYS: () => randomInt(1, 3) * ONE_DAY,
+  ONE_WEEK: () => randomInt(4, 7) * ONE_DAY,
+  TEN_DAYS: () => randomInt(7, 10) * ONE_DAY,
+  TWO_WEEKS: () => randomInt(10, 14) * ONE_DAY
 };

 export class Cache {
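
ONE_HOUR and ONE_DAY only name the magic numbers; the computed values are unchanged. The jitter itself is the interesting part: each TTL getter rolls a fresh random lifetime, so cache entries written in the same build expire spread over a window rather than all at once (the thundering herd the comment mentions). A self-contained sketch, with humanReadable paraphrased from the method above:

const randomInt = (min: number, max: number): number =>
  Math.floor(Math.random() * (max - min + 1)) + min;

const ONE_HOUR = 60 * 60 * 1000;
const ONE_DAY = 24 * ONE_HOUR;

// same shape as TTL.ONE_DAY(): returns 23h, 24h or 25h per call
const oneDayJittered = (): number => randomInt(23, 25) * ONE_HOUR;

const humanReadable = (ttl: number): string => {
  if (ttl >= ONE_DAY) return `${Math.round(ttl / ONE_DAY)}d`;
  if (ttl >= ONE_HOUR) return `${Math.round(ttl / ONE_HOUR)}h`;
  return `${Math.round(ttl / 1000)}s`;
};

console.log(humanReadable(oneDayJittered())); // "23h" or "1d", depending on the roll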

@@ -6,13 +6,16 @@ export function domainDeduper(inputDomains: string[], toArray = true): string[]
   const trie = createTrie(inputDomains);
   const sets = new Set(inputDomains);

-  for (let j = 0, len = inputDomains.length; j < len; j++) {
-    const d = inputDomains[j];
+  for (let i = 0, len = inputDomains.length; i < len; i++) {
+    const d = inputDomains[i];
     if (d[0] !== '.') {
       continue;
     }

-    trie.find(d, false).forEach(f => sets.delete(f));
+    const found = trie.find(d, true);
+    for (let j = 0, len = found.length; j < len; j++) {
+      sets.delete(found[j]);
+    }

     const a: string = d.slice(1);

@@ -27,5 +30,3 @@ export function domainDeduper(inputDomains: string[], toArray = true): string[]

   return sets;
 }
-
-export default domainDeduper;

@@ -8,6 +8,8 @@ import { processLine } from './process-line';
 import { TTL } from './cache-filesystem';
 import { isCI } from 'ci-info';
+
+import { add as SetAdd } from 'mnemonist/set';

 const WHITELIST_DOMAIN = new Set([
   'w3s.link',
   'dweb.link',

@@ -92,16 +94,19 @@ export const getPhishingDomains = () => traceAsync('get phishing domains', async () => {
       : null,
     getGorhillPublicSuffixPromise()
   ]);
-  domainSet2?.forEach((domain) => domainSet.add(domain));
+  if (domainSet2) {
+    SetAdd(domainSet, domainSet2);
+  }

   traceSync.skip('* whitelisting phishing domains', () => {
     const trieForRemovingWhiteListed = createTrie(domainSet);
-    WHITELIST_DOMAIN.forEach(white => {
-      trieForRemovingWhiteListed.find(`.${white}`, false).forEach(f => domainSet.delete(f));
-      // if (trieForRemovingWhiteListed.has(white)) {
+    for (const white of WHITELIST_DOMAIN) {
+      const found = trieForRemovingWhiteListed.find(`.${white}`, false);
+      for (let i = 0, len = found.length; i < len; i++) {
+        domainSet.delete(found[i]);
+      }
       domainSet.delete(white);
-      // }
-    });
+    }
   });

   const domainCountMap: Record<string, number> = {};

@@ -10,8 +10,16 @@ export const normalizeDomain = (domain: string) => {
   if (!parsed.isIcann && !parsed.isPrivate) return null;

   let h = parsed.hostname;
-  if (h[0] === '.') h = h.slice(1);
-  if (h.endsWith('.')) h = h.slice(0, -1);
+  let sliceStart = 0;
+  let sliceEnd = h.length;
+
+  if (h[0] === '.') sliceStart = 1;
+  if (h.endsWith('.')) sliceEnd = -1;
+
+  if (sliceStart !== 0 || sliceEnd !== h.length) {
+    h = h.slice(sliceStart, sliceEnd);
+  }

   if (h) return h;
   return null;
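
The rewrite computes both trim points first and calls slice at most once; a hostname that needs no trimming is returned untouched instead of going through two slice calls. A self-contained sketch of the same trimming logic (trimDots is a hypothetical name, for illustration only):

const trimDots = (h: string): string => {
  let sliceStart = 0;
  let sliceEnd = h.length;
  if (h[0] === '.') sliceStart = 1;    // drop a leading dot
  if (h.endsWith('.')) sliceEnd = -1;  // drop a trailing dot via slice(start, -1)
  return (sliceStart !== 0 || sliceEnd !== h.length) ? h.slice(sliceStart, sliceEnd) : h;
};

console.log(trimDots('.example.com')); // "example.com"
console.log(trimDots('example.com.')); // "example.com"
console.log(trimDots('example.com'));  // "example.com" (no new string allocated)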

@@ -208,12 +208,12 @@ export async function processFilterRules(
     }
   ));

-  warningMessages.forEach(msg => {
+  for (let i = 0, len = warningMessages.length; i < len; i++) {
     console.warn(
-      picocolors.yellow(msg),
+      picocolors.yellow(warningMessages[i]),
       picocolors.gray(picocolors.underline(filterRulesUrl))
     );
-  });
+  }

   console.log(
     picocolors.gray('[process filter]'),

@@ -15,17 +15,12 @@ export const HOSTS = [
   // Curben's UrlHaus Malicious URL Blocklist
   // 'https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt',
   // 'https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt',
-  ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, TTL.THREE_HOURS()],
+  ['https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt', true, TTL.THREE_HOURS()]
   // Curben's Phishing URL Blocklist
   // Covered by lib/get-phishing-domains.ts
   // 'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt'
   // 'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
   // ['https://curbengh.github.io/phishing-filter/phishing-filter-hosts.txt', true, true],
-  // Curben's PUP Domains Blocklist
-  // 'https://curbengh.github.io/pup-filter/pup-filter-agh.txt'
-  // 'https://pup-filter.pages.dev/pup-filter-agh.txt'
-  // The PUP filter has paused the update since 2023-05, so we set a 14 days cache ttl
-  ['https://curbengh.github.io/pup-filter/pup-filter-hosts.txt', true, TTL.TWO_WEEKS()]
 ] as const;

 export const DOMAIN_LISTS = [

@@ -38,6 +33,11 @@ export const DOMAIN_LISTS = [
   // DigitalSide Threat-Intel - OSINT Hub
   // Update once per day
   ['https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true, TTL.ONE_DAY()],
+  // Curben's PUP Domains Blocklist
+  // 'https://curbengh.github.io/pup-filter/pup-filter-agh.txt'
+  // 'https://pup-filter.pages.dev/pup-filter-agh.txt'
+  // The PUP filter has paused the update since 2023-05, so we set a 14 days cache ttl
+  ['https://curbengh.github.io/pup-filter/pup-filter-domains.txt', true, TTL.TWO_WEEKS()],
   // AdGuard CNAME Filter Combined
   // Update on a 7 days basis, so we add a 3 hours cache ttl
   ['https://raw.githubusercontent.com/AdguardTeam/cname-trackers/master/data/combined_disguised_ads_justdomains.txt', true, TTL.THREE_DAYS()],

Build/lib/set-add-from-array.ts (new file, 8 lines)
@@ -0,0 +1,8 @@
+/**
+ * In-place adding of elements from an array to a set.
+ */
+export function setAddFromArray<T>(set: Set<T>, arr: T[]): void {
+  for (let i = 0, len = arr.length; i < len; i++) {
+    set.add(arr[i]);
+  }
+}
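
A quick check of the new helper. It is behaviorally identical to arr.forEach(i => set.add(i)); the indexed loop just skips the per-element callback dispatch, which adds up across the large domain lists this build pushes through it (import path as used from the build scripts above):

import { setAddFromArray } from './lib/set-add-from-array';

const domains = new Set<string>(['example.com']);
setAddFromArray(domains, ['ads.example.net', 'example.com', 'tracker.example.org']);
console.log(domains.size); // 3 — Set semantics still dedupe the repeat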

@@ -93,15 +93,6 @@ export const parseGfwList = async () => {
     runAgainstRuleset(path.resolve(import.meta.dir, '../List/non_ip/stream.conf'))
   ]);

-  // for await (const l of readFileByLine(path.resolve(import.meta.dir, '../List/non_ip/stream.conf'))) {
-  //   const line = processLine(l);
-  //   if (!line) continue;
-  //   const domain = line[0] === '.' ? line.slice(1) : line;
-  //   if (top500Gfwed.has(domain)) {
-  //     notIncludedTop500Gfwed.delete(domain);
-  //   }
-  // }
-
   console.log(notIncludedTop500Gfwed);

   return [