mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Perf: simplify white suffix dedupe
This commit is contained in:
parent
48b5f609dd
commit
02bff12245
@ -15,7 +15,7 @@ import * as tldts from 'tldts';
|
||||
import { SHARED_DESCRIPTION } from './lib/constants';
|
||||
import { getPhishingDomains } from './lib/get-phishing-domains';
|
||||
|
||||
import * as SetHelpers from 'mnemonist/set';
|
||||
import { add as SetAdd, subtract as SetSubstract } from 'mnemonist/set';
|
||||
import { setAddFromArray } from './lib/set-add-from-array';
|
||||
import { sort } from './lib/timsort';
|
||||
|
||||
@ -23,7 +23,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
||||
/** Whitelists */
|
||||
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
|
||||
|
||||
const domainSets = new Set<string>();
|
||||
let domainSets = new Set<string>();
|
||||
|
||||
// Parse from AdGuard Filters
|
||||
const shouldStop = await span
|
||||
@ -33,9 +33,9 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
||||
let shouldStop = false;
|
||||
await Promise.all([
|
||||
// Parse from remote hosts & domain lists
|
||||
...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2], entry[3]).then(hosts => SetHelpers.add(domainSets, hosts))),
|
||||
...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2], entry[3]).then(hosts => SetAdd(domainSets, hosts))),
|
||||
|
||||
...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetHelpers.add(domainSets, hosts))),
|
||||
...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetAdd(domainSets, hosts))),
|
||||
|
||||
...ADGUARD_FILTERS.map(input => (
|
||||
typeof input === 'string'
|
||||
@ -58,7 +58,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
||||
setAddFromArray(filterRuleWhitelistDomainSets, black);
|
||||
}))),
|
||||
getPhishingDomains(childSpan).then(([purePhishingDomains, fullPhishingDomainSet]) => {
|
||||
SetHelpers.add(domainSets, fullPhishingDomainSet);
|
||||
SetAdd(domainSets, fullPhishingDomainSet);
|
||||
setAddFromArray(domainSets, purePhishingDomains);
|
||||
}),
|
||||
childSpan.traceChildAsync('process reject_sukka.conf', async () => {
|
||||
@ -94,23 +94,18 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
||||
});
|
||||
|
||||
// Remove as many domains as possible from domainSets before creating trie
|
||||
SetHelpers.subtract(domainSets, filterRuleWhitelistDomainSets);
|
||||
SetSubstract(domainSets, filterRuleWhitelistDomainSets);
|
||||
|
||||
childSpan.traceChildSync('dedupe from white suffixes', () => {
|
||||
const trie = createTrie(domainSets);
|
||||
domainSets = new Set(childSpan.traceChildSync('dedupe from white suffixes', () => {
|
||||
const trie = createTrie(domainSets, true, true);
|
||||
|
||||
filterRuleWhitelistDomainSets.forEach(suffix => {
|
||||
trie.substractSetInPlaceFromFound(suffix, domainSets);
|
||||
if (suffix[0] === '.') {
|
||||
domainSets.delete(suffix.slice(1));
|
||||
domainSets.delete(suffix);
|
||||
} else {
|
||||
domainSets.delete(`.${suffix}`);
|
||||
domainSets.delete(suffix);
|
||||
}
|
||||
});
|
||||
trie.whitelist(suffix);
|
||||
});
|
||||
|
||||
return trie.dump();
|
||||
}));
|
||||
|
||||
childSpan.traceChildSync('dedupe from black keywords', () => {
|
||||
const kwfilter = createKeywordFilter(domainKeywordsSet);
|
||||
|
||||
|
||||
@ -1,22 +1,11 @@
|
||||
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
||||
import { processDomainLists } from './parse-filter';
|
||||
import * as tldts from 'tldts';
|
||||
import { createTrie } from './trie';
|
||||
import { TTL } from './cache-filesystem';
|
||||
|
||||
import { add as SetAdd } from 'mnemonist/set';
|
||||
import type { Span } from '../trace';
|
||||
|
||||
const WHITELIST_DOMAIN = [
|
||||
'w3s.link',
|
||||
'dweb.link',
|
||||
'nftstorage.link',
|
||||
'square.site',
|
||||
'business.site',
|
||||
'page.link', // Firebase URL Shortener
|
||||
'fleek.cool',
|
||||
'notion.site'
|
||||
];
|
||||
const BLACK_TLD = new Set([
|
||||
'accountant',
|
||||
'autos',
|
||||
@ -114,20 +103,6 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
||||
return domainSet;
|
||||
});
|
||||
|
||||
span.traceChildSync('whitelisting phishing domains', (curSpan) => {
|
||||
const trieForRemovingWhiteListed = curSpan.traceChildSync('create trie for whitelisting', () => createTrie(domainSet));
|
||||
|
||||
return curSpan.traceChild('delete whitelisted from domainset').traceSyncFn(() => {
|
||||
for (let i = 0, len = WHITELIST_DOMAIN.length; i < len; i++) {
|
||||
const white = WHITELIST_DOMAIN[i];
|
||||
domainSet.delete(white);
|
||||
domainSet.delete(`.${white}`);
|
||||
|
||||
trieForRemovingWhiteListed.substractSetInPlaceFromFound(`.${white}`, domainSet);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
const domainCountMap: Record<string, number> = {};
|
||||
|
||||
span.traceChildSync('process phishing domain set', () => {
|
||||
|
||||
@ -224,8 +224,16 @@ export const PREDEFINED_WHITELIST = [
|
||||
'email.accounts.bitly.com', // Fuck Peter Lowe Hosts
|
||||
'adsense.google.com', // Fuck Peter Lowe Hosts
|
||||
'api.vip.miui.com', // Fuck Goodbye Xiaomi Ads
|
||||
'stripe.com' // EasyPrivacy only blocks m.stripe.com with $third-party,
|
||||
'stripe.com', // EasyPrivacy only blocks m.stripe.com with $third-party,
|
||||
// yet stupid AdGuardDNSFilter blocks all of it. Stupid AdGuard
|
||||
'w3s.link', // ipfs gateway
|
||||
'dweb.link', // ipfs gateway
|
||||
'nftstorage.link', // ipfs gateway
|
||||
'fleek.cool', // ipfs gateway
|
||||
'square.site', // Drag'n'Drop site building platform
|
||||
'business.site', // Drag'n'Drop site building platform
|
||||
'page.link', // Firebase URL Shortener
|
||||
'notion.site'
|
||||
];
|
||||
|
||||
export const PREDEFINED_ENFORCED_WHITELIST = [
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user