mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Perf: simplify white suffix dedupe
This commit is contained in:
parent
48b5f609dd
commit
02bff12245
@ -15,7 +15,7 @@ import * as tldts from 'tldts';
|
|||||||
import { SHARED_DESCRIPTION } from './lib/constants';
|
import { SHARED_DESCRIPTION } from './lib/constants';
|
||||||
import { getPhishingDomains } from './lib/get-phishing-domains';
|
import { getPhishingDomains } from './lib/get-phishing-domains';
|
||||||
|
|
||||||
import * as SetHelpers from 'mnemonist/set';
|
import { add as SetAdd, subtract as SetSubstract } from 'mnemonist/set';
|
||||||
import { setAddFromArray } from './lib/set-add-from-array';
|
import { setAddFromArray } from './lib/set-add-from-array';
|
||||||
import { sort } from './lib/timsort';
|
import { sort } from './lib/timsort';
|
||||||
|
|
||||||
@ -23,7 +23,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
|||||||
/** Whitelists */
|
/** Whitelists */
|
||||||
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
|
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
|
||||||
|
|
||||||
const domainSets = new Set<string>();
|
let domainSets = new Set<string>();
|
||||||
|
|
||||||
// Parse from AdGuard Filters
|
// Parse from AdGuard Filters
|
||||||
const shouldStop = await span
|
const shouldStop = await span
|
||||||
@ -33,9 +33,9 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
|||||||
let shouldStop = false;
|
let shouldStop = false;
|
||||||
await Promise.all([
|
await Promise.all([
|
||||||
// Parse from remote hosts & domain lists
|
// Parse from remote hosts & domain lists
|
||||||
...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2], entry[3]).then(hosts => SetHelpers.add(domainSets, hosts))),
|
...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2], entry[3]).then(hosts => SetAdd(domainSets, hosts))),
|
||||||
|
|
||||||
...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetHelpers.add(domainSets, hosts))),
|
...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetAdd(domainSets, hosts))),
|
||||||
|
|
||||||
...ADGUARD_FILTERS.map(input => (
|
...ADGUARD_FILTERS.map(input => (
|
||||||
typeof input === 'string'
|
typeof input === 'string'
|
||||||
@ -58,7 +58,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
|||||||
setAddFromArray(filterRuleWhitelistDomainSets, black);
|
setAddFromArray(filterRuleWhitelistDomainSets, black);
|
||||||
}))),
|
}))),
|
||||||
getPhishingDomains(childSpan).then(([purePhishingDomains, fullPhishingDomainSet]) => {
|
getPhishingDomains(childSpan).then(([purePhishingDomains, fullPhishingDomainSet]) => {
|
||||||
SetHelpers.add(domainSets, fullPhishingDomainSet);
|
SetAdd(domainSets, fullPhishingDomainSet);
|
||||||
setAddFromArray(domainSets, purePhishingDomains);
|
setAddFromArray(domainSets, purePhishingDomains);
|
||||||
}),
|
}),
|
||||||
childSpan.traceChildAsync('process reject_sukka.conf', async () => {
|
childSpan.traceChildAsync('process reject_sukka.conf', async () => {
|
||||||
@ -94,22 +94,17 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Remove as many domains as possible from domainSets before creating trie
|
// Remove as many domains as possible from domainSets before creating trie
|
||||||
SetHelpers.subtract(domainSets, filterRuleWhitelistDomainSets);
|
SetSubstract(domainSets, filterRuleWhitelistDomainSets);
|
||||||
|
|
||||||
childSpan.traceChildSync('dedupe from white suffixes', () => {
|
domainSets = new Set(childSpan.traceChildSync('dedupe from white suffixes', () => {
|
||||||
const trie = createTrie(domainSets);
|
const trie = createTrie(domainSets, true, true);
|
||||||
|
|
||||||
filterRuleWhitelistDomainSets.forEach(suffix => {
|
filterRuleWhitelistDomainSets.forEach(suffix => {
|
||||||
trie.substractSetInPlaceFromFound(suffix, domainSets);
|
trie.whitelist(suffix);
|
||||||
if (suffix[0] === '.') {
|
|
||||||
domainSets.delete(suffix.slice(1));
|
|
||||||
domainSets.delete(suffix);
|
|
||||||
} else {
|
|
||||||
domainSets.delete(`.${suffix}`);
|
|
||||||
domainSets.delete(suffix);
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
});
|
|
||||||
|
return trie.dump();
|
||||||
|
}));
|
||||||
|
|
||||||
childSpan.traceChildSync('dedupe from black keywords', () => {
|
childSpan.traceChildSync('dedupe from black keywords', () => {
|
||||||
const kwfilter = createKeywordFilter(domainKeywordsSet);
|
const kwfilter = createKeywordFilter(domainKeywordsSet);
|
||||||
|
|||||||
@ -1,22 +1,11 @@
|
|||||||
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
|
||||||
import { processDomainLists } from './parse-filter';
|
import { processDomainLists } from './parse-filter';
|
||||||
import * as tldts from 'tldts';
|
import * as tldts from 'tldts';
|
||||||
import { createTrie } from './trie';
|
|
||||||
import { TTL } from './cache-filesystem';
|
import { TTL } from './cache-filesystem';
|
||||||
|
|
||||||
import { add as SetAdd } from 'mnemonist/set';
|
import { add as SetAdd } from 'mnemonist/set';
|
||||||
import type { Span } from '../trace';
|
import type { Span } from '../trace';
|
||||||
|
|
||||||
const WHITELIST_DOMAIN = [
|
|
||||||
'w3s.link',
|
|
||||||
'dweb.link',
|
|
||||||
'nftstorage.link',
|
|
||||||
'square.site',
|
|
||||||
'business.site',
|
|
||||||
'page.link', // Firebase URL Shortener
|
|
||||||
'fleek.cool',
|
|
||||||
'notion.site'
|
|
||||||
];
|
|
||||||
const BLACK_TLD = new Set([
|
const BLACK_TLD = new Set([
|
||||||
'accountant',
|
'accountant',
|
||||||
'autos',
|
'autos',
|
||||||
@ -114,20 +103,6 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
|||||||
return domainSet;
|
return domainSet;
|
||||||
});
|
});
|
||||||
|
|
||||||
span.traceChildSync('whitelisting phishing domains', (curSpan) => {
|
|
||||||
const trieForRemovingWhiteListed = curSpan.traceChildSync('create trie for whitelisting', () => createTrie(domainSet));
|
|
||||||
|
|
||||||
return curSpan.traceChild('delete whitelisted from domainset').traceSyncFn(() => {
|
|
||||||
for (let i = 0, len = WHITELIST_DOMAIN.length; i < len; i++) {
|
|
||||||
const white = WHITELIST_DOMAIN[i];
|
|
||||||
domainSet.delete(white);
|
|
||||||
domainSet.delete(`.${white}`);
|
|
||||||
|
|
||||||
trieForRemovingWhiteListed.substractSetInPlaceFromFound(`.${white}`, domainSet);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
const domainCountMap: Record<string, number> = {};
|
const domainCountMap: Record<string, number> = {};
|
||||||
|
|
||||||
span.traceChildSync('process phishing domain set', () => {
|
span.traceChildSync('process phishing domain set', () => {
|
||||||
|
|||||||
@ -224,8 +224,16 @@ export const PREDEFINED_WHITELIST = [
|
|||||||
'email.accounts.bitly.com', // Fuck Peter Lowe Hosts
|
'email.accounts.bitly.com', // Fuck Peter Lowe Hosts
|
||||||
'adsense.google.com', // Fuck Peter Lowe Hosts
|
'adsense.google.com', // Fuck Peter Lowe Hosts
|
||||||
'api.vip.miui.com', // Fuck Goodbye Xiaomi Ads
|
'api.vip.miui.com', // Fuck Goodbye Xiaomi Ads
|
||||||
'stripe.com' // EasyPrivacy only blocks m.stripe.com with $third-party,
|
'stripe.com', // EasyPrivacy only blocks m.stripe.com with $third-party,
|
||||||
// yet stupid AdGuardDNSFilter blocks all of it. Stupid AdGuard
|
// yet stupid AdGuardDNSFilter blocks all of it. Stupid AdGuard
|
||||||
|
'w3s.link', // ipfs gateway
|
||||||
|
'dweb.link', // ipfs gateway
|
||||||
|
'nftstorage.link', // ipfs gateway
|
||||||
|
'fleek.cool', // ipfs gateway
|
||||||
|
'square.site', // Drag'n'Drop site building platform
|
||||||
|
'business.site', // Drag'n'Drop site building platform
|
||||||
|
'page.link', // Firebase URL Shortener
|
||||||
|
'notion.site'
|
||||||
];
|
];
|
||||||
|
|
||||||
export const PREDEFINED_ENFORCED_WHITELIST = [
|
export const PREDEFINED_ENFORCED_WHITELIST = [
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user