mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 17:20:35 +08:00
Perf: improve performance of reject suffix/keyword deduping
This commit is contained in:
parent
80deff88f9
commit
725f26b428
@ -109,7 +109,7 @@ async function transformDomainset(parentSpan: Span, sourcePath: string, relative
|
|||||||
)
|
)
|
||||||
];
|
];
|
||||||
|
|
||||||
return createRuleset(
|
return span.traceAsyncFn(() => createRuleset(
|
||||||
span,
|
span,
|
||||||
title,
|
title,
|
||||||
description,
|
description,
|
||||||
@ -118,7 +118,7 @@ async function transformDomainset(parentSpan: Span, sourcePath: string, relative
|
|||||||
'domainset',
|
'domainset',
|
||||||
path.resolve(outputSurgeDir, relativePath),
|
path.resolve(outputSurgeDir, relativePath),
|
||||||
path.resolve(outputClashDir, `${relativePath.slice(0, -path.extname(relativePath).length)}.txt`)
|
path.resolve(outputClashDir, `${relativePath.slice(0, -path.extname(relativePath).length)}.txt`)
|
||||||
);
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -4,12 +4,12 @@ import path from 'path';
|
|||||||
import { processHosts, processFilterRules, processDomainLists } from './lib/parse-filter';
|
import { processHosts, processFilterRules, processDomainLists } from './lib/parse-filter';
|
||||||
import { createTrie } from './lib/trie';
|
import { createTrie } from './lib/trie';
|
||||||
|
|
||||||
import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST, DOMAIN_LISTS } from './lib/reject-data-source';
|
import { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, DOMAIN_LISTS } from './lib/reject-data-source';
|
||||||
import { createRuleset, compareAndWriteFile } from './lib/create-file';
|
import { createRuleset, compareAndWriteFile } from './lib/create-file';
|
||||||
import { processLine } from './lib/process-line';
|
import { processLine } from './lib/process-line';
|
||||||
import { domainDeduper } from './lib/domain-deduper';
|
import { domainDeduper } from './lib/domain-deduper';
|
||||||
import createKeywordFilter from './lib/aho-corasick';
|
import createKeywordFilter from './lib/aho-corasick';
|
||||||
import { readFileByLine } from './lib/fetch-text-by-line';
|
import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
|
||||||
import { sortDomains } from './lib/stable-sort-domain';
|
import { sortDomains } from './lib/stable-sort-domain';
|
||||||
import { task } from './trace';
|
import { task } from './trace';
|
||||||
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
|
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
|
||||||
@ -63,25 +63,10 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
|||||||
setAddFromArray(domainSets, purePhishingDomains);
|
setAddFromArray(domainSets, purePhishingDomains);
|
||||||
}),
|
}),
|
||||||
childSpan.traceChild('process reject_sukka.conf').traceAsyncFn(async () => {
|
childSpan.traceChild('process reject_sukka.conf').traceAsyncFn(async () => {
|
||||||
for await (const l of readFileByLine(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf'))) {
|
setAddFromArray(domainSets, await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf')));
|
||||||
const line = processLine(l);
|
|
||||||
if (!line) continue;
|
|
||||||
domainSets.add(line);
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
]);
|
]);
|
||||||
|
|
||||||
// remove pre-defined enforced blacklist from whitelist
|
|
||||||
const trie0 = createTrie(filterRuleWhitelistDomainSets);
|
|
||||||
|
|
||||||
for (let i = 0, len1 = PREDEFINED_ENFORCED_BACKLIST.length; i < len1; i++) {
|
|
||||||
const enforcedBlack = PREDEFINED_ENFORCED_BACKLIST[i];
|
|
||||||
const found = trie0.find(enforcedBlack);
|
|
||||||
for (let j = 0, len2 = found.length; j < len2; j++) {
|
|
||||||
filterRuleWhitelistDomainSets.delete(found[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return shouldStop;
|
return shouldStop;
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -116,25 +101,22 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
|||||||
});
|
});
|
||||||
filterRuleWhitelistDomainSets.forEach(suffix => {
|
filterRuleWhitelistDomainSets.forEach(suffix => {
|
||||||
trie1.find(suffix, true).forEach(f => domainSets.delete(f));
|
trie1.find(suffix, true).forEach(f => domainSets.delete(f));
|
||||||
|
|
||||||
|
if (suffix[0] === '.') {
|
||||||
|
// handle case like removing `g.msn.com` due to white `.g.msn.com` (`@@||g.msn.com`)
|
||||||
|
domainSets.delete(suffix.slice(1));
|
||||||
|
} else {
|
||||||
|
// If `g.msn.com` is whitelisted, then `.g.msn.com` should be removed from domain set
|
||||||
|
domainSets.delete(`.${suffix}`);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// remove pre-defined enforced blacklist from whitelist
|
// remove pre-defined enforced blacklist from whitelist
|
||||||
const kwfilter = createKeywordFilter(domainKeywordsSet);
|
const kwfilter = createKeywordFilter(domainKeywordsSet);
|
||||||
|
|
||||||
// handle case like removing `g.msn.com` due to white `.g.msn.com` (`@@||g.msn.com`)
|
|
||||||
for (const domain of domainSets) {
|
for (const domain of domainSets) {
|
||||||
if (domain[0] === '.') {
|
|
||||||
if (filterRuleWhitelistDomainSets.has(domain)) {
|
|
||||||
domainSets.delete(domain);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} else if (filterRuleWhitelistDomainSets.has(`.${domain}`)) {
|
|
||||||
domainSets.delete(domain);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove keyword
|
// Remove keyword
|
||||||
if (kwfilter.search(domain)) {
|
if (kwfilter(domain)) {
|
||||||
domainSets.delete(domain);
|
domainSets.delete(domain);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -77,7 +77,7 @@ const createKeywordFilter = (keys: string[] | Set<string>) => {
|
|||||||
|
|
||||||
build();
|
build();
|
||||||
|
|
||||||
const search = (text: string) => {
|
return (text: string) => {
|
||||||
let node: Node | undefined = root;
|
let node: Node | undefined = root;
|
||||||
|
|
||||||
for (let i = 0, textLen = text.length; i < textLen; i++) {
|
for (let i = 0, textLen = text.length; i < textLen; i++) {
|
||||||
@ -96,10 +96,6 @@ const createKeywordFilter = (keys: string[] | Set<string>) => {
|
|||||||
|
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
|
||||||
return {
|
|
||||||
search
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
export default createKeywordFilter;
|
export default createKeywordFilter;
|
||||||
|
|||||||
@ -99,9 +99,10 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
|||||||
SetAdd(domainSet, domainSet2);
|
SetAdd(domainSet, domainSet2);
|
||||||
}
|
}
|
||||||
|
|
||||||
span.traceChild('whitelisting phishing domains').traceSyncFn(() => {
|
span.traceChild('whitelisting phishing domains').traceSyncFn((parentSpan) => {
|
||||||
const trieForRemovingWhiteListed = createTrie(domainSet);
|
const trieForRemovingWhiteListed = parentSpan.traceChild('create trie for whitelisting').traceSyncFn(() => createTrie(domainSet));
|
||||||
|
|
||||||
|
return parentSpan.traceChild('delete whitelisted from domainset').traceSyncFn(() => {
|
||||||
for (let i = 0, len = WHITELIST_DOMAIN.length; i < len; i++) {
|
for (let i = 0, len = WHITELIST_DOMAIN.length; i < len; i++) {
|
||||||
const white = WHITELIST_DOMAIN[i];
|
const white = WHITELIST_DOMAIN[i];
|
||||||
const found = trieForRemovingWhiteListed.find(`.${white}`, true);
|
const found = trieForRemovingWhiteListed.find(`.${white}`, true);
|
||||||
@ -111,6 +112,7 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
|||||||
domainSet.delete(white);
|
domainSet.delete(white);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
});
|
||||||
|
|
||||||
const domainCountMap: Record<string, number> = {};
|
const domainCountMap: Record<string, number> = {};
|
||||||
const getDomain = createCachedGorhillGetDomain(gorhill);
|
const getDomain = createCachedGorhillGetDomain(gorhill);
|
||||||
@ -177,11 +179,15 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
const results = span.traceChild('get final phishing results').traceSyncFn(
|
const results = span.traceChild('get final phishing results').traceSyncFn(() => {
|
||||||
() => Object.entries(domainCountMap)
|
const results: string[] = [];
|
||||||
.filter(entries => entries[1] >= 5)
|
for (const domain in domainCountMap) {
|
||||||
.map(entries => entries[0])
|
if (domainCountMap[domain] > 5) {
|
||||||
);
|
results.push(domain);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
});
|
||||||
|
|
||||||
return [results, domainSet] as const;
|
return [results, domainSet] as const;
|
||||||
});
|
});
|
||||||
|
|||||||
@ -211,10 +211,6 @@ export const PREDEFINED_WHITELIST = [
|
|||||||
'pstmrk.it'
|
'pstmrk.it'
|
||||||
];
|
];
|
||||||
|
|
||||||
export const PREDEFINED_ENFORCED_BACKLIST = [
|
|
||||||
'telemetry.mozilla.org'
|
|
||||||
];
|
|
||||||
|
|
||||||
export const PREDEFINED_ENFORCED_WHITELIST = [
|
export const PREDEFINED_ENFORCED_WHITELIST = [
|
||||||
'godaddysites.com',
|
'godaddysites.com',
|
||||||
'web.app',
|
'web.app',
|
||||||
|
|||||||
@ -43,6 +43,10 @@ DOMAIN-SUFFIX,pantheonsite.io
|
|||||||
DOMAIN-SUFFIX,sitebeat.crazydomains.com
|
DOMAIN-SUFFIX,sitebeat.crazydomains.com
|
||||||
# >> Snowplow Analytics (publicsuffix)
|
# >> Snowplow Analytics (publicsuffix)
|
||||||
DOMAIN-SUFFIX,try-snowplow.com
|
DOMAIN-SUFFIX,try-snowplow.com
|
||||||
|
# >> Mozilla Telemetry (Enforcing)
|
||||||
|
DOMAIN-SUFFIX,telemetry-coverage.mozilla.org
|
||||||
|
DOMAIN-SUFFIX,telemetry.mozilla.org
|
||||||
|
DOMAIN-SUFFIX,incoming-telemetry.thunderbird.net
|
||||||
|
|
||||||
# >> Phishing
|
# >> Phishing
|
||||||
DOMAIN-SUFFIX,gofenews.com
|
DOMAIN-SUFFIX,gofenews.com
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user