mirror of
https://github.com/SukkaW/Surge.git
synced 2025-12-12 01:00:34 +08:00
Perf/Refactor: processDomainList now returns string[]
This commit is contained in:
parent
816bb9ce2f
commit
21a31e6c1f
@ -19,7 +19,7 @@ import { SHARED_DESCRIPTION } from './lib/constants';
|
||||
import { getPhishingDomains } from './lib/get-phishing-domains';
|
||||
|
||||
import { add as SetAdd, subtract as SetSubstract } from 'mnemonist/set';
|
||||
import { setAddFromArray } from './lib/set-add-from-array';
|
||||
import { setAddFromArray, setAddFromArrayCurried } from './lib/set-add-from-array';
|
||||
import { sort } from './lib/timsort';
|
||||
|
||||
export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
||||
@ -38,7 +38,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
||||
// Parse from remote hosts & domain lists
|
||||
...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2], entry[3]).then(hosts => SetAdd(domainSets, hosts))),
|
||||
|
||||
...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetAdd(domainSets, hosts))),
|
||||
...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(setAddFromArrayCurried(domainSets))),
|
||||
|
||||
...ADGUARD_FILTERS.map(input => (
|
||||
typeof input === 'string'
|
||||
@ -60,13 +60,9 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
|
||||
setAddFromArray(filterRuleWhitelistDomainSets, white);
|
||||
setAddFromArray(filterRuleWhitelistDomainSets, black);
|
||||
}))),
|
||||
getPhishingDomains(childSpan).then(([purePhishingDomains, fullPhishingDomainSet]) => {
|
||||
SetAdd(domainSets, fullPhishingDomainSet);
|
||||
setAddFromArray(domainSets, purePhishingDomains);
|
||||
}),
|
||||
childSpan.traceChildAsync('process reject_sukka.conf', async () => {
|
||||
setAddFromArray(domainSets, await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf')));
|
||||
})
|
||||
getPhishingDomains(childSpan).then(setAddFromArrayCurried(domainSets)),
|
||||
childSpan.traceChildAsync('process reject_sukka.conf', () => readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf'))
|
||||
.then(setAddFromArrayCurried(domainSets)))
|
||||
]);
|
||||
// eslint-disable-next-line sukka/no-single-return -- not single return
|
||||
return shouldStop;
|
||||
|
||||
@ -5,6 +5,7 @@ import { TTL } from './cache-filesystem';
|
||||
|
||||
import { add as SetAdd } from 'mnemonist/set';
|
||||
import type { Span } from '../trace';
|
||||
import { appendArrayInPlace } from './append-array-in-place';
|
||||
|
||||
const BLACK_TLD = new Set([
|
||||
'accountant',
|
||||
@ -92,13 +93,13 @@ const BLACK_TLD = new Set([
|
||||
export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => {
|
||||
const gorhill = await getGorhillPublicSuffixPromise();
|
||||
|
||||
const domainSet = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => {
|
||||
const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => {
|
||||
const [domainSet, domainSet2] = await Promise.all([
|
||||
processDomainLists(curSpan, 'https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt', true, TTL.THREE_HOURS()),
|
||||
processDomainLists(curSpan, 'https://phishing.army/download/phishing_army_blocklist.txt', true, TTL.THREE_HOURS())
|
||||
]);
|
||||
|
||||
SetAdd(domainSet, domainSet2);
|
||||
appendArrayInPlace(domainSet, domainSet2);
|
||||
|
||||
return domainSet;
|
||||
});
|
||||
@ -106,8 +107,6 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
||||
const domainCountMap: Record<string, number> = {};
|
||||
|
||||
span.traceChildSync('process phishing domain set', () => {
|
||||
const domainArr = Array.from(domainSet);
|
||||
|
||||
for (let i = 0, len = domainArr.length; i < len; i++) {
|
||||
const line = domainArr[i];
|
||||
|
||||
@ -126,17 +125,15 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
|
||||
}
|
||||
});
|
||||
|
||||
const results = span.traceChildSync('get final phishing results', () => {
|
||||
const res: string[] = [];
|
||||
span.traceChildSync('get final phishing results', () => {
|
||||
for (const domain in domainCountMap) {
|
||||
if (domainCountMap[domain] >= 8) {
|
||||
res.push(`.${domain}`);
|
||||
domainArr.push(`.${domain}`);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
});
|
||||
|
||||
return [results, domainSet] as const;
|
||||
return domainArr;
|
||||
});
|
||||
|
||||
export function calcDomainAbuseScore(line: string) {
|
||||
|
||||
@ -8,7 +8,7 @@ import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
|
||||
import picocolors from 'picocolors';
|
||||
import { normalizeDomain } from './normalize-domain';
|
||||
import { fetchAssets } from './fetch-assets';
|
||||
import { deserializeSet, fsFetchCache, serializeSet } from './cache-filesystem';
|
||||
import { deserializeArray, deserializeSet, fsFetchCache, serializeArray, serializeSet } from './cache-filesystem';
|
||||
import type { Span } from '../trace';
|
||||
import createKeywordFilter from './aho-corasick';
|
||||
|
||||
@ -20,7 +20,7 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl
|
||||
return span.traceChild(`process domainlist: ${domainListsUrl}`).traceAsyncFn(() => fsFetchCache.apply(
|
||||
domainListsUrl,
|
||||
async () => {
|
||||
const domainSets = new Set<string>();
|
||||
const domainSets: string[] = [];
|
||||
|
||||
for await (const line of await fetchRemoteTextByLine(domainListsUrl)) {
|
||||
let domainToAdd = processLine(line);
|
||||
@ -33,7 +33,7 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl
|
||||
foundDebugDomain = true;
|
||||
}
|
||||
|
||||
domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
|
||||
domainSets.push(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
|
||||
}
|
||||
|
||||
return domainSets;
|
||||
@ -41,8 +41,8 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl
|
||||
{
|
||||
ttl,
|
||||
temporaryBypass,
|
||||
serializer: serializeSet,
|
||||
deserializer: deserializeSet
|
||||
serializer: serializeArray,
|
||||
deserializer: deserializeArray
|
||||
}
|
||||
));
|
||||
}
|
||||
|
||||
26
Build/lib/set-add-from-array.bench.ts
Normal file
26
Build/lib/set-add-from-array.bench.ts
Normal file
@ -0,0 +1,26 @@
|
||||
import { fetchRemoteTextByLine } from './fetch-text-by-line';
|
||||
import { processLineFromReadline } from './process-line';
|
||||
|
||||
import { bench, group, run } from 'mitata';
|
||||
|
||||
(async () => {
  // Load the benchmark input once, up front, so fetching and line processing are
  // not part of what gets measured.
  // NOTE(review): `data` is used below via `.length`, indexing and `.forEach`,
  // so it is presumably a string array — confirm against `processLineFromReadline`.
  const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));

  // Both candidates are registered in ONE group under distinct names so the
  // report can tell them apart and compare them against each other.
  group('setAddFromArray', () => {
    // Candidate 1: classic index loop calling `set.add` per element.
    bench('for loop', () => {
      const set = new Set(['1', '2', '1', '3', 'skk.moe']);
      for (let i = 0, len = data.length; i < len; i++) {
        set.add(data[i]);
      }
    });

    // Candidate 2: hand `Set.prototype.add` straight to `forEach`, with the set as thisArg.
    bench('Array#forEach', () => {
      const set = new Set(['1', '2', '1', '3', 'skk.moe']);
      // eslint-disable-next-line @typescript-eslint/unbound-method -- thisArg is passed
      data.forEach(set.add, set);
    });
  });

  // Await the runner so a failure inside it rejects this async function
  // instead of being left as a floating promise.
  await run();
})();
|
||||
@ -2,7 +2,12 @@
|
||||
* In-place adding of elements from an array to a set.
|
||||
*/
|
||||
export function setAddFromArray<T>(set: Set<T>, arr: readonly T[]): void {
  // Inserts every element of `arr` into `set`. `set` is mutated in place and
  // nothing is returned. `arr` is only read, hence the `readonly` parameter
  // type (a plain mutable `T[]` is still accepted).
  //
  // One pass over `arr` is sufficient: adding a value that is already present
  // leaves the set unchanged, so a second pass could never add anything.
  //
  // `Set.prototype.add` is passed to `forEach` directly with `set` as thisArg,
  // so no wrapper closure is needed; `add` ignores the extra (index, array)
  // arguments that `forEach` supplies.
  // eslint-disable-next-line @typescript-eslint/unbound-method -- thisArg is passed
  arr.forEach(set.add, set);
}
|
||||
|
||||
/**
 * Curried variant of in-place set insertion.
 *
 * @param set - The set that receives the elements; it is mutated in place.
 * @returns A function that adds every element of the array it is given to
 * `set` and returns nothing, which lets it be passed directly as a promise
 * `.then()` callback. The array is only read, so readonly arrays are accepted.
 */
export const setAddFromArrayCurried = <T>(set: Set<T>) => (arr: readonly T[]): void => {
  // `Set.prototype.add` is passed unbound on purpose; `forEach` rebinds it via thisArg.
  // eslint-disable-next-line @typescript-eslint/unbound-method -- thisArg is passed
  arr.forEach(set.add, set);
};
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user