Refactor: improve stable sort domains

This commit is contained in:
SukkaW 2024-05-11 01:03:39 +08:00
parent da58e78e69
commit 22d738d99d
6 changed files with 102 additions and 64 deletions

View File

@ -39,8 +39,7 @@ export const buildInternalCDNDomains = task(import.meta.path, async (span) => {
const proxySet = new Set<string>();
const proxyKeywords = new Set<string>();
const gorhill = (await Promise.all([
getGorhillPublicSuffixPromise(),
await Promise.all([
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf'), proxySet, proxyKeywords),
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf'), proxySet, proxyKeywords),
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/my_proxy.conf'), proxySet, proxyKeywords),
@ -49,12 +48,12 @@ export const buildInternalCDNDomains = task(import.meta.path, async (span) => {
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/telegram.conf'), proxySet, proxyKeywords),
processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/cdn.conf'), proxySet),
processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/download.conf'), proxySet)
]))[0];
]);
return compareAndWriteFile(
span,
[
...sortDomains(domainDeduper(Array.from(proxySet)), gorhill).map(i => `SUFFIX,${i}`),
...sortDomains(domainDeduper(Array.from(proxySet))).map(i => `SUFFIX,${i}`),
...sort(Array.from(proxyKeywords)).map(i => `REGEX,${i}`)
],
path.resolve(import.meta.dir, '../Internal/cdn.txt')

View File

@ -21,8 +21,6 @@ import { setAddFromArray } from './lib/set-add-from-array';
import { sort } from './lib/timsort';
export const buildRejectDomainSet = task(import.meta.path, async (span) => {
const gorhill = await getGorhillPublicSuffixPromise();
/** Whitelists */
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
@ -178,7 +176,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
'Sukka\'s Ruleset - Reject Base',
description,
new Date(),
span.traceChildSync('sort reject domainset', () => sortDomains(dudupedDominArray, gorhill)),
span.traceChildSync('sort reject domainset', () => sortDomains(dudupedDominArray)),
'domainset',
path.resolve(import.meta.dir, '../List/domainset/reject.conf'),
path.resolve(import.meta.dir, '../Clash/domainset/reject.txt')

View File

@ -83,16 +83,16 @@ const querySpeedtestApi = async (keyword: string): Promise<Array<string | null>>
}
};
// const getPreviousSpeedtestDomainsPromise = createMemoizedPromise(async () => {
// const domains = new Set<string>();
// for await (const l of await fetchRemoteTextByLine('https://ruleset.skk.moe/List/domainset/speedtest.conf')) {
// const line = processLine(l);
// if (line) {
// domains.add(line);
// }
// }
// return domains;
// });
/**
 * Loads the previously published speedtest domainset.
 *
 * Reads the remote `speedtest.conf` line by line, passes every line through
 * `processLine`, and collects each truthy result into a `Set`.
 * The loader is wrapped in `createMemoizedPromise` (presumably so the remote
 * fetch happens at most once per run — confirm against that helper).
 */
const getPreviousSpeedtestDomainsPromise = createMemoizedPromise(async () => {
  const previousDomains = new Set<string>();
  const remoteLines = await fetchRemoteTextByLine('https://ruleset.skk.moe/List/domainset/speedtest.conf');

  for await (const rawLine of remoteLines) {
    const cleaned = processLine(rawLine);
    // Lines for which processLine yields a falsy value are skipped.
    if (!cleaned) continue;
    previousDomains.add(cleaned);
  }

  return previousDomains;
});
export const buildSpeedtestDomainSet = task(import.meta.path, async (span) => {
// Predefined domainset
@ -183,9 +183,9 @@ export const buildSpeedtestDomainSet = task(import.meta.path, async (span) => {
'mensura.cdn-apple.com' // From netQuality command
]);
// await span.traceChildAsync('fetch previous speedtest domainset', async () => {
// SetHelpers.add(domains, await getPreviousSpeedtestDomainsPromise());
// });
await span.traceChildAsync('fetch previous speedtest domainset', async () => {
SetHelpers.add(domains, await getPreviousSpeedtestDomainsPromise());
});
await new Promise<void>((resolve) => {
const pMap = ([
@ -245,8 +245,7 @@ export const buildSpeedtestDomainSet = task(import.meta.path, async (span) => {
});
});
const gorhill = await getGorhillPublicSuffixPromise();
const deduped = span.traceChildSync('sort result', () => sortDomains(domainDeduper(Array.from(domains)), gorhill));
const deduped = span.traceChildSync('sort result', () => sortDomains(domainDeduper(Array.from(domains))));
const description = [
...SHARED_DESCRIPTION,

View File

@ -7,10 +7,9 @@ import { bench, group, run } from 'mitata';
(async () => {
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
const gorhill = await getGorhillPublicSuffixPromise();
group('sortDomains', () => {
bench('run', () => sortDomains(data, gorhill));
bench('run', () => sortDomains(data));
});
run();

View File

@ -1,51 +1,92 @@
// eslint-disable-next-line import-x/no-unresolved -- bun
import { describe, expect, it } from 'bun:test';
import { sortDomains } from './stable-sort-domain';
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
import { compare, sortDomains } from './stable-sort-domain';
// Pins the return value of `compare` for one pair of dot-prefixed AWS S3 hostnames.
describe('compare', () => {
// The first hostname is expected to compare as 1 against the second.
it('basic', () => {
expect(
compare('.s3-website.ap-northeast-3.amazonaws.com', '.s3.dualstack.ap-south-1.amazonaws.com')
).toBe(1);
});
// NOTE(review): this case repeats the previous one verbatim (same name, same
// inputs, same expectation) — possibly an artifact of how the diff was
// rendered; confirm whether the duplicate is intended.
it('basic', () => {
expect(
compare('.s3-website.ap-northeast-3.amazonaws.com', '.s3.dualstack.ap-south-1.amazonaws.com')
).toBe(1);
});
});
describe('sortDomains', () => {
it('basic', async () => {
const gorhill = await getGorhillPublicSuffixPromise();
expect(
sortDomains([
it('basic', () => {
expect(sortDomains([
'.s3-website.ap-northeast-3.amazonaws.com',
'.s3.dualstack.ap-south-1.amazonaws.com',
'.s3-website.af-south-1.amazonaws.com'
], gorhill)
).toStrictEqual(
sortDomains([
])).toStrictEqual([
'.s3-website.af-south-1.amazonaws.com',
'.s3.dualstack.ap-south-1.amazonaws.com',
'.s3-website.ap-northeast-3.amazonaws.com'
]);
expect(sortDomains([
'.s3.dualstack.ap-south-1.amazonaws.com',
'.s3-website.ap-northeast-3.amazonaws.com',
'.s3-website.af-south-1.amazonaws.com'
], gorhill)
);
])).toStrictEqual([
'.s3-website.af-south-1.amazonaws.com',
'.s3.dualstack.ap-south-1.amazonaws.com',
'.s3-website.ap-northeast-3.amazonaws.com'
]);
expect(
sortDomains([
expect(sortDomains([
'.s3-website-us-west-2.amazonaws.com',
'.s3-1.amazonaws.com'
], gorhill)
).toStrictEqual(
sortDomains([
])).toStrictEqual([
'.s3-1.amazonaws.com',
'.s3-website-us-west-2.amazonaws.com'
], gorhill)
);
]);
expect(sortDomains([
'.s3-1.amazonaws.com',
'.s3-website-us-west-2.amazonaws.com'
])).toStrictEqual([
'.s3-1.amazonaws.com',
'.s3-website-us-west-2.amazonaws.com'
]);
expect(
sortDomains([
'.s3-deprecated.us-west-2.amazonaws.com',
'.s3-accesspoint.dualstack.us-west-2.amazonaws.com',
'.s3.dualstack.us-west-2.amazonaws.com'
], gorhill)
).toStrictEqual(
sortDomains([
'.s3-accesspoint.dualstack.us-west-2.amazonaws.com',
])
).toStrictEqual([
'.s3.dualstack.us-west-2.amazonaws.com',
'.s3-deprecated.us-west-2.amazonaws.com'
], gorhill)
);
'.s3-deprecated.us-west-2.amazonaws.com',
'.s3-accesspoint.dualstack.us-west-2.amazonaws.com'
]);
expect(
sortDomains([
'.s3-deprecated.us-west-2.amazonaws.com',
'.s3-accesspoint.dualstack.us-west-2.amazonaws.com',
'.s3.dualstack.us-west-2.amazonaws.com'
])
).toStrictEqual([
'.s3.dualstack.us-west-2.amazonaws.com',
'.s3-deprecated.us-west-2.amazonaws.com',
'.s3-accesspoint.dualstack.us-west-2.amazonaws.com'
]);
expect(
sortDomains([
'.ec2-25-58-215-234.us-east-2.compute.amazonaws.com',
'.ec2-13-58-215-234.us-east-2.compute.amazonaws.com'
])
).toStrictEqual([
'.ec2-13-58-215-234.us-east-2.compute.amazonaws.com',
'.ec2-25-58-215-234.us-east-2.compute.amazonaws.com'
]);
});
});

View File

@ -1,7 +1,7 @@
import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
import * as tldts from 'tldts';
import { sort } from './timsort';
const compare = (a: string, b: string) => {
export const compare = (a: string, b: string) => {
if (a === b) return 0;
const aLen = a.length;
@ -27,11 +27,13 @@ const compare = (a: string, b: string) => {
return 0;
};
export const sortDomains = (inputs: string[], gorhill: PublicSuffixList) => {
const tldtsOpt = { allowPrivateDomains: false, detectIp: false, validateHostname: false };
export const sortDomains = (inputs: string[]) => {
const domains = inputs.reduce<Map<string, string>>((acc, cur) => {
if (!acc.has(cur)) {
const topD = gorhill.getDomain(cur[0] === '.' ? cur.slice(1) : cur);
acc.set(cur, topD);
const topD = tldts.getDomain(cur, tldtsOpt);
acc.set(cur, topD ?? cur);
};
return acc;
}, new Map());
@ -42,7 +44,7 @@ export const sortDomains = (inputs: string[], gorhill: PublicSuffixList) => {
const $a = domains.get(a)!;
const $b = domains.get(b)!;
if ($a === a && $b === b) {
if (a === $a && b === $b) {
return compare(a, b);
}
return compare($a, $b) || compare(a, b);