Refactor: improve stable sort domains

This commit is contained in:
SukkaW
2024-05-11 01:03:39 +08:00
parent da58e78e69
commit 22d738d99d
6 changed files with 102 additions and 64 deletions

View File

@@ -7,10 +7,9 @@ import { bench, group, run } from 'mitata';
(async () => {
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
const gorhill = await getGorhillPublicSuffixPromise();
group('sortDomains', () => {
bench('run', () => sortDomains(data, gorhill));
bench('run', () => sortDomains(data));
});
run();

View File

@@ -1,51 +1,92 @@
// eslint-disable-next-line import-x/no-unresolved -- bun
import { describe, expect, it } from 'bun:test';
import { sortDomains } from './stable-sort-domain';
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
import { compare, sortDomains } from './stable-sort-domain';
// Test suite for the `compare` function imported from './stable-sort-domain'.
// The expectation pinned here: for these two amazonaws.com hostnames the call
// returns 1, i.e. the first argument is ordered after the second.
describe('compare', () => {
it('basic', () => {
expect(
compare('.s3-website.ap-northeast-3.amazonaws.com', '.s3.dualstack.ap-south-1.amazonaws.com')
).toBe(1);
});
// NOTE(review): this case repeats the one above verbatim (same title, same
// inputs, same expected value). It may be an artifact of how this diff was
// rendered rather than a real duplicate in the file — confirm before removing.
it('basic', () => {
expect(
compare('.s3-website.ap-northeast-3.amazonaws.com', '.s3.dualstack.ap-south-1.amazonaws.com')
).toBe(1);
});
});
describe('sortDomains', () => {
it('basic', async () => {
const gorhill = await getGorhillPublicSuffixPromise();
it('basic', () => {
expect(sortDomains([
'.s3-website.ap-northeast-3.amazonaws.com',
'.s3.dualstack.ap-south-1.amazonaws.com',
'.s3-website.af-south-1.amazonaws.com'
])).toStrictEqual([
'.s3-website.af-south-1.amazonaws.com',
'.s3.dualstack.ap-south-1.amazonaws.com',
'.s3-website.ap-northeast-3.amazonaws.com'
]);
expect(
sortDomains([
'.s3-website.ap-northeast-3.amazonaws.com',
'.s3.dualstack.ap-south-1.amazonaws.com',
'.s3-website.af-south-1.amazonaws.com'
], gorhill)
).toStrictEqual(
sortDomains([
'.s3.dualstack.ap-south-1.amazonaws.com',
'.s3-website.ap-northeast-3.amazonaws.com',
'.s3-website.af-south-1.amazonaws.com'
], gorhill)
);
expect(sortDomains([
'.s3.dualstack.ap-south-1.amazonaws.com',
'.s3-website.ap-northeast-3.amazonaws.com',
'.s3-website.af-south-1.amazonaws.com'
])).toStrictEqual([
'.s3-website.af-south-1.amazonaws.com',
'.s3.dualstack.ap-south-1.amazonaws.com',
'.s3-website.ap-northeast-3.amazonaws.com'
]);
expect(
sortDomains([
'.s3-website-us-west-2.amazonaws.com',
'.s3-1.amazonaws.com'
], gorhill)
).toStrictEqual(
sortDomains([
'.s3-1.amazonaws.com',
'.s3-website-us-west-2.amazonaws.com'
], gorhill)
);
expect(sortDomains([
'.s3-website-us-west-2.amazonaws.com',
'.s3-1.amazonaws.com'
])).toStrictEqual([
'.s3-1.amazonaws.com',
'.s3-website-us-west-2.amazonaws.com'
]);
expect(sortDomains([
'.s3-1.amazonaws.com',
'.s3-website-us-west-2.amazonaws.com'
])).toStrictEqual([
'.s3-1.amazonaws.com',
'.s3-website-us-west-2.amazonaws.com'
]);
expect(
sortDomains([
'.s3-deprecated.us-west-2.amazonaws.com',
'.s3-accesspoint.dualstack.us-west-2.amazonaws.com',
'.s3.dualstack.us-west-2.amazonaws.com'
], gorhill)
).toStrictEqual(
])
).toStrictEqual([
'.s3.dualstack.us-west-2.amazonaws.com',
'.s3-deprecated.us-west-2.amazonaws.com',
'.s3-accesspoint.dualstack.us-west-2.amazonaws.com'
]);
expect(
sortDomains([
'.s3-deprecated.us-west-2.amazonaws.com',
'.s3-accesspoint.dualstack.us-west-2.amazonaws.com',
'.s3.dualstack.us-west-2.amazonaws.com',
'.s3-deprecated.us-west-2.amazonaws.com'
], gorhill)
);
'.s3.dualstack.us-west-2.amazonaws.com'
])
).toStrictEqual([
'.s3.dualstack.us-west-2.amazonaws.com',
'.s3-deprecated.us-west-2.amazonaws.com',
'.s3-accesspoint.dualstack.us-west-2.amazonaws.com'
]);
expect(
sortDomains([
'.ec2-25-58-215-234.us-east-2.compute.amazonaws.com',
'.ec2-13-58-215-234.us-east-2.compute.amazonaws.com'
])
).toStrictEqual([
'.ec2-13-58-215-234.us-east-2.compute.amazonaws.com',
'.ec2-25-58-215-234.us-east-2.compute.amazonaws.com'
]);
});
});

View File

@@ -1,7 +1,7 @@
import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
import * as tldts from 'tldts';
import { sort } from './timsort';
const compare = (a: string, b: string) => {
export const compare = (a: string, b: string) => {
if (a === b) return 0;
const aLen = a.length;
@@ -27,11 +27,13 @@ const compare = (a: string, b: string) => {
return 0;
};
export const sortDomains = (inputs: string[], gorhill: PublicSuffixList) => {
const tldtsOpt = { allowPrivateDomains: false, detectIp: false, validateHostname: false };
export const sortDomains = (inputs: string[]) => {
const domains = inputs.reduce<Map<string, string>>((acc, cur) => {
if (!acc.has(cur)) {
const topD = gorhill.getDomain(cur[0] === '.' ? cur.slice(1) : cur);
acc.set(cur, topD);
const topD = tldts.getDomain(cur, tldtsOpt);
acc.set(cur, topD ?? cur);
};
return acc;
}, new Map());
@@ -42,7 +44,7 @@ export const sortDomains = (inputs: string[], gorhill: PublicSuffixList) => {
const $a = domains.get(a)!;
const $b = domains.get(b)!;
if ($a === a && $b === b) {
if (a === $a && b === $b) {
return compare(a, b);
}
return compare($a, $b) || compare(a, b);