Refactor: improve stable sort domains

This commit is contained in:
SukkaW 2024-05-11 01:03:39 +08:00
parent da58e78e69
commit 22d738d99d
6 changed files with 102 additions and 64 deletions

View File

@ -39,8 +39,7 @@ export const buildInternalCDNDomains = task(import.meta.path, async (span) => {
const proxySet = new Set<string>(); const proxySet = new Set<string>();
const proxyKeywords = new Set<string>(); const proxyKeywords = new Set<string>();
const gorhill = (await Promise.all([ await Promise.all([
getGorhillPublicSuffixPromise(),
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf'), proxySet, proxyKeywords), processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf'), proxySet, proxyKeywords),
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf'), proxySet, proxyKeywords), processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf'), proxySet, proxyKeywords),
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/my_proxy.conf'), proxySet, proxyKeywords), processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/my_proxy.conf'), proxySet, proxyKeywords),
@ -49,12 +48,12 @@ export const buildInternalCDNDomains = task(import.meta.path, async (span) => {
processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/telegram.conf'), proxySet, proxyKeywords), processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/telegram.conf'), proxySet, proxyKeywords),
processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/cdn.conf'), proxySet), processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/cdn.conf'), proxySet),
processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/download.conf'), proxySet) processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/download.conf'), proxySet)
]))[0]; ]);
return compareAndWriteFile( return compareAndWriteFile(
span, span,
[ [
...sortDomains(domainDeduper(Array.from(proxySet)), gorhill).map(i => `SUFFIX,${i}`), ...sortDomains(domainDeduper(Array.from(proxySet))).map(i => `SUFFIX,${i}`),
...sort(Array.from(proxyKeywords)).map(i => `REGEX,${i}`) ...sort(Array.from(proxyKeywords)).map(i => `REGEX,${i}`)
], ],
path.resolve(import.meta.dir, '../Internal/cdn.txt') path.resolve(import.meta.dir, '../Internal/cdn.txt')

View File

@ -21,8 +21,6 @@ import { setAddFromArray } from './lib/set-add-from-array';
import { sort } from './lib/timsort'; import { sort } from './lib/timsort';
export const buildRejectDomainSet = task(import.meta.path, async (span) => { export const buildRejectDomainSet = task(import.meta.path, async (span) => {
const gorhill = await getGorhillPublicSuffixPromise();
/** Whitelists */ /** Whitelists */
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST); const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
@ -178,7 +176,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
'Sukka\'s Ruleset - Reject Base', 'Sukka\'s Ruleset - Reject Base',
description, description,
new Date(), new Date(),
span.traceChildSync('sort reject domainset', () => sortDomains(dudupedDominArray, gorhill)), span.traceChildSync('sort reject domainset', () => sortDomains(dudupedDominArray)),
'domainset', 'domainset',
path.resolve(import.meta.dir, '../List/domainset/reject.conf'), path.resolve(import.meta.dir, '../List/domainset/reject.conf'),
path.resolve(import.meta.dir, '../Clash/domainset/reject.txt') path.resolve(import.meta.dir, '../Clash/domainset/reject.txt')

View File

@ -83,16 +83,16 @@ const querySpeedtestApi = async (keyword: string): Promise<Array<string | null>>
} }
}; };
// const getPreviousSpeedtestDomainsPromise = createMemoizedPromise(async () => { const getPreviousSpeedtestDomainsPromise = createMemoizedPromise(async () => {
// const domains = new Set<string>(); const domains = new Set<string>();
// for await (const l of await fetchRemoteTextByLine('https://ruleset.skk.moe/List/domainset/speedtest.conf')) { for await (const l of await fetchRemoteTextByLine('https://ruleset.skk.moe/List/domainset/speedtest.conf')) {
// const line = processLine(l); const line = processLine(l);
// if (line) { if (line) {
// domains.add(line); domains.add(line);
// } }
// } }
// return domains; return domains;
// }); });
export const buildSpeedtestDomainSet = task(import.meta.path, async (span) => { export const buildSpeedtestDomainSet = task(import.meta.path, async (span) => {
// Predefined domainset // Predefined domainset
@ -183,9 +183,9 @@ export const buildSpeedtestDomainSet = task(import.meta.path, async (span) => {
'mensura.cdn-apple.com' // From netQuality command 'mensura.cdn-apple.com' // From netQuality command
]); ]);
// await span.traceChildAsync('fetch previous speedtest domainset', async () => { await span.traceChildAsync('fetch previous speedtest domainset', async () => {
// SetHelpers.add(domains, await getPreviousSpeedtestDomainsPromise()); SetHelpers.add(domains, await getPreviousSpeedtestDomainsPromise());
// }); });
await new Promise<void>((resolve) => { await new Promise<void>((resolve) => {
const pMap = ([ const pMap = ([
@ -245,8 +245,7 @@ export const buildSpeedtestDomainSet = task(import.meta.path, async (span) => {
}); });
}); });
const gorhill = await getGorhillPublicSuffixPromise(); const deduped = span.traceChildSync('sort result', () => sortDomains(domainDeduper(Array.from(domains))));
const deduped = span.traceChildSync('sort result', () => sortDomains(domainDeduper(Array.from(domains)), gorhill));
const description = [ const description = [
...SHARED_DESCRIPTION, ...SHARED_DESCRIPTION,

View File

@ -7,10 +7,9 @@ import { bench, group, run } from 'mitata';
(async () => { (async () => {
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt')); const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
const gorhill = await getGorhillPublicSuffixPromise();
group('sortDomains', () => { group('sortDomains', () => {
bench('run', () => sortDomains(data, gorhill)); bench('run', () => sortDomains(data));
}); });
run(); run();

View File

@ -1,51 +1,92 @@
// eslint-disable-next-line import-x/no-unresolved -- bun // eslint-disable-next-line import-x/no-unresolved -- bun
import { describe, expect, it } from 'bun:test'; import { describe, expect, it } from 'bun:test';
import { sortDomains } from './stable-sort-domain'; import { compare, sortDomains } from './stable-sort-domain';
import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
describe('compare', () => {
it('basic', () => {
expect(
compare('.s3-website.ap-northeast-3.amazonaws.com', '.s3.dualstack.ap-south-1.amazonaws.com')
).toBe(1);
});
it('basic', () => {
expect(
compare('.s3-website.ap-northeast-3.amazonaws.com', '.s3.dualstack.ap-south-1.amazonaws.com')
).toBe(1);
});
});
describe('sortDomains', () => { describe('sortDomains', () => {
it('basic', async () => { it('basic', () => {
const gorhill = await getGorhillPublicSuffixPromise(); expect(sortDomains([
expect(
sortDomains([
'.s3-website.ap-northeast-3.amazonaws.com', '.s3-website.ap-northeast-3.amazonaws.com',
'.s3.dualstack.ap-south-1.amazonaws.com', '.s3.dualstack.ap-south-1.amazonaws.com',
'.s3-website.af-south-1.amazonaws.com' '.s3-website.af-south-1.amazonaws.com'
], gorhill) ])).toStrictEqual([
).toStrictEqual( '.s3-website.af-south-1.amazonaws.com',
sortDomains([ '.s3.dualstack.ap-south-1.amazonaws.com',
'.s3-website.ap-northeast-3.amazonaws.com'
]);
expect(sortDomains([
'.s3.dualstack.ap-south-1.amazonaws.com', '.s3.dualstack.ap-south-1.amazonaws.com',
'.s3-website.ap-northeast-3.amazonaws.com', '.s3-website.ap-northeast-3.amazonaws.com',
'.s3-website.af-south-1.amazonaws.com' '.s3-website.af-south-1.amazonaws.com'
], gorhill) ])).toStrictEqual([
); '.s3-website.af-south-1.amazonaws.com',
'.s3.dualstack.ap-south-1.amazonaws.com',
'.s3-website.ap-northeast-3.amazonaws.com'
]);
expect( expect(sortDomains([
sortDomains([
'.s3-website-us-west-2.amazonaws.com', '.s3-website-us-west-2.amazonaws.com',
'.s3-1.amazonaws.com' '.s3-1.amazonaws.com'
], gorhill) ])).toStrictEqual([
).toStrictEqual(
sortDomains([
'.s3-1.amazonaws.com', '.s3-1.amazonaws.com',
'.s3-website-us-west-2.amazonaws.com' '.s3-website-us-west-2.amazonaws.com'
], gorhill) ]);
);
expect(sortDomains([
'.s3-1.amazonaws.com',
'.s3-website-us-west-2.amazonaws.com'
])).toStrictEqual([
'.s3-1.amazonaws.com',
'.s3-website-us-west-2.amazonaws.com'
]);
expect( expect(
sortDomains([ sortDomains([
'.s3-deprecated.us-west-2.amazonaws.com', '.s3-deprecated.us-west-2.amazonaws.com',
'.s3-accesspoint.dualstack.us-west-2.amazonaws.com', '.s3-accesspoint.dualstack.us-west-2.amazonaws.com',
'.s3.dualstack.us-west-2.amazonaws.com' '.s3.dualstack.us-west-2.amazonaws.com'
], gorhill) ])
).toStrictEqual( ).toStrictEqual([
sortDomains([
'.s3-accesspoint.dualstack.us-west-2.amazonaws.com',
'.s3.dualstack.us-west-2.amazonaws.com', '.s3.dualstack.us-west-2.amazonaws.com',
'.s3-deprecated.us-west-2.amazonaws.com' '.s3-deprecated.us-west-2.amazonaws.com',
], gorhill) '.s3-accesspoint.dualstack.us-west-2.amazonaws.com'
); ]);
expect(
sortDomains([
'.s3-deprecated.us-west-2.amazonaws.com',
'.s3-accesspoint.dualstack.us-west-2.amazonaws.com',
'.s3.dualstack.us-west-2.amazonaws.com'
])
).toStrictEqual([
'.s3.dualstack.us-west-2.amazonaws.com',
'.s3-deprecated.us-west-2.amazonaws.com',
'.s3-accesspoint.dualstack.us-west-2.amazonaws.com'
]);
expect(
sortDomains([
'.ec2-25-58-215-234.us-east-2.compute.amazonaws.com',
'.ec2-13-58-215-234.us-east-2.compute.amazonaws.com'
])
).toStrictEqual([
'.ec2-13-58-215-234.us-east-2.compute.amazonaws.com',
'.ec2-25-58-215-234.us-east-2.compute.amazonaws.com'
]);
}); });
}); });

View File

@ -1,7 +1,7 @@
import type { PublicSuffixList } from '@gorhill/publicsuffixlist'; import * as tldts from 'tldts';
import { sort } from './timsort'; import { sort } from './timsort';
const compare = (a: string, b: string) => { export const compare = (a: string, b: string) => {
if (a === b) return 0; if (a === b) return 0;
const aLen = a.length; const aLen = a.length;
@ -27,11 +27,13 @@ const compare = (a: string, b: string) => {
return 0; return 0;
}; };
export const sortDomains = (inputs: string[], gorhill: PublicSuffixList) => { const tldtsOpt = { allowPrivateDomains: false, detectIp: false, validateHostname: false };
export const sortDomains = (inputs: string[]) => {
const domains = inputs.reduce<Map<string, string>>((acc, cur) => { const domains = inputs.reduce<Map<string, string>>((acc, cur) => {
if (!acc.has(cur)) { if (!acc.has(cur)) {
const topD = gorhill.getDomain(cur[0] === '.' ? cur.slice(1) : cur); const topD = tldts.getDomain(cur, tldtsOpt);
acc.set(cur, topD); acc.set(cur, topD ?? cur);
}; };
return acc; return acc;
}, new Map()); }, new Map());
@ -42,7 +44,7 @@ export const sortDomains = (inputs: string[], gorhill: PublicSuffixList) => {
const $a = domains.get(a)!; const $a = domains.get(a)!;
const $b = domains.get(b)!; const $b = domains.get(b)!;
if ($a === a && $b === b) { if (a === $a && b === $b) {
return compare(a, b); return compare(a, b);
} }
return compare($a, $b) || compare(a, b); return compare($a, $b) || compare(a, b);