From 66b0a3697dc4f026960d53daa888cddd9913a242 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Sat, 12 Oct 2024 12:37:45 +0800 Subject: [PATCH] Chore: a hash collision test --- Build/lib/string-hash.ts | 11 +++++++ Build/validate-hash-collision-test.ts | 46 +++++++++++++++++++++++++++ package.json | 1 + pnpm-lock.yaml | 8 +++++ 4 files changed, 66 insertions(+) create mode 100644 Build/validate-hash-collision-test.ts diff --git a/Build/lib/string-hash.ts b/Build/lib/string-hash.ts index 8084201e..355856d3 100644 --- a/Build/lib/string-hash.ts +++ b/Build/lib/string-hash.ts @@ -43,4 +43,15 @@ export function fnv1a52(str: string) { ); } +/** 32-bit FNV-1a hash of `s`. Starts from the offset basis 0x811C9DC5, XORs in each UTF-16 code unit obtained through charCodeAt, multiplies the state by the FNV prime after every unit, and returns the final state as an unsigned 32-bit integer. An empty string returns the offset basis itself. */ export function fnv1a(s: string) { + let h = 0x81_1C_9D_C5; + + for (let i = 0, l = s.length; i < l; i++) { + h ^= s.charCodeAt(i); + h += (h << 1) + (h << 4) + (h << 7) + (h << 8) + (h << 24); /* shift-and-add form of multiplying h by 16777619 (0x01000193); the XOR at the top of the next iteration and the final unsigned shift truncate the state back to 32 bits */ + } + + return (h >>> 0); +} + export const stringHash = (payload: string) => fnv1a52(payload).toString(36) + payload.length.toString(36); diff --git a/Build/validate-hash-collision-test.ts b/Build/validate-hash-collision-test.ts new file mode 100644 index 00000000..fbadb1de --- /dev/null +++ b/Build/validate-hash-collision-test.ts @@ -0,0 +1,46 @@ +/* eslint-disable no-await-in-loop -- no concurrent */ +import { fdir as Fdir } from 'fdir'; +import { OUTPUT_SURGE_DIR } from './constants/dir'; +import path from 'node:path'; +import { readFileIntoProcessedArray } from './lib/fetch-text-by-line'; +import { xxhash3 } from 'hash-wasm'; + +/* Script entry point: hashes every entry read from the domainset and non_ip files under OUTPUT_SURGE_DIR with xxhash3, groups the inputs by hash, and logs each hash that is shared by more than one distinct input. */ (async () => { + const hashMap = new Map>(); /* NOTE(review): the generic arguments appear to have been stripped from this line during extraction - presumably Map<string, Set<string>>, keyed by the xxhash3 result; confirm against the original commit */ + + /* Hash the inputs one at a time and add each input to the Set stored under its hash. */ const runHash = async (inputs: string[]) => { + for (const input of inputs) { + const hash = await xxhash3(input); + if (!hashMap.has(hash)) { + hashMap.set(hash, new Set()); + } + hashMap.get(hash)!.add(input); + } + }; + + /* Crawl OUTPUT_SURGE_DIR; the returned paths are relative and are joined back onto OUTPUT_SURGE_DIR below. */ const files = await new Fdir() + .withRelativePaths() + .crawl(OUTPUT_SURGE_DIR) + .withPromise(); + + for (const file of files) { + const fullpath = path.join(OUTPUT_SURGE_DIR, file); + if (file.startsWith('domainset' + path.sep)) { + await 
runHash((await readFileIntoProcessedArray(fullpath)).map(i => (i[0] === '.' ? i.slice(1) : i))); /* domainset entries: one leading dot, if present, is removed before hashing */ + } else if (file.startsWith('non_ip' + path.sep)) { + await runHash((await readFileIntoProcessedArray(fullpath)).map(i => i.split(',')[1])); /* non_ip entries: only the second comma-separated field is hashed. NOTE(review): an entry without a comma would yield undefined here - confirm that readFileIntoProcessedArray never returns such entries for these files */ + } + } + + console.log(hashMap.size); /* number of distinct hash values seen */ + let collision = 0; + /* Log every hash whose Set holds more than one distinct input, counting them as collisions. */ hashMap.forEach((v, k) => { + if (v.size > 1) { + collision++; + console.log(k, '=>', v); + } + }); + /* When no collision was found, the entire hash map is printed instead. */ if (collision === 0) { + console.log(hashMap); + } +})(); diff --git a/package.json b/package.json index 4ac8ba3b..e3a25a4e 100644 --- a/package.json +++ b/package.json @@ -31,6 +31,7 @@ "fast-cidr-tools": "^0.3.1", "fdir": "^6.4.0", "foxact": "^0.2.38", + "hash-wasm": "^4.11.0", "json-stringify-pretty-compact": "^3.0.0", "make-fetch-happen": "^14.0.1", "mnemonist": "^0.39.8", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 171d9783..219b0dbf 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -44,6 +44,9 @@ importers: foxact: specifier: ^0.2.38 version: 0.2.38 + hash-wasm: + specifier: ^4.11.0 + version: 4.11.0 json-stringify-pretty-compact: specifier: ^3.0.0 version: 3.0.0 @@ -1162,6 +1165,9 @@ packages: resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==} engines: {node: '>=8'} + hash-wasm@4.11.0: + resolution: {integrity: sha512-HVusNXlVqHe0fzIzdQOGolnFN6mX/fqcrSAOcTBXdvzrXVHwTz11vXeKRmkR5gTuwVpvHZEIyKoePDvuAR+XwQ==} + hasown@2.0.2: resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==} engines: {node: '>= 0.4'} @@ -2983,6 +2989,8 @@ snapshots: has-flag@4.0.0: {} + hash-wasm@4.11.0: {} + hasown@2.0.2: dependencies: function-bind: 1.1.2