diff --git a/Build/lib/stable-sort-domain.ts b/Build/lib/stable-sort-domain.ts index 22b35fc6..2f24c317 100644 --- a/Build/lib/stable-sort-domain.ts +++ b/Build/lib/stable-sort-domain.ts @@ -1,5 +1,5 @@ import type { PublicSuffixList } from '@gorhill/publicsuffixlist'; -import { sort } from 'timsort'; +import { sort } from './timsort'; const compare = (a: string, b: string) => { if (a === b) return 0; @@ -42,10 +42,10 @@ export const sortDomains = (inputs: string[], gorhill: PublicSuffixList) => { const $a = domains.get(a); const $b = domains.get(b); - if ($a && $b) { - return compare($a, $b) || compare(a, b); + if ($a == null || $b == null) { + return compare(a, b); } - return compare(a, b); + return compare($a, $b) || compare(a, b); }; sort(inputs, sorter); diff --git a/Build/lib/timsort.ts b/Build/lib/timsort.ts new file mode 100644 index 00000000..53463135 --- /dev/null +++ b/Build/lib/timsort.ts @@ -0,0 +1,954 @@ +type Comparator = (a: T, b: T) => number; + +/** + * Default minimum size of a run. + */ +const DEFAULT_MIN_MERGE = 32; + +/** + * Minimum ordered subsequece required to do galloping. + */ +const DEFAULT_MIN_GALLOPING = 7; + +/** + * Default tmp storage length. Can increase depending on the size of the + * smallest run to merge. + */ +const DEFAULT_TMP_STORAGE_LENGTH = 256; + +/** + * Pre-computed powers of 10 for efficient lexicographic comparison of + * small integers. + */ +const POWERS_OF_TEN = [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9]; + +/** + * Estimate the logarithm base 10 of a small integer. + * + * @param x - The integer to estimate the logarithm of. + * @return {number} - The estimated logarithm of the integer. + */ +function log10(x: number): number { + if (x < 1e5) { + if (x < 1e2) { + return x < 1e1 ? 0 : 1; + } + + if (x < 1e4) { + return x < 1e3 ? 2 : 3; + } + + return 4; + } + + if (x < 1e7) { + return x < 1e6 ? 5 : 6; + } + + if (x < 1e9) { + return x < 1e8 ? 7 : 8; + } + + return 9; +} + +/** + * Default alphabetical comparison of items. + * + * @param a - First element to compare. + * @param b - Second element to compare. + * @return - A positive number if a.toString() > b.toString(), a + * negative number if .toString() < b.toString(), 0 otherwise. + */ +function alphabeticalCompare(a: any, b: any): number { + if (a === b) { + return 0; + } + + if (~~a === a && ~~b === b) { + if (a === 0 || b === 0) { + return a < b ? -1 : 1; + } + + if (a < 0 || b < 0) { + if (b >= 0) { + return -1; + } + + if (a >= 0) { + return 1; + } + + a = -a; + b = -b; + } + + const al = log10(a); + const bl = log10(b); + + let t = 0; + + if (al < bl) { + a *= POWERS_OF_TEN[bl - al - 1]; + b /= 10; + t = -1; + } else if (al > bl) { + b *= POWERS_OF_TEN[al - bl - 1]; + a /= 10; + t = 1; + } + + if (a === b) { + return t; + } + + return a < b ? -1 : 1; + } + + const aStr = String(a); + const bStr = String(b); + + if (aStr === bStr) { + return 0; + } + + return aStr < bStr ? -1 : 1; +} + +/** + * Compute minimum run length for TimSort + * + * @param n - The size of the array to sort. + */ +function minRunLength(n: number) { + let r = 0; + + while (n >= DEFAULT_MIN_MERGE) { + r |= (n & 1); + n >>= 1; + } + + return n + r; +} + +/** + * Counts the length of a monotonically ascending or strictly monotonically + * descending sequence (run) starting at array[lo] in the range [lo, hi). If + * the run is descending it is made ascending. + * + * @param array - The array to reverse. + * @param lo - First element in the range (inclusive). + * @param hi - Last element in the range. + * @param compare - Item comparison function. + * @return - The length of the run. + */ +function makeAscendingRun(array: T[], lo: number, hi: number, compare: Comparator): number { + let runHi = lo + 1; + + if (runHi === hi) { + return 1; + } + + // Descending + if (compare(array[runHi++], array[lo]) < 0) { + while (runHi < hi && compare(array[runHi], array[runHi - 1]) < 0) { + runHi++; + } + + reverseRun(array, lo, runHi); + // Ascending + } else { + while (runHi < hi && compare(array[runHi], array[runHi - 1]) >= 0) { + runHi++; + } + } + + return runHi - lo; +} + +/** + * Reverse an array in the range [lo, hi). + * + * @param array - The array to reverse. + * @param lo - First element in the range (inclusive). + * @param hi - Last element in the range. + */ +function reverseRun(array: T[], lo: number, hi: number) { + hi--; + + while (lo < hi) { + const t = array[lo]; + array[lo++] = array[hi]; + array[hi--] = t; + } +} + +/** + * Perform the binary sort of the array in the range [lo, hi) where start is + * the first element possibly out of order. + * + * @param array - The array to sort. + * @param lo - First element in the range (inclusive). + * @param hi - Last element in the range. + * @param start - First element possibly out of order. + * @param compare - Item comparison function. + */ +function binaryInsertionSort(array: T[], lo: number, hi: number, start: number, compare: Comparator) { + if (start === lo) { + start++; + } + + for (; start < hi; start++) { + const pivot = array[start]; + + // Ranges of the array where pivot belongs + let left = lo; + let right = start; + + /* + * pivot >= array[i] for i in [lo, left) + * pivot < array[i] for i in in [right, start) + */ + while (left < right) { + const mid = (left + right) >>> 1; + + if (compare(pivot, array[mid]) < 0) { + right = mid; + } else { + left = mid + 1; + } + } + + /* + * Move elements right to make room for the pivot. If there are elements + * equal to pivot, left points to the first slot after them: this is also + * a reason for which TimSort is stable + */ + let n = start - left; + // Switch is just an optimization for small arrays + switch (n) { + case 3: + array[left + 3] = array[left + 2]; + /* falls through */ + case 2: + array[left + 2] = array[left + 1]; + /* falls through */ + case 1: + array[left + 1] = array[left]; + break; + default: + while (n > 0) { + array[left + n] = array[left + n - 1]; + n--; + } + } + + array[left] = pivot; + } +} + +/** + * Find the position at which to insert a value in a sorted range. If the range + * contains elements equal to the value the leftmost element index is returned + * (for stability). + * + * @param value - Value to insert. + * @param array - The array in which to insert value. + * @param start - First element in the range. + * @param length - Length of the range. + * @param hint - The index at which to begin the search. + * @param compare - Item comparison function. + * @return - The index where to insert value. + */ +function gallopLeft(value: T, array: T[], start: number, length: number, hint: number, compare: Comparator): number { + let lastOffset = 0; + let maxOffset = 0; + let offset = 1; + + if (compare(value, array[start + hint]) > 0) { + maxOffset = length - hint; + + while (offset < maxOffset && compare(value, array[start + hint + offset]) > 0) { + lastOffset = offset; + offset = (offset << 1) + 1; + + if (offset <= 0) { + offset = maxOffset; + } + } + + if (offset > maxOffset) { + offset = maxOffset; + } + + // Make offsets relative to start + lastOffset += hint; + offset += hint; + + // value <= array[start + hint] + } else { + maxOffset = hint + 1; + while (offset < maxOffset && compare(value, array[start + hint - offset]) <= 0) { + lastOffset = offset; + offset = (offset << 1) + 1; + + if (offset <= 0) { + offset = maxOffset; + } + } + if (offset > maxOffset) { + offset = maxOffset; + } + + // Make offsets relative to start + const tmp = lastOffset; + lastOffset = hint - offset; + offset = hint - tmp; + } + + /* + * Now array[start+lastOffset] < value <= array[start+offset], so value + * belongs somewhere in the range (start + lastOffset, start + offset]. Do a + * binary search, with invariant array[start + lastOffset - 1] < value <= + * array[start + offset]. + */ + lastOffset++; + while (lastOffset < offset) { + const m = lastOffset + ((offset - lastOffset) >>> 1); + + if (compare(value, array[start + m]) > 0) { + lastOffset = m + 1; + } else { + offset = m; + } + } + return offset; +} + +/** + * Find the position at which to insert a value in a sorted range. If the range + * contains elements equal to the value the rightmost element index is returned + * (for stability). + * + * @param value - Value to insert. + * @param array - The array in which to insert value. + * @param start - First element in the range. + * @param length - Length of the range. + * @param hint - The index at which to begin the search. + * @param compare - Item comparison function. + * @return - The index where to insert value. + */ +function gallopRight(value: T, array: T[], start: number, length: number, hint: number, compare: Comparator): number { + let lastOffset = 0; + let maxOffset = 0; + let offset = 1; + + if (compare(value, array[start + hint]) < 0) { + maxOffset = hint + 1; + + while (offset < maxOffset && compare(value, array[start + hint - offset]) < 0) { + lastOffset = offset; + offset = (offset << 1) + 1; + + if (offset <= 0) { + offset = maxOffset; + } + } + + if (offset > maxOffset) { + offset = maxOffset; + } + + // Make offsets relative to start + const tmp = lastOffset; + lastOffset = hint - offset; + offset = hint - tmp; + + // value >= array[start + hint] + } else { + maxOffset = length - hint; + + while (offset < maxOffset && compare(value, array[start + hint + offset]) >= 0) { + lastOffset = offset; + offset = (offset << 1) + 1; + + if (offset <= 0) { + offset = maxOffset; + } + } + + if (offset > maxOffset) { + offset = maxOffset; + } + + // Make offsets relative to start + lastOffset += hint; + offset += hint; + } + + /* + * Now array[start+lastOffset] < value <= array[start+offset], so value + * belongs somewhere in the range (start + lastOffset, start + offset]. Do a + * binary search, with invariant array[start + lastOffset - 1] < value <= + * array[start + offset]. + */ + lastOffset++; + + while (lastOffset < offset) { + const m = lastOffset + ((offset - lastOffset) >>> 1); + + if (compare(value, array[start + m]) < 0) { + offset = m; + } else { + lastOffset = m + 1; + } + } + + return offset; +} + +class TimSort { + tmp: T[]; + minGallop = DEFAULT_MIN_GALLOPING; + length = 0; + tmpStorageLength = DEFAULT_TMP_STORAGE_LENGTH; + stackLength = 0; + runStart: number[]; + runLength: number[]; + stackSize = 0; + + constructor(public array: T[], public compare: Comparator) { + this.length = array.length; + + if (this.length < 2 * DEFAULT_TMP_STORAGE_LENGTH) { + this.tmpStorageLength = this.length >>> 1; + } + + this.tmp = new Array(this.tmpStorageLength); + + this.stackLength = ( + this.length < 120 + ? 5 + : this.length < 1542 + ? 10 + : this.length < 119151 + ? 19 + : 40 + ); + + this.runStart = new Array(this.stackLength); + this.runLength = new Array(this.stackLength); + } + + /** + * Push a new run on TimSort's stack. + * + * @param runStart - Start index of the run in the original array. + * @param runLength - Length of the run; + */ + pushRun(runStart: number, runLength: number) { + this.runStart[this.stackSize] = runStart; + this.runLength[this.stackSize] = runLength; + this.stackSize += 1; + } + + /** + * Merge runs on TimSort's stack so that the following holds for all i: + * 1) runLength[i - 3] > runLength[i - 2] + runLength[i - 1] + * 2) runLength[i - 2] > runLength[i - 1] + */ + mergeRuns() { + while (this.stackSize > 1) { + let n = this.stackSize - 2; + + if ((n >= 1 + && this.runLength[n - 1] <= this.runLength[n] + this.runLength[n + 1]) + || (n >= 2 + && this.runLength[n - 2] <= this.runLength[n] + this.runLength[n - 1])) { + if (this.runLength[n - 1] < this.runLength[n + 1]) { + n--; + } + } else if (this.runLength[n] > this.runLength[n + 1]) { + break; + } + this.mergeAt(n); + } + } + + /** + * Merge all runs on TimSort's stack until only one remains. + */ + forceMergeRuns() { + while (this.stackSize > 1) { + let n = this.stackSize - 2; + + if (n > 0 && this.runLength[n - 1] < this.runLength[n + 1]) { + n--; + } + + this.mergeAt(n); + } + } + + /** + * Merge the runs on the stack at positions i and i+1. Must be always be called + * with i=stackSize-2 or i=stackSize-3 (that is, we merge on top of the stack). + * + * @param i - Index of the run to merge in TimSort's stack. + */ + mergeAt(i: number) { + const compare = this.compare; + const array = this.array; + + let start1 = this.runStart[i]; + let length1 = this.runLength[i]; + const start2 = this.runStart[i + 1]; + let length2 = this.runLength[i + 1]; + + this.runLength[i] = length1 + length2; + + if (i === this.stackSize - 3) { + this.runStart[i + 1] = this.runStart[i + 2]; + this.runLength[i + 1] = this.runLength[i + 2]; + } + + this.stackSize--; + + /* + * Find where the first element in the second run goes in run1. Previous + * elements in run1 are already in place + */ + const k = gallopRight(array[start2], array, start1, length1, 0, compare); + start1 += k; + length1 -= k; + + if (length1 === 0) { + return; + } + + /* + * Find where the last element in the first run goes in run2. Next elements + * in run2 are already in place + */ + length2 = gallopLeft(array[start1 + length1 - 1], array, start2, length2, length2 - 1, compare); + + if (length2 === 0) { + return; + } + + /* + * Merge remaining runs. A tmp array with length = min(length1, length2) is + * used + */ + if (length1 <= length2) { + this.mergeLow(start1, length1, start2, length2); + } else { + this.mergeHigh(start1, length1, start2, length2); + } + } + + /** + * Merge two adjacent runs in a stable way. The runs must be such that the + * first element of run1 is bigger than the first element in run2 and the + * last element of run1 is greater than all the elements in run2. + * The method should be called when run1.length <= run2.length as it uses + * TimSort temporary array to store run1. Use mergeHigh if run1.length > + * run2.length. + * + * @param start1 - First element in run1. + * @param length1 - Length of run1. + * @param start2 - First element in run2. + * @param length2 - Length of run2. + */ + mergeLow(start1: number, length1: number, start2: number, length2: number) { + const compare = this.compare; + const array = this.array; + const tmp = this.tmp; + let i = 0; + + for (i = 0; i < length1; i++) { + tmp[i] = array[start1 + i]; + } + + let cursor1 = 0; + let cursor2 = start2; + let dest = start1; + + array[dest++] = array[cursor2++]; + + if (--length2 === 0) { + for (i = 0; i < length1; i++) { + array[dest + i] = tmp[cursor1 + i]; + } + return; + } + + if (length1 === 1) { + for (i = 0; i < length2; i++) { + array[dest + i] = array[cursor2 + i]; + } + array[dest + length2] = tmp[cursor1]; + return; + } + + let minGallop = this.minGallop; + + while (true) { + let count1 = 0; + let count2 = 0; + let exit = false; + + do { + if (compare(array[cursor2], tmp[cursor1]) < 0) { + array[dest++] = array[cursor2++]; + count2++; + count1 = 0; + + if (--length2 === 0) { + exit = true; + break; + } + } else { + array[dest++] = tmp[cursor1++]; + count1++; + count2 = 0; + if (--length1 === 1) { + exit = true; + break; + } + } + } while ((count1 | count2) < minGallop); + + if (exit) { + break; + } + + do { + count1 = gallopRight(array[cursor2], tmp, cursor1, length1, 0, compare); + + if (count1 !== 0) { + for (i = 0; i < count1; i++) { + array[dest + i] = tmp[cursor1 + i]; + } + + dest += count1; + cursor1 += count1; + length1 -= count1; + if (length1 <= 1) { + exit = true; + break; + } + } + + array[dest++] = array[cursor2++]; + + if (--length2 === 0) { + exit = true; + break; + } + + count2 = gallopLeft(tmp[cursor1], array, cursor2, length2, 0, compare); + + if (count2 !== 0) { + for (i = 0; i < count2; i++) { + array[dest + i] = array[cursor2 + i]; + } + + dest += count2; + cursor2 += count2; + length2 -= count2; + + if (length2 === 0) { + exit = true; + break; + } + } + array[dest++] = tmp[cursor1++]; + + if (--length1 === 1) { + exit = true; + break; + } + + minGallop--; + } while (count1 >= DEFAULT_MIN_GALLOPING || count2 >= DEFAULT_MIN_GALLOPING); + + if (exit) { + break; + } + + if (minGallop < 0) { + minGallop = 0; + } + + minGallop += 2; + } + + this.minGallop = minGallop; + + if (minGallop < 1) { + this.minGallop = 1; + } + + if (length1 === 1) { + for (i = 0; i < length2; i++) { + array[dest + i] = array[cursor2 + i]; + } + array[dest + length2] = tmp[cursor1]; + } else if (length1 === 0) { + // do nothing + } else { + for (i = 0; i < length1; i++) { + array[dest + i] = tmp[cursor1 + i]; + } + } + } + + /** + * Merge two adjacent runs in a stable way. The runs must be such that the + * first element of run1 is bigger than the first element in run2 and the + * last element of run1 is greater than all the elements in run2. + * The method should be called when run1.length > run2.length as it uses + * TimSort temporary array to store run2. Use mergeLow if run1.length <= + * run2.length. + * + * @param start1 - First element in run1. + * @param length1 - Length of run1. + * @param start2 - First element in run2. + * @param length2 - Length of run2. + */ + mergeHigh(start1: number, length1: number, start2: number, length2: number) { + const compare = this.compare; + const array = this.array; + const tmp = this.tmp; + let i = 0; + + for (i = 0; i < length2; i++) { + tmp[i] = array[start2 + i]; + } + + let cursor1 = start1 + length1 - 1; + let cursor2 = length2 - 1; + let dest = start2 + length2 - 1; + let customCursor = 0; + let customDest = 0; + + array[dest--] = array[cursor1--]; + + if (--length1 === 0) { + customCursor = dest - (length2 - 1); + + for (i = 0; i < length2; i++) { + array[customCursor + i] = tmp[i]; + } + + return; + } + + if (length2 === 1) { + dest -= length1; + cursor1 -= length1; + customDest = dest + 1; + customCursor = cursor1 + 1; + + for (i = length1 - 1; i >= 0; i--) { + array[customDest + i] = array[customCursor + i]; + } + + array[dest] = tmp[cursor2]; + return; + } + + let minGallop = this.minGallop; + + while (true) { + let count1 = 0; + let count2 = 0; + let exit = false; + + do { + if (compare(tmp[cursor2], array[cursor1]) < 0) { + array[dest--] = array[cursor1--]; + count1++; + count2 = 0; + if (--length1 === 0) { + exit = true; + break; + } + } else { + array[dest--] = tmp[cursor2--]; + count2++; + count1 = 0; + if (--length2 === 1) { + exit = true; + break; + } + } + } while ((count1 | count2) < minGallop); + + if (exit) { + break; + } + + do { + count1 = length1 - gallopRight(tmp[cursor2], array, start1, length1, length1 - 1, compare); + + if (count1 !== 0) { + dest -= count1; + cursor1 -= count1; + length1 -= count1; + customDest = dest + 1; + customCursor = cursor1 + 1; + + for (i = count1 - 1; i >= 0; i--) { + array[customDest + i] = array[customCursor + i]; + } + + if (length1 === 0) { + exit = true; + break; + } + } + + array[dest--] = tmp[cursor2--]; + + if (--length2 === 1) { + exit = true; + break; + } + + count2 = length2 - gallopLeft(array[cursor1], tmp, 0, length2, length2 - 1, compare); + + if (count2 !== 0) { + dest -= count2; + cursor2 -= count2; + length2 -= count2; + customDest = dest + 1; + customCursor = cursor2 + 1; + + for (i = 0; i < count2; i++) { + array[customDest + i] = tmp[customCursor + i]; + } + + if (length2 <= 1) { + exit = true; + break; + } + } + + array[dest--] = array[cursor1--]; + + if (--length1 === 0) { + exit = true; + break; + } + + minGallop--; + } while (count1 >= DEFAULT_MIN_GALLOPING || count2 >= DEFAULT_MIN_GALLOPING); + + if (exit) { + break; + } + + if (minGallop < 0) { + minGallop = 0; + } + + minGallop += 2; + } + + this.minGallop = minGallop; + + if (minGallop < 1) { + this.minGallop = 1; + } + + if (length2 === 1) { + dest -= length1; + cursor1 -= length1; + customDest = dest + 1; + customCursor = cursor1 + 1; + + for (i = length1 - 1; i >= 0; i--) { + array[customDest + i] = array[customCursor + i]; + } + + array[dest] = tmp[cursor2]; + } else if (length2 === 0) { + // do nothing + } else { + customCursor = dest - (length2 - 1); + for (i = 0; i < length2; i++) { + array[customCursor + i] = tmp[i]; + } + } + } +} + +/** + * Sort an array in the range [lo, hi) using TimSort. + * + * @param array - The array to sort. + * @param compare - Item comparison function. Default is + * alphabetical + * @param lo - First element in the range (inclusive). + * @param hi - Last element in the range. + * comparator. + */ +export function sort(array: T[], compare: Comparator | undefined = alphabeticalCompare, lo = 0, hi: number = array.length) { + // if (!Array.isArray(array)) { + // throw new TypeError('Can only sort arrays'); + // } + + /* + * Handle the case where a comparison function is not provided. We do + * lexicographic sorting + */ + if (typeof compare !== 'function') { + hi = lo; + lo = compare; + compare = alphabeticalCompare; + } + + let remaining = hi - lo; + + // The array is already sorted + if (remaining < 2) { + return; + } + + let runLength = 0; + // On small arrays binary sort can be used directly + if (remaining < DEFAULT_MIN_MERGE) { + runLength = makeAscendingRun(array, lo, hi, compare); + binaryInsertionSort(array, lo, hi, lo + runLength, compare); + return; + } + + const ts = new TimSort(array, compare); + + const minRun = minRunLength(remaining); + + do { + runLength = makeAscendingRun(array, lo, hi, compare); + if (runLength < minRun) { + let force = remaining; + if (force > minRun) { + force = minRun; + } + + binaryInsertionSort(array, lo, lo + force, lo + runLength, compare); + runLength = force; + } + // Push new run and merge if necessary + ts.pushRun(lo, runLength); + ts.mergeRuns(); + + // Go find next run + remaining -= runLength; + lo += runLength; + } while (remaining !== 0); + + // Force merging of remaining runs + ts.forceMergeRuns(); +} diff --git a/bun.lockb b/bun.lockb index 5605f47b..360abbcb 100755 Binary files a/bun.lockb and b/bun.lockb differ diff --git a/package.json b/package.json index efff81b0..677db73d 100644 --- a/package.json +++ b/package.json @@ -29,7 +29,6 @@ "punycode": "^2.3.1", "table": "^6.8.2", "tar-stream": "^3.1.7", - "timsort": "^0.3.0", "tldts": "^6.1.19" }, "devDependencies": { @@ -38,7 +37,6 @@ "@types/async-retry": "^1.4.8", "@types/bun": "^1.1.1", "@types/tar-stream": "^3.1.3", - "@types/timsort": "^0.3.3", "bun-types": "^1.1.7", "eslint": "^9.2.0", "eslint-config-sukka": "6.0.0-beta.3",