|
6 | 6 | //! for pivot selection. Using this as a fallback ensures O(n) worst case running time with
|
7 | 7 | //! better performance than one would get using heapsort as fallback.
|
8 | 8 |
|
| 9 | +use crate::intrinsics; |
9 | 10 | use crate::mem::{self, SizedTypeProperties};
|
| 11 | +#[cfg(not(feature = "optimize_for_size"))] |
10 | 12 | use crate::slice::sort::shared::pivot::choose_pivot;
|
| 13 | +#[cfg(not(feature = "optimize_for_size"))] |
11 | 14 | use crate::slice::sort::shared::smallsort::insertion_sort_shift_left;
|
| 15 | +#[cfg(not(feature = "optimize_for_size"))] |
12 | 16 | use crate::slice::sort::unstable::quicksort::partition;
|
13 | 17 |
|
14 | 18 | /// Reorders the slice such that the element at `index` is at its final sorted position.
|
|
40 | 44 | let min_idx = min_index(v, &mut is_less).unwrap();
|
41 | 45 | v.swap(min_idx, index);
|
42 | 46 | } else {
|
43 |
| - partition_at_index_loop(v, index, None, &mut is_less); |
| 47 | + #[cfg(not(feature = "optimize_for_size"))] |
| 48 | + { |
| 49 | + partition_at_index_loop(v, index, None, &mut is_less); |
| 50 | + } |
| 51 | + |
| 52 | + #[cfg(feature = "optimize_for_size")] |
| 53 | + { |
| 54 | + median_of_medians(v, &mut is_less, index); |
| 55 | + } |
44 | 56 | }
|
45 | 57 |
|
46 | 58 | let (left, right) = v.split_at_mut(index);
|
|
53 | 65 | // most once, it doesn't make sense to use something more sophisticated than insertion-sort.
|
54 | 66 | const INSERTION_SORT_THRESHOLD: usize = 16;
|
55 | 67 |
|
| 68 | +#[cfg(not(feature = "optimize_for_size"))] |
56 | 69 | fn partition_at_index_loop<'a, T, F>(
|
57 | 70 | mut v: &'a mut [T],
|
58 | 71 | mut index: usize,
|
@@ -167,8 +180,17 @@ fn median_of_medians<T, F: FnMut(&T, &T) -> bool>(mut v: &mut [T], is_less: &mut
|
167 | 180 | loop {
|
168 | 181 | if v.len() <= INSERTION_SORT_THRESHOLD {
|
169 | 182 | if v.len() >= 2 {
|
170 |
| - insertion_sort_shift_left(v, 1, is_less); |
| 183 | + #[cfg(not(feature = "optimize_for_size"))] |
| 184 | + { |
| 185 | + insertion_sort_shift_left(v, 1, is_less); |
| 186 | + } |
| 187 | + |
| 188 | + #[cfg(feature = "optimize_for_size")] |
| 189 | + { |
| 190 | + bubble_sort(v, is_less); |
| 191 | + } |
171 | 192 | }
|
| 193 | + |
172 | 194 | return;
|
173 | 195 | }
|
174 | 196 |
|
@@ -230,7 +252,15 @@ fn median_of_ninthers<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F)
|
230 | 252 |
|
231 | 253 | median_of_medians(&mut v[lo..lo + frac], is_less, pivot);
|
232 | 254 |
|
233 |
| - partition(v, lo + pivot, is_less) |
| 255 | + #[cfg(not(feature = "optimize_for_size"))] |
| 256 | + { |
| 257 | + partition(v, lo + pivot, is_less) |
| 258 | + } |
| 259 | + |
| 260 | + #[cfg(feature = "optimize_for_size")] |
| 261 | + { |
| 262 | + partition_size_opt(v, lo + pivot, is_less) |
| 263 | + } |
234 | 264 | }
|
235 | 265 |
|
236 | 266 | /// Moves around the 9 elements at the indices a..i, such that
|
@@ -298,3 +328,92 @@ fn median_idx<T, F: FnMut(&T, &T) -> bool>(
|
298 | 328 | }
|
299 | 329 | b
|
300 | 330 | }
|
| 331 | + |
| 332 | +// It's possible to re-use the insertion sort in the smallsort module, but with optimize_for_size it |
| 333 | +// would clutter that module with cfg statements and make it generally harder to read and develop. |
| 334 | +// So to decouple things and simplify it, we use an even smaller bubble sort. |
| 335 | +#[cfg(feature = "optimize_for_size")] |
| 336 | +fn bubble_sort<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) { |
| 337 | + let mut n = v.len(); |
| 338 | + let mut did_swap = true; |
| 339 | + |
| 340 | + while did_swap && n > 1 { |
| 341 | + did_swap = false; |
| 342 | + for i in 1..n { |
| 343 | + // SAFETY: The loop construction implies that `i` and `i - 1` will always be in-bounds. |
| 344 | + unsafe { |
| 345 | + if is_less(v.get_unchecked(i), v.get_unchecked(i - 1)) { |
| 346 | + v.swap_unchecked(i - 1, i); |
| 347 | + did_swap = true; |
| 348 | + } |
| 349 | + } |
| 350 | + } |
| 351 | + n -= 1; |
| 352 | + } |
| 353 | +} |
| 354 | + |
| 355 | +#[cfg(feature = "optimize_for_size")] |
| 356 | +fn partition_size_opt<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> usize |
| 357 | +where |
| 358 | + F: FnMut(&T, &T) -> bool, |
| 359 | +{ |
| 360 | + let len = v.len(); |
| 361 | + |
| 362 | + // Allows for panic-free code-gen by proving this property to the compiler. |
| 363 | + if len == 0 { |
| 364 | + return 0; |
| 365 | + } |
| 366 | + |
| 367 | + if pivot >= len { |
| 368 | + intrinsics::abort(); |
| 369 | + } |
| 370 | + |
| 371 | + // SAFETY: We checked that `pivot` is in-bounds. |
| 372 | + unsafe { |
| 373 | + // Place the pivot at the beginning of slice. |
| 374 | + v.swap_unchecked(0, pivot); |
| 375 | + } |
| 376 | + let (pivot, v_without_pivot) = v.split_at_mut(1); |
| 377 | + |
| 378 | + // Assuming that Rust generates noalias LLVM IR we can be sure that a partition function |
| 379 | + // signature of the form `(v: &mut [T], pivot: &T)` guarantees that pivot and v can't alias. |
| 380 | + // Having this guarantee is crucial for optimizations. It's possible to copy the pivot value |
| 381 | + // into a stack value, but this creates issues for types with interior mutability mandating |
| 382 | + // a drop guard. |
| 383 | + let pivot = &mut pivot[0]; |
| 384 | + |
| 385 | + let num_lt = partition_lomuto_branchless_simple(v_without_pivot, pivot, is_less); |
| 386 | + |
| 387 | + if num_lt >= len { |
| 388 | + intrinsics::abort(); |
| 389 | + } |
| 390 | + |
| 391 | + // SAFETY: We checked that `num_lt` is in-bounds. |
| 392 | + unsafe { |
| 393 | + // Place the pivot between the two partitions. |
| 394 | + v.swap_unchecked(0, num_lt); |
| 395 | + } |
| 396 | + |
| 397 | + num_lt |
| 398 | +} |
| 399 | + |
| 400 | +#[cfg(feature = "optimize_for_size")] |
| 401 | +fn partition_lomuto_branchless_simple<T, F: FnMut(&T, &T) -> bool>( |
| 402 | + v: &mut [T], |
| 403 | + pivot: &T, |
| 404 | + is_less: &mut F, |
| 405 | +) -> usize { |
| 406 | + let mut left = 0; |
| 407 | + |
| 408 | + for right in 0..v.len() { |
| 409 | + // SAFETY: `left` can at max be incremented by 1 each loop iteration, which implies that |
| 410 | + // left <= right and that both are in-bounds. |
| 411 | + unsafe { |
| 412 | + let right_is_lt = is_less(v.get_unchecked(right), pivot); |
| 413 | + v.swap_unchecked(left, right); |
| 414 | + left += right_is_lt as usize; |
| 415 | + } |
| 416 | + } |
| 417 | + |
| 418 | + left |
| 419 | +} |
0 commit comments