Skip to content

Commit

Permalink
Reduce code duplication by moving partition_lomuto_branchless_simple …
Browse files Browse the repository at this point in the history
…into quicksort module
  • Loading branch information
Voultapher committed Aug 26, 2024
1 parent 1805c29 commit f46fcfe
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 79 deletions.
78 changes: 1 addition & 77 deletions core/src/slice/sort/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,11 @@
//! for pivot selection. Using this as a fallback ensures O(n) worst case running time with
//! better performance than one would get using heapsort as fallback.
use crate::intrinsics;
use crate::mem::{self, SizedTypeProperties};
#[cfg(not(feature = "optimize_for_size"))]
use crate::slice::sort::shared::pivot::choose_pivot;
#[cfg(not(feature = "optimize_for_size"))]
use crate::slice::sort::shared::smallsort::insertion_sort_shift_left;
#[cfg(not(feature = "optimize_for_size"))]
use crate::slice::sort::unstable::quicksort::partition;

/// Reorders the slice such that the element at `index` is at its final sorted position.
Expand Down Expand Up @@ -252,15 +250,7 @@ fn median_of_ninthers<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F)

median_of_medians(&mut v[lo..lo + frac], is_less, pivot);

#[cfg(not(feature = "optimize_for_size"))]
{
partition(v, lo + pivot, is_less)
}

#[cfg(feature = "optimize_for_size")]
{
partition_size_opt(v, lo + pivot, is_less)
}
partition(v, lo + pivot, is_less)
}

/// Moves around the 9 elements at the indices a..i, such that
Expand Down Expand Up @@ -351,69 +341,3 @@ fn bubble_sort<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) {
n -= 1;
}
}

#[cfg(feature = "optimize_for_size")]
fn partition_size_opt<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> usize
where
F: FnMut(&T, &T) -> bool,
{
let len = v.len();

// Allows for panic-free code-gen by proving this property to the compiler.
if len == 0 {
return 0;
}

if pivot >= len {
intrinsics::abort();
}

// SAFETY: We checked that `pivot` is in-bounds.
unsafe {
// Place the pivot at the beginning of slice.
v.swap_unchecked(0, pivot);
}
let (pivot, v_without_pivot) = v.split_at_mut(1);

// Assuming that Rust generates noalias LLVM IR we can be sure that a partition function
// signature of the form `(v: &mut [T], pivot: &T)` guarantees that pivot and v can't alias.
// Having this guarantee is crucial for optimizations. It's possible to copy the pivot value
// into a stack value, but this creates issues for types with interior mutability mandating
// a drop guard.
let pivot = &mut pivot[0];

let num_lt = partition_lomuto_branchless_simple(v_without_pivot, pivot, is_less);

if num_lt >= len {
intrinsics::abort();
}

// SAFETY: We checked that `num_lt` is in-bounds.
unsafe {
// Place the pivot between the two partitions.
v.swap_unchecked(0, num_lt);
}

num_lt
}

#[cfg(feature = "optimize_for_size")]
fn partition_lomuto_branchless_simple<T, F: FnMut(&T, &T) -> bool>(
v: &mut [T],
pivot: &T,
is_less: &mut F,
) -> usize {
let mut left = 0;

for right in 0..v.len() {
// SAFETY: `left` can at max be incremented by 1 each loop iteration, which implies that
// left <= right and that both are in-bounds.
unsafe {
let right_is_lt = is_less(v.get_unchecked(right), pivot);
v.swap_unchecked(left, right);
left += right_is_lt as usize;
}
}

left
}
1 change: 0 additions & 1 deletion core/src/slice/sort/unstable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ use crate::slice::sort::shared::find_existing_run;
use crate::slice::sort::shared::smallsort::insertion_sort_shift_left;

pub(crate) mod heapsort;
#[cfg(not(feature = "optimize_for_size"))]
pub(crate) mod quicksort;

/// Unstable sort called ipnsort by Lukas Bergdoll and Orson Peters.
Expand Down
39 changes: 38 additions & 1 deletion core/src/slice/sort/unstable/quicksort.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
//! This module contains an unstable quicksort and two partition implementations.
use crate::mem::{self, ManuallyDrop};
#[cfg(not(feature = "optimize_for_size"))]
use crate::slice::sort::shared::pivot::choose_pivot;
#[cfg(not(feature = "optimize_for_size"))]
use crate::slice::sort::shared::smallsort::UnstableSmallSortTypeImpl;
use crate::{intrinsics, ptr};

Expand All @@ -11,6 +13,7 @@ use crate::{intrinsics, ptr};
///
/// `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero,
/// this function will immediately switch to heapsort.
#[cfg(not(feature = "optimize_for_size"))]
pub(crate) fn quicksort<'a, T, F>(
mut v: &'a mut [T],
mut ancestor_pivot: Option<&'a T>,
Expand Down Expand Up @@ -138,7 +141,16 @@ const fn inst_partition<T, F: FnMut(&T, &T) -> bool>() -> fn(&mut [T], &T, &mut
if mem::size_of::<T>() <= MAX_BRANCHLESS_PARTITION_SIZE {
// Specialize for types that are relatively cheap to copy, where branchless optimizations
// have large leverage e.g. `u64` and `String`.
partition_lomuto_branchless_cyclic::<T, F>

#[cfg(not(feature = "optimize_for_size"))]
{
partition_lomuto_branchless_cyclic::<T, F>
}

#[cfg(feature = "optimize_for_size")]
{
partition_lomuto_branchless_simple::<T, F>
}
} else {
partition_hoare_branchy_cyclic::<T, F>
}
Expand Down Expand Up @@ -224,6 +236,7 @@ where
}
}

#[cfg(not(feature = "optimize_for_size"))]
struct PartitionState<T> {
// The current element that is being looked at, scans left to right through slice.
right: *mut T,
Expand All @@ -234,6 +247,7 @@ struct PartitionState<T> {
gap: GapGuardRaw<T>,
}

#[cfg(not(feature = "optimize_for_size"))]
fn partition_lomuto_branchless_cyclic<T, F>(v: &mut [T], pivot: &T, is_less: &mut F) -> usize
where
F: FnMut(&T, &T) -> bool,
Expand Down Expand Up @@ -325,6 +339,27 @@ where
}
}

#[cfg(feature = "optimize_for_size")]
fn partition_lomuto_branchless_simple<T, F: FnMut(&T, &T) -> bool>(
v: &mut [T],
pivot: &T,
is_less: &mut F,
) -> usize {
let mut left = 0;

for right in 0..v.len() {
// SAFETY: `left` can at max be incremented by 1 each loop iteration, which implies that
// left <= right and that both are in-bounds.
unsafe {
let right_is_lt = is_less(v.get_unchecked(right), pivot);
v.swap_unchecked(left, right);
left += right_is_lt as usize;
}
}

left
}

struct GapGuard<T> {
pos: *mut T,
value: ManuallyDrop<T>,
Expand All @@ -342,11 +377,13 @@ impl<T> Drop for GapGuard<T> {

/// Ideally this wouldn't be needed and we could just use the regular GapGuard.
/// See comment in [`partition_lomuto_branchless_cyclic`].
#[cfg(not(feature = "optimize_for_size"))]
struct GapGuardRaw<T> {
pos: *mut T,
value: *mut T,
}

#[cfg(not(feature = "optimize_for_size"))]
impl<T> Drop for GapGuardRaw<T> {
fn drop(&mut self) {
// SAFETY: `self` MUST be constructed in a way that makes copying the gap value into
Expand Down

0 comments on commit f46fcfe

Please sign in to comment.