Commit 721609e
std::vec: implement a stable merge sort, deferring to insertion sort for
very small runs. This uses a lot of unsafe code for speed; otherwise we
would have to sort by sorting lists of indices and then doing a pile of
swaps to put everything in the correct place. Fixes #9819.
1 parent 3906823 commit 721609e

2 files changed: +348 -1 lines changed

src/libstd/vec.rs (+260 -1)
@@ -1921,6 +1921,150 @@ impl<T:Eq> OwnedEqVector<T> for ~[T] {
     }
 }
 
+fn merge_sort<T>(v: &mut [T], less_eq: |&T, &T| -> bool) {
+    // warning: this wildly uses unsafe.
+    static INSERTION: uint = 8;
+
+    let len = v.len();
+
+    // allocate some memory to use as scratch memory, we keep the
+    // length 0 so we can keep shallow copies of the contents of `v`
+    // without risking the dtors running on an object twice if
+    // `less_eq` fails.
+    let mut working_space = with_capacity(2 * len);
+    // these both are buffers of length `len`.
+    let mut buf_dat = working_space.as_mut_ptr();
+    let mut buf_tmp = unsafe {buf_dat.offset(len as int)};
+
+    // length `len`.
+    let buf_v = v.as_ptr();
+
+    // step 1. sort short runs with insertion sort. This takes the
+    // values from `v` and sorts them into `buf_dat`, leaving that
+    // with sorted runs of length INSERTION.
+
+    // We could hardcode the sorting comparisons here, and we could
+    // manipulate/step the pointers themselves, rather than repeatedly
+    // .offset-ing.
+    for start in range_step(0, len, INSERTION) {
+        // start <= i <= len;
+        for i in range(start, cmp::min(start + INSERTION, len)) {
+            // j satisfies: start <= j <= i;
+            let mut j = i as int;
+            unsafe {
+                // `i` is in bounds.
+                let read_ptr = buf_v.offset(i as int);
+
+                // find where to insert, we need to do strict <,
+                // rather than <=, to maintain stability.
+
+                // start <= j - 1 < len, so .offset(j - 1) is in
+                // bounds.
+                while j > start as int && !less_eq(&*buf_dat.offset(j - 1), &*read_ptr) {
+                    j -= 1;
+                }
+
+                // shift everything to the right, to make space to
+                // insert this value.
+
+                // j + 1 could be `len` (for the last `i`), but in
+                // that case, `i == j` so we don't copy. The
+                // `.offset(j)` is always in bounds.
+                ptr::copy_memory(buf_dat.offset(j + 1),
+                                 buf_dat.offset(j),
+                                 i - j as uint);
+                ptr::copy_nonoverlapping_memory(buf_dat.offset(j), read_ptr, 1);
+            }
+        }
+    }
+
+    // step 2. merge the sorted runs.
+    let mut width = INSERTION;
+    while width < len {
+        // merge the sorted runs of length `width` in `buf_dat` two at
+        // a time, placing the result in `buf_tmp`.
+
+        // 0 <= start <= len.
+        for start in range_step(0, len, 2 * width) {
+            // manipulate pointers directly for speed (rather than
+            // using a `for` loop with `range` and `.offset` inside
+            // that loop).
+            unsafe {
+                // the end of the first run & start of the
+                // second. Offset of `len` is defined, since this is
+                // precisely one element past the end of the object.
+                let right_start = buf_dat.offset(cmp::min(start + width, len) as int);
+                // end of the second. Similar reasoning to the above re safety.
+                let right_end_idx = cmp::min(start + 2 * width, len);
+                let right_end = buf_dat.offset(right_end_idx as int);
+
+                // the pointers to the elements under consideration
+                // from the two runs.
+
+                // both of these are in bounds.
+                let mut left = buf_dat.offset(start as int);
+                let mut right = right_start;
+
+                // where we're putting the results, it is a run of
+                // length `2*width`, so we step it once for each step
+                // of either `left` or `right`. `buf_tmp` has length
+                // `len`, so these are in bounds.
+                let mut out = buf_tmp.offset(start as int);
+                let out_end = buf_tmp.offset(right_end_idx as int);
+
+                while out < out_end {
+                    // Either the left or the right run is exhausted,
+                    // so just copy the remainder from the other run
+                    // and move on; this gives a huge speed-up (order
+                    // of 25%) for mostly sorted vectors (the best
+                    // case).
+                    if left == right_start {
+                        // the number remaining in this run.
+                        let elems = (right_end as uint - right as uint) / mem::size_of::<T>();
+                        ptr::copy_nonoverlapping_memory(out, right, elems);
+                        break;
+                    } else if right == right_end {
+                        let elems = (right_start as uint - left as uint) / mem::size_of::<T>();
+                        ptr::copy_nonoverlapping_memory(out, left, elems);
+                        break;
+                    }
+
+                    // check which side is smaller, and that's the
+                    // next element for the new run.
+
+                    // `left < right_start` and `right < right_end`,
+                    // so these are valid.
+                    let to_copy = if less_eq(&*left, &*right) {
+                        step(&mut left)
+                    } else {
+                        step(&mut right)
+                    };
+                    ptr::copy_nonoverlapping_memory(out, to_copy, 1);
+                    step(&mut out);
+                }
+            }
+        }
+
+        util::swap(&mut buf_dat, &mut buf_tmp);
+
+        width *= 2;
+    }
+
+    // write the result to `v` in one go, so that there are never two copies
+    // of the same object in `v`.
+    unsafe {
+        ptr::copy_nonoverlapping_memory(v.as_mut_ptr(), buf_dat, len);
+    }
+
+    // increment the pointer, returning the old pointer.
+    #[inline(always)]
+    unsafe fn step<T>(ptr: &mut *mut T) -> *mut T {
+        let old = *ptr;
+        *ptr = ptr.offset(1);
+        old
+    }
+}
+
 /// Extension methods for vectors such that their elements are
 /// mutable.
 pub trait MutableVector<'a, T> {
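The unsafe pointer code above follows a simple shape: insertion-sort runs of length INSERTION, then merge pairs of sorted runs of doubling width between two buffers. Below is a minimal safe sketch of that same bottom-up strategy, written against a modern Rust toolchain (the 2013 dialect above no longer compiles); the names `merge_sort_sketch` and `le` are invented for illustration, and `Clone` stands in for the shallow copies, so this deliberately trades away the speed and failure-safety properties the real code works for.

fn merge_sort_sketch<T: Clone>(v: &mut [T], le: impl Fn(&T, &T) -> bool) {
    const INSERTION: usize = 8;
    let len = v.len();

    // Step 1: insertion-sort each short run in place. Elements only
    // move past strictly greater neighbours, which preserves stability.
    for chunk in v.chunks_mut(INSERTION) {
        for i in 1..chunk.len() {
            let mut j = i;
            while j > 0 && !le(&chunk[j - 1], &chunk[j]) {
                chunk.swap(j - 1, j);
                j -= 1;
            }
        }
    }

    // Step 2: merge pairs of sorted runs of doubling width via a buffer,
    // mirroring the buf_dat/buf_tmp ping-pong above.
    let mut width = INSERTION;
    let mut buf: Vec<T> = Vec::with_capacity(len);
    while width < len {
        buf.clear();
        for pair in v.chunks(2 * width) {
            // left run is pair[..mid], right run is pair[mid..].
            let mid = width.min(pair.len());
            let (mut l, mut r) = (0, mid);
            while l < mid || r < pair.len() {
                // Ties take from the left run: this is the stability-
                // preserving choice, matching `less_eq(left, right)` above.
                if r == pair.len() || (l < mid && le(&pair[l], &pair[r])) {
                    buf.push(pair[l].clone());
                    l += 1;
                } else {
                    buf.push(pair[r].clone());
                    r += 1;
                }
            }
        }
        v.clone_from_slice(&buf);
        width *= 2;
    }
}

The strict comparison in step 1 (`!le(&chunk[j - 1], &chunk[j])`) is the same trick the real code's comment calls out: an element shifts left only past strictly greater neighbours, so equal elements never swap order.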
@@ -2020,6 +2164,25 @@ pub trait MutableVector<'a, T> {
     /// Reverse the order of elements in a vector, in place
     fn reverse(self);
 
+    /// Sort the vector, in place, using `less_eq` to compare `a <= b`.
+    ///
+    /// This sort is `O(n log n)` worst-case and stable, but allocates
+    /// approximately `2 * n`, where `n` is the length of `self`.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// let mut v = [5, 4, 1, 3, 2];
+    /// v.sort(|a, b| *a <= *b);
+    /// assert_eq!(v, [1, 2, 3, 4, 5]);
+    ///
+    /// // reverse sorting
+    /// v.sort(|a, b| *b <= *a);
+    /// assert_eq!(v, [5, 4, 3, 2, 1]);
+    /// ```
+    fn sort(self, less_eq: |&T, &T| -> bool);
+
     /**
      * Consumes `src` and moves as many elements as it can into `self`
      * from the range [start,end).
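Because the comparison is an arbitrary `less_eq` closure, the sort can order elements by any projection of the element. A small usage sketch in the dialect of this commit (the vector contents are invented for illustration), sorting strings by length:

let mut v = ~[~"bb", ~"a", ~"ccc", ~"dd"];
v.sort(|a, b| a.len() <= b.len());
// stability keeps the equal-length ~"bb" ahead of ~"dd"
assert_eq!(v, ~[~"a", ~"bb", ~"dd", ~"ccc"]);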
@@ -2164,6 +2327,11 @@ impl<'a,T> MutableVector<'a, T> for &'a mut [T] {
         }
     }
 
+    #[inline]
+    fn sort(self, less_eq: |&T, &T| -> bool) {
+        merge_sort(self, less_eq)
+    }
+
     #[inline]
     fn move_from(self, mut src: ~[T], start: uint, end: uint) -> uint {
         for (a, b) in self.mut_iter().zip(src.mut_slice(start, end).mut_iter()) {
@@ -2692,6 +2860,7 @@ mod tests {
     use vec::*;
     use cmp::*;
     use prelude::*;
+    use rand::{Rng, task_rng};
 
     fn square(n: uint) -> uint { n * n }
 
@@ -3298,6 +3467,57 @@ mod tests {
         assert!(v3.is_empty());
     }
 
+    #[test]
+    fn test_sort() {
+        for len in range(4u, 25) {
+            for _ in range(0, 100) {
+                let mut v = task_rng().gen_vec::<uint>(len);
+                v.sort(|a,b| a <= b);
+
+                assert!(v.windows(2).all(|w| w[0] <= w[1]));
+            }
+        }
+
+        // shouldn't fail/crash
+        let mut v: [uint, .. 0] = [];
+        v.sort(|a,b| a <= b);
+
+        let mut v = [0xDEADBEEF];
+        v.sort(|a,b| a <= b);
+        assert_eq!(v, [0xDEADBEEF]);
+    }
+
+    #[test]
+    fn test_sort_stability() {
+        for len in range(4, 25) {
+            for _ in range(0, 10) {
+                let mut counts = [0, .. 10];
+
+                // create a vector like [(6, 1), (5, 1), (6, 2), ...],
+                // where the first item of each tuple is random, but
+                // the second item represents which occurrence of that
+                // number this element is, i.e. the second elements
+                // will occur in sorted order.
+                let mut v = range(0, len).map(|_| {
+                        let n = task_rng().gen::<uint>() % 10;
+                        counts[n] += 1;
+                        (n, counts[n])
+                    }).to_owned_vec();
+
+                // only sort on the first element, so an unstable sort
+                // may mix up the counts.
+                v.sort(|&(a,_), &(b,_)| a <= b);
+
+                // this comparison includes the count (the second item
+                // of the tuple), so elements with equal first items
+                // will need to be ordered with increasing
+                // counts... i.e. exactly asserting that this sort is
+                // stable.
+                assert!(v.windows(2).all(|w| w[0] <= w[1]));
+            }
+        }
+    }
+
     #[test]
     fn test_partition() {
         assert_eq!((~[]).partition(|x: &int| *x < 3), (~[], ~[]));
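To see why the final window check in `test_sort_stability` detects instability: tuples compare lexicographically, so if an unstable sort produced, say, [(5, 1), (6, 2), (6, 1)], the adjacent pair (6, 2), (6, 1) would fail `w[0] <= w[1]` even though the first components alone are in sorted order.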
@@ -4124,7 +4344,8 @@ mod bench {
     use vec::VectorVector;
     use option::*;
     use ptr;
-    use rand::{weak_rng, Rng};
+    use rand::{weak_rng, task_rng, Rng};
+    use mem;
 
     #[bench]
     fn iterator(bh: &mut BenchHarness) {
@@ -4325,4 +4546,42 @@ mod bench {
             }
         })
     }
+
+    #[bench]
+    fn sort_random_small(bh: &mut BenchHarness) {
+        let mut rng = weak_rng();
+        bh.iter(|| {
+            let mut v: ~[f64] = rng.gen_vec(5);
+            v.sort(|a,b| *a <= *b);
+        });
+        bh.bytes = 5 * mem::size_of::<f64>() as u64;
+    }
+
+    #[bench]
+    fn sort_random_medium(bh: &mut BenchHarness) {
+        let mut rng = weak_rng();
+        bh.iter(|| {
+            let mut v: ~[f64] = rng.gen_vec(100);
+            v.sort(|a,b| *a <= *b);
+        });
+        bh.bytes = 100 * mem::size_of::<f64>() as u64;
+    }
+
+    #[bench]
+    fn sort_random_large(bh: &mut BenchHarness) {
+        let mut rng = weak_rng();
+        bh.iter(|| {
+            let mut v: ~[f64] = rng.gen_vec(10000);
+            v.sort(|a,b| *a <= *b);
+        });
+        bh.bytes = 10000 * mem::size_of::<f64>() as u64;
+    }
+
+    #[bench]
+    fn sort_sorted(bh: &mut BenchHarness) {
+        let mut v = vec::from_fn(10000, |i| i);
+        bh.iter(|| {
+            v.sort(|a,b| *a <= *b);
+        });
+        bh.bytes = (v.len() * mem::size_of_val(&v[0])) as u64;
+    }
 }
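Setting `bh.bytes` tells the benchmark harness how much data one iteration processes, so it can report throughput alongside time per iteration; for `sort_random_large`, for example, that is 10000 * size_of::<f64>() = 80000 bytes per sort.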
Second file (new, +88)
@@ -0,0 +1,88 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::rand::{task_rng, Rng};
+
+static MAX_LEN: uint = 20;
+static mut drop_counts: [uint, .. MAX_LEN] = [0, .. MAX_LEN];
+static mut clone_count: uint = 0;
+
+#[deriving(Rand, Ord)]
+struct DropCounter { x: uint, clone_num: uint }
+
+impl Clone for DropCounter {
+    fn clone(&self) -> DropCounter {
+        let num = unsafe { clone_count };
+        unsafe { clone_count += 1; }
+        DropCounter {
+            x: self.x,
+            clone_num: num
+        }
+    }
+}
+
+impl Drop for DropCounter {
+    fn drop(&mut self) {
+        unsafe {
+            // Rand creates some with arbitrary clone_nums
+            if self.clone_num < MAX_LEN {
+                drop_counts[self.clone_num] += 1;
+            }
+        }
+    }
+}
+
+pub fn main() {
+    // len can't go above 64.
+    for len in range(2u, MAX_LEN) {
+        for _ in range(0, 10) {
+            let main = task_rng().gen_vec::<DropCounter>(len);
+
+            // work out the total number of comparisons required to sort
+            // this array...
+            let mut count = 0;
+            main.clone().sort(|a, b| { count += 1; a <= b });
+
+            // ... and then fail on each and every single one.
+            for fail_countdown in range(0, count) {
+                // refresh the counters.
+                unsafe {
+                    drop_counts = [0, .. MAX_LEN];
+                    clone_count = 0;
+                }
+
+                let v = main.clone();
+
+                std::task::try(proc() {
+                    let mut v = v;
+                    let mut fail_countdown = fail_countdown;
+                    v.sort(|a, b| {
+                        if fail_countdown == 0 {
+                            fail!()
+                        }
+                        fail_countdown -= 1;
+                        a <= b
+                    })
+                });
+
+                // check that the number of things dropped is exactly
+                // what we expect (i.e. the contents of `v`).
+                unsafe {
+                    for (i, &c) in drop_counts.iter().enumerate() {
+                        let expected = if i < len {1} else {0};
+                        assert!(c == expected,
+                                "found drop count == {} for i == {}, len == {}",
+                                c, i, len);
+                    }
+                }
+            }
+        }
+    }
+}
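This test pins down the failure-safety property that motivates the length-0 `working_space` trick in `merge_sort`: the scratch buffers hold only shallow copies, so when `less_eq` fails (here via `fail!()` at every possible comparison in turn, one run per value of `fail_countdown`), unwinding must run each element's destructor exactly once, via `v` itself and never via the scratch space. `DropCounter` makes this observable: a double-drop or a leak shows up as a drop count other than 1 for some `clone_num`.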
