@@ -1921,6 +1921,150 @@ impl<T:Eq> OwnedEqVector<T> for ~[T] {
1921
1921
}
1922
1922
}
1923
1923
1924
+ fn merge_sort < T > ( v : & mut [ T ] , less_eq : |& T , & T | -> bool) {
1925
+ // warning: this wildly uses unsafe.
1926
+ static INSERTION : uint = 8 ;
1927
+
1928
+ let len = v. len ( ) ;
1929
+
1930
+ // allocate some memory to use as scratch memory, we keep the
1931
+ // length 0 so we can keep shallow copies of the contents of `v`
1932
+ // without risking the dtors running on an object twice if
1933
+ // `less_eq` fails.
1934
+ let mut working_space = with_capacity ( 2 * len) ;
1935
+ // these both are buffers of length `len`.
1936
+ let mut buf_dat = working_space. as_mut_ptr ( ) ;
1937
+ let mut buf_tmp = unsafe { buf_dat. offset ( len as int ) } ;
1938
+
1939
+ // length `len`.
1940
+ let buf_v = v. as_ptr ( ) ;
1941
+
1942
+ // step 1. sort short runs with insertion sort. This takes the
1943
+ // values from `v` and sorts them into `buf_dat`, leaving that
1944
+ // with sorted runs of length INSERTION.
1945
+
1946
+ // We could hardcode the sorting comparisons here, and we could
1947
+ // manipulate/step the pointers themselves, rather than repeatedly
1948
+ // .offset-ing.
1949
+ for start in range_step ( 0 , len, INSERTION ) {
1950
+ // start <= i <= len;
1951
+ for i in range ( start, cmp:: min ( start + INSERTION , len) ) {
1952
+ // j satisfies: start <= j <= i;
1953
+ let mut j = i as int ;
1954
+ unsafe {
1955
+ // `i` is in bounds.
1956
+ let read_ptr = buf_v. offset ( i as int ) ;
1957
+
1958
+ // find where to insert, we need to do strict <,
1959
+ // rather than <=, to maintain stability.
1960
+
1961
+ // start <= j - 1 < len, so .offset(j - 1) is in
1962
+ // bounds.
1963
+ while j > start as int && !less_eq ( & * buf_dat. offset ( j - 1 ) , & * read_ptr) {
1964
+ j -= 1 ;
1965
+ }
1966
+
1967
+ // shift everything to the right, to make space to
1968
+ // insert this value.
1969
+
1970
+ // j + 1 could be `len` (for the last `i`), but in
1971
+ // that case, `i == j` so we don't copy. The
1972
+ // `.offset(j)` is always in bounds.
1973
+ ptr:: copy_memory ( buf_dat. offset ( j + 1 ) ,
1974
+ buf_dat. offset ( j) ,
1975
+ i - j as uint ) ;
1976
+ ptr:: copy_nonoverlapping_memory ( buf_dat. offset ( j) , read_ptr, 1 ) ;
1977
+ }
1978
+ }
1979
+ }
1980
+
1981
+ // step 2. merge the sorted runs.
1982
+ let mut width = INSERTION ;
1983
+ while width < len {
1984
+ // merge the sorted runs of length `width` in `buf_dat` two at
1985
+ // a time, placing the result in `buf_tmp`.
1986
+
1987
+ // 0 <= start <= len.
1988
+ for start in range_step ( 0 , len, 2 * width) {
1989
+ // manipulate pointers directly for speed (rather than
1990
+ // using a `for` loop with `range` and `.offset` inside
1991
+ // that loop).
1992
+ unsafe {
1993
+ // the end of the first run & start of the
1994
+ // second. Offset of `len` is defined, since this is
1995
+ // precisely one byte past the end of the object.
1996
+ let right_start = buf_dat. offset ( cmp:: min ( start + width, len) as int ) ;
1997
+ // end of the second. Similar reasoning to the above re safety.
1998
+ let right_end_idx = cmp:: min ( start + 2 * width, len) ;
1999
+ let right_end = buf_dat. offset ( right_end_idx as int ) ;
2000
+
2001
+ // the pointers to the elements under consideration
2002
+ // from the two runs.
2003
+
2004
+ // both of these are in bounds.
2005
+ let mut left = buf_dat. offset ( start as int ) ;
2006
+ let mut right = right_start;
2007
+
2008
+ // where we're putting the results, it is a run of
2009
+ // length `2*width`, so we step it once for each step
2010
+ // of either `left` or `right`. `buf_tmp` has length
2011
+ // `len`, so these are in bounds.
2012
+ let mut out = buf_tmp. offset ( start as int ) ;
2013
+ let out_end = buf_tmp. offset ( right_end_idx as int ) ;
2014
+
2015
+ while out < out_end {
2016
+ // Either the left or the right run are exhausted,
2017
+ // so just copy the remainder from the other run
2018
+ // and move on; this gives a huge speed-up (order
2019
+ // of 25%) for mostly sorted vectors (the best
2020
+ // case).
2021
+ if left == right_start {
2022
+ // the number remaining in this run.
2023
+ let elems = ( right_end as uint - right as uint ) / mem:: size_of :: < T > ( ) ;
2024
+ ptr:: copy_nonoverlapping_memory ( out, right, elems) ;
2025
+ break ;
2026
+ } else if right == right_end {
2027
+ let elems = ( right_start as uint - left as uint ) / mem:: size_of :: < T > ( ) ;
2028
+ ptr:: copy_nonoverlapping_memory ( out, left, elems) ;
2029
+ break ;
2030
+ }
2031
+
2032
+ // check which side is smaller, and that's the
2033
+ // next element for the new run.
2034
+
2035
+ // `left < right_start` and `right < right_end`,
2036
+ // so these are valid.
2037
+ let to_copy = if less_eq ( & * left, & * right) {
2038
+ step ( & mut left)
2039
+ } else {
2040
+ step ( & mut right)
2041
+ } ;
2042
+ ptr:: copy_nonoverlapping_memory ( out, to_copy, 1 ) ;
2043
+ step ( & mut out) ;
2044
+ }
2045
+ }
2046
+ }
2047
+
2048
+ util:: swap ( & mut buf_dat, & mut buf_tmp) ;
2049
+
2050
+ width *= 2 ;
2051
+ }
2052
+
2053
+ // write the result to `v` in one go, so that there are never two copies
2054
+ // of the same object in `v`.
2055
+ unsafe {
2056
+ ptr:: copy_nonoverlapping_memory ( v. as_mut_ptr ( ) , buf_dat, len) ;
2057
+ }
2058
+
2059
+ // increment the pointer, returning the old pointer.
2060
+ #[ inline( always) ]
2061
+ unsafe fn step < T > ( ptr : & mut * mut T ) -> * mut T {
2062
+ let old = * ptr;
2063
+ * ptr = ptr. offset ( 1 ) ;
2064
+ old
2065
+ }
2066
+ }
2067
+
1924
2068
/// Extension methods for vectors such that their elements are
1925
2069
/// mutable.
1926
2070
pub trait MutableVector < ' a , T > {
@@ -2020,6 +2164,25 @@ pub trait MutableVector<'a, T> {
2020
2164
/// Reverse the order of elements in a vector, in place
2021
2165
fn reverse ( self ) ;
2022
2166
2167
+ /// Sort the vector, in place, using `less_eq` to compare `a <=
2168
+ /// b`.
2169
+ ///
2170
+ /// This sort is `O(n log n)` worst-case and stable, but allocates
2171
+ /// approximately `2 * n`, where `n` is the length of `self`.
2172
+ ///
2173
+ /// # Example
2174
+ ///
2175
+ /// ```rust
2176
+ /// let mut v = [5, 4, 1, 3, 2];
2177
+ /// v.sort(|a, b| *a <= *b);
2178
+ /// assert_eq!(v, [1, 2, 3, 4, 5]);
2179
+ ///
2180
+ /// // reverse sorting
2181
+ /// v.sort(|a, b| *b <= *a);
2182
+ /// assert_eq!(v, [5, 4, 3, 2, 1]);
2183
+ /// ```
2184
+ fn sort ( self , less_eq : |& T , & T | -> bool) ;
2185
+
2023
2186
/**
2024
2187
* Consumes `src` and moves as many elements as it can into `self`
2025
2188
* from the range [start,end).
@@ -2164,6 +2327,11 @@ impl<'a,T> MutableVector<'a, T> for &'a mut [T] {
2164
2327
}
2165
2328
}
2166
2329
2330
+ #[ inline]
2331
+ fn sort ( self , less_eq : |& T , & T | -> bool) {
2332
+ merge_sort ( self , less_eq)
2333
+ }
2334
+
2167
2335
#[ inline]
2168
2336
fn move_from ( self , mut src : ~[ T ] , start : uint , end : uint ) -> uint {
2169
2337
for ( a, b) in self . mut_iter ( ) . zip ( src. mut_slice ( start, end) . mut_iter ( ) ) {
@@ -2692,6 +2860,7 @@ mod tests {
2692
2860
use vec:: * ;
2693
2861
use cmp:: * ;
2694
2862
use prelude:: * ;
2863
+ use rand:: { Rng , task_rng} ;
2695
2864
2696
2865
fn square ( n : uint ) -> uint { n * n }
2697
2866
@@ -3298,6 +3467,57 @@ mod tests {
3298
3467
assert ! ( v3. is_empty( ) ) ;
3299
3468
}
3300
3469
3470
+ #[ test]
3471
+ fn test_sort ( ) {
3472
+ for len in range ( 4 u, 25 ) {
3473
+ for _ in range ( 0 , 100 ) {
3474
+ let mut v = task_rng ( ) . gen_vec :: < uint > ( len) ;
3475
+ v. sort ( |a, b| a <= b) ;
3476
+
3477
+ assert ! ( v. windows( 2 ) . all( |w| w[ 0 ] <= w[ 1 ] ) ) ;
3478
+ }
3479
+ }
3480
+
3481
+ // shouldn't fail/crash
3482
+ let mut v: [ uint , .. 0 ] = [ ] ;
3483
+ v. sort ( |a, b| a <= b) ;
3484
+
3485
+ let mut v = [ 0xDEADBEEF ] ;
3486
+ v. sort ( |a, b| a <= b) ;
3487
+ assert_eq ! ( v, [ 0xDEADBEEF ] ) ;
3488
+ }
3489
+
3490
// Checks that `sort` keeps elements with equal keys in their original
// relative order.
#[test]
fn test_sort_stability() {
    for n in range(4, 25) {
        for _ in range(0, 10) {
            let mut seen = [0, .. 10];

            // build a vector like [(6, 1), (5, 1), (6, 2), ...]: the
            // first item of each tuple is a random key below 10, and
            // the second item records how many times that key has been
            // generated so far, so for any given key the second items
            // appear in increasing order.
            let mut pairs = range(0, n).map(|_| {
                    let key = task_rng().gen::<uint>() % 10;
                    seen[key] += 1;
                    (key, seen[key])
                }).to_owned_vec();

            // compare keys only; an unstable sort would be free to
            // shuffle the occurrence counts among equal keys.
            pairs.sort(|&(k1, _), &(k2, _)| k1 <= k2);

            // the full tuple comparison also looks at the occurrence
            // count, so this holds only if equal keys kept their
            // original order, i.e. only if the sort is stable.
            assert!(pairs.windows(2).all(|pair| pair[0] <= pair[1]));
        }
    }
}
3520
+
3301
3521
#[ test]
3302
3522
fn test_partition ( ) {
3303
3523
assert_eq ! ( ( ~[ ] ) . partition( |x: & int| * x < 3 ) , ( ~[ ] , ~[ ] ) ) ;
@@ -4124,7 +4344,8 @@ mod bench {
4124
4344
use vec:: VectorVector ;
4125
4345
use option:: * ;
4126
4346
use ptr;
4127
- use rand:: { weak_rng, Rng } ;
4347
+ use rand:: { weak_rng, task_rng, Rng } ;
4348
+ use mem;
4128
4349
4129
4350
#[ bench]
4130
4351
fn iterator ( bh : & mut BenchHarness ) {
@@ -4325,4 +4546,42 @@ mod bench {
4325
4546
}
4326
4547
} )
4327
4548
}
4549
+
4550
+ fn sort_random_small ( bh : & mut BenchHarness ) {
4551
+ let mut rng = weak_rng ( ) ;
4552
+ bh. iter ( || {
4553
+ let mut v: ~[ f64 ] = rng. gen_vec ( 5 ) ;
4554
+ v. sort ( |a, b| * a <= * b) ;
4555
+ } ) ;
4556
+ bh. bytes = 5 * mem:: size_of :: < f64 > ( ) as u64 ;
4557
+ }
4558
+
4559
+ #[ bench]
4560
+ fn sort_random_medium ( bh : & mut BenchHarness ) {
4561
+ let mut rng = weak_rng ( ) ;
4562
+ bh. iter ( || {
4563
+ let mut v: ~[ f64 ] = rng. gen_vec ( 100 ) ;
4564
+ v. sort ( |a, b| * a <= * b) ;
4565
+ } ) ;
4566
+ bh. bytes = 100 * mem:: size_of :: < f64 > ( ) as u64 ;
4567
+ }
4568
+
4569
+ #[ bench]
4570
+ fn sort_random_large ( bh : & mut BenchHarness ) {
4571
+ let mut rng = weak_rng ( ) ;
4572
+ bh. iter ( || {
4573
+ let mut v: ~[ f64 ] = rng. gen_vec ( 10000 ) ;
4574
+ v. sort ( |a, b| * a <= * b) ;
4575
+ } ) ;
4576
+ bh. bytes = 10000 * mem:: size_of :: < f64 > ( ) as u64 ;
4577
+ }
4578
+
4579
+ #[ bench]
4580
+ fn sort_sorted ( bh : & mut BenchHarness ) {
4581
+ let mut v = vec:: from_fn ( 10000 , |i| i) ;
4582
+ bh. iter ( || {
4583
+ v. sort ( |a, b| * a <= * b) ;
4584
+ } ) ;
4585
+ bh. bytes = ( v. len ( ) * mem:: size_of_val ( & v[ 0 ] ) ) as u64 ;
4586
+ }
4328
4587
}
0 commit comments