@@ -19,10 +19,12 @@ use core::cmp::Ordering::{self, Less};
 use core::mem::{self, SizedTypeProperties};
 #[cfg(not(no_global_oom_handling))]
 use core::ptr;
+#[cfg(not(no_global_oom_handling))]
+use core::slice::sort;

 use crate::alloc::Allocator;
 #[cfg(not(no_global_oom_handling))]
-use crate::alloc::Global;
+use crate::alloc::{self, Global};
 #[cfg(not(no_global_oom_handling))]
 use crate::borrow::ToOwned;
 use crate::boxed::Box;
@@ -206,7 +208,7 @@ impl<T> [T] {
     where
         T: Ord,
     {
-        merge_sort(self, T::lt);
+        stable_sort(self, T::lt);
     }

     /// Sorts the slice with a comparator function.
@@ -262,7 +264,7 @@ impl<T> [T] {
     where
         F: FnMut(&T, &T) -> Ordering,
    {
-        merge_sort(self, |a, b| compare(a, b) == Less);
+        stable_sort(self, |a, b| compare(a, b) == Less);
     }

     /// Sorts the slice with a key extraction function.
@@ -305,7 +307,7 @@ impl<T> [T] {
         F: FnMut(&T) -> K,
         K: Ord,
     {
-        merge_sort(self, |a, b| f(a).lt(&f(b)));
+        stable_sort(self, |a, b| f(a).lt(&f(b)));
     }

     /// Sorts the slice with a key extraction function.
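
Note that all three stable-sort entry points above (`sort`, `sort_by`, `sort_by_key`) change only their internal delegate, from `merge_sort` to `stable_sort`; the public behavior, including stability, is untouched. An illustrative check of that unchanged behavior (not part of the diff):

fn main() {
    let mut v = vec![(2, 'a'), (1, 'b'), (2, 'c'), (1, 'd')];
    // Stable sort by the first field: equal keys keep their original order,
    // so 'a' stays before 'c' and 'b' stays before 'd'.
    v.sort_by_key(|&(k, _)| k);
    assert_eq!(v, [(1, 'b'), (1, 'd'), (2, 'a'), (2, 'c')]);
}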
@@ -812,324 +814,52 @@ impl<T: Clone> ToOwned for [T] {
 // Sorting
 ////////////////////////////////////////////////////////////////////////////////

-/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted.
-///
-/// This is the integral subroutine of insertion sort.
-#[cfg(not(no_global_oom_handling))]
-fn insert_head<T, F>(v: &mut [T], is_less: &mut F)
-where
-    F: FnMut(&T, &T) -> bool,
-{
-    if v.len() >= 2 && is_less(&v[1], &v[0]) {
-        unsafe {
-            // There are three ways to implement insertion here:
-            //
-            // 1. Swap adjacent elements until the first one gets to its final destination.
-            //    However, this way we copy data around more than is necessary. If elements are big
-            //    structures (costly to copy), this method will be slow.
-            //
-            // 2. Iterate until the right place for the first element is found. Then shift the
-            //    elements succeeding it to make room for it and finally place it into the
-            //    remaining hole. This is a good method.
-            //
-            // 3. Copy the first element into a temporary variable. Iterate until the right place
-            //    for it is found. As we go along, copy every traversed element into the slot
-            //    preceding it. Finally, copy data from the temporary variable into the remaining
-            //    hole. This method is very good. Benchmarks demonstrated slightly better
-            //    performance than with the 2nd method.
-            //
-            // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
-            let tmp = mem::ManuallyDrop::new(ptr::read(&v[0]));
-
-            // Intermediate state of the insertion process is always tracked by `hole`, which
-            // serves two purposes:
-            // 1. Protects integrity of `v` from panics in `is_less`.
-            // 2. Fills the remaining hole in `v` in the end.
-            //
-            // Panic safety:
-            //
-            // If `is_less` panics at any point during the process, `hole` will get dropped and
-            // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
-            // initially held exactly once.
-            let mut hole = InsertionHole { src: &*tmp, dest: &mut v[1] };
-            ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);
-
-            for i in 2..v.len() {
-                if !is_less(&v[i], &*tmp) {
-                    break;
-                }
-                ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
-                hole.dest = &mut v[i];
-            }
-            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
-        }
-    }
-
-    // When dropped, copies from `src` into `dest`.
-    struct InsertionHole<T> {
-        src: *const T,
-        dest: *mut T,
-    }
-
-    impl<T> Drop for InsertionHole<T> {
-        fn drop(&mut self) {
-            unsafe {
-                ptr::copy_nonoverlapping(self.src, self.dest, 1);
-            }
-        }
-    }
-}
-
-/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
-/// stores the result into `v[..]`.
-///
-/// # Safety
-///
-/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
-/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
-#[cfg(not(no_global_oom_handling))]
-unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &mut F)
-where
-    F: FnMut(&T, &T) -> bool,
-{
-    let len = v.len();
-    let v = v.as_mut_ptr();
-    let (v_mid, v_end) = unsafe { (v.add(mid), v.add(len)) };
-
-    // The merge process first copies the shorter run into `buf`. Then it traces the newly copied
-    // run and the longer run forwards (or backwards), comparing their next unconsumed elements and
-    // copying the lesser (or greater) one into `v`.
-    //
-    // As soon as the shorter run is fully consumed, the process is done. If the longer run gets
-    // consumed first, then we must copy whatever is left of the shorter run into the remaining
-    // hole in `v`.
-    //
-    // Intermediate state of the process is always tracked by `hole`, which serves two purposes:
-    // 1. Protects integrity of `v` from panics in `is_less`.
-    // 2. Fills the remaining hole in `v` if the longer run gets consumed first.
-    //
-    // Panic safety:
-    //
-    // If `is_less` panics at any point during the process, `hole` will get dropped and fill the
-    // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
-    // object it initially held exactly once.
-    let mut hole;
-
-    if mid <= len - mid {
-        // The left run is shorter.
-        unsafe {
-            ptr::copy_nonoverlapping(v, buf, mid);
-            hole = MergeHole { start: buf, end: buf.add(mid), dest: v };
-        }
-
-        // Initially, these pointers point to the beginnings of their arrays.
-        let left = &mut hole.start;
-        let mut right = v_mid;
-        let out = &mut hole.dest;
-
-        while *left < hole.end && right < v_end {
-            // Consume the lesser side.
-            // If equal, prefer the left run to maintain stability.
-            unsafe {
-                let to_copy = if is_less(&*right, &**left) {
-                    get_and_increment(&mut right)
-                } else {
-                    get_and_increment(left)
-                };
-                ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
-            }
-        }
-    } else {
-        // The right run is shorter.
-        unsafe {
-            ptr::copy_nonoverlapping(v_mid, buf, len - mid);
-            hole = MergeHole { start: buf, end: buf.add(len - mid), dest: v_mid };
-        }
-
-        // Initially, these pointers point past the ends of their arrays.
-        let left = &mut hole.dest;
-        let right = &mut hole.end;
-        let mut out = v_end;
-
-        while v < *left && buf < *right {
-            // Consume the greater side.
-            // If equal, prefer the right run to maintain stability.
-            unsafe {
-                let to_copy = if is_less(&*right.sub(1), &*left.sub(1)) {
-                    decrement_and_get(left)
-                } else {
-                    decrement_and_get(right)
-                };
-                ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
-            }
-        }
-    }
-    // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
-    // it will now be copied into the hole in `v`.
-
-    unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
-        let old = *ptr;
-        *ptr = unsafe { ptr.add(1) };
-        old
-    }
-
-    unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
-        *ptr = unsafe { ptr.sub(1) };
-        *ptr
-    }
-
-    // When dropped, copies the range `start..end` into `dest..`.
-    struct MergeHole<T> {
-        start: *mut T,
-        end: *mut T,
-        dest: *mut T,
-    }
-
-    impl<T> Drop for MergeHole<T> {
-        fn drop(&mut self) {
-            // `T` is not a zero-sized type, and these are pointers into a slice's elements.
-            unsafe {
-                let len = self.end.sub_ptr(self.start);
-                ptr::copy_nonoverlapping(self.start, self.dest, len);
-            }
-        }
-    }
-}
-
-/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
-/// [here](https://github.com/python/cpython/blob/main/Objects/listsort.txt).
-///
-/// The algorithm identifies strictly descending and non-descending subsequences, which are called
-/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
-/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
-/// satisfied:
-///
-/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
-/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
-///
-/// The invariants ensure that the total running time is *O*(*n* \* log(*n*)) worst-case.
+#[inline]
 #[cfg(not(no_global_oom_handling))]
-fn merge_sort<T, F>(v: &mut [T], mut is_less: F)
+fn stable_sort<T, F>(v: &mut [T], mut is_less: F)
 where
     F: FnMut(&T, &T) -> bool,
 {
-    // Slices of up to this length get sorted using insertion sort.
-    const MAX_INSERTION: usize = 20;
-    // Very short runs are extended using insertion sort to span at least this many elements.
-    const MIN_RUN: usize = 10;
-
-    // Sorting has no meaningful behavior on zero-sized types.
     if T::IS_ZST {
+        // Sorting has no meaningful behavior on zero-sized types. Do nothing.
         return;
     }

-    let len = v.len();
-
-    // Short arrays get sorted in-place via insertion sort to avoid allocations.
-    if len <= MAX_INSERTION {
-        if len >= 2 {
-            for i in (0..len - 1).rev() {
-                insert_head(&mut v[i..], &mut is_less);
-            }
-        }
-        return;
-    }
-
-    // Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it
-    // shallow copies of the contents of `v` without risking the dtors running on copies if
-    // `is_less` panics. When merging two sorted runs, this buffer holds a copy of the shorter run,
-    // which will always have length at most `len / 2`.
-    let mut buf = Vec::with_capacity(len / 2);
+    let elem_alloc_fn = |len: usize| -> *mut T {
+        // SAFETY: Creating the layout is safe as long as merge_sort never calls this with len >
+        // v.len(). Alloc in general will only be used as 'shadow-region' to store temporary swap
+        // elements.
+        unsafe { alloc::alloc(alloc::Layout::array::<T>(len).unwrap_unchecked()) as *mut T }
+    };

-    // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
-    // strange decision, but consider the fact that merges more often go in the opposite direction
-    // (forwards). According to benchmarks, merging forwards is slightly faster than merging
-    // backwards. To conclude, identifying runs by traversing backwards improves performance.
-    let mut runs = vec![];
-    let mut end = len;
-    while end > 0 {
-        // Find the next natural run, and reverse it if it's strictly descending.
-        let mut start = end - 1;
-        if start > 0 {
-            start -= 1;
-            unsafe {
-                if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
-                    while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
-                        start -= 1;
-                    }
-                    v[start..end].reverse();
-                } else {
-                    while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1))
-                    {
-                        start -= 1;
-                    }
-                }
-            }
-        }
-
-        // Insert some more elements into the run if it's too short. Insertion sort is faster than
-        // merge sort on short sequences, so this significantly improves performance.
-        while start > 0 && end - start < MIN_RUN {
-            start -= 1;
-            insert_head(&mut v[start..end], &mut is_less);
+    let elem_dealloc_fn = |buf_ptr: *mut T, len: usize| {
+        // SAFETY: Creating the layout is safe as long as merge_sort never calls this with len >
+        // v.len(). The caller must ensure that buf_ptr was created by elem_alloc_fn with the same
+        // len.
+        unsafe {
+            alloc::dealloc(buf_ptr as *mut u8, alloc::Layout::array::<T>(len).unwrap_unchecked());
         }
+    };

-        // Push this run onto the stack.
-        runs.push(Run { start, len: end - start });
-        end = start;
-
-        // Merge some pairs of adjacent runs to satisfy the invariants.
-        while let Some(r) = collapse(&runs) {
-            let left = runs[r + 1];
-            let right = runs[r];
-            unsafe {
-                merge(
-                    &mut v[left.start..right.start + right.len],
-                    left.len,
-                    buf.as_mut_ptr(),
-                    &mut is_less,
-                );
-            }
-            runs[r] = Run { start: left.start, len: left.len + right.len };
-            runs.remove(r + 1);
+    let run_alloc_fn = |len: usize| -> *mut sort::TimSortRun {
+        // SAFETY: Creating the layout is safe as long as merge_sort never calls this with an
+        // obscene length or 0.
+        unsafe {
+            alloc::alloc(alloc::Layout::array::<sort::TimSortRun>(len).unwrap_unchecked())
+                as *mut sort::TimSortRun
         }
-    }
-
-    // Finally, exactly one run must remain in the stack.
-    debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);
+    };

-    // Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
-    // if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
-    // algorithm should continue building a new run instead, `None` is returned.
-    //
-    // TimSort is infamous for its buggy implementations, as described here:
-    // http://envisage-project.eu/timsort-specification-and-verification/
-    //
-    // The gist of the story is: we must enforce the invariants on the top four runs on the stack.
-    // Enforcing them on just top three is not sufficient to ensure that the invariants will still
-    // hold for *all* runs in the stack.
-    //
-    // This function correctly checks invariants for the top four runs. Additionally, if the top
-    // run starts at index 0, it will always demand a merge operation until the stack is fully
-    // collapsed, in order to complete the sort.
-    #[inline]
-    fn collapse(runs: &[Run]) -> Option<usize> {
-        let n = runs.len();
-        if n >= 2
-            && (runs[n - 1].start == 0
-                || runs[n - 2].len <= runs[n - 1].len
-                || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
-                || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
-        {
-            if n >= 3 && runs[n - 3].len < runs[n - 1].len { Some(n - 3) } else { Some(n - 2) }
-        } else {
-            None
+    let run_dealloc_fn = |buf_ptr: *mut sort::TimSortRun, len: usize| {
+        // SAFETY: The caller must ensure that buf_ptr was created by elem_alloc_fn with the same
+        // len.
+        unsafe {
+            alloc::dealloc(
+                buf_ptr as *mut u8,
+                alloc::Layout::array::<sort::TimSortRun>(len).unwrap_unchecked(),
+            );
         }
-    }
+    };

-    #[derive(Clone, Copy)]
-    struct Run {
-        start: usize,
-        len: usize,
-    }
+    sort::merge_sort(v, &mut is_less, elem_alloc_fn, elem_dealloc_fn, run_alloc_fn, run_dealloc_fn);
 }
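
The net effect of the final hunk: `alloc` no longer carries its own TimSort-style merge sort. The `stable_sort` shim only handles the zero-sized-type early return and supplies allocation hooks, since `core::slice::sort::merge_sort` cannot allocate on its own. A minimal, self-contained sketch of that hook-passing pattern follows; `with_scratch` and its names are illustrative, not standard-library API:

use std::alloc::{alloc, dealloc, Layout};

// Hypothetical stand-in for an allocation-free routine (like the one in
// `core::slice::sort`): it needs `len` elements of scratch space, but must be
// handed the allocator through closures supplied by the caller.
fn with_scratch<T, A, D>(len: usize, alloc_fn: A, dealloc_fn: D)
where
    A: Fn(usize) -> *mut T,
    D: Fn(*mut T, usize),
{
    let buf = alloc_fn(len);
    // ... a real routine would use `buf` as temporary storage here ...
    dealloc_fn(buf, len);
}

fn main() {
    let alloc_fn = |len: usize| -> *mut u64 {
        // SAFETY: mirrors the closures in the diff; `len` is small and non-zero.
        unsafe { alloc(Layout::array::<u64>(len).unwrap()) as *mut u64 }
    };
    let dealloc_fn = |ptr: *mut u64, len: usize| {
        // SAFETY: `ptr` came from `alloc_fn` with the same `len`.
        unsafe { dealloc(ptr as *mut u8, Layout::array::<u64>(len).unwrap()) }
    };
    with_scratch(8, alloc_fn, dealloc_fn);
}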