@@ -117,6 +117,90 @@ pub unsafe fn swap<T>(x: *mut T, y: *mut T) {
     mem::forget(tmp);
 }
 
+/// Swaps a sequence of values at two mutable locations of the same type.
+///
+/// # Safety
+///
+/// The two arguments must each point to the beginning of `count` locations
+/// of valid memory, and the two memory ranges must not overlap.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// #![feature(swap_nonoverlapping)]
+///
+/// use std::ptr;
+///
+/// let mut x = [1, 2, 3, 4];
+/// let mut y = [7, 8, 9];
+///
+/// unsafe {
+///     ptr::swap_nonoverlapping(x.as_mut_ptr(), y.as_mut_ptr(), 2);
+/// }
+///
+/// assert_eq!(x, [7, 8, 3, 4]);
+/// assert_eq!(y, [1, 2, 9]);
+/// ```
+#[inline]
+#[unstable(feature = "swap_nonoverlapping", issue = "42818")]
+pub unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize) {
+    let x = x as *mut u8;
+    let y = y as *mut u8;
+    let len = mem::size_of::<T>() * count;
+    swap_nonoverlapping_bytes(x, y, len)
+}
+
+#[inline]
+unsafe fn swap_nonoverlapping_bytes(x: *mut u8, y: *mut u8, len: usize) {
+    // The approach here is to utilize simd to swap x & y efficiently. Testing reveals
+    // that swapping either 32 bytes or 64 bytes at a time is most efficient for intel
+    // Haswell E processors. LLVM is more able to optimize if we give a struct a
+    // #[repr(simd)], even if we don't actually use this struct directly.
+    //
+    // FIXME repr(simd) broken on emscripten and redox
+    #[cfg_attr(not(any(target_os = "emscripten", target_os = "redox")), repr(simd))]
+    struct Block(u64, u64, u64, u64);
+    struct UnalignedBlock(u64, u64, u64, u64);
+
+    let block_size = mem::size_of::<Block>();
+
+    // Loop through x & y, copying them `Block` at a time
+    // The optimizer should unroll the loop fully for most types
+    // N.B. We can't use a for loop as the `range` impl calls `mem::swap` recursively
+    let mut i = 0;
+    while i + block_size <= len {
+        // Create some uninitialized memory as scratch space
+        // Declaring `t` here avoids aligning the stack when this loop is unused
+        let mut t: Block = mem::uninitialized();
+        let t = &mut t as *mut _ as *mut u8;
+        let x = x.offset(i as isize);
+        let y = y.offset(i as isize);
+
+        // Swap a block of bytes of x & y, using t as a temporary buffer
+        // This should be optimized into efficient SIMD operations where available
+        copy_nonoverlapping(x, t, block_size);
+        copy_nonoverlapping(y, x, block_size);
+        copy_nonoverlapping(t, y, block_size);
+        i += block_size;
+    }
+
+    if i < len {
+        // Swap any remaining bytes
+        let mut t: UnalignedBlock = mem::uninitialized();
+        let rem = len - i;
+
+        let t = &mut t as *mut _ as *mut u8;
+        let x = x.offset(i as isize);
+        let y = y.offset(i as isize);
+
+        copy_nonoverlapping(x, t, rem);
+        copy_nonoverlapping(y, x, rem);
+        copy_nonoverlapping(t, y, rem);
+    }
+}
+
 /// Replaces the value at `dest` with `src`, returning the old
 /// value, without dropping either.
 ///
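A quick usage sketch of the new API (not part of the diff above; the element type, array contents, and count are illustrative). It needs a nightly toolchain with the `swap_nonoverlapping` feature gate from this change, and `count` is measured in elements of `T`, not bytes:

#![feature(swap_nonoverlapping)]

use std::ptr;

fn main() {
    let mut a: [u64; 4] = [1, 2, 3, 4];
    let mut b: [u64; 4] = [10, 20, 30, 40];

    unsafe {
        // The two ranges live in different arrays, so they cannot overlap.
        // This swaps 3 u64 elements (24 bytes) between the two buffers.
        ptr::swap_nonoverlapping(a.as_mut_ptr(), b.as_mut_ptr(), 3);
    }

    assert_eq!(a, [10, 20, 30, 4]);
    assert_eq!(b, [1, 2, 3, 40]);
}

Tracing this call through the new code: `len` is 24 bytes, which is below the 32-byte `Block` (four `u64` fields), so the block-sized `while` loop never runs and the whole swap is handled by the `UnalignedBlock` tail path; larger buffers go through the 32-byte block loop first and only the remainder takes the tail path.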