@@ -41,6 +41,72 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
    core::mem::transmute(x_read)
}

+/// Loads a `T`-sized chunk from `src` into `dst` at offset `offset`, if that does not exceed
+/// `load_sz`. The offset pointers must both be `T`-aligned. Returns the new offset, advanced by the
+/// chunk size if a load happened.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_chunk_aligned<T: Copy>(
+    src: *const usize,
+    dst: *mut usize,
+    load_sz: usize,
+    offset: usize,
+) -> usize {
+    let chunk_sz = core::mem::size_of::<T>();
+    if (load_sz & chunk_sz) != 0 {
+        *dst.wrapping_byte_add(offset).cast::<T>() = *src.wrapping_byte_add(offset).cast::<T>();
+        offset | chunk_sz
+    } else {
+        offset
+    }
+}
+
+/// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
+/// read with the out-of-bounds part filled with 0s.
+/// `load_sz` must be strictly less than `WORD_SIZE`.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
+    debug_assert!(load_sz < WORD_SIZE);
+    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
+    // (since `load_sz < WORD_SIZE`).
+    const { assert!(WORD_SIZE <= 8) };
+
+    let mut i = 0;
+    let mut out = 0usize;
+    // We load in decreasing order, so the pointers remain sufficiently aligned for the next step.
+    i = load_chunk_aligned::<u32>(src, &raw mut out, load_sz, i);
+    i = load_chunk_aligned::<u16>(src, &raw mut out, load_sz, i);
+    i = load_chunk_aligned::<u8>(src, &raw mut out, load_sz, i);
+    debug_assert!(i == load_sz);
+    out
+}
+
+/// Load `load_sz` many bytes from `src.wrapping_byte_add(WORD_SIZE - load_sz)`. `src` must be
+/// `usize`-aligned. The bytes are returned as the *last* bytes of the return value, i.e., this acts
+/// as if we had done a `usize` read from `src`, with the out-of-bounds part filled with 0s.
+/// `load_sz` must be strictly less than `WORD_SIZE`.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
+    debug_assert!(load_sz < WORD_SIZE);
+    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
+    // (since `load_sz < WORD_SIZE`).
+    const { assert!(WORD_SIZE <= 8) };
+
+    let mut i = 0;
+    let mut out = 0usize;
+    // Obtain pointers pointing to the beginning of the range we want to load.
+    let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz);
+    let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz);
+    // We load in increasing order, so by the time we reach `u16` things are 2-aligned etc.
+    i = load_chunk_aligned::<u8>(src_shifted, out_shifted, load_sz, i);
+    i = load_chunk_aligned::<u16>(src_shifted, out_shifted, load_sz, i);
+    i = load_chunk_aligned::<u32>(src_shifted, out_shifted, load_sz, i);
+    debug_assert!(i == load_sz);
+    out
+}
+

#[inline(always)]
pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
    #[inline(always)]
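
For intuition, here is a small stand-alone model (hypothetical helper names, not part of the patch) of what `load_aligned_partial` and `load_aligned_end_partial` return on a little-endian 64-bit target: each behaves like a full `usize` read whose out-of-bounds portion is zeroed. The real functions assemble that value from u32/u16/u8 chunks selected by the bits of `load_sz` via `load_chunk_aligned`, so every access stays in bounds and suitably aligned.

// Sketch only: models the zero-filled partial loads on a little-endian target.
fn load_partial_model(word: [u8; 8], load_sz: usize) -> u64 {
    // Keep the first `load_sz` bytes, zero the rest (like `load_aligned_partial`).
    let mut out = [0u8; 8];
    out[..load_sz].copy_from_slice(&word[..load_sz]);
    u64::from_le_bytes(out)
}

fn load_end_partial_model(word: [u8; 8], load_sz: usize) -> u64 {
    // Keep only the last `load_sz` bytes (like `load_aligned_end_partial`).
    let mut out = [0u8; 8];
    out[8 - load_sz..].copy_from_slice(&word[8 - load_sz..]);
    u64::from_le_bytes(out)
}

fn main() {
    let word = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
    assert_eq!(load_partial_model(word, 3), 0x0000_0000_0003_0201);
    assert_eq!(load_end_partial_model(word, 3), 0x0807_0600_0000_0000);
}
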
@@ -66,40 +132,57 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize)
        }
    }

+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
    #[cfg(not(feature = "mem-unaligned"))]
    #[inline(always)]
    unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(n > 0 && n % WORD_SIZE == 0);
+        debug_assert!(src.addr() % WORD_SIZE != 0);
+
        let mut dest_usize = dest as *mut usize;
        let dest_end = dest.wrapping_add(n) as *mut usize;

        // Calculate the misalignment offset and shift needed to reassemble value.
+        // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
        let offset = src as usize & WORD_MASK;
        let shift = offset * 8;

        // Realign src
-        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-        // This will read (but won't use) bytes out of bound.
-        // cfg needed because not all targets will have atomic loads that can be lowered
-        // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-        #[cfg(target_has_atomic_load_store = "ptr")]
-        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-        #[cfg(not(target_has_atomic_load_store = "ptr"))]
-        let mut prev_word = core::ptr::read_volatile(src_aligned);
+        let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize;
+        let mut prev_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);

-        while dest_usize < dest_end {
+        while dest_usize.wrapping_add(1) < dest_end {
            src_aligned = src_aligned.wrapping_add(1);
            let cur_word = *src_aligned;
-            #[cfg(target_endian = "little")]
-            let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
-            #[cfg(target_endian = "big")]
-            let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
+            let reassembled = if cfg!(target_endian = "little") {
+                prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift)
+            } else {
+                prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift)
+            };
            prev_word = cur_word;

-            *dest_usize = resembled;
+            *dest_usize = reassembled;
            dest_usize = dest_usize.wrapping_add(1);
        }
+
+        // There's one more element left to go, and we can't use the loop for that as on the `src` side,
+        // it is partially out-of-bounds.
+        src_aligned = src_aligned.wrapping_add(1);
+        let cur_word = load_aligned_partial(src_aligned, offset);
+        let reassembled = if cfg!(target_endian = "little") {
+            prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift)
+        } else {
+            prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift)
+        };
+        // prev_word does not matter any more
+
+        *dest_usize = reassembled;
+        // dest_usize does not matter any more
    }

+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
    #[cfg(feature = "mem-unaligned")]
    #[inline(always)]
    unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
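
The shift-and-or reassembly in the forward loop can be sanity-checked with a small stand-alone example (assumed values, little-endian, WORD_SIZE = 8, `offset = 3` so `shift = 24`):

// Sketch only: the little-endian reassembly used by the forward copy.
fn main() {
    let offset = 3; // `src` is 3 bytes past the aligned boundary
    let shift = offset * 8;
    let prev_word: u64 = 0x0807_0605_0403_0201; // aligned word containing `src`
    let cur_word: u64 = 0x100f_0e0d_0c0b_0a09; // next aligned word
    // Drop the `offset` bytes before `src`, then splice in the first `offset`
    // bytes of the following word.
    let reassembled = prev_word >> shift | cur_word << (64 - shift);
    // Bytes 0x04..=0x0b are exactly the misaligned word starting at `src`.
    assert_eq!(reassembled, 0x0b0a_0908_0706_0504);
}
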
@@ -164,40 +247,57 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
        }
    }

+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
    #[cfg(not(feature = "mem-unaligned"))]
    #[inline(always)]
    unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(n > 0 && n % WORD_SIZE == 0);
+        debug_assert!(src.addr() % WORD_SIZE != 0);
+
        let mut dest_usize = dest as *mut usize;
-        let dest_start = dest.wrapping_sub(n) as *mut usize;
+        let dest_start = dest.wrapping_sub(n) as *mut usize; // we're moving towards the start

        // Calculate the misalignment offset and shift needed to reassemble value.
+        // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
        let offset = src as usize & WORD_MASK;
        let shift = offset * 8;

-        // Realign src_aligned
-        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-        // This will read (but won't use) bytes out of bound.
-        // cfg needed because not all targets will have atomic loads that can be lowered
-        // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-        #[cfg(target_has_atomic_load_store = "ptr")]
-        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-        #[cfg(not(target_has_atomic_load_store = "ptr"))]
-        let mut prev_word = core::ptr::read_volatile(src_aligned);
+        // Realign src
+        let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize;
+        let mut prev_word = load_aligned_partial(src_aligned, offset);

-        while dest_start < dest_usize {
+        while dest_start.wrapping_add(1) < dest_usize {
            src_aligned = src_aligned.wrapping_sub(1);
            let cur_word = *src_aligned;
-            #[cfg(target_endian = "little")]
-            let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
-            #[cfg(target_endian = "big")]
-            let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
+            let reassembled = if cfg!(target_endian = "little") {
+                prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift
+            } else {
+                prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift
+            };
            prev_word = cur_word;

            dest_usize = dest_usize.wrapping_sub(1);
-            *dest_usize = resembled;
+            *dest_usize = reassembled;
        }
+
+        // There's one more element left to go, and we can't use the loop for that as on the `src` side,
+        // it is partially out-of-bounds.
+        src_aligned = src_aligned.wrapping_sub(1);
+        let cur_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
+        let reassembled = if cfg!(target_endian = "little") {
+            prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift
+        } else {
+            prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift
+        };
+        // prev_word does not matter any more
+
+        dest_usize = dest_usize.wrapping_sub(1);
+        *dest_usize = reassembled;
    }

+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
    #[cfg(feature = "mem-unaligned")]
    #[inline(always)]
    unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
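
The backward loop uses the mirrored formula, with `prev_word` now being the higher-addressed aligned word; a stand-alone check with the same assumed values (little-endian, `offset = 3` so `shift = 24`):

// Sketch only: the little-endian reassembly used by the backward copy.
fn main() {
    let offset = 3;
    let shift = offset * 8;
    let cur_word: u64 = 0x0807_0605_0403_0201; // lower-addressed aligned word
    let prev_word: u64 = 0x100f_0e0d_0c0b_0a09; // the word just above it
    // Walking backward, `prev_word` supplies the high bytes of the result and
    // `cur_word` the low bytes.
    let reassembled = prev_word << (64 - shift) | cur_word >> shift;
    assert_eq!(reassembled, 0x0b0a_0908_0706_0504);
}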