Skip to content

Commit cfa9969

Browse files
committed
fill_via_chunks: on BE, swap then copy
1 parent 9684ebf commit cfa9969

File tree

2 files changed

+23
-23
lines changed

2 files changed

+23
-23
lines changed

rand_core/src/block.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ where
226226
self.generate_and_set(0);
227227
}
228228
let (consumed_u32, filled_u8) =
229-
fill_via_u32_chunks(&self.results.as_ref()[self.index..], &mut dest[read_len..]);
229+
fill_via_u32_chunks(&mut self.results.as_mut()[self.index..], &mut dest[read_len..]);
230230

231231
self.index += consumed_u32;
232232
read_len += filled_u8;
@@ -399,7 +399,7 @@ where
399399
}
400400

401401
let (consumed_u64, filled_u8) = fill_via_u64_chunks(
402-
&self.results.as_ref()[self.index as usize..],
402+
&mut self.results.as_mut()[self.index as usize..],
403403
&mut dest[read_len..],
404404
);
405405

rand_core/src/impls.rs

+21-21
Original file line numberDiff line numberDiff line change
@@ -58,28 +58,21 @@ macro_rules! fill_via_chunks {
5858
let chunk_size_u8 = min($src.len() * SIZE, $dst.len());
5959
let chunk_size = (chunk_size_u8 + SIZE - 1) / SIZE;
6060

61-
if cfg!(target_endian = "little") {
62-
// On LE we can do a simple copy, which is 25-50% faster:
63-
unsafe {
64-
core::ptr::copy_nonoverlapping(
65-
$src.as_ptr() as *const u8,
66-
$dst.as_mut_ptr(),
67-
chunk_size_u8);
68-
}
69-
} else {
70-
// This code is valid on all arches, but slower than the above:
71-
let mut i = 0;
72-
let mut iter = $dst[..chunk_size_u8].chunks_exact_mut(SIZE);
73-
while let Some(chunk) = iter.next() {
74-
chunk.copy_from_slice(&$src[i].to_le_bytes());
75-
i += 1;
76-
}
77-
let chunk = iter.into_remainder();
78-
if !chunk.is_empty() {
79-
chunk.copy_from_slice(&$src[i].to_le_bytes()[..chunk.len()]);
61+
// Byte-swap for portability of results:
62+
if cfg!(target_endian = "big") {
63+
for x in &mut $src[..chunk_size] {
64+
*x = x.to_le();
8065
}
8166
}
8267

68+
// Copy the bytes in one go, which is 25-50% faster than converting word by word:
69+
unsafe {
70+
core::ptr::copy_nonoverlapping(
71+
$src.as_ptr() as *const u8,
72+
$dst.as_mut_ptr(),
73+
chunk_size_u8);
74+
}
75+
8376
(chunk_size, chunk_size_u8)
8477
}};
8578
}
@@ -89,6 +82,9 @@ macro_rules! fill_via_chunks {
8982
///
9083
/// The return values are `(consumed_u32, filled_u8)`.
9184
///
85+
/// On big-endian systems, endianness of `src[..consumed_u32]` values is
86+
/// swapped. No other adjustments to `src` are made.
87+
///
9288
/// `filled_u8` is the number of filled bytes in `dest`, which may be less than
9389
/// the length of `dest`.
9490
/// `consumed_u32` is the number of words consumed from `src`, which is the same
@@ -114,21 +110,25 @@ macro_rules! fill_via_chunks {
114110
/// }
115111
/// }
116112
/// ```
117-
pub fn fill_via_u32_chunks(src: &[u32], dest: &mut [u8]) -> (usize, usize) {
113+
pub fn fill_via_u32_chunks(src: &mut [u32], dest: &mut [u8]) -> (usize, usize) {
118114
fill_via_chunks!(src, dest, u32)
119115
}
120116

121117
/// Implement `fill_bytes` by reading chunks from the output buffer of a block
122118
/// based RNG.
123119
///
124120
/// The return values are `(consumed_u64, filled_u8)`.
121+
///
122+
/// On big-endian systems, endianness of `src[..consumed_u64]` values is
123+
/// swapped. No other adjustments to `src` are made.
124+
///
125125
/// `filled_u8` is the number of filled bytes in `dest`, which may be less than
126126
/// the length of `dest`.
127127
/// `consumed_u64` is the number of words consumed from `src`, which is the same
128128
/// as `filled_u8 / 8` rounded up.
129129
///
130130
/// See `fill_via_u32_chunks` for an example.
131-
pub fn fill_via_u64_chunks(src: &[u64], dest: &mut [u8]) -> (usize, usize) {
131+
pub fn fill_via_u64_chunks(src: &mut [u64], dest: &mut [u8]) -> (usize, usize) {
132132
fill_via_chunks!(src, dest, u64)
133133
}
134134

0 commit comments

Comments
 (0)