Commit 4df7a8d

copy_misaligned_words: avoid out-of-bounds accesses (#799)
* copy_misaligned_words: avoid out-of-bounds accesses
* add test to make Miri able to detect OOB in memmove
* run Miri on CI
1 parent 974d721 commit 4df7a8d

File tree

4 files changed: +183 -32 lines changed

  .github/workflows/main.yml
  ci/miri.sh
  compiler-builtins/src/mem/impls.rs
  testcrate/tests/mem.rs

.github/workflows/main.yml

+16
@@ -160,6 +160,21 @@ jobs:
         rm -rf /tmp/.buildx-cache
         mv /tmp/.buildx-cache-new /tmp/.buildx-cache
 
+  miri:
+    name: Miri
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        submodules: true
+    - name: Install Rust (rustup)
+      run: rustup update nightly --no-self-update && rustup default nightly
+      shell: bash
+    - run: rustup component add miri
+    - run: cargo miri setup
+    - uses: Swatinem/rust-cache@v2
+    - run: ./ci/miri.sh
+
   rustfmt:
     name: Rustfmt
     runs-on: ubuntu-latest
@@ -190,6 +205,7 @@ jobs:
     - test
     - rustfmt
     - clippy
+    - miri
     runs-on: ubuntu-latest
     # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency
     # failed" as success. So we have to do some contortions to ensure the job fails if any of its

ci/miri.sh

+16
@@ -0,0 +1,16 @@
+#!/bin/bash
+set -ex
+
+# We need Tree Borrows as some of our raw pointer patterns are not
+# compatible with Stacked Borrows.
+export MIRIFLAGS="-Zmiri-tree-borrows"
+
+# One target that sets `mem-unaligned` and one that does not,
+# and a big-endian target.
+TARGETS=(x86_64-unknown-linux-gnu
+    armv7-unknown-linux-gnueabihf
+    s390x-unknown-linux-gnu)
+for TARGET in "${TARGETS[@]}"; do
+    # Only run the `mem` tests to avoid this taking too long.
+    cargo miri test --manifest-path testcrate/Cargo.toml --features no-asm --target $TARGET -- mem
+done

compiler-builtins/src/mem/impls.rs

+130 -30
@@ -41,6 +41,72 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
     core::mem::transmute(x_read)
 }
 
+/// Loads a `T`-sized chunk from `src` into `dst` at offset `offset`, if that does not exceed
+/// `load_sz`. The offset pointers must both be `T`-aligned. Returns the new offset, advanced by the
+/// chunk size if a load happened.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_chunk_aligned<T: Copy>(
+    src: *const usize,
+    dst: *mut usize,
+    load_sz: usize,
+    offset: usize,
+) -> usize {
+    let chunk_sz = core::mem::size_of::<T>();
+    if (load_sz & chunk_sz) != 0 {
+        *dst.wrapping_byte_add(offset).cast::<T>() = *src.wrapping_byte_add(offset).cast::<T>();
+        offset | chunk_sz
+    } else {
+        offset
+    }
+}
+
+/// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
+/// read with the out-of-bounds part filled with 0s.
+/// `load_sz` must be strictly less than `WORD_SIZE`.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
+    debug_assert!(load_sz < WORD_SIZE);
+    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
+    // (since `load_sz < WORD_SIZE`).
+    const { assert!(WORD_SIZE <= 8) };
+
+    let mut i = 0;
+    let mut out = 0usize;
+    // We load in decreasing order, so the pointers remain sufficiently aligned for the next step.
+    i = load_chunk_aligned::<u32>(src, &raw mut out, load_sz, i);
+    i = load_chunk_aligned::<u16>(src, &raw mut out, load_sz, i);
+    i = load_chunk_aligned::<u8>(src, &raw mut out, load_sz, i);
+    debug_assert!(i == load_sz);
+    out
+}
+
+/// Load `load_sz` many bytes from `src.wrapping_byte_add(WORD_SIZE - load_sz)`. `src` must be
+/// `usize`-aligned. The bytes are returned as the *last* bytes of the return value, i.e., this acts
+/// as if we had done a `usize` read from `src`, with the out-of-bounds part filled with 0s.
+/// `load_sz` must be strictly less than `WORD_SIZE`.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
+    debug_assert!(load_sz < WORD_SIZE);
+    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
+    // (since `load_sz < WORD_SIZE`).
+    const { assert!(WORD_SIZE <= 8) };
+
+    let mut i = 0;
+    let mut out = 0usize;
+    // Obtain pointers pointing to the beginning of the range we want to load.
+    let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz);
+    let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz);
+    // We load in increasing order, so by the time we reach `u16` things are 2-aligned etc.
+    i = load_chunk_aligned::<u8>(src_shifted, out_shifted, load_sz, i);
+    i = load_chunk_aligned::<u16>(src_shifted, out_shifted, load_sz, i);
+    i = load_chunk_aligned::<u32>(src_shifted, out_shifted, load_sz, i);
+    debug_assert!(i == load_sz);
+    out
+}
+
 #[inline(always)]
 pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
     #[inline(always)]
@@ -66,40 +132,57 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize)
         }
     }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
     #[cfg(not(feature = "mem-unaligned"))]
     #[inline(always)]
     unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(n > 0 && n % WORD_SIZE == 0);
+        debug_assert!(src.addr() % WORD_SIZE != 0);
+
        let mut dest_usize = dest as *mut usize;
        let dest_end = dest.wrapping_add(n) as *mut usize;
 
        // Calculate the misalignment offset and shift needed to reassemble value.
+        // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
        let offset = src as usize & WORD_MASK;
        let shift = offset * 8;
 
        // Realign src
-        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-        // This will read (but won't use) bytes out of bound.
-        // cfg needed because not all targets will have atomic loads that can be lowered
-        // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-        #[cfg(target_has_atomic_load_store = "ptr")]
-        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-        #[cfg(not(target_has_atomic_load_store = "ptr"))]
-        let mut prev_word = core::ptr::read_volatile(src_aligned);
+        let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize;
+        let mut prev_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
 
-        while dest_usize < dest_end {
+        while dest_usize.wrapping_add(1) < dest_end {
            src_aligned = src_aligned.wrapping_add(1);
            let cur_word = *src_aligned;
-            #[cfg(target_endian = "little")]
-            let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
-            #[cfg(target_endian = "big")]
-            let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
+            let reassembled = if cfg!(target_endian = "little") {
+                prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift)
+            } else {
+                prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift)
+            };
            prev_word = cur_word;
 
-            *dest_usize = resembled;
+            *dest_usize = reassembled;
            dest_usize = dest_usize.wrapping_add(1);
        }
+
+        // There's one more element left to go, and we can't use the loop for that as on the `src` side,
+        // it is partially out-of-bounds.
+        src_aligned = src_aligned.wrapping_add(1);
+        let cur_word = load_aligned_partial(src_aligned, offset);
+        let reassembled = if cfg!(target_endian = "little") {
+            prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift)
+        } else {
+            prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift)
+        };
+        // prev_word does not matter any more
+
+        *dest_usize = reassembled;
+        // dest_usize does not matter any more
    }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
    #[cfg(feature = "mem-unaligned")]
    #[inline(always)]
    unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
@@ -164,40 +247,57 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
        }
    }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
    #[cfg(not(feature = "mem-unaligned"))]
    #[inline(always)]
    unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(n > 0 && n % WORD_SIZE == 0);
+        debug_assert!(src.addr() % WORD_SIZE != 0);
+
        let mut dest_usize = dest as *mut usize;
-        let dest_start = dest.wrapping_sub(n) as *mut usize;
+        let dest_start = dest.wrapping_sub(n) as *mut usize; // we're moving towards the start
 
        // Calculate the misalignment offset and shift needed to reassemble value.
+        // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
        let offset = src as usize & WORD_MASK;
        let shift = offset * 8;
 
-        // Realign src_aligned
-        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-        // This will read (but won't use) bytes out of bound.
-        // cfg needed because not all targets will have atomic loads that can be lowered
-        // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-        #[cfg(target_has_atomic_load_store = "ptr")]
-        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-        #[cfg(not(target_has_atomic_load_store = "ptr"))]
-        let mut prev_word = core::ptr::read_volatile(src_aligned);
+        // Realign src
+        let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize;
+        let mut prev_word = load_aligned_partial(src_aligned, offset);
 
-        while dest_start < dest_usize {
+        while dest_start.wrapping_add(1) < dest_usize {
            src_aligned = src_aligned.wrapping_sub(1);
            let cur_word = *src_aligned;
-            #[cfg(target_endian = "little")]
-            let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
-            #[cfg(target_endian = "big")]
-            let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
+            let reassembled = if cfg!(target_endian = "little") {
+                prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift
+            } else {
+                prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift
+            };
            prev_word = cur_word;
 
            dest_usize = dest_usize.wrapping_sub(1);
-            *dest_usize = resembled;
+            *dest_usize = reassembled;
        }
+
+        // There's one more element left to go, and we can't use the loop for that as on the `src` side,
+        // it is partially out-of-bounds.
+        src_aligned = src_aligned.wrapping_sub(1);
+        let cur_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
+        let reassembled = if cfg!(target_endian = "little") {
+            prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift
+        } else {
+            prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift
+        };
+        // prev_word does not matter any more
+
+        dest_usize = dest_usize.wrapping_sub(1);
+        *dest_usize = reassembled;
    }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
    #[cfg(feature = "mem-unaligned")]
    #[inline(always)]
    unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
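For readers skimming the diff, here is a minimal standalone sketch (not the crate's code) of the two ideas the new helpers implement: a partial word load that never touches bytes past `load_sz`, and the shift-based reassembly of a misaligned word from two aligned reads. It models an 8-byte little-endian word with `u64`; `load_partial_le` and `reassemble_le` are hypothetical names introduced only for this illustration.

// Illustrative sketch only; these helpers are not part of compiler-builtins.
const WORD_SIZE: usize = 8;

// Emulates the effect of `load_aligned_partial` on little-endian: only the first `load_sz`
// bytes are read, the rest of the word is zero-filled, so no out-of-bounds byte is touched.
fn load_partial_le(bytes: &[u8], load_sz: usize) -> u64 {
    assert!(load_sz < WORD_SIZE && load_sz <= bytes.len());
    let mut word = [0u8; WORD_SIZE];
    word[..load_sz].copy_from_slice(&bytes[..load_sz]);
    u64::from_le_bytes(word)
}

// The little-endian reassembly step from the copy loop: rebuild the misaligned word that
// starts `offset` bytes into `prev_word` from two aligned reads.
fn reassemble_le(prev_word: u64, cur_word: u64, offset: usize) -> u64 {
    assert!(offset > 0 && offset < WORD_SIZE);
    let shift = offset * 8;
    prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift)
}

fn main() {
    // Memory bytes 0x00..=0x0f; two aligned little-endian reads yield these words.
    let prev = u64::from_le_bytes([0, 1, 2, 3, 4, 5, 6, 7]);
    let cur = u64::from_le_bytes([8, 9, 10, 11, 12, 13, 14, 15]);
    // The misaligned word starting at byte offset 3 consists of bytes 3..=10.
    assert_eq!(
        reassemble_le(prev, cur, 3),
        u64::from_le_bytes([3, 4, 5, 6, 7, 8, 9, 10])
    );
    // A partial load of 5 bytes reads nothing past index 4.
    assert_eq!(
        load_partial_le(&[3, 4, 5, 6, 7], 5),
        u64::from_le_bytes([3, 4, 5, 6, 7, 0, 0, 0])
    );
}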

testcrate/tests/mem.rs

+21 -2
@@ -128,11 +128,13 @@ fn memcmp_eq() {
 #[test]
 fn memcmp_ne() {
     let arr1 @ arr2 = gen_arr::<256>();
-    for i in 0..256 {
+    // Reduce iteration count in Miri as it is too slow otherwise.
+    let limit = if cfg!(miri) { 64 } else { 256 };
+    for i in 0..limit {
        let mut diff_arr = arr1;
        diff_arr.0[i] = 127;
        let expect = diff_arr.0[i].cmp(&arr2.0[i]);
-        for k in i + 1..256 {
+        for k in i + 1..limit {
            let result = unsafe { memcmp(diff_arr.0.as_ptr(), arr2.0.as_ptr(), k) };
            assert_eq!(expect, result.cmp(&0));
        }
@@ -230,6 +232,23 @@ fn memmove_backward_aligned() {
    }
 }
 
+#[test]
+fn memmove_misaligned_bounds() {
+    // The above tests have the downside that the addresses surrounding the range-to-copy are all
+    // still in-bounds, so Miri would not actually complain about OOB accesses. So we also test with
+    // an array that has just the right size. We test a few times to avoid it being accidentally
+    // aligned.
+    for _ in 0..8 {
+        let mut arr1 = [0u8; 17];
+        let mut arr2 = [0u8; 17];
+        unsafe {
+            // Copy both ways so we hit both the forward and backward cases.
+            memmove(arr1.as_mut_ptr(), arr2.as_mut_ptr(), 17);
+            memmove(arr2.as_mut_ptr(), arr1.as_mut_ptr(), 17);
+        }
+    }
+}
+
 #[test]
 fn memset_backward_misaligned_nonaligned_start() {
    let mut arr = gen_arr::<32>();
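As a side note on why the exactly-sized buffers matter: Miri only reports an access as out-of-bounds when the touched bytes fall outside an allocation, so padding around the copied range would have masked the old bug. Below is a hedged, standalone sketch of the same idea (not part of this test suite), using exactly-sized heap allocations and `core::ptr::copy` as a stand-in for the `memmove` under test so it compiles on its own.

fn main() {
    // Heap allocations of exactly `n` bytes: under Miri, reading or writing even one byte
    // past the end of either allocation is an error, so an OOB access in the copy cannot hide.
    for n in [1usize, 7, 16, 17, 31] {
        let src: Box<[u8]> = (0..n).map(|i| i as u8).collect();
        let mut dst: Box<[u8]> = vec![0u8; n].into_boxed_slice();
        unsafe {
            // Stand-in for `memmove(dst.as_mut_ptr(), src.as_ptr(), n)`.
            core::ptr::copy(src.as_ptr(), dst.as_mut_ptr(), n);
        }
        assert_eq!(&src[..], &dst[..]);
    }
}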
