copy_misaligned_words: avoid out-of-bounds accesses #799

Merged · 5 commits · Mar 22, 2025
16 changes: 16 additions & 0 deletions .github/workflows/main.yml
@@ -160,6 +160,21 @@ jobs:
rm -rf /tmp/.buildx-cache
mv /tmp/.buildx-cache-new /tmp/.buildx-cache

  miri:
    name: Miri
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Install Rust (rustup)
        run: rustup update nightly --no-self-update && rustup default nightly
        shell: bash
      - run: rustup component add miri
      - run: cargo miri setup
      - uses: Swatinem/rust-cache@v2
      - run: ./ci/miri.sh

rustfmt:
name: Rustfmt
runs-on: ubuntu-latest
@@ -190,6 +205,7 @@ jobs:
- test
- rustfmt
- clippy
- miri
runs-on: ubuntu-latest
# GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency
# failed" as success. So we have to do some contortions to ensure the job fails if any of its
16 changes: 16 additions & 0 deletions ci/miri.sh
@@ -0,0 +1,16 @@
#!/bin/bash
set -ex

# We need Tree Borrows as some of our raw pointer patterns are not
# compatible with Stacked Borrows.
export MIRIFLAGS="-Zmiri-tree-borrows"

# One target that sets `mem-unaligned` and one that does not,
# and a big-endian target.
TARGETS=(x86_64-unknown-linux-gnu
    armv7-unknown-linux-gnueabihf
    s390x-unknown-linux-gnu)
for TARGET in "${TARGETS[@]}"; do
    # Only run the `mem` tests to avoid this taking too long.
    cargo miri test --manifest-path testcrate/Cargo.toml --features no-asm --target $TARGET -- mem
done
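
To reproduce a single run locally (assuming a nightly toolchain with the `miri` component installed), the same invocation works outside CI, e.g. `MIRIFLAGS="-Zmiri-tree-borrows" cargo miri test --manifest-path testcrate/Cargo.toml --features no-asm --target armv7-unknown-linux-gnueabihf -- mem`.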
160 changes: 130 additions & 30 deletions compiler-builtins/src/mem/impls.rs
@@ -41,6 +41,72 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
core::mem::transmute(x_read)
}

/// Loads a `T`-sized chunk from `src` into `dst` at offset `offset`, provided that chunk still
/// fits within `load_sz`. Both pointers must be `T`-aligned at `offset`. Returns the new offset,
/// advanced by the chunk size if a load happened.
#[cfg(not(feature = "mem-unaligned"))]
#[inline(always)]
unsafe fn load_chunk_aligned<T: Copy>(
src: *const usize,
dst: *mut usize,
load_sz: usize,
offset: usize,
) -> usize {
let chunk_sz = core::mem::size_of::<T>();
if (load_sz & chunk_sz) != 0 {
*dst.wrapping_byte_add(offset).cast::<T>() = *src.wrapping_byte_add(offset).cast::<T>();
offset | chunk_sz
} else {
offset
}
}
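
As an aside (not part of the diff): the `load_sz & chunk_sz` test works because the chunk sizes are distinct powers of two, so each test picks out one bit of `load_sz`, and the selected chunks sum to exactly `load_sz`. A minimal standalone sketch:

fn main() {
    // Hypothetical illustration: decompose a 3-byte partial load on an
    // 8-byte-word target into one u16 chunk followed by one u8 chunk.
    let load_sz = 3;
    let chunks: Vec<usize> = [4usize, 2, 1]
        .into_iter()
        .filter(|&chunk_sz| load_sz & chunk_sz != 0)
        .collect();
    assert_eq!(chunks, [2, 1]); // a u16 load, then a u8 load
}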

/// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
/// read with the out-of-bounds part filled with 0s.
/// `load_sz` must be strictly less than `WORD_SIZE`.
#[cfg(not(feature = "mem-unaligned"))]
#[inline(always)]
unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
debug_assert!(load_sz < WORD_SIZE);
// We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
// (since `load_sz < WORD_SIZE`).
const { assert!(WORD_SIZE <= 8) };

let mut i = 0;
let mut out = 0usize;
// We load in decreasing order, so the pointers remain sufficiently aligned for the next step.
i = load_chunk_aligned::<u32>(src, &raw mut out, load_sz, i);
i = load_chunk_aligned::<u16>(src, &raw mut out, load_sz, i);
i = load_chunk_aligned::<u8>(src, &raw mut out, load_sz, i);
debug_assert!(i == load_sz);
out
}
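
For intuition (an editor's sketch, not part of the PR; assumes a 64-bit target), the helper behaves like this safe model: read the first `load_sz` bytes and leave the rest zeroed, exactly as if a full native-endian `usize` load had its out-of-bounds tail replaced by zeros.

fn model_load_aligned_partial(word: &[u8; 8], load_sz: usize) -> usize {
    // Copy the in-bounds prefix, leave the remaining bytes zeroed.
    // (from_ne_bytes takes [u8; 8] only when usize is 8 bytes wide.)
    let mut buf = [0u8; 8];
    buf[..load_sz].copy_from_slice(&word[..load_sz]);
    usize::from_ne_bytes(buf)
}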

/// Load `load_sz` many bytes from `src.wrapping_byte_add(WORD_SIZE - load_sz)`. `src` must be
/// `usize`-aligned. The bytes are returned as the *last* bytes of the return value, i.e., this acts
/// as if we had done a `usize` read from `src`, with the out-of-bounds part filled with 0s.
/// `load_sz` must be strictly less than `WORD_SIZE`.
#[cfg(not(feature = "mem-unaligned"))]
#[inline(always)]
unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
debug_assert!(load_sz < WORD_SIZE);
// We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
// (since `load_sz < WORD_SIZE`).
const { assert!(WORD_SIZE <= 8) };

let mut i = 0;
let mut out = 0usize;
// Obtain pointers pointing to the beginning of the range we want to load.
let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz);
let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz);
// We load in increasing order, so by the time we reach `u16` things are 2-aligned etc.
i = load_chunk_aligned::<u8>(src_shifted, out_shifted, load_sz, i);
i = load_chunk_aligned::<u16>(src_shifted, out_shifted, load_sz, i);
i = load_chunk_aligned::<u32>(src_shifted, out_shifted, load_sz, i);
debug_assert!(i == load_sz);
out
}
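
A matching sketch for the end-anchored variant (same assumptions as above): the bytes come from the end of the word, so the zeros sit at the start.

fn model_load_aligned_end_partial(word: &[u8; 8], load_sz: usize) -> usize {
    // Copy the in-bounds suffix, leave the leading bytes zeroed.
    let mut buf = [0u8; 8];
    buf[8 - load_sz..].copy_from_slice(&word[8 - load_sz..]);
    usize::from_ne_bytes(buf)
}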

#[inline(always)]
pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
#[inline(always)]
@@ -66,40 +132,57 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize)
}
}

/// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
/// `src` *must not* be `usize`-aligned.
#[cfg(not(feature = "mem-unaligned"))]
#[inline(always)]
unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
debug_assert!(n > 0 && n % WORD_SIZE == 0);
debug_assert!(src.addr() % WORD_SIZE != 0);

let mut dest_usize = dest as *mut usize;
let dest_end = dest.wrapping_add(n) as *mut usize;

// Calculate the misalignment offset and shift needed to reassemble value.
// Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
let offset = src as usize & WORD_MASK;
let shift = offset * 8;

// Realign src
-let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-// This will read (but won't use) bytes out of bound.
-// cfg needed because not all targets will have atomic loads that can be lowered
-// (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-#[cfg(target_has_atomic_load_store = "ptr")]
-let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-#[cfg(not(target_has_atomic_load_store = "ptr"))]
-let mut prev_word = core::ptr::read_volatile(src_aligned);
+let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize;
+let mut prev_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);

-while dest_usize < dest_end {
+while dest_usize.wrapping_add(1) < dest_end {
src_aligned = src_aligned.wrapping_add(1);
let cur_word = *src_aligned;
-#[cfg(target_endian = "little")]
-let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
-#[cfg(target_endian = "big")]
-let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
+let reassembled = if cfg!(target_endian = "little") {
+prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift)
+} else {
+prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift)
+};
prev_word = cur_word;

-*dest_usize = resembled;
+*dest_usize = reassembled;
dest_usize = dest_usize.wrapping_add(1);
}

// There's one more element left to go, and we can't use the loop for that as on the `src` side,
// it is partially out-of-bounds.
src_aligned = src_aligned.wrapping_add(1);
let cur_word = load_aligned_partial(src_aligned, offset);
let reassembled = if cfg!(target_endian = "little") {
prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift)
} else {
prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift)
};
// prev_word does not matter any more

*dest_usize = reassembled;
// dest_usize does not matter any more
}
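
A worked example of the reassembly step (an illustration, not part of the diff; assumes little endian, 8-byte words, and `offset = 1`, hence `shift = 8`): each output word takes the top 7 bytes of `prev_word` and the bottom byte of `cur_word`.

fn main() {
    // Aligned source words whose byte values equal their offsets 0x00..=0x0f.
    let prev_word: u64 = 0x0706050403020100;
    let cur_word: u64 = 0x0f0e0d0c0b0a0908;
    let shift = 8; // misalignment of 1 byte, times 8 bits
    let reassembled = prev_word >> shift | cur_word << (64 - shift);
    // A copy starting at byte 1 yields bytes 0x01..=0x08, in order.
    assert_eq!(reassembled, 0x0807060504030201);
}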

/// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
/// `src` *must not* be `usize`-aligned.
#[cfg(feature = "mem-unaligned")]
#[inline(always)]
unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
@@ -164,40 +247,57 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
}
}

/// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
/// `src` *must not* be `usize`-aligned.
#[cfg(not(feature = "mem-unaligned"))]
#[inline(always)]
unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
debug_assert!(n > 0 && n % WORD_SIZE == 0);
debug_assert!(src.addr() % WORD_SIZE != 0);

let mut dest_usize = dest as *mut usize;
-let dest_start = dest.wrapping_sub(n) as *mut usize;
+let dest_start = dest.wrapping_sub(n) as *mut usize; // we're moving towards the start

// Calculate the misalignment offset and shift needed to reassemble value.
// Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
let offset = src as usize & WORD_MASK;
let shift = offset * 8;

-// Realign src_aligned
-let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-// This will read (but won't use) bytes out of bound.
-// cfg needed because not all targets will have atomic loads that can be lowered
-// (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-#[cfg(target_has_atomic_load_store = "ptr")]
-let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-#[cfg(not(target_has_atomic_load_store = "ptr"))]
-let mut prev_word = core::ptr::read_volatile(src_aligned);
+// Realign src
+let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize;
+let mut prev_word = load_aligned_partial(src_aligned, offset);

-while dest_start < dest_usize {
+while dest_start.wrapping_add(1) < dest_usize {
src_aligned = src_aligned.wrapping_sub(1);
let cur_word = *src_aligned;
-#[cfg(target_endian = "little")]
-let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
-#[cfg(target_endian = "big")]
-let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
+let reassembled = if cfg!(target_endian = "little") {
+prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift
+} else {
+prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift
+};
prev_word = cur_word;

dest_usize = dest_usize.wrapping_sub(1);
-*dest_usize = resembled;
+*dest_usize = reassembled;
}

// There's one more element left to go, and we can't use the loop for that as on the `src` side,
// it is partially out-of-bounds.
src_aligned = src_aligned.wrapping_sub(1);
let cur_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
let reassembled = if cfg!(target_endian = "little") {
prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift
} else {
prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift
};
// prev_word does not matter any more

dest_usize = dest_usize.wrapping_sub(1);
*dest_usize = reassembled;
}
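
The backward path shifts in the opposite direction because it walks from high addresses to low; with the same bytes and `offset = 1` as the forward example (again an illustration assuming little endian and 64-bit words):

fn main() {
    // Walking backward: `prev_word` is the higher-addressed word, loaded first.
    let prev_word: u64 = 0x0f0e0d0c0b0a0908;
    let cur_word: u64 = 0x0706050403020100;
    let shift = 8;
    let reassembled = prev_word << (64 - shift) | cur_word >> shift;
    // Same window of bytes 0x01..=0x08 as in the forward example.
    assert_eq!(reassembled, 0x0807060504030201);
}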

/// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
/// `src` *must not* be `usize`-aligned.
#[cfg(feature = "mem-unaligned")]
#[inline(always)]
unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
23 changes: 21 additions & 2 deletions testcrate/tests/mem.rs
@@ -128,11 +128,13 @@ fn memcmp_eq() {
#[test]
fn memcmp_ne() {
let arr1 @ arr2 = gen_arr::<256>();
-for i in 0..256 {
+// Reduce iteration count in Miri as it is too slow otherwise.
+let limit = if cfg!(miri) { 64 } else { 256 };
+for i in 0..limit {
let mut diff_arr = arr1;
diff_arr.0[i] = 127;
let expect = diff_arr.0[i].cmp(&arr2.0[i]);
-for k in i + 1..256 {
+for k in i + 1..limit {
let result = unsafe { memcmp(diff_arr.0.as_ptr(), arr2.0.as_ptr(), k) };
assert_eq!(expect, result.cmp(&0));
}
@@ -230,6 +232,23 @@ fn memmove_backward_aligned() {
}
}

#[test]
fn memmove_misaligned_bounds() {
// The above tests have the downside that the addresses surrounding the range-to-copy are all
// still in-bounds, so Miri would not actually complain about OOB accesses. So we also test with
// an array that has just the right size. We test a few times to avoid it being accidentally
// aligned.
for _ in 0..8 {
let mut arr1 = [0u8; 17];
let mut arr2 = [0u8; 17];
unsafe {
// Copy both ways so we hit both the forward and backward cases.
memmove(arr1.as_mut_ptr(), arr2.as_mut_ptr(), 17);
memmove(arr2.as_mut_ptr(), arr1.as_mut_ptr(), 17);
}
}
}

#[test]
fn memset_backward_misaligned_nonaligned_start() {
let mut arr = gen_arr::<32>();