From cf8749eda5496c35829c028e86e2da7c6007d192 Mon Sep 17 00:00:00 2001 From: Ben Kimock Date: Mon, 14 Feb 2022 21:31:56 -0500 Subject: [PATCH] Fix provenance UB and alignment UB (#27) Fix provenance UB and alignment UB These issues were unlikely to cause any miscompilations today, but it's best to fix them right away. Thanks to miri for finding these issues! Alignment: In the default configuration (**not** gecko-ffi), types with alignment equal to the header size (16 on x64; 8 on x86) were incorrectly handled by an optimization. This could result in misaligned empty slices being produced when accessing a 0-capacity ThinVec (which is produced by ThinVec::new()). Such a slice of course can't be used to load or store any memory, but Rust requires references (including slices) to always be aligned even if they aren't ever used to load/store memory. The destructor for ThinVec creates a slice to its elements, so this dropping any ThinVec::new() with such a highly aligned type was technically UB. That said, it's quite unlikely it could have lead to miscompilations in practice, since the compiler would be unable to "see" the misalignment and Rust doesn't currently do any runtime optimizations based on alignment (such as Option-style enum layout optimizations). Types with higher and lower alignments were handled fine, it was just this *specific* alignment that was taking the wrong path. The specific issue is that 0-cap ThinVecs all point to statically allocated empty singleton. This singleton is designed to Do The Right Thing without additional branches for many operations, meaning we get non-allocating ThinVec::new() *without* needing to compromise the performance of most other operations. One such "just work" situation is the data pointer, which will generally just be one-past-the-end of the empty singleton, which is in-bounds for the singleton. But for highly-aligned elements, the data pointer needs to be "padded" and that would go beyond the static singleton's "allocation". So in general we actually check if cap==0 and return NonNull::dangling for the data pointer. The *optimization* was to identify the cases where no such padding was required and statically eliminate the cap == 0 path. Unfortunately "has no padding" isn't sufficient: in the specific case of align==header_size, there is no padding but the singleton is underaligned! In this case the NonNull::dangling path must be taken. The code has been fixed and the optimization now works correctly for all alignments. Provenance: Many places ran afoul of Stacked Borrows. The issues found were: A &Header cannot be used to get a useful pointer to data beyond it, because the pointer from the as-cast of the &Header only has provenance over the Header. After a set_len call that decreases the length, it is invalid to create a slice then try to get_unchecked into the region between the old and new length, because the reference in the slice that the ThinVec now Derefs to does not have provenance over that region. Alternatively, this is UB because the docs stipulate that you're not allowed to use `get_unchecked` to index out of bounds. I think the use of align_offset in tests is subtly wrong, align_offset seems to be for optimizations only. The docs say that a valid implementation may always return usize::MAX. --- src/lib.rs | 140 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 104 insertions(+), 36 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 19e38b8..f1d10c6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -247,9 +247,22 @@ mod impl_details { } } -/// The header of a ThinVec. -/// -/// The _cap can be a bitfield, so use accessors to avoid trouble. +// The header of a ThinVec. +// +// The _cap can be a bitfield, so use accessors to avoid trouble. +// +// In "real" gecko-ffi mode, the empty singleton will be aligned +// to 8 by gecko. But in tests we have to provide the singleton +// ourselves, and Rust makes it hard to "just" align a static. +// To avoid messing around with a wrapper type around the +// singleton *just* for tests, we just force all headers to be +// aligned to 8 in this weird "zombie" gecko mode. +// +// This shouldn't affect runtime layout (padding), but it will +// result in us asking the allocator to needlessly overalign +// non-empty ThinVecs containing align < 8 types in +// zombie-mode, but not in "real" geck-ffi mode. Minor. +#[cfg_attr(all(feature = "gecko-ffi", any(test, miri)), repr(align(8)))] #[repr(C)] struct Header { _len: SizeType, @@ -264,23 +277,6 @@ impl Header { fn set_len(&mut self, len: usize) { self._len = assert_size(len); } - - fn data(&self) -> *mut T { - let header_size = mem::size_of::
(); - let padding = padding::(); - - let ptr = self as *const Header as *mut Header as *mut u8; - - unsafe { - if padding > 0 && self.cap() == 0 { - // The empty header isn't well-aligned, just make an aligned one up - NonNull::dangling().as_ptr() - } else { - // This could technically result in overflow, but padding would have to be absurdly large for this to occur. - ptr.add(header_size + padding) as *mut T - } - } - } } #[cfg(feature = "gecko-ffi")] @@ -321,10 +317,10 @@ impl Header { /// optimize everything to not do that (basically, make ptr == len and branch /// on size == 0 in every method), but it's a bunch of work for something that /// doesn't matter much. -#[cfg(any(not(feature = "gecko-ffi"), test))] +#[cfg(any(not(feature = "gecko-ffi"), test, miri))] static EMPTY_HEADER: Header = Header { _len: 0, _cap: 0 }; -#[cfg(all(feature = "gecko-ffi", not(test)))] +#[cfg(all(feature = "gecko-ffi", not(test), not(miri)))] extern "C" { #[link_name = "sEmptyTArrayHeader"] static EMPTY_HEADER: Header; @@ -442,17 +438,26 @@ macro_rules! thin_vec { impl ThinVec { pub fn new() -> ThinVec { - unsafe { - ThinVec { - ptr: NonNull::new_unchecked(&EMPTY_HEADER as *const Header as *mut Header), - boo: PhantomData, - } - } + ThinVec::with_capacity(0) } pub fn with_capacity(cap: usize) -> ThinVec { + // `padding` contains ~static assertions against types that are + // incompatible with the current feature flags. We also call it to + // invoke these assertions when getting a pointer to the `ThinVec` + // contents, but since we also get a pointer to the contents in the + // `Drop` impl, trippng an assertion along that code path causes a + // double panic. We duplicate the assertion here so that it is + // testable, + let _ = padding::(); + if cap == 0 { - ThinVec::new() + unsafe { + ThinVec { + ptr: NonNull::new_unchecked(&EMPTY_HEADER as *const Header as *mut Header), + boo: PhantomData, + } + } } else { ThinVec { ptr: header_with_capacity::(cap), @@ -470,7 +475,47 @@ impl ThinVec { unsafe { self.ptr.as_ref() } } fn data_raw(&self) -> *mut T { - self.header().data() + // `padding` contains ~static assertions against types that are + // incompatible with the current feature flags. Even if we don't + // care about its result, we should always call it before getting + // a data pointer to guard against invalid types! + let padding = padding::(); + + // Although we ensure the data array is aligned when we allocate, + // we can't do that with the empty singleton. So when it might not + // be properly aligned, we substitute in the NonNull::dangling + // which *is* aligned. + // + // To minimize dynamic branches on `cap` for all accesses + // to the data, we include this guard which should only involve + // compile-time constants. Ideally this should result in the branch + // only be included for types with excessive alignment. + let empty_header_is_aligned = if cfg!(feature = "gecko-ffi") { + // in gecko-ffi mode `padding` will ensure this under + // the assumption that the header has size 8 and the + // static empty singleton is aligned to 8. + true + } else { + // In non-gecko-ffi mode, the empty singleton is just + // naturally aligned to the Header. If the Header is at + // least as aligned as T *and* the padding would have + // been 0, then one-past-the-end of the empty singleton + // *is* a valid data pointer and we can remove the + // `dangling` special case. + mem::align_of::
() >= mem::align_of::() && padding == 0 + }; + + unsafe { + if !empty_header_is_aligned && self.header().cap() == 0 { + NonNull::dangling().as_ptr() + } else { + // This could technically result in overflow, but padding + // would have to be absurdly large for this to occur. + let header_size = mem::size_of::
(); + let ptr = self.ptr.as_ptr() as *mut u8; + ptr.add(header_size + padding) as *mut T + } + } } // This is unsafe when the header is EMPTY_HEADER. @@ -565,7 +610,7 @@ impl ThinVec { // doesn't re-drop the just-failed value. let new_len = self.len() - 1; self.set_len(new_len); - ptr::drop_in_place(self.get_unchecked_mut(new_len)); + ptr::drop_in_place(self.data_raw().add(new_len)); } } } @@ -915,7 +960,7 @@ impl ThinVec { (*ptr).set_cap(new_cap); self.ptr = NonNull::new_unchecked(ptr); } else { - let mut new_header = header_with_capacity::(new_cap); + let new_header = header_with_capacity::(new_cap); // If we get here and have a non-zero len, then we must be handling // a gecko auto array, and we have items in a stack buffer. We shouldn't @@ -931,8 +976,9 @@ impl ThinVec { let len = self.len(); if cfg!(feature = "gecko-ffi") && len > 0 { new_header - .as_mut() - .data::() + .as_ptr() + .add(1) + .cast::() .copy_from_nonoverlapping(self.data_raw(), len); self.set_len(0); } @@ -1373,6 +1419,28 @@ mod tests { ThinVec::::new(); } + #[test] + fn test_data_ptr_alignment() { + let v = ThinVec::::new(); + assert!(v.data_raw() as usize % 2 == 0); + + let v = ThinVec::::new(); + assert!(v.data_raw() as usize % 4 == 0); + + let v = ThinVec::::new(); + assert!(v.data_raw() as usize % 8 == 0); + } + + #[test] + #[cfg_attr(feature = "gecko-ffi", should_panic)] + fn test_overaligned_type_is_rejected_for_gecko_ffi_mode() { + #[repr(align(16))] + struct Align16(u8); + + let v = ThinVec::::new(); + assert!(v.data_raw() as usize % 16 == 0); + } + #[test] fn test_partial_eq() { assert_eq!(thin_vec![0], thin_vec![0]); @@ -2654,9 +2722,9 @@ mod std_tests { macro_rules! assert_aligned_head_ptr { ($typename:ty) => {{ let v: ThinVec<$typename> = ThinVec::with_capacity(1 /* ensure allocation */); - let head_ptr: *mut $typename = v.header().data::<$typename>(); + let head_ptr: *mut $typename = v.data_raw(); assert_eq!( - head_ptr.align_offset(std::mem::align_of::<$typename>()), + head_ptr as usize % std::mem::align_of::<$typename>(), 0, "expected Header::data<{}> to be aligned", stringify!($typename)