From 31497db60513ba6f34b6acf68c30ac0b710d8d55 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Mon, 27 Sep 2021 14:22:44 -0700 Subject: [PATCH] Use/require byte equality in VarZeroVec (#1103) * Add equality and same-slice invariants to ULE traits * Add assert for same-slice invariant in VarZeroVec * use equality guarantee in vzv partialeq impl * Don't construct the SliceComponents for get_encoded_slice() * review fixes * Add default impls of as_byte_slice() * Add default impl for ULE::from_byte_slice_unchecked() --- utils/zerovec/src/ule/chars.rs | 15 ----- utils/zerovec/src/ule/mod.rs | 75 +++++++++++++++++++--- utils/zerovec/src/ule/plain.rs | 14 ---- utils/zerovec/src/varzerovec/components.rs | 10 ++- utils/zerovec/src/varzerovec/mod.rs | 10 ++- utils/zerovec/src/varzerovec/owned.rs | 5 ++ utils/zerovec/src/varzerovec/ule.rs | 5 +- 7 files changed, 86 insertions(+), 48 deletions(-) diff --git a/utils/zerovec/src/ule/chars.rs b/utils/zerovec/src/ule/chars.rs index c938f79a9e4..38b38d3a673 100644 --- a/utils/zerovec/src/ule/chars.rs +++ b/utils/zerovec/src/ule/chars.rs @@ -55,21 +55,6 @@ unsafe impl ULE for CharULE { // Safe because Self is transparent over [u8; 4] and has been validated Ok(unsafe { Self::from_byte_slice_unchecked(bytes) }) } - - #[inline] - unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &[Self] { - let data = bytes.as_ptr(); - let len = bytes.len() / 4; - core::slice::from_raw_parts(data as *const Self, len) - } - - #[inline] - fn as_byte_slice(slice: &[Self]) -> &[u8] { - let data = slice.as_ptr(); - let len = slice.len() * 4; - // Safe because Self is transparent over [u8; 4] - unsafe { core::slice::from_raw_parts(data as *const u8, len) } - } } impl AsULE for char { diff --git a/utils/zerovec/src/ule/mod.rs b/utils/zerovec/src/ule/mod.rs index 4d5ca14a0a7..2dbc96ab4cb 100644 --- a/utils/zerovec/src/ule/mod.rs +++ b/utils/zerovec/src/ule/mod.rs @@ -13,13 +13,31 @@ mod vec; pub use chars::CharULE; pub use plain::PlainOldULE; +use core::{mem, slice}; + /// Fixed-width, byte-aligned data that can be cast to and from a little-endian byte slice. /// /// "ULE" stands for "Unaligned little-endian" /// /// # Safety /// -/// See the safety invariant documented on [`Self::from_byte_slice_unchecked()`] to implement this trait. +/// There must be no padding bytes involved in this type: [`Self::as_byte_slice()`] *must* return +/// a slice of initialized bytes provided that `Self` is initialized. +/// +/// [`ULE::from_bytes_unchecked()`] *must* be implemented to return the same result as [`ULE::parse_byte_slice()`], +/// and both should return slices to the same region of memory. +/// +/// [`ULE::as_byte_slice()`] should return a slice that is the in-memory representation of `Self`, +/// i.e. it should be just a pointer cast, and `mem::size_of_val(self) == mem::size_of_val(self.as_byte_slice())`= +/// +/// # Equality invariant +/// +/// A non-safety invariant is that if `Self` implements `PartialEq`, it *must* be logically equivalent to +/// byte equality on `.as_byte_slice()`. If `PartialEq` does not imply byte-for-byte equality, then +/// `.parse_byte_slice()` should return an error code for any values that are not in canonical form. +/// +/// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may result in +/// unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`. pub unsafe trait ULE where Self: Sized, @@ -39,6 +57,8 @@ where /// correct type. It is up to the implementation to reason about the safety. Keep in mind that /// `&[Self]` and `&[u8]` may have different lengths. /// + /// Implementors must return a slice to the same region of memory if parsing succeeds. + /// /// Ideally, implementations call [`ULE::from_byte_slice_unchecked()`] after validation. fn parse_byte_slice(bytes: &[u8]) -> Result<&[Self], Self::Error>; @@ -57,20 +77,35 @@ where /// ## Implementors /// This method _must_ be implemented to return the same result as [`ULE::parse_byte_slice()`]. /// + /// Implementors must return a slice to the same region of memory. The default implementation + /// does this directly. + /// /// Implementations of this method may involve `unsafe{}` blocks to cast the pointer to the /// correct type. It is up to the implementation to reason about the safety, assuming the invariant /// above. - unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &[Self]; + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &[Self] { + let data = bytes.as_ptr(); + let len = bytes.len() / mem::size_of::(); + core::slice::from_raw_parts(data as *const Self, len) + } /// Given `&[Self]`, returns a `&[u8]` with the same lifetime. /// /// # Safety /// - /// In most cases, the implementation of this function should involve re-casting the pointer. + /// The implementation of this function should involve re-casting the pointer. /// It is up to the implementation to reason about the safety. Keep in mind that `&[Self]` and - /// `&[u8]` may have different lengths. + /// `&[u8]` may have different lengths (but should cover the same data). + /// + /// The default implementation already does this, however it can be overridden with + /// a fully-safe method if possible. + #[inline] #[allow(clippy::wrong_self_convention)] // https://github.com/rust-lang/rust-clippy/issues/7219 - fn as_byte_slice(slice: &[Self]) -> &[u8]; + fn as_byte_slice(slice: &[Self]) -> &[u8] { + unsafe { + slice::from_raw_parts(slice as *const [Self] as *const u8, mem::size_of_val(slice)) + } + } } /// A trait for any type that has a 1:1 mapping with an unaligned little-endian (ULE) type. @@ -192,8 +227,22 @@ pub trait AsVarULE { /// There must be no padding bytes involved in this type: [`Self::as_byte_slice()`] MUST return /// a slice of initialized bytes provided that `Self` is initialized. /// -/// [`VarULE::from_byte_slice_unchecked()`] _must_ be implemented to return the same result +/// [`VarULE::from_byte_slice_unchecked()`] *must* be implemented to return the same result /// as [`VarULE::parse_byte_slice()`] provided both are passed the same validly parsing byte slices. +/// Both should return a pointer to the same region of memory that was passed in, covering that region +/// completely. +/// +/// [`VarULE::as_byte_slice()`] should return a slice that is the in-memory representation of `Self`, +/// i.e. it should be just a pointer cast, and `mem::size_of_val(self) == mem::size_of_val(self.as_byte_slice())` +/// +/// # Equality invariant +/// +/// A non-safety invariant is that if `Self` implements `PartialEq`, it *must* be logically equivalent to +/// byte equality on `.as_byte_slice()`. If `PartialEq` does not imply byte-for-byte equality, then +/// `.parse_byte_slice()` should return an error code for any values that are not in canonical form. +/// +/// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may result in +/// unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`. pub unsafe trait VarULE: 'static { /// The error type to used by [`VarULE::parse_byte_slice()`] type Error; @@ -207,6 +256,8 @@ pub unsafe trait VarULE: 'static { /// /// Implementations of this method may involve `unsafe{}` blocks to cast the pointer to the /// correct type. It is up to the implementation to reason about the safety. + /// + /// Implementors must return a pointer to the same region of memory if parsing succeeds. fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, Self::Error>; /// Takes a byte slice, `&[u8]`, and return it as `&self` with the same lifetime, assuming that @@ -224,6 +275,8 @@ pub unsafe trait VarULE: 'static { /// ## Implementors /// This method _must_ be implemented to return the same result as [`VarULE::parse_byte_slice()`]. /// + /// Implementors must return a pointer to the same region of memory. + /// /// Implementations of this method may involve `unsafe{}` blocks to cast the pointer to the /// correct type. It is up to the implementation to reason about the safety, assuming the invariant /// above. @@ -233,7 +286,13 @@ pub unsafe trait VarULE: 'static { /// /// # Safety /// - /// In most cases, the implementation of this function should involve re-casting the pointer. + /// The implementation of this function should involve re-casting the pointer. /// It is up to the implementation to reason about the safety. - fn as_byte_slice(&self) -> &[u8]; + /// + /// The default implementation already does this, however it can be overridden with + /// a fully-safe method if possible. + #[inline] + fn as_byte_slice(&self) -> &[u8] { + unsafe { slice::from_raw_parts(self as *const Self as *const u8, mem::size_of_val(self)) } + } } diff --git a/utils/zerovec/src/ule/plain.rs b/utils/zerovec/src/ule/plain.rs index 10f15f2d2fc..21b9eca4806 100644 --- a/utils/zerovec/src/ule/plain.rs +++ b/utils/zerovec/src/ule/plain.rs @@ -35,20 +35,6 @@ macro_rules! impl_byte_slice_size { // Safe because Self is transparent over [u8; $size] Ok(unsafe { Self::from_byte_slice_unchecked(bytes) }) } - #[inline] - unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &[Self] { - let data = bytes.as_ptr(); - let len = bytes.len() / $size; - // Safe because Self is transparent over [u8; $size] - core::slice::from_raw_parts(data as *const Self, len) - } - #[inline] - fn as_byte_slice(slice: &[Self]) -> &[u8] { - let data = slice.as_ptr(); - let len = slice.len() * $size; - // Safe because Self is transparent over [u8; $size] - unsafe { core::slice::from_raw_parts(data as *const u8, len) } - } } impl PlainOldULE<$size> { diff --git a/utils/zerovec/src/varzerovec/components.rs b/utils/zerovec/src/varzerovec/components.rs index 3af275bff09..aaf06e920cf 100644 --- a/utils/zerovec/src/varzerovec/components.rs +++ b/utils/zerovec/src/varzerovec/components.rs @@ -294,12 +294,10 @@ pub fn get_serializable_bytes(elements: &[T]) -> Option> { let mut offset: u32 = 0; for element in elements { vec.extend(&offset.as_unaligned().0); - let len_u32: u32 = element - .as_unaligned() - .as_byte_slice() - .len() - .try_into() - .ok()?; + let ule = element.as_unaligned(); + let slice = ule.as_byte_slice(); + debug_assert_eq!(mem::size_of_val(ule), mem::size_of_val(slice)); + let len_u32: u32 = slice.len().try_into().ok()?; offset = offset.checked_add(len_u32)?; } vec.reserve(offset as usize); diff --git a/utils/zerovec/src/varzerovec/mod.rs b/utils/zerovec/src/varzerovec/mod.rs index 69c574d94ca..8c74e38e593 100644 --- a/utils/zerovec/src/varzerovec/mod.rs +++ b/utils/zerovec/src/varzerovec/mod.rs @@ -406,7 +406,10 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// /// This can be passed back to [`Self::parse_byte_slice()`] pub fn get_encoded_slice(&self) -> &[u8] { - self.get_components().entire_slice() + match self.0 { + VarZeroVecInner::Owned(ref vec) => vec.entire_slice(), + VarZeroVecInner::Borrowed(vec) => vec.entire_slice(), + } } /// For a slice of `T`, get a list of bytes that can be passed to @@ -501,8 +504,9 @@ where { #[inline] fn eq(&self, other: &VarZeroVec<'b, T>) -> bool { - // Note: T implements PartialEq but not T::ULE - self.iter().eq(other.iter()) + // VarULE has an API guarantee that this is equivalent + // to `T::VarULE::eq()` + self.get_encoded_slice().eq(other.get_encoded_slice()) } } diff --git a/utils/zerovec/src/varzerovec/owned.rs b/utils/zerovec/src/varzerovec/owned.rs index f271ed6b564..2708fdf33ed 100644 --- a/utils/zerovec/src/varzerovec/owned.rs +++ b/utils/zerovec/src/varzerovec/owned.rs @@ -110,6 +110,11 @@ impl VarZeroVecOwned { self.get_components().to_vec() } + #[inline] + pub fn entire_slice(&self) -> &[u8] { + &self.entire_slice + } + /// Insert an element at index `idx` pub fn insert(&mut self, index: usize, element: &T) { let len = self.len(); diff --git a/utils/zerovec/src/varzerovec/ule.rs b/utils/zerovec/src/varzerovec/ule.rs index 1fb9cb1d462..a8669207923 100644 --- a/utils/zerovec/src/varzerovec/ule.rs +++ b/utils/zerovec/src/varzerovec/ule.rs @@ -147,8 +147,9 @@ where { #[inline] fn eq(&self, other: &VarZeroVecULE) -> bool { - // Note: T implements PartialEq but not T::ULE - self.iter().eq(other.iter()) + // VarULE has an API guarantee that this is equivalent + // to `T::VarULE::eq()` + self.entire_slice.eq(&other.entire_slice) } }