Skip to content

Commit

Permalink
Use/require byte equality in VarZeroVec (#1103)
Browse files Browse the repository at this point in the history
* Add equality and same-slice invariants to ULE traits

* Add assert for same-slice invariant in VarZeroVec

* use equality guarantee in vzv partialeq impl

* Don't construct the SliceComponents for get_encoded_slice()

* review fixes

* Add default impls of as_byte_slice()

* Add default impl for ULE::from_byte_slice_unchecked()
  • Loading branch information
Manishearth authored Sep 27, 2021
1 parent 89d9c50 commit 31497db
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 48 deletions.
15 changes: 0 additions & 15 deletions utils/zerovec/src/ule/chars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,21 +55,6 @@ unsafe impl ULE for CharULE {
// Safe because Self is transparent over [u8; 4] and has been validated
Ok(unsafe { Self::from_byte_slice_unchecked(bytes) })
}

#[inline]
unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &[Self] {
let data = bytes.as_ptr();
let len = bytes.len() / 4;
core::slice::from_raw_parts(data as *const Self, len)
}

#[inline]
fn as_byte_slice(slice: &[Self]) -> &[u8] {
let data = slice.as_ptr();
let len = slice.len() * 4;
// Safe because Self is transparent over [u8; 4]
unsafe { core::slice::from_raw_parts(data as *const u8, len) }
}
}

impl AsULE for char {
Expand Down
75 changes: 67 additions & 8 deletions utils/zerovec/src/ule/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,31 @@ mod vec;
pub use chars::CharULE;
pub use plain::PlainOldULE;

use core::{mem, slice};

/// Fixed-width, byte-aligned data that can be cast to and from a little-endian byte slice.
///
/// "ULE" stands for "Unaligned little-endian"
///
/// # Safety
///
/// See the safety invariant documented on [`Self::from_byte_slice_unchecked()`] to implement this trait.
/// There must be no padding bytes involved in this type: [`Self::as_byte_slice()`] *must* return
/// a slice of initialized bytes provided that `Self` is initialized.
///
/// [`ULE::from_bytes_unchecked()`] *must* be implemented to return the same result as [`ULE::parse_byte_slice()`],
/// and both should return slices to the same region of memory.
///
/// [`ULE::as_byte_slice()`] should return a slice that is the in-memory representation of `Self`,
/// i.e. it should be just a pointer cast, and `mem::size_of_val(self) == mem::size_of_val(self.as_byte_slice())`=
///
/// # Equality invariant
///
/// A non-safety invariant is that if `Self` implements `PartialEq`, it *must* be logically equivalent to
/// byte equality on `.as_byte_slice()`. If `PartialEq` does not imply byte-for-byte equality, then
/// `.parse_byte_slice()` should return an error code for any values that are not in canonical form.
///
/// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may result in
/// unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`.
pub unsafe trait ULE
where
Self: Sized,
Expand All @@ -39,6 +57,8 @@ where
/// correct type. It is up to the implementation to reason about the safety. Keep in mind that
/// `&[Self]` and `&[u8]` may have different lengths.
///
/// Implementors must return a slice to the same region of memory if parsing succeeds.
///
/// Ideally, implementations call [`ULE::from_byte_slice_unchecked()`] after validation.
fn parse_byte_slice(bytes: &[u8]) -> Result<&[Self], Self::Error>;

Expand All @@ -57,20 +77,35 @@ where
/// ## Implementors
/// This method _must_ be implemented to return the same result as [`ULE::parse_byte_slice()`].
///
/// Implementors must return a slice to the same region of memory. The default implementation
/// does this directly.
///
/// Implementations of this method may involve `unsafe{}` blocks to cast the pointer to the
/// correct type. It is up to the implementation to reason about the safety, assuming the invariant
/// above.
unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &[Self];
unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &[Self] {
let data = bytes.as_ptr();
let len = bytes.len() / mem::size_of::<Self>();
core::slice::from_raw_parts(data as *const Self, len)
}

/// Given `&[Self]`, returns a `&[u8]` with the same lifetime.
///
/// # Safety
///
/// In most cases, the implementation of this function should involve re-casting the pointer.
/// The implementation of this function should involve re-casting the pointer.
/// It is up to the implementation to reason about the safety. Keep in mind that `&[Self]` and
/// `&[u8]` may have different lengths.
/// `&[u8]` may have different lengths (but should cover the same data).
///
/// The default implementation already does this, however it can be overridden with
/// a fully-safe method if possible.
#[inline]
#[allow(clippy::wrong_self_convention)] // https://github.com/rust-lang/rust-clippy/issues/7219
fn as_byte_slice(slice: &[Self]) -> &[u8];
fn as_byte_slice(slice: &[Self]) -> &[u8] {
unsafe {
slice::from_raw_parts(slice as *const [Self] as *const u8, mem::size_of_val(slice))
}
}
}

/// A trait for any type that has a 1:1 mapping with an unaligned little-endian (ULE) type.
Expand Down Expand Up @@ -192,8 +227,22 @@ pub trait AsVarULE {
/// There must be no padding bytes involved in this type: [`Self::as_byte_slice()`] MUST return
/// a slice of initialized bytes provided that `Self` is initialized.
///
/// [`VarULE::from_byte_slice_unchecked()`] _must_ be implemented to return the same result
/// [`VarULE::from_byte_slice_unchecked()`] *must* be implemented to return the same result
/// as [`VarULE::parse_byte_slice()`] provided both are passed the same validly parsing byte slices.
/// Both should return a pointer to the same region of memory that was passed in, covering that region
/// completely.
///
/// [`VarULE::as_byte_slice()`] should return a slice that is the in-memory representation of `Self`,
/// i.e. it should be just a pointer cast, and `mem::size_of_val(self) == mem::size_of_val(self.as_byte_slice())`
///
/// # Equality invariant
///
/// A non-safety invariant is that if `Self` implements `PartialEq`, it *must* be logically equivalent to
/// byte equality on `.as_byte_slice()`. If `PartialEq` does not imply byte-for-byte equality, then
/// `.parse_byte_slice()` should return an error code for any values that are not in canonical form.
///
/// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may result in
/// unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`.
pub unsafe trait VarULE: 'static {
/// The error type to used by [`VarULE::parse_byte_slice()`]
type Error;
Expand All @@ -207,6 +256,8 @@ pub unsafe trait VarULE: 'static {
///
/// Implementations of this method may involve `unsafe{}` blocks to cast the pointer to the
/// correct type. It is up to the implementation to reason about the safety.
///
/// Implementors must return a pointer to the same region of memory if parsing succeeds.
fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, Self::Error>;

/// Takes a byte slice, `&[u8]`, and return it as `&self` with the same lifetime, assuming that
Expand All @@ -224,6 +275,8 @@ pub unsafe trait VarULE: 'static {
/// ## Implementors
/// This method _must_ be implemented to return the same result as [`VarULE::parse_byte_slice()`].
///
/// Implementors must return a pointer to the same region of memory.
///
/// Implementations of this method may involve `unsafe{}` blocks to cast the pointer to the
/// correct type. It is up to the implementation to reason about the safety, assuming the invariant
/// above.
Expand All @@ -233,7 +286,13 @@ pub unsafe trait VarULE: 'static {
///
/// # Safety
///
/// In most cases, the implementation of this function should involve re-casting the pointer.
/// The implementation of this function should involve re-casting the pointer.
/// It is up to the implementation to reason about the safety.
fn as_byte_slice(&self) -> &[u8];
///
/// The default implementation already does this, however it can be overridden with
/// a fully-safe method if possible.
#[inline]
fn as_byte_slice(&self) -> &[u8] {
unsafe { slice::from_raw_parts(self as *const Self as *const u8, mem::size_of_val(self)) }
}
}
14 changes: 0 additions & 14 deletions utils/zerovec/src/ule/plain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,6 @@ macro_rules! impl_byte_slice_size {
// Safe because Self is transparent over [u8; $size]
Ok(unsafe { Self::from_byte_slice_unchecked(bytes) })
}
#[inline]
unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &[Self] {
let data = bytes.as_ptr();
let len = bytes.len() / $size;
// Safe because Self is transparent over [u8; $size]
core::slice::from_raw_parts(data as *const Self, len)
}
#[inline]
fn as_byte_slice(slice: &[Self]) -> &[u8] {
let data = slice.as_ptr();
let len = slice.len() * $size;
// Safe because Self is transparent over [u8; $size]
unsafe { core::slice::from_raw_parts(data as *const u8, len) }
}
}

impl PlainOldULE<$size> {
Expand Down
10 changes: 4 additions & 6 deletions utils/zerovec/src/varzerovec/components.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,12 +294,10 @@ pub fn get_serializable_bytes<T: AsVarULE>(elements: &[T]) -> Option<Vec<u8>> {
let mut offset: u32 = 0;
for element in elements {
vec.extend(&offset.as_unaligned().0);
let len_u32: u32 = element
.as_unaligned()
.as_byte_slice()
.len()
.try_into()
.ok()?;
let ule = element.as_unaligned();
let slice = ule.as_byte_slice();
debug_assert_eq!(mem::size_of_val(ule), mem::size_of_val(slice));
let len_u32: u32 = slice.len().try_into().ok()?;
offset = offset.checked_add(len_u32)?;
}
vec.reserve(offset as usize);
Expand Down
10 changes: 7 additions & 3 deletions utils/zerovec/src/varzerovec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,10 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> {
///
/// This can be passed back to [`Self::parse_byte_slice()`]
pub fn get_encoded_slice(&self) -> &[u8] {
self.get_components().entire_slice()
match self.0 {
VarZeroVecInner::Owned(ref vec) => vec.entire_slice(),
VarZeroVecInner::Borrowed(vec) => vec.entire_slice(),
}
}

/// For a slice of `T`, get a list of bytes that can be passed to
Expand Down Expand Up @@ -501,8 +504,9 @@ where
{
#[inline]
fn eq(&self, other: &VarZeroVec<'b, T>) -> bool {
// Note: T implements PartialEq but not T::ULE
self.iter().eq(other.iter())
// VarULE has an API guarantee that this is equivalent
// to `T::VarULE::eq()`
self.get_encoded_slice().eq(other.get_encoded_slice())
}
}

Expand Down
5 changes: 5 additions & 0 deletions utils/zerovec/src/varzerovec/owned.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ impl<T: AsVarULE> VarZeroVecOwned<T> {
self.get_components().to_vec()
}

#[inline]
pub fn entire_slice(&self) -> &[u8] {
&self.entire_slice
}

/// Insert an element at index `idx`
pub fn insert(&mut self, index: usize, element: &T) {
let len = self.len();
Expand Down
5 changes: 3 additions & 2 deletions utils/zerovec/src/varzerovec/ule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,9 @@ where
{
#[inline]
fn eq(&self, other: &VarZeroVecULE<T>) -> bool {
// Note: T implements PartialEq but not T::ULE
self.iter().eq(other.iter())
// VarULE has an API guarantee that this is equivalent
// to `T::VarULE::eq()`
self.entire_slice.eq(&other.entire_slice)
}
}

Expand Down

0 comments on commit 31497db

Please sign in to comment.