Skip to content

Commit

Permalink
Add MaybeUninit type
Browse files Browse the repository at this point in the history
The standard library's `MaybeUninit` type does not currently support
wrapping unsized types. This commit introduces a polyfill with the same
behavior as `MaybeUninit` which does support wrapping unsized types.

In this commit, the only supported types are sized types and slice
types. Later (as part of #29), we will add the ability to derive the
`AsMaybeUninit` trait, which will extend support to custom DSTs.

Makes progress on #29
  • Loading branch information
joshlf committed Sep 3, 2023
1 parent 62f76d2 commit 51dd7b9
Show file tree
Hide file tree
Showing 2 changed files with 280 additions and 13 deletions.
285 changes: 278 additions & 7 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ use core::{
fmt::{self, Debug, Display, Formatter},
hash::{Hash, Hasher},
marker::PhantomData,
mem::{self, ManuallyDrop, MaybeUninit},
mem::{self, ManuallyDrop},
num::{
NonZeroI128, NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI8, NonZeroIsize, NonZeroU128,
NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize, Wrapping,
Expand Down Expand Up @@ -1004,17 +1004,16 @@ safety_comment! {
/// - `Unaligned`: `MaybeUninit<T>` is guaranteed by its documentation [1]
/// to have the same alignment as `T`.
///
/// [1]
/// https://doc.rust-lang.org/nightly/core/mem/union.MaybeUninit.html#layout-1
/// [1] https://doc.rust-lang.org/nightly/core/mem/union.MaybeUninit.html#layout-1
///
/// TODO(https://github.com/google/zerocopy/issues/251): If we split
/// `FromBytes` and `RefFromBytes`, or if we introduce a separate
/// `NoCell`/`Freeze` trait, we can relax the trait bounds for `FromZeroes`
/// and `FromBytes`.
unsafe_impl!(T: FromZeroes => FromZeroes for MaybeUninit<T>);
unsafe_impl!(T: FromBytes => FromBytes for MaybeUninit<T>);
unsafe_impl!(T: Unaligned => Unaligned for MaybeUninit<T>);
assert_unaligned!(MaybeUninit<()>, MaybeUninit<u8>);
unsafe_impl!(T: FromZeroes => FromZeroes for mem::MaybeUninit<T>);
unsafe_impl!(T: FromBytes => FromBytes for mem::MaybeUninit<T>);
unsafe_impl!(T: Unaligned => Unaligned for mem::MaybeUninit<T>);
assert_unaligned!(mem::MaybeUninit<()>, mem::MaybeUninit<u8>);
}
safety_comment! {
/// SAFETY:
Expand Down Expand Up @@ -1201,6 +1200,271 @@ mod simd {
simd_arch_mod!(arm, int8x4_t, uint8x4_t);
}

/// An alternative to the standard library's [`MaybeUninit`] that supports
/// unsized types.
///
/// `MaybeUninit<T>` is identical to the standard library's `MaybeUninit` type
/// with the exception that it supports wrapping unsized types. Namely,
/// `MaybeUninit<T>` has the same layout as `T`, but it has no bit validity
/// constraints - any byte of a `MaybeUninit<T>` may have any value, including
/// uninitialized.
///
/// Which types can be wrapped is determined by the [`AsMaybeUninit`] trait,
/// whose associated `MaybeUninit` type supplies the storage for this wrapper.
///
/// [`MaybeUninit`]: core::mem::MaybeUninit
#[derive(Copy, Clone)]
#[repr(transparent)]
pub struct MaybeUninit<T: AsMaybeUninit + ?Sized> {
// The backing storage: the `AsMaybeUninit::MaybeUninit` associated type of
// `T`. Because this is the only field and the struct is `repr(transparent)`,
// `MaybeUninit<T>` has the same layout as `T::MaybeUninit`.
inner: T::MaybeUninit,
}

impl<T: AsMaybeUninit + ?Sized> Debug for MaybeUninit<T> {
    // The contents may be uninitialized and so are never read here; only the
    // name of `Self`, as reported by `core::any::type_name`, is emitted.
    // `Formatter::pad` is used so that width/alignment flags are honored.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let type_name = core::any::type_name::<Self>();
        f.pad(type_name)
    }
}

impl<T: AsMaybeUninit + ?Sized> MaybeUninit<T> {
/// Gets a shared reference to the contained value.
///
/// # Safety
///
/// Calling this when the content is not yet fully initialized causes
/// undefined behavior. It is up to the caller to guarantee that `self` is
/// really in an initialized state.
pub unsafe fn assume_init_ref(&self) -> &T {
// `&self.inner` coerces to a `*const T::MaybeUninit`; the trait method only
// changes the pointer's type.
let ptr = T::raw_from_maybe_uninit(&self.inner);
// SAFETY:
// - `AsMaybeUninit::raw_from_maybe_uninit` promises that `ptr` addresses
//   the same memory region as its argument and has the same provenance.
//   The argument was derived from `&self.inner`, so `ptr` is non-null and
//   valid for reads for as long as the borrow of `self` lasts, which is
//   the lifetime of the returned reference.
// - `AsMaybeUninit` requires that `T` and `T::MaybeUninit` have the same
//   alignment requirement, so `ptr` is suitably aligned for `T`.
// - `AsMaybeUninit` requires that `T::MaybeUninit` contains `UnsafeCell`s
//   at exactly the same byte ranges as `T`, so viewing the memory as `&T`
//   permits interior mutation only where `&self.inner` already did.
// - The caller has promised that the contents are fully initialized, so
//   the referent is a valid `T`.
unsafe { &*ptr }
}

/// Gets a mutable reference to the contained value.
///
/// # Safety
///
/// Calling this when the content is not yet fully initialized causes
/// undefined behavior. It is up to the caller to guarantee that `self` is
/// really in an initialized state.
pub unsafe fn assume_init_mut(&mut self) -> &mut T {
// `&mut self.inner` coerces to a `*mut T::MaybeUninit`; the trait method
// only changes the pointer's type.
let ptr = T::raw_mut_from_maybe_uninit(&mut self.inner);
// SAFETY:
// - `AsMaybeUninit::raw_mut_from_maybe_uninit` promises that `ptr`
//   addresses the same memory region as its argument and has the same
//   provenance. The argument was derived from `&mut self.inner`, so `ptr`
//   is non-null, valid for reads and writes, and not aliased by any other
//   live reference for as long as the exclusive borrow of `self` lasts,
//   which is the lifetime of the returned reference.
// - `AsMaybeUninit` requires that `T` and `T::MaybeUninit` have the same
//   alignment requirement, so `ptr` is suitably aligned for `T`.
// - The caller has promised that the contents are fully initialized, so
//   the referent is a valid `T`.
unsafe { &mut *ptr }
}
}

impl<T: Sized> MaybeUninit<T> {
/// Creates a new `MaybeUninit<T>` in an uninitialized state.
pub const fn uninit() -> MaybeUninit<T> {
MaybeUninit { inner: mem::MaybeUninit::uninit() }
}

/// Extracts the value from the `MaybeUninit<T>` container.
///
/// # Safety
///
/// `assume_init` has the same safety requirements and guarantees as the
/// standard library's [`MaybeUninit::assume_init`] method.
///
/// [`MaybeUninit::assume_init`]: mem::MaybeUninit::assume_init
pub const unsafe fn assume_init(self) -> T {
// SAFETY: The caller has promised to uphold the safety invariants of
// the exact function we're calling here. Since, for `T: Sized`,
// `MaybeUninit<T>` is a `repr(transparent)` wrapper around
// `mem::MaybeUninit<T>`, it is sound to treat `Self` as equivalent to a
// `mem::MaybeUninit<T>` for the purposes of
// `mem::MaybeUninit::assume_init`'s safety invariants.
unsafe { self.inner.assume_init() }
}
}

/// A type which can be wrapped in [`MaybeUninit`].
///
/// Implementing this trait selects, via the associated `MaybeUninit` type,
/// the storage that [`MaybeUninit<Self>`](MaybeUninit) uses, and provides the
/// raw pointer conversions from that storage back to `Self`.
///
/// # Safety
///
/// The safety invariants on the associated `MaybeUninit` type and on all
/// methods must be upheld.
pub unsafe trait AsMaybeUninit {
/// A type which has the same layout as `Self`, but which has no validity
/// constraints.
///
/// Roughly speaking, this type is equivalent to what the standard library's
/// [`MaybeUninit<Self>`] would be if it supported unsized types.
///
/// # Safety
///
/// For `T: AsMaybeUninit`, the following must hold:
/// - Given `m: T::MaybeUninit`, it is sound to write any byte value,
///   including an uninitialized byte, at any byte offset in `m`.
/// - `T` and `T::MaybeUninit` have the same alignment requirement.
/// - It is valid to use an `as` cast to convert a `t: *const T` to a `m:
///   *const T::MaybeUninit` and vice-versa (and likewise for `*mut T`/`*mut
///   T::MaybeUninit`). Regardless of which direction the conversion was
///   performed, the sizes of the pointers' referents are always equal (in
///   terms of an API which is not yet stable, `size_of_val_raw(t) ==
///   size_of_val_raw(m)`).
/// - `T::MaybeUninit` contains [`UnsafeCell`]s at exactly the same byte
///   ranges that `T` does.
///
/// [`MaybeUninit<Self>`]: core::mem::MaybeUninit
/// [`UnsafeCell`]: core::cell::UnsafeCell
type MaybeUninit: ?Sized;

/// Converts a `*const Self::MaybeUninit` into a `*const Self`, changing only
/// the pointer's type.
///
/// # Safety
///
/// Callers may assume that the memory region addressed by the return value
/// is the same as that addressed by the argument, and that both the return
/// value and the argument have the same provenance.
fn raw_from_maybe_uninit(maybe_uninit: *const Self::MaybeUninit) -> *const Self;

/// Converts a `*mut Self::MaybeUninit` into a `*mut Self`, changing only the
/// pointer's type.
///
/// # Safety
///
/// Callers may assume that the memory region addressed by the return value
/// is the same as that addressed by the argument, and that both the return
/// value and the argument have the same provenance.
fn raw_mut_from_maybe_uninit(maybe_uninit: *mut Self::MaybeUninit) -> *mut Self;
}

// Blanket implementation for every sized type: the storage is the standard
// library's `mem::MaybeUninit<T>`.
//
// SAFETY: See inline safety comments.
unsafe impl<T: Sized> AsMaybeUninit for T {
// SAFETY (in the bullets below, `MaybeUninit<T>` refers to the standard
// library's `mem::MaybeUninit<T>`):
// - `MaybeUninit` has no validity requirements, so it is sound to write any
//   byte value, including an uninitialized byte, at any offset.
// - `MaybeUninit<T>` has the same layout as `T`, so they have the same
//   alignment requirement. For the same reason, their sizes are equal.
// - `T` and `MaybeUninit<T>` are both `Sized`, so raw pointers to both types
//   are thin pointers, and thus can be converted using `as` casts. Since the
//   two types have equal sizes, the sizes of these pointers' referents are
//   always equal.
// - `MaybeUninit<T>` has the same field offsets as `T`, and so it contains
//   `UnsafeCell`s at exactly the same byte ranges as `T`.
type MaybeUninit = mem::MaybeUninit<T>;

// SAFETY: `.cast` preserves pointer address and provenance.
fn raw_from_maybe_uninit(maybe_uninit: *const mem::MaybeUninit<T>) -> *const T {
maybe_uninit.cast::<T>()
}

// SAFETY: `.cast` preserves pointer address and provenance.
fn raw_mut_from_maybe_uninit(maybe_uninit: *mut mem::MaybeUninit<T>) -> *mut T {
maybe_uninit.cast::<T>()
}
}

// Implementation for slices of sized elements: the storage is a slice of the
// standard library's `mem::MaybeUninit<T>`.
//
// SAFETY: See inline safety comments.
unsafe impl<T: Sized> AsMaybeUninit for [T] {
// SAFETY (in the bullets below, `MaybeUninit<T>` refers to the standard
// library's `mem::MaybeUninit<T>`):
// - `MaybeUninit` has no bit validity requirements and `[U]` has the same
//   bit validity requirements as `U`, so `[MaybeUninit<T>]` has no bit
//   validity requirements. Thus, it is sound to write any byte value,
//   including an uninitialized byte, at any byte offset.
// - Since `MaybeUninit<T>` has the same layout as `T`, and `[U]` has the
//   same alignment as `U`, `[MaybeUninit<T>]` has the same alignment as
//   `[T]`.
// - `[T]` and `[MaybeUninit<T>]` are both slice types, and so pointers can
//   be converted using an `as` cast. Since `T` and `MaybeUninit<T>` have
//   the same size, and since such a cast preserves the number of elements
//   in the slice, the referent slices themselves will have the same size.
// - `MaybeUninit<T>` has the same size and field offsets as `T`, so each
//   element of `[MaybeUninit<T>]` occupies the same bytes as the
//   corresponding element of `[T]`. Thus, `[MaybeUninit<T>]` contains
//   `UnsafeCell`s at exactly the same byte ranges as `[T]`.
type MaybeUninit = [mem::MaybeUninit<T>];

// SAFETY: `as` preserves pointer address and provenance.
//
// The `as` cast (and hence the lint exemption) is needed because the
// pointer `cast` method can only produce pointers to `Sized` types.
#[allow(clippy::as_conversions)]
fn raw_from_maybe_uninit(maybe_uninit: *const [mem::MaybeUninit<T>]) -> *const [T] {
maybe_uninit as *const [T]
}

// SAFETY: `as` preserves pointer address and provenance.
//
// The `as` cast (and hence the lint exemption) is needed because the
// pointer `cast` method can only produce pointers to `Sized` types.
#[allow(clippy::as_conversions)]
fn raw_mut_from_maybe_uninit(maybe_uninit: *mut [mem::MaybeUninit<T>]) -> *mut [T] {
maybe_uninit as *mut [T]
}
}

// Implementation for `str`, which reuses the storage type chosen for `[u8]`.
//
// SAFETY: See inline safety comments.
unsafe impl AsMaybeUninit for str {
// SAFETY: `str` has the same layout as `[u8]`. Thus, the same safety
// argument for `<[u8] as AsMaybeUninit>::MaybeUninit` applies here.
type MaybeUninit = <[u8] as AsMaybeUninit>::MaybeUninit;

// SAFETY: `as` preserves pointer address and provenance. Both pointee
// types are slice-like, so the cast also carries the length over unchanged.
//
// The `as` cast (and hence the lint exemption) is needed because the
// pointer `cast` method can only produce pointers to `Sized` types.
#[allow(clippy::as_conversions)]
fn raw_from_maybe_uninit(
maybe_uninit: *const <[u8] as AsMaybeUninit>::MaybeUninit,
) -> *const str {
maybe_uninit as *const str
}

// SAFETY: `as` preserves pointer address and provenance. Both pointee
// types are slice-like, so the cast also carries the length over unchanged.
//
// The `as` cast (and hence the lint exemption) is needed because the
// pointer `cast` method can only produce pointers to `Sized` types.
#[allow(clippy::as_conversions)]
fn raw_mut_from_maybe_uninit(
maybe_uninit: *mut <[u8] as AsMaybeUninit>::MaybeUninit,
) -> *mut str {
maybe_uninit as *mut str
}
}

// Implementation for an already-wrapped slice, which allows
// `MaybeUninit<MaybeUninit<[T]>>` to be formed. The storage is the same slice
// type that `MaybeUninit<[T]>` itself wraps.
//
// SAFETY: See inline safety comments.
unsafe impl<T: Sized> AsMaybeUninit for MaybeUninit<[T]> {
// SAFETY: `MaybeUninit<[T]>` is a `repr(transparent)` wrapper around
// `[T::MaybeUninit]`. Thus:
// - Given `m: Self::MaybeUninit = [T::MaybeUninit]`, it is sound to write
//   any byte value, including an uninitialized byte, at any byte offset in
//   `m` because that is already required of `T::MaybeUninit`, and thus of
//   `[T::MaybeUninit]`.
// - `Self` and `[T::MaybeUninit]` have the same representation, and so:
//   - Alignments are equal.
//   - Pointer casts are valid, and sizes of referents of both pointer types
//     are equal.
// - `Self::MaybeUninit = [T::MaybeUninit]` contains `UnsafeCell`s at
//   exactly the same byte ranges that `Self` does because `Self` is a
//   `repr(transparent)` wrapper around `[T::MaybeUninit]` and therefore has
//   the same layout as `[T::MaybeUninit]`.
type MaybeUninit = [<T as AsMaybeUninit>::MaybeUninit];

// SAFETY: `as` preserves pointer address and provenance.
//
// The `as` cast (and hence the lint exemption) is needed because the
// pointer `cast` method can only produce pointers to `Sized` types.
#[allow(clippy::as_conversions)]
fn raw_from_maybe_uninit(
maybe_uninit: *const [<T as AsMaybeUninit>::MaybeUninit],
) -> *const MaybeUninit<[T]> {
maybe_uninit as *const MaybeUninit<[T]>
}

// SAFETY: `as` preserves pointer address and provenance.
//
// The `as` cast (and hence the lint exemption) is needed because the
// pointer `cast` method can only produce pointers to `Sized` types.
#[allow(clippy::as_conversions)]
fn raw_mut_from_maybe_uninit(
maybe_uninit: *mut [<T as AsMaybeUninit>::MaybeUninit],
) -> *mut MaybeUninit<[T]> {
maybe_uninit as *mut MaybeUninit<[T]>
}
}

safety_comment! {
    // `MaybeUninit<T>` is `FromZeroes` and `FromBytes`, but never `AsBytes`
    // since it may contain uninitialized bytes.
    //
    // Here `MaybeUninit` is this crate's wrapper, which also supports unsized
    // `T`; the standard library's `mem::MaybeUninit` has its own impls.
    //
    /// SAFETY:
    /// - `FromZeroes`, `FromBytes`: `MaybeUninit<T>` has no restrictions on its
    ///   contents. Unfortunately, in addition to bit validity, `FromZeroes` and
    ///   `FromBytes` also require that implementers contain no `UnsafeCell`s.
    ///   Thus, we require `T: FromZeroes` and `T: FromBytes` in order to ensure
    ///   that `T` - and thus `MaybeUninit<T>` - contains no `UnsafeCell`s.
    ///   Thus, requiring that `T` implement each of these traits is sufficient.
    /// - `Unaligned`: `MaybeUninit<T>` is a `repr(transparent)` wrapper around
    ///   `T::MaybeUninit`, which `AsMaybeUninit` requires to have the same
    ///   alignment as `T`. (For sized `T`, `T::MaybeUninit` is the standard
    ///   library's `MaybeUninit<T>`, which is also guaranteed by its
    ///   documentation [1] to have the same alignment as `T`.)
    ///
    /// [1] https://doc.rust-lang.org/nightly/core/mem/union.MaybeUninit.html#layout-1
    ///
    /// TODO(https://github.com/google/zerocopy/issues/251): If we split
    /// `FromBytes` and `RefFromBytes`, or if we introduce a separate
    /// `NoCell`/`Freeze` trait, we can relax the trait bounds for `FromZeroes`
    /// and `FromBytes`.
    unsafe_impl!(T: ?Sized + AsMaybeUninit + FromZeroes => FromZeroes for MaybeUninit<T>);
    unsafe_impl!(T: ?Sized + AsMaybeUninit + FromBytes => FromBytes for MaybeUninit<T>);
    unsafe_impl!(T: ?Sized + AsMaybeUninit + Unaligned => Unaligned for MaybeUninit<T>);
    // Check the wrapper defined in this crate for both types. The standard
    // library's `mem::MaybeUninit<()>` is already checked alongside the
    // `mem::MaybeUninit` impls.
    assert_unaligned!(MaybeUninit<()>, MaybeUninit<u8>);
}

/// A type with no alignment requirement.
///
/// An `Unalign` wraps a `T`, removing any alignment requirement. `Unalign<T>`
Expand Down Expand Up @@ -4068,8 +4332,15 @@ mod tests {
assert_impls!(ManuallyDrop<NotZerocopy>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned);
assert_impls!(ManuallyDrop<[NotZerocopy]>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned);

assert_impls!(mem::MaybeUninit<u8>: FromZeroes, FromBytes, Unaligned, !AsBytes);
assert_impls!(mem::MaybeUninit<NotZerocopy>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned);

assert_impls!(MaybeUninit<u8>: FromZeroes, FromBytes, Unaligned, !AsBytes);
assert_impls!(MaybeUninit<MaybeUninit<u8>>: FromZeroes, FromBytes, Unaligned, !AsBytes);
assert_impls!(MaybeUninit<[u8]>: FromZeroes, FromBytes, Unaligned, !AsBytes);
assert_impls!(MaybeUninit<MaybeUninit<[u8]>>: FromZeroes, FromBytes, Unaligned, !AsBytes);
assert_impls!(MaybeUninit<NotZerocopy>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned);
assert_impls!(MaybeUninit<MaybeUninit<NotZerocopy>>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned);

assert_impls!(Wrapping<u8>: FromZeroes, FromBytes, AsBytes, Unaligned);
assert_impls!(Wrapping<NotZerocopy>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned);
Expand Down
8 changes: 2 additions & 6 deletions src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,13 @@ macro_rules! unsafe_impl {
($tyvar:ident => $trait:ident for $ty:ty) => {
unsafe impl<$tyvar> $trait for $ty { fn only_derive_is_allowed_to_implement_this_trait() {} }
};
// For all `$tyvar: ?Sized` with no bounds, implement `$trait` for `$ty`.
($tyvar:ident: ?Sized => $trait:ident for $ty:ty) => {
unsafe impl<$tyvar: ?Sized> $trait for $ty { fn only_derive_is_allowed_to_implement_this_trait() {} }
};
// For all `$tyvar: $bound`, implement `$trait` for `$ty`.
($tyvar:ident: $bound:path => $trait:ident for $ty:ty) => {
unsafe impl<$tyvar: $bound> $trait for $ty { fn only_derive_is_allowed_to_implement_this_trait() {} }
};
// For all `$tyvar: $bound + ?Sized`, implement `$trait` for `$ty`.
($tyvar:ident: ?Sized + $bound:path => $trait:ident for $ty:ty) => {
unsafe impl<$tyvar: ?Sized + $bound> $trait for $ty { fn only_derive_is_allowed_to_implement_this_trait() {} }
($tyvar:ident: ?Sized $(+ $bounds:path)* => $trait:ident for $ty:ty) => {
unsafe impl<$tyvar: ?Sized $(+ $bounds)*> $trait for $ty { fn only_derive_is_allowed_to_implement_this_trait() {} }
};
// For all `$tyvar: $bound` and for all `const $constvar: $constty`,
// implement `$trait` for `$ty`.
Expand Down

0 comments on commit 51dd7b9

Please sign in to comment.