From f6649f178300ed0dbeb298ca1941ba73bab527e7 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 6 Aug 2018 15:31:28 +0200 Subject: [PATCH 1/6] discuss that ! is not the only problem; add MaybeUninit::zeroed --- text/0000-uninitialized-uninhabited.md | 36 +++++++++++++++++++++----- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/text/0000-uninitialized-uninhabited.md b/text/0000-uninitialized-uninhabited.md index 49fb792325d..4bba022b3ff 100644 --- a/text/0000-uninitialized-uninhabited.md +++ b/text/0000-uninitialized-uninhabited.md @@ -6,15 +6,17 @@ # Summary [summary]: #summary -Deprecate `mem::uninitialized::` and replace it with a `MaybeUninit` type -for safer and more principled handling of uninitialized data. +Deprecate `mem::uninitialized::` and `mem::zeroed::` and replace them with +a `MaybeUninit` type for safer and more principled handling of uninitialized +data. # Motivation [motivation]: #motivation The problems with `uninitialized` centre around its usage with uninhabited -types. The concept of "uninitialized data" is extremely problematic when it -comes into contact with types like `!` or `Void`. +types, and its interaction with Rust's type layout invariants. The concept of +"uninitialized data" is extremely problematic when it comes into contact with +types like `!` or `Void`. For any given type, there may be valid and invalid bit-representations. For example, the type `u8` consists of a single byte and all possible bytes can be @@ -53,6 +55,18 @@ fn mem::uninitialized::() -> ! Yet calling this function does not diverge! It just breaks everything then eats your laundry instead. +This problem is most prominent with `!` but also applies to other types that +have restrictions on the values they can carry. For example, +`Some(mem::uninitialized::()).is_none()` could actually return `true` +because uninitialized memory could violate the invariant that a `bool` is always +`[00000000]` or `[00000001]` -- and Rust relies on this invariant when doing +enum layout. So, `mem::uninitialized::()` is instantaneous undefined +behavior just like `mem::uninitialized::()`. This also affects `mem::zeroed` +when considering types where the all-`0` bit pattern is not valid, like +references: `mem::zeroed::<&'static i32>()` is instantaneous undefined behavior. + +## Tracking uninitializedness in the type + An alternative way of representing uninitialized data is through a union type: ```rust @@ -63,14 +77,16 @@ union MaybeUninit { ``` Instead of creating an "uninitialized value", we can create a `MaybeUninit` -initialized with `uninit = ()`. Then, once we know that the value in the union +initialized with `uninit: ()`. Then, once we know that the value in the union is valid, we can extract it with `my_uninit.value`. This is a better way of handling uninitialized data because it doesn't involve lying to the type system and pretending that we have a value when we don't. It also better represents what's actually going on: we never *really* have a value of type `T` when we're using `uninitialized::`, what we have is some memory that contains either a value (`value: T`) or nothing (`uninit: ()`), with it being the programmer's -responsibility to keep track of which state we're in. +responsibility to keep track of which state we're in. Notice that creating a +`MaybeUninit` is safe for any `T`! Only when accessing `my_uninit.value`, +we have to be careful to ensure this has been properly initialized. To see how this can replace `uninitialized` and fix bugs in the process, consider the following code: @@ -161,6 +177,14 @@ impl MaybeUninit { } } + /// Create a new `MaybeUninit` in an uninitialized state, with the memory being + /// filled with `0` bytes. + pub fn zeroed() -> MaybeUninit { + let mut u = uninitialized(); + ptr::write_bytes(&mut u as *mut _, 0u8, 1); + u + } + /// Set the value of the `MaybeUninit`. The overwrites any previous value without dropping it. pub fn set(&mut self, val: T) -> &mut T { unsafe { From 4021b2c3353a557ca0a4f1f53fae699c27e3739d Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 6 Aug 2018 16:36:37 +0200 Subject: [PATCH 2/6] clarify that as_(mut_)ptr can always be used to write; clarify Drop situation; change get to into_inner --- text/0000-uninitialized-uninhabited.md | 46 +++++++++++++++----------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/text/0000-uninitialized-uninhabited.md b/text/0000-uninitialized-uninhabited.md index 4bba022b3ff..c8953568ec9 100644 --- a/text/0000-uninitialized-uninhabited.md +++ b/text/0000-uninitialized-uninhabited.md @@ -162,7 +162,7 @@ Add the aforementioned `MaybeUninit` type to the standard library: #[repr(transparent)] union MaybeUninit { uninit: (), - value: T, + value: ManuallyDrop, } ``` @@ -171,6 +171,9 @@ The type should have at least the following interface ```rust impl MaybeUninit { /// Create a new `MaybeUninit` in an uninitialized state. + /// + /// Note that dropping a `MaybeUninit` will never call `T`'s drop code. + /// It is your responsibility to make sure `T` gets dropped if it got initialized. pub fn uninitialized() -> MaybeUninit { MaybeUninit { uninit: (), @@ -178,7 +181,13 @@ impl MaybeUninit { } /// Create a new `MaybeUninit` in an uninitialized state, with the memory being - /// filled with `0` bytes. + /// filled with `0` bytes. It depends on `T` whether that already makes for + /// proper initialization. For example, `MaybeUninit::zeroed()` is initialized, + /// but `MaybeUninit<&'static i32>::zeroed()` is not because references must not + /// be null. + /// + /// Note that dropping a `MaybeUninit` will never call `T`'s drop code. + /// It is your responsibility to make sure `T` gets dropped if it got initialized. pub fn zeroed() -> MaybeUninit { let mut u = uninitialized(); ptr::write_bytes(&mut u as *mut _, 0u8, 1); @@ -186,21 +195,20 @@ impl MaybeUninit { } /// Set the value of the `MaybeUninit`. The overwrites any previous value without dropping it. - pub fn set(&mut self, val: T) -> &mut T { + pub fn set(&mut self, val: T) { unsafe { - self.value = val; - &mut self.value + self.value = ManuallyDrop::new(val); } } - /// Take the value of the `MaybeUninit`, putting it into an uninitialized state. + /// Extract the value from the `MaybeUninit` container. /// /// # Unsafety /// /// It is up to the caller to guarantee that the the `MaybeUninit` really is in an initialized - /// state, otherwise undefined behaviour will result. - pub unsafe fn get(&self) -> T { - std::ptr::read(&self.value) + /// state, otherwise this will immediately cause undefined behavior. + pub unsafe fn into_inner(self) -> T { + std::ptr::read(&*self.value) } /// Get a reference to the contained value. @@ -208,9 +216,9 @@ impl MaybeUninit { /// # Unsafety /// /// It is up to the caller to guarantee that the the `MaybeUninit` really is in an initialized - /// state, otherwise undefined behaviour will result. + /// state, otherwise this will immediately cause undefined behavior. pub unsafe fn get_ref(&self) -> &T { - &self.value + &*self.value } /// Get a mutable reference to the contained value. @@ -218,21 +226,21 @@ impl MaybeUninit { /// # Unsafety /// /// It is up to the caller to guarantee that the the `MaybeUninit` really is in an initialized - /// state, otherwise undefined behaviour will result. + /// state, otherwise this will immediately cause undefined behavior. pub unsafe fn get_mut(&mut self) -> &mut T { - &mut self.value + &mut *self.value } - /// Get a pointer to the contained value. This pointer will only be valid if the `MaybeUninit` - /// is in an initialized state. + /// Get a pointer to the contained value. Reading from this pointer will be undefined + /// behavior unless the `MaybeUninit` is initialized. pub fn as_ptr(&self) -> *const T { - self as *const MaybeUninit as *const T + &*self.value *const T } - /// Get a mutable pointer to the contained value. This pointer will only be valid if the - /// `MaybeUninit` is in an initialized state. + /// Get a mutable pointer to the contained value. Reading from this pointer will be undefined + /// behavior unless the `MaybeUninit` is initialized. pub fn as_mut_ptr(&mut self) -> *mut T { - self as *mut MaybeUninit as *mut T + &mut *self.value *mut T } } ``` From e54882295fb8e5e84560b6a82a6f1f7b69b289ae Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 6 Aug 2018 17:08:25 +0200 Subject: [PATCH 3/6] remove repr(transparent) --- text/0000-uninitialized-uninhabited.md | 1 - 1 file changed, 1 deletion(-) diff --git a/text/0000-uninitialized-uninhabited.md b/text/0000-uninitialized-uninhabited.md index c8953568ec9..290fc9518f5 100644 --- a/text/0000-uninitialized-uninhabited.md +++ b/text/0000-uninitialized-uninhabited.md @@ -159,7 +159,6 @@ library as a replacement. Add the aforementioned `MaybeUninit` type to the standard library: ```rust -#[repr(transparent)] union MaybeUninit { uninit: (), value: ManuallyDrop, From 8e087978dab39d22b44386d7d0afae646b6af4b2 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 6 Aug 2018 17:09:26 +0200 Subject: [PATCH 4/6] use as_mut_ptr --- text/0000-uninitialized-uninhabited.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text/0000-uninitialized-uninhabited.md b/text/0000-uninitialized-uninhabited.md index 290fc9518f5..26c55d71a00 100644 --- a/text/0000-uninitialized-uninhabited.md +++ b/text/0000-uninitialized-uninhabited.md @@ -189,7 +189,7 @@ impl MaybeUninit { /// It is your responsibility to make sure `T` gets dropped if it got initialized. pub fn zeroed() -> MaybeUninit { let mut u = uninitialized(); - ptr::write_bytes(&mut u as *mut _, 0u8, 1); + u.as_mut_ptr().write_bytes(0u8, 1); u } From b7df252780cb74ca82a1fdd1eafb21d0450fe9e9 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 6 Aug 2018 17:57:11 +0200 Subject: [PATCH 5/6] fix some typos in the code and add playground link --- text/0000-uninitialized-uninhabited.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/text/0000-uninitialized-uninhabited.md b/text/0000-uninitialized-uninhabited.md index 26c55d71a00..42118609691 100644 --- a/text/0000-uninitialized-uninhabited.md +++ b/text/0000-uninitialized-uninhabited.md @@ -159,13 +159,14 @@ library as a replacement. Add the aforementioned `MaybeUninit` type to the standard library: ```rust -union MaybeUninit { +pub union MaybeUninit { uninit: (), value: ManuallyDrop, } ``` The type should have at least the following interface +([Playground link](https://play.rust-lang.org/?gist=81f5ab9a7e7107c9583de21382ef4333&version=nightly&mode=debug&edition=2015)): ```rust impl MaybeUninit { @@ -188,8 +189,8 @@ impl MaybeUninit { /// Note that dropping a `MaybeUninit` will never call `T`'s drop code. /// It is your responsibility to make sure `T` gets dropped if it got initialized. pub fn zeroed() -> MaybeUninit { - let mut u = uninitialized(); - u.as_mut_ptr().write_bytes(0u8, 1); + let mut u = MaybeUninit::::uninitialized(); + unsafe { u.as_mut_ptr().write_bytes(0u8, 1); } u } @@ -233,13 +234,13 @@ impl MaybeUninit { /// Get a pointer to the contained value. Reading from this pointer will be undefined /// behavior unless the `MaybeUninit` is initialized. pub fn as_ptr(&self) -> *const T { - &*self.value *const T + unsafe { &*self.value as *const T } } /// Get a mutable pointer to the contained value. Reading from this pointer will be undefined /// behavior unless the `MaybeUninit` is initialized. pub fn as_mut_ptr(&mut self) -> *mut T { - &mut *self.value *mut T + unsafe { &mut *self.value as *mut T } } } ``` From 8ae636b95a5ba5f41541d23f92b3452bb5371395 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 6 Aug 2018 18:02:41 +0200 Subject: [PATCH 6/6] clarify into_inner and Drop --- text/0000-uninitialized-uninhabited.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/text/0000-uninitialized-uninhabited.md b/text/0000-uninitialized-uninhabited.md index 42118609691..7170d24c5eb 100644 --- a/text/0000-uninitialized-uninhabited.md +++ b/text/0000-uninitialized-uninhabited.md @@ -201,7 +201,9 @@ impl MaybeUninit { } } - /// Extract the value from the `MaybeUninit` container. + /// Extract the value from the `MaybeUninit` container. This is a great way + /// to ensure that the data will get dropped, because the resulting `T` is + /// subject to the usual drop handling. /// /// # Unsafety ///