From 83e41531e1c1b19c7cb7fea917615ebc7d9590b7 Mon Sep 17 00:00:00 2001
From: Brent Kerby
Date: Mon, 5 Apr 2021 19:13:43 -0600
Subject: [PATCH 1/3] Adjust Vec to build on stable Rust

---
 src/vec-alloc.md         | 111 +++++++++++----------
 src/vec-dealloc.md       |  11 +--
 src/vec-final.md         | 208 +++++++++++++++++++++------------------
 src/vec-insert-remove.md |  10 +-
 src/vec-into-iter.md     |  12 +--
 src/vec-layout.md        |  61 ++++--------
 src/vec-push-pop.md      |   2 +-
 src/vec-raw.md           |  94 ++++++++++--------
 src/vec-zsts.md          |  76 +++++++-------
 src/vec.md               |  14 +--
 10 files changed, 304 insertions(+), 295 deletions(-)

diff --git a/src/vec-alloc.md b/src/vec-alloc.md
index 0d270570..167242e7 100644
--- a/src/vec-alloc.md
+++ b/src/vec-alloc.md
@@ -1,41 +1,52 @@
 # Allocating Memory
 
-Using Unique throws a wrench in an important feature of Vec (and indeed all of
-the std collections): an empty Vec doesn't actually allocate at all. So if we
-can't allocate, but also can't put a null pointer in `ptr`, what do we do in
-`Vec::new`? Well, we just put some other garbage in there!
+Using NonNull throws a wrench in an important feature of Vec (and indeed all of
+the std collections): creating an empty Vec doesn't actually allocate at all. This
+is not the same as allocating a zero-sized memory block, which is not allowed by
+the global allocator (it results in undefined behavior!). So if we can't allocate,
+but also can't put a null pointer in `ptr`, what do we do in `Vec::new`? Well, we
+just put some other garbage in there!
 
 This is perfectly fine because we already have `cap == 0` as our sentinel for no
 allocation. We don't even need to handle it specially in almost any code because
 we usually need to check if `cap > len` or `len > 0` anyway. The recommended
-Rust value to put here is `mem::align_of::<T>()`. Unique provides a convenience
-for this: `Unique::dangling()`. There are quite a few places where we'll
+Rust value to put here is `mem::align_of::<T>()`. NonNull provides a convenience
+for this: `NonNull::dangling()`. There are quite a few places where we'll
 want to use `dangling` because there's no real allocation to talk about but
 `null` would make the compiler do bad things.
 
 So:
 
 ```rust,ignore
+use std::mem;
+
 impl<T> Vec<T> {
     fn new() -> Self {
         assert!(mem::size_of::<T>() != 0, "We're not ready to handle ZSTs");
-        Vec { ptr: Unique::dangling(), len: 0, cap: 0 }
+        Vec {
+            ptr: NonNull::dangling(),
+            len: 0,
+            cap: 0,
+            _marker: PhantomData
+        }
     }
 }
+# fn main() {}
 ```
 
 I slipped in that assert there because zero-sized types will require some
 special handling throughout our code, and I want to defer the issue for now.
 Without this assert, some of our early drafts will do some Very Bad Things.
 
-Next we need to figure out what to actually do when we *do* want space. For
-that, we'll need to use the rest of the heap APIs. These basically allow us to
-talk directly to Rust's allocator (`malloc` on Unix platforms and `HeapAlloc`
-on Windows by default).
+Next we need to figure out what to actually do when we *do* want space. For that,
+we use the global allocation functions [`alloc`][alloc], [`realloc`][realloc],
+and [`dealloc`][dealloc] which are available in stable Rust in
+[`std::alloc`][std_alloc]. These functions are expected to become deprecated in
+favor of the methods of [`std::alloc::Global`][Global] after this type is stabilized.
 
 We'll also need a way to handle out-of-memory (OOM) conditions. The standard
-library calls `std::alloc::oom()`, which in turn calls the `oom` langitem,
-which aborts the program in a platform-specific manner.
+library provides a function [`alloc::handle_alloc_error`][handle_alloc_error],
+which will abort the program in a platform-specific manner.
 The reason we abort and don't panic is because unwinding can cause allocations
 to happen, and that seems like a bad thing to do when your allocator just came
 back with "hey I don't have any more memory".
@@ -152,52 +163,48 @@ such we will guard against this case explicitly.
 Ok with all the nonsense out of the way, let's actually allocate some memory:
 
 ```rust,ignore
-fn grow(&mut self) {
-    // this is all pretty delicate, so let's say it's all unsafe
-    unsafe {
-        let elem_size = mem::size_of::<T>();
-
-        let (new_cap, ptr) = if self.cap == 0 {
-            let ptr = Global.allocate(Layout::array::<T>(1).unwrap());
-            (1, ptr)
+use std::alloc::{self, Layout};
+
+impl<T> Vec<T> {
+    fn grow(&mut self) {
+        let (new_cap, new_layout) = if self.cap == 0 {
+            (1, Layout::array::<T>(1).unwrap())
         } else {
-            // as an invariant, we can assume that `self.cap < isize::MAX`,
-            // so this doesn't need to be checked.
-            let new_cap = 2 * self.cap;
-            // Similarly this can't overflow due to previously allocating this
-            let old_num_bytes = self.cap * elem_size;
-
-            // check that the new allocation doesn't exceed `isize::MAX` at all
-            // regardless of the actual size of the capacity. This combines the
-            // `new_cap <= isize::MAX` and `new_num_bytes <= usize::MAX` checks
-            // we need to make. We lose the ability to allocate e.g. 2/3rds of
-            // the address space with a single Vec of i16's on 32-bit though.
-            // Alas, poor Yorick -- I knew him, Horatio.
-            assert!(old_num_bytes <= (isize::MAX as usize) / 2,
-                    "capacity overflow");
-
-            let c: NonNull<T> = self.ptr.into();
-            let ptr = Global.grow(c.cast(),
-                                  Layout::array::<T>(self.cap).unwrap(),
-                                  Layout::array::<T>(new_cap).unwrap());
-            (new_cap, ptr)
+            // This can't overflow since self.cap <= isize::MAX.
+            let new_cap = 2 * self.cap;
+
+            // Layout::array checks that the number of bytes is <= usize::MAX,
+            // but this is redundant since old_layout.size() <= isize::MAX,
+            // so the `unwrap` should never fail.
+            let new_layout = Layout::array::<T>(new_cap).unwrap();
+            (new_cap, new_layout)
         };
 
-        // If allocate or reallocate fail, oom
-        if ptr.is_err() {
-            handle_alloc_error(Layout::from_size_align_unchecked(
-                new_cap * elem_size,
-                mem::align_of::<T>(),
-            ))
-        }
+        // Ensure that the new allocation doesn't exceed `isize::MAX` bytes.
+        assert!(new_layout.size() <= isize::MAX as usize, "Allocation too large");
 
-        let ptr = ptr.unwrap();
+        let new_ptr = if self.cap == 0 {
+            unsafe { alloc::alloc(new_layout) }
+        } else {
+            let old_layout = Layout::array::<T>(self.cap).unwrap();
+            let old_ptr = self.ptr.as_ptr() as *mut u8;
+            unsafe { alloc::realloc(old_ptr, old_layout, new_layout.size()) }
+        };
 
-        self.ptr = Unique::new_unchecked(ptr.as_ptr() as *mut _);
+        // If allocation fails, `new_ptr` will be null, in which case we abort.
+        self.ptr = match NonNull::new(new_ptr as *mut T) {
+            Some(p) => p,
+            None => alloc::handle_alloc_error(new_layout),
+        };
         self.cap = new_cap;
     }
 }
+# fn main() {}
 ```
 
-Nothing particularly tricky here. Just computing sizes and alignments and doing
-some careful multiplication checks.
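+Nothing particularly tricky here: `Layout::array` does the size and alignment
+arithmetic for us. As a quick illustrative sanity check (a sketch, not part of
+our Vec code; the concrete numbers assume `u32`, which is 4 bytes with 4-byte
+alignment on typical targets):
+
+```rust
+use std::alloc::Layout;
+
+// Layout::array computes the layout of an array of n elements of T.
+let layout = Layout::array::<u32>(4).unwrap();
+assert_eq!(layout.size(), 16); // 4 elements * 4 bytes each
+assert_eq!(layout.align(), 4); // alignment of a single u32
+```
+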
+[Global]: ../std/alloc/struct.Global.html
+[handle_alloc_error]: ../alloc/alloc/fn.handle_alloc_error.html
+[alloc]: ../alloc/alloc/fn.alloc.html
+[realloc]: ../alloc/alloc/fn.realloc.html
+[dealloc]: ../alloc/alloc/fn.dealloc.html
+[std_alloc]: ../alloc/alloc/index.html
diff --git a/src/vec-dealloc.md b/src/vec-dealloc.md
index 34ddc2b5..7cc8ce49 100644
--- a/src/vec-dealloc.md
+++ b/src/vec-dealloc.md
@@ -7,20 +7,17 @@ ask Rust if `T` `needs_drop` and omit the calls to `pop`. However in practice
 LLVM is *really* good at removing simple side-effect free code like this, so I
 wouldn't bother unless you notice it's not being stripped (in this case it is).
 
-We must not call `Global.deallocate` when `self.cap == 0`, as in this case we
+We must not call `alloc::dealloc` when `self.cap == 0`, as in this case we
 haven't actually allocated any memory.
 
-
 ```rust,ignore
 impl<T> Drop for Vec<T> {
     fn drop(&mut self) {
         if self.cap != 0 {
             while let Some(_) = self.pop() { }
-
-            unsafe {
-                let c: NonNull<T> = self.ptr.into();
-                Global.deallocate(c.cast(),
-                                  Layout::array::<T>(self.cap).unwrap());
+            let layout = Layout::array::<T>(self.cap).unwrap();
+            unsafe {
+                alloc::dealloc(self.ptr.as_ptr() as *mut u8, layout);
             }
         }
     }
diff --git a/src/vec-final.md b/src/vec-final.md
index e5f35296..4e3ad18f 100644
--- a/src/vec-final.md
+++ b/src/vec-final.md
@@ -1,79 +1,85 @@
 # The Final Code
 
 ```rust
-#![feature(ptr_internals)]
-#![feature(allocator_api)]
-#![feature(alloc_layout_extra)]
-
-use std::ptr::{Unique, NonNull, self};
+use std::alloc::{self, Layout};
+use std::marker::PhantomData;
 use std::mem;
 use std::ops::{Deref, DerefMut};
-use std::marker::PhantomData;
-use std::alloc::{
-    Allocator,
-    Global,
-    GlobalAlloc,
-    Layout,
-    handle_alloc_error
-};
+use std::ptr::{self, NonNull};
 
 struct RawVec<T> {
-    ptr: Unique<T>,
+    ptr: NonNull<T>,
     cap: usize,
+    _marker: PhantomData<T>,
 }
 
+unsafe impl<T: Send> Send for RawVec<T> {}
+unsafe impl<T: Sync> Sync for RawVec<T> {}
+
 impl<T> RawVec<T> {
     fn new() -> Self {
         // !0 is usize::MAX. This branch should be stripped at compile time.
         let cap = if mem::size_of::<T>() == 0 { !0 } else { 0 };
 
-        // Unique::dangling() doubles as "unallocated" and "zero-sized allocation"
-        RawVec { ptr: Unique::dangling(), cap: cap }
+        // NonNull::dangling() doubles as "unallocated" and "zero-sized allocation"
+        RawVec {
+            ptr: NonNull::dangling(),
+            cap: cap,
+            _marker: PhantomData,
+        }
     }
 
     fn grow(&mut self) {
-        unsafe {
-            let elem_size = mem::size_of::<T>();
-
-            // since we set the capacity to usize::MAX when elem_size is
-            // 0, getting to here necessarily means the Vec is overfull.
-            assert!(elem_size != 0, "capacity overflow");
+        // since we set the capacity to usize::MAX when T has size 0,
+        // getting to here necessarily means the Vec is overfull.
+        assert!(mem::size_of::<T>() != 0, "capacity overflow");
 
-            let (new_cap, ptr) = if self.cap == 0 {
-                let ptr = Global.allocate(Layout::array::<T>(1).unwrap());
-                (1, ptr)
-            } else {
-                let new_cap = 2 * self.cap;
-                let c: NonNull<T> = self.ptr.into();
-                let ptr = Global.grow(c.cast(),
-                                      Layout::array::<T>(self.cap).unwrap(),
-                                      Layout::array::<T>(new_cap).unwrap());
-                (new_cap, ptr)
-            };
-
-            // If allocate or reallocate fail, oom
-            if ptr.is_err() {
-                handle_alloc_error(Layout::from_size_align_unchecked(
-                    new_cap * elem_size,
-                    mem::align_of::<T>(),
-                ))
-            }
-            let ptr = ptr.unwrap();
-
-            self.ptr = Unique::new_unchecked(ptr.as_ptr() as *mut _);
-            self.cap = new_cap;
-        }
+        let (new_cap, new_layout) = if self.cap == 0 {
+            (1, Layout::array::<T>(1).unwrap())
+        } else {
+            // This can't overflow because we ensure self.cap <= isize::MAX.
+            let new_cap = 2 * self.cap;
+
+            // Layout::array checks that the number of bytes is <= usize::MAX,
+            // but this is redundant since old_layout.size() <= isize::MAX,
+            // so the `unwrap` should never fail.
+            let new_layout = Layout::array::<T>(new_cap).unwrap();
+            (new_cap, new_layout)
+        };
+
+        // Ensure that the new allocation doesn't exceed `isize::MAX` bytes.
+        assert!(
+            new_layout.size() <= isize::MAX as usize,
+            "Allocation too large"
+        );
+
+        let new_ptr = if self.cap == 0 {
+            unsafe { alloc::alloc(new_layout) }
+        } else {
+            let old_layout = Layout::array::<T>(self.cap).unwrap();
+            let old_ptr = self.ptr.as_ptr() as *mut u8;
+            unsafe { alloc::realloc(old_ptr, old_layout, new_layout.size()) }
+        };
+
+        // If allocation fails, `new_ptr` will be null, in which case we abort.
+        self.ptr = match NonNull::new(new_ptr as *mut T) {
+            Some(p) => p,
+            None => alloc::handle_alloc_error(new_layout),
+        };
+        self.cap = new_cap;
     }
 }
 
 impl<T> Drop for RawVec<T> {
     fn drop(&mut self) {
         let elem_size = mem::size_of::<T>();
+
         if self.cap != 0 && elem_size != 0 {
             unsafe {
-                let c: NonNull<T> = self.ptr.into();
-                Global.deallocate(c.cast(),
-                                  Layout::array::<T>(self.cap).unwrap());
+                alloc::dealloc(
+                    self.ptr.as_ptr() as *mut u8,
+                    Layout::array::<T>(self.cap).unwrap(),
+                );
             }
         }
     }
@@ -85,21 +91,30 @@ pub struct Vec<T> {
 }
 
 impl<T> Vec<T> {
-    fn ptr(&self) -> *mut T { self.buf.ptr.as_ptr() }
+    fn ptr(&self) -> *mut T {
+        self.buf.ptr.as_ptr()
+    }
 
-    fn cap(&self) -> usize { self.buf.cap }
+    fn cap(&self) -> usize {
+        self.buf.cap
+    }
 
     pub fn new() -> Self {
-        Vec { buf: RawVec::new(), len: 0 }
+        Vec {
+            buf: RawVec::new(),
+            len: 0,
+        }
     }
 
     pub fn push(&mut self, elem: T) {
-        if self.len == self.cap() { self.buf.grow(); }
+        if self.len == self.cap() {
+            self.buf.grow();
+        }
 
         unsafe {
             ptr::write(self.ptr().offset(self.len as isize), elem);
         }
 
-        // Can't fail, we'll OOM first.
+        // Can't overflow, we'll OOM first.
         self.len += 1;
     }
 
@@ -108,22 +123,22 @@ impl<T> Vec<T> {
             None
         } else {
             self.len -= 1;
-            unsafe {
-                Some(ptr::read(self.ptr().offset(self.len as isize)))
-            }
+            unsafe { Some(ptr::read(self.ptr().offset(self.len as isize))) }
         }
     }
 
     pub fn insert(&mut self, index: usize, elem: T) {
         assert!(index <= self.len, "index out of bounds");
-        if self.cap() == self.len { self.buf.grow(); }
+        if self.cap() == self.len {
+            self.buf.grow();
+        }
 
         unsafe {
-            if index < self.len {
-                ptr::copy(self.ptr().offset(index as isize),
-                          self.ptr().offset(index as isize + 1),
-                          self.len - index);
-            }
+            ptr::copy(
+                self.ptr().offset(index as isize),
+                self.ptr().offset(index as isize + 1),
+                self.len - index,
+            );
             ptr::write(self.ptr().offset(index as isize), elem);
             self.len += 1;
         }
@@ -134,9 +149,11 @@ impl<T> Vec<T> {
         unsafe {
             self.len -= 1;
             let result = ptr::read(self.ptr().offset(index as isize));
-            ptr::copy(self.ptr().offset(index as isize + 1),
-                      self.ptr().offset(index as isize),
-                      self.len - index);
+            ptr::copy(
+                self.ptr().offset(index as isize + 1),
+                self.ptr().offset(index as isize),
+                self.len - index,
+            );
             result
         }
     }
@@ -181,24 +198,16 @@ impl<T> Drop for Vec<T> {
 impl<T> Deref for Vec<T> {
     type Target = [T];
     fn deref(&self) -> &[T] {
-        unsafe {
-            std::slice::from_raw_parts(self.ptr(), self.len)
-        }
+        unsafe { std::slice::from_raw_parts(self.ptr(), self.len) }
     }
 }
 
 impl<T> DerefMut for Vec<T> {
     fn deref_mut(&mut self) -> &mut [T] {
-        unsafe {
-            std::slice::from_raw_parts_mut(self.ptr(), self.len)
-        }
+        unsafe { std::slice::from_raw_parts_mut(self.ptr(), self.len) }
     }
 }
 
-
-
-
-
 struct RawValIter<T> {
     start: *const T,
     end: *const T,
@@ -239,8 +248,8 @@ impl<T> Iterator for RawValIter<T> {
 
     fn size_hint(&self) -> (usize, Option<usize>) {
         let elem_size = mem::size_of::<T>();
-        let len = (self.end as usize - self.start as usize)
-                  / if elem_size == 0 { 1 } else { elem_size };
+        let len = (self.end as usize - self.start as usize) /
+                  if elem_size == 0 { 1 } else { elem_size };
         (len, Some(len))
     }
 }
@@ -262,9 +271,6 @@ impl<T> DoubleEndedIterator for RawValIter<T> {
     }
 }
 
-
-
-
 pub struct IntoIter<T> {
     _buf: RawVec<T>, // we don't actually care about this. Just need it to live.
     iter: RawValIter<T>,
 }
 
 impl<T> Iterator for IntoIter<T> {
     type Item = T;
-    fn next(&mut self) -> Option<T> { self.iter.next() }
-    fn size_hint(&self) -> (usize, Option<usize>) { self.iter.size_hint() }
+    fn next(&mut self) -> Option<T> {
+        self.iter.next()
+    }
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.iter.size_hint()
+    }
 }
 
 impl<T> DoubleEndedIterator for IntoIter<T> {
-    fn next_back(&mut self) -> Option<T> { self.iter.next_back() }
+    fn next_back(&mut self) -> Option<T> {
+        self.iter.next_back()
+    }
 }
 
 impl<T> Drop for IntoIter<T> {
@@ -286,9 +298,6 @@
     }
 }
 
-
-
-
 pub struct Drain<'a, T: 'a> {
     vec: PhantomData<&'a mut Vec<T>>,
     iter: RawValIter<T>,
@@ -296,12 +305,18 @@ pub struct Drain<'a, T: 'a> {
 
 impl<'a, T> Iterator for Drain<'a, T> {
     type Item = T;
-    fn next(&mut self) -> Option<T> { self.iter.next() }
-    fn size_hint(&self) -> (usize, Option<usize>) { self.iter.size_hint() }
+    fn next(&mut self) -> Option<T> {
+        self.iter.next()
+    }
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.iter.size_hint()
+    }
 }
 
 impl<'a, T> DoubleEndedIterator for Drain<'a, T> {
-    fn next_back(&mut self) -> Option<T> { self.iter.next_back() }
+    fn next_back(&mut self) -> Option<T> {
+        self.iter.next_back()
+    }
 }
 
 impl<'a, T> Drop for Drain<'a, T> {
@@ -321,6 +336,7 @@ impl<'a, T> Drop for Drain<'a, T> {
 #
 # mod tests {
 #     use super::*;
+#
 #     pub fn create_push_pop() {
 #         let mut v = Vec::new();
 #         v.push(1);
@@ -338,7 +354,7 @@
 #         assert_eq!(5, x);
 #         assert_eq!(1, v.len());
 #     }
-#
+#
 #     pub fn iter_test() {
 #         let mut v = Vec::new();
 #         for i in 0..10 {
@@ -351,7 +367,7 @@
 #         assert_eq!(0, *first);
 #         assert_eq!(9, *last);
 #     }
-#
+#
 #     pub fn test_drain() {
 #         let mut v = Vec::new();
 #         for i in 0..10 {
@@ -368,19 +384,19 @@
 #         v.push(Box::new(1));
 #         assert_eq!(1, *v.pop().unwrap());
 #     }
-#
+#
 #     pub fn test_zst() {
 #         let mut v = Vec::new();
 #         for _i in 0..10 {
 #             v.push(())
 #         }
-#
+#
 #         let mut count = 0;
-#
+#
 #         for _ in v.into_iter() {
 #             count += 1
 #         }
-#
+#
 #         assert_eq!(10, count);
 #     }
 # }
diff --git a/src/vec-insert-remove.md b/src/vec-insert-remove.md
index c02a16d8..d351b013 100644
--- a/src/vec-insert-remove.md
+++ b/src/vec-insert-remove.md
@@ -20,12 +20,10 @@ pub fn insert(&mut self, index: usize, elem: T) {
     if self.cap == self.len { self.grow(); }
 
     unsafe {
-        if index < self.len {
-            // ptr::copy(src, dest, len): "copy from source to dest len elems"
-            ptr::copy(self.ptr.as_ptr().offset(index as isize),
-                      self.ptr.as_ptr().offset(index as isize + 1),
-                      self.len - index);
-        }
+        // ptr::copy(src, dest, len): "copy from src to dest len elems"
+        ptr::copy(self.ptr.as_ptr().offset(index as isize),
+            self.ptr.as_ptr().offset(index as isize + 1),
+            self.len - index);
         ptr::write(self.ptr.as_ptr().offset(index as isize), elem);
         self.len += 1;
     }
diff --git a/src/vec-into-iter.md b/src/vec-into-iter.md
index 02326f68..4023c68c 100644
--- a/src/vec-into-iter.md
+++ b/src/vec-into-iter.md
@@ -44,10 +44,11 @@ So we're going to use the following struct:
 
 ```rust,ignore
 pub struct IntoIter<T> {
-    buf: Unique<T>,
+    buf: NonNull<T>,
     cap: usize,
     start: *const T,
     end: *const T,
+    _marker: PhantomData<T>,
 }
 ```
 
@@ -75,6 +76,7 @@ impl<T> Vec<T> {
                 } else {
                     ptr.as_ptr().offset(len as isize)
                 },
+                _marker: PhantomData,
             }
         }
     }
@@ -134,11 +136,9 @@ impl<T> Drop for IntoIter<T> {
         if self.cap != 0 {
             // drop any remaining elements
             for _ in &mut *self {}
-
-            unsafe {
-                let c: NonNull<T> = self.buf.into();
-                Global.deallocate(c.cast(),
-                                  Layout::array::<T>(self.cap).unwrap());
+            let layout = Layout::array::<T>(self.cap).unwrap();
+            unsafe {
+                alloc::dealloc(self.buf.as_ptr() as *mut u8, layout);
             }
         }
     }
diff --git a/src/vec-layout.md b/src/vec-layout.md
index 26c979f7..508a44aa 100644
--- a/src/vec-layout.md
+++ b/src/vec-layout.md
@@ -22,64 +22,39 @@ conservatively assume we don't own any values of type `T`. See [the chapter on
 ownership and lifetimes][ownership] for all the details on variance and drop
 check.
 
-As we saw in the ownership chapter, we should use `Unique<T>` in place of
-`*mut T` when we have a raw pointer to an allocation we own. Unique is unstable,
+As we saw in the ownership chapter, the standard library uses `Unique<T>` in place of
+`*mut T` when it has a raw pointer to an allocation that it owns. Unique is unstable,
 so we'd like to not use it if possible, though.
 
 As a recap, Unique is a wrapper around a raw pointer that declares that:
 
-* We are variant over `T`
+* We are covariant over `T`
 * We may own a value of type `T` (for drop check)
 * We are Send/Sync if `T` is Send/Sync
 * Our pointer is never null (so `Option<Vec<T>>` is null-pointer-optimized)
 
-We can implement all of the above requirements except for the last
-one in stable Rust:
+We can implement all of the above requirements in stable Rust. To do this, instead
+of using `Unique<T>` we will use [`NonNull<T>`][NonNull], another wrapper around a
+raw pointer, which gives us two of the above properties, namely it is covariant
+over `T` and is declared to never be null. By adding a `PhantomData<T>` (for drop
+check) and implementing Send/Sync if `T` is, we get the same results as using
+`Unique<T>`:
 
-```rust,ignore
+```rust
+use std::ptr::NonNull;
 use std::marker::PhantomData;
-use std::ops::Deref;
-use std::mem;
-
-struct Unique<T> {
-    ptr: *const T,              // *const for variance
-    _marker: PhantomData<T>,    // For the drop checker
-}
-
-// Deriving Send and Sync is safe because we are the Unique owners
-// of this data. It's like Unique is "just" T.
-unsafe impl<T: Send> Send for Unique<T> {}
-unsafe impl<T: Sync> Sync for Unique<T> {}
-
-impl<T> Unique<T> {
-    pub fn new(ptr: *mut T) -> Self {
-        Unique { ptr: ptr, _marker: PhantomData }
-    }
-
-    pub fn as_ptr(&self) -> *mut T {
-        self.ptr as *mut T
-    }
-}
-```
-
-Unfortunately the mechanism for stating that your value is non-zero is
-unstable and unlikely to be stabilized soon. As such we're just going to
-take the hit and use std's Unique:
-
-
-```rust,ignore
 pub struct Vec<T> {
-    ptr: Unique<T>,
+    ptr: NonNull<T>,
     cap: usize,
     len: usize,
+    _marker: PhantomData<T>,
 }
-```
 
-If you don't care about the null-pointer optimization, then you can use the
-stable code. However we will be designing the rest of the code around enabling
-this optimization. It should be noted that `Unique::new` is unsafe to call, because
-putting `null` inside of it is Undefined Behavior. Our stable Unique doesn't
-need `new` to be unsafe because it doesn't make any interesting guarantees about
-its contents.
+unsafe impl<T: Send> Send for Vec<T> {}
+unsafe impl<T: Sync> Sync for Vec<T> {}
+# fn main() {}
+```
 
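+Covariance is what lets a `Vec` of longer-lived references be used where a
+`Vec` of shorter-lived references is expected. As a quick illustrative check
+(a sketch of the property, not part of the layout itself; it compiles only
+because `NonNull<T>` is covariant over `T`):
+
+```rust,ignore
+fn take_shorter<'a>(_vec: Vec<&'a str>, _s: &'a str) {}
+
+let s = String::from("hello");
+// A Vec of longer-lived (&'static) references...
+let v: Vec<&'static str> = Vec::new();
+// ...can be passed where shorter-lived references are expected.
+take_shorter(v, &s);
+```
+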
 [ownership]: ownership.html
+[NonNull]: ../std/ptr/struct.NonNull.html
\ No newline at end of file
diff --git a/src/vec-push-pop.md b/src/vec-push-pop.md
index 6f39a05d..3d20d17f 100644
--- a/src/vec-push-pop.md
+++ b/src/vec-push-pop.md
@@ -51,5 +51,5 @@ pub fn pop(&mut self) -> Option<T> {
             Some(ptr::read(self.ptr.as_ptr().offset(self.len as isize)))
         }
     }
-}
+}
 ```
diff --git a/src/vec-raw.md b/src/vec-raw.md
index 7303a549..5662cfe1 100644
--- a/src/vec-raw.md
+++ b/src/vec-raw.md
@@ -10,56 +10,64 @@ allocating, growing, and freeing:
 
 ```rust,ignore
 struct RawVec<T> {
-    ptr: Unique<T>,
+    ptr: NonNull<T>,
     cap: usize,
+    _marker: PhantomData<T>,
 }
 
+unsafe impl<T: Send> Send for RawVec<T> {}
+unsafe impl<T: Sync> Sync for RawVec<T> {}
+
 impl<T> RawVec<T> {
     fn new() -> Self {
-        assert!(mem::size_of::<T>() != 0, "We're not ready to handle ZSTs");
-        RawVec { ptr: Unique::dangling(), cap: 0 }
+        assert!(mem::size_of::<T>() != 0, "TODO: implement ZST support");
+        RawVec {
+            ptr: NonNull::dangling(),
+            cap: 0,
+            _marker: PhantomData,
+        }
     }
 
-    // unchanged from Vec
     fn grow(&mut self) {
-        unsafe {
-            let elem_size = mem::size_of::<T>();
-
-            let (new_cap, ptr) = if self.cap == 0 {
-                let ptr = Global.allocate(Layout::array::<T>(1).unwrap());
-                (1, ptr)
-            } else {
-                let new_cap = 2 * self.cap;
-                let c: NonNull<T> = self.ptr.into();
-                let ptr = Global.grow(c.cast(),
-                                      Layout::array::<T>(self.cap).unwrap(),
-                                      Layout::array::<T>(new_cap).unwrap());
-                (new_cap, ptr)
-            };
-
-            // If allocate or reallocate fail, oom
-            if ptr.is_err() {
-                handle_alloc_error(Layout::from_size_align_unchecked(
-                    new_cap * elem_size,
-                    mem::align_of::<T>(),
-                ))
-            }
-
-            let ptr = ptr.unwrap();
-
-            self.ptr = Unique::new_unchecked(ptr.as_ptr() as *mut _);
-            self.cap = new_cap;
-        }
+        let (new_cap, new_layout) = if self.cap == 0 {
+            (1, Layout::array::<T>(1).unwrap())
+        } else {
+            // This can't overflow because we ensure self.cap <= isize::MAX.
+            let new_cap = 2 * self.cap;
+
+            // Layout::array checks that the number of bytes is <= usize::MAX,
+            // but this is redundant since old_layout.size() <= isize::MAX,
+            // so the `unwrap` should never fail.
+            let new_layout = Layout::array::<T>(new_cap).unwrap();
+            (new_cap, new_layout)
+        };
+
+        // Ensure that the new allocation doesn't exceed `isize::MAX` bytes.
+        assert!(new_layout.size() <= isize::MAX as usize, "Allocation too large");
+
+        let new_ptr = if self.cap == 0 {
+            unsafe { alloc::alloc(new_layout) }
+        } else {
+            let old_layout = Layout::array::<T>(self.cap).unwrap();
+            let old_ptr = self.ptr.as_ptr() as *mut u8;
+            unsafe { alloc::realloc(old_ptr, old_layout, new_layout.size()) }
+        };
+
+        // If allocation fails, `new_ptr` will be null, in which case we abort.
+        self.ptr = match NonNull::new(new_ptr as *mut T) {
+            Some(p) => p,
+            None => alloc::handle_alloc_error(new_layout),
+        };
+        self.cap = new_cap;
     }
 }
 
 impl<T> Drop for RawVec<T> {
     fn drop(&mut self) {
         if self.cap != 0 {
+            let layout = Layout::array::<T>(self.cap).unwrap();
             unsafe {
-                let c: NonNull<T> = self.ptr.into();
-                Global.deallocate(c.cast(),
-                                  Layout::array::<T>(self.cap).unwrap());
+                alloc::dealloc(self.ptr.as_ptr() as *mut u8, layout);
             }
         }
     }
@@ -69,24 +77,32 @@ impl<T> Drop for RawVec<T> {
 And change Vec as follows:
 
 ```rust,ignore
+
 pub struct Vec<T> {
     buf: RawVec<T>,
     len: usize,
 }
 
 impl<T> Vec<T> {
-    fn ptr(&self) -> *mut T { self.buf.ptr.as_ptr() }
+    fn ptr(&self) -> *mut T {
+        self.buf.ptr.as_ptr()
+    }
 
-    fn cap(&self) -> usize { self.buf.cap }
+    fn cap(&self) -> usize {
+        self.buf.cap
+    }
 
     pub fn new() -> Self {
-        Vec { buf: RawVec::new(), len: 0 }
+        Vec {
+            buf: RawVec::new(),
+            len: 0,
+        }
     }
 
     // push/pop/insert/remove largely unchanged:
     // * `self.ptr.as_ptr() -> self.ptr()`
     // * `self.cap -> self.cap()`
-    // * `self.grow -> self.buf.grow()`
+    // * `self.grow() -> self.buf.grow()`
 }
 
 impl<T> Drop for Vec<T> {
diff --git a/src/vec-zsts.md b/src/vec-zsts.md
index 418f557a..2b138e59 100644
--- a/src/vec-zsts.md
+++ b/src/vec-zsts.md
@@ -19,7 +19,7 @@ Thankfully we abstracted out pointer-iterators and allocating handling into
 ## Allocating Zero-Sized Types
 
 So if the allocator API doesn't support zero-sized allocations, what on earth
-do we store as our allocation? `Unique::dangling()` of course! Almost every operation
+do we store as our allocation? `NonNull::dangling()` of course! Almost every operation
 with a ZST is a no-op since ZSTs have exactly one value, and therefore no state needs
 to be considered to store or load them. This actually extends to `ptr::read` and
 `ptr::write`: they won't actually look at the pointer at all. As such we never need
@@ -38,43 +38,49 @@ impl<T> RawVec<T> {
         // !0 is usize::MAX. This branch should be stripped at compile time.
         let cap = if mem::size_of::<T>() == 0 { !0 } else { 0 };
 
-        // Unique::dangling() doubles as "unallocated" and "zero-sized allocation"
-        RawVec { ptr: Unique::dangling(), cap: cap }
+        // NonNull::dangling() doubles as "unallocated" and "zero-sized allocation"
+        RawVec {
+            ptr: NonNull::dangling(),
+            cap: cap,
+            _marker: PhantomData,
+        }
     }
 
     fn grow(&mut self) {
-        unsafe {
-            let elem_size = mem::size_of::<T>();
+        // since we set the capacity to usize::MAX when T has size 0,
+        // getting to here necessarily means the Vec is overfull.
+        assert!(mem::size_of::<T>() != 0, "capacity overflow");
 
-            // since we set the capacity to usize::MAX when elem_size is
-            // 0, getting to here necessarily means the Vec is overfull.
-            assert!(elem_size != 0, "capacity overflow");
-
-            let (new_cap, ptr) = if self.cap == 0 {
-                let ptr = Global.allocate(Layout::array::<T>(1).unwrap());
-                (1, ptr)
-            } else {
-                let new_cap = 2 * self.cap;
-                let c: NonNull<T> = self.ptr.into();
-                let ptr = Global.grow(c.cast(),
-                                      Layout::array::<T>(self.cap).unwrap(),
-                                      Layout::array::<T>(new_cap).unwrap());
-                (new_cap, ptr)
-            };
-
-            // If allocate or reallocate fail, oom
-            if ptr.is_err() {
-                handle_alloc_error(Layout::from_size_align_unchecked(
-                    new_cap * elem_size,
-                    mem::align_of::<T>(),
-                ))
-            }
+        let (new_cap, new_layout) = if self.cap == 0 {
+            (1, Layout::array::<T>(1).unwrap())
+        } else {
+            // This can't overflow because we ensure self.cap <= isize::MAX.
+            let new_cap = 2 * self.cap;
 
-            let ptr = ptr.unwrap();
+            // Layout::array checks that the number of bytes is <= usize::MAX,
+            // but this is redundant since old_layout.size() <= isize::MAX,
+            // so the `unwrap` should never fail.
+            let new_layout = Layout::array::<T>(new_cap).unwrap();
+            (new_cap, new_layout)
+        };
 
-            self.ptr = Unique::new_unchecked(ptr.as_ptr() as *mut _);
-            self.cap = new_cap;
-        }
+        // Ensure that the new allocation doesn't exceed `isize::MAX` bytes.
+        assert!(new_layout.size() <= isize::MAX as usize, "Allocation too large");
+
+        let new_ptr = if self.cap == 0 {
+            unsafe { alloc::alloc(new_layout) }
+        } else {
+            let old_layout = Layout::array::<T>(self.cap).unwrap();
+            let old_ptr = self.ptr.as_ptr() as *mut u8;
+            unsafe { alloc::realloc(old_ptr, old_layout, new_layout.size()) }
+        };
+
+        // If allocation fails, `new_ptr` will be null, in which case we abort.
+        self.ptr = match NonNull::new(new_ptr as *mut T) {
+            Some(p) => p,
+            None => alloc::handle_alloc_error(new_layout),
+        };
+        self.cap = new_cap;
     }
 }
 
@@ -82,12 +88,12 @@ impl<T> Drop for RawVec<T> {
     fn drop(&mut self) {
         let elem_size = mem::size_of::<T>();
 
-        // don't free zero-sized allocations, as they were never allocated.
         if self.cap != 0 && elem_size != 0 {
             unsafe {
-                let c: NonNull<T> = self.ptr.into();
-                Global.deallocate(c.cast(),
-                                  Layout::array::<T>(self.cap).unwrap());
+                alloc::dealloc(
+                    self.ptr.as_ptr() as *mut u8,
+                    Layout::array::<T>(self.cap).unwrap(),
+                );
             }
         }
     }
diff --git a/src/vec.md b/src/vec.md
index d2d47e98..efe701c3 100644
--- a/src/vec.md
+++ b/src/vec.md
@@ -1,16 +1,10 @@
 # Example: Implementing Vec
 
 To bring everything together, we're going to write `std::Vec` from scratch.
-Because all the best tools for writing unsafe code are unstable, this
-project will only work on nightly (as of Rust 1.9.0). With the exception of the
-allocator API, much of the unstable code we'll use is expected to be stabilized
-in a similar form as it is today.
-
-However we will generally try to avoid unstable code where possible. In
-particular we won't use any intrinsics that could make a code a little
-bit nicer or efficient because intrinsics are permanently unstable. Although
-many intrinsics *do* become stabilized elsewhere (`std::ptr` and `std::mem`
-consist of many intrinsics).
+We will limit ourselves to stable Rust. In particular we won't use any
+intrinsics that could make our code a little bit nicer or more efficient because
+intrinsics are permanently unstable. Although many intrinsics *do* become
+stabilized elsewhere (`std::ptr` and `std::mem` consist of many intrinsics).
 
 Ultimately this means our implementation may not take advantage of all
 possible optimizations, though it will be by no means *naive*. We will
From 0e33cf622f129ba5b4864db2b36a16c473b522e8 Mon Sep 17 00:00:00 2001
From: Brent Kerby
Date: Fri, 9 Apr 2021 21:27:03 -0600
Subject: [PATCH 2/3] Apply suggested style fixes

Co-authored-by: Yuki Okushi
---
 src/vec-alloc.md     | 26 +++++++++++++-------------
 src/vec-dealloc.md   |  4 ++--
 src/vec-final.md     | 20 ++++++++++----------
 src/vec-into-iter.md |  4 ++--
 src/vec-layout.md    | 12 ++++++------
 src/vec-push-pop.md  |  2 +-
 src/vec-raw.md       |  1 -
 src/vec-zsts.md      |  4 ++--
 src/vec.md           |  6 +++---
 9 files changed, 39 insertions(+), 40 deletions(-)

diff --git a/src/vec-alloc.md b/src/vec-alloc.md
index 167242e7..cf3844bd 100644
--- a/src/vec-alloc.md
+++ b/src/vec-alloc.md
@@ -1,16 +1,16 @@
 # Allocating Memory
 
-Using NonNull throws a wrench in an important feature of Vec (and indeed all of
-the std collections): creating an empty Vec doesn't actually allocate at all. This
-is not the same as allocating a zero-sized memory block, which is not allowed by
-the global allocator (it results in undefined behavior!). So if we can't allocate,
-but also can't put a null pointer in `ptr`, what do we do in `Vec::new`? Well, we
+Using `NonNull` throws a wrench in an important feature of Vec (and indeed all of
+the std collections): creating an empty Vec doesn't actually allocate at all. This
+is not the same as allocating a zero-sized memory block, which is not allowed by
+the global allocator (it results in undefined behavior!). So if we can't allocate,
+but also can't put a null pointer in `ptr`, what do we do in `Vec::new`? Well, we
 just put some other garbage in there!
 
 This is perfectly fine because we already have `cap == 0` as our sentinel for no
 allocation. We don't even need to handle it specially in almost any code because
 we usually need to check if `cap > len` or `len > 0` anyway. The recommended
-Rust value to put here is `mem::align_of::<T>()`. NonNull provides a convenience
+Rust value to put here is `mem::align_of::<T>()`. `NonNull` provides a convenience
 for this: `NonNull::dangling()`. There are quite a few places where we'll
 want to use `dangling` because there's no real allocation to talk about but
 `null` would make the compiler do bad things.
@@ -23,11 +23,11 @@ use std::mem;
 impl<T> Vec<T> {
     fn new() -> Self {
         assert!(mem::size_of::<T>() != 0, "We're not ready to handle ZSTs");
-        Vec {
-            ptr: NonNull::dangling(),
-            len: 0,
+        Vec {
+            ptr: NonNull::dangling(),
+            len: 0,
             cap: 0,
-            _marker: PhantomData
+            _marker: PhantomData,
         }
     }
 }
@@ -45,7 +45,7 @@ and [`dealloc`][dealloc] which are available in stable Rust in
 favor of the methods of [`std::alloc::Global`][Global] after this type is stabilized.
 
 We'll also need a way to handle out-of-memory (OOM) conditions. The standard
-library provides a function [`alloc::handle_alloc_error`][handle_alloc_error],
+library provides a function [`alloc::handle_alloc_error`][handle_alloc_error],
 which will abort the program in a platform-specific manner.
 The reason we abort and don't panic is because unwinding can cause allocations
 to happen, and that seems like a bad thing to do when your allocator just came
@@ -171,9 +171,9 @@ impl<T> Vec<T> {
             (1, Layout::array::<T>(1).unwrap())
         } else {
             // This can't overflow since self.cap <= isize::MAX.
-            let new_cap = 2 * self.cap;
+            let new_cap = 2 * self.cap;
 
-            // Layout::array checks that the number of bytes is <= usize::MAX,
+            // `Layout::array` checks that the number of bytes is <= usize::MAX,
             // but this is redundant since old_layout.size() <= isize::MAX,
             // so the `unwrap` should never fail.
             let new_layout = Layout::array::<T>(new_cap).unwrap();
diff --git a/src/vec-dealloc.md b/src/vec-dealloc.md
index 7cc8ce49..45fe9412 100644
--- a/src/vec-dealloc.md
+++ b/src/vec-dealloc.md
@@ -16,8 +16,8 @@ impl<T> Drop for Vec<T> {
         if self.cap != 0 {
             while let Some(_) = self.pop() { }
             let layout = Layout::array::<T>(self.cap).unwrap();
-            unsafe {
-                alloc::dealloc(self.ptr.as_ptr() as *mut u8, layout);
+            unsafe {
+                alloc::dealloc(self.ptr.as_ptr() as *mut u8, layout);
             }
         }
     }
diff --git a/src/vec-final.md b/src/vec-final.md
index 4e3ad18f..574fe47a 100644
--- a/src/vec-final.md
+++ b/src/vec-final.md
@@ -21,7 +21,7 @@ impl<T> RawVec<T> {
         // !0 is usize::MAX. This branch should be stripped at compile time.
         let cap = if mem::size_of::<T>() == 0 { !0 } else { 0 };
 
-        // NonNull::dangling() doubles as "unallocated" and "zero-sized allocation"
+        // `NonNull::dangling()` doubles as "unallocated" and "zero-sized allocation"
         RawVec {
             ptr: NonNull::dangling(),
             cap: cap,
@@ -40,7 +40,7 @@ impl<T> RawVec<T> {
             // This can't overflow because we ensure self.cap <= isize::MAX.
             let new_cap = 2 * self.cap;
 
-            // Layout::array checks that the number of bytes is <= usize::MAX,
+            // `Layout::array` checks that the number of bytes is <= usize::MAX,
             // but this is redundant since old_layout.size() <= isize::MAX,
             // so the `unwrap` should never fail.
             let new_layout = Layout::array::<T>(new_cap).unwrap();
@@ -248,7 +248,7 @@ impl<T> Iterator for RawValIter<T> {
 
     fn size_hint(&self) -> (usize, Option<usize>) {
         let elem_size = mem::size_of::<T>();
-        let len = (self.end as usize - self.start as usize) /
+        let len = (self.end as usize - self.start as usize) /
                   if elem_size == 0 { 1 } else { elem_size };
         (len, Some(len))
     }
@@ -336,7 +336,7 @@ impl<'a, T> Drop for Drain<'a, T> {
 #
 # mod tests {
 #     use super::*;
-#
+#
 #     pub fn create_push_pop() {
 #         let mut v = Vec::new();
 #         v.push(1);
@@ -354,7 +354,7 @@
 #         assert_eq!(5, x);
 #         assert_eq!(1, v.len());
 #     }
-#
+#
 #     pub fn iter_test() {
 #         let mut v = Vec::new();
 #         for i in 0..10 {
@@ -367,7 +367,7 @@
 #         assert_eq!(0, *first);
 #         assert_eq!(9, *last);
 #     }
-#
+#
 #     pub fn test_drain() {
 #         let mut v = Vec::new();
 #         for i in 0..10 {
@@ -384,19 +384,19 @@
 #         v.push(Box::new(1));
 #         assert_eq!(1, *v.pop().unwrap());
 #     }
-#
+#
 #     pub fn test_zst() {
 #         let mut v = Vec::new();
 #         for _i in 0..10 {
 #             v.push(())
 #         }
-#
+#
 #         let mut count = 0;
-#
+#
 #         for _ in v.into_iter() {
 #             count += 1
 #         }
-#
+#
 #         assert_eq!(10, count);
 #     }
 # }
diff --git a/src/vec-into-iter.md b/src/vec-into-iter.md
index 4023c68c..204724ba 100644
--- a/src/vec-into-iter.md
+++ b/src/vec-into-iter.md
@@ -137,8 +137,8 @@ impl<T> Drop for IntoIter<T> {
             // drop any remaining elements
             for _ in &mut *self {}
             let layout = Layout::array::<T>(self.cap).unwrap();
-            unsafe {
-                alloc::dealloc(self.buf.as_ptr() as *mut u8, layout);
+            unsafe {
+                alloc::dealloc(self.buf.as_ptr() as *mut u8, layout);
             }
         }
     }
diff --git a/src/vec-layout.md b/src/vec-layout.md
index 508a44aa..20e23065 100644
--- a/src/vec-layout.md
+++ b/src/vec-layout.md
@@ -33,11 +33,11 @@ As a recap, Unique is a wrapper around a raw pointer that declares that:
 * We are Send/Sync if `T` is Send/Sync
 * Our pointer is never null (so `Option<Vec<T>>` is null-pointer-optimized)
 
-We can implement all of the above requirements in stable Rust. To do this, instead
-of using `Unique<T>` we will use [`NonNull<T>`][NonNull], another wrapper around a
-raw pointer, which gives us two of the above properties, namely it is covariant
-over `T` and is declared to never be null. By adding a `PhantomData<T>` (for drop
-check) and implementing Send/Sync if `T` is, we get the same results as using
+We can implement all of the above requirements in stable Rust. To do this, instead
+of using `Unique<T>` we will use [`NonNull<T>`][NonNull], another wrapper around a
+raw pointer, which gives us two of the above properties, namely it is covariant
+over `T` and is declared to never be null. By adding a `PhantomData<T>` (for drop
+check) and implementing Send/Sync if `T` is, we get the same results as using
 `Unique<T>`:
 
 ```rust
@@ -57,4 +57,4 @@ unsafe impl<T: Sync> Sync for Vec<T> {}
 ```
 
 [ownership]: ownership.html
-[NonNull]: ../std/ptr/struct.NonNull.html
\ No newline at end of file
+[NonNull]: ../std/ptr/struct.NonNull.html
diff --git a/src/vec-push-pop.md b/src/vec-push-pop.md
index 3d20d17f..6f39a05d 100644
--- a/src/vec-push-pop.md
+++ b/src/vec-push-pop.md
@@ -51,5 +51,5 @@ pub fn pop(&mut self) -> Option<T> {
             Some(ptr::read(self.ptr.as_ptr().offset(self.len as isize)))
         }
     }
-}
+}
 ```
diff --git a/src/vec-raw.md b/src/vec-raw.md
index 5662cfe1..cedae8ec 100644
--- a/src/vec-raw.md
+++ b/src/vec-raw.md
@@ -77,7 +77,6 @@ impl<T> Drop for RawVec<T> {
 And change Vec as follows:
 
 ```rust,ignore
-
 pub struct Vec<T> {
     buf: RawVec<T>,
     len: usize,
diff --git a/src/vec-zsts.md b/src/vec-zsts.md
index 2b138e59..56d67e80 100644
--- a/src/vec-zsts.md
+++ b/src/vec-zsts.md
@@ -38,7 +38,7 @@ impl<T> RawVec<T> {
         // !0 is usize::MAX. This branch should be stripped at compile time.
         let cap = if mem::size_of::<T>() == 0 { !0 } else { 0 };
 
-        // NonNull::dangling() doubles as "unallocated" and "zero-sized allocation"
+        // `NonNull::dangling()` doubles as "unallocated" and "zero-sized allocation"
         RawVec {
             ptr: NonNull::dangling(),
             cap: cap,
@@ -57,7 +57,7 @@ impl<T> RawVec<T> {
             // This can't overflow because we ensure self.cap <= isize::MAX.
             let new_cap = 2 * self.cap;
 
-            // Layout::array checks that the number of bytes is <= usize::MAX,
+            // `Layout::array` checks that the number of bytes is <= usize::MAX,
             // but this is redundant since old_layout.size() <= isize::MAX,
             // so the `unwrap` should never fail.
             let new_layout = Layout::array::<T>(new_cap).unwrap();
diff --git a/src/vec.md b/src/vec.md
index efe701c3..b0338633 100644
--- a/src/vec.md
+++ b/src/vec.md
@@ -1,9 +1,9 @@
 # Example: Implementing Vec
 
 To bring everything together, we're going to write `std::Vec` from scratch.
-We will limit ourselves to stable Rust. In particular we won't use any
-intrinsics that could make our code a little bit nicer or more efficient because
-intrinsics are permanently unstable. Although many intrinsics *do* become
+We will limit ourselves to stable Rust. In particular we won't use any
+intrinsics that could make our code a little bit nicer or more efficient because
+intrinsics are permanently unstable. Although many intrinsics *do* become
 stabilized elsewhere (`std::ptr` and `std::mem` consist of many intrinsics).
 
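+For example, `std::mem::size_of` and `std::ptr::write` are stable wrappers
+around such intrinsics. As a tiny illustrative taste (a sketch, not part of
+the Vec implementation itself):
+
+```rust
+use std::{mem, ptr};
+
+// size_of reports the size of a type in bytes.
+assert_eq!(mem::size_of::<u32>(), 4);
+
+// ptr::write stores a value without reading or dropping the old one.
+let mut x = 0u32;
+unsafe { ptr::write(&mut x, 42); }
+assert_eq!(x, 42);
+```
+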
 Ultimately this means our implementation may not take advantage of all

From d9183663966b14be91f66e405fd89c32a5ae555f Mon Sep 17 00:00:00 2001
From: Brent Kerby
Date: Sat, 1 May 2021 08:04:57 -0600
Subject: [PATCH 3/3] fix formatting

---
 src/vec-insert-remove.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/vec-insert-remove.md b/src/vec-insert-remove.md
index d351b013..3324bbbc 100644
--- a/src/vec-insert-remove.md
+++ b/src/vec-insert-remove.md
@@ -22,8 +22,8 @@ pub fn insert(&mut self, index: usize, elem: T) {
     unsafe {
         // ptr::copy(src, dest, len): "copy from src to dest len elems"
         ptr::copy(self.ptr.as_ptr().offset(index as isize),
-            self.ptr.as_ptr().offset(index as isize + 1),
-            self.len - index);
+                  self.ptr.as_ptr().offset(index as isize + 1),
+                  self.len - index);
         ptr::write(self.ptr.as_ptr().offset(index as isize), elem);
         self.len += 1;
     }