Skip to content

Commit

Permalink
Simplify the implementation of iterators over slices of ZSTs
Browse files Browse the repository at this point in the history
Currently, slice iterators over ZSTs store `end = start.wrapping_byte_add(len)`.

That's slightly convenient for `is_empty`, but kinda annoying for pretty much everything else -- see bugs like 42789, for example.

This PR instead changes it to just `end = ptr::invalid(len)` instead.

That's easier to think about (IMHO, at least) as well as easier to represent.
  • Loading branch information
scottmcm committed May 10, 2023
1 parent 50dff95 commit 15aa7fa
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 35 deletions.
13 changes: 13 additions & 0 deletions library/core/src/ptr/non_null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,19 @@ impl<T: ?Sized> NonNull<T> {
// SAFETY: `self` is a `NonNull` pointer which is necessarily non-null
unsafe { NonNull::new_unchecked(self.as_ptr() as *mut U) }
}

/// See [`pointer::add`] for semantics and safety requirements.
#[inline]
pub(crate) const unsafe fn add(self, delta: usize) -> Self
where
T: Sized,
{
// SAFETY: We require that the delta stays in-bounds of the object, and
// thus it cannot become null, as that would require wrapping the
// address space, which no legal objects are allowed to do.
// And the caller promised the `delta` is sound to add.
unsafe { NonNull { pointer: self.pointer.add(delta) } }
}
}

impl<T> NonNull<[T]> {
Expand Down
16 changes: 5 additions & 11 deletions library/core/src/slice/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::iter::{
use crate::marker::{PhantomData, Send, Sized, Sync};
use crate::mem::{self, SizedTypeProperties};
use crate::num::NonZeroUsize;
use crate::ptr::NonNull;
use crate::ptr::{invalid, invalid_mut, NonNull};

use super::{from_raw_parts, from_raw_parts_mut};

Expand Down Expand Up @@ -67,9 +67,7 @@ pub struct Iter<'a, T: 'a> {
ptr: NonNull<T>,
/// For non-ZSTs, the non-null pointer to the past-the-end element.
///
/// For ZSTs, this is `ptr.wrapping_byte_add(len)`.
///
/// For all types, `ptr == end` tests whether the iterator is empty.
/// For ZSTs, this is `ptr::invalid(len)`.
end: *const T,
_marker: PhantomData<&'a T>,
}
Expand All @@ -94,8 +92,7 @@ impl<'a, T> Iter<'a, T> {
unsafe {
assume(!ptr.is_null());

let end =
if T::IS_ZST { ptr.wrapping_byte_add(slice.len()) } else { ptr.add(slice.len()) };
let end = if T::IS_ZST { invalid(slice.len()) } else { ptr.add(slice.len()) };

Self { ptr: NonNull::new_unchecked(ptr as *mut T), end, _marker: PhantomData }
}
Expand Down Expand Up @@ -193,9 +190,7 @@ pub struct IterMut<'a, T: 'a> {
ptr: NonNull<T>,
/// For non-ZSTs, the non-null pointer to the past-the-end element.
///
/// For ZSTs, this is `ptr.wrapping_byte_add(len)`.
///
/// For all types, `ptr == end` tests whether the iterator is empty.
/// For ZSTs, this is `ptr::invalid_mut(len)`.
end: *mut T,
_marker: PhantomData<&'a mut T>,
}
Expand Down Expand Up @@ -235,8 +230,7 @@ impl<'a, T> IterMut<'a, T> {
unsafe {
assume(!ptr.is_null());

let end =
if T::IS_ZST { ptr.wrapping_byte_add(slice.len()) } else { ptr.add(slice.len()) };
let end = if T::IS_ZST { invalid_mut(slice.len()) } else { ptr.add(slice.len()) };

Self { ptr: NonNull::new_unchecked(ptr), end, _marker: PhantomData }
}
Expand Down
57 changes: 33 additions & 24 deletions library/core/src/slice/iter/macros.rs
Original file line number Diff line number Diff line change
@@ -1,28 +1,44 @@
//! Macros used by iterators of slice.
// Shrinks the iterator when T is a ZST, setting the length to `new_len`.
// `new_len` must not exceed `self.len()`.
macro_rules! zst_set_len {
($self: ident, $new_len: expr) => {{
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block

// SAFETY: same as `invalid(_mut)`, but the macro doesn't know
// which versions of that function to call, so open-code it.
$self.end = unsafe { mem::transmute::<usize, _>($new_len) };
}};
}

// Shrinks the iterator when T is a ZST, reducing the length by `n`.
// `n` must not exceed `self.len()`.
macro_rules! zst_shrink {
($self: ident, $n: ident) => {
let new_len = $self.end.addr() - $n;
zst_set_len!($self, new_len);
};
}

// Inlining is_empty and len makes a huge performance difference
macro_rules! is_empty {
// The way we encode the length of a ZST iterator, this works both for ZST
// and non-ZST.
($self: ident) => {
$self.ptr.as_ptr() as *const T == $self.end
if T::IS_ZST { $self.end.addr() == 0 } else { $self.ptr.as_ptr() as *const _ == $self.end }
};
}

macro_rules! len {
($self: ident) => {{
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block

let start = $self.ptr;
if T::IS_ZST {
// This _cannot_ use `ptr_sub` because we depend on wrapping
// to represent the length of long ZST slice iterators.
$self.end.addr().wrapping_sub(start.as_ptr().addr())
$self.end.addr()
} else {
// To get rid of some bounds checks (see `position`), we use ptr_sub instead of
// offset_from (Tested by `codegen/slice-position-bounds-check`.)
// SAFETY: by the type invariant pointers are aligned and `start <= end`
unsafe { $self.end.sub_ptr(start.as_ptr()) }
unsafe { $self.end.sub_ptr($self.ptr.as_ptr()) }
}
}};
}
Expand Down Expand Up @@ -50,14 +66,6 @@ macro_rules! iterator {
($self: ident) => {& $( $mut_ )? *$self.pre_dec_end(1)}
}

// Shrinks the iterator when T is a ZST, by moving the end of the iterator
// backwards by `n`. `n` must not exceed `self.len()`.
macro_rules! zst_shrink {
($self: ident, $n: ident) => {
$self.end = $self.end.wrapping_byte_sub($n);
}
}

impl<'a, T> $name<'a, T> {
// Helper function for creating a slice from the iterator.
#[inline(always)]
Expand All @@ -73,16 +81,15 @@ macro_rules! iterator {
// Unsafe because the offset must not exceed `self.len()`.
#[inline(always)]
unsafe fn post_inc_start(&mut self, offset: usize) -> * $raw_mut T {
let old = self.ptr;
if T::IS_ZST {
zst_shrink!(self, offset);
self.ptr.as_ptr()
} else {
let old = self.ptr.as_ptr();
// SAFETY: the caller guarantees that `offset` doesn't exceed `self.len()`,
// so this new pointer is inside `self` and thus guaranteed to be non-null.
self.ptr = unsafe { NonNull::new_unchecked(self.ptr.as_ptr().add(offset)) };
old
self.ptr = unsafe { self.ptr.add(offset) };
}
old.as_ptr()
}

// Helper function for moving the end of the iterator backwards by `offset` elements,
Expand Down Expand Up @@ -155,9 +162,7 @@ macro_rules! iterator {
if n >= len!(self) {
// This iterator is now empty.
if T::IS_ZST {
// We have to do it this way as `ptr` may never be 0, but `end`
// could be (due to wrapping).
self.end = self.ptr.as_ptr();
zst_set_len!(self, 0);
} else {
// SAFETY: end can't be 0 if T isn't ZST because ptr isn't 0 and end >= ptr
unsafe {
Expand Down Expand Up @@ -356,7 +361,11 @@ macro_rules! iterator {
fn nth_back(&mut self, n: usize) -> Option<$elem> {
if n >= len!(self) {
// This iterator is now empty.
self.end = self.ptr.as_ptr();
if T::IS_ZST {
zst_set_len!(self, 0);
} else {
self.end = self.ptr.as_ptr();
}
return None;
}
// SAFETY: We are in bounds. `pre_dec_end` does the right thing even for ZSTs.
Expand Down

0 comments on commit 15aa7fa

Please sign in to comment.