From 0e70c269feece979427bcc8795afc43efc1e47c1 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 7 Dec 2018 16:52:10 +0100 Subject: [PATCH 1/3] fix BTree creating shared references that are not entirely in-bounds --- src/liballoc/collections/btree/node.rs | 115 ++++++++++++++++++------- 1 file changed, 86 insertions(+), 29 deletions(-) diff --git a/src/liballoc/collections/btree/node.rs b/src/liballoc/collections/btree/node.rs index 215689dfc48c9..13cbcee2f8e08 100644 --- a/src/liballoc/collections/btree/node.rs +++ b/src/liballoc/collections/btree/node.rs @@ -58,9 +58,34 @@ pub const CAPACITY: usize = 2 * B - 1; /// these should always be put behind pointers, and specifically behind `BoxedNode` in the owned /// case. /// -/// We put the metadata first so that its position is the same for every `K` and `V`, in order -/// to statically allocate a single dummy node to avoid allocations. This struct is `repr(C)` to -/// prevent them from being reordered. +/// We have a separate type for the header and rely on it matching the prefix of `LeafNode`, in +/// order to statically allocate a single dummy node to avoid allocations. This struct is +/// `repr(C)` to prevent them from being reordered. `LeafNode` does not just contain a +/// `NodeHeader` because we do not want unnecessary padding between `len` and the keys. +/// Crucially, `NodeHeader` can be safely transmuted to different K and V. (This is exploited +/// by `as_header`.) +/// See `into_key_slice` for an explanation of K2. K2 cannot be safely transmuted around +/// because the size of `NodeHeader` depends on its alignment! +#[repr(C)] +struct NodeHeader { + /// We use `*const` as opposed to `*mut` so as to be covariant in `K` and `V`. + /// This either points to an actual node or is null. + parent: *const InternalNode, + + /// This node's index into the parent node's `edges` array. + /// `*node.parent.edges[node.parent_idx]` should be the same thing as `node`. + /// This is only guaranteed to be initialized when `parent` is non-null. + parent_idx: MaybeUninit, + + /// The number of keys and values this node stores. + /// + /// This next to `parent_idx` to encourage the compiler to join `len` and + /// `parent_idx` into the same 32-bit word, reducing space overhead. + len: u16, + + /// See `into_key_slice`. + keys_start: [K2; 0], +} #[repr(C)] struct LeafNode { /// We use `*const` as opposed to `*mut` so as to be covariant in `K` and `V`. @@ -98,24 +123,25 @@ impl LeafNode { len: 0 } } +} +impl NodeHeader { fn is_shared_root(&self) -> bool { ptr::eq(self, &EMPTY_ROOT_NODE as *const _ as *const _) } } // We need to implement Sync here in order to make a static instance. -unsafe impl Sync for LeafNode<(), ()> {} +unsafe impl Sync for NodeHeader<(), ()> {} // An empty node used as a placeholder for the root node, to avoid allocations. -// We use () in order to save space, since no operation on an empty tree will +// We use just a header in order to save space, since no operation on an empty tree will // ever take a pointer past the first key. -static EMPTY_ROOT_NODE: LeafNode<(), ()> = LeafNode { +static EMPTY_ROOT_NODE: NodeHeader<(), ()> = NodeHeader { parent: ptr::null(), parent_idx: MaybeUninit::uninitialized(), len: 0, - keys: MaybeUninit::uninitialized(), - vals: MaybeUninit::uninitialized(), + keys_start: [], }; /// The underlying representation of internal nodes. As with `LeafNode`s, these should be hidden @@ -306,6 +332,11 @@ impl Root { /// `Leaf`, the `NodeRef` points to a leaf node, when this is `Internal` the /// `NodeRef` points to an internal node, and when this is `LeafOrInternal` the /// `NodeRef` could be pointing to either type of node. +/// Note that in case of a leaf node, this might still be the shared root! Only turn +/// this into a `LeafNode` reference if you know it is not a root! Shared references +/// must be dereferencable *for the entire size of their pointee*, so `&InternalNode` +/// pointing to the shared root is UB. +/// Turning this into a `NodeHeader` is always safe. pub struct NodeRef { height: usize, node: NonNull>, @@ -352,7 +383,7 @@ impl NodeRef { /// Finds the length of the node. This is the number of keys or values. In an /// internal node, the number of edges is `len() + 1`. pub fn len(&self) -> usize { - self.as_leaf().len as usize + self.as_header().len as usize } /// Returns the height of this node in the whole tree. Zero height denotes the @@ -382,14 +413,19 @@ impl NodeRef { } } - fn as_leaf(&self) -> &LeafNode { + /// Assert that this is indeed a proper leaf node, and not the shared root. + unsafe fn as_leaf(&self) -> &LeafNode { + self.node.as_ref() + } + + fn as_header(&self) -> &NodeHeader { unsafe { - self.node.as_ref() + &*(self.node.as_ptr() as *const NodeHeader) } } pub fn is_shared_root(&self) -> bool { - self.as_leaf().is_shared_root() + self.as_header().is_shared_root() } pub fn keys(&self) -> &[K] { @@ -418,7 +454,7 @@ impl NodeRef { >, Self > { - let parent_as_leaf = self.as_leaf().parent as *const LeafNode; + let parent_as_leaf = self.as_header().parent as *const LeafNode; if let Some(non_zero) = NonNull::new(parent_as_leaf as *mut _) { Ok(Handle { node: NodeRef { @@ -427,7 +463,7 @@ impl NodeRef { root: self.root, _marker: PhantomData }, - idx: unsafe { usize::from(*self.as_leaf().parent_idx.get_ref()) }, + idx: unsafe { usize::from(*self.as_header().parent_idx.get_ref()) }, _marker: PhantomData }) } else { @@ -535,9 +571,8 @@ impl<'a, K, V, Type> NodeRef, K, V, Type> { } fn as_leaf_mut(&mut self) -> &mut LeafNode { - unsafe { - self.node.as_mut() - } + // We are mutable, so we cannot be the root, so this is okay. + unsafe { self.node.as_mut() } } fn keys_mut(&mut self) -> &mut [K] { @@ -551,28 +586,50 @@ impl<'a, K, V, Type> NodeRef, K, V, Type> { impl<'a, K: 'a, V: 'a, Type> NodeRef, K, V, Type> { fn into_key_slice(self) -> &'a [K] { - // When taking a pointer to the keys, if our key has a stricter - // alignment requirement than the shared root does, then the pointer - // would be out of bounds, which LLVM assumes will not happen. If the - // alignment is more strict, we need to make an empty slice that doesn't - // use an out of bounds pointer. + // We have to be careful here because we might be pointing to the shared root. + // In that case, we must not create an `&LeafNode`. We could just return + // an empty slice whenever the lenght is 0 (this includes the shared root), + // but we want to avoid that run-time check. + // Instead, we create a slice pointing into the node whenever possible. + // We can sometimes do this even for the shared root, as the slice will be + // empty. We cannot *always* do this because if the type is too highly + // aligned, the offset of `keys` in a "full node" might be outside the bounds + // of the header! So we do an alignment check first, that will be + // evaluated at compile-time, and only do any run-time check in the rare case + // that the alignment is very big. if mem::align_of::() > mem::align_of::>() && self.is_shared_root() { &[] } else { - // Here either it's not the root, or the alignment is less strict, - // in which case the keys pointer will point "one-past-the-end" of - // the node, which is allowed by LLVM. + // Thanks to the alignment check above, we know that `keys` will be + // in-bounds of some allocation even if this is the shared root! + // (We might be one-past-the-end, but that is allowed by LLVM.) + // Getting the pointer is tricky though. `NodeHeader` does not have a `keys` + // field because we want its size to not depend on the alignment of `K` + // (needed becuase `as_header` should be safe). We cannot call `as_leaf` + // because we might be the shared root. + // For this reason, `NodeHeader` has this `K2` parameter (that's usually `()` + // and hence just adds a size-0-align-1 field, not affecting layout). + // We know that we can transmute `NodeHeader` to `NodeHeader` + // because we did the alignment check above, and hence `NodeHeader` + // is not bigger than `NodeHeader`! Then we can use `NodeHeader` + // to compute the pointer where the keys start. + // This entire hack will become unnecessary once + // lands, then we can just take a raw + // pointer to the `keys` field of `*const InternalNode`. + + // This is a non-debug-assert because it can be completely compile-time evaluated. + assert!(mem::size_of::>() == mem::size_of::>()); + let header = self.as_header() as *const _ as *const NodeHeader; + let keys = unsafe { &(*header).keys_start as *const _ as *const K }; unsafe { - slice::from_raw_parts( - self.as_leaf().keys.as_ptr() as *const K, - self.len() - ) + slice::from_raw_parts(keys, self.len()) } } } fn into_val_slice(self) -> &'a [V] { debug_assert!(!self.is_shared_root()); + // We cannot be the root, so `as_leaf` is okay unsafe { slice::from_raw_parts( self.as_leaf().vals.as_ptr() as *const V, From 4558340ecc64c2702ad89b8175e07ac1c71e273b Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 8 Dec 2018 23:38:12 +0100 Subject: [PATCH 2/3] avoid as_leaf_mut asserting exclusive access --- src/liballoc/collections/btree/node.rs | 63 ++++++++++++++------------ 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/src/liballoc/collections/btree/node.rs b/src/liballoc/collections/btree/node.rs index 13cbcee2f8e08..a9f6cdbb51ff1 100644 --- a/src/liballoc/collections/btree/node.rs +++ b/src/liballoc/collections/btree/node.rs @@ -307,7 +307,7 @@ impl Root { .node) }; self.height -= 1; - self.as_mut().as_leaf_mut().parent = ptr::null(); + unsafe { (*self.as_mut().as_leaf_mut()).parent = ptr::null(); } unsafe { Global.dealloc(NonNull::from(top).cast(), Layout::new::>()); @@ -570,9 +570,10 @@ impl<'a, K, V, Type> NodeRef, K, V, Type> { } } - fn as_leaf_mut(&mut self) -> &mut LeafNode { - // We are mutable, so we cannot be the root, so this is okay. - unsafe { self.node.as_mut() } + /// Returns a raw ptr to avoid asserting exclusive access to the entire node. + fn as_leaf_mut(&mut self) -> *mut LeafNode { + // We are mutable, so we cannot be the root, so accessing this as a leaf is okay. + self.node.as_ptr() } fn keys_mut(&mut self) -> &mut [K] { @@ -659,7 +660,7 @@ impl<'a, K: 'a, V: 'a, Type> NodeRef, K, V, Type> { } else { unsafe { slice::from_raw_parts_mut( - self.as_leaf_mut().keys.as_mut_ptr() as *mut K, + (*self.as_leaf_mut()).keys.as_mut_ptr() as *mut K, self.len() ) } @@ -670,7 +671,7 @@ impl<'a, K: 'a, V: 'a, Type> NodeRef, K, V, Type> { debug_assert!(!self.is_shared_root()); unsafe { slice::from_raw_parts_mut( - self.as_leaf_mut().vals.as_mut_ptr() as *mut V, + (*self.as_leaf_mut()).vals.as_mut_ptr() as *mut V, self.len() ) } @@ -694,9 +695,9 @@ impl<'a, K, V> NodeRef, K, V, marker::Leaf> { unsafe { ptr::write(self.keys_mut().get_unchecked_mut(idx), key); ptr::write(self.vals_mut().get_unchecked_mut(idx), val); - } - self.as_leaf_mut().len += 1; + (*self.as_leaf_mut()).len += 1; + } } /// Adds a key/value pair to the beginning of the node. @@ -708,9 +709,9 @@ impl<'a, K, V> NodeRef, K, V, marker::Leaf> { unsafe { slice_insert(self.keys_mut(), 0, key); slice_insert(self.vals_mut(), 0, val); - } - self.as_leaf_mut().len += 1; + (*self.as_leaf_mut()).len += 1; + } } } @@ -729,7 +730,7 @@ impl<'a, K, V> NodeRef, K, V, marker::Internal> { ptr::write(self.vals_mut().get_unchecked_mut(idx), val); ptr::write(self.as_internal_mut().edges.get_unchecked_mut(idx + 1), edge.node); - self.as_leaf_mut().len += 1; + (*self.as_leaf_mut()).len += 1; Handle::new_edge(self.reborrow_mut(), idx + 1).correct_parent_link(); } @@ -765,7 +766,7 @@ impl<'a, K, V> NodeRef, K, V, marker::Internal> { edge.node ); - self.as_leaf_mut().len += 1; + (*self.as_leaf_mut()).len += 1; self.correct_all_childrens_parent_links(); } @@ -789,12 +790,12 @@ impl<'a, K, V> NodeRef, K, V, marker::LeafOrInternal> { ForceResult::Internal(internal) => { let edge = ptr::read(internal.as_internal().edges.get_unchecked(idx + 1)); let mut new_root = Root { node: edge, height: internal.height - 1 }; - new_root.as_mut().as_leaf_mut().parent = ptr::null(); + (*new_root.as_mut().as_leaf_mut()).parent = ptr::null(); Some(new_root) } }; - self.as_leaf_mut().len -= 1; + (*self.as_leaf_mut()).len -= 1; (key, val, edge) } } @@ -822,7 +823,7 @@ impl<'a, K, V> NodeRef, K, V, marker::LeafOrInternal> { ); let mut new_root = Root { node: edge, height: internal.height - 1 }; - new_root.as_mut().as_leaf_mut().parent = ptr::null(); + (*new_root.as_mut().as_leaf_mut()).parent = ptr::null(); for i in 0..old_len { Handle::new_edge(internal.reborrow_mut(), i).correct_parent_link(); @@ -832,7 +833,7 @@ impl<'a, K, V> NodeRef, K, V, marker::LeafOrInternal> { } }; - self.as_leaf_mut().len -= 1; + (*self.as_leaf_mut()).len -= 1; (key, val, edge) } @@ -1023,7 +1024,7 @@ impl<'a, K, V> Handle, K, V, marker::Leaf>, marker::Edge slice_insert(self.node.keys_mut(), self.idx, key); slice_insert(self.node.vals_mut(), self.idx, val); - self.node.as_leaf_mut().len += 1; + (*self.node.as_leaf_mut()).len += 1; self.node.vals_mut().get_unchecked_mut(self.idx) } @@ -1066,8 +1067,10 @@ impl<'a, K, V> Handle, K, V, marker::Internal>, marker:: let idx = self.idx as u16; let ptr = self.node.as_internal_mut() as *mut _; let mut child = self.descend(); - child.as_leaf_mut().parent = ptr; - child.as_leaf_mut().parent_idx.set(idx); + unsafe { + (*child.as_leaf_mut()).parent = ptr; + (*child.as_leaf_mut()).parent_idx.set(idx); + } } /// Unsafely asserts to the compiler some static information about whether the underlying @@ -1215,7 +1218,7 @@ impl<'a, K, V> Handle, K, V, marker::Leaf>, marker::KV> new_len ); - self.node.as_leaf_mut().len = self.idx as u16; + (*self.node.as_leaf_mut()).len = self.idx as u16; new_node.len = new_len as u16; ( @@ -1237,7 +1240,7 @@ impl<'a, K, V> Handle, K, V, marker::Leaf>, marker::KV> unsafe { let k = slice_remove(self.node.keys_mut(), self.idx); let v = slice_remove(self.node.vals_mut(), self.idx); - self.node.as_leaf_mut().len -= 1; + (*self.node.as_leaf_mut()).len -= 1; (self.left_edge(), k, v) } } @@ -1278,7 +1281,7 @@ impl<'a, K, V> Handle, K, V, marker::Internal>, marker:: new_len + 1 ); - self.node.as_leaf_mut().len = self.idx as u16; + (*self.node.as_leaf_mut()).len = self.idx as u16; new_node.data.len = new_len as u16; let mut new_root = Root { @@ -1352,9 +1355,9 @@ impl<'a, K, V> Handle, K, V, marker::Internal>, marker:: for i in self.idx+1..self.node.len() { Handle::new_edge(self.node.reborrow_mut(), i).correct_parent_link(); } - self.node.as_leaf_mut().len -= 1; + (*self.node.as_leaf_mut()).len -= 1; - left_node.as_leaf_mut().len += right_len as u16 + 1; + (*left_node.as_leaf_mut()).len += right_len as u16 + 1; if self.node.height > 1 { ptr::copy_nonoverlapping( @@ -1464,8 +1467,8 @@ impl<'a, K, V> Handle, K, V, marker::Internal>, marker:: move_kv(left_kv, new_left_len, parent_kv, 0, 1); } - left_node.reborrow_mut().as_leaf_mut().len -= count as u16; - right_node.reborrow_mut().as_leaf_mut().len += count as u16; + (*left_node.reborrow_mut().as_leaf_mut()).len -= count as u16; + (*right_node.reborrow_mut().as_leaf_mut()).len += count as u16; match (left_node.force(), right_node.force()) { (ForceResult::Internal(left), ForceResult::Internal(mut right)) => { @@ -1525,8 +1528,8 @@ impl<'a, K, V> Handle, K, V, marker::Internal>, marker:: new_right_len); } - left_node.reborrow_mut().as_leaf_mut().len += count as u16; - right_node.reborrow_mut().as_leaf_mut().len -= count as u16; + (*left_node.reborrow_mut().as_leaf_mut()).len += count as u16; + (*right_node.reborrow_mut().as_leaf_mut()).len -= count as u16; match (left_node.force(), right_node.force()) { (ForceResult::Internal(left), ForceResult::Internal(mut right)) => { @@ -1617,8 +1620,8 @@ impl<'a, K, V> Handle, K, V, marker::LeafOrInternal>, ma move_kv(left_kv, left_new_len, right_kv, 0, right_new_len); - left_node.reborrow_mut().as_leaf_mut().len = left_new_len as u16; - right_node.reborrow_mut().as_leaf_mut().len = right_new_len as u16; + (*left_node.reborrow_mut().as_leaf_mut()).len = left_new_len as u16; + (*right_node.reborrow_mut().as_leaf_mut()).len = right_new_len as u16; match (left_node.force(), right_node.force()) { (ForceResult::Internal(left), ForceResult::Internal(right)) => { From d9c64e50a07dec062c273f4ec4c8f5985af13273 Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Tue, 11 Dec 2018 08:55:15 +0100 Subject: [PATCH 3/3] Typo Co-Authored-By: RalfJung --- src/liballoc/collections/btree/node.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/liballoc/collections/btree/node.rs b/src/liballoc/collections/btree/node.rs index a9f6cdbb51ff1..a2d2d3c74be9d 100644 --- a/src/liballoc/collections/btree/node.rs +++ b/src/liballoc/collections/btree/node.rs @@ -589,7 +589,7 @@ impl<'a, K: 'a, V: 'a, Type> NodeRef, K, V, Type> { fn into_key_slice(self) -> &'a [K] { // We have to be careful here because we might be pointing to the shared root. // In that case, we must not create an `&LeafNode`. We could just return - // an empty slice whenever the lenght is 0 (this includes the shared root), + // an empty slice whenever the length is 0 (this includes the shared root), // but we want to avoid that run-time check. // Instead, we create a slice pointing into the node whenever possible. // We can sometimes do this even for the shared root, as the slice will be