Skip to content

Commit 66ff163

Browse files
committed
Auto merge of #32466 - jooert:btree_append, r=apasel422
Implement `append` for b-trees. I have finally found time to revive #26227, this time only with an `append` implementation. The algorithm implemented here is linear in the size of the two b-trees. It first creates a `MergeIter` from the two b-trees and then builds a new b-tree by pushing key-value pairs from the `MergeIter` into nodes at the right heights. Three functions for stealing have been added to the implementation of `Handle` as well as a getter for the height of a `NodeRef`. The docs have been updated with performance information about `BTreeMap::append` and the remark about B has been removed now that it is the same for all instances of `BTreeMap`. cc @gereeter @gankro @apasel422
2 parents af000a7 + 241a3e4 commit 66ff163

File tree

7 files changed

+420
-30
lines changed

7 files changed

+420
-30
lines changed

Diff for: src/libcollections/btree/map.rs

+165-24
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
use core::cmp::Ordering;
1212
use core::fmt::Debug;
1313
use core::hash::{Hash, Hasher};
14-
use core::iter::FromIterator;
14+
use core::iter::{FromIterator, Peekable};
1515
use core::marker::PhantomData;
1616
use core::ops::Index;
1717
use core::{fmt, intrinsics, mem, ptr};
@@ -348,6 +348,12 @@ pub struct OccupiedEntry<'a, K: 'a, V: 'a> {
348348
_marker: PhantomData<&'a mut (K, V)>,
349349
}
350350

351+
// An iterator for merging two sorted sequences into one.
// Both inputs are wrapped in `Peekable` so that their next keys can be compared
// without consuming either pair. When both inputs yield an equal key, the pair
// from `right` is returned and the pair from `left` is dropped.
352+
struct MergeIter<K, V, I: Iterator<Item=(K, V)>> {
353+
// First input sequence; loses against `right` on equal keys.
left: Peekable<I>,
354+
// Second input sequence; wins against `left` on equal keys.
right: Peekable<I>,
355+
}
356+
351357
impl<K: Ord, V> BTreeMap<K, V> {
352358
/// Makes a new empty BTreeMap with a reasonable choice for B.
353359
///
@@ -535,6 +541,62 @@ impl<K: Ord, V> BTreeMap<K, V> {
535541
}
536542
}
537543

544+
/// Moves all elements from `other` into `Self`, leaving `other` empty.
545+
///
546+
/// # Examples
547+
///
548+
/// ```
549+
/// #![feature(btree_append)]
550+
/// use std::collections::BTreeMap;
551+
///
552+
/// let mut a = BTreeMap::new();
553+
/// a.insert(1, "a");
554+
/// a.insert(2, "b");
555+
/// a.insert(3, "c");
556+
///
557+
/// let mut b = BTreeMap::new();
558+
/// b.insert(3, "d");
559+
/// b.insert(4, "e");
560+
/// b.insert(5, "f");
561+
///
562+
/// a.append(&mut b);
563+
///
564+
/// assert_eq!(a.len(), 5);
565+
/// assert_eq!(b.len(), 0);
566+
///
567+
/// assert_eq!(a[&1], "a");
568+
/// assert_eq!(a[&2], "b");
569+
/// assert_eq!(a[&3], "d");
570+
/// assert_eq!(a[&4], "e");
571+
/// assert_eq!(a[&5], "f");
572+
/// ```
573+
#[unstable(feature = "btree_append", reason = "recently added as part of collections reform 2",
574+
issue = "19986")]
575+
pub fn append(&mut self, other: &mut Self) {
576+
// Do we have to append anything at all?
577+
if other.len() == 0 {
578+
return;
579+
}
580+
581+
// We can just swap `self` and `other` if `self` is empty.
582+
if self.len() == 0 {
583+
mem::swap(self, other);
584+
return;
585+
}
586+
587+
// First, we merge `self` and `other` into a sorted sequence in linear time.
588+
let self_iter = mem::replace(self, BTreeMap::new()).into_iter();
589+
let other_iter = mem::replace(other, BTreeMap::new()).into_iter();
590+
let iter = MergeIter {
591+
left: self_iter.peekable(),
592+
right: other_iter.peekable(),
593+
};
594+
595+
// Second, we build a tree from the sorted sequence in linear time.
596+
self.from_sorted_iter(iter);
597+
self.fix_right_edge();
598+
}
599+
538600
/// Constructs a double-ended iterator over a sub-range of elements in the map, starting
539601
/// at min, and ending at max. If min is `Unbounded`, then it will be treated as "negative
540602
/// infinity", and if max is `Unbounded`, then it will be treated as "positive infinity".
@@ -724,6 +786,76 @@ impl<K: Ord, V> BTreeMap<K, V> {
724786
})
725787
}
726788
}
789+
790+
fn from_sorted_iter<I: Iterator<Item=(K, V)>>(&mut self, iter: I) {
791+
let mut cur_node = last_leaf_edge(self.root.as_mut()).into_node();
792+
// Iterate through all key-value pairs, pushing them into nodes at the right level.
793+
for (key, value) in iter {
794+
// Try to push key-value pair into the current leaf node.
795+
if cur_node.len() < node::CAPACITY {
796+
cur_node.push(key, value);
797+
} else {
798+
// No space left, go up and push there.
799+
let mut open_node;
800+
let mut test_node = cur_node.forget_type();
801+
loop {
802+
match test_node.ascend() {
803+
Ok(parent) => {
804+
let parent = parent.into_node();
805+
if parent.len() < node::CAPACITY {
806+
// Found a node with space left, push here.
807+
open_node = parent;
808+
break;
809+
} else {
810+
// Go up again.
811+
test_node = parent.forget_type();
812+
}
813+
},
814+
Err(node) => {
815+
// We are at the top, create a new root node and push there.
816+
open_node = node.into_root_mut().push_level();
817+
break;
818+
},
819+
}
820+
}
821+
822+
// Push key-value pair and new right subtree.
823+
let tree_height = open_node.height() - 1;
824+
let mut right_tree = node::Root::new_leaf();
825+
for _ in 0..tree_height {
826+
right_tree.push_level();
827+
}
828+
open_node.push(key, value, right_tree);
829+
830+
// Go down to the right-most leaf again.
831+
cur_node = last_leaf_edge(open_node.forget_type()).into_node();
832+
}
833+
834+
self.length += 1;
835+
}
836+
}
837+
838+
fn fix_right_edge(&mut self) {
839+
// Handle underfull nodes, start from the top.
840+
let mut cur_node = self.root.as_mut();
841+
while let Internal(internal) = cur_node.force() {
842+
// Check if right-most child is underfull.
843+
let mut last_edge = internal.last_edge();
844+
let right_child_len = last_edge.reborrow().descend().len();
845+
if right_child_len < node::CAPACITY / 2 {
846+
// We need to steal.
847+
let mut last_kv = match last_edge.left_kv() {
848+
Ok(left) => left,
849+
Err(_) => unreachable!(),
850+
};
851+
last_kv.bulk_steal_left(node::CAPACITY/2 - right_child_len);
852+
last_edge = last_kv.right_edge();
853+
}
854+
855+
// Go further down.
856+
cur_node = last_edge.descend();
857+
}
858+
}
727859
}
728860

729861
impl<'a, K: 'a, V: 'a> IntoIterator for &'a BTreeMap<K, V> {
@@ -1690,32 +1822,41 @@ fn handle_underfull_node<'a, K, V>(node: NodeRef<marker::Mut<'a>,
16901822
};
16911823

16921824
if handle.can_merge() {
1693-
return Merged(handle.merge().into_node());
1825+
Merged(handle.merge().into_node())
16941826
} else {
1695-
unsafe {
1696-
let (k, v, edge) = if is_left {
1697-
handle.reborrow_mut().left_edge().descend().pop()
1698-
} else {
1699-
handle.reborrow_mut().right_edge().descend().pop_front()
1700-
};
1827+
if is_left {
1828+
handle.steal_left();
1829+
} else {
1830+
handle.steal_right();
1831+
}
1832+
Stole(handle.into_node())
1833+
}
1834+
}
17011835

1702-
let k = mem::replace(handle.reborrow_mut().into_kv_mut().0, k);
1703-
let v = mem::replace(handle.reborrow_mut().into_kv_mut().1, v);
1836+
impl<K: Ord, V, I: Iterator<Item=(K, V)>> Iterator for MergeIter<K, V, I> {
1837+
type Item = (K, V);
17041838

1705-
// FIXME: reuse cur_node?
1706-
if is_left {
1707-
match handle.reborrow_mut().right_edge().descend().force() {
1708-
Leaf(mut leaf) => leaf.push_front(k, v),
1709-
Internal(mut internal) => internal.push_front(k, v, edge.unwrap())
1710-
}
1711-
} else {
1712-
match handle.reborrow_mut().left_edge().descend().force() {
1713-
Leaf(mut leaf) => leaf.push(k, v),
1714-
Internal(mut internal) => internal.push(k, v, edge.unwrap())
1715-
}
1716-
}
1717-
}
1839+
// Yields the pair with the smaller key among the next pairs of `left` and `right`,
// or `None` once both inputs are exhausted.
fn next(&mut self) -> Option<(K, V)> {
1840+
// Peek at both inputs and compare their keys. If only one input still has a pair,
// that input is selected.
let res = match (self.left.peek(), self.right.peek()) {
1841+
(Some(&(ref left_key, _)), Some(&(ref right_key, _))) => left_key.cmp(right_key),
1842+
(Some(_), None) => Ordering::Less,
1843+
(None, Some(_)) => Ordering::Greater,
1844+
(None, None) => return None,
1845+
};
17181846

1719-
return Stole(handle.into_node());
1847+
// Check which element comes first and only advance the corresponding iterator.
1848+
// If two keys are equal, advance both and take the pair from `right`.
1849+
match res {
1850+
Ordering::Less => {
1851+
self.left.next()
1852+
},
1853+
Ordering::Greater => {
1854+
self.right.next()
1855+
},
1856+
Ordering::Equal => {
1857+
// Drop the pair from `left`, yield the one from `right`.
self.left.next();
1858+
self.right.next()
1859+
},
1860+
}
17201861
}
17211862
}

Diff for: src/libcollections/btree/node.rs

+139
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,12 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
328328
self.as_leaf().len as usize
329329
}
330330

331+
/// Returns the height of this node in the whole tree, as stored in the node reference.
332+
/// A height of zero denotes the leaf level.
333+
pub fn height(&self) -> usize {
334+
self.height
335+
}
336+
331337
/// Removes any static information about whether this node is a `Leaf` or an
332338
/// `Internal` node.
333339
pub fn forget_type(self) -> NodeRef<BorrowType, K, V, marker::LeafOrInternal> {
@@ -1233,6 +1239,139 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::
12331239
Handle::new_edge(self.node, self.idx)
12341240
}
12351241
}
1242+
1243+
/// This removes a key/value pair from the left child and replaces it with the key/value pair
1244+
/// pointed to by this handle while pushing the old key/value pair of this handle into the right
1245+
/// child.
1246+
pub fn steal_left(&mut self) {
1247+
unsafe {
1248+
let (k, v, edge) = self.reborrow_mut().left_edge().descend().pop();
1249+
1250+
let k = mem::replace(self.reborrow_mut().into_kv_mut().0, k);
1251+
let v = mem::replace(self.reborrow_mut().into_kv_mut().1, v);
1252+
1253+
match self.reborrow_mut().right_edge().descend().force() {
1254+
ForceResult::Leaf(mut leaf) => leaf.push_front(k, v),
1255+
ForceResult::Internal(mut internal) => internal.push_front(k, v, edge.unwrap())
1256+
}
1257+
}
1258+
}
1259+
1260+
/// This removes a key/value pair from the right child and replaces it with the key/value pair
1261+
/// pointed to by this handle while pushing the old key/value pair of this handle into the left
1262+
/// child.
1263+
pub fn steal_right(&mut self) {
1264+
unsafe {
1265+
let (k, v, edge) = self.reborrow_mut().right_edge().descend().pop_front();
1266+
1267+
let k = mem::replace(self.reborrow_mut().into_kv_mut().0, k);
1268+
let v = mem::replace(self.reborrow_mut().into_kv_mut().1, v);
1269+
1270+
match self.reborrow_mut().left_edge().descend().force() {
1271+
ForceResult::Leaf(mut leaf) => leaf.push(k, v),
1272+
ForceResult::Internal(mut internal) => internal.push(k, v, edge.unwrap())
1273+
}
1274+
}
1275+
}
1276+
1277+
/// This does stealing similar to `steal_left` but steals multiple elements at once.
1278+
pub fn bulk_steal_left(&mut self, n: usize) {
1279+
unsafe {
1280+
// Get raw pointers to left child's keys, values and edges.
1281+
let (left_len, left_k, left_v, left_e) = {
1282+
let mut left = self.reborrow_mut().left_edge().descend();
1283+
1284+
(left.len(),
1285+
left.keys_mut().as_mut_ptr(),
1286+
left.vals_mut().as_mut_ptr(),
1287+
match left.force() {
1288+
ForceResult::Leaf(_) => None,
1289+
ForceResult::Internal(mut i) => Some(i.as_internal_mut().edges.as_mut_ptr()),
1290+
})
1291+
};
1292+
1293+
// Get raw pointers to right child's keys, values and edges.
1294+
let (right_len, right_k, right_v, right_e) = {
1295+
let mut right = self.reborrow_mut().right_edge().descend();
1296+
1297+
(right.len(),
1298+
right.keys_mut().as_mut_ptr(),
1299+
right.vals_mut().as_mut_ptr(),
1300+
match right.force() {
1301+
ForceResult::Leaf(_) => None,
1302+
ForceResult::Internal(mut i) => Some(i.as_internal_mut().edges.as_mut_ptr()),
1303+
})
1304+
};
1305+
1306+
// Get raw pointers to parent's key and value.
1307+
let (parent_k, parent_v) = {
1308+
let kv = self.reborrow_mut().into_kv_mut();
1309+
(kv.0 as *mut K, kv.1 as *mut V)
1310+
};
1311+
1312+
// Make sure that we may steal safely.
1313+
debug_assert!(right_len + n <= CAPACITY);
1314+
debug_assert!(left_len >= n);
1315+
1316+
// Make room for stolen elements in right child.
1317+
ptr::copy(right_k,
1318+
right_k.offset(n as isize),
1319+
right_len);
1320+
ptr::copy(right_v,
1321+
right_v.offset(n as isize),
1322+
right_len);
1323+
if let Some(edges) = right_e {
1324+
ptr::copy(edges,
1325+
edges.offset(n as isize),
1326+
right_len+1);
1327+
}
1328+
1329+
// Move elements from the left child to the right one.
1330+
let left_ind = (left_len - n) as isize;
1331+
ptr::copy_nonoverlapping(left_k.offset(left_ind + 1),
1332+
right_k,
1333+
n - 1);
1334+
ptr::copy_nonoverlapping(left_v.offset(left_ind + 1),
1335+
right_v,
1336+
n - 1);
1337+
match (left_e, right_e) {
1338+
(Some(left), Some(right)) => {
1339+
ptr::copy_nonoverlapping(left.offset(left_ind + 1),
1340+
right,
1341+
n);
1342+
},
1343+
(Some(_), None) => unreachable!(),
1344+
(None, Some(_)) => unreachable!(),
1345+
(None, None) => {},
1346+
}
1347+
1348+
// Copy parent key/value pair to right child.
1349+
ptr::copy_nonoverlapping(parent_k,
1350+
right_k.offset(n as isize - 1),
1351+
1);
1352+
ptr::copy_nonoverlapping(parent_v,
1353+
right_v.offset(n as isize - 1),
1354+
1);
1355+
// Copy left-most stolen pair to parent.
1356+
ptr::copy_nonoverlapping(left_k.offset(left_ind),
1357+
parent_k,
1358+
1);
1359+
ptr::copy_nonoverlapping(left_v.offset(left_ind),
1360+
parent_v,
1361+
1);
1362+
1363+
// Fix lengths of left and right child and parent pointers in children of the right
1364+
// child.
1365+
self.reborrow_mut().left_edge().descend().as_leaf_mut().len -= n as u16;
1366+
let mut right = self.reborrow_mut().right_edge().descend();
1367+
right.as_leaf_mut().len += n as u16;
1368+
if let ForceResult::Internal(mut node) = right.force() {
1369+
for i in 0..(right_len+n+1) {
1370+
Handle::new_edge(node.reborrow_mut(), i as usize).correct_parent_link();
1371+
}
1372+
}
1373+
}
1374+
}
12361375
}
12371376

12381377
impl<BorrowType, K, V, HandleType>

0 commit comments

Comments
 (0)