Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 156 additions & 0 deletions library/alloc/src/collections/btree/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1240,6 +1240,162 @@ impl<K, V, A: Allocator + Clone> BTreeMap<K, V, A> {
)
}

/// Moves all elements from `other` into `self`, leaving `other` empty.
///
/// If a key from `other` is already present in `self`, then the `conflict`
/// closure is used to return a value to `self`. The `conflict`
/// closure takes in a borrow of `self`'s key, `self`'s value, and `other`'s value
/// in that order.
///
/// An example of why one might use this method over [`append`]
/// is to combine `self`'s value with `other`'s value when their keys conflict.
///
/// Similar to [`insert`], though, the key is not overwritten,
/// which matters for types that can be `==` without being identical.
///
/// [`insert`]: BTreeMap::insert
/// [`append`]: BTreeMap::append
///
/// # Examples
///
/// ```
/// #![feature(btree_merge)]
/// use std::collections::BTreeMap;
///
/// let mut a = BTreeMap::new();
/// a.insert(1, String::from("a"));
/// a.insert(2, String::from("b"));
/// a.insert(3, String::from("c")); // Note: Key (3) also present in b.
///
/// let mut b = BTreeMap::new();
/// b.insert(3, String::from("d")); // Note: Key (3) also present in a.
/// b.insert(4, String::from("e"));
/// b.insert(5, String::from("f"));
///
/// // concatenate a's value and b's value
/// a.merge(b, |_, a_val, b_val| {
/// format!("{a_val}{b_val}")
/// });
///
/// assert_eq!(a.len(), 5); // all of b's keys in a
///
/// assert_eq!(a[&1], "a");
/// assert_eq!(a[&2], "b");
/// assert_eq!(a[&3], "cd"); // Note: "c" has been combined with "d".
/// assert_eq!(a[&4], "e");
/// assert_eq!(a[&5], "f");
/// ```
#[unstable(feature = "btree_merge", issue = "152152")]
pub fn merge(&mut self, mut other: Self, mut conflict: impl FnMut(&K, V, V) -> V)
where
K: Ord,
A: Clone,
{
// Do we have to append anything at all?
if other.is_empty() {
return;
}

// We can just swap `self` and `other` if `self` is empty.
if self.is_empty() {
mem::swap(self, &mut other);
return;
}

let mut other_iter = other.into_iter();
let (first_other_key, first_other_val) = other_iter.next().unwrap();

// find the first gap that has the smallest key greater than or equal to
// the first key from other
let mut self_cursor = self.lower_bound_mut(Bound::Included(&first_other_key));

if let Some((self_key, _)) = self_cursor.peek_next() {
match K::cmp(self_key, &first_other_key) {
Ordering::Equal => {
// if `f` unwinds, the next entry is already removed leaving
// the tree in valid state.
// FIXME: Once `MaybeDangling` is implemented, we can optimize
// this through using a drop handler and transmutating CursorMutKey<K, V>
// to CursorMutKey<ManuallyDrop<K>, ManuallyDrop<V>> (see PR #152418)
if let Some((k, v)) = self_cursor.remove_next() {
// SAFETY: we remove the K, V out of the next entry,
// apply 'f' to get a new (K, V), and insert it back
// into the next entry that the cursor is pointing at
let v = conflict(&k, v, first_other_val);
unsafe { self_cursor.insert_after_unchecked(k, v) };
}
}
Ordering::Greater =>
// SAFETY: we know our other_key's ordering is less than self_key,
// so inserting before will guarantee sorted order
unsafe {
self_cursor.insert_before_unchecked(first_other_key, first_other_val);
},
Ordering::Less => {
unreachable!("Cursor's peek_next should return None.");
}
}
} else {
// SAFETY: reaching here means our cursor is at the end
// self BTreeMap so we just insert other_key here
unsafe {
self_cursor.insert_before_unchecked(first_other_key, first_other_val);
}
}

for (other_key, other_val) in other_iter {
loop {
if let Some((self_key, _)) = self_cursor.peek_next() {
match K::cmp(self_key, &other_key) {
Ordering::Equal => {
// if `f` unwinds, the next entry is already removed leaving
// the tree in valid state.
// FIXME: Once `MaybeDangling` is implemented, we can optimize
// this through using a drop handler and transmutating CursorMutKey<K, V>
// to CursorMutKey<ManuallyDrop<K>, ManuallyDrop<V>> (see PR #152418)
if let Some((k, v)) = self_cursor.remove_next() {
// SAFETY: we remove the K, V out of the next entry,
// apply 'f' to get a new (K, V), and insert it back
// into the next entry that the cursor is pointing at
let v = conflict(&k, v, other_val);
unsafe { self_cursor.insert_after_unchecked(k, v) };
}
break;
}
Ordering::Greater => {
// SAFETY: we know our self_key's ordering is greater than other_key,
// so inserting before will guarantee sorted order
unsafe {
self_cursor.insert_before_unchecked(other_key, other_val);
}
break;
}
Ordering::Less => {
// FIXME: instead of doing a linear search here,
// this can be optimized to search the tree by starting
// from self_cursor and going towards the root and then
// back down to the proper node -- that should probably
// be a new method on Cursor*.
self_cursor.next();
}
}
} else {
// FIXME: If we get here, that means all of other's keys are greater than
// self's keys. For performance, this should really do a bulk insertion of items
// from other_iter into the end of self `BTreeMap`. Maybe this should be
// a method for Cursor*?

// SAFETY: reaching here means our cursor is at the end
// self BTreeMap so we just insert other_key here
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like this is also probably inefficient: we'd want to do a bulk build here, especially if we're merging in a much larger other map into self. I think this is similar to the linear search above on Ordering::Greater.

I think it's worth at least adding an unresolved question and/or expanding the merge function's doc comment to say something about merges with expected few-matches being better done with e.g. self.into_iter().sorted_zip(other.into_iter()).collect::<BTreeMap<_, _>>(), given a sorted_zip that does the advancing of left/right (merge sort's merge effectively). I'm not sure what the constant factors are -- probably depends on how expensive allocation and moving the keys/values around is -- but it seems remiss not to mention it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like this is also probably inefficient: we'd want to do a bulk build here, especially if we're merging in a much larger other map into self.

I agree that bulk building has to occur in the else condition, where we know that our other map leftovers keys are greater than self map.

I think this is similar to the linear search above on Ordering::Greater.

Yea, this needs a method that acts similarly to BTreeMap::lower_bound_mut/BTreeMap::upper_bound_mut for Cursor*, so that we don't have multiple .next() calls and we can traverse through the underlying map structure efficiently in logarithmic time.

I think it's worth at least adding an unresolved question and/or expanding the merge function's doc comment to say something about merges with expected few-matches being better done with e.g. self.into_iter().sorted_zip(other.into_iter()).collect::<BTreeMap<_, _>>(), given a sorted_zip that does the advancing of left/right (merge sort's merge effectively)

I'll leave a FIXME here noting how this could be improved with batching/bulk pushing the items (which could possibly be another method that Cursor* should be able to support?). My only hesitation with the example code self.into_iter().sorted_zip(other.into_iter()).collect::<BTreeMap<_, _>>() is how collecting an iterator into a BTreeMap works. I don't imagine that it has knowledge that the iterator is sorted, so it would end up taking O(nlg(n)) since you have n elements from the sorted merged iterator and if it's traversing from root, it could take lg(n) time to find the right spot to insert the element. There is a bulk_build_from_sorted_iter() method that BTreeMap has, but that's private.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, right now it collects into a Vec and then sorts that:

let mut inputs: Vec<_> = iter.into_iter().collect();
if inputs.is_empty() {
return BTreeMap::new();
}
// use stable sort to preserve the insertion order.
inputs.sort_by(|a, b| a.0.cmp(&b.0));
BTreeMap::bulk_build_from_sorted_iter(inputs, Global)

I suspect that implies that code shouldn't get written as-is exactly in most cases, but we are missing some kind of bulk construction primitive. I don't know how commonly that would actually matter -- I've had fairly few cases I'm doing anything with large BTrees personally, much less merging them -- so it's not clear we need/want a public API. On the other hand, maybe there's an API shape that avoids the merge operation being needed.

Copy link
Contributor Author

@asder8215 asder8215 Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a bulk construction that could work if we're able to traverse through the BTreeMap directly and ascend and descend through the levels, inserting it into the appropriate places.

I think doing that would at least minimize the best case work of the inputs being sorted already to O(n), and in the avg/worst case it would take O(nlgn), since in the sorted case it'll take O(1) to find the spot to insert the item relative to the current node that we're looking at, and in the unsorted case we would need to traverse through the map and entries to find the right spot (which would be O(lgn)). I don't know if that would be faster, but maybe worth experimenting and benchmarking?

unsafe {
self_cursor.insert_before_unchecked(other_key, other_val);
}
break;
}
}
}
}

/// Constructs a double-ended iterator over a sub-range of elements in the map.
/// The simplest way is to use the range syntax `min..max`, thus `range(min..max)` will
/// yield elements from min (inclusive) to max (exclusive).
Expand Down
182 changes: 181 additions & 1 deletion library/alloc/src/collections/btree/map/tests.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use core::assert_matches;
use std::iter;
use std::ops::Bound::{Excluded, Included, Unbounded};
use std::panic::{AssertUnwindSafe, catch_unwind};
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering::SeqCst;
use std::{cmp, iter};

use super::*;
use crate::boxed::Box;
Expand Down Expand Up @@ -2128,6 +2128,86 @@ create_append_test!(test_append_239, 239);
#[cfg(not(miri))] // Miri is too slow
create_append_test!(test_append_1700, 1700);

// `a` gets keys 0..8 mapped to themselves; `b` gets keys 5..$len mapped to
// twice their value. Whenever $len > 5, keys 5..min($len, 8) exist in both
// maps, and the merge resolves each conflict by summing both values.
// The generated test verifies that:
// - the merged tree `a` has max(8, $len) entries,
// - every key in `a` maps to the expected value,
// - removing and re-inserting an entry keeps `a` in valid tree form.
macro_rules! create_merge_test {
    ($name:ident, $len:expr) => {
        #[test]
        fn $name() {
            let mut a = BTreeMap::new();
            a.extend((0..8).map(|i| (i, i)));

            let mut b = BTreeMap::new();
            b.extend((5..$len).map(|i| (i, 2 * i)));

            a.merge(b, |_, a_val, b_val| a_val + b_val);

            let merged_len = cmp::max($len, 8);
            assert_eq!(a.len(), merged_len);

            for i in 0..merged_len {
                let expected = if i < 5 {
                    i // present only in `a`
                } else if i < cmp::min($len, 8) {
                    3 * i // conflicting key: i (from `a`) + 2 * i (from `b`)
                } else if i >= $len {
                    i // present only in `a` (happens when $len < 8)
                } else {
                    2 * i // present only in `b`
                };
                assert_eq!(a[&i], expected);
            }

            a.check();
            let last = $len - 1;
            let expected_last = if $len >= 5 && $len < 8 { 3 * last } else { 2 * last };
            assert_eq!(a.remove(&last), Some(expected_last));
            assert_eq!(a.insert(last, 20), None);
            a.check();
        }
    };
}

// These are mostly for testing the algorithm that "fixes" the right edge after insertion.
// Single node, merge conflicting key values.
create_merge_test!(test_merge_7, 7);
// Single node.
create_merge_test!(test_merge_9, 9);
// Two leaves that don't need fixing.
create_merge_test!(test_merge_17, 17);
// Two leaves where the second one ends up underfull and needs stealing at the end.
create_merge_test!(test_merge_14, 14);
// Two leaves where the second one ends up empty because the insertion finished at the root.
create_merge_test!(test_merge_12, 12);
// Three levels; insertion finished at the root.
create_merge_test!(test_merge_144, 144);
// Three levels; insertion finished at leaf while there is an empty node on the second level.
create_merge_test!(test_merge_145, 145);
// Tests for several randomly chosen sizes.
create_merge_test!(test_merge_170, 170);
create_merge_test!(test_merge_181, 181);
#[cfg(not(miri))] // Miri is too slow
create_merge_test!(test_merge_239, 239);
#[cfg(not(miri))] // Miri is too slow
create_merge_test!(test_merge_1700, 1700);

#[test]
#[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")]
fn test_append_drop_leak() {
Expand Down Expand Up @@ -2169,6 +2249,84 @@ fn test_append_ord_chaos() {
map2.check();
}

#[test]
#[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")]
fn test_merge_drop_leak() {
    // `merge` must not leak or double-drop entries when a key's destructor
    // panics mid-merge. Each CrashTestDummy counts drops of its spawned
    // instances.
    let a = CrashTestDummy::new(0);
    let b = CrashTestDummy::new(1);
    let c = CrashTestDummy::new(2);
    let mut left = BTreeMap::new();
    let mut right = BTreeMap::new();
    left.insert(a.spawn(Panic::Never), ());
    left.insert(b.spawn(Panic::Never), ());
    left.insert(c.spawn(Panic::Never), ());
    right.insert(b.spawn(Panic::InDrop), ()); // first duplicate key, dropped during merge
    right.insert(c.spawn(Panic::Never), ());

    catch_unwind(move || left.merge(right, |_, _, _| ())).unwrap_err();
    assert_eq!(a.dropped(), 1); // spawned once; dropped only when unwinding tears `left` down
    assert_eq!(b.dropped(), 2); // both spawns dropped: the duplicate during merge (panicking), `left`'s on unwind
    assert_eq!(c.dropped(), 2); // both spawns dropped during unwinding
}

#[test]
#[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")]
fn test_merge_conflict_drop_leak() {
    // `merge` must not leak or double-drop keys/values when the `conflict`
    // closure itself panics on the first duplicate key.
    let a = CrashTestDummy::new(0);
    let a_val_left = CrashTestDummy::new(0);

    let b = CrashTestDummy::new(1);
    let b_val_left = CrashTestDummy::new(1);
    let b_val_right = CrashTestDummy::new(1);

    let c = CrashTestDummy::new(2);
    let c_val_left = CrashTestDummy::new(2);
    let c_val_right = CrashTestDummy::new(2);

    let mut left = BTreeMap::new();
    let mut right = BTreeMap::new();

    left.insert(a.spawn(Panic::Never), a_val_left.spawn(Panic::Never));
    left.insert(b.spawn(Panic::Never), b_val_left.spawn(Panic::Never));
    left.insert(c.spawn(Panic::Never), c_val_left.spawn(Panic::Never));
    right.insert(b.spawn(Panic::Never), b_val_right.spawn(Panic::Never));
    right.insert(c.spawn(Panic::Never), c_val_right.spawn(Panic::Never));

    // The first conflicting key (`b`) makes `conflict` panic; everything still
    // owned by either map must be dropped exactly once during unwinding.
    catch_unwind(move || {
        left.merge(right, |_, _, _| panic!("Panic in conflict function"));
        // NOTE(review): unreachable -- `merge` panics above, so this never runs.
        assert_eq!(left.len(), 1); // only 1 entry should be left
    })
    .unwrap_err();
    assert_eq!(a.dropped(), 1); // should not panic
    assert_eq!(a_val_left.dropped(), 1); // should not panic
    assert_eq!(b.dropped(), 2); // should drop from panic (conflict)
    assert_eq!(b_val_left.dropped(), 1); // should be 2 were it not for Rust issue #47949
    assert_eq!(b_val_right.dropped(), 1); // should be 2 were it not for Rust issue #47949
    assert_eq!(c.dropped(), 2); // should drop from panic (conflict)
    assert_eq!(c_val_left.dropped(), 1); // should be 2 were it not for Rust issue #47949
    assert_eq!(c_val_right.dropped(), 1); // should be 2 were it not for Rust issue #47949
}

#[test]
fn test_merge_ord_chaos() {
    // Merge with a key type whose `Ord` is deliberately inconsistent, to check
    // that `merge` stays memory-safe even when comparisons misbehave.
    // NOTE(review): the "lands first" comments below depend on Cyclic3's
    // cyclic comparison -- confirm against its definition.
    let mut map1 = BTreeMap::new();
    map1.insert(Cyclic3::A, ());
    map1.insert(Cyclic3::B, ());
    let mut map2 = BTreeMap::new();
    map2.insert(Cyclic3::A, ());
    map2.insert(Cyclic3::B, ());
    map2.insert(Cyclic3::C, ()); // lands first, before A
    map2.insert(Cyclic3::B, ()); // lands first, before C
    map1.check();
    map2.check(); // keys are not unique but still strictly ascending
    assert_eq!(map1.len(), 2);
    assert_eq!(map2.len(), 4);
    map1.merge(map2, |_, _, _| ());
    // With a broken Ord we only require a defined length and a valid tree,
    // not any particular key/value outcome.
    assert_eq!(map1.len(), 5);
    map1.check();
}

fn rand_data(len: usize) -> Vec<(u32, u32)> {
let mut rng = DeterministicRng::new();
Vec::from_iter((0..len).map(|_| (rng.next(), rng.next())))
Expand Down Expand Up @@ -2615,3 +2773,25 @@ fn test_id_based_append() {

assert_eq!(lhs.pop_first().unwrap().0.name, "lhs_k".to_string());
}

#[test]
fn test_id_based_merge() {
    // IdBased keys compare equal on `id` alone while carrying different
    // `name`s, so we can observe which key survives a conflicting merge.
    let mut lhs = BTreeMap::new();
    let mut rhs = BTreeMap::new();

    lhs.insert(IdBased { id: 0, name: "lhs_k".to_string() }, "1".to_string());
    rhs.insert(IdBased { id: 0, name: "rhs_k".to_string() }, "2".to_string());

    lhs.merge(rhs, |_, mut lhs_val, rhs_val| {
        // confirming that lhs_val comes from lhs tree,
        // rhs_val comes from rhs tree
        assert_eq!(lhs_val, String::from("1"));
        assert_eq!(rhs_val, String::from("2"));
        lhs_val.push_str(&rhs_val);
        lhs_val
    });

    let merged_kv_pair = lhs.pop_first().unwrap();
    assert_eq!(merged_kv_pair.0.id, 0);
    // Like `insert`, `merge` keeps `self`'s key on conflict.
    assert_eq!(merged_kv_pair.0.name, "lhs_k".to_string());
    // The value returned by the conflict closure must be what gets stored.
    assert_eq!(merged_kv_pair.1, "12".to_string());
}
1 change: 1 addition & 0 deletions library/alloctests/tests/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#![feature(allocator_api)]
#![feature(binary_heap_pop_if)]
#![feature(btree_merge)]
#![feature(const_heap)]
#![feature(deque_extend_front)]
#![feature(iter_array_chunks)]
Expand Down
Loading