Skip to content

Commit

Permalink
Merge pull request #306 from RoaringBitmap/insert-many
Browse files Browse the repository at this point in the history
Improve the `Extend::extend` implementation for performances
  • Loading branch information
Kerollmops authored Jan 7, 2025
2 parents d2ec04f + d5dca9a commit 27743fb
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 14 deletions.
24 changes: 21 additions & 3 deletions roaring/src/bitmap/inherent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,12 @@ impl RoaringBitmap {
container.insert(index)
}

/// Search for the specific container by the given key.
/// Create a new container if not exist.
/// Searches for the specific container by the given key.
/// Creates a new container if it doesn't exist.
///
/// Return the index of the target container.
fn find_container_by_key(&mut self, key: u16) -> usize {
#[inline]
pub(crate) fn find_container_by_key(&mut self, key: u16) -> usize {
match self.containers.binary_search_by_key(&key, |c| c.key) {
Ok(loc) => loc,
Err(loc) => {
Expand All @@ -89,6 +90,7 @@ impl RoaringBitmap {
/// assert!(rb.contains(3));
/// assert!(!rb.contains(4));
/// ```
#[inline]
pub fn insert_range<R>(&mut self, range: R) -> u64
where
R: RangeBounds<u32>,
Expand Down Expand Up @@ -153,6 +155,7 @@ impl RoaringBitmap {
///
/// assert_eq!(rb.iter().collect::<Vec<u32>>(), vec![1, 3, 5]);
/// ```
#[inline]
pub fn push(&mut self, value: u32) -> bool {
let (key, index) = util::split(value);

Expand All @@ -175,6 +178,7 @@ impl RoaringBitmap {
/// # Panics
///
/// If debug_assertions enabled and index is > self.max()
#[inline]
pub(crate) fn push_unchecked(&mut self, value: u32) {
let (key, index) = util::split(value);

Expand Down Expand Up @@ -204,6 +208,7 @@ impl RoaringBitmap {
/// assert_eq!(rb.remove(3), false);
/// assert_eq!(rb.contains(3), false);
/// ```
#[inline]
pub fn remove(&mut self, value: u32) -> bool {
let (key, index) = util::split(value);
match self.containers.binary_search_by_key(&key, |c| c.key) {
Expand Down Expand Up @@ -234,6 +239,7 @@ impl RoaringBitmap {
/// rb.insert(3);
/// assert_eq!(rb.remove_range(2..4), 2);
/// ```
#[inline]
pub fn remove_range<R>(&mut self, range: R) -> u64
where
R: RangeBounds<u32>,
Expand Down Expand Up @@ -277,6 +283,7 @@ impl RoaringBitmap {
/// assert_eq!(rb.contains(1), true);
/// assert_eq!(rb.contains(100), false);
/// ```
#[inline]
pub fn contains(&self, value: u32) -> bool {
let (key, index) = util::split(value);
match self.containers.binary_search_by_key(&key, |c| c.key) {
Expand Down Expand Up @@ -304,6 +311,7 @@ impl RoaringBitmap {
/// // 0xFFF is not contained
/// assert!(!rb.contains_range(1..=0xFFF));
/// ```
#[inline]
pub fn contains_range<R>(&self, range: R) -> bool
where
R: RangeBounds<u32>,
Expand Down Expand Up @@ -364,6 +372,7 @@ impl RoaringBitmap {
/// assert_eq!(rb.range_cardinality(0x10000..0x10000), 0);
/// assert_eq!(rb.range_cardinality(0x50000..=u32::MAX), 3);
/// ```
#[inline]
pub fn range_cardinality<R>(&self, range: R) -> u64
where
R: RangeBounds<u32>,
Expand Down Expand Up @@ -423,6 +432,7 @@ impl RoaringBitmap {
/// rb.clear();
/// assert_eq!(rb.contains(1), false);
/// ```
#[inline]
pub fn clear(&mut self) {
self.containers.clear();
}
Expand All @@ -440,6 +450,7 @@ impl RoaringBitmap {
/// rb.insert(3);
/// assert_eq!(rb.is_empty(), false);
/// ```
#[inline]
pub fn is_empty(&self) -> bool {
self.containers.is_empty()
}
Expand All @@ -455,6 +466,7 @@ impl RoaringBitmap {
/// assert!(!rb.is_empty());
/// assert!(rb.is_full());
/// ```
#[inline]
pub fn is_full(&self) -> bool {
self.containers.len() == (u16::MAX as usize + 1)
&& self.containers.iter().all(Container::is_full)
Expand All @@ -477,6 +489,7 @@ impl RoaringBitmap {
/// rb.insert(4);
/// assert_eq!(rb.len(), 2);
/// ```
#[inline]
pub fn len(&self) -> u64 {
self.containers.iter().map(|container| container.len()).sum()
}
Expand All @@ -495,6 +508,7 @@ impl RoaringBitmap {
/// rb.insert(4);
/// assert_eq!(rb.min(), Some(3));
/// ```
#[inline]
pub fn min(&self) -> Option<u32> {
self.containers.first().and_then(|tail| tail.min().map(|min| util::join(tail.key, min)))
}
Expand Down Expand Up @@ -533,6 +547,7 @@ impl RoaringBitmap {
/// assert_eq!(rb.rank(3), 1);
/// assert_eq!(rb.rank(10), 2)
/// ```
#[inline]
pub fn rank(&self, value: u32) -> u64 {
// if len becomes cached for RoaringBitmap: return len if len > value

Expand Down Expand Up @@ -567,6 +582,7 @@ impl RoaringBitmap {
/// assert_eq!(rb.select(2), Some(100));
/// assert_eq!(rb.select(3), None);
/// ```
#[inline]
pub fn select(&self, n: u32) -> Option<u32> {
let mut n = n as u64;

Expand Down Expand Up @@ -598,6 +614,7 @@ impl RoaringBitmap {
/// let mut rb = RoaringBitmap::from_iter([1, 3, 7, 9]);
/// rb.remove_smallest(2);
/// assert_eq!(rb, RoaringBitmap::from_iter([7, 9]));
#[inline]
pub fn remove_smallest(&mut self, mut n: u64) {
// remove containers up to the front of the target
let position = self.containers.iter().position(|container| {
Expand Down Expand Up @@ -632,6 +649,7 @@ impl RoaringBitmap {
/// assert_eq!(rb, RoaringBitmap::from_iter([1, 5]));
/// rb.remove_biggest(1);
/// assert_eq!(rb, RoaringBitmap::from_iter([1]));
#[inline]
pub fn remove_biggest(&mut self, mut n: u64) {
// remove containers up to the back of the target
let position = self.containers.iter().rposition(|container| {
Expand Down
68 changes: 61 additions & 7 deletions roaring/src/bitmap/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -715,18 +715,72 @@ impl<'a> FromIterator<&'a u32> for RoaringBitmap {
}

impl Extend<u32> for RoaringBitmap {
fn extend<I: IntoIterator<Item = u32>>(&mut self, iterator: I) {
for value in iterator {
self.insert(value);
/// Inserts multiple values into the bitmap.
/// This is expected to be faster than calling [`RoaringBitmap::insert`] on each value.
///
/// The provided integer values don't have to be in sorted order, but sorting them
/// beforehand may be preferable from a performance point of view.
///
/// # Examples
///
/// ```rust
/// use roaring::RoaringBitmap;
///
/// let mut rb = RoaringBitmap::new();
/// rb.extend([1, 2, 3, 4, 1500, 1508, 1507, 1509]);
/// assert!(rb.contains(2));
/// assert!(rb.contains(1508));
/// assert!(!rb.contains(5));
/// ```
#[inline]
fn extend<I: IntoIterator<Item = u32>>(&mut self, values: I) {
let mut values = values.into_iter();
let value = match values.next() {
Some(value) => value,
None => return,
};

let (mut currenthb, lowbit) = util::split(value);
let mut current_container_index = self.find_container_by_key(currenthb);
let mut current_cont = &mut self.containers[current_container_index];
current_cont.insert(lowbit);

for val in values {
let (newhb, lowbit) = util::split(val);
if currenthb == newhb {
// easy case, this could be quite frequent
current_cont.insert(lowbit);
} else {
currenthb = newhb;
current_container_index = self.find_container_by_key(currenthb);
current_cont = &mut self.containers[current_container_index];
current_cont.insert(lowbit);
}
}
}
}

impl<'a> Extend<&'a u32> for RoaringBitmap {
fn extend<I: IntoIterator<Item = &'a u32>>(&mut self, iterator: I) {
for value in iterator {
self.insert(*value);
}
/// Inserts multiple values into the bitmap.
/// This is expected to be faster than calling [`RoaringBitmap::insert`] on each value.
///
/// The provided integer values don't have to be in sorted order, but sorting them
/// beforehand may be preferable from a performance point of view.
///
/// # Examples
///
/// ```rust
/// use roaring::RoaringBitmap;
///
/// let mut rb = RoaringBitmap::new();
/// rb.extend([1, 2, 3, 4, 1500, 1508, 1507, 1509]);
/// assert!(rb.contains(2));
/// assert!(rb.contains(1508));
/// assert!(!rb.contains(5));
/// ```
#[inline]
fn extend<I: IntoIterator<Item = &'a u32>>(&mut self, values: I) {
self.extend(values.into_iter().copied());
}
}

Expand Down
6 changes: 3 additions & 3 deletions roaring/src/bitmap/store/bitmap_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ impl BitmapStore {
pub fn insert(&mut self, index: u16) -> bool {
let (key, bit) = (key(index), bit(index));
let old_w = self.bits[key];
let new_w = old_w | 1 << bit;
let new_w = old_w | (1 << bit);
let inserted = (old_w ^ new_w) >> bit; // 1 or 0
self.bits[key] = new_w;
self.len += inserted;
Expand Down Expand Up @@ -634,7 +634,7 @@ impl BitOrAssign<&ArrayStore> for BitmapStore {
for &index in rhs.iter() {
let (key, bit) = (key(index), bit(index));
let old_w = self.bits[key];
let new_w = old_w | 1 << bit;
let new_w = old_w | (1 << bit);
self.len += (old_w ^ new_w) >> bit;
self.bits[key] = new_w;
}
Expand Down Expand Up @@ -679,7 +679,7 @@ impl BitXorAssign<&ArrayStore> for BitmapStore {
for &index in rhs.iter() {
let (key, bit) = (key(index), bit(index));
let old_w = self.bits[key];
let new_w = old_w ^ 1 << bit;
let new_w = old_w ^ (1 << bit);
len += 1 - 2 * (((1 << bit) & old_w) >> bit) as i64; // +1 or -1
self.bits[key] = new_w;
}
Expand Down
2 changes: 1 addition & 1 deletion roaring/src/treemap/inherent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ impl RoaringTreemap {
for (&key, bitmap) in &self.map {
let len = bitmap.len();
if len > n {
return Some((key as u64) << 32 | bitmap.select(n as u32).unwrap() as u64);
return Some(((key as u64) << 32) | bitmap.select(n as u32).unwrap() as u64);
}
n -= len;
}
Expand Down

0 comments on commit 27743fb

Please sign in to comment.