Skip to content

Commit

Permalink
Add iterators to BooleanBuffer and NullBuffer (#3901)
Browse files Browse the repository at this point in the history
* Add iterators to BooleanBuffer and NullBuffer

* Clippy

* Review feedback
  • Loading branch information
tustvold authored Mar 23, 2023
1 parent 2d68ed5 commit 1a42f4c
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 7 deletions.
4 changes: 1 addition & 3 deletions arrow-arith/src/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ use arrow_array::iterator::ArrayIter;
use arrow_array::*;
use arrow_buffer::ArrowNativeType;
use arrow_data::bit_iterator::try_for_each_valid_idx;
use arrow_data::bit_iterator::BitIndexIterator;
use arrow_schema::ArrowError;
use arrow_schema::*;

Expand Down Expand Up @@ -118,9 +117,8 @@ where
.reduce(|acc, item| if cmp(&acc, &item) { item } else { acc })
} else {
let nulls = array.nulls().unwrap();
let iter = BitIndexIterator::new(nulls.validity(), nulls.offset(), nulls.len());
unsafe {
let idx = iter.reduce(|acc_idx, idx| {
let idx = nulls.valid_indices().reduce(|acc_idx, idx| {
let acc = array.value_unchecked(acc_idx);
let item = array.value_unchecked(idx);
if cmp(&acc, &item) {
Expand Down
25 changes: 25 additions & 0 deletions arrow-buffer/src/buffer/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.

use crate::bit_chunk_iterator::BitChunks;
use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
use crate::{bit_util, buffer_bin_and, buffer_bin_or, buffer_unary_not, Buffer};
use std::ops::{BitAnd, BitOr, Not};

Expand Down Expand Up @@ -164,6 +165,21 @@ impl BooleanBuffer {
pub fn into_inner(self) -> Buffer {
self.buffer
}

/// Iterates over every bit of this [`BooleanBuffer`], yielding each as a `bool`
///
/// This is the by-reference [`IntoIterator`] implementation exposed as a method.
pub fn iter(&self) -> BitIterator<'_> {
    IntoIterator::into_iter(self)
}

/// Returns a [`BitIndexIterator`] yielding the position of every set bit
/// in this [`BooleanBuffer`]
pub fn set_indices(&self) -> BitIndexIterator<'_> {
    let bytes = self.values();
    let (start, bit_len) = (self.offset, self.len);
    BitIndexIterator::new(bytes, start, bit_len)
}

/// Returns a [`BitSliceIterator`] over the contiguous runs of set bits
/// in this [`BooleanBuffer`]
pub fn set_slices(&self) -> BitSliceIterator<'_> {
    let bytes = self.values();
    let (start, bit_len) = (self.offset, self.len);
    BitSliceIterator::new(bytes, start, bit_len)
}
}

impl Not for &BooleanBuffer {
Expand Down Expand Up @@ -215,3 +231,12 @@ impl BitOr<&BooleanBuffer> for &BooleanBuffer {
}
}
}

/// Allows `for bit in &boolean_buffer`, yielding each bit as a `bool`.
impl<'a> IntoIterator for &'a BooleanBuffer {
    type Item = bool;
    type IntoIter = BitIterator<'a>;

    fn into_iter(self) -> Self::IntoIter {
        // Construct the bit iterator from the underlying bytes together with
        // this buffer's offset and length.
        let bytes = self.values();
        let (start, bit_len) = (self.offset, self.len);
        BitIterator::new(bytes, start, bit_len)
    }
}
37 changes: 35 additions & 2 deletions arrow-buffer/src/buffer/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::bit_iterator::BitIndexIterator;
use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
use crate::buffer::BooleanBuffer;
use crate::{Buffer, MutableBuffer};

Expand Down Expand Up @@ -114,6 +114,30 @@ impl NullBuffer {
Self::new(self.buffer.slice(offset, len))
}

/// Returns an iterator over the bits in this [`NullBuffer`]
///
/// * `true` indicates that the corresponding value is not NULL
/// * `false` indicates that the corresponding value is NULL
///
/// Note: [`Self::valid_indices`] will be significantly faster for most use-cases
pub fn iter(&self) -> BitIterator<'_> {
self.buffer.iter()
}

/// Returns a [`BitIndexIterator`] over the valid indices in this [`NullBuffer`]
///
/// Valid indices indicate the corresponding value is not NULL
pub fn valid_indices(&self) -> BitIndexIterator<'_> {
self.buffer.set_indices()
}

/// Returns a [`BitSliceIterator`] yielding contiguous ranges of valid indices
///
/// Valid indices indicate the corresponding value is not NULL
pub fn valid_slices(&self) -> BitSliceIterator<'_> {
self.buffer.set_slices()
}

/// Calls the provided closure for each index in this null mask that is set
#[inline]
pub fn try_for_each_valid_idx<E, F: FnMut(usize) -> Result<(), E>>(
Expand All @@ -123,7 +147,7 @@ impl NullBuffer {
if self.null_count == self.len() {
return Ok(());
}
BitIndexIterator::new(self.validity(), self.offset(), self.len()).try_for_each(f)
self.valid_indices().try_for_each(f)
}

/// Returns the inner [`BooleanBuffer`]
Expand All @@ -145,6 +169,15 @@ impl NullBuffer {
}
}

impl<'a> IntoIterator for &'a NullBuffer {
type Item = bool;
type IntoIter = BitIterator<'a>;

fn into_iter(self) -> Self::IntoIter {
self.buffer.iter()
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
3 changes: 1 addition & 2 deletions arrow-select/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,7 @@ struct IndexIterator<'a> {
impl<'a> IndexIterator<'a> {
fn new(filter: &'a BooleanArray, remaining: usize) -> Self {
assert_eq!(filter.null_count(), 0);
let data = filter.data();
let iter = BitIndexIterator::new(data.buffers()[0], data.offset(), data.len());
let iter = filter.values().set_indices();
Self { remaining, iter }
}
}
Expand Down

0 comments on commit 1a42f4c

Please sign in to comment.