Skip to content

Commit

Permalink
Add BooleanArray::new (#3879) (#3898)
Browse files Browse the repository at this point in the history
* Add BooleanArray::new (#3879)

* Review feedback
  • Loading branch information
tustvold authored Mar 23, 2023
1 parent 1a42f4c commit d38f8e0
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 28 deletions.
57 changes: 30 additions & 27 deletions arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ use crate::array::print_long_array;
use crate::builder::BooleanBuilder;
use crate::iterator::BooleanIter;
use crate::{Array, ArrayAccessor, ArrayRef};
use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer, NullBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_buffer::{bit_util, BooleanBuffer, MutableBuffer, NullBuffer};
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;
Expand Down Expand Up @@ -81,6 +81,28 @@ impl std::fmt::Debug for BooleanArray {
}

impl BooleanArray {
/// Builds a [`BooleanArray`] out of a bit-packed `values` buffer and an
/// optional validity (`nulls`) buffer.
///
/// The length and offset of the resulting array are taken from `values`.
///
/// # Panics
///
/// Panics if `nulls` is `Some` and its length differs from `values.len()`.
pub fn new(values: BooleanBuffer, nulls: Option<NullBuffer>) -> Self {
    // The validity buffer, when supplied, must describe exactly as many
    // slots as there are values.
    if let Some(null_buffer) = &nulls {
        assert_eq!(values.len(), null_buffer.len());
    }

    // TODO: Don't store ArrayData inside arrays (#3880)
    let builder = ArrayData::builder(DataType::Boolean)
        .len(values.len())
        .offset(values.offset())
        .nulls(nulls)
        .buffers(vec![values.inner().clone()]);

    // SAFETY: validation is skipped here. The only invariant checked in this
    // function is the length match above; the length, offset and backing
    // buffer are all read from the same `BooleanBuffer` (presumably already
    // consistent with each other -- confirm against `BooleanBuffer::new`).
    let data = unsafe { builder.build_unchecked() };

    Self { data, values }
}

/// Returns the length of this array.
pub fn len(&self) -> usize {
self.data.len()
Expand Down Expand Up @@ -182,24 +204,12 @@ impl BooleanArray {
where
F: FnMut(T::Item) -> bool,
{
let null_bit_buffer = left.nulls().map(|x| x.inner().sliced());
let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe {
let nulls = left.nulls().cloned();
let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe {
// SAFETY: i in range 0..len
op(left.value_unchecked(i))
});

let data = unsafe {
ArrayData::new_unchecked(
DataType::Boolean,
left.len(),
None,
null_bit_buffer,
0,
vec![Buffer::from(buffer)],
vec![],
)
};
Self::from(data)
Self::new(values, nulls)
}

/// Create a [`BooleanArray`] by evaluating the binary operation for
Expand Down Expand Up @@ -229,19 +239,11 @@ impl BooleanArray {
assert_eq!(left.len(), right.len());

let nulls = NullBuffer::union(left.nulls(), right.nulls());
let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe {
let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe {
// SAFETY: i in range 0..len
op(left.value_unchecked(i), right.value_unchecked(i))
});

let data = unsafe {
ArrayDataBuilder::new(DataType::Boolean)
.len(left.len())
.nulls(nulls)
.buffers(vec![buffer.into()])
.build_unchecked()
};
Self::from(data)
Self::new(values, nulls)
}
}

Expand Down Expand Up @@ -393,6 +395,7 @@ impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray
#[cfg(test)]
mod tests {
use super::*;
use arrow_buffer::Buffer;
use rand::{thread_rng, Rng};

#[test]
Expand Down
10 changes: 9 additions & 1 deletion arrow-buffer/src/buffer/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

use crate::bit_chunk_iterator::BitChunks;
use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
use crate::{bit_util, buffer_bin_and, buffer_bin_or, buffer_unary_not, Buffer};
use crate::{
bit_util, buffer_bin_and, buffer_bin_or, buffer_unary_not, Buffer, MutableBuffer,
};
use std::ops::{BitAnd, BitOr, Not};

/// A slice-able [`Buffer`] containing bit-packed booleans
Expand Down Expand Up @@ -61,6 +63,12 @@ impl BooleanBuffer {
}
}

/// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BooleanBuffer`
pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, f: F) -> Self {
let buffer = MutableBuffer::collect_bool(len, f);
Self::new(buffer.into(), 0, len)
}

/// Returns the number of set bits in this buffer
pub fn count_set_bits(&self) -> usize {
self.buffer.count_set_bits_offset(self.offset, self.len)
Expand Down

0 comments on commit d38f8e0

Please sign in to comment.