Skip to content

Commit

Permalink
Use BooleanBuffer in BooleanArray (#3879) (#3895)
Browse files Browse the repository at this point in the history
* Use BooleanBuffer in BooleanArray (#3879)

* Review feedback
  • Loading branch information
tustvold authored Mar 21, 2023
1 parent ae4db60 commit 2730da1
Show file tree
Hide file tree
Showing 8 changed files with 66 additions and 48 deletions.
33 changes: 15 additions & 18 deletions arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::array::print_long_array;
use crate::builder::BooleanBuilder;
use crate::iterator::BooleanIter;
use crate::{Array, ArrayAccessor, ArrayRef};
use arrow_buffer::{bit_util, Buffer, MutableBuffer, NullBuffer};
use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer, NullBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::DataType;
use std::any::Any;
Expand Down Expand Up @@ -67,7 +67,7 @@ use std::sync::Arc;
#[derive(Clone)]
pub struct BooleanArray {
data: ArrayData,
raw_values: Buffer,
values: BooleanBuffer,
}

impl std::fmt::Debug for BooleanArray {
Expand Down Expand Up @@ -96,19 +96,17 @@ impl BooleanArray {
BooleanBuilder::with_capacity(capacity)
}

/// Returns a `Buffer` holding all the values of this array.
///
/// Note this doesn't take the offset of this array into account.
pub fn values(&self) -> &Buffer {
&self.raw_values
/// Returns the underlying [`BooleanBuffer`] holding all the values of this array
pub fn values(&self) -> &BooleanBuffer {
&self.values
}

/// Returns the number of non null, true values within this array
pub fn true_count(&self) -> usize {
match self.data.nulls() {
Some(nulls) => {
let null_chunks = nulls.inner().bit_chunks();
let value_chunks = self.values().bit_chunks(self.offset(), self.len());
let value_chunks = self.values().bit_chunks();
null_chunks
.iter()
.zip(value_chunks.iter())
Expand All @@ -119,9 +117,7 @@ impl BooleanArray {
.map(|(a, b)| (a & b).count_ones() as usize)
.sum()
}
None => self
.values()
.count_set_bits_offset(self.offset(), self.len()),
None => self.values().count_set_bits(),
}
}

Expand All @@ -135,8 +131,7 @@ impl BooleanArray {
/// # Safety
/// This doesn't check bounds, the caller must ensure that index < self.len()
pub unsafe fn value_unchecked(&self, i: usize) -> bool {
let offset = i + self.offset();
bit_util::get_bit_raw(self.raw_values.as_ptr(), offset)
self.values.value_unchecked(i)
}

/// Returns the boolean value at index `i`.
Expand Down Expand Up @@ -329,8 +324,10 @@ impl From<ArrayData> for BooleanArray {
1,
"BooleanArray data should contain a single buffer only (values buffer)"
);
let raw_values = data.buffers()[0].clone();
Self { data, raw_values }
let values =
BooleanBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());

Self { data, values }
}
}

Expand Down Expand Up @@ -424,7 +421,7 @@ mod tests {
fn test_boolean_array_from_vec() {
let buf = Buffer::from([10_u8]);
let arr = BooleanArray::from(vec![false, true, false, true]);
assert_eq!(&buf, arr.values());
assert_eq!(&buf, arr.values().inner());
assert_eq!(4, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(0, arr.null_count());
Expand All @@ -439,7 +436,7 @@ mod tests {
fn test_boolean_array_from_vec_option() {
let buf = Buffer::from([10_u8]);
let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]);
assert_eq!(&buf, arr.values());
assert_eq!(&buf, arr.values().inner());
assert_eq!(4, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(1, arr.null_count());
Expand Down Expand Up @@ -501,7 +498,7 @@ mod tests {
.build()
.unwrap();
let arr = BooleanArray::from(data);
assert_eq!(&buf2, arr.values());
assert_eq!(&buf2, arr.values().inner());
assert_eq!(5, arr.len());
assert_eq!(2, arr.offset());
assert_eq!(0, arr.null_count());
Expand Down
2 changes: 1 addition & 1 deletion arrow-array/src/builder/boolean_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ mod tests {
}

let arr = builder.finish();
assert_eq!(&buf, arr.values());
assert_eq!(&buf, arr.values().inner());
assert_eq!(10, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(0, arr.null_count());
Expand Down
2 changes: 1 addition & 1 deletion arrow-array/src/builder/primitive_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ mod tests {
}

let arr = builder.finish();
assert_eq!(&buf, arr.values());
assert_eq!(&buf, arr.values().inner());
assert_eq!(10, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(0, arr.null_count());
Expand Down
39 changes: 35 additions & 4 deletions arrow-buffer/src/buffer/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
// under the License.

use crate::bit_chunk_iterator::BitChunks;
use crate::{bit_util, buffer_bin_and, buffer_bin_or, Buffer};
use std::ops::{BitAnd, BitOr};
use crate::{bit_util, buffer_bin_and, buffer_bin_or, buffer_unary_not, Buffer};
use std::ops::{BitAnd, BitOr, Not};

/// A slice-able [`Buffer`] containing bit-packed booleans
#[derive(Debug, Clone, Eq)]
Expand Down Expand Up @@ -77,9 +77,9 @@ impl BooleanBuffer {
///
/// Panics if `i >= self.len()`
#[inline]
#[deprecated(note = "use BooleanBuffer::value")]
pub fn is_set(&self, i: usize) -> bool {
assert!(i < self.len);
unsafe { bit_util::get_bit_raw(self.buffer.as_ptr(), i + self.offset) }
self.value(i)
}

/// Returns the offset of this [`BooleanBuffer`] in bits
Expand All @@ -100,6 +100,25 @@ impl BooleanBuffer {
self.len == 0
}

/// Returns the boolean value at index `i`.
///
/// # Panics
///
/// Panics if `i >= self.len()`
pub fn value(&self, idx: usize) -> bool {
assert!(idx < self.len);
unsafe { self.value_unchecked(idx) }
}

/// Returns the boolean value at index `i`.
///
/// # Safety
/// This doesn't check bounds, the caller must ensure that index < self.len()
#[inline]
pub unsafe fn value_unchecked(&self, i: usize) -> bool {
unsafe { bit_util::get_bit_raw(self.buffer.as_ptr(), i + self.offset) }
}

/// Returns the packed values of this [`BooleanBuffer`] not including any offset
#[inline]
pub fn values(&self) -> &[u8] {
Expand Down Expand Up @@ -147,6 +166,18 @@ impl BooleanBuffer {
}
}

impl Not for &BooleanBuffer {
type Output = BooleanBuffer;

fn not(self) -> Self::Output {
BooleanBuffer {
buffer: buffer_unary_not(&self.buffer, self.offset, self.len),
offset: 0,
len: self.len,
}
}
}

impl BitAnd<&BooleanBuffer> for &BooleanBuffer {
type Output = BooleanBuffer;

Expand Down
2 changes: 1 addition & 1 deletion arrow-buffer/src/buffer/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ impl NullBuffer {
/// Returns `true` if the value at `idx` is not null
#[inline]
pub fn is_valid(&self, idx: usize) -> bool {
self.buffer.is_set(idx)
self.buffer.value(idx)
}

/// Returns `true` if the value at `idx` is null
Expand Down
17 changes: 6 additions & 11 deletions arrow-ord/src/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
use arrow_array::cast::*;
use arrow_array::types::*;
use arrow_array::*;
use arrow_buffer::buffer::buffer_unary_not;
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::bit_mask::combine_option_bitmap;
use arrow_data::ArrayData;
Expand Down Expand Up @@ -196,23 +195,19 @@ pub fn eq_bool_scalar(
left: &BooleanArray,
right: bool,
) -> Result<BooleanArray, ArrowError> {
let len = left.len();
let left_offset = left.offset();

let values = if right {
left.values().bit_slice(left_offset, len)
} else {
buffer_unary_not(left.values(), left.offset(), left.len())
let values = match right {
true => left.values().clone(),
false => !left.values(),
};

let data = unsafe {
ArrayData::new_unchecked(
DataType::Boolean,
len,
values.len(),
None,
left.nulls().map(|b| b.inner().sliced()),
0,
vec![values],
values.offset(),
vec![values.into_inner()],
vec![],
)
};
Expand Down
17 changes: 6 additions & 11 deletions arrow-select/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use arrow_array::cast::{as_generic_binary_array, as_largestring_array, as_string
use arrow_array::types::{ArrowDictionaryKeyType, ByteArrayType};
use arrow_array::*;
use arrow_buffer::bit_util;
use arrow_buffer::{buffer::buffer_bin_and, Buffer, MutableBuffer};
use arrow_buffer::{Buffer, MutableBuffer};
use arrow_data::bit_iterator::{BitIndexIterator, BitSliceIterator};
use arrow_data::transform::MutableArrayData;
use arrow_data::{ArrayData, ArrayDataBuilder};
Expand Down Expand Up @@ -109,9 +109,7 @@ impl<'a> Iterator for IndexIterator<'a> {

/// Counts the number of set bits in `filter`
fn filter_count(filter: &BooleanArray) -> usize {
filter
.values()
.count_set_bits_offset(filter.offset(), filter.len())
filter.values().count_set_bits()
}

/// Function that can filter arbitrary arrays
Expand Down Expand Up @@ -154,15 +152,12 @@ pub fn build_filter(filter: &BooleanArray) -> Result<Filter, ArrowError> {
pub fn prep_null_mask_filter(filter: &BooleanArray) -> BooleanArray {
let array_data = filter.data_ref();
let nulls = array_data.nulls().unwrap();
let mask = filter.values();
let offset = filter.offset();

let new_mask =
buffer_bin_and(mask, offset, nulls.buffer(), nulls.offset(), filter.len());
let mask = filter.values() & nulls.inner();

let array_data = ArrayData::builder(DataType::Boolean)
.len(filter.len())
.add_buffer(new_mask);
.len(mask.len())
.offset(mask.offset())
.add_buffer(mask.into_inner());

let array_data = unsafe { array_data.build_unchecked() };

Expand Down
2 changes: 1 addition & 1 deletion arrow-select/src/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ where
IndexType: ArrowPrimitiveType,
IndexType::Native: ToPrimitive,
{
let val_buf = take_bits(values.values(), values.offset(), indices)?;
let val_buf = take_bits(values.values().inner(), values.offset(), indices)?;
let null_buf = match values.nulls() {
Some(nulls) if nulls.null_count() > 0 => {
Some(take_bits(nulls.buffer(), nulls.offset(), indices)?)
Expand Down

0 comments on commit 2730da1

Please sign in to comment.