Skip to content
Merged
128 changes: 121 additions & 7 deletions arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -436,11 +436,84 @@ impl<'a> BooleanArray {
}
}

impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
/// An optional boolean value
///
/// This struct is used as an adapter when creating `BooleanArray` from an iterator.
/// `FromIterator` for `BooleanArray` takes an iterator where the elements can be `into`
/// this struct. So once implementing `From` or `Into` trait for a type, an iterator of
/// the type can be collected to `BooleanArray`.
///
/// See also [NativeAdapter](crate::array::NativeAdapter).
#[derive(Debug)]
struct BooleanAdapter {
/// Corresponding Rust native type if available
pub native: Option<bool>,
}

impl From<bool> for BooleanAdapter {
fn from(value: bool) -> Self {
BooleanAdapter {
native: Some(value),
}
}
}

impl From<&bool> for BooleanAdapter {
fn from(value: &bool) -> Self {
BooleanAdapter {
native: Some(*value),
}
}
}

impl From<Option<bool>> for BooleanAdapter {
fn from(value: Option<bool>) -> Self {
BooleanAdapter { native: value }
}
}

impl From<&Option<bool>> for BooleanAdapter {
fn from(value: &Option<bool>) -> Self {
BooleanAdapter { native: *value }
}
}

impl<Ptr: Into<BooleanAdapter>> FromIterator<Ptr> for BooleanArray {
fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
let iter = iter.into_iter();
let (_, data_len) = iter.size_hint();
let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
let capacity = match iter.size_hint() {
(lower, Some(upper)) if lower == upper => lower,
_ => 0,
};
let mut builder = BooleanBuilder::with_capacity(capacity);
builder.extend(iter.map(|item| item.into().native));
builder.finish()
}
}

impl BooleanArray {
/// Creates a [`BooleanArray`] from an iterator of trusted length.
///
/// # Safety
///
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
/// I.e. that `size_hint().1` correctly reports its length. Note that this is a stronger
/// guarantee that `ExactSizeIterator` provides which could still report a wrong length.
///
/// # Panics
///
/// Panics if the iterator does not report an upper bound on `size_hint()`.
#[inline]
#[allow(
private_bounds,
reason = "We will expose BooleanAdapter if there is a need"
)]
pub unsafe fn from_trusted_len_iter<I, P>(iter: I) -> Self
where
P: Into<BooleanAdapter>,
I: ExactSizeIterator<Item = P>,
{
let data_len = iter.len();

let num_bytes = bit_util::ceil(data_len, 8);
let mut null_builder = MutableBuffer::from_len_zeroed(num_bytes);
Expand All @@ -450,10 +523,14 @@ impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray

let null_slice = null_builder.as_slice_mut();
iter.enumerate().for_each(|(i, item)| {
if let Some(a) = item.borrow() {
bit_util::set_bit(null_slice, i);
if *a {
bit_util::set_bit(data, i);
if let Some(a) = item.into().native {
unsafe {
// SAFETY: There will be enough space in the buffers due to the trusted len size
// hint
bit_util::set_bit_raw(null_slice.as_mut_ptr(), i);
if a {
bit_util::set_bit_raw(data.as_mut_ptr(), i);
}
}
}
});
Expand Down Expand Up @@ -599,6 +676,20 @@ mod tests {
}
}

#[test]
fn test_boolean_array_from_non_nullable_iter() {
let v = vec![true, false, true];
let arr = v.into_iter().collect::<BooleanArray>();
assert_eq!(3, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(0, arr.null_count());
assert!(arr.nulls().is_none());

assert!(arr.value(0));
assert!(!arr.value(1));
assert!(arr.value(2));
}

#[test]
fn test_boolean_array_from_nullable_iter() {
let v = vec![Some(true), None, Some(false), None];
Expand All @@ -617,6 +708,29 @@ mod tests {
assert!(!arr.value(2));
}

#[test]
fn test_boolean_array_from_nullable_trusted_len_iter() {
// Should exhibit the same behavior as `from_iter`, which is tested above.
let v = vec![Some(true), None, Some(false), None];
let expected = v.clone().into_iter().collect::<BooleanArray>();
let actual = unsafe {
// SAFETY: `v` has trusted length
BooleanArray::from_trusted_len_iter(v.into_iter())
};
assert_eq!(expected, actual);
}

#[test]
fn test_boolean_array_from_iter_with_larger_upper_bound() {
// See https://github.com/apache/arrow-rs/issues/8505
// This returns an upper size hint of 4
let iterator = vec![Some(true), None, Some(false), None]
.into_iter()
.filter(Option::is_some);
let arr = iterator.collect::<BooleanArray>();
assert_eq!(2, arr.len());
}

#[test]
fn test_boolean_array_builder() {
// Test building a boolean array with ArrayData builder and offset
Expand Down
9 changes: 6 additions & 3 deletions arrow-array/src/builder/boolean_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,12 @@ impl ArrayBuilder for BooleanBuilder {
impl Extend<Option<bool>> for BooleanBuilder {
#[inline]
fn extend<T: IntoIterator<Item = Option<bool>>>(&mut self, iter: T) {
for v in iter {
self.append_option(v)
}
let buffered = iter.into_iter().collect::<Vec<_>>();
let array = unsafe {
// SAFETY: std::vec::IntoIter implements TrustedLen
BooleanArray::from_trusted_len_iter(buffered.into_iter())
};
self.append_array(&array)
}
}

Expand Down
10 changes: 8 additions & 2 deletions arrow/benches/array_from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,12 @@
// specific language governing permissions and limitations
// under the License.

extern crate arrow;
#[macro_use]
extern crate criterion;

use criterion::Criterion;

extern crate arrow;

use arrow::array::*;
use arrow_buffer::i256;
use rand::Rng;
Expand Down Expand Up @@ -236,6 +235,13 @@ fn from_iter_benchmark(c: &mut Criterion) {
let values = gen_option_vector(true, ITER_LEN);
b.iter(|| hint::black_box(BooleanArray::from_iter(values.iter())));
});
c.bench_function("BooleanArray::from_trusted_len_iter", |b| {
let values = gen_option_vector(true, ITER_LEN);
b.iter(|| unsafe {
// SAFETY: values.iter() is a TrustedLenIterator
hint::black_box(BooleanArray::from_trusted_len_iter(values.iter()))
});
});
}

criterion_group!(
Expand Down
Loading