From 0bfdcbdaa81cacd0fcaf509355eb1eee1271c969 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Sat, 18 Jun 2022 08:51:59 +0000 Subject: [PATCH] Improved docs for BooleanArray --- src/array/boolean/from.rs | 78 -------- src/array/boolean/iterator.rs | 17 -- src/array/boolean/mod.rs | 327 ++++++++++++++++++++++++---------- src/array/primitive/mod.rs | 3 +- 4 files changed, 238 insertions(+), 187 deletions(-) diff --git a/src/array/boolean/from.rs b/src/array/boolean/from.rs index 8c767513764..81a5395ccc0 100644 --- a/src/array/boolean/from.rs +++ b/src/array/boolean/from.rs @@ -1,91 +1,13 @@ use std::iter::FromIterator; -use crate::trusted_len::TrustedLen; - use super::{BooleanArray, MutableBooleanArray}; impl]>> From

for BooleanArray { - /// Creates a new [`BooleanArray`] out of a slice of Optional `bool`. fn from(slice: P) -> Self { MutableBooleanArray::from(slice).into() } } -impl BooleanArray { - /// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`. - #[inline] - pub fn from_trusted_len_values_iter>(iterator: I) -> Self { - MutableBooleanArray::from_trusted_len_values_iter(iterator).into() - } - - /// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`. - /// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len - /// but this crate does not mark it as such. - /// # Safety - /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html). - /// I.e. that `size_hint().1` correctly reports its length. - #[inline] - pub unsafe fn from_trusted_len_values_iter_unchecked>( - iterator: I, - ) -> Self { - MutableBooleanArray::from_trusted_len_values_iter_unchecked(iterator).into() - } - - /// Creates a new [`BooleanArray`] from a slice of `bool`. - #[inline] - pub fn from_slice>(slice: P) -> Self { - MutableBooleanArray::from_slice(slice).into() - } - - /// Creates a [`BooleanArray`] from an iterator of trusted length. - /// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len - /// but this crate does not mark it as such. - /// # Safety - /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html). - /// I.e. that `size_hint().1` correctly reports its length. - #[inline] - pub unsafe fn from_trusted_len_iter_unchecked(iterator: I) -> Self - where - P: std::borrow::Borrow, - I: Iterator>, - { - MutableBooleanArray::from_trusted_len_iter_unchecked(iterator).into() - } - - /// Creates a [`BooleanArray`] from a [`TrustedLen`]. 
- #[inline] - pub fn from_trusted_len_iter(iterator: I) -> Self - where - P: std::borrow::Borrow, - I: TrustedLen>, - { - MutableBooleanArray::from_trusted_len_iter(iterator).into() - } - - /// Creates a [`BooleanArray`] from an falible iterator of trusted length. - /// # Safety - /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html). - /// I.e. that `size_hint().1` correctly reports its length. - #[inline] - pub unsafe fn try_from_trusted_len_iter_unchecked(iterator: I) -> Result - where - P: std::borrow::Borrow, - I: Iterator, E>>, - { - Ok(MutableBooleanArray::try_from_trusted_len_iter_unchecked(iterator)?.into()) - } - - /// Creates a [`BooleanArray`] from a [`TrustedLen`]. - #[inline] - pub fn try_from_trusted_len_iter(iterator: I) -> Result - where - P: std::borrow::Borrow, - I: TrustedLen, E>>, - { - Ok(MutableBooleanArray::try_from_trusted_len_iter(iterator)?.into()) - } -} - impl>> FromIterator for BooleanArray { fn from_iter>(iter: I) -> Self { MutableBooleanArray::from_iter(iter).into() diff --git a/src/array/boolean/iterator.rs b/src/array/boolean/iterator.rs index 8b5e679fef0..3f07bfb28e1 100644 --- a/src/array/boolean/iterator.rs +++ b/src/array/boolean/iterator.rs @@ -13,23 +13,6 @@ impl<'a> IntoIterator for &'a BooleanArray { } } -impl<'a> BooleanArray { - /// Returns an iterator over the optional values of this [`BooleanArray`]. 
- #[inline] - pub fn iter(&'a self) -> ZipValidity<'a, bool, BitmapIter<'a>> { - zip_validity( - self.values().iter(), - self.validity.as_ref().map(|x| x.iter()), - ) - } - - /// Returns an iterator over the values of this [`BooleanArray`] - #[inline] - pub fn values_iter(&'a self) -> BitmapIter<'a> { - self.values().iter() - } -} - impl<'a> IntoIterator for &'a MutableBooleanArray { type Item = Option; type IntoIter = ZipValidity<'a, bool, BitmapIter<'a>>; diff --git a/src/array/boolean/mod.rs b/src/array/boolean/mod.rs index 815ec6b1348..85ad172be0b 100644 --- a/src/array/boolean/mod.rs +++ b/src/array/boolean/mod.rs @@ -1,7 +1,11 @@ use crate::{ - bitmap::{Bitmap, MutableBitmap}, + bitmap::{ + utils::{zip_validity, BitmapIter, ZipValidity}, + Bitmap, MutableBitmap, + }, datatypes::{DataType, PhysicalType}, error::Error, + trusted_len::TrustedLen, }; use either::Either; @@ -16,8 +20,31 @@ mod mutable; pub use iterator::*; pub use mutable::*; -/// The Arrow's equivalent to an immutable `Vec>`, but with `1/16` of its size. -/// Cloning and slicing this struct is `O(1)`. +/// A [`BooleanArray`] is Arrow's semantically equivalent of an immutable `Vec>`. +/// It implements [`Array`]. +/// +/// One way to think about a [`BooleanArray`] is `(DataType, Arc>, Option>>)` +/// where: +/// * the first item is the array's logical type +/// * the second is the immutable values +/// * the third is the immutable validity (whether a value is null or not as a bitmap). +/// +/// The size of this struct is `O(1)`, as all data is stored behind an [`std::sync::Arc`]. 
+/// # Example +/// ``` +/// use arrow2::array::BooleanArray; +/// use arrow2::bitmap::Bitmap; +/// use arrow2::buffer::Buffer; +/// +/// let array = BooleanArray::from([Some(true), None, Some(false)]); +/// assert_eq!(array.value(0), true); +/// assert_eq!(array.iter().collect::>(), vec![Some(true), None, Some(false)]); +/// assert_eq!(array.values_iter().collect::>(), vec![true, false, false]); +/// // the underlying representation +/// assert_eq!(array.values(), &Bitmap::from([true, false, false])); +/// assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true]))); +/// +/// ``` #[derive(Clone)] pub struct BooleanArray { data_type: DataType, @@ -58,87 +85,67 @@ impl BooleanArray { }) } - /// The canonical method to create a [`BooleanArray`] - /// # Panics - /// This function errors iff: - /// * The validity is not `None` and its length is different from `values`'s length - /// * The `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`]. - pub fn new(data_type: DataType, values: Bitmap, validity: Option) -> Self { - Self::try_new(data_type, values, validity).unwrap() + /// Returns an iterator over the optional values of this [`BooleanArray`]. + #[inline] + pub fn iter(&self) -> ZipValidity { + zip_validity( + self.values().iter(), + self.validity.as_ref().map(|x| x.iter()), + ) } - /// Alias for `new` - pub fn from_data(data_type: DataType, values: Bitmap, validity: Option) -> Self { - Self::new(data_type, values, validity) + /// Returns an iterator over the values of this [`BooleanArray`]. + #[inline] + pub fn values_iter(&self) -> BitmapIter { + self.values().iter() } - /// Returns a new empty [`BooleanArray`]. - pub fn new_empty(data_type: DataType) -> Self { - Self::new(data_type, Bitmap::new(), None) + /// Returns the length of this array + #[inline] + pub fn len(&self) -> usize { + self.values.len() } - /// Returns a new [`BooleanArray`] whose all slots are null / `None`. 
- pub fn new_null(data_type: DataType, length: usize) -> Self { - let bitmap = Bitmap::new_zeroed(length); - Self::new(data_type, bitmap.clone(), Some(bitmap)) + /// The values [`Bitmap`]. + /// Values on null slots are undetermined (they can be anything). + #[inline] + pub fn values(&self) -> &Bitmap { + &self.values } - /// Boxes self into a [`Box`]. - pub fn boxed(self) -> Box { - Box::new(self) + /// Returns the optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() } - /// Boxes self into a [`std::sync::Arc`]. - pub fn arced(self) -> std::sync::Arc { - std::sync::Arc::new(self) + /// Returns the arrays' [`DataType`]. + #[inline] + pub fn data_type(&self) -> &DataType { + &self.data_type } - /// Applies a function `f` to the values of this array, cloning the values - /// iff they are being shared with others - /// - /// This is an API to use clone-on-write - /// # Implementation - /// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)` - /// if it is being shared (since it results in a `O(N)` memcopy). - /// # Panics - /// This function panics if the function modifies the length of the [`MutableBitmap`]. - pub fn apply_values_mut(&mut self, f: F) { - let values = std::mem::take(&mut self.values); - let mut values = values.make_mut(); - f(&mut values); - if let Some(validity) = &self.validity { - assert_eq!(validity.len(), values.len()); - } - self.values = values.into(); + /// Returns the value at index `i` + /// # Panic + /// This function panics iff `i >= self.len()`. + #[inline] + pub fn value(&self, i: usize) -> bool { + self.values.get_bit(i) } - /// Applies a function `f` to the validity of this array, cloning it - /// iff it is being shared. - /// - /// This is an API to leverage clone-on-write - /// # Implementation - /// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)` - /// if it is being shared (since it results in a `O(N)` memcopy). 
- /// # Panics - /// This function panics if the function modifies the length of the [`MutableBitmap`]. - pub fn apply_validity_mut(&mut self, f: F) { - if let Some(validity) = self.validity.as_mut() { - let values = std::mem::take(validity); - let mut bitmap = values.make_mut(); - f(&mut bitmap); - assert_eq!(bitmap.len(), self.values.len()); - *validity = bitmap.into(); - } + /// Returns the element at index `i` as bool + /// # Safety + /// Caller must be sure that `i < self.len()` + #[inline] + pub unsafe fn value_unchecked(&self, i: usize) -> bool { + self.values.get_bit_unchecked(i) } -} -// must use -impl BooleanArray { /// Returns a slice of this [`BooleanArray`]. /// # Implementation - /// This operation is `O(1)` as it amounts to increase two ref counts. + /// This operation is `O(1)` as it amounts to increase up to two ref counts. /// # Panic - /// This function panics iff `offset + length >= self.len()`. + /// This function panics iff `offset + length > self.len()`. #[inline] #[must_use] pub fn slice(&self, offset: usize, length: usize) -> Self { @@ -168,17 +175,84 @@ impl BooleanArray { } } - /// Sets the validity bitmap on this [`BooleanArray`]. + /// Clones this [`BooleanArray`], returning one with the provided validity. /// # Panic /// This function panics iff `validity.len() != self.len()`. #[must_use] pub fn with_validity(&self, validity: Option) -> Self { + let mut array = self.clone(); + array.set_validity(validity); + array + } + + /// Sets the validity of this [`BooleanArray`]. + /// # Panics + /// This function panics iff `validity.len() != self.len()`. + pub fn set_validity(&mut self, validity: Option) { if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) { - panic!("validity should be as least as large as the array") + panic!("validity must be equal to the array's length") + } + self.validity = validity; + } + + /// Returns a clone of this [`BooleanArray`] with new values. 
+ /// # Panics + /// This function panics iff `values.len() != self.len()`. + #[must_use] + pub fn with_values(&self, values: Bitmap) -> Self { + let mut out = self.clone(); + out.set_values(values); + out + } + + /// Sets the values of this [`BooleanArray`]. + /// # Panics + /// This function panics iff `values.len() != self.len()`. + pub fn set_values(&mut self, values: Bitmap) { + assert_eq!( + values.len(), + self.len(), + "values length must be equal to this arrays length" + ); + self.values = values; + } + + /// Applies a function `f` to the values of this array, cloning the values + /// iff they are being shared with others + /// + /// This is an API to use clone-on-write + /// # Implementation + /// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)` + /// if it is being shared (since it results in a `O(N)` memcopy). + /// # Panics + /// This function panics if the function modifies the length of the [`MutableBitmap`]. + pub fn apply_values_mut(&mut self, f: F) { + let values = std::mem::take(&mut self.values); + let mut values = values.make_mut(); + f(&mut values); + if let Some(validity) = &self.validity { + assert_eq!(validity.len(), values.len()); + } + self.values = values.into(); + } + + /// Applies a function `f` to the validity of this array, cloning it + /// iff it is being shared. + /// + /// This is an API to leverage clone-on-write + /// # Implementation + /// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)` + /// if it is being shared (since it results in a `O(N)` memcopy). + /// # Panics + /// This function panics if the function modifies the length of the [`MutableBitmap`]. 
+ pub fn apply_validity_mut(&mut self, f: F) { + if let Some(validity) = self.validity.as_mut() { + let owned_validity = std::mem::take(validity); + let mut mut_bitmap = owned_validity.make_mut(); + f(&mut mut_bitmap); + assert_eq!(mut_bitmap.len(), self.values.len()); + *validity = mut_bitmap.into(); } - let mut arr = self.clone(); - arr.validity = validity; - arr } /// Try to convert this [`BooleanArray`] to a [`MutableBooleanArray`] @@ -212,42 +286,113 @@ impl BooleanArray { } } } -} -// accessors -impl BooleanArray { - /// Returns the length of this array + /// Returns a new empty [`BooleanArray`]. + pub fn new_empty(data_type: DataType) -> Self { + Self::new(data_type, Bitmap::new(), None) + } + + /// Returns a new [`BooleanArray`] whose all slots are null / `None`. + pub fn new_null(data_type: DataType, length: usize) -> Self { + let bitmap = Bitmap::new_zeroed(length); + Self::new(data_type, bitmap.clone(), Some(bitmap)) + } + + /// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`. #[inline] - pub fn len(&self) -> usize { - self.values.len() + pub fn from_trusted_len_values_iter>(iterator: I) -> Self { + MutableBooleanArray::from_trusted_len_values_iter(iterator).into() } - /// Returns the value at index `i` - /// # Panic - /// This function panics iff `i >= self.len()`. + /// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`. + /// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len + /// but this crate does not mark it as such. + /// # Safety + /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html). + /// I.e. that `size_hint().1` correctly reports its length. 
#[inline] - pub fn value(&self, i: usize) -> bool { - self.values.get_bit(i) + pub unsafe fn from_trusted_len_values_iter_unchecked>( + iterator: I, + ) -> Self { + MutableBooleanArray::from_trusted_len_values_iter_unchecked(iterator).into() } - /// Returns the element at index `i` as bool + /// Creates a new [`BooleanArray`] from a slice of `bool`. + #[inline] + pub fn from_slice>(slice: P) -> Self { + MutableBooleanArray::from_slice(slice).into() + } + + /// Creates a [`BooleanArray`] from an iterator of trusted length. + /// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len + /// but this crate does not mark it as such. /// # Safety - /// Caller must be sure that `i < self.len()` + /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html). + /// I.e. that `size_hint().1` correctly reports its length. #[inline] - pub unsafe fn value_unchecked(&self, i: usize) -> bool { - self.values.get_bit_unchecked(i) + pub unsafe fn from_trusted_len_iter_unchecked(iterator: I) -> Self + where + P: std::borrow::Borrow, + I: Iterator>, + { + MutableBooleanArray::from_trusted_len_iter_unchecked(iterator).into() } - /// The optional validity. + /// Creates a [`BooleanArray`] from a [`TrustedLen`]. #[inline] - pub fn validity(&self) -> Option<&Bitmap> { - self.validity.as_ref() + pub fn from_trusted_len_iter(iterator: I) -> Self + where + P: std::borrow::Borrow, + I: TrustedLen>, + { + MutableBooleanArray::from_trusted_len_iter(iterator).into() } - /// Returns the values of this [`BooleanArray`]. + /// Creates a [`BooleanArray`] from a fallible iterator of trusted length. + /// # Safety + /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html). + /// I.e. that `size_hint().1` correctly reports its length. 
#[inline] - pub fn values(&self) -> &Bitmap { - &self.values + pub unsafe fn try_from_trusted_len_iter_unchecked(iterator: I) -> Result + where + P: std::borrow::Borrow, + I: Iterator, E>>, + { + Ok(MutableBooleanArray::try_from_trusted_len_iter_unchecked(iterator)?.into()) + } + + /// Creates a [`BooleanArray`] from a [`TrustedLen`]. + #[inline] + pub fn try_from_trusted_len_iter(iterator: I) -> Result + where + P: std::borrow::Borrow, + I: TrustedLen, E>>, + { + Ok(MutableBooleanArray::try_from_trusted_len_iter(iterator)?.into()) + } + + /// Boxes self into a [`Box`]. + pub fn boxed(self) -> Box { + Box::new(self) + } + + /// Boxes self into a [`std::sync::Arc`]. + pub fn arced(self) -> std::sync::Arc { + std::sync::Arc::new(self) + } + + /// The canonical method to create a [`BooleanArray`] + /// # Panics + /// This function panics iff: + /// * The validity is not `None` and its length is different from `values`'s length + /// * The `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`]. + pub fn new(data_type: DataType, values: Bitmap, validity: Option) -> Self { + Self::try_new(data_type, values, validity).unwrap() + } + + /// Alias for `new` + pub fn from_data(data_type: DataType, values: Bitmap, validity: Option) -> Self { + Self::new(data_type, values, validity) } } diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index 8443fca7d56..925b9a26fc8 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -126,6 +126,7 @@ impl PrimitiveArray { } /// Creates a (non-null) [`PrimitiveArray`] from a vector of values. + /// This function is `O(1)`. /// # Examples /// ``` /// use arrow2::array::PrimitiveArray; @@ -260,7 +261,7 @@ impl PrimitiveArray { self.validity = validity; } - /// Returns a clone of this [`PrimitiveArray`] with a new values. + /// Returns a clone of this [`PrimitiveArray`] with new values. /// # Panics /// This function panics iff `values.len() != self.len()`. #[must_use]