From 96fde0158830dc6a0065c97c7609f68c32772c54 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Mon, 20 Sep 2021 17:21:25 +0000 Subject: [PATCH] Made accessing validity simpler. --- src/array/binary/mod.rs | 6 +++ src/array/boolean/mod.rs | 33 ++++++++++------ src/array/dictionary/mod.rs | 6 +++ src/array/fixed_size_binary/mod.rs | 6 +++ src/array/fixed_size_list/mod.rs | 6 +++ src/array/list/mod.rs | 53 ++++++++++++++----------- src/array/primitive/mod.rs | 8 +++- src/array/struct_.rs | 9 +++++ src/array/utf8/mod.rs | 63 +++++++++++++++++------------- src/compute/utils.rs | 2 +- 10 files changed, 129 insertions(+), 63 deletions(-) diff --git a/src/array/binary/mod.rs b/src/array/binary/mod.rs index 9433b5cdc9c..a05d994e4bc 100644 --- a/src/array/binary/mod.rs +++ b/src/array/binary/mod.rs @@ -139,6 +139,12 @@ impl BinaryArray { self.values.get_unchecked(start..end) } + /// The optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() + } + /// Returns the offsets that slice `.values()` to return valid values. #[inline] pub fn offsets(&self) -> &Buffer { diff --git a/src/array/boolean/mod.rs b/src/array/boolean/mod.rs index d7be39074e9..bf5a5a6ff6a 100644 --- a/src/array/boolean/mod.rs +++ b/src/array/boolean/mod.rs @@ -70,6 +70,21 @@ impl BooleanArray { } } + /// Sets the validity bitmap on this [`BooleanArray`]. + /// # Panic + /// This function panics iff `validity.len() != self.len()`. + pub fn with_validity(&self, validity: Option) -> Self { + if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) { + panic!("validity should be as least as large as the array") + } + let mut arr = self.clone(); + arr.validity = validity; + arr + } +} + +// accessors +impl BooleanArray { /// Returns the value at index `i` /// # Panic /// This function panics iff `i >= self.len()`. @@ -86,23 +101,17 @@ impl BooleanArray { self.values.get_bit_unchecked(i) } + /// The optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() + } + /// Returns the values of this [`BooleanArray`]. #[inline] pub fn values(&self) -> &Bitmap { &self.values } - - /// Sets the validity bitmap on this [`BooleanArray`]. - /// # Panic - /// This function panics iff `validity.len() != self.len()`. - pub fn with_validity(&self, validity: Option) -> Self { - if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) { - panic!("validity should be as least as large as the array") - } - let mut arr = self.clone(); - arr.validity = validity; - arr - } } impl Array for BooleanArray { diff --git a/src/array/dictionary/mod.rs b/src/array/dictionary/mod.rs index 4fbfb461d3c..4f2477d29ff 100644 --- a/src/array/dictionary/mod.rs +++ b/src/array/dictionary/mod.rs @@ -95,6 +95,12 @@ impl DictionaryArray { arr } + /// The optional validity. Equivalent to `self.keys().validity()`. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.keys.validity() + } + /// Returns the keys of the [`DictionaryArray`]. These keys can be used to fetch values /// from `values`. #[inline] diff --git a/src/array/fixed_size_binary/mod.rs b/src/array/fixed_size_binary/mod.rs index 3508e078767..19f185bfcf9 100644 --- a/src/array/fixed_size_binary/mod.rs +++ b/src/array/fixed_size_binary/mod.rs @@ -65,6 +65,12 @@ impl FixedSizeBinaryArray { } } + /// The optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() + } + /// Returns the values allocated on this [`FixedSizeBinaryArray`]. pub fn values(&self) -> &Buffer { &self.values diff --git a/src/array/fixed_size_list/mod.rs b/src/array/fixed_size_list/mod.rs index 7832cdf13ed..6d400091d38 100644 --- a/src/array/fixed_size_list/mod.rs +++ b/src/array/fixed_size_list/mod.rs @@ -79,6 +79,12 @@ impl FixedSizeListArray { } } + /// The optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() + } + /// Returns the inner array. pub fn values(&self) -> &Arc { &self.values diff --git a/src/array/list/mod.rs b/src/array/list/mod.rs index e89d0f87e27..4714cecefbe 100644 --- a/src/array/list/mod.rs +++ b/src/array/list/mod.rs @@ -69,6 +69,33 @@ impl ListArray { } } + pub fn slice(&self, offset: usize, length: usize) -> Self { + let validity = self.validity.clone().map(|x| x.slice(offset, length)); + let offsets = self.offsets.clone().slice(offset, length + 1); + Self { + data_type: self.data_type.clone(), + offsets, + values: self.values.clone(), + validity, + offset: self.offset + offset, + } + } + + /// Sets the validity bitmap on this [`ListArray`]. + /// # Panic + /// This function panics iff `validity.len() != self.len()`. + pub fn with_validity(&self, validity: Option) -> Self { + if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) { + panic!("validity should be as least as large as the array") + } + let mut arr = self.clone(); + arr.validity = validity; + arr + } +} + +// Accessors +impl ListArray { /// Returns the element at index `i` #[inline] pub fn value(&self, i: usize) -> Box { @@ -96,16 +123,10 @@ impl ListArray { self.values.slice(offset.to_usize(), length) } - pub fn slice(&self, offset: usize, length: usize) -> Self { - let validity = self.validity.clone().map(|x| x.slice(offset, length)); - let offsets = self.offsets.clone().slice(offset, length + 1); - Self { - data_type: self.data_type.clone(), - offsets, - values: self.values.clone(), - validity, - offset: self.offset + offset, - } + /// The optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() } #[inline] @@ -117,18 +138,6 @@ impl ListArray { pub fn values(&self) -> &Arc { &self.values } - - /// Sets the validity bitmap on this [`ListArray`]. - /// # Panic - /// This function panics iff `validity.len() != self.len()`. - pub fn with_validity(&self, validity: Option) -> Self { - if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) { - panic!("validity should be as least as large as the array") - } - let mut arr = self.clone(); - arr.validity = validity; - arr - } } impl ListArray { diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index 61db9c1c6de..09880d37dfa 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -107,7 +107,13 @@ impl PrimitiveArray { arr } - /// The values. + /// The optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() + } + + /// The values [`Buffer`]. /// Values on null slots are undetermined (they can be anything). #[inline] pub fn values(&self) -> &Buffer { diff --git a/src/array/struct_.rs b/src/array/struct_.rs index 8bb32b05a1d..c1145d93964 100644 --- a/src/array/struct_.rs +++ b/src/array/struct_.rs @@ -128,6 +128,15 @@ impl StructArray { arr.validity = validity; arr } +} + +// Accessors +impl StructArray { + /// The optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() + } /// Returns the values of this [`StructArray`]. pub fn values(&self) -> &[Arc] { diff --git a/src/array/utf8/mod.rs b/src/array/utf8/mod.rs index a28307acf4c..bf709c02041 100644 --- a/src/array/utf8/mod.rs +++ b/src/array/utf8/mod.rs @@ -132,33 +132,6 @@ impl Utf8Array { } } - /// Returns the element at index `i` as &str - /// # Safety - /// This function is safe iff `i < self.len`. - pub unsafe fn value_unchecked(&self, i: usize) -> &str { - // soundness: the invariant of the function - let start = self.offsets.get_unchecked(i).to_usize(); - let end = self.offsets.get_unchecked(i + 1).to_usize(); - - // soundness: the invariant of the struct - let slice = self.values.get_unchecked(start..end); - - // soundness: the invariant of the struct - std::str::from_utf8_unchecked(slice) - } - - /// Returns the element at index `i` - pub fn value(&self, i: usize) -> &str { - let start = self.offsets[i].to_usize(); - let end = self.offsets[i + 1].to_usize(); - - // soundness: the invariant of the struct - let slice = unsafe { self.values.get_unchecked(start..end) }; - - // soundness: we always check for utf8 soundness on constructors. - unsafe { std::str::from_utf8_unchecked(slice) } - } - /// Returns a slice of this [`Utf8Array`]. /// # Implementation /// This operation is `O(1)` as it amounts to essentially increase two ref counts. @@ -188,6 +161,42 @@ impl Utf8Array { arr.validity = validity; arr } +} + +// Accessors +impl Utf8Array { + /// Returns the element at index `i` as &str + /// # Safety + /// This function is safe iff `i < self.len`. + pub unsafe fn value_unchecked(&self, i: usize) -> &str { + // soundness: the invariant of the function + let start = self.offsets.get_unchecked(i).to_usize(); + let end = self.offsets.get_unchecked(i + 1).to_usize(); + + // soundness: the invariant of the struct + let slice = self.values.get_unchecked(start..end); + + // soundness: the invariant of the struct + std::str::from_utf8_unchecked(slice) + } + + /// Returns the element at index `i` + pub fn value(&self, i: usize) -> &str { + let start = self.offsets[i].to_usize(); + let end = self.offsets[i + 1].to_usize(); + + // soundness: the invariant of the struct + let slice = unsafe { self.values.get_unchecked(start..end) }; + + // soundness: we always check for utf8 soundness on constructors. + unsafe { std::str::from_utf8_unchecked(slice) } + } + + /// The optional validity. + #[inline] + pub fn validity(&self) -> Option<&Bitmap> { + self.validity.as_ref() + } /// Returns the offsets of this [`Utf8Array`]. #[inline] diff --git a/src/compute/utils.rs b/src/compute/utils.rs index 2187d8aa2d4..87f1372aeb7 100644 --- a/src/compute/utils.rs +++ b/src/compute/utils.rs @@ -16,7 +16,7 @@ // under the License. use crate::{ - array::{Array, BooleanArray, Offset, Utf8Array}, + array::{BooleanArray, Offset, Utf8Array}, bitmap::Bitmap, datatypes::DataType, };