Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Made accessing validity simpler. (#432)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Sep 26, 2021
1 parent e27ff27 commit 194a95d
Show file tree
Hide file tree
Showing 10 changed files with 146 additions and 80 deletions.
6 changes: 6 additions & 0 deletions src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,12 @@ impl<O: Offset> BinaryArray<O> {
self.values.get_unchecked(start..end)
}

/// The optional validity.
///
/// Returns a borrowed view of the `validity` field: `Some(&bitmap)` when a
/// validity [`Bitmap`] is set on this array and `None` otherwise.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}

/// Returns the offsets that slice `.values()` to return valid values.
#[inline]
pub fn offsets(&self) -> &Buffer<O> {
Expand Down
33 changes: 21 additions & 12 deletions src/array/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,21 @@ impl BooleanArray {
}
}

/// Returns a clone of this [`BooleanArray`] whose validity is replaced by `validity`.
///
/// Passing `None` yields an array without a validity bitmap.
/// # Panics
/// This function panics iff `validity` is `Some` and `validity.len() != self.len()`.
pub fn with_validity(&self, validity: Option<Bitmap>) -> Self {
    // The bitmap must match the array's length exactly, not merely be "at least as large".
    if let Some(bitmap) = &validity {
        assert!(
            bitmap.len() == self.len(),
            "validity must be equal to the array's length"
        );
    }
    let mut arr = self.clone();
    arr.validity = validity;
    arr
}
}

// accessors
impl BooleanArray {
/// Returns the value at index `i`
/// # Panic
/// This function panics iff `i >= self.len()`.
Expand All @@ -103,23 +118,17 @@ impl BooleanArray {
self.values.get_bit_unchecked(i)
}

/// The optional validity.
///
/// Returns a borrowed view of the `validity` field: `Some(&bitmap)` when a
/// validity [`Bitmap`] is set on this array and `None` otherwise.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}

/// Returns the values of this [`BooleanArray`].
///
/// The values are handed out as a reference to the underlying [`Bitmap`];
/// nothing is copied.
#[inline]
pub fn values(&self) -> &Bitmap {
&self.values
}

/// Returns a clone of this [`BooleanArray`] whose validity is replaced by `validity`.
///
/// Passing `None` yields an array without a validity bitmap.
/// # Panics
/// This function panics iff `validity` is `Some` and `validity.len() != self.len()`.
pub fn with_validity(&self, validity: Option<Bitmap>) -> Self {
    // The bitmap must match the array's length exactly, not merely be "at least as large".
    if let Some(bitmap) = &validity {
        assert!(
            bitmap.len() == self.len(),
            "validity must be equal to the array's length"
        );
    }
    let mut arr = self.clone();
    arr.validity = validity;
    arr
}
}

impl Array for BooleanArray {
Expand Down
6 changes: 6 additions & 0 deletions src/array/dictionary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,12 @@ impl<K: DictionaryKey> DictionaryArray<K> {
arr
}

/// The optional validity. Equivalent to `self.keys().validity()`.
///
/// This accessor simply forwards to the validity of the keys array.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.keys.validity()
}

/// Returns the keys of the [`DictionaryArray`]. These keys can be used to fetch values
/// from `values`.
#[inline]
Expand Down
6 changes: 6 additions & 0 deletions src/array/fixed_size_binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ impl FixedSizeBinaryArray {
}
}

/// The optional validity.
///
/// Returns a borrowed view of the `validity` field: `Some(&bitmap)` when a
/// validity [`Bitmap`] is set on this array and `None` otherwise.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}

/// Returns the values allocated on this [`FixedSizeBinaryArray`].
pub fn values(&self) -> &Buffer<u8> {
&self.values
Expand Down
6 changes: 6 additions & 0 deletions src/array/fixed_size_list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@ impl FixedSizeListArray {
}
}

/// The optional validity.
///
/// Returns a borrowed view of the `validity` field: `Some(&bitmap)` when a
/// validity [`Bitmap`] is set on this array and `None` otherwise.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}

/// Returns the inner array.
pub fn values(&self) -> &Arc<dyn Array> {
&self.values
Expand Down
87 changes: 48 additions & 39 deletions src/array/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,50 @@ impl<O: Offset> ListArray<O> {
}
}

/// Returns a slice of this [`ListArray`] starting at `offset` and spanning `length` elements.
/// # Panics
/// panics iff `offset + length > self.len()`
pub fn slice(&self, offset: usize, length: usize) -> Self {
    // `checked_add` keeps an overflowing `offset + length` from wrapping around and
    // slipping past the bounds check that guards the unchecked slice below.
    assert!(
        matches!(offset.checked_add(length), Some(end) if end <= self.len()),
        "the offset of the new Buffer cannot exceed the existing length"
    );
    // SAFETY: the assertion above guarantees `offset + length <= self.len()`.
    unsafe { self.slice_unchecked(offset, length) }
}

/// Returns a slice of this [`ListArray`] without checking bounds.
///
/// The validity (when present) and the offsets are sliced, the values are
/// cloned as-is, and the array's own `offset` is advanced by `offset`.
/// # Safety
/// The caller must ensure that `offset + length <= self.len()`.
pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self {
let validity = self
.validity
.clone()
.map(|x| x.slice_unchecked(offset, length));
// One more offset than `length` is kept.
// NOTE(review): presumably because element `i` is delimited by offsets `i` and `i + 1` — confirm.
let offsets = self.offsets.clone().slice_unchecked(offset, length + 1);
Self {
data_type: self.data_type.clone(),
offsets,
values: self.values.clone(),
validity,
offset: self.offset + offset,
}
}

/// Returns a clone of this [`ListArray`] whose validity is replaced by `validity`.
///
/// Passing `None` yields an array without a validity bitmap.
/// # Panics
/// This function panics iff `validity` is `Some` and `validity.len() != self.len()`.
pub fn with_validity(&self, validity: Option<Bitmap>) -> Self {
    // The bitmap must match the array's length exactly, not merely be "at least as large".
    if let Some(bitmap) = &validity {
        assert!(
            bitmap.len() == self.len(),
            "validity must be equal to the array's length"
        );
    }
    let mut arr = self.clone();
    arr.validity = validity;
    arr
}
}

// Accessors
impl<O: Offset> ListArray<O> {
/// Returns the element at index `i`
#[inline]
pub fn value(&self, i: usize) -> Box<dyn Array> {
Expand Down Expand Up @@ -99,33 +143,10 @@ impl<O: Offset> ListArray<O> {
self.values.slice_unchecked(offset.to_usize(), length)
}

/// Returns a slice of this [`ListArray`] starting at `offset` and spanning `length` elements.
/// # Panics
/// panics iff `offset + length > self.len()`
pub fn slice(&self, offset: usize, length: usize) -> Self {
    // `checked_add` keeps an overflowing `offset + length` from wrapping around and
    // slipping past the bounds check that guards the unchecked slice below.
    assert!(
        matches!(offset.checked_add(length), Some(end) if end <= self.len()),
        "the offset of the new Buffer cannot exceed the existing length"
    );
    // SAFETY: the assertion above guarantees `offset + length <= self.len()`.
    unsafe { self.slice_unchecked(offset, length) }
}

/// Returns a slice of this [`ListArray`].
/// # Safety
/// The caller must ensure that `offset + length <= self.len()`.
pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self {
let validity = self
.validity
.clone()
.map(|x| x.slice_unchecked(offset, length));
let offsets = self.offsets.clone().slice_unchecked(offset, length + 1);
Self {
data_type: self.data_type.clone(),
offsets,
values: self.values.clone(),
validity,
offset: self.offset + offset,
}
/// The optional validity.
///
/// Returns a borrowed view of the `validity` field: `Some(&bitmap)` when a
/// validity [`Bitmap`] is set on this array and `None` otherwise.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}

#[inline]
Expand All @@ -137,18 +158,6 @@ impl<O: Offset> ListArray<O> {
pub fn values(&self) -> &Arc<dyn Array> {
&self.values
}

/// Returns a clone of this [`ListArray`] whose validity is replaced by `validity`.
///
/// Passing `None` yields an array without a validity bitmap.
/// # Panics
/// This function panics iff `validity` is `Some` and `validity.len() != self.len()`.
pub fn with_validity(&self, validity: Option<Bitmap>) -> Self {
    // The bitmap must match the array's length exactly, not merely be "at least as large".
    if let Some(bitmap) = &validity {
        assert!(
            bitmap.len() == self.len(),
            "validity must be equal to the array's length"
        );
    }
    let mut arr = self.clone();
    arr.validity = validity;
    arr
}
}

impl<O: Offset> ListArray<O> {
Expand Down
8 changes: 7 additions & 1 deletion src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,13 @@ impl<T: NativeType> PrimitiveArray<T> {
arr
}

/// The values.
/// The optional validity.
///
/// Returns a borrowed view of the `validity` field: `Some(&bitmap)` when a
/// validity [`Bitmap`] is set on this array and `None` otherwise.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}

/// The values [`Buffer`].
/// Values on null slots are undetermined (they can be anything).
#[inline]
pub fn values(&self) -> &Buffer<T> {
Expand Down
9 changes: 9 additions & 0 deletions src/array/struct_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,15 @@ impl StructArray {
arr.validity = validity;
arr
}
}

// Accessors
impl StructArray {
/// The optional validity.
///
/// Returns a borrowed view of the `validity` field: `Some(&bitmap)` when a
/// validity [`Bitmap`] is set on this array and `None` otherwise.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}

/// Returns the values of this [`StructArray`].
pub fn values(&self) -> &[Arc<dyn Array>] {
Expand Down
63 changes: 36 additions & 27 deletions src/array/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,33 +132,6 @@ impl<O: Offset> Utf8Array<O> {
}
}

/// Returns the element at index `i` as &str, without any bounds check.
/// # Safety
/// This function is safe iff `i < self.len()`.
pub unsafe fn value_unchecked(&self, i: usize) -> &str {
// soundness: the invariant of the function (the caller guarantees `i` is in bounds),
// which is what makes reading offsets `i` and `i + 1` valid
let start = self.offsets.get_unchecked(i).to_usize();
let end = self.offsets.get_unchecked(i + 1).to_usize();

// soundness: the invariant of the struct (`start..end` lies within `values`)
let slice = self.values.get_unchecked(start..end);

// soundness: the invariant of the struct (the bytes are valid utf8)
std::str::from_utf8_unchecked(slice)
}

/// Returns the element at index `i` as a `&str`.
///
/// The offsets at `i` and `i + 1` are read with regular (checked) indexing;
/// only the access into the values and the utf8 conversion are unchecked.
pub fn value(&self, i: usize) -> &str {
    let (start, end) = (self.offsets[i].to_usize(), self.offsets[i + 1].to_usize());

    // SAFETY: `start..end` is within `values` by the invariant of the struct, and
    // we always check for utf8 soundness on constructors.
    unsafe { std::str::from_utf8_unchecked(self.values.get_unchecked(start..end)) }
}

/// Returns a slice of this [`Utf8Array`].
/// # Implementation
/// This operation is `O(1)` as it essentially amounts to increasing two ref counts.
Expand Down Expand Up @@ -203,6 +176,42 @@ impl<O: Offset> Utf8Array<O> {
arr.validity = validity;
arr
}
}

// Accessors
impl<O: Offset> Utf8Array<O> {
/// Returns the element at index `i` as &str, without any bounds check.
/// # Safety
/// This function is safe iff `i < self.len()`.
pub unsafe fn value_unchecked(&self, i: usize) -> &str {
// soundness: the invariant of the function (the caller guarantees `i` is in bounds),
// which is what makes reading offsets `i` and `i + 1` valid
let start = self.offsets.get_unchecked(i).to_usize();
let end = self.offsets.get_unchecked(i + 1).to_usize();

// soundness: the invariant of the struct (`start..end` lies within `values`)
let slice = self.values.get_unchecked(start..end);

// soundness: the invariant of the struct (the bytes are valid utf8)
std::str::from_utf8_unchecked(slice)
}

/// Returns the element at index `i` as a `&str`.
///
/// The offsets at `i` and `i + 1` are read with regular (checked) indexing;
/// only the access into the values and the utf8 conversion are unchecked.
pub fn value(&self, i: usize) -> &str {
    let (start, end) = (self.offsets[i].to_usize(), self.offsets[i + 1].to_usize());

    // SAFETY: `start..end` is within `values` by the invariant of the struct, and
    // we always check for utf8 soundness on constructors.
    unsafe { std::str::from_utf8_unchecked(self.values.get_unchecked(start..end)) }
}

/// The optional validity.
///
/// Returns a borrowed view of the `validity` field: `Some(&bitmap)` when a
/// validity [`Bitmap`] is set on this array and `None` otherwise.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}

/// Returns the offsets of this [`Utf8Array`].
#[inline]
Expand Down
2 changes: 1 addition & 1 deletion src/compute/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use crate::{
array::{Array, BooleanArray, Offset, Utf8Array},
array::{BooleanArray, Offset, Utf8Array},
bitmap::Bitmap,
datatypes::DataType,
};
Expand Down

0 comments on commit 194a95d

Please sign in to comment.