Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Made accessing validity simpler.
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Sep 25, 2021
1 parent 49c8f55 commit 96fde01
Show file tree
Hide file tree
Showing 10 changed files with 129 additions and 63 deletions.
6 changes: 6 additions & 0 deletions src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,12 @@ impl<O: Offset> BinaryArray<O> {
self.values.get_unchecked(start..end)
}

/// Returns the optional validity [`Bitmap`] of this array.
///
/// `None` is returned when no validity bitmap is set on this array.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}

/// Returns the offsets that slice `.values()` to return valid values.
#[inline]
pub fn offsets(&self) -> &Buffer<O> {
Expand Down
33 changes: 21 additions & 12 deletions src/array/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,21 @@ impl BooleanArray {
}
}

/// Returns a clone of this [`BooleanArray`] whose validity is replaced by `validity`.
///
/// Passing `None` yields an array without a validity bitmap.
/// # Panics
/// This function panics iff `validity` is `Some` and its length differs from `self.len()`.
pub fn with_validity(&self, validity: Option<Bitmap>) -> Self {
    if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
        // the check requires an exact length match, so the message states exactly that
        panic!("validity's length must be equal to the array's length")
    }
    let mut arr = self.clone();
    arr.validity = validity;
    arr
}
}

// Accessors
impl BooleanArray {
/// Returns the value at index `i`
/// # Panic
/// This function panics iff `i >= self.len()`.
Expand All @@ -86,23 +101,17 @@ impl BooleanArray {
self.values.get_bit_unchecked(i)
}

/// Returns the optional validity [`Bitmap`] of this [`BooleanArray`].
///
/// `None` is returned when no validity bitmap is set on this array.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}

/// Returns a reference to the [`Bitmap`] holding the values of this [`BooleanArray`].
// NOTE(review): bits on slots that the validity marks as null are presumably
// unspecified, as documented for other arrays — confirm before relying on them.
#[inline]
pub fn values(&self) -> &Bitmap {
&self.values
}

/// Returns a clone of this [`BooleanArray`] whose validity is replaced by `validity`.
///
/// Passing `None` yields an array without a validity bitmap.
/// # Panics
/// This function panics iff `validity` is `Some` and its length differs from `self.len()`.
pub fn with_validity(&self, validity: Option<Bitmap>) -> Self {
    if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
        // the check requires an exact length match, so the message states exactly that
        panic!("validity's length must be equal to the array's length")
    }
    let mut arr = self.clone();
    arr.validity = validity;
    arr
}
}

impl Array for BooleanArray {
Expand Down
6 changes: 6 additions & 0 deletions src/array/dictionary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,12 @@ impl<K: DictionaryKey> DictionaryArray<K> {
arr
}

/// Returns the optional validity [`Bitmap`] of this [`DictionaryArray`].
///
/// The validity is not stored separately: this forwards to the validity of the keys,
/// i.e. it is equivalent to `self.keys().validity()`.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.keys.validity()
}

/// Returns the keys of the [`DictionaryArray`]. These keys can be used to fetch values
/// from `values`.
#[inline]
Expand Down
6 changes: 6 additions & 0 deletions src/array/fixed_size_binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ impl FixedSizeBinaryArray {
}
}

/// Returns the optional validity [`Bitmap`] of this [`FixedSizeBinaryArray`].
///
/// `None` is returned when no validity bitmap is set on this array.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}

/// Returns the values allocated on this [`FixedSizeBinaryArray`].
pub fn values(&self) -> &Buffer<u8> {
&self.values
Expand Down
6 changes: 6 additions & 0 deletions src/array/fixed_size_list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ impl FixedSizeListArray {
}
}

/// Returns the optional validity [`Bitmap`] of this [`FixedSizeListArray`].
///
/// `None` is returned when no validity bitmap is set on this array.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}

/// Returns the inner array.
pub fn values(&self) -> &Arc<dyn Array> {
&self.values
Expand Down
53 changes: 31 additions & 22 deletions src/array/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,33 @@ impl<O: Offset> ListArray<O> {
}
}

/// Returns a new [`ListArray`] restricted to `length` elements starting at `offset`.
///
/// The inner `values` are not sliced: they are cloned as-is, and only the
/// offsets and the validity (when present) are narrowed to the requested window.
// NOTE(review): out-of-bounds `offset`/`length` are not checked here; this presumably
// panics inside the `slice` of the bitmap or of the offsets — confirm.
pub fn slice(&self, offset: usize, length: usize) -> Self {
    let validity = self
        .validity
        .as_ref()
        .map(|bitmap| bitmap.clone().slice(offset, length));
    // `length` elements are delimited by `length + 1` consecutive offsets
    let offsets = self.offsets.clone().slice(offset, length + 1);
    let data_type = self.data_type.clone();
    let values = self.values.clone();
    Self {
        data_type,
        offsets,
        values,
        validity,
        offset: self.offset + offset,
    }
}

/// Returns a clone of this [`ListArray`] whose validity is replaced by `validity`.
///
/// Passing `None` yields an array without a validity bitmap.
/// # Panics
/// This function panics iff `validity` is `Some` and its length differs from `self.len()`.
pub fn with_validity(&self, validity: Option<Bitmap>) -> Self {
    if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
        // the check requires an exact length match, so the message states exactly that
        panic!("validity's length must be equal to the array's length")
    }
    let mut arr = self.clone();
    arr.validity = validity;
    arr
}
}

// Accessors
impl<O: Offset> ListArray<O> {
/// Returns the element at index `i`
#[inline]
pub fn value(&self, i: usize) -> Box<dyn Array> {
Expand Down Expand Up @@ -96,16 +123,10 @@ impl<O: Offset> ListArray<O> {
self.values.slice(offset.to_usize(), length)
}

pub fn slice(&self, offset: usize, length: usize) -> Self {
let validity = self.validity.clone().map(|x| x.slice(offset, length));
let offsets = self.offsets.clone().slice(offset, length + 1);
Self {
data_type: self.data_type.clone(),
offsets,
values: self.values.clone(),
validity,
offset: self.offset + offset,
}
/// Returns the optional validity [`Bitmap`] of this [`ListArray`].
///
/// `None` is returned when no validity bitmap is set on this array.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}

#[inline]
Expand All @@ -117,18 +138,6 @@ impl<O: Offset> ListArray<O> {
pub fn values(&self) -> &Arc<dyn Array> {
&self.values
}

/// Returns a clone of this [`ListArray`] whose validity is replaced by `validity`.
///
/// Passing `None` yields an array without a validity bitmap.
/// # Panics
/// This function panics iff `validity` is `Some` and its length differs from `self.len()`.
pub fn with_validity(&self, validity: Option<Bitmap>) -> Self {
    if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
        // the check requires an exact length match, so the message states exactly that
        panic!("validity's length must be equal to the array's length")
    }
    let mut arr = self.clone();
    arr.validity = validity;
    arr
}
}

impl<O: Offset> ListArray<O> {
Expand Down
8 changes: 7 additions & 1 deletion src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,13 @@ impl<T: NativeType> PrimitiveArray<T> {
arr
}

/// The values.
/// Returns the optional validity [`Bitmap`] of this array.
///
/// `None` is returned when no validity bitmap is set on this array.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}

/// The values [`Buffer`].
/// Values on null slots are undetermined (they can be anything).
#[inline]
pub fn values(&self) -> &Buffer<T> {
Expand Down
9 changes: 9 additions & 0 deletions src/array/struct_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,15 @@ impl StructArray {
arr.validity = validity;
arr
}
}

// Accessors
impl StructArray {
/// Returns the optional validity [`Bitmap`] of this [`StructArray`].
///
/// `None` is returned when no validity bitmap is set on this array.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}

/// Returns the values of this [`StructArray`].
pub fn values(&self) -> &[Arc<dyn Array>] {
Expand Down
63 changes: 36 additions & 27 deletions src/array/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,33 +132,6 @@ impl<O: Offset> Utf8Array<O> {
}
}

/// Returns the string stored at index `i`, without any bounds check.
/// # Safety
/// The caller must guarantee that `i < self.len`; anything else is undefined behavior.
pub unsafe fn value_unchecked(&self, i: usize) -> &str {
    // SAFETY: upheld by the caller (see `# Safety`): the offsets at `i` and `i + 1`
    // are read without a bounds check.
    let begin = self.offsets.get_unchecked(i).to_usize();
    let finish = self.offsets.get_unchecked(i + 1).to_usize();

    // SAFETY: struct invariant — the offsets delimit a range inside `values`.
    let bytes = self.values.get_unchecked(begin..finish);

    // SAFETY: struct invariant — the delimited bytes are valid utf8.
    std::str::from_utf8_unchecked(bytes)
}

/// Returns the string stored at index `i`.
/// # Panics
/// Panics if `i` or `i + 1` is out of bounds of the offsets.
pub fn value(&self, i: usize) -> &str {
    // bounds-checked reads of the two offsets that delimit element `i`
    let range = self.offsets[i].to_usize()..self.offsets[i + 1].to_usize();

    // SAFETY: struct invariant — the offsets delimit a range inside `values`.
    let bytes = unsafe { self.values.get_unchecked(range) };

    // SAFETY: utf8 validity is always checked on constructors.
    unsafe { std::str::from_utf8_unchecked(bytes) }
}

/// Returns a slice of this [`Utf8Array`].
/// # Implementation
/// This operation is `O(1)` as it amounts to essentially increase two ref counts.
Expand Down Expand Up @@ -188,6 +161,42 @@ impl<O: Offset> Utf8Array<O> {
arr.validity = validity;
arr
}
}

// Accessors
impl<O: Offset> Utf8Array<O> {
/// Returns the string stored at index `i`, without any bounds check.
/// # Safety
/// The caller must guarantee that `i < self.len`; anything else is undefined behavior.
pub unsafe fn value_unchecked(&self, i: usize) -> &str {
    // SAFETY: upheld by the caller (see `# Safety`): the offsets at `i` and `i + 1`
    // are read without a bounds check.
    let begin = self.offsets.get_unchecked(i).to_usize();
    let finish = self.offsets.get_unchecked(i + 1).to_usize();

    // SAFETY: struct invariant — the offsets delimit a range inside `values`.
    let bytes = self.values.get_unchecked(begin..finish);

    // SAFETY: struct invariant — the delimited bytes are valid utf8.
    std::str::from_utf8_unchecked(bytes)
}

/// Returns the string stored at index `i`.
/// # Panics
/// Panics if `i` or `i + 1` is out of bounds of the offsets.
pub fn value(&self, i: usize) -> &str {
    // bounds-checked reads of the two offsets that delimit element `i`
    let range = self.offsets[i].to_usize()..self.offsets[i + 1].to_usize();

    // SAFETY: struct invariant — the offsets delimit a range inside `values`.
    let bytes = unsafe { self.values.get_unchecked(range) };

    // SAFETY: utf8 validity is always checked on constructors.
    unsafe { std::str::from_utf8_unchecked(bytes) }
}

/// Returns the optional validity [`Bitmap`] of this [`Utf8Array`].
///
/// `None` is returned when no validity bitmap is set on this array.
#[inline]
pub fn validity(&self) -> Option<&Bitmap> {
self.validity.as_ref()
}

/// Returns the offsets of this [`Utf8Array`].
#[inline]
Expand Down
2 changes: 1 addition & 1 deletion src/compute/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use crate::{
array::{Array, BooleanArray, Offset, Utf8Array},
array::{BooleanArray, Offset, Utf8Array},
bitmap::Bitmap,
datatypes::DataType,
};
Expand Down

0 comments on commit 96fde01

Please sign in to comment.