Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added MutableStructArray (#1196)
Browse files Browse the repository at this point in the history
  • Loading branch information
hohav authored Aug 5, 2022
1 parent 3b9d86b commit f485b4d
Show file tree
Hide file tree
Showing 5 changed files with 253 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ pub use list::{ListArray, ListValuesIter, MutableListArray};
pub use map::MapArray;
pub use null::NullArray;
pub use primitive::*;
pub use struct_::StructArray;
pub use struct_::{MutableStructArray, StructArray};
pub use union::UnionArray;
pub use utf8::{MutableUtf8Array, Utf8Array, Utf8ValuesIter};

Expand Down
2 changes: 2 additions & 0 deletions src/array/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use super::{new_empty_array, new_null_array, Array};
mod ffi;
pub(super) mod fmt;
mod iterator;
mod mutable;
pub use mutable::*;

/// A [`StructArray`] is a nested [`Array`] with an optional validity representing
/// multiple [`Array`] with the same number of rows.
Expand Down
216 changes: 216 additions & 0 deletions src/array/struct_/mutable.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
use std::sync::Arc;

use crate::{
array::{Array, MutableArray},
bitmap::MutableBitmap,
datatypes::DataType,
};

use super::StructArray;

/// The mutable counterpart of [`StructArray`]: a set of [`MutableArray`] children
/// (one per struct field) sharing the same number of rows, plus an optional validity.
///
/// Converting a [`MutableStructArray`] into a [`StructArray`] is `O(1)`.
#[derive(Debug)]
pub struct MutableStructArray {
/// Logical type of the array; `from_data` panics unless it is a `DataType::Struct`.
data_type: DataType,
/// The child arrays, in the same order as the fields of `data_type`.
values: Vec<Box<dyn MutableArray>>,
/// Per-row validity; stays `None` until one is supplied or the first null row is pushed.
validity: Option<MutableBitmap>,
}

impl From<MutableStructArray> for StructArray {
    /// Freezes a [`MutableStructArray`] into an immutable [`StructArray`].
    ///
    /// The validity is only carried over when it has at least one unset bit;
    /// otherwise the resulting array is built without a validity.
    fn from(other: MutableStructArray) -> Self {
        // A bitmap that marks no row as null carries no information: drop it.
        let validity = other
            .validity
            .filter(|bitmap| bitmap.unset_bits() > 0)
            .map(|bitmap| bitmap.into());

        // Freeze every mutable child into its immutable counterpart.
        let children = other
            .values
            .into_iter()
            .map(|mut child| child.as_box())
            .collect();

        StructArray::from_data(other.data_type, children, validity)
    }
}

impl MutableStructArray {
    /// Creates a new [`MutableStructArray`] without a validity.
    /// # Panics
    /// This function panics under the same conditions as [`MutableStructArray::from_data`].
    pub fn new(data_type: DataType, values: Vec<Box<dyn MutableArray>>) -> Self {
        Self::from_data(data_type, values, None)
    }

    /// Creates a [`MutableStructArray`] out of its low-level parts.
    /// # Panics
    /// This function panics iff:
    /// * `data_type` is not [`DataType::Struct`]
    /// * The inner types of `data_type` are not equal to those of `values`
    /// * The `values` do not all have the same length
    /// * `validity` is not `None` and its length is different from the `values`'s length
    pub fn from_data(
        data_type: DataType,
        values: Vec<Box<dyn MutableArray>>,
        validity: Option<MutableBitmap>,
    ) -> Self {
        match data_type.to_logical_type() {
            // The declared field types must match the children, pairwise and in order.
            DataType::Struct(fields) => assert!(fields
                .iter()
                .map(|f| f.data_type())
                .eq(values.iter().map(|f| f.data_type()))),
            _ => panic!("StructArray must be initialized with DataType::Struct"),
        };
        let self_ = Self {
            data_type,
            values,
            validity,
        };
        self_.assert_lengths();
        self_
    }

    /// Panics unless all children have the same length and the validity (if any)
    /// has that same length. With no children the expected validity length is 0.
    fn assert_lengths(&self) {
        let first_len = self.values.first().map(|v| v.len());
        if let Some(len) = first_len {
            if !self.values.iter().all(|x| x.len() == len) {
                let lengths: Vec<_> = self.values.iter().map(|v| v.len()).collect();
                panic!("StructArray child lengths differ: {:?}", lengths);
            }
        }
        if let Some(validity) = &self.validity {
            assert_eq!(first_len.unwrap_or(0), validity.len());
        }
    }

    /// Decomposes the [`MutableStructArray`] into its data type, children and validity.
    pub fn into_data(self) -> (DataType, Vec<Box<dyn MutableArray>>, Option<MutableBitmap>) {
        (self.data_type, self.values, self.validity)
    }

    /// The mutable values
    pub fn mut_values(&mut self) -> &mut Vec<Box<dyn MutableArray>> {
        &mut self.values
    }

    /// The values
    pub fn values(&self) -> &Vec<Box<dyn MutableArray>> {
        &self.values
    }

    /// Returns the `i`th child array downcast to the concrete type `A`.
    ///
    /// Returns `None` if `i` is out of bounds or if the `i`th child is not an `A`.
    pub fn value<A: MutableArray + 'static>(&mut self, i: usize) -> Option<&mut A> {
        // `get_mut` instead of indexing: the signature already promises an `Option`,
        // so an invalid index is reported as `None` rather than as a panic.
        self.values.get_mut(i)?.as_mut_any().downcast_mut::<A>()
    }
}

impl MutableStructArray {
    /// Reserves `additional` entries in every child and in the validity (if any).
    pub fn reserve(&mut self, additional: usize) {
        for v in &mut self.values {
            v.reserve(additional);
        }
        if let Some(x) = self.validity.as_mut() {
            x.reserve(additional)
        }
    }

    /// Registers a new row whose validity is `valid`.
    /// Call this after pushing one element into each child array.
    /// # Panics
    /// This function panics if, after the push, the children do not all have the
    /// same length, or if the validity (when present) does not have that length.
    pub fn push(&mut self, valid: bool) {
        match &mut self.validity {
            Some(validity) => validity.push(valid),
            None => {
                // No validity is materialized while every row is valid; the first
                // null row creates it retroactively.
                if !valid {
                    self.init_validity()
                }
            }
        };
        self.assert_lengths();
    }

    /// Pushes a null into every child and registers the new row as null.
    fn push_null(&mut self) {
        for v in &mut self.values {
            v.push_null();
        }
        self.push(false);
    }

    /// Creates the validity with every existing row set to valid except the last
    /// one, which is the row that triggered the initialization.
    fn init_validity(&mut self) {
        let len = self.len();
        // Size the bitmap by the number of rows; `self.values.capacity()` would be
        // the capacity of the vector of children, which is unrelated to the row count.
        let mut validity = MutableBitmap::with_capacity(len);
        if len > 0 {
            validity.extend_constant(len, true);
            validity.set(len - 1, false);
        }
        self.validity = Some(validity)
    }

    /// Converts itself into an [`Array`].
    pub fn into_arc(self) -> Arc<dyn Array> {
        let a: StructArray = self.into();
        Arc::new(a)
    }

    /// Shrinks the capacity of the [`MutableStructArray`] to fit its current length.
    pub fn shrink_to_fit(&mut self) {
        for v in &mut self.values {
            v.shrink_to_fit();
        }
        if let Some(validity) = self.validity.as_mut() {
            validity.shrink_to_fit()
        }
    }
}

impl MutableArray for MutableStructArray {
    /// Number of rows, taken from the first child; `0` when there are no children.
    fn len(&self) -> usize {
        match self.values.first() {
            Some(child) => child.len(),
            None => 0,
        }
    }

    fn validity(&self) -> Option<&MutableBitmap> {
        self.validity.as_ref()
    }

    /// Moves the children and the validity out of `self` into a boxed [`StructArray`].
    fn as_box(&mut self) -> Box<dyn Array> {
        let data_type = self.data_type.clone();
        let children = std::mem::take(&mut self.values)
            .into_iter()
            .map(|mut child| child.as_box())
            .collect();
        let validity = self.validity.take().map(|bitmap| bitmap.into());

        Box::new(StructArray::from_data(data_type, children, validity))
    }

    /// Moves the children and the validity out of `self` into a shared [`StructArray`].
    fn as_arc(&mut self) -> Arc<dyn Array> {
        let data_type = self.data_type.clone();
        let children = std::mem::take(&mut self.values)
            .into_iter()
            .map(|mut child| child.as_box())
            .collect();
        let validity = self.validity.take().map(|bitmap| bitmap.into());

        Arc::new(StructArray::from_data(data_type, children, validity))
    }

    fn data_type(&self) -> &DataType {
        &self.data_type
    }

    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
        self
    }

    // The three methods below forward to the inherent methods of the same name:
    // method lookup prefers inherent methods over trait methods, so these calls
    // do not recurse.

    fn push_null(&mut self) {
        self.push_null()
    }

    fn shrink_to_fit(&mut self) {
        self.shrink_to_fit()
    }

    fn reserve(&mut self, additional: usize) {
        self.reserve(additional)
    }
}
1 change: 1 addition & 0 deletions tests/it/array/struct_/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
mod iterator;
mod mutable;

use arrow2::array::*;
use arrow2::bitmap::Bitmap;
Expand Down
33 changes: 33 additions & 0 deletions tests/it/array/struct_/mutable.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
use arrow2::{
array::*,
datatypes::{DataType, Field},
};

#[test]
fn push() {
    // A struct with a single nullable `Int32` field backed by one primitive child.
    let data_type = DataType::Struct(vec![Field::new("f1", DataType::Int32, true)]);
    let children = vec![Box::new(MutablePrimitiveArray::<i32>::new()) as Box<dyn MutableArray>];
    let mut array = MutableStructArray::new(data_type, children);

    // For every row: push into the child first, then register the row on the struct.
    let rows = vec![(Some(1), true), (None, false), (Some(2), true)];
    for (item, valid) in rows {
        array
            .value::<MutablePrimitiveArray<i32>>(0)
            .unwrap()
            .push(item);
        array.push(valid);
    }

    assert_eq!(array.len(), 3);
    assert!(array.is_valid(0));
    assert!(!array.is_valid(1));
    assert!(array.is_valid(2));

    let child = array.value::<MutablePrimitiveArray<i32>>(0).unwrap();
    assert_eq!(child.values(), &vec![1, 0, 2]);
}

0 comments on commit f485b4d

Please sign in to comment.