Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added GrowableFixedSizeList and improved MutableFixedSizeListArray (#470)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Sep 30, 2021
1 parent b78eeb7 commit 76e8992
Show file tree
Hide file tree
Showing 7 changed files with 268 additions and 86 deletions.
127 changes: 54 additions & 73 deletions src/array/fixed_size_list/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
use std::sync::Arc;

use crate::{
array::{
Array, MutableArray, MutableBinaryArray, MutablePrimitiveArray, MutableUtf8Array, Offset,
},
array::{Array, MutableArray, TryExtend, TryPush},
bitmap::MutableBitmap,
datatypes::DataType,
error::{ArrowError, Result},
types::NativeType,
};

use super::FixedSizeListArray;
Expand All @@ -32,6 +29,7 @@ impl<M: MutableArray> From<MutableFixedSizeListArray<M>> for FixedSizeListArray
}

impl<M: MutableArray> MutableFixedSizeListArray<M> {
/// Creates a new [`MutableFixedSizeListArray`] from a [`MutableArray`] and size.
pub fn new(values: M, size: usize) -> Self {
let data_type = FixedSizeListArray::default_datatype(values.data_type().clone(), size);
assert_eq!(values.len(), 0);
Expand All @@ -43,20 +41,38 @@ impl<M: MutableArray> MutableFixedSizeListArray<M> {
}
}

/// Mutable access to the inner values.
///
/// NOTE(review): the other methods of this type divide / take the remainder of
/// `values.len()` by `size`, so callers mutating the values directly presumably
/// must keep their length a multiple of `size` — confirm.
pub fn mut_values(&mut self) -> &mut M {
&mut self.values
}

/// The inner values: a shared reference to the [`MutableArray`] holding the
/// items of every list slot.
pub fn values(&self) -> &M {
&self.values
}

/// Creates the validity bitmap the first time it is needed: every existing
/// entry is marked valid except the last one, which is marked null.
///
/// Called from `push_null` when no validity bitmap exists yet, i.e. right
/// after the null values of the new slot were appended to the inner values.
fn init_validity(&mut self) {
self.validity = Some(MutableBitmap::from_trusted_len_iter(
std::iter::repeat(true)
.take(self.values.len() - 1)
.chain(std::iter::once(false)),
))
let len = self.values.len() / self.size;

let mut validity = MutableBitmap::new();
validity.extend_constant(len, true);
validity.set(len - 1, false);
self.validity = Some(validity)
}

/// Marks the slot whose values were just appended as valid.
///
/// # Errors
/// Returns [`ArrowError::KeyOverflowError`] when the length of the inner
/// values is not a multiple of `size`; the validity is left untouched then.
#[inline]
fn try_push_valid(&mut self) -> Result<()> {
    if self.values.len() % self.size == 0 {
        // Without a validity bitmap every slot is implicitly valid.
        if let Some(bitmap) = self.validity.as_mut() {
            bitmap.push(true);
        }
        Ok(())
    } else {
        Err(ArrowError::KeyOverflowError)
    }
}

/// Appends a null slot: `size` null items are pushed to the inner values and
/// the slot is flagged as null, creating the validity bitmap if it did not
/// exist yet.
#[inline]
fn push_null(&mut self) {
    for _ in 0..self.size {
        self.values.push_null();
    }
    if let Some(bitmap) = self.validity.as_mut() {
        bitmap.push(false);
    } else {
        // First null seen: build the bitmap for all slots pushed so far.
        self.init_validity();
    }
}
}

Expand Down Expand Up @@ -97,6 +113,7 @@ impl<M: MutableArray + 'static> MutableArray for MutableFixedSizeListArray<M> {
self
}

#[inline]
fn push_null(&mut self) {
(0..self.size).for_each(|_| {
self.values.push_null();
Expand All @@ -109,69 +126,33 @@ impl<M: MutableArray + 'static> MutableArray for MutableFixedSizeListArray<M> {
}
}

impl<T: NativeType> MutableFixedSizeListArray<MutablePrimitiveArray<T>> {
pub fn try_from_iter<P: IntoIterator<Item = Option<T>>, I: IntoIterator<Item = Option<P>>>(
iter: I,
size: usize,
data_type: DataType,
) -> Result<Self> {
let iterator = iter.into_iter();
let (lower, _) = iterator.size_hint();
let array = MutablePrimitiveArray::<T>::with_capacity_from(lower * size, data_type);
let mut array = MutableFixedSizeListArray::new(array, size);
for items in iterator {
if let Some(items) = items {
let values = array.mut_values();
let len = values.len();
values.extend(items);
if values.len() - len != size {
return Err(ArrowError::InvalidArgumentError(
"A FixedSizeList must have all its values with the same size".to_string(),
));
};
} else {
array.push_null();
}
impl<M, I, T> TryExtend<Option<I>> for MutableFixedSizeListArray<M>
where
M: MutableArray + TryExtend<Option<T>>,
I: IntoIterator<Item = Option<T>>,
{
#[inline]
fn try_extend<II: IntoIterator<Item = Option<I>>>(&mut self, iter: II) -> Result<()> {
for items in iter {
self.try_push(items)?;
}
Ok(array)
Ok(())
}
}

macro_rules! impl_offsets {
($mutable:ident, $type:ty) => {
impl<O: Offset> MutableFixedSizeListArray<$mutable<O>> {
pub fn try_from_iter<
T: AsRef<$type>,
P: IntoIterator<Item = Option<T>>,
I: IntoIterator<Item = Option<P>>,
>(
iter: I,
size: usize,
) -> Result<Self> {
let iterator = iter.into_iter();
let (lower, _) = iterator.size_hint();
let array = $mutable::<O>::with_capacity(lower * size);
let mut array = MutableFixedSizeListArray::new(array, size);
for items in iterator {
if let Some(items) = items {
let values = array.mut_values();
let len = values.len();
values.extend(items);
if values.len() - len != size {
return Err(ArrowError::InvalidArgumentError(
"A FixedSizeList must have all its values with the same size"
.to_string(),
));
};
} else {
array.push_null();
}
}
Ok(array)
}
impl<M, I, T> TryPush<Option<I>> for MutableFixedSizeListArray<M>
where
M: MutableArray + TryExtend<Option<T>>,
I: IntoIterator<Item = Option<T>>,
{
#[inline]
fn try_push(&mut self, item: Option<I>) -> Result<()> {
if let Some(items) = item {
self.values.try_extend(items)?;
self.try_push_valid()?;
} else {
self.push_null();
}
};
Ok(())
}
}

impl_offsets!(MutableUtf8Array, str);
impl_offsets!(MutableBinaryArray, [u8]);
108 changes: 108 additions & 0 deletions src/array/growable/fixed_size_list.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
use std::sync::Arc;

use crate::{
array::{Array, FixedSizeListArray},
bitmap::MutableBitmap,
datatypes::DataType,
};

use super::{
make_growable,
utils::{build_extend_null_bits, ExtendNullBits},
Growable,
};

/// Concrete [`Growable`] for the [`FixedSizeListArray`].
///
/// Slots are copied from a set of bound arrays, addressed by their index in
/// that set, into a new [`FixedSizeListArray`].
pub struct GrowableFixedSizeList<'a> {
// The arrays this growable is bound to; the first one supplies the output data type.
arrays: Vec<&'a FixedSizeListArray>,
// Validity accumulated so far, one bit per list slot.
validity: MutableBitmap,
// Growable over the child values of the bound arrays.
values: Box<dyn Growable<'a> + 'a>,
// One validity-extending function per bound array, indexed like `arrays`.
extend_null_bits: Vec<ExtendNullBits<'a>>,
// Number of child values per list slot, read from the data type of the first array.
size: usize,
}

impl<'a> GrowableFixedSizeList<'a> {
    /// Creates a new [`GrowableFixedSizeList`] bound to `arrays` with a pre-allocated `capacity`.
    ///
    /// Validity is tracked when `use_validity` is `true` or when any of `arrays`
    /// contains nulls. The list size is read from the logical data type of the
    /// first array.
    /// # Panics
    /// If `arrays` is empty.
    pub fn new(
        arrays: Vec<&'a FixedSizeListArray>,
        mut use_validity: bool,
        capacity: usize,
    ) -> Self {
        assert!(!arrays.is_empty());

        // if any of the arrays has nulls, insertions from any array requires setting bits
        // as there is at least one array with nulls.
        // `&&` short-circuits: the arrays are only scanned when validity was not requested.
        if !use_validity && arrays.iter().any(|array| array.null_count() > 0) {
            use_validity = true;
        }

        let size =
            if let DataType::FixedSizeList(_, size) = &arrays[0].data_type().to_logical_type() {
                *size as usize
            } else {
                unreachable!("`GrowableFixedSizeList` expects `DataType::FixedSizeList`")
            };

        // One validity-extending function per bound array, in the same order as `arrays`.
        let extend_null_bits = arrays
            .iter()
            .map(|array| build_extend_null_bits(*array, use_validity))
            .collect();

        // The child values are grown through their own growable.
        let inner = arrays
            .iter()
            .map(|array| array.values().as_ref())
            .collect::<Vec<_>>();
        // NOTE(review): `capacity` only sizes the validity bitmap; the values growable
        // starts with capacity 0 even though `capacity * size` values are presumably
        // expected — left unchanged here, consider pre-allocating.
        let values = make_growable(&inner, use_validity, 0);

        Self {
            arrays,
            values,
            validity: MutableBitmap::with_capacity(capacity),
            extend_null_bits,
            size,
        }
    }

    /// Builds a [`FixedSizeListArray`] from what was accumulated so far.
    ///
    /// The accumulated validity is moved out with [`std::mem::take`], leaving a
    /// default bitmap in its place; the data type is cloned from the first
    /// bound array.
    fn to(&mut self) -> FixedSizeListArray {
        let validity = std::mem::take(&mut self.validity);
        let values = self.values.as_arc();

        FixedSizeListArray::from_data(self.arrays[0].data_type().clone(), values, validity.into())
    }
}

impl<'a> Growable<'a> for GrowableFixedSizeList<'a> {
    /// Appends `len` list slots, starting at slot `start`, from the bound array
    /// at position `index`.
    fn extend(&mut self, index: usize, start: usize, len: usize) {
        (self.extend_null_bits[index])(&mut self.validity, start, len);

        // Each list slot spans `size` child values, so scale the slot range.
        let values_start = start * self.size;
        let values_len = len * self.size;
        self.values.extend(index, values_start, values_len);
    }

    /// Appends `additional` null list slots.
    fn extend_validity(&mut self, additional: usize) {
        // Every null slot still occupies `size` entries in the child values.
        let values_additional = additional * self.size;
        self.values.extend_validity(values_additional);
        self.validity.extend_constant(additional, false);
    }

    /// Returns the accumulated array behind an [`Arc`].
    fn as_arc(&mut self) -> Arc<dyn Array> {
        let array = self.to();
        Arc::new(array)
    }

    /// Returns the accumulated array behind a [`Box`].
    fn as_box(&mut self) -> Box<dyn Array> {
        let array = self.to();
        Box::new(array)
    }
}

impl<'a> From<GrowableFixedSizeList<'a>> for FixedSizeListArray {
fn from(val: GrowableFixedSizeList<'a>) -> Self {
let mut values = val.values;
let values = values.as_arc();

Self::from_data(
val.arrays[0].data_type().clone(),
values,
val.validity.into(),
)
}
}
14 changes: 13 additions & 1 deletion src/array/growable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ mod list;
pub use list::GrowableList;
mod structure;
pub use structure::GrowableStruct;
mod fixed_size_list;
pub use fixed_size_list::GrowableFixedSizeList;
mod utf8;
pub use utf8::GrowableUtf8;
mod dictionary;
Expand Down Expand Up @@ -201,7 +203,17 @@ pub fn make_growable<'a>(
capacity,
))
}
FixedSizeList => todo!(),
FixedSizeList => {
let arrays = arrays
.iter()
.map(|array| array.as_any().downcast_ref().unwrap())
.collect::<Vec<_>>();
Box::new(fixed_size_list::GrowableFixedSizeList::new(
arrays,
use_validity,
capacity,
))
}
Union => todo!(),
Dictionary(key_type) => {
with_match_physical_dictionary_key_type!(key_type, |$T| {
Expand Down
8 changes: 4 additions & 4 deletions tests/it/array/equal/fixed_size_list.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use arrow2::{
array::{FixedSizeListArray, MutableFixedSizeListArray, MutablePrimitiveArray},
array::{FixedSizeListArray, MutableFixedSizeListArray, MutablePrimitiveArray, TryExtend},
datatypes::DataType,
};

Expand All @@ -16,9 +16,9 @@ fn create_fixed_size_list_array<U: AsRef<[i32]>, T: AsRef<[Option<U>]>>(
})
});

MutableFixedSizeListArray::<MutablePrimitiveArray<i32>>::try_from_iter(data, 3, DataType::Int32)
.unwrap()
.into()
let mut list = MutableFixedSizeListArray::new(MutablePrimitiveArray::<i32>::new(), 3);
list.try_extend(data).unwrap();
list.into()
}

#[test]
Expand Down
11 changes: 3 additions & 8 deletions tests/it/array/fixed_size_list/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,9 @@ fn primitive() {
Some(vec![Some(4), None, Some(6)]),
];

let list: FixedSizeListArray =
MutableFixedSizeListArray::<MutablePrimitiveArray<i32>>::try_from_iter(
data,
3,
DataType::Int32,
)
.unwrap()
.into();
let mut list = MutableFixedSizeListArray::new(MutablePrimitiveArray::<i32>::new(), 3);
list.try_extend(data).unwrap();
let list: FixedSizeListArray = list.into();

let a = list.value(0);
let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
Expand Down
Loading

0 comments on commit 76e8992

Please sign in to comment.