Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added iterator for StructArray #613

Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions src/array/struct_/ffi.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
use std::sync::Arc;

use super::super::{ffi::ToFfi, Array, FromFfi};
use super::StructArray;
use crate::{error::Result, ffi};

// NOTE(review): the safety contract of `ToFfi` is declared on the trait, which is not
// visible from this file — confirm this implementation upholds it.
unsafe impl ToFfi for StructArray {
    /// Returns a single buffer slot: the pointer of the validity bitmap, or `None`
    /// when the array has no validity.
    fn buffers(&self) -> Vec<Option<std::ptr::NonNull<u8>>> {
        let validity_ptr = self.validity.as_ref().map(|bitmap| bitmap.as_ptr());
        vec![validity_ptr]
    }

    /// Returns the child arrays (one shared handle per child).
    fn children(&self) -> Vec<Arc<dyn Array>> {
        self.values.iter().map(Arc::clone).collect()
    }

    /// Returns the offset of the validity bitmap, or `0` when there is no validity.
    fn offset(&self) -> Option<usize> {
        let validity_offset = match self.validity.as_ref() {
            Some(bitmap) => bitmap.offset(),
            None => 0,
        };
        Some(validity_offset)
    }

    /// Returns a clone of this array.
    fn to_ffi_aligned(&self) -> Self {
        self.clone()
    }
}

impl<A: ffi::ArrowArrayRef> FromFfi<A> for StructArray {
    /// Builds a [`StructArray`] from an FFI array reference.
    ///
    /// The data type is read from the FFI field, the validity from the FFI array, and
    /// one child is imported per field of the struct data type.
    ///
    /// # Errors
    /// Propagates any error from reading the validity, fetching a child, or importing it.
    ///
    /// # Safety
    /// NOTE(review): the caller obligations are those of `FromFfi::try_from_ffi`, which
    /// are declared on the trait and not visible here.
    unsafe fn try_from_ffi(array: A) -> Result<Self> {
        let data_type = array.field().data_type().clone();
        let num_children = Self::get_fields(&data_type).len();

        let validity = unsafe { array.validity() }?;

        let mut values: Vec<Arc<dyn Array>> = Vec::with_capacity(num_children);
        for index in 0..num_children {
            let child = array.child(index)?;
            values.push(ffi::try_from(child)?.into());
        }

        Ok(Self::from_data(data_type, values, validity))
    }
}
100 changes: 100 additions & 0 deletions src/array/struct_/iterator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
use crate::{
bitmap::utils::{zip_validity, ZipValidity},
scalar::{new_scalar, Scalar},
trusted_len::TrustedLen,
};

use super::StructArray;

/// Iterator over the rows of a [`StructArray`] that does not consult validity.
///
/// Each item is a `Vec` holding one boxed [`Scalar`] per field of the struct.
pub struct StructValueIter<'a> {
    /// The array being iterated.
    array: &'a StructArray,
    /// Row that the next call to `next` yields.
    index: usize,
    /// One past the row that the next call to `next_back` yields.
    end: usize,
}

impl<'a> StructValueIter<'a> {
    /// Creates an iterator positioned on the first row of `array` and ending after
    /// its last row (`array.len()`).
    #[inline]
    pub fn new(array: &'a StructArray) -> Self {
        let end = array.len();
        Self {
            array,
            index: 0,
            end,
        }
    }
}

impl<'a> Iterator for StructValueIter<'a> {
    type Item = Vec<Box<dyn Scalar>>;

    /// Yields the next row from the front as one scalar per field, or `None` once the
    /// front cursor has met the back cursor.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.index == self.end {
            return None;
        }
        let row = self.index;
        self.index += 1;

        // The column range has an exact length, so `collect` can size the `Vec` up front.
        let array = self.array;
        Some(
            (0..array.fields().len())
                .map(|column| new_scalar(array.value(column).as_ref(), row))
                .collect(),
        )
    }

    /// Reports the exact number of rows left between the two cursors.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.end - self.index;
        (remaining, Some(remaining))
    }
}

// SAFETY: `size_hint` reports `end - index` for both bounds, and `next` / `next_back`
// each move their cursor by exactly one position per yielded item, so the reported
// length is exact.
unsafe impl TrustedLen for StructValueIter<'_> {}

impl<'a> DoubleEndedIterator for StructValueIter<'a> {
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
if self.index == self.end {
None
} else {
self.end -= 1;

let mut item = vec![];
for i in 0..self.array.fields().len() {
jorgecarleitao marked this conversation as resolved.
Show resolved Hide resolved
let arr = self.array.value(i);
item.push(new_scalar(arr.as_ref(), self.end))
}
// Safety:
// self.end is maximized by the length of the array
Some(item)
}
}
}

/// Row iterator of a [`StructArray`] that does not consult validity.
type ValuesIter<'a> = StructValueIter<'a>;
/// Row iterator of a [`StructArray`] wrapped in [`ZipValidity`]; items are
/// `Option<Vec<Box<dyn Scalar>>>`.
type ZipIter<'a> = ZipValidity<'a, Vec<Box<dyn Scalar>>, ValuesIter<'a>>;

impl<'a> IntoIterator for &'a StructArray {
type Item = Option<Vec<Box<dyn Scalar>>>;
type IntoIter = ZipIter<'a>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

impl StructArray {
    /// Returns an iterator over the rows of this array, yielding
    /// `Option<Vec<Box<dyn Scalar>>>`.
    ///
    /// Each `Vec` holds one scalar per field, in field order. Rows are zipped with the
    /// validity bitmap (when one exists) through `zip_validity`.
    pub fn iter(&self) -> ZipIter<'_> {
        // NOTE(review): presumably a `None` item marks a null slot — confirm against
        // `ZipValidity`.
        zip_validity(
            StructValueIter::new(self),
            self.validity.as_ref().map(|bitmap| bitmap.iter()),
        )
    }

    /// Returns an iterator over the rows of this array that does not consult validity,
    /// yielding `Vec<Box<dyn Scalar>>` with one scalar per field.
    pub fn values_iter(&self) -> ValuesIter<'_> {
        StructValueIter::new(self)
    }
}
54 changes: 10 additions & 44 deletions src/array/struct_.rs → src/array/struct_/mod.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
use std::sync::Arc;
use std::{ops::Index, sync::Arc};

use crate::{
bitmap::Bitmap,
datatypes::{DataType, Field},
error::Result,
ffi,
};

use super::{ffi::ToFfi, new_empty_array, new_null_array, Array, FromFfi};
use super::{new_empty_array, new_null_array, Array};

mod ffi;
mod iterator;

/// A [`StructArray`] is a nested [`Array`] with an optional validity representing
/// multiple [`Array`] with the same number of rows.
Expand Down Expand Up @@ -168,6 +169,11 @@ impl StructArray {
pub fn fields(&self) -> &[Field] {
// Delegates to the associated `get_fields` helper, passing this array's stored data type.
Self::get_fields(&self.data_type)
}

/// Returns the child array at position `index` of [`StructArray::values`], i.e. the
/// values of one field for every row (not a single row of the struct).
///
/// # Panics
/// NOTE(review): presumably panics when `index` is out of bounds, as `Index::index`
/// does for slices — confirm against the return type of `values()`.
pub fn value(&self, index: usize) -> &Arc<dyn Array> {
self.values().index(index)
}
}

impl StructArray {
Expand Down Expand Up @@ -222,43 +228,3 @@ impl std::fmt::Display for StructArray {
write!(f, "}}")
}
}

// NOTE(review): the safety contract of `ToFfi` is declared on the trait, which is not
// visible from this file — confirm this implementation upholds it.
unsafe impl ToFfi for StructArray {
    /// Returns a single buffer slot: the pointer of the validity bitmap, or `None`
    /// when the array has no validity.
    fn buffers(&self) -> Vec<Option<std::ptr::NonNull<u8>>> {
        let validity_ptr = self.validity.as_ref().map(|bitmap| bitmap.as_ptr());
        vec![validity_ptr]
    }

    /// Returns the child arrays (one shared handle per child).
    fn children(&self) -> Vec<Arc<dyn Array>> {
        self.values.iter().map(Arc::clone).collect()
    }

    /// Returns the offset of the validity bitmap, or `0` when there is no validity.
    fn offset(&self) -> Option<usize> {
        let validity_offset = match self.validity.as_ref() {
            Some(bitmap) => bitmap.offset(),
            None => 0,
        };
        Some(validity_offset)
    }

    /// Returns a clone of this array.
    fn to_ffi_aligned(&self) -> Self {
        self.clone()
    }
}

impl<A: ffi::ArrowArrayRef> FromFfi<A> for StructArray {
    /// Builds a [`StructArray`] from an FFI array reference.
    ///
    /// The data type is read from the FFI field, the validity from the FFI array, and
    /// one child is imported per field of the struct data type.
    ///
    /// # Errors
    /// Propagates any error from reading the validity, fetching a child, or importing it.
    ///
    /// # Safety
    /// NOTE(review): the caller obligations are those of `FromFfi::try_from_ffi`, which
    /// are declared on the trait and not visible here.
    unsafe fn try_from_ffi(array: A) -> Result<Self> {
        let data_type = array.field().data_type().clone();
        let num_children = Self::get_fields(&data_type).len();

        let validity = unsafe { array.validity() }?;

        let mut values: Vec<Arc<dyn Array>> = Vec::with_capacity(num_children);
        for index in 0..num_children {
            let child = array.child(index)?;
            values.push(ffi::try_from(child)?.into());
        }

        Ok(Self::from_data(data_type, values, validity))
    }
}
1 change: 1 addition & 0 deletions tests/it/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ mod growable;
mod list;
mod ord;
mod primitive;
mod struct_;
mod union;
mod utf8;

Expand Down
29 changes: 29 additions & 0 deletions tests/it/array/struct_/iterator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use arrow2::array::*;
use arrow2::datatypes::*;
use arrow2::scalar::new_scalar;

/// Iterating a two-field struct array without validity yields, for every row, `Some`
/// of the scalars taken from each child at that row.
#[test]
fn test_simple_iter() {
    use std::sync::Arc;
    let boolean = Arc::new(BooleanArray::from_slice(&[false, false, true, true])) as Arc<dyn Array>;
    let int = Arc::new(Int32Array::from_slice(&[42, 28, 19, 31])) as Arc<dyn Array>;

    let fields = vec![
        Field::new("b", DataType::Boolean, false),
        Field::new("c", DataType::Int32, false),
    ];

    let array = StructArray::from_data(
        DataType::Struct(fields),
        vec![boolean.clone(), int.clone()],
        None,
    );

    let mut rows = 0;
    for (i, item) in array.iter().enumerate() {
        let expected = Some(vec![
            new_scalar(boolean.as_ref(), i),
            new_scalar(int.as_ref(), i),
        ]);
        assert_eq!(expected, item);
        rows += 1;
    }
    // Without this check the loop above would pass vacuously on an empty iterator.
    assert_eq!(rows, 4);
}
1 change: 1 addition & 0 deletions tests/it/array/struct_/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mod iterator;