Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Fixed nested
Browse files: browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Feb 2, 2022
1 parent fafd279 commit 97fcf3b
Show file tree
Hide file tree
Showing 10 changed files with 291 additions and 327 deletions.
7 changes: 7 additions & 0 deletions src/array/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,13 @@ impl StructArray {
let fields = Self::get_fields(&data_type);
assert!(!fields.is_empty());
assert_eq!(fields.len(), values.len());
assert!(
fields
.iter()
.map(|f| f.data_type())
.eq(values.iter().map(|a| a.data_type())),
"The fields' datatypes must equal the values datatypes"
);
assert!(values.iter().all(|x| x.len() == values[0].len()));
if let Some(ref validity) = validity {
assert_eq!(values[0].len(), validity.len());
Expand Down
14 changes: 9 additions & 5 deletions src/io/parquet/read/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@ use std::sync::Arc;

use crate::{
array::{Array, Offset},
datatypes::{DataType, Field},
datatypes::DataType,
};

use self::basic::TraitBinaryArray;
use self::nested::ArrayIterator;
use super::ArrayIter;
use super::{nested_utils::NestedArrayIter, DataPages};
use super::{
nested_utils::{InitNested, NestedArrayIter},
DataPages,
};
use basic::BinaryArrayIterator;

/// Converts [`DataPages`] to an [`Iterator`] of [`Array`]
Expand All @@ -34,7 +37,7 @@ where
/// Converts [`DataPages`] to an [`Iterator`] of [`Array`]
pub fn iter_to_arrays_nested<'a, O, A, I>(
iter: I,
field: Field,
init: InitNested,
data_type: DataType,
chunk_size: usize,
) -> NestedArrayIter<'a>
Expand All @@ -44,8 +47,9 @@ where
O: Offset,
{
Box::new(
ArrayIterator::<O, A, I>::new(iter, field, data_type, chunk_size).map(|x| {
x.map(|(nested, array)| {
ArrayIterator::<O, A, I>::new(iter, init, data_type, chunk_size).map(|x| {
x.map(|(mut nested, array)| {
let _ = nested.nested.pop().unwrap(); // the primitive
let values = Arc::new(array) as Arc<dyn Array>;
(nested, values)
})
Expand Down
10 changes: 5 additions & 5 deletions src/io/parquet/read/binary/nested.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use parquet2::{encoding::Encoding, page::DataPage, schema::Repetition};
use crate::{
array::Offset,
bitmap::MutableBitmap,
datatypes::{DataType, Field},
datatypes::DataType,
error::Result,
io::parquet::read::{utils::MaybeNext, DataPages},
};
Expand Down Expand Up @@ -99,19 +99,19 @@ impl<'a, O: Offset> utils::Decoder<'a, &'a [u8], Binary<O>> for BinaryDecoder<O>
pub struct ArrayIterator<O: Offset, A: TraitBinaryArray<O>, I: DataPages> {
iter: I,
data_type: DataType,
field: Field,
init: InitNested,
items: VecDeque<(Binary<O>, MutableBitmap)>,
nested: VecDeque<NestedState>,
chunk_size: usize,
phantom_a: std::marker::PhantomData<A>,
}

impl<O: Offset, A: TraitBinaryArray<O>, I: DataPages> ArrayIterator<O, A, I> {
pub fn new(iter: I, field: Field, data_type: DataType, chunk_size: usize) -> Self {
pub fn new(iter: I, init: InitNested, data_type: DataType, chunk_size: usize) -> Self {
Self {
iter,
data_type,
field,
init,
items: VecDeque::new(),
nested: VecDeque::new(),
chunk_size,
Expand All @@ -128,7 +128,7 @@ impl<O: Offset, A: TraitBinaryArray<O>, I: DataPages> Iterator for ArrayIterator
&mut self.iter,
&mut self.items,
&mut self.nested,
&self.field,
&self.init,
self.chunk_size,
&BinaryDecoder::<O>::default(),
);
Expand Down
17 changes: 9 additions & 8 deletions src/io/parquet/read/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ mod nested;

use std::sync::Arc;

use crate::{
array::Array,
datatypes::{DataType, Field},
};
use crate::{array::Array, datatypes::DataType};

use self::basic::BooleanArrayIterator;
use self::nested::ArrayIterator;
use super::ArrayIter;
use super::{nested_utils::NestedArrayIter, DataPages};
use super::{
nested_utils::{InitNested, NestedArrayIter},
DataPages,
};

/// Converts [`DataPages`] to an [`Iterator`] of [`Array`]
pub fn iter_to_arrays<'a, I: 'a>(iter: I, data_type: DataType, chunk_size: usize) -> ArrayIter<'a>
Expand All @@ -27,14 +27,15 @@ where
/// Converts [`DataPages`] to an [`Iterator`] of [`Array`]
pub fn iter_to_arrays_nested<'a, I: 'a>(
iter: I,
field: Field,
init: InitNested,
chunk_size: usize,
) -> NestedArrayIter<'a>
where
I: DataPages,
{
Box::new(ArrayIterator::new(iter, field, chunk_size).map(|x| {
x.map(|(nested, array)| {
Box::new(ArrayIterator::new(iter, init, chunk_size).map(|x| {
x.map(|(mut nested, array)| {
let _ = nested.nested.pop().unwrap(); // the primitive
let values = Arc::new(array) as Arc<dyn Array>;
(nested, values)
})
Expand Down
10 changes: 5 additions & 5 deletions src/io/parquet/read/boolean/nested.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use parquet2::{encoding::Encoding, page::DataPage, schema::Repetition};
use crate::{
array::BooleanArray,
bitmap::{utils::BitmapIter, MutableBitmap},
datatypes::{DataType, Field},
datatypes::DataType,
error::Result,
};

Expand Down Expand Up @@ -117,18 +117,18 @@ impl<'a> Decoder<'a, bool, MutableBitmap> for BooleanDecoder {
#[derive(Debug)]
pub struct ArrayIterator<I: DataPages> {
iter: I,
field: Field,
init: InitNested,
// invariant: items.len() == nested.len()
items: VecDeque<(MutableBitmap, MutableBitmap)>,
nested: VecDeque<NestedState>,
chunk_size: usize,
}

impl<I: DataPages> ArrayIterator<I> {
pub fn new(iter: I, field: Field, chunk_size: usize) -> Self {
pub fn new(iter: I, init: InitNested, chunk_size: usize) -> Self {
Self {
iter,
field,
init,
items: VecDeque::new(),
nested: VecDeque::new(),
chunk_size,
Expand All @@ -148,7 +148,7 @@ impl<I: DataPages> Iterator for ArrayIterator<I> {
&mut self.iter,
&mut self.items,
&mut self.nested,
&self.field,
&self.init,
self.chunk_size,
&BooleanDecoder::default(),
);
Expand Down
Loading

0 comments on commit 97fcf3b

Please sign in to comment.