Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Fixed error in writing FixedSizeListArray to parquet #941

Merged
merged 1 commit into from
Apr 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/io/parquet/read/deserialize/binary/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ impl<'a> ValuesDictionary<'a> {
}
}

#[derive(Debug)]
enum State<'a> {
Optional(OptionalPageValidity<'a>, BinaryIter<'a>),
Required(Required<'a>),
Expand Down
5 changes: 5 additions & 0 deletions src/io/parquet/read/deserialize/fixed_size_binary/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use super::super::utils::{
use super::super::DataPages;
use super::utils::FixedSizeBinary;

#[derive(Debug)]
struct Optional<'a> {
values: std::slice::ChunksExact<'a, u8>,
validity: OptionalPageValidity<'a>,
Expand All @@ -35,6 +36,7 @@ impl<'a> Optional<'a> {
}
}

#[derive(Debug)]
struct Required<'a> {
pub values: std::slice::ChunksExact<'a, u8>,
pub remaining: usize,
Expand All @@ -49,6 +51,7 @@ impl<'a> Required<'a> {
}
}

#[derive(Debug)]
struct RequiredDictionary<'a> {
pub values: hybrid_rle::HybridRleDecoder<'a>,
pub remaining: usize,
Expand All @@ -67,6 +70,7 @@ impl<'a> RequiredDictionary<'a> {
}
}

#[derive(Debug)]
struct OptionalDictionary<'a> {
values: hybrid_rle::HybridRleDecoder<'a>,
validity: OptionalPageValidity<'a>,
Expand All @@ -87,6 +91,7 @@ impl<'a> OptionalDictionary<'a> {
}
}

#[derive(Debug)]
enum State<'a> {
Optional(Optional<'a>),
Required(Required<'a>),
Expand Down
50 changes: 48 additions & 2 deletions src/io/parquet/read/deserialize/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ mod struct_;
mod utils;

use crate::{
array::{Array, BinaryArray, ListArray, Utf8Array},
array::{Array, BinaryArray, FixedSizeListArray, ListArray, Utf8Array},
datatypes::{DataType, Field},
error::{ArrowError, Result},
};
Expand Down Expand Up @@ -65,6 +65,15 @@ fn create_list(
validity.and_then(|x| x.into()),
))
}
DataType::FixedSizeList(_, _) => {
let (_, validity) = nested.nested.pop().unwrap().inner();

Arc::new(FixedSizeListArray::new(
data_type,
values,
validity.and_then(|x| x.into()),
))
}
_ => {
return Err(ArrowError::NotYetImplemented(format!(
"Read nested datatype {:?}",
Expand Down Expand Up @@ -102,6 +111,16 @@ where
types.pop();
boolean::iter_to_arrays_nested(columns.pop().unwrap(), init.pop().unwrap(), chunk_size)
}
Int8 => {
types.pop();
primitive::iter_to_arrays_nested(
columns.pop().unwrap(),
init.pop().unwrap(),
field.data_type().clone(),
chunk_size,
|x: i32| x as i8,
)
}
Int16 => {
types.pop();
primitive::iter_to_arrays_nested(
Expand All @@ -112,6 +131,16 @@ where
|x: i32| x as i16,
)
}
Int32 => {
types.pop();
primitive::iter_to_arrays_nested(
columns.pop().unwrap(),
init.pop().unwrap(),
field.data_type().clone(),
chunk_size,
|x: i32| x,
)
}
Int64 => {
types.pop();
primitive::iter_to_arrays_nested(
Expand Down Expand Up @@ -192,7 +221,24 @@ where
let columns = columns.into_iter().rev().collect();
Box::new(struct_::StructIterator::new(columns, fields.clone()))
}
_ => todo!(),
FixedSizeList(inner, _) => {
let iter = columns_to_iter_recursive(
vec![columns.pop().unwrap()],
types,
inner.as_ref().clone(),
init,
chunk_size,
)?;
let iter = iter.map(move |x| {
let (mut nested, array) = x?;
println!("{nested:?}");
println!("{array:?}");
let array = create_list(field.data_type().clone(), &mut nested, array)?;
Ok((nested, array))
});
Box::new(iter) as _
}
other => todo!("{other:?}"),
})
}

Expand Down
2 changes: 1 addition & 1 deletion src/io/parquet/read/deserialize/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ pub(super) fn extend_from_decoder<'a, T: Default, P: Pushable<T>, I: Iterator<It
}

/// The state of a partially deserialized page
pub(super) trait PageState<'a> {
pub(super) trait PageState<'a>: std::fmt::Debug {
fn len(&self) -> usize;
}

Expand Down
2 changes: 1 addition & 1 deletion src/io/parquet/write/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ fn nested_array_to_page(
}
DataType::FixedSizeList(_, size) => {
let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
let offsets = (0..array.len())
let offsets = (0..=array.len())
.map(|x| (*size * x) as i32)
.collect::<Vec<_>>();
list_array_to_page(
Expand Down