diff --git a/src/io/parquet/read/deserialize/binary/basic.rs b/src/io/parquet/read/deserialize/binary/basic.rs index 6b0b8f1bd9a..4bf3287373a 100644 --- a/src/io/parquet/read/deserialize/binary/basic.rs +++ b/src/io/parquet/read/deserialize/binary/basic.rs @@ -94,6 +94,7 @@ impl<'a> ValuesDictionary<'a> { } } +#[derive(Debug)] enum State<'a> { Optional(OptionalPageValidity<'a>, BinaryIter<'a>), Required(Required<'a>), diff --git a/src/io/parquet/read/deserialize/fixed_size_binary/basic.rs b/src/io/parquet/read/deserialize/fixed_size_binary/basic.rs index 260fafa7eb3..a013db42806 100644 --- a/src/io/parquet/read/deserialize/fixed_size_binary/basic.rs +++ b/src/io/parquet/read/deserialize/fixed_size_binary/basic.rs @@ -17,6 +17,7 @@ use super::super::utils::{ use super::super::DataPages; use super::utils::FixedSizeBinary; +#[derive(Debug)] struct Optional<'a> { values: std::slice::ChunksExact<'a, u8>, validity: OptionalPageValidity<'a>, @@ -35,6 +36,7 @@ impl<'a> Optional<'a> { } } +#[derive(Debug)] struct Required<'a> { pub values: std::slice::ChunksExact<'a, u8>, pub remaining: usize, @@ -49,6 +51,7 @@ impl<'a> Required<'a> { } } +#[derive(Debug)] struct RequiredDictionary<'a> { pub values: hybrid_rle::HybridRleDecoder<'a>, pub remaining: usize, @@ -67,6 +70,7 @@ impl<'a> RequiredDictionary<'a> { } } +#[derive(Debug)] struct OptionalDictionary<'a> { values: hybrid_rle::HybridRleDecoder<'a>, validity: OptionalPageValidity<'a>, @@ -87,6 +91,7 @@ impl<'a> OptionalDictionary<'a> { } } +#[derive(Debug)] enum State<'a> { Optional(Optional<'a>), Required(Required<'a>), diff --git a/src/io/parquet/read/deserialize/mod.rs b/src/io/parquet/read/deserialize/mod.rs index bb688a12f89..6e332b4e8e9 100644 --- a/src/io/parquet/read/deserialize/mod.rs +++ b/src/io/parquet/read/deserialize/mod.rs @@ -11,7 +11,7 @@ mod struct_; mod utils; use crate::{ - array::{Array, BinaryArray, ListArray, Utf8Array}, + array::{Array, BinaryArray, FixedSizeListArray, ListArray, Utf8Array}, datatypes::{DataType, Field}, error::{ArrowError, Result}, }; @@ -65,6 +65,15 @@ fn create_list( validity.and_then(|x| x.into()), )) } + DataType::FixedSizeList(_, _) => { + let (_, validity) = nested.nested.pop().unwrap().inner(); + + Arc::new(FixedSizeListArray::new( + data_type, + values, + validity.and_then(|x| x.into()), + )) + } _ => { return Err(ArrowError::NotYetImplemented(format!( "Read nested datatype {:?}", @@ -102,6 +111,16 @@ where types.pop(); boolean::iter_to_arrays_nested(columns.pop().unwrap(), init.pop().unwrap(), chunk_size) } + Int8 => { + types.pop(); + primitive::iter_to_arrays_nested( + columns.pop().unwrap(), + init.pop().unwrap(), + field.data_type().clone(), + chunk_size, + |x: i32| x as i8, + ) + } Int16 => { types.pop(); primitive::iter_to_arrays_nested( @@ -112,6 +131,16 @@ where |x: i32| x as i16, ) } + Int32 => { + types.pop(); + primitive::iter_to_arrays_nested( + columns.pop().unwrap(), + init.pop().unwrap(), + field.data_type().clone(), + chunk_size, + |x: i32| x, + ) + } Int64 => { types.pop(); primitive::iter_to_arrays_nested( @@ -192,7 +221,24 @@ where let columns = columns.into_iter().rev().collect(); Box::new(struct_::StructIterator::new(columns, fields.clone())) } - _ => todo!(), + FixedSizeList(inner, _) => { + let iter = columns_to_iter_recursive( + vec![columns.pop().unwrap()], + types, + inner.as_ref().clone(), + init, + chunk_size, + )?; + let iter = iter.map(move |x| { + let (mut nested, array) = x?; + println!("{nested:?}"); + println!("{array:?}"); + let array = create_list(field.data_type().clone(), &mut nested, array)?; + Ok((nested, array)) + }); + Box::new(iter) as _ + } + other => todo!("{other:?}"), }) } diff --git a/src/io/parquet/read/deserialize/utils.rs b/src/io/parquet/read/deserialize/utils.rs index f9e9cfe20be..40afdbdbdac 100644 --- a/src/io/parquet/read/deserialize/utils.rs +++ b/src/io/parquet/read/deserialize/utils.rs @@ -222,7 +222,7 @@ pub(super) fn extend_from_decoder<'a, T: Default, P: Pushable, I: Iterator { +pub(super) trait PageState<'a>: std::fmt::Debug { fn len(&self) -> usize; } diff --git a/src/io/parquet/write/mod.rs b/src/io/parquet/write/mod.rs index 7c31f27fc52..f7c9620c1b2 100644 --- a/src/io/parquet/write/mod.rs +++ b/src/io/parquet/write/mod.rs @@ -421,7 +421,7 @@ fn nested_array_to_page( } DataType::FixedSizeList(_, size) => { let array = array.as_any().downcast_ref::().unwrap(); - let offsets = (0..array.len()) + let offsets = (0..=array.len()) .map(|x| (*size * x) as i32) .collect::>(); list_array_to_page(