diff --git a/parquet/src/arrow/array_reader.rs b/parquet/src/arrow/array_reader.rs index 752ca4c63052..c3e170abf9e4 100644 --- a/parquet/src/arrow/array_reader.rs +++ b/parquet/src/arrow/array_reader.rs @@ -67,6 +67,7 @@ use crate::arrow::schema::parquet_to_arrow_field; use crate::basic::{ConvertedType, Repetition, Type as PhysicalType}; use crate::column::page::PageIterator; use crate::column::reader::ColumnReaderImpl; +use crate::data_type::private::ScalarDataType; use crate::data_type::{ BoolType, ByteArrayType, DataType, DoubleType, FixedLenByteArrayType, FloatType, Int32Type, Int64Type, Int96Type, @@ -104,7 +105,7 @@ pub trait ArrayReader { /// /// Returns the number of records read, which can be less than batch_size if /// pages is exhausted. -fn read_records( +fn read_records( record_reader: &mut RecordReader, pages: &mut dyn PageIterator, batch_size: usize, @@ -132,7 +133,7 @@ fn read_records( /// A NullArrayReader reads Parquet columns stored as null int32s with an Arrow /// NullArray type. -pub struct NullArrayReader { +pub struct NullArrayReader { data_type: ArrowType, pages: Box, def_levels_buffer: Option, @@ -142,7 +143,7 @@ pub struct NullArrayReader { _type_marker: PhantomData, } -impl NullArrayReader { +impl NullArrayReader { /// Construct null array reader. pub fn new(pages: Box, column_desc: ColumnDescPtr) -> Result { let record_reader = RecordReader::::new(column_desc.clone()); @@ -160,7 +161,7 @@ impl NullArrayReader { } /// Implementation of primitive array reader. -impl ArrayReader for NullArrayReader { +impl ArrayReader for NullArrayReader { fn as_any(&self) -> &dyn Any { self } @@ -200,7 +201,7 @@ impl ArrayReader for NullArrayReader { /// Primitive array readers are leaves of array reader tree. They accept page iterator /// and read them into primitive arrays. -pub struct PrimitiveArrayReader { +pub struct PrimitiveArrayReader { data_type: ArrowType, pages: Box, def_levels_buffer: Option, @@ -210,7 +211,7 @@ pub struct PrimitiveArrayReader { _type_marker: PhantomData, } -impl PrimitiveArrayReader { +impl PrimitiveArrayReader { /// Construct primitive array reader. pub fn new( pages: Box, @@ -240,7 +241,7 @@ impl PrimitiveArrayReader { } /// Implementation of primitive array reader. -impl ArrayReader for PrimitiveArrayReader { +impl ArrayReader for PrimitiveArrayReader { fn as_any(&self) -> &dyn Any { self } @@ -288,7 +289,7 @@ impl ArrayReader for PrimitiveArrayReader { } }; - // Convert to arrays by using the Parquet phyisical type. + // Convert to arrays by using the Parquet physical type. // The physical types are then cast to Arrow types if necessary let mut record_data = self.record_reader.consume_record_data()?; diff --git a/parquet/src/arrow/record_reader.rs b/parquet/src/arrow/record_reader.rs index a5c0b47efcd2..53db620dbcb6 100644 --- a/parquet/src/arrow/record_reader.rs +++ b/parquet/src/arrow/record_reader.rs @@ -19,7 +19,7 @@ use std::cmp::{max, min}; use std::mem::{replace, size_of}; use crate::column::{page::PageReader, reader::ColumnReaderImpl}; -use crate::data_type::DataType; +use crate::data_type::private::ScalarDataType; use crate::errors::{ParquetError, Result}; use crate::schema::types::ColumnDescPtr; use arrow::array::BooleanBufferBuilder; @@ -29,7 +29,7 @@ use arrow::buffer::{Buffer, MutableBuffer}; const MIN_BATCH_SIZE: usize = 1024; /// A `RecordReader` is a stateful column reader that delimits semantic records. -pub struct RecordReader { +pub struct RecordReader { column_desc: ColumnDescPtr, records: MutableBuffer, @@ -47,7 +47,7 @@ pub struct RecordReader { values_written: usize, } -impl RecordReader { +impl RecordReader { pub fn new(column_schema: ColumnDescPtr) -> Self { let (def_levels, null_map) = if column_schema.max_def_level() > 0 { ( diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index 6f3468af8381..73a010aa572d 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -572,6 +572,7 @@ impl AsBytes for str { } pub(crate) mod private { + use super::*; use crate::encodings::decoding::PlainDecoderDetails; use crate::util::bit_util::{round_upto_power_of_2, BitReader, BitWriter}; use crate::util::memory::ByteBufferPtr; @@ -1032,6 +1033,21 @@ pub(crate) mod private { self } } + + /// A marker trait for [`DataType`] with a [scalar] physical type + /// + /// This means that a `[Self::T::default()]` of length `len` can be safely created from a + /// zero-initialized `[u8]` with length `len * Self::get_type_size()` and + /// alignment of `Self::get_type_size()` + /// + /// [scalar]: https://doc.rust-lang.org/book/ch03-02-data-types.html#scalar-types + /// + pub trait ScalarDataType: DataType {} + impl ScalarDataType for BoolType {} + impl ScalarDataType for Int32Type {} + impl ScalarDataType for Int64Type {} + impl ScalarDataType for FloatType {} + impl ScalarDataType for DoubleType {} } /// Contains the Parquet physical type information as well as the Rust primitive type