diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs index d0ecb86f9e081..8d17354839a8a 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs @@ -75,6 +75,12 @@ macro_rules! get_statistic { *scale, )) } + Some(DataType::Int8) => { + Some(ScalarValue::Int8(Some((*s.$func()).try_into().unwrap()))) + } + Some(DataType::Int16) => { + Some(ScalarValue::Int16(Some((*s.$func()).try_into().unwrap()))) + } Some(DataType::Date32) => { Some(ScalarValue::Date32(Some(*s.$func()))) } @@ -373,8 +379,8 @@ mod test { use arrow::datatypes::{Date32Type, Date64Type}; use arrow_array::{ new_null_array, Array, BinaryArray, BooleanArray, Date32Array, Date64Array, - Decimal128Array, Float32Array, Float64Array, Int32Array, Int64Array, RecordBatch, - StringArray, StructArray, TimestampNanosecondArray, + Decimal128Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, + Int8Array, RecordBatch, StringArray, StructArray, TimestampNanosecondArray, }; use arrow_schema::{Field, SchemaRef}; use bytes::Bytes; @@ -856,13 +862,13 @@ mod test { }) .with_column(ExpectedColumn { name: "tinyint_col", - expected_min: i32_array([Some(0)]), - expected_max: i32_array([Some(9)]), + expected_min: i8_array([Some(0)]), + expected_max: i8_array([Some(9)]), }) .with_column(ExpectedColumn { name: "smallint_col", - expected_min: i32_array([Some(0)]), - expected_max: i32_array([Some(9)]), + expected_min: i16_array([Some(0)]), + expected_max: i16_array([Some(9)]), }) .with_column(ExpectedColumn { name: "int_col", @@ -1088,6 +1094,16 @@ mod test { Arc::new(array) } + fn i8_array(input: impl IntoIterator<Item = Option<i8>>) -> ArrayRef { + let array: Int8Array = input.into_iter().collect(); + Arc::new(array) + } + + fn i16_array(input: impl IntoIterator<Item = Option<i16>>) -> ArrayRef { + let array: Int16Array = input.into_iter().collect(); + 
Arc::new(array) + } + fn i32_array(input: impl IntoIterator<Item = Option<i32>>) -> ArrayRef { let array: Int32Array = input.into_iter().collect(); Arc::new(array) diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs index a08487d3a9ca5..db687a3777a4f 100644 --- a/datafusion/core/tests/parquet/arrow_statistics.rs +++ b/datafusion/core/tests/parquet/arrow_statistics.rs @@ -385,7 +385,6 @@ async fn test_int_32() { // Note that the file has 4 columns named "i8", "i16", "i32", "i64". // - The tests on column i32 and i64 passed. // - The tests on column i8 and i16 failed. -#[ignore] #[tokio::test] async fn test_int_16() { // This creates a parquet files of 4 columns named "i8", "i16", "i32", "i64" @@ -421,7 +420,6 @@ async fn test_int_16() { // BUG (same as above): ignore this test for now // https://github.com/apache/datafusion/issues/10585 -#[ignore] #[tokio::test] async fn test_int_8() { // This creates a parquet files of 4 columns named "i8", "i16", "i32", "i64"