From fa5acd971c973161f17e69d5c6b50d6e77c7da03 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 9 Aug 2021 20:58:03 -0400 Subject: [PATCH] Write FixedLenByteArray stats for FixedLenByteArray columns (not ByteArray stats) (#662) --- parquet/src/column/writer.rs | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/parquet/src/column/writer.rs b/parquet/src/column/writer.rs index af76c84c6a03..0da943918104 100644 --- a/parquet/src/column/writer.rs +++ b/parquet/src/column/writer.rs @@ -924,11 +924,28 @@ impl ColumnWriterImpl { Type::INT96 => gen_stats_section!(Int96, int96, min, max, distinct, nulls), Type::FLOAT => gen_stats_section!(f32, float, min, max, distinct, nulls), Type::DOUBLE => gen_stats_section!(f64, double, min, max, distinct, nulls), - Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => { + Type::BYTE_ARRAY => { let min = min.as_ref().map(|v| ByteArray::from(v.as_bytes().to_vec())); let max = max.as_ref().map(|v| ByteArray::from(v.as_bytes().to_vec())); Statistics::byte_array(min, max, distinct, nulls, false) } + Type::FIXED_LEN_BYTE_ARRAY => { + let min = min + .as_ref() + .map(|v| ByteArray::from(v.as_bytes().to_vec())) + .map(|ba| { + let ba: FixedLenByteArray = ba.into(); + ba + }); + let max = max + .as_ref() + .map(|v| ByteArray::from(v.as_bytes().to_vec())) + .map(|ba| { + let ba: FixedLenByteArray = ba.into(); + ba + }); + Statistics::fixed_len_byte_array(min, max, distinct, nulls, false) + } } } @@ -1797,13 +1814,13 @@ mod tests { let stats = statistics_roundtrip::(&input); assert!(stats.has_min_max_set()); - // should it be FixedLenByteArray? - // https://github.com/apache/arrow-rs/issues/660 - if let Statistics::ByteArray(stats) = stats { - assert_eq!(stats.min(), &ByteArray::from("aaw ")); - assert_eq!(stats.max(), &ByteArray::from("zz ")); + if let Statistics::FixedLenByteArray(stats) = stats { + let expected_min: FixedLenByteArray = ByteArray::from("aaw ").into(); + assert_eq!(stats.min(), &expected_min); + let expected_max: FixedLenByteArray = ByteArray::from("zz ").into(); + assert_eq!(stats.max(), &expected_max); } else { - panic!("expecting Statistics::ByteArray, got {:?}", stats); + panic!("expecting Statistics::FixedLenByteArray, got {:?}", stats); } }