From f99c35146394baa4e6eec0f20b8f1d155aca8595 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Aug 2021 08:58:23 -0400 Subject: [PATCH] Write FixedLenByteArray stats for FixedLenByteArray columns (not ByteArray stats) --- parquet/src/column/writer.rs | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/parquet/src/column/writer.rs b/parquet/src/column/writer.rs index 3cb17e17f7f6..dd28c84c73d5 100644 --- a/parquet/src/column/writer.rs +++ b/parquet/src/column/writer.rs @@ -924,11 +924,28 @@ impl ColumnWriterImpl { Type::INT96 => gen_stats_section!(Int96, int96, min, max, distinct, nulls), Type::FLOAT => gen_stats_section!(f32, float, min, max, distinct, nulls), Type::DOUBLE => gen_stats_section!(f64, double, min, max, distinct, nulls), - Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => { + Type::BYTE_ARRAY => { let min = min.as_ref().map(|v| ByteArray::from(v.as_bytes().to_vec())); let max = max.as_ref().map(|v| ByteArray::from(v.as_bytes().to_vec())); Statistics::byte_array(min, max, distinct, nulls, false) } + Type::FIXED_LEN_BYTE_ARRAY => { + let min = min + .as_ref() + .map(|v| ByteArray::from(v.as_bytes().to_vec())) + .map(|ba| { + let ba: FixedLenByteArray = ba.into(); + ba + }); + let max = max + .as_ref() + .map(|v| ByteArray::from(v.as_bytes().to_vec())) + .map(|ba| { + let ba: FixedLenByteArray = ba.into(); + ba + }); + Statistics::fixed_len_byte_array(min, max, distinct, nulls, false) + } } } @@ -1799,13 +1816,13 @@ mod tests { let stats = statistics_roundtrip::(&input); assert!(stats.has_min_max_set()); - // should it be FixedLenByteArray? - // https://github.com/apache/arrow-rs/issues/660 - if let Statistics::ByteArray(stats) = stats { - assert_eq!(stats.min(), &ByteArray::from("aaw ")); - assert_eq!(stats.max(), &ByteArray::from("zz ")); + if let Statistics::FixedLenByteArray(stats) = stats { + let expected_min: FixedLenByteArray = ByteArray::from("aaw ").into(); + assert_eq!(stats.min(), &expected_min); + let expected_max: FixedLenByteArray = ByteArray::from("zz ").into(); + assert_eq!(stats.max(), &expected_max); } else { - panic!("expecting Statistics::ByteArray, got {:?}", stats); + panic!("expecting Statistics::FixedLenByteArray, got {:?}", stats); } }