2525#include " arrow/util/unreachable.h"
2626
2727#include < limits>
28- #include < map>
2928#include < numeric>
3029
3130namespace parquet {
@@ -42,7 +41,10 @@ void Decode(std::unique_ptr<typename EncodingTraits<DType>::Decoder>& decoder,
4241
4342 decoder->SetData (/* num_values=*/ 1 , reinterpret_cast <const uint8_t *>(input.c_str ()),
4443 static_cast <int >(input.size ()));
45- decoder->Decode (&output->at (output_index), /* max_values=*/ 1 );
44+ const auto num_values = decoder->Decode (&output->at (output_index), /* max_values=*/ 1 );
45+ if (ARROW_PREDICT_FALSE (num_values != 1 )) {
46+ throw ParquetException (" Could not decode statistics value" );
47+ }
4648}
4749
4850template <>
@@ -56,7 +58,10 @@ void Decode<BooleanType>(std::unique_ptr<BooleanDecoder>& decoder,
5658 bool value;
5759 decoder->SetData (/* num_values=*/ 1 , reinterpret_cast <const uint8_t *>(input.c_str ()),
5860 static_cast <int >(input.size ()));
59- decoder->Decode (&value, /* max_values=*/ 1 );
61+ const auto num_values = decoder->Decode (&value, /* max_values=*/ 1 );
62+ if (ARROW_PREDICT_FALSE (num_values != 1 )) {
63+ throw ParquetException (" Could not decode statistics value" );
64+ }
6065 output->at (output_index) = value;
6166}
6267
@@ -72,9 +77,8 @@ void Decode<ByteArrayType>(std::unique_ptr<ByteArrayDecoder>&, const std::string
7277 throw ParquetException (" Invalid encoded byte array length" );
7378 }
7479
75- auto & decoded = output->at (output_index);
76- decoded.len = static_cast <uint32_t >(input.size ());
77- decoded.ptr = reinterpret_cast <const uint8_t *>(input.data ());
80+ output->at (output_index) = {/* len=*/ static_cast <uint32_t >(input.size ()),
81+ /* ptr=*/ reinterpret_cast <const uint8_t *>(input.data ())};
7882}
7983
8084template <typename DType>
@@ -86,26 +90,25 @@ class TypedColumnIndexImpl : public TypedColumnIndex<DType> {
8690 const format::ColumnIndex& column_index)
8791 : column_index_(column_index) {
8892 // Make sure the number of pages is valid and it does not overflow to int32_t.
89- if ( ARROW_PREDICT_FALSE ( column_index_.null_pages .size () >=
90- static_cast <size_t >(std::numeric_limits<int32_t >::max () )) ||
91- column_index_.null_pages .size () != column_index_. min_values . size () ||
92- column_index_.min_values .size () != column_index_. max_values . size () ||
93+ const size_t num_pages = column_index_.null_pages .size ();
94+ if (num_pages >= static_cast <size_t >(std::numeric_limits<int32_t >::max ()) ||
95+ column_index_.min_values .size () != num_pages ||
96+ column_index_.max_values .size () != num_pages ||
9397 (column_index_.__isset .null_counts &&
94- column_index_.null_counts .size () != column_index_. null_pages . size () )) {
98+ column_index_.null_counts .size () != num_pages )) {
9599 throw ParquetException (" Invalid column index" );
96100 }
97101
98- size_t num_pages = column_index_.null_pages .size ();
99- size_t num_non_null_pages = static_cast <size_t >(std::accumulate (
102+ const size_t num_non_null_pages = static_cast <size_t >(std::accumulate (
100103 column_index_.null_pages .cbegin (), column_index_.null_pages .cend (), 0 ,
101104 [](int32_t num_non_null_pages, bool null_page) {
102105 return num_non_null_pages + (null_page ? 0 : 1 );
103106 }));
104107 DCHECK_LE (num_non_null_pages, num_pages);
105108
106109 // Allocate slots for decoded values.
107- min_values_.resize (num_pages );
108- max_values_.resize (num_pages );
110+ min_values_.resize (num_non_null_pages );
111+ max_values_.resize (num_non_null_pages );
109112 non_null_page_indices_.reserve (num_non_null_pages);
110113
111114 // Decode min and max values according to the physical type.
0 commit comments