Skip to content

Commit 3fa5d62

Browse files
committed
Nits
1 parent 3e0aeed commit 3fa5d62

File tree

2 files changed

+28
-25
lines changed

2 files changed

+28
-25
lines changed

cpp/src/parquet/page_index.cc

Lines changed: 18 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@
2525
#include "arrow/util/unreachable.h"
2626

2727
#include <limits>
28-
#include <map>
2928
#include <numeric>
3029

3130
namespace parquet {
@@ -42,7 +41,10 @@ void Decode(std::unique_ptr<typename EncodingTraits<DType>::Decoder>& decoder,
4241

4342
decoder->SetData(/*num_values=*/1, reinterpret_cast<const uint8_t*>(input.c_str()),
4443
static_cast<int>(input.size()));
45-
decoder->Decode(&output->at(output_index), /*max_values=*/1);
44+
const auto num_values = decoder->Decode(&output->at(output_index), /*max_values=*/1);
45+
if (ARROW_PREDICT_FALSE(num_values != 1)) {
46+
throw ParquetException("Could not decode statistics value");
47+
}
4648
}
4749

4850
template <>
@@ -56,7 +58,10 @@ void Decode<BooleanType>(std::unique_ptr<BooleanDecoder>& decoder,
5658
bool value;
5759
decoder->SetData(/*num_values=*/1, reinterpret_cast<const uint8_t*>(input.c_str()),
5860
static_cast<int>(input.size()));
59-
decoder->Decode(&value, /*max_values=*/1);
61+
const auto num_values = decoder->Decode(&value, /*max_values=*/1);
62+
if (ARROW_PREDICT_FALSE(num_values != 1)) {
63+
throw ParquetException("Could not decode statistics value");
64+
}
6065
output->at(output_index) = value;
6166
}
6267

@@ -72,9 +77,8 @@ void Decode<ByteArrayType>(std::unique_ptr<ByteArrayDecoder>&, const std::string
7277
throw ParquetException("Invalid encoded byte array length");
7378
}
7479

75-
auto& decoded = output->at(output_index);
76-
decoded.len = static_cast<uint32_t>(input.size());
77-
decoded.ptr = reinterpret_cast<const uint8_t*>(input.data());
80+
output->at(output_index) = {/*len=*/static_cast<uint32_t>(input.size()),
81+
/*ptr=*/reinterpret_cast<const uint8_t*>(input.data())};
7882
}
7983

8084
template <typename DType>
@@ -86,26 +90,25 @@ class TypedColumnIndexImpl : public TypedColumnIndex<DType> {
8690
const format::ColumnIndex& column_index)
8791
: column_index_(column_index) {
8892
// Make sure the number of pages is valid and it does not overflow to int32_t.
89-
if (ARROW_PREDICT_FALSE(column_index_.null_pages.size() >=
90-
static_cast<size_t>(std::numeric_limits<int32_t>::max())) ||
91-
column_index_.null_pages.size() != column_index_.min_values.size() ||
92-
column_index_.min_values.size() != column_index_.max_values.size() ||
93+
const size_t num_pages = column_index_.null_pages.size();
94+
if (num_pages >= static_cast<size_t>(std::numeric_limits<int32_t>::max()) ||
95+
column_index_.min_values.size() != num_pages ||
96+
column_index_.max_values.size() != num_pages ||
9397
(column_index_.__isset.null_counts &&
94-
column_index_.null_counts.size() != column_index_.null_pages.size())) {
98+
column_index_.null_counts.size() != num_pages)) {
9599
throw ParquetException("Invalid column index");
96100
}
97101

98-
size_t num_pages = column_index_.null_pages.size();
99-
size_t num_non_null_pages = static_cast<size_t>(std::accumulate(
102+
const size_t num_non_null_pages = static_cast<size_t>(std::accumulate(
100103
column_index_.null_pages.cbegin(), column_index_.null_pages.cend(), 0,
101104
[](int32_t num_non_null_pages, bool null_page) {
102105
return num_non_null_pages + (null_page ? 0 : 1);
103106
}));
104107
DCHECK_LE(num_non_null_pages, num_pages);
105108

106109
// Allocate slots for decoded values.
107-
min_values_.resize(num_pages);
108-
max_values_.resize(num_pages);
110+
min_values_.resize(num_non_null_pages);
111+
max_values_.resize(num_non_null_pages);
109112
non_null_page_indices_.reserve(num_non_null_pages);
110113

111114
// Decode min and max values according to the physical type.

cpp/src/parquet/page_index_test.cc

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -176,7 +176,7 @@ TEST(PageIndex, ReadDoubleColumnIndex) {
176176
null_pages, min_values, max_values, has_null_counts, null_counts);
177177
}
178178

179-
TEST(PageIndex, ByteArrayColumnIndex) {
179+
TEST(PageIndex, ReadByteArrayColumnIndex) {
180180
const int column_id = 9;
181181
const size_t num_pages = 352;
182182
const BoundaryOrder::type boundary_order = BoundaryOrder::Ascending;
@@ -214,11 +214,11 @@ TEST(PageIndex, ReadBoolColumnIndex) {
214214
null_pages, min_values, max_values, has_null_counts, null_counts);
215215
}
216216

217-
namespace {
218-
FLBA toFLBA(const char* ptr) { return FLBA{reinterpret_cast<const uint8_t*>(ptr)}; }
219-
} // namespace
220-
221217
TEST(PageIndex, FixedLengthByteArrayColumnIndex) {
218+
auto to_flba = [](const char* ptr) {
219+
return FLBA{reinterpret_cast<const uint8_t*>(ptr)};
220+
};
221+
222222
const int column_id = 0;
223223
const size_t num_pages = 10;
224224
const BoundaryOrder::type boundary_order = BoundaryOrder::Descending;
@@ -230,10 +230,10 @@ TEST(PageIndex, FixedLengthByteArrayColumnIndex) {
230230
"\x00\x00\x00\x65"};
231231
const std::vector<const char*> max_literals = {"\x00\x00\x03\xE8", "\x00\x00\x02\x58",
232232
"\x00\x00\x00\xC8"};
233-
const std::vector<FLBA> min_values = {toFLBA(min_literals[0]), toFLBA(min_literals[1]),
234-
toFLBA(min_literals[2])};
235-
const std::vector<FLBA> max_values = {toFLBA(max_literals[0]), toFLBA(max_literals[1]),
236-
toFLBA(max_literals[2])};
233+
const std::vector<FLBA> min_values = {
234+
to_flba(min_literals[0]), to_flba(min_literals[1]), to_flba(min_literals[2])};
235+
const std::vector<FLBA> max_values = {
236+
to_flba(max_literals[0]), to_flba(max_literals[1]), to_flba(max_literals[2])};
237237

238238
TestReadTypedColumnIndex<FLBAType>(
239239
"fixed_length_byte_array.parquet", column_id, num_pages, boundary_order,
@@ -253,7 +253,7 @@ TEST(PageIndex, ReadColumnIndexWithNullPage) {
253253
const std::vector<int32_t> max_values = {};
254254

255255
TestReadTypedColumnIndex<Int32Type>(
256-
"datapage_v1-corrupt-checksum.parquet", column_id, num_pages, boundary_order,
256+
"datapage_v1-uncompressed-checksum.parquet", column_id, num_pages, boundary_order,
257257
page_indices, null_pages, min_values, max_values, has_null_counts, null_counts);
258258
}
259259

0 commit comments

Comments
 (0)