Skip to content

Commit

Permalink
address feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
emkornfield committed Jan 22, 2024
1 parent e7fb451 commit df8ca6c
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 8 deletions.
14 changes: 10 additions & 4 deletions cpp/src/parquet/column_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,9 @@ class SerializedPageWriter : public PageWriter {
int64_t WriteDictionaryPage(const DictionaryPage& page) override {
int64_t uncompressed_size = page.buffer()->size();
if (uncompressed_size > std::numeric_limits<int32_t>::max()) {
throw ParquetException("Uncompressed page size overflows to INT32_MAX.");
throw ParquetException(
"Uncompressed dictionary page size overflows to INT32_MAX. Size:",
uncompressed_size);
}
std::shared_ptr<Buffer> compressed_data;
if (has_compressor()) {
Expand All @@ -292,7 +294,9 @@ class SerializedPageWriter : public PageWriter {

const uint8_t* output_data_buffer = compressed_data->data();
if (compressed_data->size() > std::numeric_limits<int32_t>::max()) {
throw ParquetException("Compressed page size overflows to INT32_MAX.");
throw ParquetException(
"Compressed dictionary page size overflows to INT32_MAX. Size: ",
uncompressed_size);
}
int32_t output_data_len = static_cast<int32_t>(compressed_data->size());

Expand Down Expand Up @@ -380,7 +384,8 @@ class SerializedPageWriter : public PageWriter {
int64_t output_data_len = compressed_data->size();

if (output_data_len > std::numeric_limits<int32_t>::max()) {
throw ParquetException("Compressed page size overflows to INT32_MAX.");
throw ParquetException("Compressed data page size overflows to INT32_MAX. Size:",
output_data_len);
}

if (data_encryptor_.get()) {
Expand All @@ -396,7 +401,8 @@ class SerializedPageWriter : public PageWriter {
format::PageHeader page_header;

if (uncompressed_size > std::numeric_limits<int32_t>::max()) {
throw ParquetException("Uncompressed page size overflows to INT32_MAX.");
throw ParquetException("Uncompressed data page size overflows to INT32_MAX. Size:",
uncompressed_size);
}
page_header.__set_uncompressed_page_size(static_cast<int32_t>(uncompressed_size));
page_header.__set_compressed_page_size(static_cast<int32_t>(output_data_len));
Expand Down
13 changes: 9 additions & 4 deletions cpp/src/parquet/column_writer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <utility>
#include <vector>

#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include "arrow/io/buffered.h"
Expand Down Expand Up @@ -481,6 +482,9 @@ using TestValuesWriterInt64Type = TestPrimitiveWriter<Int64Type>;
using TestByteArrayValuesWriter = TestPrimitiveWriter<ByteArrayType>;
using TestFixedLengthByteArrayValuesWriter = TestPrimitiveWriter<FLBAType>;

using ::testing::HasSubstr;
using ::testing::ThrowsMessage;

TYPED_TEST(TestPrimitiveWriter, RequiredPlain) {
this->TestRequiredWithEncoding(Encoding::PLAIN);
}
Expand Down Expand Up @@ -906,16 +910,16 @@ TEST(TestPageWriter, ThrowsOnPagesTooLarge) {

auto metadata = ColumnChunkMetaDataBuilder::Make(props, schema.Column(0));
std::unique_ptr<PageWriter> pager =
PageWriter::Open(sink, Compression::UNCOMPRESSED,
Codec::UseDefaultCompressionLevel(), metadata.get());
PageWriter::Open(sink, Compression::UNCOMPRESSED, metadata.get());

uint8_t data;
std::shared_ptr<Buffer> buffer =
std::make_shared<Buffer>(&data, std::numeric_limits<int32_t>::max() + int64_t{1});
DataPageV1 over_compressed_limit(buffer, /*num_values=*/100, Encoding::BIT_PACKED,
Encoding::BIT_PACKED, Encoding::BIT_PACKED,
/*uncompressed_size=*/100);
EXPECT_THROW(pager->WriteDataPage(over_compressed_limit), ParquetException);
EXPECT_THAT([&]() { pager->WriteDataPage(over_compressed_limit); },
ThrowsMessage<ParquetException>(HasSubstr("overflows to INT32_MAX")));
DictionaryPage dictionary_over_compressed_limit(buffer, /*num_values=*/100,
Encoding::PLAIN);
EXPECT_THROW(pager->WriteDictionaryPage(dictionary_over_compressed_limit),
Expand All @@ -926,7 +930,8 @@ TEST(TestPageWriter, ThrowsOnPagesTooLarge) {
buffer, /*num_values=*/100, Encoding::BIT_PACKED, Encoding::BIT_PACKED,
Encoding::BIT_PACKED,
/*uncompressed_size=*/std::numeric_limits<int32_t>::max() + int64_t{1});
EXPECT_THROW(pager->WriteDataPage(over_uncompressed_limit), ParquetException);
EXPECT_THAT([&]() { pager->WriteDataPage(over_compressed_limit); },
ThrowsMessage<ParquetException>(HasSubstr("overflows to INT32_MAX")));
}

TEST(TestColumnWriter, RepeatedListsUpdateSpacedBug) {
Expand Down

0 comments on commit df8ca6c

Please sign in to comment.