parquet: include the offending size in page-size overflow errors

SerializedPageWriter throws a ParquetException when a dictionary or data
page is larger than INT32_MAX, before or after compression. Each of the
four checks now has its own message (dictionary/data, compressed/
uncompressed) followed by the size that failed the check. The test asserts
on the message text through gmock's ThrowsMessage/HasSubstr.

Review corrections folded into this patch:
  * the compressed dictionary page error printed uncompressed_size; it now
    prints compressed_data->size(), the value the condition tests;
  * the last test assertion wrote over_compressed_limit a second time; it
    now writes over_uncompressed_limit, the page built just above it;
  * every message ends in "Size: " so the number is not glued to the colon.

NOTE(review): template arguments and #include targets were missing from the
copy under review and have been reconstructed; confirm them against the
tree before applying. The post-image blob ids on the index lines predate
the corrections above.

diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index 3ea436336930a..e81827dd6a1c2 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -273,7 +273,9 @@ class SerializedPageWriter : public PageWriter {
   int64_t WriteDictionaryPage(const DictionaryPage& page) override {
     int64_t uncompressed_size = page.buffer()->size();
     if (uncompressed_size > std::numeric_limits<int32_t>::max()) {
-      throw ParquetException("Uncompressed page size overflows to INT32_MAX.");
+      throw ParquetException(
+          "Uncompressed dictionary page size overflows to INT32_MAX. Size: ",
+          uncompressed_size);
     }
     std::shared_ptr<Buffer> compressed_data;
     if (has_compressor()) {
@@ -292,7 +294,9 @@ class SerializedPageWriter : public PageWriter {
 
     const uint8_t* output_data_buffer = compressed_data->data();
     if (compressed_data->size() > std::numeric_limits<int32_t>::max()) {
-      throw ParquetException("Compressed page size overflows to INT32_MAX.");
+      throw ParquetException(
+          "Compressed dictionary page size overflows to INT32_MAX. Size: ",
+          compressed_data->size());
     }
     int32_t output_data_len = static_cast<int32_t>(compressed_data->size());
 
@@ -380,7 +384,8 @@ class SerializedPageWriter : public PageWriter {
 
     int64_t output_data_len = compressed_data->size();
     if (output_data_len > std::numeric_limits<int32_t>::max()) {
-      throw ParquetException("Compressed page size overflows to INT32_MAX.");
+      throw ParquetException("Compressed data page size overflows to INT32_MAX. Size: ",
+                             output_data_len);
     }
 
     if (data_encryptor_.get()) {
@@ -396,7 +401,8 @@ class SerializedPageWriter : public PageWriter {
 
     format::PageHeader page_header;
     if (uncompressed_size > std::numeric_limits<int32_t>::max()) {
-      throw ParquetException("Uncompressed page size overflows to INT32_MAX.");
+      throw ParquetException("Uncompressed data page size overflows to INT32_MAX. Size: ",
+                             uncompressed_size);
     }
     page_header.__set_uncompressed_page_size(static_cast<int32_t>(uncompressed_size));
     page_header.__set_compressed_page_size(static_cast<int32_t>(output_data_len));
diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc
index e7400310ca92c..b5304f9768ba9 100644
--- a/cpp/src/parquet/column_writer_test.cc
+++ b/cpp/src/parquet/column_writer_test.cc
@@ -19,6 +19,7 @@
 #include <utility>
 #include <vector>
 
+#include <gmock/gmock.h>
 #include <gtest/gtest.h>
 
 #include "arrow/io/buffered.h"
@@ -481,6 +482,9 @@ using TestValuesWriterInt64Type = TestPrimitiveWriter<Int64Type>;
 using TestByteArrayValuesWriter = TestPrimitiveWriter<ByteArrayType>;
 using TestFixedLengthByteArrayValuesWriter = TestPrimitiveWriter<FLBAType>;
 
+using ::testing::HasSubstr;
+using ::testing::ThrowsMessage;
+
 TYPED_TEST(TestPrimitiveWriter, RequiredPlain) {
   this->TestRequiredWithEncoding(Encoding::PLAIN);
 }
@@ -906,8 +910,7 @@ TEST(TestPageWriter, ThrowsOnPagesTooLarge) {
 
   auto metadata = ColumnChunkMetaDataBuilder::Make(props, schema.Column(0));
   std::unique_ptr<PageWriter> pager =
-      PageWriter::Open(sink, Compression::UNCOMPRESSED,
-                       Codec::UseDefaultCompressionLevel(), metadata.get());
+      PageWriter::Open(sink, Compression::UNCOMPRESSED, metadata.get());
 
   uint8_t data;
   std::shared_ptr<Buffer> buffer =
@@ -915,7 +918,8 @@ TEST(TestPageWriter, ThrowsOnPagesTooLarge) {
   DataPageV1 over_compressed_limit(buffer, /*num_values=*/100, Encoding::BIT_PACKED,
                                    Encoding::BIT_PACKED, Encoding::BIT_PACKED,
                                    /*uncompressed_size=*/100);
-  EXPECT_THROW(pager->WriteDataPage(over_compressed_limit), ParquetException);
+  EXPECT_THAT([&]() { pager->WriteDataPage(over_compressed_limit); },
+              ThrowsMessage<ParquetException>(HasSubstr("overflows to INT32_MAX")));
   DictionaryPage dictionary_over_compressed_limit(buffer, /*num_values=*/100,
                                                   Encoding::PLAIN);
   EXPECT_THROW(pager->WriteDictionaryPage(dictionary_over_compressed_limit),
@@ -926,7 +930,8 @@ TEST(TestPageWriter, ThrowsOnPagesTooLarge) {
       buffer, /*num_values=*/100, Encoding::BIT_PACKED, Encoding::BIT_PACKED,
       Encoding::BIT_PACKED,
       /*uncompressed_size=*/std::numeric_limits<int32_t>::max() + int64_t{1});
-  EXPECT_THROW(pager->WriteDataPage(over_uncompressed_limit), ParquetException);
+  EXPECT_THAT([&]() { pager->WriteDataPage(over_uncompressed_limit); },
+              ThrowsMessage<ParquetException>(HasSubstr("overflows to INT32_MAX")));
 }
 
 TEST(TestColumnWriter, RepeatedListsUpdateSpacedBug) {