diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index 769622c488420..9d1c3afa03dbd 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -5824,7 +5824,7 @@ TEST_F(ParquetBloomFilterRoundTripTest, SimpleRoundTripDictionary) { auto origin_schema = ::arrow::schema( {::arrow::field("c0", ::arrow::int64()), ::arrow::field("c1", ::arrow::utf8())}); auto schema = ::arrow::schema( - {::arrow::field("c0", ::arrow::dictionary(::arrow::int64(), ::arrow::utf8())), + {::arrow::field("c0", ::arrow::dictionary(::arrow::int64(), ::arrow::int64())), ::arrow::field("c1", ::arrow::dictionary(::arrow::int64(), ::arrow::utf8()))}); bloom_filters_.clear(); BloomFilterOptions options; @@ -5836,11 +5836,11 @@ TEST_F(ParquetBloomFilterRoundTripTest, SimpleRoundTripDictionary) { ->build(); std::vector contents = {R"([ [1, "a"], - [2, "a"], - [1, "c"], + [2, "b"], + [3, "c"], [null, "d"], [5, null], - [6, "d"] + [6, "f"] ])"}; auto table = ::arrow::TableFromJSON(schema, contents); auto non_dict_table = ::arrow::TableFromJSON(origin_schema, contents); @@ -5854,13 +5854,15 @@ TEST_F(ParquetBloomFilterRoundTripTest, SimpleRoundTripDictionary) { for (int64_t row_group_id = 0; row_group_id < 2; ++row_group_id) { { ASSERT_NE(nullptr, bloom_filters_[bloom_filter_idx]); - auto col = table->column(0)->Slice(current_row, row_group_row_count[row_group_id]); + auto col = non_dict_table->column(0)->Slice(current_row, + row_group_row_count[row_group_id]); VerifyBloomFilter<::arrow::Int64Type>(bloom_filters_[bloom_filter_idx].get(), *col); ++bloom_filter_idx; } { ASSERT_NE(nullptr, bloom_filters_[bloom_filter_idx]); - auto col = table->column(1)->Slice(current_row, row_group_row_count[row_group_id]); + auto col = non_dict_table->column(1)->Slice(current_row, + row_group_row_count[row_group_id]); VerifyBloomFilter<::arrow::StringType>(bloom_filters_[bloom_filter_idx].get(), *col); ++bloom_filter_idx; diff --git a/cpp/src/parquet/file_writer.cc b/cpp/src/parquet/file_writer.cc index d485da0ccfa33..e4cd9f91aa8b8 100644 --- a/cpp/src/parquet/file_writer.cc +++ b/cpp/src/parquet/file_writer.cc @@ -165,20 +165,15 @@ class RowGroupSerializer : public RowGroupWriter::Contents { ? bloom_filter_builder_->GetOrCreateBloomFilter(column_ordinal) : nullptr; - std::unique_ptr pager; + CodecOptions default_codec_options; if (!codec_options) { - pager = PageWriter::Open( - sink_, column_properties.compression(), col_meta, row_group_ordinal_, - static_cast(column_ordinal), properties_->memory_pool(), - /*buffered_row_group=*/false, meta_encryptor, data_encryptor, - properties_->page_checksum_enabled(), ci_builder, oi_builder, CodecOptions()); - } else { - pager = PageWriter::Open( - sink_, column_properties.compression(), col_meta, row_group_ordinal_, - static_cast(column_ordinal), properties_->memory_pool(), - /*buffered_row_group=*/false, meta_encryptor, data_encryptor, - properties_->page_checksum_enabled(), ci_builder, oi_builder, *codec_options); + codec_options = &default_codec_options; } + std::unique_ptr pager = PageWriter::Open( + sink_, column_properties.compression(), col_meta, row_group_ordinal_, + static_cast(column_ordinal), properties_->memory_pool(), + /*buffered_row_group=*/false, meta_encryptor, data_encryptor, + properties_->page_checksum_enabled(), ci_builder, oi_builder, *codec_options); column_writers_[0] = ColumnWriter::Make(col_meta, std::move(pager), properties_, bloom_filter); return column_writers_[0].get(); @@ -322,20 +317,15 @@ class RowGroupSerializer : public RowGroupWriter::Contents { bloom_filter_builder_ && column_properties.bloom_filter_enabled() ? bloom_filter_builder_->GetOrCreateBloomFilter(column_ordinal) : nullptr; - std::unique_ptr pager; + CodecOptions default_codec_options; if (!codec_options) { - pager = PageWriter::Open( - sink_, properties_->compression(path), col_meta, row_group_ordinal_, - static_cast(column_ordinal), properties_->memory_pool(), - buffered_row_group_, meta_encryptor, data_encryptor, - properties_->page_checksum_enabled(), ci_builder, oi_builder, CodecOptions()); - } else { - pager = PageWriter::Open( - sink_, properties_->compression(path), col_meta, row_group_ordinal_, - static_cast(column_ordinal), properties_->memory_pool(), - buffered_row_group_, meta_encryptor, data_encryptor, - properties_->page_checksum_enabled(), ci_builder, oi_builder, *codec_options); + codec_options = &default_codec_options; } + std::unique_ptr pager = PageWriter::Open( + sink_, properties_->compression(path), col_meta, row_group_ordinal_, + static_cast(column_ordinal), properties_->memory_pool(), + buffered_row_group_, meta_encryptor, data_encryptor, + properties_->page_checksum_enabled(), ci_builder, oi_builder, *codec_options); column_writers_.push_back( ColumnWriter::Make(col_meta, std::move(pager), properties_, bloom_filter)); }