diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc index 2f304a8565c6a..254b7517d49e5 100644 --- a/cpp/src/arrow/dataset/file_parquet.cc +++ b/cpp/src/arrow/dataset/file_parquet.cc @@ -1066,8 +1066,6 @@ Result> ParquetDatasetFactory::Make( auto scan_options = std::make_shared(); ARROW_ASSIGN_OR_RAISE(auto reader, format->GetReader(metadata_source, scan_options)); std::shared_ptr metadata = reader->parquet_reader()->metadata(); -// ARROW_LOG(INFO) << "Reading Parquet metadata from " << metadata_source.path(); -// ARROW_LOG(INFO) << metadata->SerializeToString(); if (metadata->num_columns() == 0) { diff --git a/cpp/src/parquet/file_writer.cc b/cpp/src/parquet/file_writer.cc index e264294ef9c2b..31766c9289af3 100644 --- a/cpp/src/parquet/file_writer.cc +++ b/cpp/src/parquet/file_writer.cc @@ -589,7 +589,7 @@ void WriteEncryptedMetadataFile( WriteFileCryptoMetaData(*crypto_metadata, sink.get()); auto footer_encryptor = file_encryptor->GetFooterEncryptor(); - WriteEncryptedFileMetadata(*footer_metadata, sink.get(), footer_encryptor, true); + WriteEncryptedFileMetadata(metadata, sink.get(), footer_encryptor, true); PARQUET_ASSIGN_OR_THROW(position, sink->Tell()); auto footer_and_crypto_len = static_cast(position - metadata_start); PARQUET_THROW_NOT_OK( @@ -601,7 +601,6 @@ void WriteEncryptedMetadataFile( auto footer_signing_encryptor = file_encryptor->GetFooterSigningEncryptor(); WriteEncryptedFileMetadata(metadata, sink.get(), footer_signing_encryptor, false); } - PARQUET_THROW_NOT_OK(sink->Close()); file_encryptor->WipeOutEncryptionKeys(); } diff --git a/cpp/src/parquet/properties.cc b/cpp/src/parquet/properties.cc index f27a1b8802dd8..4e6c558e064d4 100644 --- a/cpp/src/parquet/properties.cc +++ b/cpp/src/parquet/properties.cc @@ -56,7 +56,7 @@ ::arrow::internal::Executor* ArrowWriterProperties::executor() const { return executor_ != nullptr ? 
executor_ : ::arrow::internal::GetCpuThreadPool(); } -ArrowReaderProperties default_arrow_reader_properties() { +ArrowReaderProperties default_arrow_reader_properties() { static ArrowReaderProperties default_reader_props; return default_reader_props; } diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index ec32214491421..11f8fbd271d39 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -18,7 +18,6 @@ # cython: profile=False # distutils: language = c++ -from __future__ import print_function from collections.abc import Sequence from textwrap import indent import warnings diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index a1327afbb52ff..7942b7300702e 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -2250,7 +2250,8 @@ def write_metadata(schema, where, metadata_collector=None, filesystem=None, # ParquetWriter doesn't expose the metadata until it's written. Write # it and read it again. - metadata = read_metadata(where, filesystem=filesystem, decryption_properties=decryption_properties) + metadata = read_metadata(where, filesystem=filesystem, + decryption_properties=decryption_properties) if hasattr(where, "seek"): where.seek(cursor_position) # file-like, set cursor back. 
@@ -2260,7 +2261,7 @@ def write_metadata(schema, where, metadata_collector=None, filesystem=None, with filesystem.open_output_stream(where) as f: metadata.write_metadata_file(f, encryption_properties2) else: - metadata.write_metadata_file(where, encryption_properties) + metadata.write_metadata_file(where, encryption_properties2) def read_metadata(where, memory_map=False, decryption_properties=None, diff --git a/python/pyarrow/tests/test_dataset_encryption.py b/python/pyarrow/tests/test_dataset_encryption.py index d2c9ed6e52c9e..247f911b51c34 100644 --- a/python/pyarrow/tests/test_dataset_encryption.py +++ b/python/pyarrow/tests/test_dataset_encryption.py @@ -317,14 +317,15 @@ def test_dataset_metadata_encryption_decryption(tempdir): pformat = pa.dataset.ParquetFileFormat(default_fragment_scan_options=pq_scan_opts) dataset = ds.dataset(metadata_file, format=pformat, filesystem=mockfs) + # TODO: cpp doesn't correctly deserialize row group metadata yet, + # seems like file_paths are not being set or serialized new_table = dataset.to_table() - # TODO: cpp doesn't correctly deserialize row group metadata yet - # assert table.equals(new_table) + assert table.equals(new_table) metadata = pq.read_metadata( metadata_file, decryption_properties=decryption_properties, filesystem=mockfs) assert metadata.num_columns == 2 - # assert metadata.num_rows == 6 - # assert metadata.num_row_groups == 1 + assert metadata.num_rows == 6 + assert metadata.num_row_groups == 1 assert metadata.schema.to_arrow_schema() == subschema