Skip to content

Commit

Permalink
Change WriteEncryptedFileMetadata
Browse files: browse the repository at this point in the history
  • Loading branch information
rok committed Aug 6, 2024
1 parent b0012e9 commit a4d58e0
Show file tree
Hide file tree
Showing 6 changed files with 10 additions and 12 deletions.
2 changes: 0 additions & 2 deletions cpp/src/arrow/dataset/file_parquet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1066,8 +1066,6 @@ Result<std::shared_ptr<DatasetFactory>> ParquetDatasetFactory::Make(
auto scan_options = std::make_shared<ScanOptions>();
ARROW_ASSIGN_OR_RAISE(auto reader, format->GetReader(metadata_source, scan_options));
std::shared_ptr<parquet::FileMetaData> metadata = reader->parquet_reader()->metadata();
// ARROW_LOG(INFO) << "Reading Parquet metadata from " << metadata_source.path();
// ARROW_LOG(INFO) << metadata->SerializeToString();


if (metadata->num_columns() == 0) {
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/parquet/file_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ void WriteEncryptedMetadataFile(
WriteFileCryptoMetaData(*crypto_metadata, sink.get());

auto footer_encryptor = file_encryptor->GetFooterEncryptor();
WriteEncryptedFileMetadata(*footer_metadata, sink.get(), footer_encryptor, true);
WriteEncryptedFileMetadata(metadata, sink.get(), footer_encryptor, true);
PARQUET_ASSIGN_OR_THROW(position, sink->Tell());
auto footer_and_crypto_len = static_cast<uint32_t>(position - metadata_start);
PARQUET_THROW_NOT_OK(
Expand All @@ -601,7 +601,6 @@ void WriteEncryptedMetadataFile(
auto footer_signing_encryptor = file_encryptor->GetFooterSigningEncryptor();
WriteEncryptedFileMetadata(metadata, sink.get(), footer_signing_encryptor, false);
}
PARQUET_THROW_NOT_OK(sink->Close());

file_encryptor->WipeOutEncryptionKeys();
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/parquet/properties.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ ::arrow::internal::Executor* ArrowWriterProperties::executor() const {
return executor_ != nullptr ? executor_ : ::arrow::internal::GetCpuThreadPool();
}

ArrowReaderProperties default_arrow_reader_properties() {
ArrowReaderProperties default_arrow_reader_properties() {
static ArrowReaderProperties default_reader_props;
return default_reader_props;
}
Expand Down
1 change: 0 additions & 1 deletion python/pyarrow/_parquet.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
# cython: profile=False
# distutils: language = c++

from __future__ import print_function
from collections.abc import Sequence
from textwrap import indent
import warnings
Expand Down
5 changes: 3 additions & 2 deletions python/pyarrow/parquet/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2250,7 +2250,8 @@ def write_metadata(schema, where, metadata_collector=None, filesystem=None,
# ParquetWriter doesn't expose the metadata until it's written. Write
# it and read it again.

metadata = read_metadata(where, filesystem=filesystem, decryption_properties=decryption_properties)
metadata = read_metadata(where, filesystem=filesystem,
decryption_properties=decryption_properties)
if hasattr(where, "seek"):
where.seek(cursor_position) # file-like, set cursor back.

Expand All @@ -2260,7 +2261,7 @@ def write_metadata(schema, where, metadata_collector=None, filesystem=None,
with filesystem.open_output_stream(where) as f:
metadata.write_metadata_file(f, encryption_properties2)
else:
metadata.write_metadata_file(where, encryption_properties)
metadata.write_metadata_file(where, encryption_properties2)


def read_metadata(where, memory_map=False, decryption_properties=None,
Expand Down
9 changes: 5 additions & 4 deletions python/pyarrow/tests/test_dataset_encryption.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,14 +317,15 @@ def test_dataset_metadata_encryption_decryption(tempdir):
pformat = pa.dataset.ParquetFileFormat(default_fragment_scan_options=pq_scan_opts)

dataset = ds.dataset(metadata_file, format=pformat, filesystem=mockfs)
# TODO: cpp doesn't correctly deserialize row group metadata yet,
# seems like file_paths are not being set serialized
new_table = dataset.to_table()
# TODO: cpp doesn't correctly deserialize row group metadata yet
# assert table.equals(new_table)
assert table.equals(new_table)

metadata = pq.read_metadata(
metadata_file, decryption_properties=decryption_properties, filesystem=mockfs)

assert metadata.num_columns == 2
# assert metadata.num_rows == 6
# assert metadata.num_row_groups == 1
assert metadata.num_rows == 6
assert metadata.num_row_groups == 1
assert metadata.schema.to_arrow_schema() == subschema

0 comments on commit a4d58e0

Please sign in to comment.