diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index ad914c77bf31c..bc4786b9cd61e 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -531,6 +531,7 @@ cdef class ParquetFileWriteOptions(FileWriteOptions): "use_deprecated_int96_timestamps", "coerce_timestamps", "allow_truncated_timestamps", + "use_compliant_nested_type", } setters = set() @@ -586,7 +587,7 @@ cdef class ParquetFileWriteOptions(FileWriteOptions): self._properties = dict( use_dictionary=True, compression="snappy", - version="1.0", + version="2.6", write_statistics=None, data_page_size=None, compression_level=None, @@ -601,6 +602,11 @@ cdef class ParquetFileWriteOptions(FileWriteOptions): self._set_properties() self._set_arrow_properties() + def __repr__(self): + return "<pyarrow.dataset.ParquetFileWriteOptions {0}>".format( + " ".join([f"{key}={value}" for key, value in self._properties.items()]) + ) + cdef set _PARQUET_READ_OPTIONS = { 'dictionary_columns', 'coerce_int96_timestamp_unit' diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index 144da21cf5e6b..7e8ce329be1c7 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -4539,7 +4539,9 @@ def test_write_table_partitioned_dict(tempdir): @pytest.mark.parquet def test_write_dataset_parquet(tempdir): table = pa.table([ - pa.array(range(20)), pa.array(np.random.randn(20)), + pa.array(range(20), type="uint32"), + pa.array(np.arange("2012-01-01", 20, dtype="datetime64[D]").astype( + "datetime64[ns]")), pa.array(np.repeat(['a', 'b'], 10)) ], names=["f1", "f2", "part"]) @@ -4551,7 +4553,7 @@ def test_write_dataset_parquet(tempdir): file_paths = list(base_dir.rglob("*")) expected_paths = [base_dir / "part-0.parquet"] assert set(file_paths) == set(expected_paths) - # check Table roundtrip + # check Table roundtrip with default version result = ds.dataset(base_dir, format="parquet").to_table() assert result.equals(table) @@ -4559,12 +4561,24 @@
def test_write_dataset_parquet(tempdir): for version in ["1.0", "2.4", "2.6"]: format = ds.ParquetFileFormat() opts = format.make_write_options(version=version) + assert "