From 25904588d27ae121ebd9dfe4410ae89683d52604 Mon Sep 17 00:00:00 2001 From: ion-elgreco <15728914+ion-elgreco@users.noreply.github.com> Date: Sat, 30 Dec 2023 15:57:23 +0100 Subject: [PATCH] expose large_dtypes --- python/deltalake/table.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/python/deltalake/table.py b/python/deltalake/table.py index 5adeaaa9dc..76b1dd4f49 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -808,6 +808,7 @@ def merge( target_alias: Optional[str] = None, error_on_type_mismatch: bool = True, writer_properties: Optional[WriterProperties] = None, + large_dtypes: bool = True, ) -> "TableMerger": """Pass the source data which you want to merge on the target delta table, providing a predicate in SQL query like format. You can also specify on what to do when the underlying data types do not @@ -820,6 +821,7 @@ def merge( target_alias: Alias for the target table error_on_type_mismatch: specify if merge will return error if data types are mismatching :default = True writer_properties: Pass writer properties to the Rust parquet writer + large_dtypes: If True, the data schema is kept in large dtypes. 
Returns: TableMerger: TableMerger Object @@ -835,16 +837,16 @@ def merge( ) if isinstance(source, pyarrow.RecordBatchReader): - source = convert_pyarrow_recordbatchreader(source, large_dtypes=True) + source = convert_pyarrow_recordbatchreader(source, large_dtypes) elif isinstance(source, pyarrow.RecordBatch): - source = convert_pyarrow_recordbatch(source, large_dtypes=True) + source = convert_pyarrow_recordbatch(source, large_dtypes) elif isinstance(source, pyarrow.Table): - source = convert_pyarrow_table(source, large_dtypes=True) + source = convert_pyarrow_table(source, large_dtypes) elif isinstance(source, ds.Dataset): - source = convert_pyarrow_dataset(source, large_dtypes=True) + source = convert_pyarrow_dataset(source, large_dtypes) elif isinstance(source, pandas.DataFrame): source = convert_pyarrow_table( - pyarrow.Table.from_pandas(source), large_dtypes=True + pyarrow.Table.from_pandas(source), large_dtypes ) else: raise TypeError(