delta-io · ion-elgreco · Jan 2, 2024 · Dec 30, 2023 · Jan 2, 2024
@@ -808,6 +808,7 @@ def merge(
         target_alias: Optional[str] = None,
         error_on_type_mismatch: bool = True,
         writer_properties: Optional[WriterProperties] = None,
+        large_dtypes: bool = True,
     ) -> "TableMerger":
         """Pass the source data which you want to merge on the target delta table, providing a
         predicate in SQL query like format. You can also specify on what to do when the underlying data types do not
@@ -820,6 +821,7 @@ def merge(
             target_alias: Alias for the target table
             error_on_type_mismatch: specify if merge will return error if data types are mismatching :default = True
             writer_properties: Pass writer properties to the Rust parquet writer
+            large_dtypes: If True, the data schema is kept in large_dtypes.
 
         Returns:
             TableMerger: TableMerger Object
@@ -835,16 +837,16 @@ def merge(
         )
 
         if isinstance(source, pyarrow.RecordBatchReader):
-            source = convert_pyarrow_recordbatchreader(source, large_dtypes=True)
+            source = convert_pyarrow_recordbatchreader(source, large_dtypes)
         elif isinstance(source, pyarrow.RecordBatch):
-            source = convert_pyarrow_recordbatch(source, large_dtypes=True)
+            source = convert_pyarrow_recordbatch(source, large_dtypes)
         elif isinstance(source, pyarrow.Table):
-            source = convert_pyarrow_table(source, large_dtypes=True)
+            source = convert_pyarrow_table(source, large_dtypes)
         elif isinstance(source, ds.Dataset):
-            source = convert_pyarrow_dataset(source, large_dtypes=True)
+            source = convert_pyarrow_dataset(source, large_dtypes)
         elif isinstance(source, pandas.DataFrame):
             source = convert_pyarrow_table(
-                pyarrow.Table.from_pandas(source), large_dtypes=True
+                pyarrow.Table.from_pandas(source), large_dtypes
             )
         else:
             raise TypeError(