feat(python): expose large_dtype param in merge (#2003)
# Description
This helps avoid this [error](#1998), since you can now set `large_dtypes=False`.

Once there is better type coercion upstream in arrow-rs, this param should be removable entirely from both the writer and the merge operation.
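
For context, here is a minimal sketch of opting out of large dtypes during a merge. The table path, schema, and predicate are hypothetical; the builder calls (`when_matched_update_all`, `when_not_matched_insert_all`, `execute`) are the existing `TableMerger` API:

```python
import pyarrow as pa
from deltalake import DeltaTable

# Hypothetical table path and source data, purely for illustration.
dt = DeltaTable("path/to/delta_table")
source = pa.table({"id": [1, 2], "value": ["a", "b"]})

(
    dt.merge(
        source=source,
        predicate="t.id = s.id",
        source_alias="s",
        target_alias="t",
        large_dtypes=False,  # new param: keep the source schema in regular (non-large) dtypes
    )
    .when_matched_update_all()
    .when_not_matched_insert_all()
    .execute()
)
```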
ion-elgreco authored Jan 2, 2024
1 parent 7add491 commit f54bb28
Showing 1 changed file with 7 additions and 5 deletions.
python/deltalake/table.py
@@ -808,6 +808,7 @@ def merge(
         target_alias: Optional[str] = None,
         error_on_type_mismatch: bool = True,
         writer_properties: Optional[WriterProperties] = None,
+        large_dtypes: bool = True,
     ) -> "TableMerger":
         """Pass the source data which you want to merge on the target delta table, providing a
         predicate in SQL query like format. You can also specify on what to do when the underlying data types do not
@@ -820,6 +821,7 @@ def merge(
             target_alias: Alias for the target table
             error_on_type_mismatch: specify if merge will return error if data types are mismatching :default = True
             writer_properties: Pass writer properties to the Rust parquet writer
+            large_dtypes: If True, the data schema is kept in large_dtypes.
 
         Returns:
             TableMerger: TableMerger Object
@@ -835,16 +837,16 @@ def merge(
         )
 
         if isinstance(source, pyarrow.RecordBatchReader):
-            source = convert_pyarrow_recordbatchreader(source, large_dtypes=True)
+            source = convert_pyarrow_recordbatchreader(source, large_dtypes)
         elif isinstance(source, pyarrow.RecordBatch):
-            source = convert_pyarrow_recordbatch(source, large_dtypes=True)
+            source = convert_pyarrow_recordbatch(source, large_dtypes)
         elif isinstance(source, pyarrow.Table):
-            source = convert_pyarrow_table(source, large_dtypes=True)
+            source = convert_pyarrow_table(source, large_dtypes)
         elif isinstance(source, ds.Dataset):
-            source = convert_pyarrow_dataset(source, large_dtypes=True)
+            source = convert_pyarrow_dataset(source, large_dtypes)
         elif isinstance(source, pandas.DataFrame):
             source = convert_pyarrow_table(
-                pyarrow.Table.from_pandas(source), large_dtypes=True
+                pyarrow.Table.from_pandas(source), large_dtypes
             )
         else:
             raise TypeError(
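
For readers unfamiliar with the term: "large" dtypes are Arrow's 64-bit-offset type variants (`large_string`, `large_binary`, `large_list`). A small, standalone pyarrow sketch of the distinction; these are plain pyarrow calls, not deltalake internals:

```python
import pyarrow as pa

# Regular string/binary arrays use 32-bit offsets; the "large" variants
# use 64-bit offsets and are distinct Arrow types, so a schema built with
# one variant will not match data carried in the other.
regular = pa.schema([("name", pa.string()), ("payload", pa.binary())])
large = pa.schema([("name", pa.large_string()), ("payload", pa.large_binary())])

tbl = pa.table({"name": ["a"], "payload": [b"x"]})  # inferred as regular types
tbl_large = tbl.cast(large)                         # lossless cast to the large variants
assert tbl_large.schema.equals(large)
```

As the diff suggests, `large_dtypes` is simply forwarded to the `convert_pyarrow_*` helpers, so passing `False` keeps the source in regular dtypes instead of casting it to the large variants, which is what avoids the type-mismatch error referenced above.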
