Skip to content

Commit

Permalink
empty dataset fix
Browse files Browse the repository at this point in the history
  • Loading branch information
sherlockbeard authored and ion-elgreco committed Jul 21, 2024
1 parent d3642a6 commit 53da029
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
4 changes: 3 additions & 1 deletion python/deltalake/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ def _cast_schema_to_recordbatchreader(
) -> Generator[pa.RecordBatch, None, None]:
"""Creates recordbatch generator."""
for batch in reader:
yield pa.Table.from_batches([batch]).cast(schema).to_batches()[0]
batchs = pa.Table.from_batches([batch]).cast(schema).to_batches()
if len(batchs) > 0:
yield batchs[0]


def convert_pyarrow_recordbatchreader(
Expand Down
6 changes: 6 additions & 0 deletions python/tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1838,3 +1838,9 @@ def test_roundtrip_cdc_evolution(tmp_path: pathlib.Path):
print(os.listdir(tmp_path))
# This is kind of a weak test to verify that CDFs were written
assert os.path.isdir(os.path.join(tmp_path, "_change_data"))


def test_empty_dataset_write(tmp_path: pathlib.Path, sample_data: pa.Table):
    """Check that appending a zero-row dataset completes without raising.

    A table with no rows is built from the schema of ``sample_data``,
    wrapped in a dataset, and written to ``tmp_path`` in append mode.
    The test has no explicit assertions: it passes as long as the write
    call does not raise.
    """
    # Same columns and types as the sample fixture, but no rows.
    schema_only_table = sample_data.schema.empty_table()
    write_deltalake(tmp_path, dataset(schema_only_table), mode="append")

0 comments on commit 53da029

Please sign in to comment.