Skip to content

Commit

Permalink
fix: introduce a reproduction case for List casting with polars
Browse files Browse the repository at this point in the history
See delta-io#3063

Signed-off-by: R. Tyler Croy <rtyler@brokenco.de>
  • Loading branch information
rtyler authored and ion-elgreco committed Jan 2, 2025
1 parent 9e35c06 commit a639dea
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
- name: Build and install deltalake
run: |
# Install minimum PyArrow version
uv sync --extra devel --extra pandas
uv sync --extra devel --extra pandas --extra polars
uv pip install pyarrow==16.0.0
env:
RUSTFLAGS: "-C debuginfo=line-tables-only"
Expand Down
8 changes: 4 additions & 4 deletions python/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ DAT_VERSION := 0.0.2
.PHONY: setup
setup: ## Setup the requirements
$(info --- Setup dependencies ---)
uv sync --extra devel --extra pandas
uv sync --extra devel --extra pandas --extra polars

.PHONY: setup-dat
setup-dat: ## Download DAT test files
Expand All @@ -28,21 +28,21 @@ build: setup ## Build Python binding of delta-rs
.PHONY: develop
develop: setup ## Install Python binding of delta-rs
$(info --- Develop with Python binding ---)
uvx --from 'maturin[zig]' maturin develop --extras=devel,pandas $(MATURIN_EXTRA_ARGS)
uvx --from 'maturin[zig]' maturin develop --extras=devel,pandas,polars $(MATURIN_EXTRA_ARGS)

.PHONY: install
install: build ## Install Python binding of delta-rs
$(info --- Uninstall Python binding ---)
uv pip uninstall deltalake
$(info --- Install Python binding ---)
$(eval TARGET_WHEEL := $(shell ls ../target/wheels/deltalake-${PACKAGE_VERSION}-*.whl))
uv pip install $(TARGET_WHEEL)[devel,pandas]
uv pip install $(TARGET_WHEEL)[devel,pandas,polars]

.PHONY: develop-pyspark
develop-pyspark:
uv sync --all-extras
$(info --- Develop with Python binding ---)
uvx --from 'maturin[zig]' maturin develop --extras=devel,pandas,pyspark $(MATURIN_EXTRA_ARGS)
uvx --from 'maturin[zig]' maturin develop --extras=devel,pandas,polars,pyspark $(MATURIN_EXTRA_ARGS)

.PHONY: format
format: ## Format the code
Expand Down
2 changes: 2 additions & 0 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ devel = [
"mypy==1.10.1",
"ruff==0.5.2",
]
polars = ["polars==1.17.1"]
pyspark = [
"pyspark",
"delta-spark",
Expand Down Expand Up @@ -93,6 +94,7 @@ markers = [
"s3: marks tests as integration tests with S3 (deselect with '-m \"not s3\"')",
"azure: marks tests as integration tests with Azure Blob Store",
"pandas: marks tests that require pandas",
"polars: marks tests that require polars",
"pyspark: marks tests that require pyspark",
]

Expand Down
40 changes: 40 additions & 0 deletions python/tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2025,3 +2025,43 @@ def test_write_transactions(tmp_path: pathlib.Path, sample_data: pa.Table):
assert transaction_2.app_id == "app_2"
assert transaction_2.version == 2
assert transaction_2.last_updated == 123456


# Regression test for <https://github.com/delta-io/delta-rs/issues/3063>
@pytest.mark.polars
def test_write_structs(tmp_path: pathlib.Path):
    """Merge a polars-built frame with a struct column and read it back.

    The target table declares narrow integer types (``int32`` and a struct of
    two ``int16`` fields). The source frame is built from plain Python values
    via polars, so the merge has to reconcile the source's Arrow types with
    the table schema. The test then scans the table through polars again and
    checks that both inserted rows are actually present, rather than only
    checking that nothing raised.

    Args:
        tmp_path: pytest-provided temporary directory used as the table root.
    """
    # Imported locally; polars is an optional extra gated by the marker above.
    import polars as pl

    dt = DeltaTable.create(
        tmp_path,
        schema=pa.schema(
            [
                ("a", pa.int32()),
                ("b", pa.string()),
                ("c", pa.struct({"d": pa.int16(), "e": pa.int16()})),
            ]
        ),
    )

    # NOTE(review): polars presumably infers 64-bit integers for these Python
    # ints, i.e. wider than the table's declared types - confirm.
    df = pl.DataFrame(
        {
            "a": [0, 1],
            "b": ["x", "y"],
            "c": [
                {"d": -55, "e": -32},
                {"d": 0, "e": 0},
            ],
        }
    )

    # The table starts empty, so no source row matches and all are inserted.
    dt.merge(
        source=df.to_arrow(),
        predicate=" AND ".join([f"target.{x} = source.{x}" for x in ["a"]]),
        source_alias="source",
        target_alias="target",
        large_dtypes=False,
    ).when_not_matched_insert_all().execute()

    arrow_dt = dt.to_pyarrow_dataset()
    new_df = pl.scan_pyarrow_dataset(arrow_dt)
    result = new_df.collect()

    # Reading back must yield exactly the two merged rows with all columns.
    assert result.height == 2
    assert sorted(result.columns) == ["a", "b", "c"]
    assert sorted(result["a"].to_list()) == [0, 1]

0 comments on commit a639dea

Please sign in to comment.