-
Notifications
You must be signed in to change notification settings - Fork 416
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(python): expose delete operation (#1687)
# Description

Naively expose the delete operation, with the option to provide a predicate.

I first tried to expose a richer API with the Python `FilterType` and DNF expressions, but from what I understand, delta-rs doesn't implement generic filters, only `PartitionFilter`. The `DeleteBuilder` also only accepts DataFusion expressions. So, instead of hacking my way around this or proposing a refactor, I went for the simpler approach of sending a string predicate to the Rust library.

If this implementation is OK, I will add tests.

# Related Issue(s)

- closes #1417

---------

Co-authored-by: Will Jones <willjones127@gmail.com>
- Loading branch information
1 parent
4da7d66
commit 3ba3426
Showing
4 changed files
with
96 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import pathlib | ||
|
||
import pyarrow as pa | ||
import pyarrow.compute as pc | ||
|
||
from deltalake.table import DeltaTable | ||
from deltalake.writer import write_deltalake | ||
|
||
|
||
def test_delete_no_predicates(existing_table: DeltaTable):
    """Deleting without a predicate empties the table in a single new commit.

    Checks that the latest history entry is a ``DELETE`` operation, that the
    version advanced by exactly one, and that no rows or files remain.
    """
    version_before = existing_table.version()

    # No predicate supplied: the test expects every row to be removed.
    existing_table.delete()

    newest_commit = existing_table.history(1)[0]
    assert newest_commit["operation"] == "DELETE"
    assert existing_table.version() - version_before == 1

    assert existing_table.to_pyarrow_dataset().count_rows() == 0
    assert len(existing_table.files()) == 0
|
||
|
||
def test_delete_a_partition(tmp_path: pathlib.Path, sample_data: pa.Table):
    """Deleting ``bool = true`` on a table partitioned by ``bool``.

    Writes ``sample_data`` partitioned by the ``bool`` column, deletes the rows
    matching the predicate, and checks that only the ``bool == False`` rows
    remain, stored in a single file, after exactly one new ``DELETE`` commit.
    """
    write_deltalake(tmp_path, sample_data, partition_by=["bool"])

    table_under_test = DeltaTable(tmp_path)
    version_before = table_under_test.version()

    # Rows expected to survive: everything whose ``bool`` value is False.
    survivors = sample_data.filter(pc.equal(sample_data["bool"], False))

    table_under_test.delete(predicate="bool = true")

    newest_commit = table_under_test.history(1)[0]
    assert newest_commit["operation"] == "DELETE"
    assert table_under_test.version() - version_before == 1

    assert table_under_test.to_pyarrow_table().equals(survivors)
    assert len(table_under_test.files()) == 1
|
||
|
||
def test_delete_some_rows(existing_table: DeltaTable):
    """Deleting rows whose ``utf8`` value is ``'0'`` or ``'1'``.

    Builds the expected result by filtering the table's current contents with
    the inverse of the same membership condition, then checks the table matches
    it after exactly one new ``DELETE`` commit.
    """
    version_before = existing_table.version()

    snapshot = existing_table.to_pyarrow_table()
    # Mark the rows the predicate should remove, then keep the complement.
    to_remove = pc.is_in(snapshot["utf8"], pa.array(["0", "1"]))
    survivors = snapshot.filter(pc.invert(to_remove))

    existing_table.delete(predicate="utf8 in ('0', '1')")

    newest_commit = existing_table.history(1)[0]
    assert newest_commit["operation"] == "DELETE"
    assert existing_table.version() - version_before == 1

    assert existing_table.to_pyarrow_table().equals(survivors)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters