From 5cee29b820cc822c67b18ec36d3eb13432230f0d Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 12 Aug 2024 17:05:11 +0200
Subject: [PATCH] Fix tuples as on argument in merge (#1117)

---
Reviewer notes (this region is not part of the commit message and is
skipped when the patch is applied):

* dask_expr/_collection.py, merge(): after `on` has been copied into
  `left_on` / `right_on`, any list-like `left_on` or `right_on` that is not
  a `FrameBase` instance is converted with `list(...)`. Tuple keys such as
  `("a",)` therefore reach the rest of merge() as lists.
* dask_expr/tests/test_merge.py: adds `test_merge_tuple_left_on`, which
  compares the dask result with the pandas result for
  `left_on=("a",), right_on=("a",)` and for `on=("a",)`, both with
  `check_index=False`.
* dask_expr/tests/test_resample.py: the repartition frequency used in
  `test_resample_divisions_propagation` changes from "1T" to "1d".
  NOTE(review): the subject line does not mention this change; presumably it
  avoids the "T" offset alias, but it also changes the interval from one
  minute to one day. Confirm that this is intended.
* NOTE(review): line breaks and indentation in this copy were restored using
  the line counts in the hunk headers; verify against the upstream commit
  before applying.

 dask_expr/_collection.py         |  5 +++++
 dask_expr/tests/test_merge.py    | 21 +++++++++++++++++++++
 dask_expr/tests/test_resample.py |  2 +-
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/dask_expr/_collection.py b/dask_expr/_collection.py
index eed74b4e..88681dd1 100644
--- a/dask_expr/_collection.py
+++ b/dask_expr/_collection.py
@@ -5658,6 +5658,11 @@ def merge(
     if on and not left_on and not right_on:
         left_on = right_on = on
 
+    if pd.api.types.is_list_like(left_on) and not isinstance(left_on, FrameBase):
+        left_on = list(left_on)
+    if pd.api.types.is_list_like(right_on) and not isinstance(right_on, FrameBase):
+        right_on = list(right_on)
+
     supported_how = ("left", "right", "outer", "inner", "leftsemi")
     if how not in supported_how:
         raise ValueError(
diff --git a/dask_expr/tests/test_merge.py b/dask_expr/tests/test_merge.py
index d16035df..90385e39 100644
--- a/dask_expr/tests/test_merge.py
+++ b/dask_expr/tests/test_merge.py
@@ -1029,3 +1029,24 @@ def test_merge_after_rename(index):
     expected = pleft.merge(right, how="inner")
     result = left.merge(right, how="inner")
     assert_eq(result, expected, check_index=False)
+
+
+def test_merge_tuple_left_on():
+    df = pd.DataFrame(
+        {
+            "a": [1, 2, 3] * 5,
+            "b": [1, 2, 3] * 5,
+            "c": ["A"] * 15,
+        },
+    )
+    ddf = from_pandas(df, npartitions=2)
+    assert_eq(
+        ddf.merge(ddf, left_on=("a",), right_on=("a",)),
+        df.merge(df, left_on=("a",), right_on=("a",)),
+        check_index=False,
+    )
+    assert_eq(
+        ddf.merge(ddf, on=("a",)),
+        df.merge(df, on=("a",)),
+        check_index=False,
+    )
diff --git a/dask_expr/tests/test_resample.py b/dask_expr/tests/test_resample.py
index 338fc0dd..1921ae42 100644
--- a/dask_expr/tests/test_resample.py
+++ b/dask_expr/tests/test_resample.py
@@ -135,7 +135,7 @@ def test_resample_divisions_propagation():
     pdf = pd.DataFrame({"data": 1}, index=idx)
     df = from_pandas(pdf, npartitions=10)
     result = df.resample("0.03s").mean()
-    result = result.repartition(freq="1T")
+    result = result.repartition(freq="1d")
     expected = pdf.resample("0.03s").mean()
     assert_eq(result, expected)
 