# Patch overview (explanatory text ahead of the first "diff --git" header).
#
# dask_expr/_shuffle.py, _get_index:
#   In the `elif index_shuffle:` branch, obj.index.to_frame() is now called
#   with name="_index" when idx == ["_index"], and with no arguments otherwise.
#   Per the in-code comment, the explicit name avoids ending up with 0.
#
# dask_expr/tests/test_merge.py:
#   Imports dask.delayed and dask_expr.from_delayed, and adds
#   test_mismatching_meta. The test joins a from_pandas frame with a
#   from_delayed frame whose partitions carry an index named "index" while the
#   `meta` handed to from_delayed is built without an index name, then compares
#   the result with the same join on the computed frames (check_index=False).
#
# NOTE(review): this patch was recovered from a copy in which all line breaks
# had been collapsed. Indentation and blank context lines were reconstructed
# from the hunk line counts and ordinary Python layout. _get_index is assumed
# to be a nested function at method depth, because it reads `index_shuffle`
# without taking it as a parameter -- TODO confirm against the target files
# before applying.
diff --git a/dask_expr/_shuffle.py b/dask_expr/_shuffle.py
index b01f7cf3..bc3aee6e 100644
--- a/dask_expr/_shuffle.py
+++ b/dask_expr/_shuffle.py
@@ -731,7 +731,9 @@ def _get_index(idx, obj):
                 if idx.ndim == 1:
                     idx = idx.to_frame()
             elif index_shuffle:
-                idx = obj.index.to_frame()
+                # set default index name, otherwise we will end up with 0
+                name = {"name": "_index"} if idx == ["_index"] else {}
+                idx = obj.index.to_frame(**name)
             else:
                 idx = _select_columns_or_index(obj, idx)
             return idx
diff --git a/dask_expr/tests/test_merge.py b/dask_expr/tests/test_merge.py
index 90385e39..7711028a 100644
--- a/dask_expr/tests/test_merge.py
+++ b/dask_expr/tests/test_merge.py
@@ -2,8 +2,9 @@
 
 import numpy as np
 import pytest
+from dask import delayed
 
-from dask_expr import Merge, from_pandas, merge, repartition
+from dask_expr import Merge, from_delayed, from_pandas, merge, repartition
 from dask_expr._expr import Filter, Projection
 from dask_expr._merge import BroadcastJoin
 from dask_expr._shuffle import Shuffle
@@ -1031,6 +1032,22 @@ def test_merge_after_rename(index):
     assert_eq(result, expected, check_index=False)
 
 
+def test_mismatching_meta():
+    df1 = from_pandas(pd.DataFrame({"value": [1, 2, 3]}))
+
+    def creator(i):
+        return pd.DataFrame(
+            {"value2": [1, 2, 3]}, index=pd.Index([1, 2, 3], name="index")
+        )
+
+    df2 = from_delayed(
+        [delayed(creator)(i) for i in range(3)],
+        meta=pd.DataFrame({"value2": [1, 2, 3]}),
+    )
+    result = df1.join(df2)
+    assert_eq(result, df1.compute().join(df2.compute()), check_index=False)
+
+
 def test_merge_tuple_left_on():
     df = pd.DataFrame(
         {