Skip to content

Commit

Permalink
Fix DataFrame.merge(Series, how="left"/"right") on column and index n…
Browse files Browse the repository at this point in the history
…ot resulting in a RangeIndex (rapidsai#17739)

A left or right merge of a DataFrame column with a Series index (or vice versa) did not produce a `cudf.RangeIndex` in the result, unlike pandas, which does.

IMO not the most ideal fix, but I think this is the best location for the fix given the flow of this merge code.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: rapidsai#17739
  • Loading branch information
mroeschke authored Jan 15, 2025
1 parent 834565a commit 89e8703
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 1 deletion.
18 changes: 18 additions & 0 deletions python/cudf/cudf/core/join/join.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
from __future__ import annotations

import itertools
from typing import Any

import pylibcudf as plc
Expand Down Expand Up @@ -188,6 +189,23 @@ def __init__(
self._using_right_index = any(
isinstance(idx, _IndexIndexer) for idx in self._right_keys
)
if self.how in {"left", "right"} and not (
all(
isinstance(idx, _IndexIndexer)
for idx in itertools.chain(
self._left_keys, self._right_keys
)
)
or all(
isinstance(idx, _ColumnIndexer)
for idx in itertools.chain(
self._left_keys, self._right_keys
)
)
):
# For left/right merges, joining on an index and column should result in a RangeIndex
self._using_left_index = False
self._using_right_index = False
else:
# if `on` is not provided and we're not merging
# index with column or on both indexes, then use
Expand Down
17 changes: 16 additions & 1 deletion python/cudf/cudf/tests/test_joining.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
# Copyright (c) 2018-2025, NVIDIA CORPORATION.

from itertools import combinations, product, repeat

Expand Down Expand Up @@ -2275,3 +2275,18 @@ def test_merge_timedelta_types(dtype1, dtype2):
else True,
check_dtype=len(actual) > 0,
)


def test_merge_index_on_opposite_how_column_reset_index():
    """Compare cudf and pandas when merging a column with a named index.

    The DataFrame has a column ``"a"`` and a non-default index; the Series
    has an index named ``"a"``. Merging ``on="a"`` therefore pairs a column
    on one side with an index on the other. Both the ``how="left"``
    (DataFrame first) and the mirrored ``how="right"`` (Series first) merges
    are checked against the pandas result.
    """
    pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5]}, index=[1, 3, 5, 7, 9])
    pser = pd.Series([1, 2], index=pd.Index([1, 2], name="a"), name="b")
    gdf = cudf.DataFrame.from_pandas(pdf)
    gser = cudf.Series.from_pandas(pser)

    # Each case: (how, pandas operands, matching cudf operands). The second
    # case swaps the operand order so the DataFrame is still the side whose
    # rows are all kept.
    cases = (
        ("left", (pdf, pser), (gdf, gser)),
        ("right", (pser, pdf), (gser, gdf)),
    )
    for how, pandas_operands, cudf_operands in cases:
        expected = pd.merge(*pandas_operands, on="a", how=how)
        result = cudf.merge(*cudf_operands, on="a", how=how)
        assert_eq(result, expected)

0 comments on commit 89e8703

Please sign in to comment.