From cacc73ffe11e77c668e2cd94b038f13f7773cf1a Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 3 Jul 2025 14:38:01 -0700 Subject: [PATCH 1/4] BUG[string]: incorrect index downcast in DataFrame.join --- doc/source/whatsnew/v3.0.0.rst | 2 ++ pandas/core/reshape/merge.py | 6 +++--- pandas/tests/copy_view/test_functions.py | 16 ++++------------ 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 94e375615d122..7f853a7ef5ab0 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -866,6 +866,8 @@ Reshaping - Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`) - Bug in :func:`melt` where calling with duplicate column names in ``id_vars`` raised a misleading ``AttributeError`` (:issue:`61475`) - Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`) +- Bug in :meth:`DataFrame.join` incorrectly downcasting object-dtype indexes (:issue:`??`) +- Sparse ^^^^^^ diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index f762695eedb3d..285256ac7b16a 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1328,13 +1328,13 @@ def _maybe_add_join_keys( # if we have an all missing left_indexer # make sure to just use the right values or vice-versa if left_indexer is not None and (left_indexer == -1).all(): - key_col = Index(rvals) + key_col = Index(rvals, dtype=rvals.dtype, copy=False) result_dtype = rvals.dtype elif right_indexer is not None and (right_indexer == -1).all(): - key_col = Index(lvals) + key_col = Index(lvals, dtype=lvals.dtype, copy=False) result_dtype = lvals.dtype else: - key_col = Index(lvals) + key_col = Index(lvals, dtype=lvals.dtype, copy=False) if left_indexer is not None: mask_left = left_indexer == -1 key_col = key_col.where(~mask_left, rvals) diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py index 32fea794975b6..d23263835c615 100644 --- a/pandas/tests/copy_view/test_functions.py +++ b/pandas/tests/copy_view/test_functions.py @@ -1,10 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - -from pandas.compat import HAS_PYARROW - from pandas import ( DataFrame, Index, @@ -247,13 +243,9 @@ def test_merge_copy_keyword(): assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) -@pytest.mark.xfail( - using_string_dtype() and HAS_PYARROW, - reason="TODO(infer_string); result.index infers str dtype while both " - "df1 and df2 index are object.", -) -def test_join_on_key(): - df_index = Index(["a", "b", "c"], name="key", dtype=object) +@pytest.mark.parametrize("dtype", [object, "str"]) +def test_join_on_key(dtype): + df_index = Index(["a", "b", "c"], name="key", dtype=dtype) df1 = DataFrame({"a": [1, 2, 3]}, index=df_index.copy(deep=True)) df2 = DataFrame({"b": [4, 5, 6]}, index=df_index.copy(deep=True)) @@ -265,7 +257,7 @@ def test_join_on_key(): assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) - assert np.shares_memory(get_array(result.index), get_array(df1.index)) + assert tm.shares_memory(get_array(result.index), get_array(df1.index)) assert not np.shares_memory(get_array(result.index), get_array(df2.index)) result.iloc[0, 0] = 0 From 5d515e9e78f75bf2ecb009aa48af2e9db6abfce5 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 3 Jul 2025 14:38:55 -0700 Subject: [PATCH 2/4] GH ref --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7f853a7ef5ab0..c26e387240dbe 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -865,8 +865,8 @@ Reshaping - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`) - Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`) - Bug in :func:`melt` where calling with duplicate column names in ``id_vars`` raised a misleading ``AttributeError`` (:issue:`61475`) +- Bug in :meth:`DataFrame.join` incorrectly downcasting object-dtype indexes (:issue:`61771`) - Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`) -- Bug in :meth:`DataFrame.join` incorrectly downcasting object-dtype indexes (:issue:`??`) - Sparse From 133e9bc457980d4d0b486e4548ceceba4c27c5c7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 7 Jul 2025 13:42:16 +0200 Subject: [PATCH 3/4] move whatsnew to 2.3.1 --- doc/source/whatsnew/v2.3.1.rst | 1 + doc/source/whatsnew/v3.0.0.rst | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.3.1.rst b/doc/source/whatsnew/v2.3.1.rst index eb3ad72f6a59f..2ee9cf9263768 100644 --- a/doc/source/whatsnew/v2.3.1.rst +++ b/doc/source/whatsnew/v2.3.1.rst @@ -57,6 +57,7 @@ correctly, rather than defaulting to ``object`` dtype. For example: Bug fixes ^^^^^^^^^ - Bug in :meth:`.DataFrameGroupBy.min`, :meth:`.DataFrameGroupBy.max`, :meth:`.Resampler.min`, :meth:`.Resampler.max` where all NA values of string dtype would return float instead of string dtype (:issue:`60810`) +- Bug in :meth:`DataFrame.join` incorrectly downcasting object-dtype indexes (:issue:`61771`) - Bug in :meth:`DataFrame.sum` with ``axis=1``, :meth:`.DataFrameGroupBy.sum` or :meth:`.SeriesGroupBy.sum` with ``skipna=True``, and :meth:`.Resampler.sum` with all NA values of :class:`StringDtype` resulted in ``0`` instead of the empty string ``""`` (:issue:`60229`) - Fixed bug in :meth:`DataFrame.explode` and :meth:`Series.explode` where methods would fail with ``dtype="str"`` (:issue:`61623`) - Fixed bug in unpickling objects pickled in pandas versions pre-2.3.0 that used :class:`StringDtype` (:issue:`61763`). diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 5514226ec1bca..3fefc5d15b720 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -866,7 +866,6 @@ Reshaping - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`) - Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`) - Bug in :func:`melt` where calling with duplicate column names in ``id_vars`` raised a misleading ``AttributeError`` (:issue:`61475`) -- Bug in :meth:`DataFrame.join` incorrectly downcasting object-dtype indexes (:issue:`61771`) - Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`) - From 1bb89abd4fa0299e2a6d794df41ee125038f9f1d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 7 Jul 2025 13:42:51 +0200 Subject: [PATCH 4/4] fixup --- doc/source/whatsnew/v3.0.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 3fefc5d15b720..4154942f92907 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -867,7 +867,6 @@ Reshaping - Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`) - Bug in :func:`melt` where calling with duplicate column names in ``id_vars`` raised a misleading ``AttributeError`` (:issue:`61475`) - Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`) -- Sparse ^^^^^^