From 3b3162b9ad0ae759d36499c6c7306281155b6a56 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Wed, 25 May 2022 20:32:07 -0500 Subject: [PATCH] Backport PR #46636 on branch 1.4.x (REGR: Replace changes the dtype of other columns) (#47123) Backport PR #46636: REGR: Replace changes the dtype of other columns Co-authored-by: Simon Hawkins --- doc/source/whatsnew/v1.4.3.rst | 1 + pandas/core/internals/blocks.py | 14 ++++++++------ pandas/tests/frame/methods/test_replace.py | 19 +++++++++++++++++++ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst index 415a3ff4efda0..bf414ab77cf65 100644 --- a/doc/source/whatsnew/v1.4.3.rst +++ b/doc/source/whatsnew/v1.4.3.rst @@ -14,6 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`DataFrame.replace` where passing an explicit ``None`` replacement value in a dictionary to ``to_replace`` also cast other columns to object dtype, even when they contained no values to replace (:issue:`46634`) - Fixed regression in :meth:`DataFrame.nsmallest` led to wrong results when ``np.nan`` in the sorting column (:issue:`46589`) - Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`) - Fixed regression in :meth:`.Groupby.transform` and :meth:`.Groupby.agg` failing with ``engine="numba"`` when the index was a :class:`MultiIndex` (:issue:`46867`) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8a09e4ff2d5b7..941b1648a9778 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -875,12 +875,14 @@ def _replace_coerce( ) else: if value is None: - # gh-45601, gh-45836 - nb = self.astype(np.dtype(object), copy=False) - if nb is self and not inplace: - nb = nb.copy() - putmask_inplace(nb.values, mask, value) - return [nb] - # 
gh-45601, gh-45836, gh-46634 + if mask.any(): + nb = self.astype(np.dtype(object), copy=False) + if nb is self and not inplace: + nb = nb.copy() + putmask_inplace(nb.values, mask, value) + return [nb] + return [self] if inplace else [self.copy()] return self.replace( to_replace=to_replace, value=value, inplace=inplace, mask=mask ) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index b84af7b0e0b52..fd2044fd4fa7a 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -675,6 +675,25 @@ def test_replace_NAT_with_None(self): expected = DataFrame([None, None]) tm.assert_frame_equal(result, expected) + def test_replace_with_None_keeps_categorical(self): + # gh-46634 + cat_series = Series(["b", "b", "b", "d"], dtype="category") + df = DataFrame( + { + "id": Series([5, 4, 3, 2], dtype="float64"), + "col": cat_series, + } + ) + result = df.replace({3: None}) + + expected = DataFrame( + { + "id": Series([5.0, 4.0, None, 2.0], dtype="object"), + "col": cat_series, + } + ) + tm.assert_frame_equal(result, expected) + def test_replace_value_is_none(self, datetime_frame): orig_value = datetime_frame.iloc[0, 0] orig2 = datetime_frame.iloc[1, 0]