From 91ffff995c77d7e71ca59e657fbfceaca25ef1a2 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 16 Dec 2021 11:50:56 -0800
Subject: [PATCH 1/7] REF: get regex logic out of Block.replace

---
 pandas/core/generic.py                 | 15 ++++++++++++---
 pandas/core/internals/array_manager.py | 10 ++++++++--
 pandas/core/internals/blocks.py        | 23 +++++++++--------------
 pandas/core/internals/managers.py      | 12 +++++++++---
 4 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 1e5b0a107615e..359d89ce664c3 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -118,6 +118,7 @@
     nanops,
 )
 import pandas.core.algorithms as algos
+from pandas.core.array_algos.replace import should_use_regex
 from pandas.core.arrays import ExtensionArray
 from pandas.core.base import PandasObject
 import pandas.core.common as com
@@ -6688,9 +6689,17 @@ def replace(
                     return self._replace_columnwise(mapping, inplace, regex)
 
                 elif not is_list_like(value):  # NA -> 0
-                    new_data = self._mgr.replace(
-                        to_replace=to_replace, value=value, inplace=inplace, regex=regex
-                    )
+                    regex = should_use_regex(regex, to_replace)
+                    if regex:
+                        new_data = self._mgr.replace_regex(
+                            to_replace=to_replace,
+                            value=value,
+                            inplace=inplace,
+                        )
+                    else:
+                        new_data = self._mgr.replace(
+                            to_replace=to_replace, value=value, inplace=inplace
+                        )
                 else:
                     raise TypeError(
                         f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}'
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 93a9e8fbcb1ad..a817341367592 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -413,11 +413,17 @@ def _convert(arr):
 
         return self.apply(_convert)
 
-    def replace(self: T, value, **kwargs) -> T:
+    def replace_regex(self: T, **kwargs):
+        return self.apply_with_block("_replace_regex", **kwargs)
+
+    def replace(self: T, to_replace, value, inplace: bool) -> T:
+        inplace = validate_bool_kwarg(inplace, "inplace")
         assert np.ndim(value) == 0, value
         # TODO "replace" is right now implemented on the blocks, we should move
         # it to general array algos so it can be reused here
-        return self.apply_with_block("replace", value=value, **kwargs)
+        return self.apply_with_block(
+            "replace", value=value, to_replace=to_replace, inplace=inplace
+        )
 
     def replace_list(
         self: T,
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 7056a34c73008..da2ff58ea3d0d 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -640,14 +640,11 @@ def replace(
         to_replace,
         value,
         inplace: bool = False,
-        regex: bool = False,
     ) -> list[Block]:
         """
         replace the to_replace value with value, possible to create new
-        blocks here this is just a call to putmask. regex is not used here.
-        It is used in ObjectBlocks.  It is here for API compatibility.
+        blocks here; this is just a call to putmask.
         """
-        inplace = validate_bool_kwarg(inplace, "inplace")
 
         # Note: the checks we do in NDFrame.replace ensure we never get
         #  here with listlike to_replace or value, as those cases
@@ -661,11 +658,6 @@ def replace(
             blk.values._replace(to_replace=to_replace, value=value, inplace=True)
             return [blk]
 
-        regex = should_use_regex(regex, to_replace)
-
-        if regex:
-            return self._replace_regex(to_replace, value, inplace=inplace)
-
         if not self._can_hold_element(to_replace):
             # We cannot hold `to_replace`, so we know immediately that
             #  replacing it is a no-op.
@@ -691,13 +683,12 @@ def replace(
                 to_replace=to_replace,
                 value=value,
                 inplace=True,
-                regex=regex,
             )
 
         else:
             # split so that we only upcast where necessary
             return self.split_and_operate(
-                type(self).replace, to_replace, value, inplace=True, regex=regex
+                type(self).replace, to_replace, value, inplace=True
             )
 
     @final
@@ -756,10 +747,14 @@ def replace_list(
         values = self.values
 
         # TODO: dont special-case Categorical
-        if isinstance(values, Categorical) and len(algos.unique(dest_list)) == 1:
+        if (
+            isinstance(values, Categorical)
+            and len(algos.unique(dest_list)) == 1
+            and not regex
+        ):
             # We likely got here by tiling value inside NDFrame.replace,
             #  so un-tile here
-            return self.replace(src_list, dest_list[0], inplace, regex)
+            return self.replace(src_list, dest_list[0], inplace)
 
         # Exclude anything that we know we won't contain
         pairs = [
@@ -866,7 +861,7 @@ def _replace_coerce(
                         convert=False,
                         mask=mask,
                     )
-                return self.replace(to_replace, value, inplace=inplace, regex=False)
+                return self.replace(to_replace, value, inplace=inplace)
         return [self]
 
     # ---------------------------------------------------------------------
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index cb18c6cccbc60..5ebc0292f24b4 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -432,12 +432,18 @@ def convert(
             timedelta=timedelta,
         )
 
-    def replace(self: T, to_replace, value, inplace: bool, regex: bool) -> T:
-        assert np.ndim(value) == 0, value
+    def replace(self: T, to_replace, value, inplace: bool) -> T:
+        inplace = validate_bool_kwarg(inplace, "inplace")
+        # NDFrame.replace ensures to_replace and value are not list-like
+        assert not is_list_like(to_replace)
+        assert not is_list_like(value)
         return self.apply(
-            "replace", to_replace=to_replace, value=value, inplace=inplace, regex=regex
+            "replace", to_replace=to_replace, value=value, inplace=inplace
         )
 
+    def replace_regex(self, **kwargs):
+        return self.apply("_replace_regex", **kwargs)
+
     def replace_list(
         self: T,
         src_list: list[Any],

From ddd71b6f383e7df6bd5a2c6dc048ae4ae62932fd Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 16 Dec 2021 20:09:58 -0800
Subject: [PATCH 2/7] BUG: nullable dtypes not retained by replace

---
 pandas/core/array_algos/replace.py            |  3 +-
 pandas/core/internals/blocks.py               | 47 +++++-------
 .../tests/arrays/categorical/test_replace.py  | 12 +--
 pandas/tests/frame/methods/test_replace.py    |  4 -
 pandas/tests/series/methods/test_replace.py   | 75 ++++++++++++++-----
 5 files changed, 78 insertions(+), 63 deletions(-)

diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py
index 4d1fb8f33e5ad..e26bb9fb6ebad 100644
--- a/pandas/core/array_algos/replace.py
+++ b/pandas/core/array_algos/replace.py
@@ -80,7 +80,8 @@ def _check_comparison_types(
                 f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}"
             )
 
-    if not regex:
+    if not regex or not should_use_regex(regex, b):
+        # TODO: should use missing.mask_missing?
         op = lambda x: operator.eq(x, b)
     else:
         op = np.vectorize(
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index da2ff58ea3d0d..abbebcefc7a87 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -640,6 +640,8 @@ def replace(
         to_replace,
         value,
         inplace: bool = False,
+        # mask may be pre-computed if we're called from replace_list
+        mask: npt.NDArray[np.bool_] | None = None,
     ) -> list[Block]:
         """
         replace the to_replace value with value, possible to create new
@@ -665,7 +667,8 @@ def replace(
             #  replace_list instead of replace.
             return [self] if inplace else [self.copy()]
 
-        mask = missing.mask_missing(values, to_replace)
+        if mask is None:
+            mask = missing.mask_missing(values, to_replace)
         if not mask.any():
             # Note: we get here with test_replace_extension_other incorrectly
             #  bc _can_hold_element is incorrect.
@@ -683,6 +686,7 @@ def replace(
                 to_replace=to_replace,
                 value=value,
                 inplace=True,
+                mask=mask,
             )
 
         else:
@@ -746,16 +750,6 @@ def replace_list(
         """
         values = self.values
 
-        # TODO: dont special-case Categorical
-        if (
-            isinstance(values, Categorical)
-            and len(algos.unique(dest_list)) == 1
-            and not regex
-        ):
-            # We likely got here by tiling value inside NDFrame.replace,
-            #  so un-tile here
-            return self.replace(src_list, dest_list[0], inplace)
-
         # Exclude anything that we know we won't contain
         pairs = [
             (x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
@@ -844,25 +838,18 @@ def _replace_coerce(
         -------
         List[Block]
         """
-        if mask.any():
-            if not regex:
-                nb = self.coerce_to_target_dtype(value)
-                if nb is self and not inplace:
-                    nb = nb.copy()
-                putmask_inplace(nb.values, mask, value)
-                return [nb]
-            else:
-                regex = should_use_regex(regex, to_replace)
-                if regex:
-                    return self._replace_regex(
-                        to_replace,
-                        value,
-                        inplace=inplace,
-                        convert=False,
-                        mask=mask,
-                    )
-                return self.replace(to_replace, value, inplace=inplace)
-        return [self]
+        if should_use_regex(regex, to_replace):
+            return self._replace_regex(
+                to_replace,
+                value,
+                inplace=inplace,
+                convert=False,
+                mask=mask,
+            )
+        else:
+            return self.replace(
+                to_replace=to_replace, value=value, inplace=inplace, mask=mask
+            )
 
     # ---------------------------------------------------------------------
 
diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py
index fe12e7c7571ea..a50b1eddd99be 100644
--- a/pandas/tests/arrays/categorical/test_replace.py
+++ b/pandas/tests/arrays/categorical/test_replace.py
@@ -1,4 +1,3 @@
-import numpy as np
 import pytest
 
 import pandas as pd
@@ -20,10 +19,8 @@
         ([1, 2], 4, [4, 4, 3], False),
         ((1, 2, 4), 5, [5, 5, 3], False),
         ((5, 6), 2, [1, 2, 3], False),
-        # many-to-many, handled outside of Categorical and results in separate dtype
-        #  except for cases with only 1 unique entry in `value`
-        ([1], [2], [2, 2, 3], True),
-        ([1, 4], [5, 2], [5, 2, 3], True),
+        ([1], [2], [2, 2, 3], False),
+        ([1, 4], [5, 2], [5, 2, 3], False),
         # check_categorical sorts categories, which crashes on mixed dtypes
         (3, "4", [1, 2, "4"], False),
         ([1, 2, "3"], "5", ["5", "5", 3], True),
@@ -31,7 +28,6 @@
 )
 def test_replace_categorical_series(to_replace, value, expected, flip_categories):
     # GH 31720
-    stays_categorical = not isinstance(value, list) or len(pd.unique(value)) == 1
 
     ser = pd.Series([1, 2, 3], dtype="category")
     result = ser.replace(to_replace, value)
@@ -41,10 +37,6 @@ def test_replace_categorical_series(to_replace, value, expected, flip_categories
     if flip_categories:
         expected = expected.cat.set_categories(expected.cat.categories[::-1])
 
-    if not stays_categorical:
-        # the replace call loses categorical dtype
-        expected = pd.Series(np.asarray(expected))
-
     tm.assert_series_equal(expected, result, check_category_order=False)
     tm.assert_series_equal(expected, ser, check_category_order=False)
 
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index 28e28490c73b9..44489ffd5761a 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -1382,10 +1382,6 @@ def test_replace_value_category_type(self):
 
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.xfail(
-        reason="category dtype gets changed to object type after replace, see #35268",
-        raises=AssertionError,
-    )
     def test_replace_dict_category_type(self):
         """
         Test to ensure category dtypes are maintained
diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
index 3a55062af618f..22f55af69dbfd 100644
--- a/pandas/tests/series/methods/test_replace.py
+++ b/pandas/tests/series/methods/test_replace.py
@@ -5,6 +5,7 @@
 
 import pandas as pd
 import pandas._testing as tm
+from pandas.core.arrays import IntervalArray
 
 
 class TestSeriesReplace:
@@ -148,20 +149,22 @@ def test_replace_with_single_list(self):
         tm.assert_series_equal(s, ser)
 
     def test_replace_mixed_types(self):
-        s = pd.Series(np.arange(5), dtype="int64")
+        ser = pd.Series(np.arange(5), dtype="int64")
 
         def check_replace(to_rep, val, expected):
-            sc = s.copy()
-            r = s.replace(to_rep, val)
+            sc = ser.copy()
+            result = ser.replace(to_rep, val)
             return_value = sc.replace(to_rep, val, inplace=True)
             assert return_value is None
-            tm.assert_series_equal(expected, r)
+            tm.assert_series_equal(expected, result)
             tm.assert_series_equal(expected, sc)
 
-        # MUST upcast to float
-        e = pd.Series([0.0, 1.0, 2.0, 3.0, 4.0])
+        # 3.0 can still be held in our int64 series, so we do not upcast
+        # Note this matches what we get with the scalars 3 and 3.0
         tr, v = [3], [3.0]
-        check_replace(tr, v, e)
+        check_replace(tr, v, ser)
+        # Note this matches what we get with the scalars 3 and 3.0
+        check_replace(tr[0], v[0], ser)
 
         # MUST upcast to float
         e = pd.Series([0, 1, 2, 3.5, 4])
@@ -258,9 +261,9 @@ def test_replace2(self):
 
     def test_replace_with_dictlike_and_string_dtype(self, nullable_string_dtype):
         # GH 32621
-        s = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype)
-        expected = pd.Series(["1", "2", np.nan])
-        result = s.replace({"one": "1", "two": "2"})
+        ser = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype)
+        expected = pd.Series(["1", "2", np.nan], dtype=nullable_string_dtype)
+        result = ser.replace({"one": "1", "two": "2"})
         tm.assert_series_equal(expected, result)
 
     def test_replace_with_empty_dictlike(self):
@@ -305,17 +308,17 @@ def test_replace_mixed_types_with_string(self):
         "categorical, numeric",
         [
             (pd.Categorical(["A"], categories=["A", "B"]), [1]),
-            (pd.Categorical(("A",), categories=["A", "B"]), [1]),
-            (pd.Categorical(("A", "B"), categories=["A", "B"]), [1, 2]),
+            (pd.Categorical(["A", "B"], categories=["A", "B"]), [1, 2]),
         ],
     )
     def test_replace_categorical(self, categorical, numeric):
-        # GH 24971
-        # Do not check if dtypes are equal due to a known issue that
-        # Categorical.replace sometimes coerces to object (GH 23305)
-        s = pd.Series(categorical)
-        result = s.replace({"A": 1, "B": 2})
-        expected = pd.Series(numeric)
+        # GH 24971, GH#23305
+        ser = pd.Series(categorical)
+        result = ser.replace({"A": 1, "B": 2})
+        expected = pd.Series(numeric).astype("category")
+        if 2 not in expected.cat.categories:
+            # i.e. categories should be [1, 2] even if there are no "B"s present
+            expected = expected.cat.add_categories(2)
         tm.assert_series_equal(expected, result)
 
     def test_replace_categorical_single(self):
@@ -514,3 +517,39 @@ def test_pandas_replace_na(self):
         result = ser.replace(regex_mapping, regex=True)
         exp = pd.Series(["CC", "CC", "CC-REPL", "DD", "CC", "", pd.NA], dtype="string")
         tm.assert_series_equal(result, exp)
+
+    @pytest.mark.parametrize(
+        "dtype, input_data, to_replace, expected_data",
+        [
+            ("bool", [True, False], {True: False}, [False, False]),
+            ("int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
+            ("Int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
+            ("float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
+            ("Float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
+            ("string", ["one", "two"], {"one": "1", "two": "2"}, ["1", "2"]),
+            (
+                pd.IntervalDtype("int64"),
+                IntervalArray([pd.Interval(1, 2), pd.Interval(2, 3)]),
+                {pd.Interval(1, 2): pd.Interval(10, 20)},
+                IntervalArray([pd.Interval(10, 20), pd.Interval(2, 3)]),
+            ),
+            (
+                pd.IntervalDtype("float64"),
+                IntervalArray([pd.Interval(1.0, 2.7), pd.Interval(2.8, 3.1)]),
+                {pd.Interval(1.0, 2.7): pd.Interval(10.6, 20.8)},
+                IntervalArray([pd.Interval(10.6, 20.8), pd.Interval(2.8, 3.1)]),
+            ),
+            (
+                pd.PeriodDtype("M"),
+                [pd.Period("2020-05", freq="M")],
+                {pd.Period("2020-05", freq="M"): pd.Period("2020-06", freq="M")},
+                [pd.Period("2020-06", freq="M")],
+            ),
+        ],
+    )
+    def test_replace_dtype(self, dtype, input_data, to_replace, expected_data):
+        # GH#33484
+        ser = pd.Series(input_data, dtype=dtype)
+        result = ser.replace(to_replace)
+        expected = pd.Series(expected_data, dtype=dtype)
+        tm.assert_series_equal(result, expected)

From 56c8b8494a0926a01a5c4c5a1888157da7829048 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 17 Dec 2021 08:47:05 -0800
Subject: [PATCH 3/7] catch warning

---
 pandas/tests/indexing/test_coercion.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
index 2366dd39c25f2..9213c420a9a00 100644
--- a/pandas/tests/indexing/test_coercion.py
+++ b/pandas/tests/indexing/test_coercion.py
@@ -1177,6 +1177,7 @@ def test_replace_series_datetime_tz(self, how, to_key, from_key, replacer):
         assert obj.dtype == from_key
 
         result = obj.replace(replacer)
+
         exp = pd.Series(self.rep[to_key], index=index, name="yyy")
         assert exp.dtype == to_key
 
@@ -1197,7 +1198,21 @@ def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer)
         obj = pd.Series(self.rep[from_key], index=index, name="yyy")
         assert obj.dtype == from_key
 
-        result = obj.replace(replacer)
+        warn = None
+        rep_ser = pd.Series(replacer)
+        if (
+            isinstance(obj.dtype, pd.DatetimeTZDtype)
+            and isinstance(rep_ser.dtype, pd.DatetimeTZDtype)
+            and obj.dtype != rep_ser.dtype
+        ):
+            # mismatched tz DatetimeArray behavior will change to cast
+            #  for setitem-like methods with mismatched tzs
+            warn = FutureWarning
+
+        msg = "explicitly cast to object"
+        with tm.assert_produces_warning(warn, match=msg):
+            result = obj.replace(replacer)
+
         exp = pd.Series(self.rep[to_key], index=index, name="yyy")
         assert exp.dtype == to_key
 

From f3d9b4096f025b3e9ba9cff82b8f9ccab3a092c2 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 17 Dec 2021 11:38:38 -0800
Subject: [PATCH 4/7] whatsnew, GH ref

---
 doc/source/whatsnew/v1.4.0.rst              | 4 ++--
 pandas/tests/frame/methods/test_replace.py  | 1 +
 pandas/tests/indexing/test_coercion.py      | 2 +-
 pandas/tests/series/methods/test_replace.py | 6 +++---
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index caf3a4281561f..d46669cf943fe 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -529,7 +529,7 @@ Other Deprecations
 - Deprecated silent dropping of columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a dictionary (:issue:`43740`)
 - Deprecated silent dropping of columns that raised a ``TypeError``, ``DataError``, and some cases of ``ValueError`` in :meth:`Series.aggregate`, :meth:`DataFrame.aggregate`, :meth:`Series.groupby.aggregate`, and :meth:`DataFrame.groupby.aggregate` when used with a list (:issue:`43740`)
 - Deprecated casting behavior when setting timezone-aware value(s) into a timezone-aware :class:`Series` or :class:`DataFrame` column when the timezones do not match. Previously this cast to object dtype. In a future version, the values being inserted will be converted to the series or column's existing timezone (:issue:`37605`)
-- Deprecated casting behavior when passing an item with mismatched-timezone to :meth:`DatetimeIndex.insert`, :meth:`DatetimeIndex.putmask`, :meth:`DatetimeIndex.where` :meth:`DatetimeIndex.fillna`, :meth:`Series.mask`, :meth:`Series.where`, :meth:`Series.fillna`, :meth:`Series.shift`, :meth:`Series.replace`, :meth:`Series.reindex` (and :class:`DataFrame` column analogues). In the past this has cast to object dtype. In a future version, these will cast the passed item to the index or series's timezone (:issue:`37605`)
+- Deprecated casting behavior when passing an item with mismatched-timezone to :meth:`DatetimeIndex.insert`, :meth:`DatetimeIndex.putmask`, :meth:`DatetimeIndex.where`, :meth:`DatetimeIndex.fillna`, :meth:`Series.mask`, :meth:`Series.where`, :meth:`Series.fillna`, :meth:`Series.shift`, :meth:`Series.replace`, :meth:`Series.reindex` (and :class:`DataFrame` column analogues). In the past this has cast to object dtype. In a future version, these will cast the passed item to the index or series's timezone (:issue:`37605`, :issue:`44940`)
 - Deprecated the 'errors' keyword argument in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, and meth:`DataFrame.mask`; in a future version the argument will be removed (:issue:`44294`)
 - Deprecated the ``prefix`` keyword argument in :func:`read_csv` and :func:`read_table`, in a future version the argument will be removed (:issue:`43396`)
 - Deprecated :meth:`PeriodIndex.astype` to ``datetime64[ns]`` or ``DatetimeTZDtype``, use ``obj.to_timestamp(how).tz_localize(dtype.tz)`` instead (:issue:`44398`)
@@ -837,7 +837,7 @@ ExtensionArray
 - Bug in :func:`array` incorrectly raising when passed a ``ndarray`` with ``float16`` dtype (:issue:`44715`)
 - Bug in calling ``np.sqrt`` on :class:`BooleanArray` returning a malformed :class:`FloatingArray` (:issue:`44715`)
 - Bug in :meth:`Series.where` with ``ExtensionDtype`` when ``other`` is a NA scalar incompatible with the series dtype (e.g. ``NaT`` with a numeric dtype) incorrectly casting to a compatible NA value (:issue:`44697`)
--
+- Bug in :meth:`Series.replace` with ``FloatDtype``, ``string[python]``, or ``string[pyarrow]`` dtype not being preserved when possible (:issue:`33484`)
 
 Styler
 ^^^^^^
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index 44489ffd5761a..a772b745e47d3 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -1387,6 +1387,7 @@ def test_replace_dict_category_type(self):
         Test to ensure category dtypes are maintained
         after replace with dict values
         """
+        # GH#35268, GH#44940
 
         # create input dataframe
         input_dict = {"col1": ["a"], "col2": ["obj1"], "col3": ["cat1"]}
diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
index 9213c420a9a00..014f0f5933387 100644
--- a/pandas/tests/indexing/test_coercion.py
+++ b/pandas/tests/indexing/test_coercion.py
@@ -1206,7 +1206,7 @@ def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer)
             and obj.dtype != rep_ser.dtype
         ):
             # mismatched tz DatetimeArray behavior will change to cast
-            #  for setitem-like methods with mismatched tzs
+            #  for setitem-like methods with mismatched tzs GH#44940
             warn = FutureWarning
 
         msg = "explicitly cast to object"
diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
index 22f55af69dbfd..78129439952da 100644
--- a/pandas/tests/series/methods/test_replace.py
+++ b/pandas/tests/series/methods/test_replace.py
@@ -159,8 +159,7 @@ def check_replace(to_rep, val, expected):
             tm.assert_series_equal(expected, result)
             tm.assert_series_equal(expected, sc)
 
-        # 3.0 can still be held in our int64 series, so we do not upcast
-        # Note this matches what we get with the scalars 3 and 3.0
+        # 3.0 can still be held in our int64 series, so we do not upcast GH#44940
         tr, v = [3], [3.0]
         check_replace(tr, v, ser)
         # Note this matches what we get with the scalars 3 and 3.0
@@ -260,7 +259,7 @@ def test_replace2(self):
         assert (ser[20:30] == -1).all()
 
     def test_replace_with_dictlike_and_string_dtype(self, nullable_string_dtype):
-        # GH 32621
+        # GH 32621, GH#44940
         ser = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype)
         expected = pd.Series(["1", "2", np.nan], dtype=nullable_string_dtype)
         result = ser.replace({"one": "1", "two": "2"})
@@ -318,6 +317,7 @@ def test_replace_categorical(self, categorical, numeric):
         expected = pd.Series(numeric).astype("category")
         if 2 not in expected.cat.categories:
             # i.e. categories should be [1, 2] even if there are no "B"s present
+            # GH#44940
             expected = expected.cat.add_categories(2)
         tm.assert_series_equal(expected, result)
 

From 569db46eb50f7300e67bcb1663f826a5a6e4220a Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 17 Dec 2021 22:12:04 -0800
Subject: [PATCH 5/7] TST: closes #40732

---
 pandas/tests/series/methods/test_replace.py | 26 +++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
index 78129439952da..971861dfe8812 100644
--- a/pandas/tests/series/methods/test_replace.py
+++ b/pandas/tests/series/methods/test_replace.py
@@ -553,3 +553,29 @@ def test_replace_dtype(self, dtype, input_data, to_replace, expected_data):
         result = ser.replace(to_replace)
         expected = pd.Series(expected_data, dtype=dtype)
         tm.assert_series_equal(result, expected)
+
+    def test_replace_string_dtype(self):
+        # GH#40732, GH#44940
+        ser = pd.Series(["one", "two", np.nan], dtype="string")
+        res = ser.replace({"one": "1", "two": "2"})
+        expected = pd.Series(["1", "2", np.nan], dtype="string")
+        tm.assert_series_equal(res, expected)
+
+    def test_replace_nullable_numeric(self):
+        # GH#40732, GH#44940
+
+        floats = pd.Series([1.0, 2.0, 3.999, 4.4], dtype=pd.Float64Dtype())
+        assert floats.replace({1.0: 9}).dtype == floats.dtype
+        assert floats.replace(1.0, 9).dtype == floats.dtype
+        assert floats.replace({1.0: 9.0}).dtype == floats.dtype
+        assert floats.replace(1.0, 9.0).dtype == floats.dtype
+
+        res = floats.replace(to_replace=[1.0, 2.0], value=[9.0, 10.0])
+        assert res.dtype == floats.dtype
+
+        ints = pd.Series([1, 2, 3, 4], dtype=pd.Int64Dtype())
+        assert ints.replace({1: 9}).dtype == ints.dtype
+        assert ints.replace(1, 9).dtype == ints.dtype
+        assert ints.replace({1: 9.0}).dtype == ints.dtype
+        assert ints.replace(1, 9.0).dtype == ints.dtype
+        # FIXME: ints.replace({1: 9.5}) raises bc of incorrect _can_hold_element

From 2351cbbb4a294b0431af464516d1150bd836c18f Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 17 Dec 2021 22:27:18 -0800
Subject: [PATCH 6/7] tests for more closed issues

---
 pandas/tests/series/methods/test_replace.py | 25 +++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
index 971861dfe8812..b7d6c498d1e0b 100644
--- a/pandas/tests/series/methods/test_replace.py
+++ b/pandas/tests/series/methods/test_replace.py
@@ -561,6 +561,31 @@ def test_replace_string_dtype(self):
         expected = pd.Series(["1", "2", np.nan], dtype="string")
         tm.assert_series_equal(res, expected)
 
+        # GH#31644
+        ser2 = pd.Series(["A", np.nan], dtype="string")
+        res2 = ser2.replace("A", "B")
+        expected2 = pd.Series(["B", np.nan], dtype="string")
+        tm.assert_series_equal(res2, expected2)
+
+        ser3 = pd.Series(["A", "B"], dtype="string")
+        res3 = ser3.replace("A", pd.NA)
+        expected3 = pd.Series([pd.NA, "B"], dtype="string")
+        tm.assert_series_equal(res3, expected3)
+
+    def test_replace_string_dtype_list_to_replace(self):
+        # GH#41215, GH#44940
+        ser = pd.Series(["abc", "def"], dtype="string")
+        res = ser.replace(["abc", "any other string"], "xyz")
+        expected = pd.Series(["xyz", "def"], dtype="string")
+        tm.assert_series_equal(res, expected)
+
+    def test_replace_string_dtype_regex(self):
+        # GH#31644
+        ser = pd.Series(["A", "B"], dtype="string")
+        res = ser.replace(r".", "C", regex=True)
+        expected = pd.Series(["C", "C"], dtype="string")
+        tm.assert_series_equal(res, expected)
+
     def test_replace_nullable_numeric(self):
         # GH#40732, GH#44940
 

From b7679d363ec206e44f69ae861a68e2d0f5b86a15 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 17 Dec 2021 22:31:19 -0800
Subject: [PATCH 7/7] test for closed issue

---
 pandas/tests/frame/methods/test_replace.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index a772b745e47d3..1bfc00f8d31ac 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -624,6 +624,14 @@ def test_replace_mixed3(self):
         expected.iloc[1, 1] = m[1]
         tm.assert_frame_equal(result, expected)
 
+    def test_replace_nullable_int_with_string_doesnt_cast(self):
+        # GH#25438 don't cast df['a'] to float64
+        df = DataFrame({"a": [1, 2, 3, np.nan], "b": ["some", "strings", "here", "he"]})
+        df["a"] = df["a"].astype("Int64")
+
+        res = df.replace("", np.nan)
+        tm.assert_series_equal(res["a"], df["a"])
+
     @pytest.mark.parametrize("dtype", ["boolean", "Int64", "Float64"])
     def test_replace_with_nullable_column(self, dtype):
         # GH-44499