From 7117306f1b7a6da497c1f26b8e8f72aae922136d Mon Sep 17 00:00:00 2001 From: Hasan Yaman Date: Sat, 10 Apr 2021 15:24:17 +0300 Subject: [PATCH 1/6] BUG: Fix dtypes change using replace with nullable dtypes (#40732) --- pandas/core/internals/blocks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 603cc6a6ff1f2..f2e1145160016 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -870,7 +870,10 @@ def _replace_coerce( """ if mask.any(): if not regex: - nb = self.coerce_to_target_dtype(value) + if not isinstance(self.values, Categorical) and self._can_hold_element(value): + nb = self + else: + nb = self.coerce_to_target_dtype(value) if nb is self and not inplace: nb = nb.copy() putmask_inplace(nb.values, mask, value) From 7616c070175695684830f18f3d9047a6f79a0852 Mon Sep 17 00:00:00 2001 From: Hasan Yaman Date: Sat, 10 Apr 2021 15:28:32 +0300 Subject: [PATCH 2/6] STYLE: Fix too long if statement (#40732) --- pandas/core/internals/blocks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f2e1145160016..56a92fc92e165 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -870,7 +870,9 @@ def _replace_coerce( """ if mask.any(): if not regex: - if not isinstance(self.values, Categorical) and self._can_hold_element(value): + if not isinstance(self.values, Categorical) and self._can_hold_element( + value + ): nb = self else: nb = self.coerce_to_target_dtype(value) From 11f0b3ae78fed6803f26c174968d91ce48c60cd0 Mon Sep 17 00:00:00 2001 From: Hasan Yaman Date: Sat, 10 Apr 2021 22:57:54 +0300 Subject: [PATCH 3/6] BUG: Fix dtypes change using replace with nullable dtypes (#40742) --- pandas/core/internals/blocks.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 56a92fc92e165..0c87f0e974112 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -870,12 +870,13 @@ def _replace_coerce( """ if mask.any(): if not regex: - if not isinstance(self.values, Categorical) and self._can_hold_element( - value - ): + if isinstance( + self.values, (IntegerArray, FloatingArray) + ) and self._can_hold_element(value): nb = self else: nb = self.coerce_to_target_dtype(value) + if nb is self and not inplace: nb = nb.copy() putmask_inplace(nb.values, mask, value) From de8e570c9366470a20b00dec84d16ff9cd99b254 Mon Sep 17 00:00:00 2001 From: Hasan Yaman Date: Mon, 12 Apr 2021 21:25:58 +0300 Subject: [PATCH 4/6] TST: Add tests for replace with nullable types (#40732) --- pandas/tests/frame/methods/test_replace.py | 28 ++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index d8f93f047e74b..f8ca312a9042f 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1681,3 +1681,31 @@ def test_replace_bytes(self, frame_or_series): expected = obj.copy() obj = obj.replace({None: np.nan}) tm.assert_equal(obj, expected) + + @pytest.mark.parametrize( + "value, to_replace, expected, dtype", + [ + ( + [1.0, 2.0, 3.999, 4.4], + {1.0: 9}, + [9.0, 2.0, 3.999, 4.4], + "Float64", + ), + ( + [1.0, 2.0, 3.999, 4.4], + {1.0: 9.0}, + [9.0, 2.0, 3.999, 4.4], + "Float64", + ), + ([1, 2, 3, 4], {1: 9}, [9, 2, 3, 4], "Int64"), + ([1, 2, 3, 4], {1: 9.0}, [9, 2, 3, 4], "Int64"), + ], + ) + def test_replace_nullable_types_with_dict( + self, frame_or_series, value, to_replace, expected, dtype + ): + # GH40732 + obj = frame_or_series(value, dtype=dtype) + expected = frame_or_series(expected, dtype=dtype) + result = obj.replace(to_replace) + tm.assert_equal(result, expected) From f9c09dac1ec666353455dda7ee08ac13176596fa Mon Sep 17 00:00:00 2001 From: Hasan Yaman Date: Tue, 13 Apr 2021 01:50:32 +0300 Subject: [PATCH 5/6] BUG: Fix dtypes change using replace with nullable types (#40732) --- pandas/core/internals/blocks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 0c87f0e974112..b71599e7ed46b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -870,9 +870,9 @@ def _replace_coerce( """ if mask.any(): if not regex: - if isinstance( - self.values, (IntegerArray, FloatingArray) - ) and self._can_hold_element(value): + if isinstance(self.values, ExtensionArray) and self._can_hold_element( + value + ): nb = self else: nb = self.coerce_to_target_dtype(value) From 0bbf419e41f64579de9480035b788c4fcdf842fe Mon Sep 17 00:00:00 2001 From: Hasan Yaman Date: Tue, 13 Apr 2021 01:51:51 +0300 Subject: [PATCH 6/6] TST: Add different data types for replace tests with nullable types (#40732) --- pandas/tests/frame/methods/test_replace.py | 26 ++++++++++++++-------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index f8ca312a9042f..637b887f99150 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1683,29 +1683,37 @@ def test_replace_bytes(self, frame_or_series): tm.assert_equal(obj, expected) @pytest.mark.parametrize( - "value, to_replace, expected, dtype", + "data, to_replace, value, expected, dtype", [ ( [1.0, 2.0, 3.999, 4.4], - {1.0: 9}, + 1.0, + 9, [9.0, 2.0, 3.999, 4.4], "Float64", ), ( [1.0, 2.0, 3.999, 4.4], - {1.0: 9.0}, + 1.0, + 9.0, [9.0, 2.0, 3.999, 4.4], "Float64", ), - ([1, 2, 3, 4], {1: 9}, [9, 2, 3, 4], "Int64"), - ([1, 2, 3, 4], {1: 9.0}, [9, 2, 3, 4], "Int64"), + ([1, 2, 3, 4], 1, 9, [9, 2, 3, 4], "Int64"), + ([1, 2, 3, 4], 1, 9.0, [9, 2, 3, 4], "Int64"), + (["a", None, "b"], "a", "1", ["1", None, "b"], "string"), + ([None, False, True], True, False, [None, False, False], "boolean"), ], ) - def test_replace_nullable_types_with_dict( - self, frame_or_series, value, to_replace, expected, dtype + def test_replace_with_nullable_arrays( + self, frame_or_series, data, to_replace, value, expected, dtype ): # GH40732 - obj = frame_or_series(value, dtype=dtype) + obj = frame_or_series(data, dtype=dtype) expected = frame_or_series(expected, dtype=dtype) - result = obj.replace(to_replace) + + result = obj.replace(to_replace, value) + tm.assert_equal(result, expected) + + result = obj.replace({to_replace: value}) tm.assert_equal(result, expected)