From f2ff20d96d387a7457bf032b83858a6a1a94894f Mon Sep 17 00:00:00 2001 From: roib <66280613+roib20@users.noreply.github.com> Date: Tue, 1 Feb 2022 04:27:32 +0200 Subject: [PATCH 1/7] BUG: RecursionError when attempting to replace "np.nan" values under main branch (#45725) --- pandas/core/internals/blocks.py | 1 + pandas/tests/frame/methods/test_replace.py | 12 ++++++++++++ pandas/tests/series/methods/test_replace.py | 14 ++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 386aee102cbcd..7db410b36f62b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -669,6 +669,7 @@ def replace( # go through replace_list values = self.values + value = self._standardize_fill_value(value) # GH#45725 if isinstance(values, Categorical): # TODO: avoid special-casing diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 001a50c3ce1b0..652f28bac9cb1 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -661,6 +661,18 @@ def test_replace_simple_nested_dict_with_nonexistent_value(self): result = df.replace({"col": {-1: "-", 1: "a", 4: "b"}}) tm.assert_frame_equal(expected, result) + def test_replace_numpy_nan(self): + # GH#45725 ensure np.nan can be replaced with pd.NA + df = pd.DataFrame({"A": [np.nan], "B": [np.nan]}, dtype=object) + result = df.replace({np.nan: pd.NA}) + expected = pd.DataFrame({"A": [pd.NA], "B": [pd.NA]}, dtype=object) + tm.assert_frame_equal(result, expected) + + # same thing but with None + result = df.replace({np.nan: None}) + expected = pd.DataFrame({"A": [None], "B": [None]}, dtype=object) + tm.assert_frame_equal(result, expected) + def test_replace_value_is_none(self, datetime_frame): orig_value = datetime_frame.iloc[0, 0] orig2 = datetime_frame.iloc[1, 0] diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 6a8dacfda5e78..439260c214e66 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -36,6 +36,20 @@ def test_replace_explicit_none(self): assert expected.iloc[-1] is None tm.assert_series_equal(result, expected) + def test_replace_numpy_nan(self): + # GH#45725 ensure np.nan can be replaced + ser = pd.Series([np.nan, np.nan], dtype=object) + result = ser.replace({np.nan: pd.NA}) + expected = pd.Series([pd.NA, pd.NA], dtype=object) + tm.assert_series_equal(result, expected) + assert result.dtype == object + + # same thing but with None + result = ser.replace({np.nan: None}) + expected = pd.Series([None, None], dtype=object) + tm.assert_series_equal(result, expected) + assert result.dtype == object + def test_replace_noop_doesnt_downcast(self): # GH#44498 ser = pd.Series([None, None, pd.Timestamp("2021-12-16 17:31")], dtype=object) From cb42c6bb31dfbde0a18b15d9a698f97985cac6b8 Mon Sep 17 00:00:00 2001 From: roib <66280613+roib20@users.noreply.github.com> Date: Tue, 1 Feb 2022 04:37:47 +0200 Subject: [PATCH 2/7] BUG: RecursionError when attempting to replace "np.nan" values under main branch (#45725) --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 78bede643f2ac..2eb56037e2b31 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -248,6 +248,7 @@ Conversion - Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` from floating dtype to unsigned integer dtype failing to raise in the presence of negative values (:issue:`45151`) - Bug in :func:`array` with ``FloatingDtype`` and values containing float-castable strings incorrectly raising (:issue:`45424`) - Bug when comparing string and datetime64ns objects causing ``OverflowError`` exception. (:issue:`45506`) +- Bug when attempting to replace ``numpy.nan`` values causing ``RecursionError`` Strings ^^^^^^^ From 53405528052e1650e94b63f9c7d1a0540ec9fc04 Mon Sep 17 00:00:00 2001 From: roib20 <66280613+roib20@users.noreply.github.com> Date: Tue, 1 Feb 2022 02:52:39 +0000 Subject: [PATCH 3/7] Fixes from pre-commit [automated commit] --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 87bd80563e89a..5cc1e9926638e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -669,7 +669,7 @@ def replace( # go through replace_list values = self.values - value = self._standardize_fill_value(value) # GH#45725 + value = self._standardize_fill_value(value) # GH#45725 if isinstance(values, Categorical): # TODO: avoid special-casing From 01d0a08672218115e808316431d1ba548489bd6c Mon Sep 17 00:00:00 2001 From: roib <66280613+roib20@users.noreply.github.com> Date: Tue, 1 Feb 2022 06:07:48 +0200 Subject: [PATCH 4/7] BUG: RecursionError when attempting to replace np.nan values (#45725) --- pandas/tests/frame/methods/test_replace.py | 17 +++++++---------- pandas/tests/series/methods/test_replace.py | 18 +++++++----------- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 652f28bac9cb1..9feca1db4a9ea 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -661,16 +661,13 @@ def test_replace_simple_nested_dict_with_nonexistent_value(self): result = df.replace({"col": {-1: "-", 1: "a", 4: "b"}}) tm.assert_frame_equal(expected, result) - def test_replace_numpy_nan(self): - # GH#45725 ensure np.nan can be replaced with pd.NA - df = pd.DataFrame({"A": [np.nan], "B": [np.nan]}, dtype=object) - result = df.replace({np.nan: pd.NA}) - expected = pd.DataFrame({"A": [pd.NA], "B": [pd.NA]}, dtype=object) - tm.assert_frame_equal(result, expected) - - # same thing but with None - result = df.replace({np.nan: None}) - expected = pd.DataFrame({"A": [None], "B": [None]}, dtype=object) + @pytest.mark.parametrize("dtype", [object]) + @pytest.mark.parametrize("to_replace, value", [(np.nan, pd.NA), (np.nan, None)]) + def test_replace_numpy_nan(self, dtype, to_replace, value): + # GH#45725 ensure numpy.nan can be replaced with pandas.NA or None + df = pd.DataFrame({"A": [to_replace]}, dtype=dtype) + result = df.replace({to_replace: value}) + expected = pd.DataFrame({"A": [value]}, dtype=dtype) tm.assert_frame_equal(result, expected) def test_replace_value_is_none(self, datetime_frame): diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 439260c214e66..48dbbc9ce045e 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -36,17 +36,13 @@ def test_replace_explicit_none(self): assert expected.iloc[-1] is None tm.assert_series_equal(result, expected) - def test_replace_numpy_nan(self): - # GH#45725 ensure np.nan can be replaced - ser = pd.Series([np.nan, np.nan], dtype=object) - result = ser.replace({np.nan: pd.NA}) - expected = pd.Series([pd.NA, pd.NA], dtype=object) - tm.assert_series_equal(result, expected) - assert result.dtype == object - - # same thing but with None - result = ser.replace({np.nan: None}) - expected = pd.Series([None, None], dtype=object) + @pytest.mark.parametrize("dtype", [object]) + @pytest.mark.parametrize("to_replace, value", [(np.nan, pd.NA), (np.nan, None)]) + def test_replace_numpy_nan(self, dtype, to_replace, value): + # GH#45725 ensure numpy.nan can be replaced with pandas.NA or None + ser = pd.Series([to_replace], dtype=dtype) + result = ser.replace({to_replace: value}) + expected = pd.Series([value], dtype=dtype) tm.assert_series_equal(result, expected) assert result.dtype == object From a4fc217a3dd855eba7c32c966bfff6e23052b9fd Mon Sep 17 00:00:00 2001 From: roib <66280613+roib20@users.noreply.github.com> Date: Tue, 1 Feb 2022 06:15:42 +0200 Subject: [PATCH 5/7] BUG: RecursionError when attempting to replace np.nan values (#45725) --- pandas/tests/frame/methods/test_replace.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 9feca1db4a9ea..00252077a4222 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -665,9 +665,9 @@ def test_replace_simple_nested_dict_with_nonexistent_value(self): @pytest.mark.parametrize("to_replace, value", [(np.nan, pd.NA), (np.nan, None)]) def test_replace_numpy_nan(self, dtype, to_replace, value): # GH#45725 ensure numpy.nan can be replaced with pandas.NA or None - df = pd.DataFrame({"A": [to_replace]}, dtype=dtype) + df = DataFrame({"A": [to_replace]}, dtype=dtype) result = df.replace({to_replace: value}) - expected = pd.DataFrame({"A": [value]}, dtype=dtype) + expected = DataFrame({"A": [value]}, dtype=dtype) tm.assert_frame_equal(result, expected) def test_replace_value_is_none(self, datetime_frame): From a966f773424fc1360690d661fd37cb0d9e44b737 Mon Sep 17 00:00:00 2001 From: roib <66280613+roib20@users.noreply.github.com> Date: Tue, 1 Feb 2022 06:37:03 +0200 Subject: [PATCH 6/7] BUG: RecursionError when attempting to replace np.nan values (#45725) --- pandas/tests/frame/methods/test_replace.py | 7 +++---- pandas/tests/series/methods/test_replace.py | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 00252077a4222..782731883b928 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -661,13 +661,12 @@ def test_replace_simple_nested_dict_with_nonexistent_value(self): result = df.replace({"col": {-1: "-", 1: "a", 4: "b"}}) tm.assert_frame_equal(expected, result) - @pytest.mark.parametrize("dtype", [object]) @pytest.mark.parametrize("to_replace, value", [(np.nan, pd.NA), (np.nan, None)]) - def test_replace_numpy_nan(self, dtype, to_replace, value): + def test_replace_numpy_nan(self, to_replace, value): # GH#45725 ensure numpy.nan can be replaced with pandas.NA or None - df = DataFrame({"A": [to_replace]}, dtype=dtype) + df = DataFrame({"A": [to_replace]}, dtype=object) result = df.replace({to_replace: value}) - expected = DataFrame({"A": [value]}, dtype=dtype) + expected = DataFrame({"A": [value]}, dtype=object) tm.assert_frame_equal(result, expected) def test_replace_value_is_none(self, datetime_frame): diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 48dbbc9ce045e..57ce7b62a954c 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -36,13 +36,12 @@ def test_replace_explicit_none(self): assert expected.iloc[-1] is None tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("dtype", [object]) @pytest.mark.parametrize("to_replace, value", [(np.nan, pd.NA), (np.nan, None)]) - def test_replace_numpy_nan(self, dtype, to_replace, value): + def test_replace_numpy_nan(self, to_replace, value): # GH#45725 ensure numpy.nan can be replaced with pandas.NA or None - ser = pd.Series([to_replace], dtype=dtype) + ser = pd.Series([to_replace], dtype=object) result = ser.replace({to_replace: value}) - expected = pd.Series([value], dtype=dtype) + expected = pd.Series([value], dtype=object) tm.assert_series_equal(result, expected) assert result.dtype == object From 13687c68b1a9bd2c54380327d91b54151e7df9f3 Mon Sep 17 00:00:00 2001 From: roib <66280613+roib20@users.noreply.github.com> Date: Tue, 1 Feb 2022 18:43:54 +0200 Subject: [PATCH 7/7] BUG: RecursionError when attempting to replace np.nan values (#45725) --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/tests/frame/methods/test_replace.py | 19 ++++++++++++------ pandas/tests/series/methods/test_replace.py | 22 ++++++++++++++------- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 2eb56037e2b31..063e0c7512f4d 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -193,6 +193,7 @@ Other Deprecations - Deprecated :meth:`DataFrame.iteritems`, :meth:`Series.iteritems`, :meth:`HDFStore.iteritems` in favor of :meth:`DataFrame.items`, :meth:`Series.items`, :meth:`HDFStore.items` (:issue:`45321`) - Deprecated :meth:`Series.is_monotonic` and :meth:`Index.is_monotonic` in favor of :meth:`Series.is_monotonic_increasing` and :meth:`Index.is_monotonic_increasing` (:issue:`45422`, :issue:`21335`) - Deprecated the ``__array_wrap__`` method of DataFrame and Series, rely on standard numpy ufuncs instead (:issue:`45451`) +- Deprecated the behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and incompatible fill value; in a future version this will cast to a common dtype (usually object) instead of raising, matching the behavior of other dtypes (:issue:`45746`) - @@ -248,7 +249,6 @@ Conversion - Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` from floating dtype to unsigned integer dtype failing to raise in the presence of negative values (:issue:`45151`) - Bug in :func:`array` with ``FloatingDtype`` and values containing float-castable strings incorrectly raising (:issue:`45424`) - Bug when comparing string and datetime64ns objects causing ``OverflowError`` exception. (:issue:`45506`) -- Bug when attempting to replace ``numpy.nan`` values causing ``RecursionError`` Strings ^^^^^^^ diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 782731883b928..f92c779c85f03 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -661,12 +661,19 @@ def test_replace_simple_nested_dict_with_nonexistent_value(self): result = df.replace({"col": {-1: "-", 1: "a", 4: "b"}}) tm.assert_frame_equal(expected, result) - @pytest.mark.parametrize("to_replace, value", [(np.nan, pd.NA), (np.nan, None)]) - def test_replace_numpy_nan(self, to_replace, value): - # GH#45725 ensure numpy.nan can be replaced with pandas.NA or None - df = DataFrame({"A": [to_replace]}, dtype=object) - result = df.replace({to_replace: value}) - expected = DataFrame({"A": [value]}, dtype=object) + def test_replace_numpy_nan(self, nulls_fixture): + # GH#45725 ensure numpy.nan can be replaced with all other null types + to_replace = np.nan + value = nulls_fixture + dtype = object + df = DataFrame({"A": [to_replace]}, dtype=dtype) + expected = DataFrame({"A": [value]}, dtype=dtype) + + result = df.replace({to_replace: value}).astype(dtype=dtype) + tm.assert_frame_equal(result, expected) + + # same thing but different calling convention + result = df.replace(to_replace, value).astype(dtype=dtype) tm.assert_frame_equal(result, expected) def test_replace_value_is_none(self, datetime_frame): diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 57ce7b62a954c..c852898a217a1 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -36,14 +36,22 @@ def test_replace_explicit_none(self): assert expected.iloc[-1] is None tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("to_replace, value", [(np.nan, pd.NA), (np.nan, None)]) - def test_replace_numpy_nan(self, to_replace, value): - # GH#45725 ensure numpy.nan can be replaced with pandas.NA or None - ser = pd.Series([to_replace], dtype=object) - result = ser.replace({to_replace: value}) - expected = pd.Series([value], dtype=object) + def test_replace_numpy_nan(self, nulls_fixture): + # GH#45725 ensure numpy.nan can be replaced with all other null types + to_replace = np.nan + value = nulls_fixture + dtype = object + ser = pd.Series([to_replace], dtype=dtype) + expected = pd.Series([value], dtype=dtype) + + result = ser.replace({to_replace: value}).astype(dtype=dtype) tm.assert_series_equal(result, expected) - assert result.dtype == object + assert result.dtype == dtype + + # same thing but different calling convention + result = ser.replace(to_replace, value).astype(dtype=dtype) + tm.assert_series_equal(result, expected) + assert result.dtype == dtype def test_replace_noop_doesnt_downcast(self): # GH#44498