From 516a3d82804545b7252d120e7802ca70f6240d45 Mon Sep 17 00:00:00 2001 From: Ayla Khan Date: Tue, 11 Feb 2020 23:28:11 -0700 Subject: [PATCH 01/12] API: raise TypeError if to_replace is of invalid type (GH18634). --- pandas/core/generic.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index dfafb1057a543..55f4865b07efe 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6204,6 +6204,9 @@ def bfill( * If `regex` is not a ``bool`` and `to_replace` is not ``None``. TypeError + * If `to_replace` is not any of the expected types + (``str``, `regex`, ``list``, ``dict``, ``Series``, ``int``, + ``float`` or ``None``) * If `to_replace` is a ``dict`` and `value` is not a ``list``, ``dict``, ``ndarray``, or ``Series`` * If `to_replace` is ``None`` and `regex` is not compilable @@ -6407,6 +6410,19 @@ def replace( regex=False, method="pad", ): + if not ( + is_scalar(to_replace) + or isinstance(to_replace, pd.Series) + or is_re_compilable(to_replace) + or is_list_like(to_replace) + or is_dict_like(to_replace) + ): + raise TypeError( + f"Expecting 'to_replace' to be str, regex, list, dict, Series, " + f"int, float or None, got invalid type " + f"{repr(type(to_replace).__name__)}" + ) + inplace = validate_bool_kwarg(inplace, "inplace") if not is_bool(regex) and to_replace is not None: raise AssertionError("'to_replace' must be 'None' if 'regex' is not a bool") From a8a6e91ef6f913be0e14a472a1a0f2694c1e8da2 Mon Sep 17 00:00:00 2001 From: Ayla Khan Date: Tue, 11 Feb 2020 23:57:24 -0700 Subject: [PATCH 02/12] API: Add GH18634 to whatsnew. --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 381578ad13bdd..18d12aed887a6 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -56,6 +56,7 @@ Other API changes - :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last`` will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`) - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) +- :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`) Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 54ee9d918f0224b1a1f7f0354ddc342bed4d7517 Mon Sep 17 00:00:00 2001 From: Ayla Khan Date: Wed, 12 Feb 2020 00:48:22 -0700 Subject: [PATCH 03/12] API: GH18634 tests. --- pandas/tests/frame/methods/test_replace.py | 22 +++++++++++++++++++++ pandas/tests/series/methods/test_replace.py | 16 +++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 92b74c4409d7d..77dff90e5e616 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1363,3 +1363,25 @@ def test_replace_after_convert_dtypes(self): result = df.replace(1, 10) expected = pd.DataFrame({"grp": [10, 2, 3, 4, 5]}, dtype="Int64") tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "df, to_replace", + [ + ( + {'one': ['1', '1 ', '10'], 'two': ['1 ', '20 ', '30 ']}, + lambda x: x.strip() + ), + ( + {'one': ['1', '1 ', '10'], 'two': ['1 ', '20 ', '30 ']}, + type('X', (object,), dict(myattr=1)) + ) + ] + ) + def test_replace_invalid_to_replace(self, df, to_replace): + # GH 18634 + # API: replace() should raise an exception if invalid argument is given + df = pd.DataFrame(df) + msg = r"Expecting 'to_replace' to be str, regex, list, dict, Series, "\ + r"int, float or None, got invalid type.*" + with pytest.raises(TypeError, match=msg): + df.replace(self, to_replace) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 770ad38b0215e..dde30ade0db7e 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -362,3 +362,19 @@ def test_replace_no_cast(self, ser, exp): expected = pd.Series(exp) tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "ser, to_replace", + [ + (['a', 'b', 'c'], lambda x: x.strip()), + (['a', 'b', 'c'], type('X', (object,), dict(myattr=1))) + ] + ) + def test_replace_invalid_to_replace(self, ser, to_replace): + # GH 18634 + # API: replace() should raise an exception if invalid argument is given + series = pd.Series(ser) + msg = r"Expecting 'to_replace' to be str, regex, list, dict, Series, "\ + r"int, float or None, got invalid type.*" + with pytest.raises(TypeError, match=msg): + series.replace(self, to_replace) From 78f2f070c5a58e3ed958c1be938733ab3a4d1214 Mon Sep 17 00:00:00 2001 From: Ayla Khan Date: Wed, 12 Feb 2020 22:06:00 -0700 Subject: [PATCH 04/12] Improve tests. --- pandas/tests/frame/methods/test_replace.py | 24 +++++++++++++-------- pandas/tests/series/methods/test_replace.py | 16 +++++++++----- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 77dff90e5e616..f83323687de20 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1363,25 +1363,31 @@ def test_replace_after_convert_dtypes(self): result = df.replace(1, 10) expected = pd.DataFrame({"grp": [10, 2, 3, 4, 5]}, dtype="Int64") tm.assert_frame_equal(result, expected) - + @pytest.mark.parametrize( "df, to_replace", [ ( - {'one': ['1', '1 ', '10'], 'two': ['1 ', '20 ', '30 ']}, - lambda x: x.strip() + {"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]}, + lambda x: x.strip(), + ), + ( + {"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]}, + type("callable_object", (object,), dict(__call__=lambda x: x.strip())), ), ( - {'one': ['1', '1 ', '10'], 'two': ['1 ', '20 ', '30 ']}, - type('X', (object,), dict(myattr=1)) - ) - ] + {"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]}, + pd.DataFrame({"one": ["a", "b ", "c"], "two": ["d ", "e ", "f"]}), + ), + ], ) def test_replace_invalid_to_replace(self, df, to_replace): # GH 18634 # API: replace() should raise an exception if invalid argument is given df = pd.DataFrame(df) - msg = r"Expecting 'to_replace' to be str, regex, list, dict, Series, "\ - r"int, float or None, got invalid type.*" + msg = ( + r"Expecting 'to_replace' to be str, regex, list, dict, Series, " + r"int, float or None, got invalid type.*" + ) with pytest.raises(TypeError, match=msg): df.replace(self, to_replace) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index dde30ade0db7e..613a4dc1e8958 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -366,15 +366,21 @@ def test_replace_no_cast(self, ser, exp): @pytest.mark.parametrize( "ser, to_replace", [ - (['a', 'b', 'c'], lambda x: x.strip()), - (['a', 'b', 'c'], type('X', (object,), dict(myattr=1))) - ] + (["a", "b", "c "], lambda x: x.strip()), + ( + ["a", "b", "c "], + type("callable_object", (object,), dict(__call__=lambda x: x.strip())), + ), + (["a", "b", "c "], np.nan), + ], ) def test_replace_invalid_to_replace(self, ser, to_replace): # GH 18634 # API: replace() should raise an exception if invalid argument is given series = pd.Series(ser) - msg = r"Expecting 'to_replace' to be str, regex, list, dict, Series, "\ - r"int, float or None, got invalid type.*" + msg = ( + r"Expecting 'to_replace' to be str, regex, list, dict, Series, " + r"int, float or None, got invalid type.*" + ) with pytest.raises(TypeError, match=msg): series.replace(self, to_replace) From 2309200526b5c09008e92858c5d43d689087be3a Mon Sep 17 00:00:00 2001 From: Ayla Khan Date: Wed, 12 Feb 2020 22:48:27 -0700 Subject: [PATCH 05/12] Fix tests. --- pandas/tests/frame/methods/test_replace.py | 6 +----- pandas/tests/series/methods/test_replace.py | 3 +-- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index f83323687de20..b225e20e42bdc 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1375,10 +1375,6 @@ def test_replace_after_convert_dtypes(self): {"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]}, type("callable_object", (object,), dict(__call__=lambda x: x.strip())), ), - ( - {"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]}, - pd.DataFrame({"one": ["a", "b ", "c"], "two": ["d ", "e ", "f"]}), - ), ], ) def test_replace_invalid_to_replace(self, df, to_replace): @@ -1390,4 +1386,4 @@ def test_replace_invalid_to_replace(self, df, to_replace): r"int, float or None, got invalid type.*" ) with pytest.raises(TypeError, match=msg): - df.replace(self, to_replace) + df.replace(to_replace) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 613a4dc1e8958..7112938cc4f43 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -371,7 +371,6 @@ def test_replace_no_cast(self, ser, exp): ["a", "b", "c "], type("callable_object", (object,), dict(__call__=lambda x: x.strip())), ), - (["a", "b", "c "], np.nan), ], ) def test_replace_invalid_to_replace(self, ser, to_replace): @@ -383,4 +382,4 @@ def test_replace_invalid_to_replace(self, ser, to_replace): r"int, float or None, got invalid type.*" ) with pytest.raises(TypeError, match=msg): - series.replace(self, to_replace) + series.replace(to_replace) From dfca1811014b3bfdf5257395e8e5b5366bb352a1 Mon Sep 17 00:00:00 2001 From: Ayla Khan Date: Thu, 13 Feb 2020 12:03:28 -0700 Subject: [PATCH 06/12] API: doc fix (hopefully). --- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 55f4865b07efe..39adc46eef8ba 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6205,8 +6205,8 @@ def bfill( ``None``. TypeError * If `to_replace` is not any of the expected types - (``str``, `regex`, ``list``, ``dict``, ``Series``, ``int``, - ``float`` or ``None``) + (``str``, `regex`, ``list``, ``dict``, ``Series``, ``int``, + ``float`` or ``None``) * If `to_replace` is a ``dict`` and `value` is not a ``list``, ``dict``, ``ndarray``, or ``Series`` * If `to_replace` is ``None`` and `regex` is not compilable From 67e37c185de00cd5234528e26cd8ab7b03a9a662 Mon Sep 17 00:00:00 2001 From: Ayla Khan Date: Thu, 13 Feb 2020 17:25:21 -0700 Subject: [PATCH 07/12] API: another attempt to fix docs. --- pandas/core/generic.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5de80a0d9ad82..4c704e71b4538 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6189,10 +6189,11 @@ def bfill( AssertionError * If `regex` is not a ``bool`` and `to_replace` is not ``None``. + TypeError * If `to_replace` is not any of the expected types - (``str``, `regex`, ``list``, ``dict``, ``Series``, ``int``, - ``float`` or ``None``) + (``str``, `regex`, ``list``, ``dict``, ``Series``, ``int``, + ``float`` or ``None``) * If `to_replace` is a ``dict`` and `value` is not a ``list``, ``dict``, ``ndarray``, or ``Series`` * If `to_replace` is ``None`` and `regex` is not compilable @@ -6201,6 +6202,7 @@ def bfill( * When replacing multiple ``bool`` or ``datetime64`` objects and the arguments to `to_replace` does not match the type of the value being replaced + ValueError * If a ``list`` or an ``ndarray`` is passed to `to_replace` and `value` but they are not the same length. From 7aeffd2d8cd377b56f947d5078fb5c3e0d54882c Mon Sep 17 00:00:00 2001 From: Ayla Khan <1399377+a-y-khan@users.noreply.github.com> Date: Thu, 13 Feb 2020 18:37:30 -0700 Subject: [PATCH 08/12] Update pandas/core/generic.py Co-Authored-By: William Ayd --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4c704e71b4538..a793fd950c261 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6407,7 +6407,7 @@ def replace( ): raise TypeError( f"Expecting 'to_replace' to be str, regex, list, dict, Series, " - f"int, float or None, got invalid type " + "int, float or None, got invalid type " f"{repr(type(to_replace).__name__)}" ) From c3cb90193718281e5c8bde222bf390a58d53390d Mon Sep 17 00:00:00 2001 From: Ayla Khan <1399377+a-y-khan@users.noreply.github.com> Date: Thu, 13 Feb 2020 18:37:41 -0700 Subject: [PATCH 09/12] Update pandas/core/generic.py Co-Authored-By: William Ayd --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a793fd950c261..520953bc57dce 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6406,7 +6406,7 @@ def replace( or is_dict_like(to_replace) ): raise TypeError( - f"Expecting 'to_replace' to be str, regex, list, dict, Series, " + "Expecting 'to_replace' to be str, regex, list, dict, Series, " "int, float or None, got invalid type " f"{repr(type(to_replace).__name__)}" ) From 22c1b6fad5a97cbb7e5be021f4dfdd05b7df6fb9 Mon Sep 17 00:00:00 2001 From: Ayla Khan Date: Thu, 13 Feb 2020 21:15:58 -0700 Subject: [PATCH 10/12] API: applying PR feedback. --- pandas/core/generic.py | 1 - pandas/tests/frame/methods/test_replace.py | 19 +++---------------- pandas/tests/series/methods/test_replace.py | 16 +++------------- 3 files changed, 6 insertions(+), 30 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 520953bc57dce..ff85a922f834e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6403,7 +6403,6 @@ def replace( or isinstance(to_replace, pd.Series) or is_re_compilable(to_replace) or is_list_like(to_replace) - or is_dict_like(to_replace) ): raise TypeError( "Expecting 'to_replace' to be str, regex, list, dict, Series, " diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index b225e20e42bdc..66467288d244f 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1364,26 +1364,13 @@ def test_replace_after_convert_dtypes(self): expected = pd.DataFrame({"grp": [10, 2, 3, 4, 5]}, dtype="Int64") tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( - "df, to_replace", - [ - ( - {"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]}, - lambda x: x.strip(), - ), - ( - {"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]}, - type("callable_object", (object,), dict(__call__=lambda x: x.strip())), - ), - ], - ) - def test_replace_invalid_to_replace(self, df, to_replace): + def test_replace_invalid_to_replace(self): # GH 18634 # API: replace() should raise an exception if invalid argument is given - df = pd.DataFrame(df) + df = pd.DataFrame({"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]}) msg = ( r"Expecting 'to_replace' to be str, regex, list, dict, Series, " r"int, float or None, got invalid type.*" ) with pytest.raises(TypeError, match=msg): - df.replace(to_replace) + df.replace(lambda x: x.strip()) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 7112938cc4f43..4912d3888eada 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -363,23 +363,13 @@ def test_replace_no_cast(self, ser, exp): tm.assert_series_equal(result, expected) - @pytest.mark.parametrize( - "ser, to_replace", - [ - (["a", "b", "c "], lambda x: x.strip()), - ( - ["a", "b", "c "], - type("callable_object", (object,), dict(__call__=lambda x: x.strip())), - ), - ], - ) - def test_replace_invalid_to_replace(self, ser, to_replace): + def test_replace_invalid_to_replace(self): # GH 18634 # API: replace() should raise an exception if invalid argument is given - series = pd.Series(ser) + series = pd.Series(["a", "b", "c "]) msg = ( r"Expecting 'to_replace' to be str, regex, list, dict, Series, " r"int, float or None, got invalid type.*" ) with pytest.raises(TypeError, match=msg): - series.replace(to_replace) + series.replace(lambda x: x.strip()) From 21751c98d2b3eb8bb430806e7f97db3d6088ad73 Mon Sep 17 00:00:00 2001 From: Ayla Khan Date: Thu, 27 Feb 2020 05:12:53 +0000 Subject: [PATCH 11/12] API: applying additional PR feedback. --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/generic.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 90639e9e6723c..39a9946442ce7 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -57,7 +57,6 @@ Other API changes will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`) - Added :meth:`DataFrame.value_counts` (:issue:`5377`) - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) -- :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`) - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) - @@ -231,6 +230,7 @@ Reshaping - Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`) - :meth:`DataFrame.pivot` can now take lists for ``index`` and ``columns`` arguments (:issue:`21425`) - Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`) +- :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`) Sparse diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9dd844bf7e1b0..bcf0d94e6faaf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6174,9 +6174,7 @@ def bfill( ``None``. TypeError - * If `to_replace` is not any of the expected types - (``str``, `regex`, ``list``, ``dict``, ``Series``, ``int``, - ``float`` or ``None``) + * If `to_replace` is not a scalar, array-like, ``dict``, or ``None`` * If `to_replace` is a ``dict`` and `value` is not a ``list``, ``dict``, ``ndarray``, or ``Series`` * If `to_replace` is ``None`` and `regex` is not compilable @@ -6388,8 +6386,8 @@ def replace( or is_list_like(to_replace) ): raise TypeError( - "Expecting 'to_replace' to be str, regex, list, dict, Series, " - "int, float or None, got invalid type " + "Expecting 'to_replace' to be either a scalar, array-like, " + "dict or None, got invalid type " f"{repr(type(to_replace).__name__)}" ) From d208a623af35a03c68f471f05f0ba4c08713378e Mon Sep 17 00:00:00 2001 From: Ayla Khan Date: Thu, 27 Feb 2020 05:44:54 +0000 Subject: [PATCH 12/12] API: test corrections. --- pandas/tests/frame/methods/test_replace.py | 4 ++-- pandas/tests/series/methods/test_replace.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 66467288d244f..ee89562261b19 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1369,8 +1369,8 @@ def test_replace_invalid_to_replace(self): # API: replace() should raise an exception if invalid argument is given df = pd.DataFrame({"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]}) msg = ( - r"Expecting 'to_replace' to be str, regex, list, dict, Series, " - r"int, float or None, got invalid type.*" + r"Expecting 'to_replace' to be either a scalar, array-like, " + r"dict or None, got invalid type.*" ) with pytest.raises(TypeError, match=msg): df.replace(lambda x: x.strip()) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 4912d3888eada..26eaf53616282 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -368,8 +368,8 @@ def test_replace_invalid_to_replace(self): # API: replace() should raise an exception if invalid argument is given series = pd.Series(["a", "b", "c "]) msg = ( - r"Expecting 'to_replace' to be str, regex, list, dict, Series, " - r"int, float or None, got invalid type.*" + r"Expecting 'to_replace' to be either a scalar, array-like, " + r"dict or None, got invalid type.*" ) with pytest.raises(TypeError, match=msg): series.replace(lambda x: x.strip())