From f6a4af6b407704cd56c076f6b0472243c4cebe83 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 28 Mar 2023 10:47:43 +0100 Subject: [PATCH 1/5] DEPR: Deprecate param convert_dtype in Series.Apply --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/_libs/lib.pyx | 18 ++++++------------ pandas/core/series.py | 14 +++++++++++++- pandas/tests/apply/test_series_apply.py | 11 ++++++----- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 1f8c93978c890..11bf20e5778c0 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -116,6 +116,7 @@ Deprecations - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) - Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`) - Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`) +- Deprecated parameter ``convert_dtype`` in :meth:`Series.apply` (:issue:`52140`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index c6aded1b25281..b91dcb0ae663a 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2797,12 +2797,9 @@ def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=Tr result[i] = val if convert: - return maybe_convert_objects(result, - try_float=False, - convert_datetime=False, - convert_timedelta=False) - - return result + return maybe_convert_objects(result) + else: + return result @cython.boundscheck(False) @@ -2845,12 +2842,9 @@ def map_infer( result[i] = val if convert: - return maybe_convert_objects(result, - try_float=False, - convert_datetime=False, - convert_timedelta=False) - - return result + return maybe_convert_objects(result) + else: + return result def to_object_array(rows: object, min_width: int = 0) -> ndarray: diff --git a/pandas/core/series.py b/pandas/core/series.py index 40fbc9b74b3f2..69006d28ef9b0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4387,7 +4387,7 @@ def transform( def apply( self, func: AggFuncType, - convert_dtype: bool = True, + convert_dtype: bool | lib.NoDefault = lib.no_default, args: tuple[Any, ...] = (), **kwargs, ) -> DataFrame | Series: @@ -4405,6 +4405,10 @@ def apply( Try to find better dtype for elementwise function results. If False, leave as dtype=object. Note that the dtype is always preserved for some extension array dtypes, such as Categorical. + + .. deprecated:: 2.1.0 + The convert_dtype has been deprecated. Do ``ser.astype(object).apply()`` + instead if you want this functionality. args : tuple Positional arguments passed to func after the series value. 
**kwargs @@ -4494,6 +4498,14 @@ def apply( Helsinki 2.484907 dtype: float64 """ + if convert_dtype is not lib.no_default: + warnings.warn( + "the convert_dtype parameter is deprecated and will be removed in a " + "future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + convert_dtype = True return SeriesApply(self, func, convert_dtype, args, kwargs).apply() def _reduce( diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index bd0167701d08b..733a60ffee1da 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -74,14 +74,15 @@ def f(x): tm.assert_series_equal(result, expected) -def test_apply_dont_convert_dtype(): - s = Series(np.random.randn(10)) +@pytest.mark.parametrize("convert_dtype", [True, False]) +def test_apply_convert_dtype_deprecated(convert_dtype): + ser = Series(np.random.randn(10)) - def f(x): + def func(x): return x if x > 0 else np.nan - result = s.apply(f, convert_dtype=False) - assert result.dtype == object + with tm.assert_produces_warning(FutureWarning): + ser.apply(func, convert_dtype=convert_dtype) def test_apply_args(): From 9806cab2e033994193b16faa946a71b574721aab Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 28 Mar 2023 15:29:15 +0100 Subject: [PATCH 2/5] fix StataReader --- pandas/io/stata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 5b6326685d63e..e1c70277f1496 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1780,7 +1780,7 @@ def read( # Decode strings for col, typ in zip(data, self._typlist): if type(typ) is int: - data[col] = data[col].apply(self._decode, convert_dtype=True) + data[col] = data[col].apply(self._decode) data = self._insert_strls(data) From d9b1fdcbaf3f7d1da631317cdd62b770aa758fc7 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 28 Mar 2023 16:57:41 +0100 Subject: [PATCH 3/5] fix issue --- pandas/core/series.py | 5 
+++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 69006d28ef9b0..48d69e57a355e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4498,14 +4498,15 @@ def apply( Helsinki 2.484907 dtype: float64 """ - if convert_dtype is not lib.no_default: + if convert_dtype is lib.no_default: + convert_dtype = True + else: warnings.warn( "the convert_dtype parameter is deprecated and will be removed in a " "future version.", FutureWarning, stacklevel=find_stack_level(), ) - convert_dtype = True return SeriesApply(self, func, convert_dtype, args, kwargs).apply() def _reduce( From ea797a2c9fa9823d7788bb1f02e990a9523ab277 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Thu, 30 Mar 2023 06:58:17 +0100 Subject: [PATCH 4/5] Update pandas/core/series.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 48d69e57a355e..239f9762c242f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4408,7 +4408,7 @@ def apply( .. deprecated:: 2.1.0 The convert_dtype has been deprecated. Do ``ser.astype(object).apply()`` - instead if you want this functionality. + instead if you want ``convert_dtype=False``. args : tuple Positional arguments passed to func after the series value. 
**kwargs From f90bf50a75a3e8842bfd540150cd4f27cf13bf02 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Thu, 30 Mar 2023 07:02:59 +0100 Subject: [PATCH 5/5] explain more in warning --- pandas/core/series.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 239f9762c242f..8ea5285425875 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4503,7 +4503,8 @@ def apply( else: warnings.warn( "the convert_dtype parameter is deprecated and will be removed in a " - "future version.", + "future version. Do ``ser.astype(object).apply()`` " + "instead if you want ``convert_dtype=False``.", FutureWarning, stacklevel=find_stack_level(), )