diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 128fd68674f96..9e3a2b608855a 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -150,6 +150,7 @@ Other enhancements - Added ``validate`` argument to :meth:`DataFrame.join` (:issue:`46622`) - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`) - Added ``numeric_only`` argument to :meth:`Resampler.sum`, :meth:`Resampler.prod`, :meth:`Resampler.min`, :meth:`Resampler.max`, :meth:`Resampler.first`, and :meth:`Resampler.last` (:issue:`46442`) +- ``halflife`` argument in :class:`.ExponentialMovingWindow` now accepts ``np.timedelta64`` when ``times`` is specified (:issue:`47003`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: @@ -493,7 +494,8 @@ retained by specifying ``group_keys=False``. ``numeric_only`` default value ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Across the DataFrame operations such as ``min``, ``sum``, and ``idxmax``, the default +Across the DataFrame and DataFrameGroupBy operations such as +``min``, ``sum``, and ``idxmax``, the default value of the ``numeric_only`` argument, if it exists at all, was inconsistent. Furthermore, operations with the default value ``None`` can lead to surprising results. (:issue:`46560`) @@ -523,6 +525,8 @@ gained the ``numeric_only`` argument. - :meth:`DataFrame.cov` - :meth:`DataFrame.idxmin` - :meth:`DataFrame.idxmax` +- :meth:`.DataFrameGroupBy.cummin` +- :meth:`.DataFrameGroupBy.cummax` - :meth:`.DataFrameGroupBy.idxmin` - :meth:`.DataFrameGroupBy.idxmax` - :meth:`.GroupBy.var` diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h index 5b5995a671b2c..71df0c5a186b7 100644 --- a/pandas/_libs/src/ujson/lib/ultrajson.h +++ b/pandas/_libs/src/ujson/lib/ultrajson.h @@ -29,7 +29,7 @@ Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) https://github.com/client9/stringencoders Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. -Numeric decoder derived from from TCL library +Numeric decoder derived from TCL library https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms * Copyright (c) 1988-1993 The Regents of the University of California. * Copyright (c) 1994 Sun Microsystems, Inc. diff --git a/pandas/_libs/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c index fee552672b8b6..c7779b8b428ae 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsondec.c +++ b/pandas/_libs/src/ujson/lib/ultrajsondec.c @@ -32,7 +32,7 @@ Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. -Numeric decoder derived from from TCL library +Numeric decoder derived from TCL library https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms * Copyright (c) 1988-1993 The Regents of the University of California. * Copyright (c) 1994 Sun Microsystems, Inc. diff --git a/pandas/_libs/src/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/ujson/lib/ultrajsonenc.c index 4469631b7b3f7..5d90710441a94 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsonenc.c +++ b/pandas/_libs/src/ujson/lib/ultrajsonenc.c @@ -32,7 +32,7 @@ Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. 
-Numeric decoder derived from from TCL library +Numeric decoder derived from TCL library https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms * Copyright (c) 1988-1993 The Regents of the University of California. * Copyright (c) 1994 Sun Microsystems, Inc. diff --git a/pandas/_libs/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c index 14683f4c28cbe..c58f25b8f99ea 100644 --- a/pandas/_libs/src/ujson/python/JSONtoObj.c +++ b/pandas/_libs/src/ujson/python/JSONtoObj.c @@ -29,7 +29,7 @@ Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) https://github.com/client9/stringencoders Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. -Numeric decoder derived from from TCL library +Numeric decoder derived from TCL library https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms * Copyright (c) 1988-1993 The Regents of the University of California. * Copyright (c) 1994 Sun Microsystems, Inc. diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 7de47749e500c..73d2a1f786f8b 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -30,7 +30,7 @@ Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. -Numeric decoder derived from from TCL library +Numeric decoder derived from TCL library https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms * Copyright (c) 1988-1993 The Regents of the University of California. * Copyright (c) 1994 Sun Microsystems, Inc. diff --git a/pandas/_libs/src/ujson/python/ujson.c b/pandas/_libs/src/ujson/python/ujson.c index a8fdb4f55bfca..def06cdf2db84 100644 --- a/pandas/_libs/src/ujson/python/ujson.c +++ b/pandas/_libs/src/ujson/python/ujson.c @@ -29,7 +29,7 @@ Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) https://github.com/client9/stringencoders Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. -Numeric decoder derived from from TCL library +Numeric decoder derived from TCL library https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms * Copyright (c) 1988-1993 The Regents of the University of California. * Copyright (c) 1994 Sun Microsystems, Inc. diff --git a/pandas/_libs/src/ujson/python/version.h b/pandas/_libs/src/ujson/python/version.h index 3f38642b6df87..15c55309d6270 100644 --- a/pandas/_libs/src/ujson/python/version.h +++ b/pandas/_libs/src/ujson/python/version.h @@ -29,7 +29,7 @@ Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) https://github.com/client9/stringencoders Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. -Numeric decoder derived from from TCL library +Numeric decoder derived from TCL library https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms * Copyright (c) 1988-1993 The Regents of the University of California. * Copyright (c) 1994 Sun Microsystems, Inc. diff --git a/pandas/_libs/tslibs/fields.pyi b/pandas/_libs/tslibs/fields.pyi index e404eadf13657..b1d9e0342f81e 100644 --- a/pandas/_libs/tslibs/fields.pyi +++ b/pandas/_libs/tslibs/fields.pyi @@ -22,6 +22,7 @@ def get_start_end_field( def get_date_field( dtindex: npt.NDArray[np.int64], # const int64_t[:] field: str, + reso: int = ..., # NPY_DATETIMEUNIT ) -> npt.NDArray[np.int32]: ... 
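For orientation, a minimal sketch of the user-visible effect of threading ``reso`` through these field functions, assuming pandas at this revision; the construction via the private ``DatetimeArray._simple_new`` mirrors the ``test_fields`` test added further below in this diff:

import numpy as np
import pandas as pd
from pandas.core.arrays import DatetimeArray

dti = pd.date_range("2016-01-01", periods=5, freq="D")
arr = np.asarray(dti).astype("M8[s]")  # second-resolution, non-nanosecond
dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)

# get_date_field now receives the array's own unit, so the second-based
# integers are no longer misinterpreted as nanoseconds
assert (dta.day == dti.day).all()
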
def get_timedelta_field( tdindex: npt.NDArray[np.int64], # const int64_t[:] @@ -32,6 +33,7 @@ def isleapyear_arr( ) -> npt.NDArray[np.bool_]: ... def build_isocalendar_sarray( dtindex: npt.NDArray[np.int64], # const int64_t[:] + reso: int = ..., # NPY_DATETIMEUNIT ) -> np.ndarray: ... def _get_locale_names(name_type: str, locale: str | None = ...): ... diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 57d4c27b3337d..5865b8c6877b0 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -329,7 +329,7 @@ def get_start_end_field( @cython.wraparound(False) @cython.boundscheck(False) -def get_date_field(const int64_t[:] dtindex, str field): +def get_date_field(const int64_t[:] dtindex, str field, NPY_DATETIMEUNIT reso=NPY_FR_ns): """ Given a int64-based datetime index, extract the year, month, etc., field and return an array of these values. @@ -348,7 +348,7 @@ def get_date_field(const int64_t[:] dtindex, str field): out[i] = -1 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) out[i] = dts.year return out @@ -359,7 +359,7 @@ def get_date_field(const int64_t[:] dtindex, str field): out[i] = -1 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) out[i] = dts.month return out @@ -370,7 +370,7 @@ def get_date_field(const int64_t[:] dtindex, str field): out[i] = -1 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) out[i] = dts.day return out @@ -381,8 +381,9 @@ def get_date_field(const int64_t[:] dtindex, str field): out[i] = -1 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) out[i] = dts.hour + # TODO: can we de-dup with period.pyx s? 
return out elif field == 'm': @@ -392,7 +393,7 @@ def get_date_field(const int64_t[:] dtindex, str field): out[i] = -1 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) out[i] = dts.min return out @@ -403,7 +404,7 @@ def get_date_field(const int64_t[:] dtindex, str field): out[i] = -1 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) out[i] = dts.sec return out @@ -414,7 +415,7 @@ def get_date_field(const int64_t[:] dtindex, str field): out[i] = -1 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) out[i] = dts.us return out @@ -425,7 +426,7 @@ def get_date_field(const int64_t[:] dtindex, str field): out[i] = -1 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) out[i] = dts.ps // 1000 return out elif field == 'doy': @@ -435,7 +436,7 @@ def get_date_field(const int64_t[:] dtindex, str field): out[i] = -1 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) out[i] = get_day_of_year(dts.year, dts.month, dts.day) return out @@ -446,7 +447,7 @@ def get_date_field(const int64_t[:] dtindex, str field): out[i] = -1 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) out[i] = dayofweek(dts.year, dts.month, dts.day) return out @@ -457,7 +458,7 @@ def get_date_field(const int64_t[:] dtindex, str field): out[i] = -1 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) out[i] = get_week_of_year(dts.year, dts.month, dts.day) return out @@ -468,7 +469,7 @@ def get_date_field(const int64_t[:] dtindex, str field): out[i] = -1 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) out[i] = dts.month out[i] = ((out[i] - 1) // 3) + 1 return out @@ -480,11 +481,11 @@ def get_date_field(const int64_t[:] dtindex, str field): out[i] = -1 continue - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) out[i] = get_days_in_month(dts.year, dts.month) return out elif field == 'is_leap_year': - return isleapyear_arr(get_date_field(dtindex, 'Y')) + return isleapyear_arr(get_date_field(dtindex, 'Y', reso=reso)) raise ValueError(f"Field {field} not supported") @@ -564,7 +565,7 @@ cpdef isleapyear_arr(ndarray years): @cython.wraparound(False) @cython.boundscheck(False) -def build_isocalendar_sarray(const int64_t[:] dtindex): +def build_isocalendar_sarray(const int64_t[:] dtindex, NPY_DATETIMEUNIT reso=NPY_FR_ns): """ Given a int64-based datetime array, return the ISO 8601 year, week, and day as a structured array. 
@@ -592,7 +593,7 @@ def build_isocalendar_sarray(const int64_t[:] dtindex): if dtindex[i] == NPY_NAT: ret_val = 0, 0, 0 else: - dt64_to_dtstruct(dtindex[i], &dts) + pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) ret_val = get_iso_calendar(dts.year, dts.month, dts.day) iso_years[i] = ret_val[0] diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py index d2e6b6e935ed5..84160344437b5 100644 --- a/pandas/core/array_algos/putmask.py +++ b/pandas/core/array_algos/putmask.py @@ -1,5 +1,5 @@ """ -EA-compatible analogue to to np.putmask +EA-compatible analogue to np.putmask """ from __future__ import annotations diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 6f984727f4f6d..dadfad394b903 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -136,7 +136,7 @@ def f(self): values, field, self.freqstr, month_kw, reso=self._reso ) else: - result = fields.get_date_field(values, field) + result = fields.get_date_field(values, field, reso=self._reso) # these return a boolean by-definition return result @@ -146,7 +146,7 @@ def f(self): result = self._maybe_mask_results(result, fill_value=None) else: - result = fields.get_date_field(values, field) + result = fields.get_date_field(values, field, reso=self._reso) result = self._maybe_mask_results( result, fill_value=None, convert="float64" ) @@ -1403,7 +1403,7 @@ def isocalendar(self) -> DataFrame: from pandas import DataFrame values = self._local_timestamps() - sarray = fields.build_isocalendar_sarray(values) + sarray = fields.build_isocalendar_sarray(values, reso=self._reso) iso_calendar_df = DataFrame( sarray, columns=["year", "week", "day"], dtype="UInt32" ) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f725ae061cedb..2acf5c826eb57 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -28,6 +28,7 @@ from pandas._libs import ( Interval, + lib, reduction as libreduction, ) from pandas._typing import ( @@ -1128,10 +1129,15 @@ def _wrap_applied_output_series( return self._reindex_output(result) def _cython_transform( - self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs + self, + how: str, + numeric_only: bool | lib.NoDefault = lib.no_default, + axis: int = 0, + **kwargs, ) -> DataFrame: assert axis == 0 # handled by caller # TODO: no tests with self.ndim == 1 for DataFrameGroupBy + numeric_only_bool = self._resolve_numeric_only(numeric_only, axis) # With self.axis == 0, we have multi-block tests # e.g. test_rank_min_int, test_cython_transform_frame @@ -1139,7 +1145,8 @@ def _cython_transform( # With self.axis == 1, _get_data_to_aggregate does a transpose # so we always have a single block. mgr: Manager2D = self._get_data_to_aggregate() - if numeric_only: + orig_mgr_len = len(mgr) + if numeric_only_bool: mgr = mgr.get_numeric_data(copy=False) def arr_func(bvalues: ArrayLike) -> ArrayLike: @@ -1152,8 +1159,8 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike: res_mgr = mgr.grouped_reduce(arr_func, ignore_failures=True) res_mgr.set_axis(1, mgr.axes[1]) - if len(res_mgr) < len(mgr): - warn_dropping_nuisance_columns_deprecated(type(self), how) + if len(res_mgr) < orig_mgr_len: + warn_dropping_nuisance_columns_deprecated(type(self), how, numeric_only) res_df = self.obj._constructor(res_mgr) if self.axis == 1: @@ -1269,7 +1276,9 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame: output[i] = sgb.transform(wrapper) except TypeError: # e.g. 
trying to call nanmean with string values - warn_dropping_nuisance_columns_deprecated(type(self), "transform") + warn_dropping_nuisance_columns_deprecated( + type(self), "transform", numeric_only=False + ) else: inds.append(i) @@ -1559,19 +1568,27 @@ def nunique(self, dropna: bool = True) -> DataFrame: _shared_docs["idxmax"], numeric_only_default="True for axis=0, False for axis=1", ) - def idxmax(self, axis=0, skipna: bool = True, numeric_only: bool | None = None): + def idxmax( + self, + axis=0, + skipna: bool = True, + numeric_only: bool | lib.NoDefault = lib.no_default, + ): axis = DataFrame._get_axis_number(axis) - if numeric_only is None: - numeric_only = None if axis == 0 else False + if numeric_only is lib.no_default: + # Cannot use self._resolve_numeric_only; we must pass None to + # DataFrame.idxmax for backwards compatibility + numeric_only_arg = None if axis == 0 else False + else: + numeric_only_arg = cast(bool, numeric_only) def func(df): - # NB: here we use numeric_only=None, in DataFrame it is False GH#38217 res = df._reduce( nanops.nanargmax, "argmax", axis=axis, skipna=skipna, - numeric_only=numeric_only, + numeric_only=numeric_only_arg, ) indices = res._values index = df._get_axis(axis) @@ -1579,25 +1596,35 @@ def func(df): return df._constructor_sliced(result, index=res.index) func.__name__ = "idxmax" - return self._python_apply_general(func, self._obj_with_exclusions) + result = self._python_apply_general(func, self._obj_with_exclusions) + self._maybe_warn_numeric_only_depr("idxmax", result, numeric_only) + return result @doc( _shared_docs["idxmin"], numeric_only_default="True for axis=0, False for axis=1", ) - def idxmin(self, axis=0, skipna: bool = True, numeric_only: bool | None = None): + def idxmin( + self, + axis=0, + skipna: bool = True, + numeric_only: bool | lib.NoDefault = lib.no_default, + ): axis = DataFrame._get_axis_number(axis) - if numeric_only is None: - numeric_only = None if axis == 0 else False + if numeric_only is lib.no_default: + # Cannot use self._resolve_numeric_only; we must pass None to + # DataFrame.idxmin for backwards compatibility + numeric_only_arg = None if axis == 0 else False + else: + numeric_only_arg = cast(bool, numeric_only) def func(df): - # NB: here we use numeric_only=None, in DataFrame it is False GH#46560 res = df._reduce( nanops.nanargmin, "argmin", axis=axis, skipna=skipna, - numeric_only=numeric_only, + numeric_only=numeric_only_arg, ) indices = res._values index = df._get_axis(axis) @@ -1605,7 +1632,9 @@ def func(df): return df._constructor_sliced(result, index=res.index) func.__name__ = "idxmin" - return self._python_apply_general(func, self._obj_with_exclusions) + result = self._python_apply_general(func, self._obj_with_exclusions) + self._maybe_warn_numeric_only_depr("idxmin", result, numeric_only) + return result boxplot = boxplot_frame_groupby diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 70f8e0a752dcb..0203d54e0de86 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -939,8 +939,15 @@ def wrapper(*args, **kwargs): if kwargs.get("axis", None) is None: kwargs["axis"] = self.axis + numeric_only = kwargs.get("numeric_only", lib.no_default) + def curried(x): - return f(x, *args, **kwargs) + with warnings.catch_warnings(): + # Catch any warnings from dispatch to DataFrame; we'll emit + # a warning for groupby below + match = "The default value of numeric_only " + warnings.filterwarnings("ignore", match, FutureWarning) + return f(x, *args, **kwargs) # 
preserve the name so we can detect it when calling plot methods, # to avoid duplicates @@ -956,6 +963,13 @@ def curried(x): curried, self._obj_with_exclusions, is_transform=is_transform ) + if self._selected_obj.ndim != 1 and self.axis != 1: + missing = self._obj_with_exclusions.columns.difference(result.columns) + if len(missing) > 0: + warn_dropping_nuisance_columns_deprecated( + type(self), name, numeric_only + ) + if self.grouper.has_dropped_na and is_transform: # result will have dropped rows due to nans, fill with null # and ensure index is ordered same as the input @@ -1223,7 +1237,9 @@ def _wrap_applied_output( ): raise AbstractMethodError(self) - def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool: + def _resolve_numeric_only( + self, numeric_only: bool | lib.NoDefault, axis: int + ) -> bool: """ Determine subclass-specific default value for 'numeric_only'. @@ -1233,6 +1249,8 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool: Parameters ---------- numeric_only : bool or lib.no_default + axis : int + Axis passed to the groupby op (not self.axis). Returns ------- @@ -1243,7 +1261,7 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool: # i.e. not explicitly passed by user if self.obj.ndim == 2: # i.e. DataFrameGroupBy - numeric_only = True + numeric_only = axis != 1 # GH#42395 GH#43108 GH#43154 # Regression from 1.2.5 to 1.3 caused object columns to be dropped if self.axis: @@ -1253,7 +1271,6 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool: check = obj._get_numeric_data() if len(obj.columns) and not len(check.columns) and not obj.empty: numeric_only = False - # TODO: v1.4+ Add FutureWarning else: numeric_only = False @@ -1262,6 +1279,27 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool: # expected "bool") return numeric_only # type: ignore[return-value] + def _maybe_warn_numeric_only_depr( + self, how: str, result: DataFrame | Series, numeric_only: bool | lib.NoDefault + ) -> None: + """Emit warning on numeric_only behavior deprecation when appropriate. + + Parameters + ---------- + how : str + Groupby kernel name. + result : + Result of the groupby operation. + numeric_only : bool or lib.no_default + Argument as passed by user. 
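A minimal sketch of when this helper fires, assuming a frame with one non-numeric column (all names here are illustrative):

import pandas as pd

df = pd.DataFrame({"key": [1, 1, 2], "x": [1.0, 2.0, 3.0], "s": ["a", "b", "c"]})
gb = df.groupby("key")

# With numeric_only unspecified, "s" is excluded from the result and a
# FutureWarning of the form "The default value of numeric_only in
# DataFrameGroupBy.<op> is deprecated" is emitted
res = gb.std()

# Passing numeric_only explicitly gives the same result with no
# default-value warning
res = gb.std(numeric_only=True)
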
+ """ + if ( + self._obj_with_exclusions.ndim != 1 + and result.ndim > 1 + and len(result.columns) < len(self._obj_with_exclusions.columns) + ): + warn_dropping_nuisance_columns_deprecated(type(self), how, numeric_only) + # ----------------------------------------------------------------- # numba @@ -1522,7 +1560,9 @@ def _python_agg_general(self, func, *args, raise_on_typeerror=False, **kwargs): except TypeError: if raise_on_typeerror: raise - warn_dropping_nuisance_columns_deprecated(type(self), "agg") + warn_dropping_nuisance_columns_deprecated( + type(self), "agg", numeric_only=False + ) continue key = base.OutputKey(label=name, position=idx) @@ -1536,7 +1576,7 @@ def _python_agg_general(self, func, *args, raise_on_typeerror=False, **kwargs): @final def _agg_general( self, - numeric_only: bool = True, + numeric_only: bool | lib.NoDefault = True, min_count: int = -1, *, alias: str, @@ -1598,17 +1638,19 @@ def _cython_agg_general( self, how: str, alt: Callable, - numeric_only: bool, + numeric_only: bool | lib.NoDefault, min_count: int = -1, ignore_failures: bool = True, ): # Note: we never get here with how="ohlc" for DataFrameGroupBy; # that goes through SeriesGroupBy + numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0) data = self._get_data_to_aggregate() is_ser = data.ndim == 1 - if numeric_only: + orig_len = len(data) + if numeric_only_bool: if is_ser and not is_numeric_dtype(self._selected_obj.dtype): # GH#41291 match Series behavior kwd_name = "numeric_only" @@ -1638,8 +1680,8 @@ def array_func(values: ArrayLike) -> ArrayLike: # continue and exclude the block new_mgr = data.grouped_reduce(array_func, ignore_failures=ignore_failures) - if not is_ser and len(new_mgr) < len(data): - warn_dropping_nuisance_columns_deprecated(type(self), how) + if not is_ser and len(new_mgr) < orig_len: + warn_dropping_nuisance_columns_deprecated(type(self), how, numeric_only) res = self._wrap_agged_manager(new_mgr) if is_ser: @@ -1997,7 +2039,7 @@ def mean( 2 4.0 Name: B, dtype: float64 """ - numeric_only_bool = self._resolve_numeric_only(numeric_only) + numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0) if maybe_use_numba(engine): from pandas.core._numba.kernels import sliding_mean @@ -2007,7 +2049,7 @@ def mean( result = self._cython_agg_general( "mean", alt=lambda x: Series(x).mean(numeric_only=numeric_only_bool), - numeric_only=numeric_only_bool, + numeric_only=numeric_only, ) return result.__finalize__(self.obj, method="groupby") @@ -2031,12 +2073,12 @@ def median(self, numeric_only: bool | lib.NoDefault = lib.no_default): Series or DataFrame Median of values within each group. 
""" - numeric_only_bool = self._resolve_numeric_only(numeric_only) + numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0) result = self._cython_agg_general( "median", alt=lambda x: Series(x).median(numeric_only=numeric_only_bool), - numeric_only=numeric_only_bool, + numeric_only=numeric_only, ) return result.__finalize__(self.obj, method="groupby") @@ -2092,7 +2134,7 @@ def std( return np.sqrt(self._numba_agg_general(sliding_var, engine_kwargs, ddof)) else: - return self._get_cythonized_result( + result = self._get_cythonized_result( libgroupby.group_var, cython_dtype=np.dtype(np.float64), numeric_only=numeric_only, @@ -2100,6 +2142,8 @@ def std( post_processing=lambda vals, inference: np.sqrt(vals), ddof=ddof, ) + self._maybe_warn_numeric_only_depr("std", result, numeric_only) + return result @final @Substitution(name="groupby") @@ -2153,12 +2197,12 @@ def var( return self._numba_agg_general(sliding_var, engine_kwargs, ddof) else: - numeric_only_bool = self._resolve_numeric_only(numeric_only) + numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0) if ddof == 1: return self._cython_agg_general( "var", alt=lambda x: Series(x).var(ddof=ddof), - numeric_only=numeric_only_bool, + numeric_only=numeric_only, ignore_failures=numeric_only is lib.no_default, ) else: @@ -2193,6 +2237,8 @@ def sem(self, ddof: int = 1, numeric_only: bool | lib.NoDefault = lib.no_default Standard error of the mean of values within each group. """ result = self.std(ddof=ddof, numeric_only=numeric_only) + self._maybe_warn_numeric_only_depr("sem", result, numeric_only) + if result.ndim == 1: result /= np.sqrt(self.count()) else: @@ -2253,8 +2299,6 @@ def sum( engine_kwargs, ) else: - numeric_only = self._resolve_numeric_only(numeric_only) - # If we are grouping on categoricals we want unobserved categories to # return zero, rather than the default of NaN which the reindexing in # _agg_general() returns. GH #31422 @@ -2273,8 +2317,6 @@ def sum( def prod( self, numeric_only: bool | lib.NoDefault = lib.no_default, min_count: int = 0 ): - numeric_only = self._resolve_numeric_only(numeric_only) - return self._agg_general( numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod ) @@ -3050,7 +3092,7 @@ def quantile( a 2.0 b 3.0 """ - numeric_only_bool = self._resolve_numeric_only(numeric_only) + numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0) def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, np.dtype | None]: if is_object_dtype(vals): @@ -3153,7 +3195,9 @@ def blk_func(values: ArrayLike) -> ArrayLike: and not is_ser and len(res_mgr.items) != len(mgr.items) ): - warn_dropping_nuisance_columns_deprecated(type(self), "quantile") + warn_dropping_nuisance_columns_deprecated( + type(self), "quantile", numeric_only + ) if len(res_mgr.items) == 0: # re-call grouped_reduce to get the desired exception message @@ -3447,7 +3491,7 @@ def cumsum(self, axis=0, *args, **kwargs): @final @Substitution(name="groupby") @Appender(_common_see_also) - def cummin(self, axis=0, **kwargs): + def cummin(self, axis=0, numeric_only=False, **kwargs): """ Cumulative min for each group. 
@@ -3460,12 +3504,14 @@ def cummin(self, axis=0, **kwargs): f = lambda x: np.minimum.accumulate(x, axis) return self._python_apply_general(f, self._selected_obj, is_transform=True) - return self._cython_transform("cummin", numeric_only=False, skipna=skipna) + return self._cython_transform( + "cummin", numeric_only=numeric_only, skipna=skipna + ) @final @Substitution(name="groupby") @Appender(_common_see_also) - def cummax(self, axis=0, **kwargs): + def cummax(self, axis=0, numeric_only=False, **kwargs): """ Cumulative max for each group. @@ -3478,7 +3524,9 @@ def cummax(self, axis=0, **kwargs): f = lambda x: np.maximum.accumulate(x, axis) return self._python_apply_general(f, self._selected_obj, is_transform=True) - return self._cython_transform("cummax", numeric_only=False, skipna=skipna) + return self._cython_transform( + "cummax", numeric_only=numeric_only, skipna=skipna + ) @final def _get_cythonized_result( @@ -3532,7 +3580,7 @@ def _get_cythonized_result( ------- `Series` or `DataFrame` with filled values """ - numeric_only = self._resolve_numeric_only(numeric_only) + numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0) if post_processing and not callable(post_processing): raise ValueError("'post_processing' must be a callable!") @@ -3601,15 +3649,16 @@ def blk_func(values: ArrayLike) -> ArrayLike: # Operate block-wise instead of column-by-column is_ser = obj.ndim == 1 mgr = self._get_data_to_aggregate() + orig_mgr_len = len(mgr) - if numeric_only: + if numeric_only_bool: mgr = mgr.get_numeric_data() res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True) - if not is_ser and len(res_mgr.items) != len(mgr.items): + if not is_ser and len(res_mgr.items) != orig_mgr_len: howstr = how.replace("group_", "") - warn_dropping_nuisance_columns_deprecated(type(self), howstr) + warn_dropping_nuisance_columns_deprecated(type(self), howstr, numeric_only) if len(res_mgr.items) == 0: # We re-call grouped_reduce to get the right exception message @@ -4155,13 +4204,27 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde return mi -def warn_dropping_nuisance_columns_deprecated(cls, how: str) -> None: - warnings.warn( - "Dropping invalid columns in " - f"{cls.__name__}.{how} is deprecated. " - "In a future version, a TypeError will be raised. " - f"Before calling .{how}, select only columns which " - "should be valid for the function.", - FutureWarning, - stacklevel=find_stack_level(), - ) +def warn_dropping_nuisance_columns_deprecated(cls, how: str, numeric_only) -> None: + if how == "add": + how = "sum" + if numeric_only is not lib.no_default and not numeric_only: + # numeric_only was specified and falsey but still dropped nuisance columns + warnings.warn( + "Dropping invalid columns in " + f"{cls.__name__}.{how} is deprecated. " + "In a future version, a TypeError will be raised. " + f"Before calling .{how}, select only columns which " + "should be valid for the function.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif numeric_only is lib.no_default: + warnings.warn( + "The default value of numeric_only in " + f"{cls.__name__}.{how} is deprecated. " + "In a future version, numeric_only will default to False. 
" + f"Either specify numeric_only or select only columns which " + "should be valid for the function.", + FutureWarning, + stacklevel=find_stack_level(), + ) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8ebaaa28e13a5..c4a3afbb282cf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4205,9 +4205,14 @@ def is_int(v): return v is None or is_integer(v) is_index_slice = is_int(start) and is_int(stop) and is_int(step) - is_positional = is_index_slice and not ( - self.is_integer() or self.is_categorical() + + # special case for interval_dtype bc we do not do partial-indexing + # on integer Intervals when slicing + # TODO: write this in terms of e.g. should_partial_index? + ints_are_positional = self._should_fallback_to_positional or is_interval_dtype( + self.dtype ) + is_positional = is_index_slice and ints_are_positional if kind == "getitem": """ diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 25b7a5c3d3689..811dc72e9b908 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -210,8 +210,12 @@ def _summary(self, name=None) -> str: # -------------------------------------------------------------------- # Indexing Methods + @final def _can_partial_date_slice(self, reso: Resolution) -> bool: - raise NotImplementedError + # e.g. test_getitem_setitem_periodindex + # History of conversation GH#3452, GH#3931, GH#2369, GH#14826 + return reso > self._resolution_obj + # NB: for DTI/PI, not TDI def _parsed_string_to_bounds(self, reso: Resolution, parsed): raise NotImplementedError diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 3954cb28c2aca..5274f68eb3171 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -593,10 +593,6 @@ def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime): end = self._maybe_cast_for_get_loc(end) return start, end - def _can_partial_date_slice(self, reso: Resolution) -> bool: - # History of conversation GH#3452, GH#3931, GH#2369, GH#14826 - return reso > self._resolution_obj - def _deprecate_mismatched_indexing(self, key) -> None: # GH#36148 # we get here with isinstance(key, self._data._recognized_scalars) @@ -651,12 +647,8 @@ def get_loc(self, key, method=None, tolerance=None): except KeyError as err: if method is None: raise KeyError(key) from err - try: - key = self._maybe_cast_for_get_loc(key) - except ValueError as err: - # FIXME(dateutil#1180): we get here because parse_with_reso - # doesn't raise on "t2m" - raise KeyError(key) from err + + key = self._maybe_cast_for_get_loc(key) elif isinstance(key, timedelta): # GH#20464 @@ -682,7 +674,16 @@ def get_loc(self, key, method=None, tolerance=None): def _maybe_cast_for_get_loc(self, key) -> Timestamp: # needed to localize naive datetimes or dates (GH 35690) - key = Timestamp(key) + try: + key = Timestamp(key) + except ValueError as err: + # FIXME(dateutil#1180): we get here because parse_with_reso + # doesn't raise on "t2m" + if not isinstance(key, str): + # Not expected to be reached, but check to be sure + raise # pragma: no cover + raise KeyError(key) from err + if key.tzinfo is None: key = key.tz_localize(self.tz) else: @@ -691,6 +692,13 @@ def _maybe_cast_for_get_loc(self, key) -> Timestamp: @doc(DatetimeTimedeltaMixin._maybe_cast_slice_bound) def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): + + # GH#42855 handle date here instead of get_slice_bound + if isinstance(label, 
date) and not isinstance(label, datetime): + # Pandas supports slicing with dates, treated as datetimes at midnight. + # https://github.com/pandas-dev/pandas/issues/31501 + label = Timestamp(label).to_pydatetime() + label = super()._maybe_cast_slice_bound(label, side, kind=kind) self._deprecate_mismatched_indexing(label) return self._maybe_cast_for_get_loc(label) @@ -722,13 +730,6 @@ def slice_indexer(self, start=None, end=None, step=None, kind=lib.no_default): if isinstance(start, time) or isinstance(end, time): raise KeyError("Cannot mix time and non-time slice keys") - # Pandas supports slicing with dates, treated as datetimes at midnight. - # https://github.com/pandas-dev/pandas/issues/31501 - if isinstance(start, date) and not isinstance(start, datetime): - start = datetime.combine(start, time(0, 0)) - if isinstance(end, date) and not isinstance(end, datetime): - end = datetime.combine(end, time(0, 0)) - def check_str_or_none(point): return point is not None and not isinstance(point, str) @@ -768,15 +769,6 @@ def check_str_or_none(point): else: return indexer - @doc(Index.get_slice_bound) - def get_slice_bound( - self, label, side: Literal["left", "right"], kind=lib.no_default - ) -> int: - # GH#42855 handle date here instead of _maybe_cast_slice_bound - if isinstance(label, date) and not isinstance(label, datetime): - label = Timestamp(label).to_pydatetime() - return super().get_slice_bound(label, side=side, kind=kind) - # -------------------------------------------------------------------- @property diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index e015a3e5a941a..752ce28c58f55 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -720,7 +720,7 @@ def _values(self) -> np.ndarray: if isinstance(vals, ABCDatetimeIndex): # TODO: this can be removed after Timestamp.freq is removed # The astype(object) below does not remove the freq from - # the underlying Timestamps so we remove it here to to match + # the underlying Timestamps so we remove it here to match # the behavior of self._get_level_values vals = vals.copy() vals.freq = None diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 174e0a7f81850..c1cb5ad315298 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -222,6 +222,8 @@ def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False): if is_float_dtype(self.dtype): assert kind in ["loc", "getitem"] + # TODO: can we write this as a condition based on + # e.g. _should_fallback_to_positional? 
# We always treat __getitem__ slicing as label-based # translate to locations return self.slice_indexer(key.start, key.stop, key.step) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 592e6e9fb703d..e3ab5e8624585 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -25,7 +25,10 @@ DtypeObj, npt, ) -from pandas.util._decorators import doc +from pandas.util._decorators import ( + cache_readonly, + doc, +) from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( @@ -159,6 +162,12 @@ class PeriodIndex(DatetimeIndexOpsMixin): _engine_type = libindex.PeriodEngine _supports_partial_string_indexing = True + @cache_readonly + # Signature of "_resolution_obj" incompatible with supertype "DatetimeIndexOpsMixin" + def _resolution_obj(self) -> Resolution: # type: ignore[override] + # for compat with DatetimeIndex + return self.dtype._resolution_obj + # -------------------------------------------------------------------- # methods that dispatch to array and wrap result in Index # These are defined here instead of via inherit_names for mypy @@ -446,10 +455,10 @@ def get_loc(self, key, method=None, tolerance=None): # TODO: pass if method is not None, like DTI does? raise KeyError(key) from err - if reso == self.dtype._resolution_obj: - # the reso < self.dtype._resolution_obj case goes + if reso == self._resolution_obj: + # the reso < self._resolution_obj case goes # through _get_string_slice - key = Period(parsed, freq=self.freq) + key = self._cast_partial_indexing_scalar(key) loc = self.get_loc(key, method=method, tolerance=tolerance) # Recursing instead of falling through matters for the exception # message in test_get_loc3 (though not clear if that really matters) @@ -457,28 +466,14 @@ def get_loc(self, key, method=None, tolerance=None): elif method is None: raise KeyError(key) else: - key = Period(parsed, freq=self.freq) + key = self._cast_partial_indexing_scalar(parsed) elif isinstance(key, Period): - sfreq = self.freq - kfreq = key.freq - if not ( - sfreq.n == kfreq.n - # error: "BaseOffset" has no attribute "_period_dtype_code" - and sfreq._period_dtype_code # type: ignore[attr-defined] - # error: "BaseOffset" has no attribute "_period_dtype_code" - == kfreq._period_dtype_code # type: ignore[attr-defined] - ): - # GH#42247 For the subset of DateOffsets that can be Period freqs, - # checking these two attributes is sufficient to check equality, - # and much more performant than `self.freq == key.freq` - raise KeyError(key) + key = self._maybe_cast_for_get_loc(key) + elif isinstance(key, datetime): - try: - key = Period(key, freq=self.freq) - except ValueError as err: - # we cannot construct the Period - raise KeyError(orig_key) from err + key = self._cast_partial_indexing_scalar(key) + else: # in particular integer, which Period constructor would cast to string raise KeyError(key) @@ -488,10 +483,35 @@ def get_loc(self, key, method=None, tolerance=None): except KeyError as err: raise KeyError(orig_key) from err + def _maybe_cast_for_get_loc(self, key: Period) -> Period: + # name is a misnomer, chosen for compat with DatetimeIndex + sfreq = self.freq + kfreq = key.freq + if not ( + sfreq.n == kfreq.n + # error: "BaseOffset" has no attribute "_period_dtype_code" + and sfreq._period_dtype_code # type: ignore[attr-defined] + # error: "BaseOffset" has no attribute "_period_dtype_code" + == kfreq._period_dtype_code # type: ignore[attr-defined] + ): + # GH#42247 For the subset of DateOffsets that can be Period 
freqs, + # checking these two attributes is sufficient to check equality, + # and much more performant than `self.freq == key.freq` + raise KeyError(key) + return key + + def _cast_partial_indexing_scalar(self, label): + try: + key = Period(label, freq=self.freq) + except ValueError as err: + # we cannot construct the Period + raise KeyError(label) from err + return key + @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound) def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): if isinstance(label, datetime): - label = Period(label, freq=self.freq) + label = self._cast_partial_indexing_scalar(label) return super()._maybe_cast_slice_bound(label, side, kind=kind) @@ -499,11 +519,6 @@ def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime): iv = Period(parsed, freq=reso.attr_abbrev) return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end")) - def _can_partial_date_slice(self, reso: Resolution) -> bool: - assert isinstance(reso, Resolution), (type(reso), reso) - # e.g. test_getitem_setitem_periodindex - return reso > self.dtype._resolution_obj - def period_range( start=None, end=None, periods: int | None = None, freq=None, name=None diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ddae58fd46bb0..02c095202d079 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1322,10 +1322,6 @@ def _convert_to_indexer(self, key, axis: int): if isinstance(key, slice): return labels._convert_slice_indexer(key, kind="loc") - # see if we are positional in nature - is_int_index = labels.is_integer() - is_int_positional = is_integer(key) and not is_int_index - if ( isinstance(key, tuple) and not isinstance(labels, MultiIndex) @@ -1350,17 +1346,9 @@ def _convert_to_indexer(self, key, axis: int): if not isinstance(labels, MultiIndex): raise except ValueError: - if not is_int_positional: + if not is_integer(key): raise - - # a positional - if is_int_positional: - - # if we are setting and its not a valid location - # its an insert which fails by definition - - # always valid - return {"key": key} + return {"key": key} if is_nested_tuple(key, labels): if self.ndim == 1 and any(isinstance(k, tuple) for k in key): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index ded525cd099fc..2e638f5b0fb3d 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1892,7 +1892,7 @@ def create_block_manager_from_blocks( # If verify_integrity=False, then caller is responsible for checking # all(x.shape[-1] == len(axes[1]) for x in blocks) # sum(x.shape[0] for x in blocks) == len(axes[0]) - # set(x for for blk in blocks for x in blk.mgr_locs) == set(range(len(axes[0]))) + # set(x for blk in blocks for x in blk.mgr_locs) == set(range(len(axes[0]))) # all(blk.ndim == 2 for blk in blocks) # This allows us to safely pass verify_integrity=False diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 32cb4938344c4..922d194f04c55 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -134,8 +134,9 @@ class ExponentialMovingWindow(BaseWindow): r""" Provide exponentially weighted (EW) calculations. - Exactly one parameter: ``com``, ``span``, ``halflife``, or ``alpha`` must be - provided. + Exactly one of ``com``, ``span``, ``halflife``, or ``alpha`` must be + provided if ``times`` is not provided. If ``times`` is provided, + ``halflife`` and one of ``com``, ``span`` or ``alpha`` may be provided. 
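A minimal sketch of the accepted combinations under this change (the series and dates are illustrative):

import datetime
import numpy as np
import pandas as pd

ser = pd.Series([0.0, 1.0, 2.0, 3.0])
times = pd.date_range("2022-01-01", periods=4, freq="D")

# When times is supplied, halflife may be a str, a datetime.timedelta, or
# (new here) a np.timedelta64; a time-based halflife only applies to mean()
r1 = ser.ewm(halflife="2 days", times=times).mean()
r2 = ser.ewm(halflife=datetime.timedelta(days=2), times=times).mean()
r3 = ser.ewm(halflife=np.timedelta64(2, "D"), times=times).mean()
assert r1.equals(r3)
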
Parameters ---------- @@ -155,7 +156,7 @@ class ExponentialMovingWindow(BaseWindow): :math:`\alpha = 1 - \exp\left(-\ln(2) / halflife\right)`, for :math:`halflife > 0`. - If ``times`` is specified, the time unit (str or timedelta) over which an + If ``times`` is specified, a timedelta convertible unit over which an observation decays to half its value. Only applicable to ``mean()``, and halflife value will not apply to the other functions. @@ -389,10 +390,8 @@ def __init__( raise ValueError("times must be datetime64[ns] dtype.") if len(self.times) != len(obj): raise ValueError("times must be the same length as the object.") - if not isinstance(self.halflife, (str, datetime.timedelta)): - raise ValueError( - "halflife must be a string or datetime.timedelta object" - ) + if not isinstance(self.halflife, (str, datetime.timedelta, np.timedelta64)): + raise ValueError("halflife must be a timedelta convertible object") if isna(self.times).any(): raise ValueError("Cannot convert NaT values to integer") self._deltas = _calculate_deltas(self.times, self.halflife) @@ -404,7 +403,7 @@ def __init__( self._com = 1.0 else: if self.halflife is not None and isinstance( - self.halflife, (str, datetime.timedelta) + self.halflife, (str, datetime.timedelta, np.timedelta64) ): raise ValueError( "halflife can only be a timedelta convertible argument if " diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 8eb5cc2dd82f6..897528cf18122 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -12,7 +12,15 @@ class TestNonNano: - @pytest.mark.parametrize("unit,reso", [("s", 7), ("ms", 8), ("us", 9)]) + @pytest.fixture(params=["s", "ms", "us"]) + def unit(self, request): + return request.param + + @pytest.fixture + def reso(self, unit): + # TODO: avoid hard-coding + return {"s": 7, "ms": 8, "us": 9}[unit] + @pytest.mark.xfail(reason="_box_func is not yet patched to get reso right") def test_non_nano(self, unit, reso): arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]") @@ -21,6 +29,22 @@ def test_non_nano(self, unit, reso): assert dta.dtype == arr.dtype assert dta[0]._reso == reso + @pytest.mark.filterwarnings( + "ignore:weekofyear and week have been deprecated:FutureWarning" + ) + @pytest.mark.parametrize( + "field", DatetimeArray._field_ops + DatetimeArray._bool_ops + ) + def test_fields(self, unit, reso, field): + dti = pd.date_range("2016-01-01", periods=55, freq="D") + arr = np.asarray(dti).astype(f"M8[{unit}]") + + dta = DatetimeArray._simple_new(arr, dtype=arr.dtype) + + res = getattr(dta, field) + expected = getattr(dti._data, field) + tm.assert_numpy_array_equal(res, expected) + class TestDatetimeArrayComparisons: # TODO: merge this into tests/arithmetic/test_datetime64 once it is diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py index 336865d32167d..711f1835446a5 100644 --- a/pandas/tests/extension/base/groupby.py +++ b/pandas/tests/extension/base/groupby.py @@ -1,5 +1,7 @@ import pytest +from pandas.core.dtypes.common import is_numeric_dtype + import pandas as pd import pandas._testing as tm from pandas.tests.extension.base.base import BaseExtensionTests @@ -96,7 +98,15 @@ def test_in_numeric_groupby(self, data_for_grouping): "C": [1, 1, 1, 1, 1, 1, 1, 1], } ) - result = df.groupby("A").sum().columns + + dtype = data_for_grouping.dtype + if is_numeric_dtype(dtype) or dtype.name == "decimal": + warn = None + else: + warn = FutureWarning + msg = "The default value of 
numeric_only" + with tm.assert_produces_warning(warn, match=msg): + result = df.groupby("A").sum().columns if data_for_grouping.dtype._is_numeric: expected = pd.Index(["B", "C"]) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index ba89a76a7f8c2..fedcc0e2a2284 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1785,7 +1785,9 @@ def test_stack_multiple_bug(self): multi = df.set_index(["DATE", "ID"]) multi.columns.name = "Params" unst = multi.unstack("ID") - down = unst.resample("W-THU").mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + down = unst.resample("W-THU").mean() rs = down.stack("ID") xp = unst.loc[:, ["VAR1"]].resample("W-THU").mean().stack("ID") diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index 2b248afb42057..b4a3a60e72139 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -71,7 +71,9 @@ def test_metadata_propagation_indiv_groupby(self): "D": np.random.randn(8), } ) - result = df.groupby("A").sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A").sum() tm.assert_metadata_equivalent(df, result) def test_metadata_propagation_indiv_resample(self): diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index bdb33bff5eadd..37b02571158b9 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -238,7 +238,10 @@ def test_multiindex_groupby_mixed_cols_axis1(func, expected, dtype, result_dtype [[1, 2, 3, 4, 5, 6]] * 3, columns=MultiIndex.from_product([["a", "b"], ["i", "j", "k"]]), ).astype({("a", "j"): dtype, ("b", "j"): dtype}) - result = df.groupby(level=1, axis=1).agg(func) + warn = FutureWarning if func == "std" else None + msg = "The default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + result = df.groupby(level=1, axis=1).agg(func) expected = DataFrame([expected] * 3, columns=["i", "j", "k"]).astype( result_dtype_dict ) @@ -262,7 +265,10 @@ def test_groupby_mixed_cols_axis1(func, expected_data, result_dtype_dict): columns=Index([10, 20, 10, 20], name="x"), dtype="int64", ).astype({10: "Int64"}) - result = df.groupby("x", axis=1).agg(func) + warn = FutureWarning if func == "std" else None + msg = "The default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + result = df.groupby("x", axis=1).agg(func) expected = DataFrame( data=expected_data, index=Index([0, 1, 0], name="y"), diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index 7c64d82608c9e..e541abb368a02 100644 --- a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -187,7 +187,9 @@ def test_regression_allowlist_methods(raw_frame, op, level, axis, skipna, sort): if op in AGG_FUNCTIONS_WITH_SKIPNA: grouped = frame.groupby(level=level, axis=axis, sort=sort) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + with tm.assert_produces_warning( + warn, match="The 'mad' method is deprecated", raise_on_extra_warnings=False + ): result = getattr(grouped, op)(skipna=skipna) with tm.assert_produces_warning(FutureWarning): expected = getattr(frame, op)(level=level, axis=axis, skipna=skipna) @@ -196,8 +198,8 @@ def 
test_regression_allowlist_methods(raw_frame, op, level, axis, skipna, sort): tm.assert_frame_equal(result, expected) else: grouped = frame.groupby(level=level, axis=axis, sort=sort) - result = getattr(grouped, op)() with tm.assert_produces_warning(FutureWarning): + result = getattr(grouped, op)() expected = getattr(frame, op)(level=level, axis=axis) if sort: expected = expected.sort_index(axis=axis, level=level) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index abe1b8f13e32e..004e55f4d161f 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -103,7 +103,9 @@ def test_basic(): # TODO: split this test gb = df.groupby("A", observed=False) exp_idx = CategoricalIndex(["a", "b", "z"], name="A", ordered=True) expected = DataFrame({"values": Series([3, 7, 0], index=exp_idx)}) - result = gb.sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.sum() tm.assert_frame_equal(result, expected) # GH 8623 @@ -314,6 +316,7 @@ def test_apply(ordered): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:.*value of numeric_only.*:FutureWarning") def test_observed(observed): # multiple groupers, don't re-expand the output space # of the grouper @@ -807,8 +810,12 @@ def test_preserve_categorical_dtype(): } ) for col in ["C1", "C2"]: - result1 = df.groupby(by=col, as_index=False, observed=False).mean() - result2 = df.groupby(by=col, as_index=True, observed=False).mean().reset_index() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result1 = df.groupby(by=col, as_index=False, observed=False).mean() + result2 = ( + df.groupby(by=col, as_index=True, observed=False).mean().reset_index() + ) expected = exp_full.reindex(columns=result1.columns) tm.assert_frame_equal(result1, expected) tm.assert_frame_equal(result2, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index c99405dfccb66..206d37e1a800e 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -4,6 +4,7 @@ import numpy as np import pytest +from pandas._libs import lib from pandas.errors import UnsupportedFunctionCall import pandas as pd @@ -259,7 +260,9 @@ def _check(self, df, method, expected_columns, expected_columns_numeric): # these have numeric_only kwarg, but default to False warn = FutureWarning - with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + with tm.assert_produces_warning( + warn, match="Dropping invalid columns", raise_on_extra_warnings=False + ): result = getattr(gb, method)() tm.assert_index_equal(result.columns, expected_columns_numeric) @@ -297,24 +300,26 @@ def gni(self, df): return gni # TODO: non-unique columns, as_index=False - @pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_idxmax(self, gb): # object dtype so idxmax goes through _aggregate_item_by_item # GH#5610 # non-cython calls should not include the grouper expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) expected.index.name = "A" - result = gb.idxmax() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.idxmax() tm.assert_frame_equal(result, expected) - @pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_idxmin(self, gb): # object dtype so idxmax goes through 
_aggregate_item_by_item # GH#5610 # non-cython calls should not include the grouper expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) expected.index.name = "A" - result = gb.idxmin() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.idxmin() tm.assert_frame_equal(result, expected) def test_mad(self, gb, gni): @@ -1238,3 +1243,114 @@ def test_groupby_sum_timedelta_with_nat(): res = gb["b"].sum(min_count=2) expected = Series([td3, pd.NaT], dtype="m8[ns]", name="b", index=expected.index) tm.assert_series_equal(res, expected) + + +@pytest.mark.parametrize( + "kernel, numeric_only_default, drops_nuisance, has_arg", + [ + ("all", False, False, False), + ("any", False, False, False), + ("bfill", False, False, False), + ("corr", True, False, True), + ("corrwith", True, False, True), + ("cov", True, False, True), + ("cummax", False, True, True), + ("cummin", False, True, True), + ("cumprod", True, True, True), + ("cumsum", True, True, True), + ("diff", False, False, False), + ("ffill", False, False, False), + ("fillna", False, False, False), + ("first", False, False, True), + ("idxmax", True, False, True), + ("idxmin", True, False, True), + ("last", False, False, True), + ("max", False, True, True), + ("mean", True, True, True), + ("median", True, True, True), + ("min", False, True, True), + ("nth", False, False, False), + ("nunique", False, False, False), + ("pct_change", False, False, False), + ("prod", True, True, True), + ("quantile", True, False, True), + ("sem", True, True, True), + ("skew", True, False, True), + ("std", True, True, True), + ("sum", True, True, True), + ("var", True, False, True), + ], +) +@pytest.mark.parametrize("numeric_only", [True, False, lib.no_default]) +@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]]) +def test_deprecate_numeric_only( + kernel, numeric_only_default, drops_nuisance, has_arg, numeric_only, keys +): + # GH#46072 + # drops_nuisance: Whether the op drops nuisance columns even when numeric_only=False + # has_arg: Whether the op has a numeric_only arg + df = DataFrame({"a1": [1, 1], "a2": [2, 2], "a3": [5, 6], "b": 2 * [object]}) + + if kernel == "corrwith": + args = (df,) + elif kernel == "nth" or kernel == "fillna": + args = (0,) + else: + args = () + kwargs = {} if numeric_only is lib.no_default else {"numeric_only": numeric_only} + + gb = df.groupby(keys) + method = getattr(gb, kernel) + if has_arg and ( + # Cases where b does not appear in the result + numeric_only is True + or (numeric_only is lib.no_default and numeric_only_default) + or drops_nuisance + ): + if numeric_only is True or (not numeric_only_default and not drops_nuisance): + warn = None + else: + warn = FutureWarning + if numeric_only is lib.no_default and numeric_only_default: + msg = f"The default value of numeric_only in DataFrameGroupBy.{kernel}" + else: + msg = f"Dropping invalid columns in DataFrameGroupBy.{kernel}" + with tm.assert_produces_warning(warn, match=msg): + result = method(*args, **kwargs) + + assert "b" not in result.columns + elif ( + # kernels that work on any dtype and have numeric_only arg + kernel in ("first", "last", "corrwith") + or ( + # kernels that work on any dtype and don't have numeric_only arg + kernel in ("any", "all", "bfill", "ffill", "fillna", "nth", "nunique") + and numeric_only is lib.no_default + ) + ): + result = method(*args, **kwargs) + assert "b" in result.columns + elif has_arg: + assert numeric_only is not True + assert numeric_only is not 
lib.no_default or numeric_only_default is False + assert not drops_nuisance + # kernels that are successful on any dtype were above; this will fail + msg = ( + "(not allowed for this dtype" + "|must be a string or a number" + "|cannot be performed against 'object' dtypes" + "|must be a string or a real number)" + ) + with pytest.raises(TypeError, match=msg): + method(*args, **kwargs) + elif not has_arg and numeric_only is not lib.no_default: + with pytest.raises( + TypeError, match="got an unexpected keyword argument 'numeric_only'" + ): + method(*args, **kwargs) + else: + assert kernel in ("diff", "pct_change") + assert numeric_only is lib.no_default + # Doesn't have numeric_only argument and fails on nuisance columns + with pytest.raises(TypeError, match=r"unsupported operand type"): + method(*args, **kwargs) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 016e817e43402..61951292d55a8 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -474,13 +474,17 @@ def test_frame_groupby_columns(tsframe): def test_frame_set_name_single(df): grouped = df.groupby("A") - result = grouped.mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.mean() assert result.index.name == "A" - result = df.groupby("A", as_index=False).mean() + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A", as_index=False).mean() assert result.index.name != "A" - result = grouped.agg(np.mean) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.agg(np.mean) assert result.index.name == "A" result = grouped.agg({"C": np.mean, "D": np.std}) @@ -503,8 +507,10 @@ def test_multi_func(df): col2 = df["B"] grouped = df.groupby([col1.get, col2.get]) - agged = grouped.mean() - expected = df.groupby(["A", "B"]).mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + agged = grouped.mean() + expected = df.groupby(["A", "B"]).mean() # TODO groupby get drops names tm.assert_frame_equal( @@ -661,13 +667,16 @@ def test_groupby_as_index_agg(df): # single-key - result = grouped.agg(np.mean) - expected = grouped.mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.agg(np.mean) + expected = grouped.mean() tm.assert_frame_equal(result, expected) result2 = grouped.agg({"C": np.mean, "D": np.sum}) - expected2 = grouped.mean() - expected2["D"] = grouped.sum()["D"] + with tm.assert_produces_warning(FutureWarning, match=msg): + expected2 = grouped.mean() + expected2["D"] = grouped.sum()["D"] tm.assert_frame_equal(result2, expected2) grouped = df.groupby("A", as_index=True) @@ -754,8 +763,10 @@ def test_as_index_series_return_frame(df): grouped = df.groupby("A", as_index=False) grouped2 = df.groupby(["A", "B"], as_index=False) - result = grouped["C"].agg(np.sum) - expected = grouped.agg(np.sum).loc[:, ["A", "C"]] + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped["C"].agg(np.sum) + expected = grouped.agg(np.sum).loc[:, ["A", "C"]] assert isinstance(result, DataFrame) tm.assert_frame_equal(result, expected) @@ -765,7 +776,8 @@ def test_as_index_series_return_frame(df): tm.assert_frame_equal(result2, expected2) result = grouped["C"].sum() - expected = grouped.sum().loc[:, ["A", "C"]] + with 
tm.assert_produces_warning(FutureWarning, match=msg): + expected = grouped.sum().loc[:, ["A", "C"]] assert isinstance(result, DataFrame) tm.assert_frame_equal(result, expected) @@ -789,8 +801,10 @@ def test_groupby_as_index_cython(df): # single-key grouped = data.groupby("A", as_index=False) - result = grouped.mean() - expected = data.groupby(["A"]).mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.mean() + expected = data.groupby(["A"]).mean() expected.insert(0, "A", expected.index) expected.index = np.arange(len(expected)) tm.assert_frame_equal(result, expected) @@ -859,15 +873,18 @@ def test_groupby_multi_corner(df): def test_omit_nuisance(df): grouped = df.groupby("A") - agged = grouped.agg(np.mean) - exp = grouped.mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + agged = grouped.agg(np.mean) + exp = grouped.mean() tm.assert_frame_equal(agged, exp) df = df.loc[:, ["A", "C", "D"]] df["E"] = datetime.now() grouped = df.groupby("A") - result = grouped.agg(np.sum) - expected = grouped.sum() + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.agg(np.sum) + expected = grouped.sum() tm.assert_frame_equal(result, expected) # won't work with axis = 1 @@ -898,7 +915,7 @@ def test_keep_nuisance_agg(df, agg_function): @pytest.mark.parametrize("numeric_only", [lib.no_default, True, False]) def test_omit_nuisance_agg(df, agg_function, numeric_only): # GH 38774, GH 38815 - if not numeric_only and agg_function != "sum": + if numeric_only is lib.no_default or (not numeric_only and agg_function != "sum"): # sum doesn't drop strings warn = FutureWarning else: @@ -913,7 +930,13 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only): with pytest.raises(klass, match="could not convert string to float"): getattr(grouped, agg_function)(numeric_only=numeric_only) else: - with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + if numeric_only is lib.no_default: + msg = ( + f"The default value of numeric_only in DataFrameGroupBy.{agg_function}" + ) + else: + msg = "Dropping invalid columns" + with tm.assert_produces_warning(warn, match=msg): result = getattr(grouped, agg_function)(numeric_only=numeric_only) if ( (numeric_only is lib.no_default or not numeric_only) @@ -923,9 +946,18 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only): columns = ["A", "B", "C", "D"] else: columns = ["A", "C", "D"] - expected = getattr(df.loc[:, columns].groupby("A"), agg_function)( - numeric_only=numeric_only - ) + if agg_function == "sum" and numeric_only is False: + # sum doesn't drop nuisance string columns + warn = None + elif agg_function in ("sum", "std", "var", "sem") and numeric_only is not True: + warn = FutureWarning + else: + warn = None + msg = "The default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + expected = getattr(df.loc[:, columns].groupby("A"), agg_function)( + numeric_only=numeric_only + ) tm.assert_frame_equal(result, expected) @@ -941,8 +973,10 @@ def test_omit_nuisance_warnings(df): def test_omit_nuisance_python_multiple(three_group): grouped = three_group.groupby(["A", "B"]) - agged = grouped.agg(np.mean) - exp = grouped.mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + agged = grouped.agg(np.mean) + exp = grouped.mean() tm.assert_frame_equal(agged, exp) @@ -959,8 +993,10 @@ def 
test_empty_groups_corner(mframe): ) grouped = df.groupby(["k1", "k2"]) - result = grouped.agg(np.mean) - expected = grouped.mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.agg(np.mean) + expected = grouped.mean() tm.assert_frame_equal(result, expected) grouped = mframe[3:5].groupby(level=0) @@ -982,7 +1018,9 @@ def test_wrap_aggregated_output_multindex(mframe): df["baz", "two"] = "peekaboo" keys = [np.array([0, 0, 1]), np.array([0, 0, 1])] - agged = df.groupby(keys).agg(np.mean) + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + agged = df.groupby(keys).agg(np.mean) assert isinstance(agged.columns, MultiIndex) def aggfun(ser): @@ -1143,15 +1181,19 @@ def test_groupby_with_hier_columns(): # add a nuisance column sorted_columns, _ = columns.sortlevel(0) df["A", "foo"] = "bar" - result = df.groupby(level=0).mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(level=0).mean() tm.assert_index_equal(result.columns, df.columns[:-1]) def test_grouping_ndarray(df): grouped = df.groupby(df["A"].values) - result = grouped.sum() - expected = df.groupby("A").sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.sum() + expected = df.groupby("A").sum() tm.assert_frame_equal( result, expected, check_names=False ) # Note: no names when grouping by value @@ -1179,8 +1221,10 @@ def test_groupby_wrong_multi_labels(): def test_groupby_series_with_name(df): - result = df.groupby(df["A"]).mean() - result2 = df.groupby(df["A"], as_index=False).mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(df["A"]).mean() + result2 = df.groupby(df["A"], as_index=False).mean() assert result.index.name == "A" assert "A" in result2 @@ -1331,8 +1375,10 @@ def test_groupby_unit64_float_conversion(): def test_groupby_list_infer_array_like(df): - result = df.groupby(list(df["A"])).mean() - expected = df.groupby(df["A"]).mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(list(df["A"])).mean() + expected = df.groupby(df["A"]).mean() tm.assert_frame_equal(result, expected, check_names=False) with pytest.raises(KeyError, match=r"^'foo'$"): @@ -1445,7 +1491,9 @@ def test_groupby_2d_malformed(): d["zeros"] = [0, 0] d["ones"] = [1, 1] d["label"] = ["l1", "l2"] - tmp = d.groupby(["group"]).mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + tmp = d.groupby(["group"]).mean() res_values = np.array([[0.0, 1.0], [0.0, 1.0]]) tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"])) tm.assert_numpy_array_equal(tmp.values, res_values) @@ -1611,10 +1659,13 @@ def f(group): def test_no_dummy_key_names(df): # see gh-1291 - result = df.groupby(df["A"].values).sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(df["A"].values).sum() assert result.index.name is None - result = df.groupby([df["A"].values, df["B"].values]).sum() + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby([df["A"].values, df["B"].values]).sum() assert result.index.names == (None, None) @@ -2634,7 +2685,9 @@ def 
test_groupby_aggregation_numeric_with_non_numeric_dtype(): ) gb = df.groupby(by=["x"]) - result = gb.sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.sum() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index 54cde30ceac92..b665843728165 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -112,5 +112,7 @@ def test_groupby_resample_preserves_subclass(obj): df = df.set_index("Date") # Confirm groupby.resample() preserves dataframe type - result = df.groupby("Buyer").resample("5D").sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("Buyer").resample("5D").sum() assert isinstance(result, obj) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index c6e4bec3f7b2c..85602fdf7274a 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -59,8 +59,10 @@ def test_column_select_via_attr(self, df): tm.assert_series_equal(result, expected) df["mean"] = 1.5 - result = df.groupby("A").mean() - expected = df.groupby("A").agg(np.mean) + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A").mean() + expected = df.groupby("A").agg(np.mean) tm.assert_frame_equal(result, expected) def test_getitem_list_of_columns(self): @@ -284,25 +286,30 @@ def test_grouper_column_and_index(self): {"A": np.arange(6), "B": ["one", "one", "two", "two", "one", "one"]}, index=idx, ) - result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean() - expected = df_multi.reset_index().groupby(["B", "inner"]).mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean() + expected = df_multi.reset_index().groupby(["B", "inner"]).mean() tm.assert_frame_equal(result, expected) # Test the reverse grouping order - result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean() - expected = df_multi.reset_index().groupby(["inner", "B"]).mean() + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean() + expected = df_multi.reset_index().groupby(["inner", "B"]).mean() tm.assert_frame_equal(result, expected) # Grouping a single-index frame by a column and the index should # be equivalent to resetting the index and grouping by two columns df_single = df_multi.reset_index("outer") - result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean() - expected = df_single.reset_index().groupby(["B", "inner"]).mean() + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean() + expected = df_single.reset_index().groupby(["B", "inner"]).mean() tm.assert_frame_equal(result, expected) # Test the reverse grouping order - result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean() - expected = df_single.reset_index().groupby(["inner", "B"]).mean() + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean() + expected = df_single.reset_index().groupby(["inner", "B"]).mean() tm.assert_frame_equal(result, expected) def 
test_groupby_levels_and_columns(self): @@ -376,8 +383,10 @@ def test_empty_groups(self, df): def test_groupby_grouper(self, df): grouped = df.groupby("A") - result = df.groupby(grouped.grouper).mean() - expected = grouped.mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(grouped.grouper).mean() + expected = grouped.mean() tm.assert_frame_equal(result, expected) def test_groupby_dict_mapping(self): diff --git a/pandas/tests/groupby/test_index_as_string.py b/pandas/tests/groupby/test_index_as_string.py index 971a447b84cae..501a21981a148 100644 --- a/pandas/tests/groupby/test_index_as_string.py +++ b/pandas/tests/groupby/test_index_as_string.py @@ -47,8 +47,11 @@ def series(): ], ) def test_grouper_index_level_as_string(frame, key_strs, groupers): - result = frame.groupby(key_strs).mean() - expected = frame.groupby(groupers).mean() + warn = FutureWarning if "B" not in key_strs or "outer" in frame.columns else None + msg = "The default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + result = frame.groupby(key_strs).mean() + expected = frame.groupby(groupers).mean() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_pipe.py b/pandas/tests/groupby/test_pipe.py index 1229251f88c7d..4f58bcb5ee763 100644 --- a/pandas/tests/groupby/test_pipe.py +++ b/pandas/tests/groupby/test_pipe.py @@ -60,7 +60,9 @@ def f(dfgb, arg1): ) def g(dfgb, arg2): - return dfgb.sum() / dfgb.sum().sum() + arg2 + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + return dfgb.sum() / dfgb.sum().sum() + arg2 def h(df, arg3): return df.x + df.y - arg3 diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 0f7e71c99584d..20328426a69b2 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -246,9 +246,10 @@ def test_groupby_quantile_nullable_array(values, q): def test_groupby_quantile_skips_invalid_dtype(q, numeric_only): df = DataFrame({"a": [1], "b": [2.0], "c": ["x"]}) - if numeric_only is None or numeric_only: + if numeric_only is lib.no_default or numeric_only: warn = FutureWarning if numeric_only is lib.no_default else None - with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + msg = "The default value of numeric_only in DataFrameGroupBy.quantile" + with tm.assert_produces_warning(warn, match=msg): result = df.groupby("a").quantile(q, numeric_only=numeric_only) expected = df.groupby("a")[["b"]].quantile(q) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 7c9d6e7a73087..ae725cbb2b588 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -105,14 +105,18 @@ def test_groupby_with_timegrouper(self): ) expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype="int64") - result1 = df.resample("5D").sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result1 = df.resample("5D").sum() tm.assert_frame_equal(result1, expected) df_sorted = df.sort_index() - result2 = df_sorted.groupby(Grouper(freq="5D")).sum() + with tm.assert_produces_warning(FutureWarning, match=msg): + result2 = df_sorted.groupby(Grouper(freq="5D")).sum() tm.assert_frame_equal(result2, expected) - result3 = df.groupby(Grouper(freq="5D")).sum() + with tm.assert_produces_warning(FutureWarning, match=msg): + 
result3 = df.groupby(Grouper(freq="5D")).sum() tm.assert_frame_equal(result3, expected) @pytest.mark.parametrize("should_sort", [True, False]) @@ -186,7 +190,9 @@ def test_timegrouper_with_reg_groups(self): } ).set_index(["Date", "Buyer"]) - result = df.groupby([Grouper(freq="A"), "Buyer"]).sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby([Grouper(freq="A"), "Buyer"]).sum() tm.assert_frame_equal(result, expected) expected = DataFrame( @@ -201,7 +207,8 @@ def test_timegrouper_with_reg_groups(self): ], } ).set_index(["Date", "Buyer"]) - result = df.groupby([Grouper(freq="6MS"), "Buyer"]).sum() + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby([Grouper(freq="6MS"), "Buyer"]).sum() tm.assert_frame_equal(result, expected) df_original = DataFrame( @@ -239,10 +246,13 @@ def test_timegrouper_with_reg_groups(self): } ).set_index(["Date", "Buyer"]) - result = df.groupby([Grouper(freq="1D"), "Buyer"]).sum() + warn_msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1D"), "Buyer"]).sum() tm.assert_frame_equal(result, expected) - result = df.groupby([Grouper(freq="1M"), "Buyer"]).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M"), "Buyer"]).sum() expected = DataFrame( { "Buyer": "Carl Joe Mark".split(), @@ -258,7 +268,8 @@ def test_timegrouper_with_reg_groups(self): # passing the name df = df.reset_index() - result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum() tm.assert_frame_equal(result, expected) with pytest.raises(KeyError, match="'The grouper name foo is not found'"): @@ -266,9 +277,11 @@ def test_timegrouper_with_reg_groups(self): # passing the level df = df.set_index("Date") - result = df.groupby([Grouper(freq="1M", level="Date"), "Buyer"]).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M", level="Date"), "Buyer"]).sum() tm.assert_frame_equal(result, expected) - result = df.groupby([Grouper(freq="1M", level=0), "Buyer"]).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M", level=0), "Buyer"]).sum() tm.assert_frame_equal(result, expected) with pytest.raises(ValueError, match="The level foo is not valid"): @@ -277,7 +290,8 @@ def test_timegrouper_with_reg_groups(self): # multi names df = df.copy() df["Date"] = df.index + offsets.MonthEnd(2) - result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum() expected = DataFrame( { "Buyer": "Carl Joe Mark".split(), @@ -306,18 +320,22 @@ def test_timegrouper_with_reg_groups(self): [datetime(2013, 10, 31, 0, 0)], freq=offsets.MonthEnd(), name="Date" ), ) - result = df.groupby(Grouper(freq="1M")).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby(Grouper(freq="1M")).sum() tm.assert_frame_equal(result, expected) - result = df.groupby([Grouper(freq="1M")]).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M")]).sum() tm.assert_frame_equal(result, expected) expected.index = 
expected.index.shift(1) assert expected.index.freq == offsets.MonthEnd() - result = df.groupby(Grouper(freq="1M", key="Date")).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby(Grouper(freq="1M", key="Date")).sum() tm.assert_frame_equal(result, expected) - result = df.groupby([Grouper(freq="1M", key="Date")]).sum() + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + result = df.groupby([Grouper(freq="1M", key="Date")]).sum() tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("freq", ["D", "M", "A", "Q-APR"]) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 0492b143eaf1f..b325edaf2b1ea 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -203,15 +203,24 @@ def test_transform_axis_1_reducer(request, reduction_func): ): marker = pytest.mark.xfail(reason="transform incorrectly fails - GH#45986") request.node.add_marker(marker) - warn = FutureWarning if reduction_func == "mad" else None + if reduction_func == "mad": + warn = FutureWarning + msg = "The 'mad' method is deprecated" + elif reduction_func in ("sem", "std"): + warn = FutureWarning + msg = "The default value of numeric_only" + else: + warn = None + msg = "" df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + with tm.assert_produces_warning(warn, match=msg): result = df.groupby([0, 0, 1], axis=1).transform(reduction_func) if reduction_func == "size": # size doesn't behave in the same manner; hardcode expected result expected = DataFrame(2 * [[2, 2, 1]], index=df.index, columns=df.columns) else: + warn = FutureWarning if reduction_func == "mad" else None with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): expected = df.T.groupby([0, 0, 1]).transform(reduction_func).T tm.assert_equal(result, expected) @@ -462,8 +471,10 @@ def test_transform_exclude_nuisance(df): def test_transform_function_aliases(df): - result = df.groupby("A").transform("mean") - expected = df.groupby("A").transform(np.mean) + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A").transform("mean") + expected = df.groupby("A").transform(np.mean) tm.assert_frame_equal(result, expected) result = df.groupby("A")["C"].transform("mean") @@ -774,8 +785,15 @@ def test_cython_transform_frame(op, args, targop): expected = gb.apply(targop) expected = expected.sort_index(axis=1) - tm.assert_frame_equal(expected, gb.transform(op, *args).sort_index(axis=1)) - tm.assert_frame_equal(expected, getattr(gb, op)(*args).sort_index(axis=1)) + + warn = None if op == "shift" else FutureWarning + msg = "The default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + result = gb.transform(op, *args).sort_index(axis=1) + tm.assert_frame_equal(result, expected) + with tm.assert_produces_warning(warn, match=msg): + result = getattr(gb, op)(*args).sort_index(axis=1) + tm.assert_frame_equal(result, expected) # individual columns for c in df: if ( diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py index 9d0a2fa81b53b..3cc4fa4713831 100644 --- a/pandas/tests/indexes/multi/conftest.py +++ b/pandas/tests/indexes/multi/conftest.py @@ -8,7 +8,7 @@ ) -# Note: identical the the "multi" entry in the top-level "index" fixture +# 
Note: identical to the "multi" entry in the top-level "index" fixture @pytest.fixture def idx(): # a MultiIndex used to test the general functionality of the diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 426192ab46914..19ea6753c616c 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1180,7 +1180,7 @@ def test_iloc_getitem_int_single_ea_block_view(self): arr = interval_range(1, 10.0)._values df = DataFrame(arr) - # ser should be a *view* on the the DataFrame data + # ser should be a *view* on the DataFrame data ser = df.iloc[2] # if we have a view, then changing arr[2] should also change ser[0] diff --git a/pandas/tests/io/formats/style/test_matplotlib.py b/pandas/tests/io/formats/style/test_matplotlib.py index a350b6fe7546d..8d9f075d8674d 100644 --- a/pandas/tests/io/formats/style/test_matplotlib.py +++ b/pandas/tests/io/formats/style/test_matplotlib.py @@ -216,7 +216,7 @@ def test_background_gradient_gmap_array_raises(gmap, axis): ], ) def test_background_gradient_gmap_dataframe_align(styler_blank, gmap, subset, exp_gmap): - # test gmap given as DataFrame that it aligns to the the data including subset + # test that gmap given as a DataFrame aligns to the data, including subset expected = styler_blank.background_gradient(axis=None, gmap=exp_gmap, subset=subset) result = styler_blank.background_gradient(axis=None, gmap=gmap, subset=subset) assert expected._compute().ctx == result._compute().ctx @@ -232,7 +232,7 @@ def test_background_gradient_gmap_dataframe_align(styler_blank, gmap, subset, ex ], ) def test_background_gradient_gmap_series_align(styler_blank, gmap, axis, exp_gmap): - # test gmap given as Series that it aligns to the the data including subset + # test that gmap given as a Series aligns to the data, including subset expected = styler_blank.background_gradient(axis=None, gmap=exp_gmap)._compute() result = styler_blank.background_gradient(axis=axis, gmap=gmap)._compute() assert expected.ctx == result.ctx diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index b5bae4759090a..21ef078bcf418 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -90,9 +90,10 @@ def test_groupby_resample_on_api(): } ) - expected = df.set_index("dates").groupby("key").resample("D").mean() - - result = df.groupby("key").resample("D", on="dates").mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = df.set_index("dates").groupby("key").resample("D").mean() + result = df.groupby("key").resample("D", on="dates").mean() tm.assert_frame_equal(result, expected) @@ -196,7 +197,9 @@ def tests_skip_nuisance(test_frame): tm.assert_frame_equal(result, expected) expected = r[["A", "B", "C"]].sum() - result = r.sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = r.sum() tm.assert_frame_equal(result, expected) @@ -643,10 +646,15 @@ def test_selection_api_validation(): exp = df_exp.resample("2D").sum() exp.index.name = "date" - tm.assert_frame_equal(exp, df.resample("2D", on="date").sum()) + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.resample("2D", on="date").sum() + tm.assert_frame_equal(exp, result) exp.index.name = "d" - tm.assert_frame_equal(exp, df.resample("2D", level="d").sum()) + with
tm.assert_produces_warning(FutureWarning, match=msg): + result = df.resample("2D", level="d").sum() + tm.assert_frame_equal(exp, result) @pytest.mark.parametrize( @@ -809,9 +817,13 @@ def test_frame_downsample_method(method, numeric_only, expected_data): func = getattr(resampled, method) if method == "prod" and numeric_only is not True: warn = FutureWarning + msg = "Dropping invalid columns in DataFrameGroupBy.prod is deprecated" + elif method == "sum" and numeric_only is lib.no_default: + warn = FutureWarning + msg = "The default value of numeric_only in DataFrameGroupBy.sum is deprecated" else: warn = None - msg = "Dropping invalid columns in DataFrameGroupBy.prod is deprecated" + msg = "" with tm.assert_produces_warning(warn, match=msg): result = func(numeric_only=numeric_only) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index cae2d77dfbd3f..5392ec88544a1 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -408,7 +408,9 @@ def test_resample_groupby_agg(): df["date"] = pd.to_datetime(df["date"]) resampled = df.groupby("cat").resample("Y", on="date") - expected = resampled.sum() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = resampled.sum() result = resampled.agg({"num": "sum"}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 5d6df078ee8c3..905c2af2d22a5 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -553,7 +553,9 @@ def test_mixed_type_join_with_suffix(self): df.insert(5, "dt", "foo") grouped = df.groupby("id") - mn = grouped.mean() + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + mn = grouped.mean() cn = grouped.count() # it works! 
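The groupby, resample, and join test updates above all follow one pattern: in pandas 1.5, calling a reduction such as sum or mean on a DataFrameGroupBy (or Resampler) that holds non-numeric "nuisance" columns, without passing numeric_only explicitly, emits a FutureWarning, and the tests now assert that warning via tm.assert_produces_warning. A minimal standalone sketch of that behavior, not part of the patch itself and assuming pandas 1.5.x semantics (the frame below is illustrative, not taken from the test suite):

import pandas as pd
import pandas._testing as tm

df = pd.DataFrame({"key": ["a", "a", "b"], "x": [1.0, 2.0, 3.0], "s": ["p", "q", "r"]})
gb = df.groupby("key")

# Deprecated path: numeric_only is left at its sentinel default, the object
# column "s" is silently dropped, and a FutureWarning is emitted.
with tm.assert_produces_warning(FutureWarning, match="The default value of numeric_only"):
    result = gb.mean()

# Forward-compatible spellings are explicit and warning-free: either pass
# numeric_only, or select the valid columns before reducing.
tm.assert_frame_equal(result, gb.mean(numeric_only=True))
tm.assert_frame_equal(result, df.groupby("key")[["x"]].mean())
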
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 31f720b9ec336..0d3b9f4561b55 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -146,8 +146,10 @@ def test_pivot_table_nocols(self): df = DataFrame( {"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]} ) - rs = df.pivot_table(columns="cols", aggfunc=np.sum) - xp = df.pivot_table(index="cols", aggfunc=np.sum).T + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = df.pivot_table(columns="cols", aggfunc=np.sum) + xp = df.pivot_table(index="cols", aggfunc=np.sum).T tm.assert_frame_equal(rs, xp) rs = df.pivot_table(columns="cols", aggfunc={"values": "mean"}) @@ -903,12 +905,19 @@ def test_no_col(self): # to help with a buglet self.data.columns = [k * 2 for k in self.data.columns] - table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc=np.mean) + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + table = self.data.pivot_table( + index=["AA", "BB"], margins=True, aggfunc=np.mean + ) for value_col in table.columns: totals = table.loc[("All", ""), value_col] assert totals == self.data[value_col].mean() - table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean") + with tm.assert_produces_warning(FutureWarning, match=msg): + table = self.data.pivot_table( + index=["AA", "BB"], margins=True, aggfunc="mean" + ) for item in ["DD", "EE", "FF"]: totals = table.loc[("All", ""), item] assert totals == self.data[item].mean() @@ -964,7 +973,9 @@ def test_margin_with_only_columns_defined( } ) - result = df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc) + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc) expected = DataFrame(values, index=Index(["D", "E"]), columns=expected_columns) tm.assert_frame_equal(result, expected) @@ -1990,8 +2001,11 @@ def test_pivot_string_as_func(self): def test_pivot_string_func_vs_func(self, f, f_numpy): # GH #18713 # for consistency purposes - result = pivot_table(self.data, index="A", columns="B", aggfunc=f) - expected = pivot_table(self.data, index="A", columns="B", aggfunc=f_numpy) + + msg = "The default value of numeric_only" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = pivot_table(self.data, index="A", columns="B", aggfunc=f) + expected = pivot_table(self.data, index="A", columns="B", aggfunc=f_numpy) tm.assert_frame_equal(result, expected) @pytest.mark.slow diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index f42a1a5449c5c..8977d1a0d9d1b 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -102,7 +102,7 @@ def engine_and_raw(request): return request.param -@pytest.fixture(params=["1 day", timedelta(days=1)]) +@pytest.fixture(params=["1 day", timedelta(days=1), np.timedelta64(1, "D")]) def halflife_with_times(request): """Halflife argument for EWM when times is specified.""" return request.param diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index b1e8b43258750..66cd36d121750 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -90,7 +90,7 @@ def test_ewma_times_not_same_length(): def test_ewma_halflife_not_correct_type(): - msg = "halflife must be a string or datetime.timedelta object" 
+ msg = "halflife must be a timedelta convertible object" with pytest.raises(ValueError, match=msg): Series(range(5)).ewm(halflife=1, times=np.arange(5).astype("datetime64[ns]"))