diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index cd5d81bc70dd9..4fcefd5c32b6b 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -77,6 +77,7 @@
     ensure_platform_int,
     infer_dtype_from_object,
     is_bool_dtype,
+    is_datetime64_any_dtype,
     is_dict_like,
     is_dtype_equal,
     is_extension_array_dtype,
@@ -88,6 +89,7 @@
     is_list_like,
     is_named_tuple,
     is_object_dtype,
+    is_period_dtype,
     is_scalar,
     is_sequence,
     needs_i8_conversion,
@@ -7789,11 +7791,13 @@ def _reduce(
         self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
     ):
 
-        dtype_is_dt = self.dtypes.apply(lambda x: x.kind == "M")
+        dtype_is_dt = self.dtypes.apply(
+            lambda x: is_datetime64_any_dtype(x) or is_period_dtype(x)
+        )
         if numeric_only is None and name in ["mean", "median"] and dtype_is_dt.any():
             warnings.warn(
                 "DataFrame.mean and DataFrame.median with numeric_only=None "
-                "will include datetime64 and datetime64tz columns in a "
+                "will include datetime64, datetime64tz, and PeriodDtype columns in a "
                 "future version.",
                 FutureWarning,
                 stacklevel=3,
@@ -7854,6 +7858,10 @@ def blk_func(values):
                 assert len(res) == max(list(res.keys())) + 1, res.keys()
             out = df._constructor_sliced(res, index=range(len(res)), dtype=out_dtype)
             out.index = df.columns
+            if axis == 0 and df.dtypes.apply(needs_i8_conversion).any():
+                # FIXME: needs_i8_conversion check is kludge, not sure
+                #  why it is necessary in this case and this case alone
+                out[:] = coerce_to_dtypes(out.values, df.dtypes)
             return out
 
         if numeric_only is None:
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 78313f5c3bbbf..269843abb15ee 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -7,7 +7,7 @@
 
 from pandas._config import get_option
 
-from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib
+from pandas._libs import NaT, Period, Timedelta, Timestamp, iNaT, lib
 from pandas._typing import Dtype, Scalar
 from pandas.compat._optional import import_optional_dependency
 
@@ -17,9 +17,7 @@
     is_any_int_dtype,
     is_bool_dtype,
     is_complex,
-    is_datetime64_dtype,
-    is_datetime64tz_dtype,
-    is_datetime_or_timedelta_dtype,
+    is_datetime64_any_dtype,
     is_float,
     is_float_dtype,
     is_integer,
@@ -28,8 +26,10 @@
     is_object_dtype,
     is_scalar,
     is_timedelta64_dtype,
+    needs_i8_conversion,
     pandas_dtype,
 )
+from pandas.core.dtypes.dtypes import PeriodDtype
 from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna
 
 from pandas.core.construction import extract_array
@@ -134,10 +134,8 @@ def f(
 
 
 def _bn_ok_dtype(dtype: Dtype, name: str) -> bool:
-    # Bottleneck chokes on datetime64
-    if not is_object_dtype(dtype) and not (
-        is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype)
-    ):
+    # Bottleneck chokes on datetime64, PeriodDtype (or and EA)
+    if not is_object_dtype(dtype) and not needs_i8_conversion(dtype):
 
         # GH 15507
         # bottleneck does not properly upcast during the sum
@@ -283,17 +281,16 @@ def _get_values(
     #  with scalar fill_value.  This guarantee is important for the
     #  maybe_upcast_putmask call below
     assert is_scalar(fill_value)
+    values = extract_array(values, extract_numpy=True)
 
     mask = _maybe_get_mask(values, skipna, mask)
 
-    values = extract_array(values, extract_numpy=True)
     dtype = values.dtype
 
-    if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values):
+    if needs_i8_conversion(values):
         # changing timedelta64/datetime64 to int64 needs to happen after
         #  finding `mask` above
-        values = getattr(values, "asi8", values)
-        values = values.view(np.int64)
+        values = np.asarray(values.view("i8"))
 
     dtype_ok = _na_ok_dtype(dtype)
 
@@ -307,7 +304,8 @@ def _get_values(
 
     if skipna and copy:
         values = values.copy()
-        if dtype_ok:
+        assert mask is not None  # for mypy
+        if dtype_ok and mask.any():
             np.putmask(values, mask, fill_value)
 
         # promote if needed
@@ -325,13 +323,14 @@ def _get_values(
 
 
 def _na_ok_dtype(dtype) -> bool:
-    # TODO: what about datetime64tz? PeriodDtype?
-    return not issubclass(dtype.type, (np.integer, np.timedelta64, np.datetime64))
+    if needs_i8_conversion(dtype):
+        return False
+    return not issubclass(dtype.type, np.integer)
 
 
 def _wrap_results(result, dtype: Dtype, fill_value=None):
     """ wrap our results if needed """
-    if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
+    if is_datetime64_any_dtype(dtype):
         if fill_value is None:
             # GH#24293
             fill_value = iNaT
@@ -342,7 +341,8 @@ def _wrap_results(result, dtype: Dtype, fill_value=None):
                 result = np.nan
             result = Timestamp(result, tz=tz)
         else:
-            result = result.view(dtype)
+            # If we have float dtype, taking a view will give the wrong result
+            result = result.astype(dtype)
     elif is_timedelta64_dtype(dtype):
         if not isinstance(result, np.ndarray):
             if result == fill_value:
@@ -356,6 +356,14 @@ def _wrap_results(result, dtype: Dtype, fill_value=None):
         else:
             result = result.astype("m8[ns]").view(dtype)
 
+    elif isinstance(dtype, PeriodDtype):
+        if is_float(result) and result.is_integer():
+            result = int(result)
+        if is_integer(result):
+            result = Period._from_ordinal(result, freq=dtype.freq)
+        else:
+            raise NotImplementedError(type(result), result)
+
     return result
 
 
@@ -542,12 +550,7 @@ def nanmean(values, axis=None, skipna=True, mask=None):
     )
     dtype_sum = dtype_max
     dtype_count = np.float64
-    if (
-        is_integer_dtype(dtype)
-        or is_timedelta64_dtype(dtype)
-        or is_datetime64_dtype(dtype)
-        or is_datetime64tz_dtype(dtype)
-    ):
+    if is_integer_dtype(dtype) or needs_i8_conversion(dtype):
         dtype_sum = np.float64
     elif is_float_dtype(dtype):
         dtype_sum = dtype
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 568e99622dd29..bf66f9224148f 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1984,7 +1984,7 @@ def idxmin(self, axis=0, skipna=True, *args, **kwargs):
         nan
         """
         skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
-        i = nanops.nanargmin(com.values_from_object(self), skipna=skipna)
+        i = nanops.nanargmin(self._values, skipna=skipna)
         if i == -1:
             return np.nan
         return self.index[i]
@@ -2055,7 +2055,7 @@ def idxmax(self, axis=0, skipna=True, *args, **kwargs):
         nan
         """
         skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)
-        i = nanops.nanargmax(com.values_from_object(self), skipna=skipna)
+        i = nanops.nanargmax(self._values, skipna=skipna)
         if i == -1:
             return np.nan
         return self.index[i]
@@ -2093,7 +2093,7 @@ def round(self, decimals=0, *args, **kwargs) -> "Series":
         dtype: float64
         """
         nv.validate_round(args, kwargs)
-        result = com.values_from_object(self).round(decimals)
+        result = self._values.round(decimals)
         result = self._constructor(result, index=self.index).__finalize__(self)
 
         return result
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 07e30d41c216d..d7cd3bc3b1c49 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -875,11 +875,6 @@ def test_mean_datetimelike(self):
         expected = pd.Series({"A": 1.0, "C": df.loc[1, "C"]})
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.xfail(
-        reason="casts to object-dtype and then tries to add timestamps",
-        raises=TypeError,
-        strict=True,
-    )
     def test_mean_datetimelike_numeric_only_false(self):
         df = pd.DataFrame(
             {