diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 1fef65349976b..4dc1dfcae0777 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -148,8 +148,10 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1): new_blocks = [] new_items = [] deleted_items = [] + no_result = object() for block in data.blocks: - + # Avoid inheriting result from earlier in the loop + result = no_result locs = block.mgr_locs.as_array try: result, _ = self.grouper.aggregate( @@ -174,15 +176,15 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1): except TypeError: # we may have an exception in trying to aggregate # continue and exclude the block - pass - + deleted_items.append(locs) + continue finally: + if result is not no_result: + dtype = block.values.dtype - dtype = block.values.dtype - - # see if we can cast the block back to the original dtype - result = block._try_coerce_and_cast_result(result, dtype=dtype) - newb = block.make_block(result) + # see if we can cast the block back to the original dtype + result = block._try_coerce_and_cast_result(result, dtype=dtype) + newb = block.make_block(result) new_items.append(locs) new_blocks.append(newb) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 3d4dbd3f8d887..5961a7ff72832 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -47,6 +47,7 @@ class providing the base-class of operations. SpecificationError, ) import pandas.core.common as com +from pandas.core.construction import extract_array from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame from pandas.core.groupby import base @@ -803,10 +804,9 @@ def _try_cast(self, result, obj, numeric_only=False): # Prior results _may_ have been generated in UTC. 
# Ensure we localize to UTC first before converting # to the target timezone + arr = extract_array(obj) try: - result = obj._values._from_sequence( - result, dtype="datetime64[ns, UTC]" - ) + result = arr._from_sequence(result, dtype="datetime64[ns, UTC]") result = result.astype(dtype) except TypeError: # _try_cast was called at a point where the result diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6e5a2aab298c7..4ca867b1088e7 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -7,7 +7,7 @@ import numpy as np -from pandas._libs import NaT, lib, tslib, tslibs +from pandas._libs import NaT, Timestamp, lib, tslib, tslibs import pandas._libs.internals as libinternals from pandas._libs.tslibs import Timedelta, conversion from pandas._libs.tslibs.timezones import tz_compare @@ -715,20 +715,6 @@ def _try_cast_result(self, result, dtype=None): # may need to change the dtype here return maybe_downcast_to_dtype(result, dtype) - def _coerce_values(self, values): - """ - Coerce values (usually derived from self.values) for an operation. 
- - Parameters - ---------- - values : ndarray or ExtensionArray - - Returns - ------- - ndarray or ExtensionArray - """ - return values - def _try_coerce_args(self, other): """ provide coercion to our input arguments """ @@ -817,7 +803,7 @@ def replace( convert=convert, ) - values = self._coerce_values(self.values) + values = self.values to_replace = self._try_coerce_args(to_replace) mask = missing.mask_missing(values, to_replace) @@ -882,7 +868,6 @@ def setitem(self, indexer, value): if self._can_hold_element(value): value = self._try_coerce_args(value) - values = self._coerce_values(values) # can keep its own dtype if hasattr(value, "dtype") and is_dtype_equal(values.dtype, value.dtype): dtype = self.dtype @@ -1229,7 +1214,6 @@ def _interpolate_with_fill( return [self.copy()] values = self.values if inplace else self.values.copy() - values = self._coerce_values(values) fill_value = self._try_coerce_args(fill_value) values = missing.interpolate_2d( values, @@ -1444,7 +1428,6 @@ def func(cond, values, other): else: # see if we can operate on the entire block, or need item-by-item # or if we are a single block (ndim == 1) - values = self._coerce_values(values) try: result = func(cond, values, other) except TypeError: @@ -1548,14 +1531,13 @@ def quantile(self, qs, interpolation="linear", axis=0): # We need to operate on i8 values for datetimetz # but `Block.get_values()` returns an ndarray of objects # right now. We need an API for "values to do numeric-like ops on" - values = self.values.asi8 + values = self.values.view("M8[ns]") # TODO: NonConsolidatableMixin shape # Usual shape inconsistencies for ExtensionBlocks values = values[None, :] else: values = self.get_values() - values = self._coerce_values(values) is_empty = values.shape[axis] == 0 orig_scalar = not is_list_like(qs) @@ -1720,7 +1702,6 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False) # use block's copy logic. 
# .values may be an Index which does shallow copy by default new_values = self.values if inplace else self.copy().values - new_values = self._coerce_values(new_values) new = self._try_coerce_args(new) if isinstance(new, np.ndarray) and len(new) == len(mask): @@ -1919,12 +1900,6 @@ def _try_cast_result(self, result, dtype=None): result could also be an EA Array itself, in which case it is already a 1-D array """ - try: - - result = self._holder._from_sequence(result.ravel(), dtype=dtype) - except Exception: - pass - return result def formatting_values(self): @@ -2304,8 +2279,8 @@ def _try_coerce_args(self, other): if is_valid_nat_for_dtype(other, self.dtype): other = np.datetime64("NaT", "ns") elif isinstance(other, (datetime, np.datetime64, date)): - other = self._box_func(other) - if getattr(other, "tz") is not None: + other = Timestamp(other) + if other.tz is not None: raise TypeError("cannot coerce a Timestamp with a tz on a naive Block") other = other.asm8 elif hasattr(other, "dtype") and is_datetime64_dtype(other): @@ -2320,18 +2295,11 @@ def _try_coerce_args(self, other): def _try_coerce_result(self, result): """ reverse of try_coerce_args """ - if isinstance(result, np.ndarray): - if result.dtype.kind in ["i", "f"]: - result = result.astype("M8[ns]") - - elif isinstance(result, (np.integer, np.float, np.datetime64)): - result = self._box_func(result) + if isinstance(result, np.ndarray) and result.dtype.kind == "i": + # needed for _interpolate_with_fill + result = result.view("M8[ns]") return result - @property - def _box_func(self): - return tslibs.Timestamp - def to_native_types( self, slicer=None, na_rep=None, date_format=None, quoting=None, **kwargs ): @@ -2387,6 +2355,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): is_extension = True _can_hold_element = DatetimeBlock._can_hold_element + fill_value = np.datetime64("NaT", "ns") @property def _holder(self): @@ -2442,7 +2411,7 @@ def get_values(self, dtype=None): """ values = self.values if
is_object_dtype(dtype): - values = values._box_values(values._data) + values = values.astype(object) values = np.asarray(values) @@ -2468,9 +2437,6 @@ def _slice(self, slicer): return self.values[loc] return self.values[slicer] - def _coerce_values(self, values): - return _block_shape(values, ndim=self.ndim) - def _try_coerce_args(self, other): """ localize and return i8 for the values @@ -2483,17 +2449,7 @@ def _try_coerce_args(self, other): ------- base-type other """ - - if isinstance(other, ABCSeries): - other = self._holder(other) - - if isinstance(other, bool): - raise TypeError - elif is_datetime64_dtype(other): - # add the tz back - other = self._holder(other, dtype=self.dtype) - - elif is_valid_nat_for_dtype(other, self.dtype): + if is_valid_nat_for_dtype(other, self.dtype): other = np.datetime64("NaT", "ns") elif isinstance(other, self._holder): if not tz_compare(other.tz, self.values.tz): @@ -2513,22 +2469,23 @@ def _try_coerce_args(self, other): def _try_coerce_result(self, result): """ reverse of try_coerce_args """ if isinstance(result, np.ndarray): - if result.dtype.kind in ["i", "f"]: - result = result.astype("M8[ns]") + if result.ndim == 2: + # kludge for 2D blocks with 1D EAs + result = result[0, :] + if result.dtype == np.float64: + # needed for post-groupby.median + result = self._holder._from_sequence( + result.astype(np.int64), freq=None, dtype=self.values.dtype + ) + elif result.dtype == "M8[ns]": + # otherwise we get here via quantile and already have M8[ns] + result = self._holder._simple_new( + result, freq=None, dtype=self.values.dtype + ) - elif isinstance(result, (np.integer, np.float, np.datetime64)): + elif isinstance(result, np.datetime64): + # also for post-quantile result = self._box_func(result) - - if isinstance(result, np.ndarray): - # allow passing of > 1dim if its trivial - - if result.ndim > 1: - result = result.reshape(np.prod(result.shape)) - # GH#24096 new values invalidates a frequency - result = self._holder._simple_new( 
- result, freq=None, dtype=self.values.dtype - ) - return result @property @@ -2627,10 +2584,6 @@ def __init__(self, values, placement, ndim=None): def _holder(self): return TimedeltaArray - @property - def _box_func(self): - return lambda x: Timedelta(x, unit="ns") - def _can_hold_element(self, element): tipo = maybe_infer_dtype_type(element) if tipo is not None: @@ -2688,15 +2641,6 @@ def _try_coerce_args(self, other): def _try_coerce_result(self, result): """ reverse of try_coerce_args / try_operate """ - if isinstance(result, np.ndarray): - mask = isna(result) - if result.dtype.kind in ["i", "f"]: - result = result.astype("m8[ns]") - result[mask] = np.timedelta64("NaT", "ns") - - elif isinstance(result, (np.integer, np.float)): - result = self._box_func(result) - return result def should_store(self, value): diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 31e9cff68445e..fb8f62d7a06c5 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -51,7 +51,7 @@ def test_indexing_with_datetime_tz(self): # indexing result = df.iloc[1] expected = Series( - [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), np.nan, np.nan], + [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], index=list("ABC"), dtype="object", name=1, @@ -59,7 +59,7 @@ def test_indexing_with_datetime_tz(self): tm.assert_series_equal(result, expected) result = df.loc[1] expected = Series( - [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), np.nan, np.nan], + [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], index=list("ABC"), dtype="object", name=1,