From 8d247a6c990b0c9631a337151de396c4a6dbbfca Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 30 Apr 2021 13:33:39 -0700 Subject: [PATCH] REF: make libreduction behavior match _aggregate_series_pure_python --- pandas/_libs/reduction.pyx | 10 ++++------ pandas/core/groupby/generic.py | 4 +++- pandas/core/groupby/ops.py | 19 ++++++------------- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 5b958163159aa..191967585c431 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -27,11 +27,11 @@ from pandas._libs.lib import ( ) -cpdef check_result_array(object obj, Py_ssize_t cnt): +cpdef check_result_array(object obj): if (is_array(obj) or - (isinstance(obj, list) and len(obj) == cnt) or - getattr(obj, 'shape', None) == (cnt,)): + (isinstance(obj, list) and len(obj) == 0) or + getattr(obj, 'shape', None) == (0,)): raise ValueError('Must produce aggregated value') @@ -89,9 +89,7 @@ cdef class _BaseGrouper: # On the first pass, we check the output shape to see # if this looks like a reduction. initialized = True - # In all tests other than test_series_grouper and - # test_series_bin_grouper, we have len(self.dummy_arr) == 0 - check_result_array(res, len(self.dummy_arr)) + check_result_array(res) return res, initialized diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 80351a832ec7e..da803badee8c0 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -511,6 +511,8 @@ def _get_index() -> Index: return self._reindex_output(result) def _aggregate_named(self, func, *args, **kwargs): + # Note: this is very similar to _aggregate_series_pure_python, + # but that does not pin group.name result = {} initialized = False @@ -523,7 +525,7 @@ def _aggregate_named(self, func, *args, **kwargs): output = libreduction.extract_result(output) if not initialized: # We only do this validation on the first iteration - libreduction.check_result_array(output, 0) + libreduction.check_result_array(output) initialized = True result[name] = output diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index ed3b6d68b71cd..9edbeb412026d 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -985,15 +985,7 @@ def agg_series(self, obj: Series, func: F) -> tuple[ArrayLike, np.ndarray]: # Preempt TypeError in _aggregate_series_fast return self._aggregate_series_pure_python(obj, func) - try: - return self._aggregate_series_fast(obj, func) - except ValueError as err: - if "Must produce aggregated value" in str(err): - # raised in libreduction - pass - else: - raise - return self._aggregate_series_pure_python(obj, func) + return self._aggregate_series_fast(obj, func) def _aggregate_series_fast( self, obj: Series, func: F @@ -1023,9 +1015,10 @@ def _aggregate_series_pure_python(self, obj: Series, func: F): result = np.empty(ngroups, dtype="O") initialized = False + # equiv: splitter = self._get_splitter(obj, axis=0) splitter = get_splitter(obj, group_index, ngroups, axis=0) - for label, group in enumerate(splitter): + for i, group in enumerate(splitter): # Each step of this loop corresponds to # libreduction._BaseGrouper._apply_to_group @@ -1034,11 +1027,11 @@ def _aggregate_series_pure_python(self, obj: Series, func: F): if not initialized: # We only do this validation on the first iteration - libreduction.check_result_array(res, 0) + libreduction.check_result_array(res) initialized = True - counts[label] = group.shape[0] - result[label] = res + counts[i] = group.shape[0] + result[i] = res npvalues = lib.maybe_convert_objects(result, try_float=False) out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)