diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst
index f9c756b2518af..ac366337a3c59 100644
--- a/doc/source/whatsnew/v1.0.1.rst
+++ b/doc/source/whatsnew/v1.0.1.rst
@@ -19,6 +19,7 @@ Fixed regressions
 - Fixed regression when indexing a ``Series`` or ``DataFrame`` indexed by ``DatetimeIndex`` with a slice containg a :class:`datetime.date` (:issue:`31501`)
 - Fixed regression in ``DataFrame.__setitem__`` raising an ``AttributeError`` with a :class:`MultiIndex` and a non-monotonic indexer (:issue:`31449`)
 - Fixed regression in :class:`Series` multiplication when multiplying a numeric :class:`Series` with >10000 elements with a timedelta-like scalar (:issue:`31457`)
+- Fixed regression in ``.groupby()`` aggregations with categorical dtype using Cythonized reduction functions (e.g. ``first``) (:issue:`31450`)
 - Fixed regression in :meth:`GroupBy.apply` if called with a function which returned a non-pandas non-scalar object (e.g. a list or numpy array) (:issue:`31441`)
 - Fixed regression in :meth:`to_datetime` when parsing non-nanosecond resolution datetimes (:issue:`31491`)
 - Fixed regression in :meth:`~DataFrame.to_csv` where specifying an ``na_rep`` might truncate the values written (:issue:`31447`)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index b52d1bb4db360..0245b9f74d944 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1394,7 +1394,9 @@ def func(self, numeric_only=numeric_only, min_count=min_count):
                 except DataError:
                     pass
                 except NotImplementedError as err:
-                    if "function is not implemented for this dtype" in str(err):
+                    if "function is not implemented for this dtype" in str(
+                        err
+                    ) or "category dtype not supported" in str(err):
                         # raised in _get_cython_function, in some cases can
                         # be trimmed by implementing cython funcs for more dtypes
                         pass
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 2d31996a8a964..934d0224a4d9a 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -377,6 +377,22 @@ def test_agg_index_has_complex_internals(index):
     tm.assert_frame_equal(result, expected)
 
 
+def test_agg_cython_category_not_implemented_fallback():
+    # https://github.com/pandas-dev/pandas/issues/31450
+    df = pd.DataFrame({"col_num": [1, 1, 2, 3]})
+    df["col_cat"] = df["col_num"].astype("category")
+
+    result = df.groupby("col_num").col_cat.first()
+    expected = pd.Series(
+        [1, 2, 3], index=pd.Index([1, 2, 3], name="col_num"), name="col_cat"
+    )
+    tm.assert_series_equal(result, expected)
+
+    result = df.groupby("col_num").agg({"col_cat": "first"})
+    expected = expected.to_frame()
+    tm.assert_frame_equal(result, expected)
+
+
 class TestNamedAggregationSeries:
     def test_series_named_agg(self):
         df = pd.Series([1, 2, 3, 4])