From 8b0ad717d1ec54dd40136817a326b41817ffcb86 Mon Sep 17 00:00:00 2001 From: Dennis Chukwunta Date: Tue, 27 Sep 2022 21:21:24 +0100 Subject: [PATCH] BUG: Fix calling groupBy(...).apply(func) on an empty dataframe invokes func (#48579) --- doc/source/whatsnew/v1.5.1.rst | 1 + pandas/core/groupby/ops.py | 13 ++++++------- pandas/tests/groupby/test_apply.py | 25 +++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.5.1.rst b/doc/source/whatsnew/v1.5.1.rst index 852166cbfaa7f..ee66f2f649bc2 100644 --- a/doc/source/whatsnew/v1.5.1.rst +++ b/doc/source/whatsnew/v1.5.1.rst @@ -77,6 +77,7 @@ Fixed regressions - Fixed performance regression in :func:`factorize` when ``na_sentinel`` is not ``None`` and ``sort=False`` (:issue:`48620`) - Fixed regression causing an ``AttributeError`` during warning emitted if the provided table name in :meth:`DataFrame.to_sql` and the table name actually used in the database do not match (:issue:`48733`) - Fixed :meth:`.DataFrameGroupBy.size` not returning a Series when ``axis=1`` (:issue:`48738`) +- Fixed regression in :meth:`.DataFrameGroupBy.apply` where the user-defined function is called on an empty dataframe (:issue:`47985`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 762e135111cf8..04f18369f4fcc 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -787,15 +787,14 @@ def apply( if not mutated and not _is_indexed_like(res, group_axes, axis): mutated = True result_values.append(res) - # getattr pattern for __name__ is needed for functools.partial objects - if len(group_keys) == 0 and getattr(f, "__name__", None) not in [ - "idxmin", - "idxmax", - "nanargmin", - "nanargmax", + if len(group_keys) == 0 and getattr(f, "__name__", None) in [ + "mad", + "skew", + "sum", + "prod", ]: - # If group_keys is empty, then no function calls have been made, + # If group_keys is empty, then no function calls have been made, # so we will not have raised even if this is an invalid dtype. # So do one dummy call here to raise appropriate TypeError. f(data.iloc[:0]) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index b064c12f89c21..47ea6a99ffea9 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1331,3 +1331,28 @@ def test_result_name_when_one_group(name): expected = Series([1, 2], name=name) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "method, op", + [ + ("apply", lambda gb: gb.values[-1]), + ("apply", lambda gb: gb["b"].iloc[0]), + ("agg", "mad"), + ("agg", "skew"), + ("agg", "prod"), + ("agg", "sum"), + ], +) +def test_empty_df(method, op): + # GH 47985 + empty_df = DataFrame({"a": [], "b": []}) + gb = empty_df.groupby("a", group_keys=True) + group = getattr(gb, "b") + + result = getattr(group, method)(op) + expected = Series( + [], name="b", dtype="float64", index=Index([], dtype="float64", name="a") + ) + + tm.assert_series_equal(result, expected)