From d6e943e91d291aaf52a43b3ebef48a47cf35de64 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 21 May 2023 08:04:50 +0100 Subject: [PATCH 1/3] BUG: make Series.agg aggregate when possible --- doc/source/whatsnew/v2.1.0.rst | 26 ++++++++++++++++++++++++- pandas/core/apply.py | 15 +------------- pandas/tests/apply/test_series_apply.py | 17 ---------------- 3 files changed, 26 insertions(+), 32 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 6a1e5cdef30bc..07929b1cf44fb 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -108,7 +108,31 @@ Notable bug fixes These are bug fixes that might have notable behavior changes. -.. _whatsnew_210.notable_bug_fixes.notable_bug_fix1: +.. _whatsnew_210.notable_bug_fixes.series.agg: + +Previously, :meth:`Series.agg` did not necessarily aggregate, even if given an aggregation function: + +*Previous behavior*: + +.. code-block:: ipython + + In [1]: ser = pd.Series([1, 2, 3]) + In [2]: ser.agg(np.sum) + 0 1 + 1 2 + 2 3 + dtype: int64 + +Now it will always aggregate, when passed an aggregation function: + +*New behavior*: + +.. ipython:: python + + ser = pd.Series([1, 2, 3]) + ser.agg(np.sum) + +More generally, the result from :meth:`Series.agg` will now always be the same as the single-column result from :meth:`DataFrame.agg`. 
notable_bug_fix1 ^^^^^^^^^^^^^^^^ diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 020aa4e8916da..1e9bd85832eec 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1084,22 +1084,9 @@ def agg(self): result = super().agg() if result is None: f = self.f - # string, list-like, and dict-like are entirely handled in super assert callable(f) - - # try a regular apply, this evaluates lambdas - # row-by-row; however if the lambda is expected a Series - # expression, e.g.: lambda x: x-x.quantile(0.25) - # this will fail, so we can try a vectorized evaluation - - # we cannot FIRST try the vectorized evaluation, because - # then .agg and .apply would have different semantics if the - # operation is actually defined on the Series, e.g. str - try: - result = self.obj.apply(f) - except (ValueError, AttributeError, TypeError): - result = f(self.obj) + result = f(self.obj) return result diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index e37006eb0a5f6..f316aa74943a7 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -346,18 +346,6 @@ def test_demo(): tm.assert_series_equal(result, expected) -def test_agg_apply_evaluate_lambdas_the_same(string_series): - # test that we are evaluating row-by-row first - # before vectorized evaluation - result = string_series.apply(lambda x: str(x)) - expected = string_series.agg(lambda x: str(x)) - tm.assert_series_equal(result, expected) - - result = string_series.apply(str) - expected = string_series.agg(str) - tm.assert_series_equal(result, expected) - - def test_with_nested_series(datetime_series): # GH 2316 # .agg with a reducer and a transform, what to do @@ -370,11 +358,6 @@ def test_with_nested_series(datetime_series): expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2}) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH52123 - result = 
datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"])) - tm.assert_frame_equal(result, expected) - def test_replicate_describe(string_series): # this also tests a result set that is all scalars From 5ed11509178999e0d49a5cbda2c619216c53e2b0 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 21 May 2023 08:33:23 +0100 Subject: [PATCH 2/3] add GH number --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 07929b1cf44fb..3cccf008f4f0b 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -132,7 +132,7 @@ Now it will always aggregate, when passed an aggregation function: ser = pd.Series([1, 2, 3]) ser.agg(np.sum) -More generally, the result from :meth:`Series.agg` will now always be the same as the single-column result from :meth:`DataFrame.agg`. +More generally, the result from :meth:`Series.agg` will now always be the same as the single-column result from :meth:`DataFrame.agg` (:issue:'53324'). notable_bug_fix1 ^^^^^^^^^^^^^^^^ From ca286ac6202a0284f54d430147b69f214b3b67fa Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 21 May 2023 08:34:05 +0100 Subject: [PATCH 3/3] add GH number fix --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 3cccf008f4f0b..21ff2ece09fe1 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -132,7 +132,7 @@ Now it will always aggregate, when passed an aggregation function: ser = pd.Series([1, 2, 3]) ser.agg(np.sum) -More generally, the result from :meth:`Series.agg` will now always be the same as the single-column result from :meth:`DataFrame.agg` (:issue:'53324'). +More generally, the result from :meth:`Series.agg` will now always be the same as the single-column result from :meth:`DataFrame.agg` (:issue:`53324`). 
notable_bug_fix1 ^^^^^^^^^^^^^^^^