diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index ba5334b2f4fa8..bdd83e2adefd6 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -458,8 +458,9 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`) - Bug in :meth:`GroupBy.quantile` may implicitly sort the result index with ``sort=False`` (:issue:`53009`) - Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64, timedelta64 or :class:`PeriodDtype` values (:issue:`52128`, :issue:`53045`) -- Bug in :meth:`SeriresGroupBy.nth` and :meth:`DataFrameGroupBy.nth` after performing column selection when using ``dropna="any"`` or ``dropna="all"`` would not subset columns (:issue:`53518`) -- Bug in :meth:`SeriresGroupBy.nth` and :meth:`DataFrameGroupBy.nth` raised after performing column selection when using ``dropna="any"`` or ``dropna="all"`` resulted in rows being dropped (:issue:`53518`) +- Bug in :meth:`SeriesGroupBy.nth` and :meth:`DataFrameGroupBy.nth` after performing column selection when using ``dropna="any"`` or ``dropna="all"`` would not subset columns (:issue:`53518`) +- Bug in :meth:`SeriesGroupBy.nth` and :meth:`DataFrameGroupBy.nth` raised after performing column selection when using ``dropna="any"`` or ``dropna="all"`` resulted in rows being dropped (:issue:`53518`) +- Bug in :meth:`SeriesGroupBy.sum` and :meth:`DataFrameGroupBy.sum` summing ``np.inf + np.inf`` and ``(-np.inf) + (-np.inf)`` to ``np.nan`` (:issue:`53606`) Reshaping ^^^^^^^^^ diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 61f448cbe0c3f..0baae23a4a71c 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -746,6 +746,13 @@ def group_sum( y = val - compensation[lab, j] t = sumx[lab, j] + y compensation[lab, j] = t - sumx[lab, j] - y + if compensation[lab, j] != compensation[lab, j]: + # GH#53606 + # 
If val is +/- infinity, compensation is NaN + # which would lead to results being NaN instead + # of +/- infinity. We cannot use util.is_nan + # because the GIL is not held here + compensation[lab, j] = 0 sumx[lab, j] = t _check_below_mincount( diff --git a/pandas/tests/groupby/test_libgroupby.py b/pandas/tests/groupby/test_libgroupby.py index d10bcf9053d1a..92c3b68d87fad 100644 --- a/pandas/tests/groupby/test_libgroupby.py +++ b/pandas/tests/groupby/test_libgroupby.py @@ -6,6 +6,7 @@ group_cumprod, group_cumsum, group_mean, + group_sum, group_var, ) @@ -302,3 +303,29 @@ def test_cython_group_mean_Inf_at_begining_and_end(): actual, expected, ) + + +@pytest.mark.parametrize( + "values, out", + [ + ([[np.inf], [np.inf], [np.inf]], [[np.inf], [np.inf]]), + ([[np.inf], [np.inf], [-np.inf]], [[np.inf], [np.nan]]), + ([[np.inf], [-np.inf], [np.inf]], [[np.inf], [np.nan]]), + ([[np.inf], [-np.inf], [-np.inf]], [[np.inf], [-np.inf]]), + ], +) +def test_cython_group_sum_Inf_at_begining_and_end(values, out): + # GH #53606 + actual = np.array([[np.nan], [np.nan]], dtype="float64") + counts = np.array([0, 0], dtype="int64") + data = np.array(values, dtype="float64") + labels = np.array([0, 1, 1], dtype=np.intp) + + group_sum(actual, counts, data, labels, None, is_datetimelike=False) + + expected = np.array(out, dtype="float64") + + tm.assert_numpy_array_equal( + actual, + expected, + )