pandas-dev · jreback · May 9, 2020 · Mar 22, 2020 · Mar 22, 2020 · Mar 22, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -364,6 +364,7 @@ Groupby/resample/rolling
 
 - Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates and the applied ``func`` does not mutate passed in objects (:issue:`30667`)
 - Bug in :meth:`DataFrameGroupby.transform` produces incorrect result with transformation functions (:issue:`30918`)
+- Bug in :meth:`DataFrameGroupBy.sum` and :meth:`SeriesGroupBy.sum` where a large negative number would be returned when the number of non-null values was below ``min_count`` for nullable integer dtypes (:issue:`32861`)
 
 Reshaping
 ^^^^^^^^^

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -38,6 +38,7 @@
 )
 from pandas.core.dtypes.missing import _maybe_fill, isna
 
+import pandas as pd
 import pandas.core.algorithms as algorithms
 from pandas.core.base import SelectionMixin
 import pandas.core.common as com
@@ -547,12 +548,17 @@ def _cython_operation(
             how == "add"
             and is_integer_dtype(orig_values.dtype)
             and is_extension_array_dtype(orig_values.dtype)
+            and not isna(result).any()
         ):
             # We need this to ensure that Series[Int64Dtype].resample().sum()
             # remains int64 dtype.
             # Two options for avoiding this special case
             # 1. mask-aware ops and avoid casting to float with NaN above
             # 2. specify the result dtype when calling this method
+            #
+            # result can contain NaN when a group has fewer than min_count
+            # non-null values (https://github.com/pandas-dev/pandas/issues/32861);
+            # casting that to int64 gives a garbage value, so skip the cast then
             result = result.astype("int64")
 
         if kind == "aggregate" and self._filter_empty_groups and not counts.all():
@@ -577,6 +583,14 @@ def _cython_operation(
         elif is_datetimelike and kind == "aggregate":
             result = result.astype(orig_values.dtype)
 
+        if (
+            how == "add"
+            and is_integer_dtype(orig_values.dtype)
+            and is_extension_array_dtype(orig_values.dtype)
+            and isna(result).any()
+        ):
+            result = pd.array(result.ravel(), dtype="Int64")
+
         return result, names
 
     def aggregate(

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -1636,3 +1636,20 @@ def test_apply_to_nullable_integer_returns_float(values, function):
     result = groups.agg([function])
     expected.columns = MultiIndex.from_tuples([("b", function)])
     tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_sum_below_mincount_nullable_integer():
+    # https://github.com/pandas-dev/pandas/issues/32861
+    df = pd.DataFrame({"a": [0, 1, 2], "b": [0, 1, 2], "c": [0, 1, 2]}, dtype="Int64")
+    grouped = df.groupby("a")
+    idx = pd.Index([0, 1, 2], dtype=object, name="a")
+
+    result = grouped["b"].sum(min_count=2)
+    expected = pd.Series([pd.NA] * 3, dtype="Int64", index=idx, name="b")
+    tm.assert_series_equal(result, expected)
+
+    result = grouped.sum(min_count=2)
+    expected = pd.DataFrame(
+        {"b": [pd.NA] * 3, "c": [pd.NA] * 3}, dtype="Int64", index=idx
+    )
+    tm.assert_frame_equal(result, expected)