From 7ff6a4e94484f6f5c41c223e08e4bc53890c021d Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 3 Apr 2020 16:23:23 +0200
Subject: [PATCH 1/7] PERF: masked ops for reductions (min/max)

---
 pandas/core/array_algos/masked_reductions.py | 39 +++++++++++++
 pandas/core/arrays/integer.py                |  7 ++-
 pandas/tests/reductions/test_reductions.py   | 61 ++++++++++++++------
 3 files changed, 87 insertions(+), 20 deletions(-)

diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py
index 0fb2605b554c2..32a61dd7c39ad 100644
--- a/pandas/core/array_algos/masked_reductions.py
+++ b/pandas/core/array_algos/masked_reductions.py
@@ -45,3 +45,42 @@ def sum(
             return np.sum(values[~mask])
         else:
             return np.sum(values, where=~mask)
+
+
+def _minmax(func):
+    def reduction(values: np.ndarray, mask: np.ndarray, skipna: bool = True):
+        """
+        Reduction for 1D masked array.
+
+        Parameters
+        ----------
+        values : np.ndarray
+            Numpy array with the values (can be of any dtype that support the
+            operation).
+        mask : np.ndarray
+            Boolean numpy array (True values indicate missing values).
+        skipna : bool, default True
+            Whether to skip NA.
+        """
+        if not skipna:
+            if mask.any():
+                return libmissing.NA
+            else:
+                if values.size:
+                    return func(values)
+                else:
+                    # min/max with empty array raise in numpy, pandas returns NA
+                    return libmissing.NA
+        else:
+            subset = values[~mask]
+            if subset.size:
+                return func(values[~mask])
+            else:
+                # min/max with empty array raise in numpy, pandas returns NA
+                return libmissing.NA
+
+    return reduction
+
+
+min = _minmax(np.min)
+max = _minmax(np.max)
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 4f3c68aa03b16..8966d7ef48844 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -561,8 +561,9 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
         data = self._data
         mask = self._mask
 
-        if name == "sum":
-            return masked_reductions.sum(data, mask, skipna=skipna, **kwargs)
+        if name in {"sum", "min", "max"}:
+            op = getattr(masked_reductions, name)
+            return op(data, mask, skipna=skipna, **kwargs)
 
         # coerce to a nan-aware float if needed
         # (we explicitly use NaN within reductions)
@@ -581,7 +582,7 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
 
         # if we have a preservable numeric op,
         # provide coercion back to an integer type if possible
-        elif name in ["min", "max", "prod"]:
+        elif name in ["prod"]:
             # GH#31409 more performant than casting-then-checking
             result = com.cast_scalar_indexer(result)
 
diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
index 962b105d1e8fc..1c0eee2226a6d 100644
--- a/pandas/tests/reductions/test_reductions.py
+++ b/pandas/tests/reductions/test_reductions.py
@@ -65,27 +65,57 @@ def test_ops(self, opname, obj):
             assert result.value == expected
 
     @pytest.mark.parametrize("opname", ["max", "min"])
-    def test_nanops(self, opname, index_or_series):
+    @pytest.mark.parametrize(
+        "dtype, val",
+        [
+            ("object", 2.0),
+            ("float64", 2.0),
+            ("Int64", 2),
+            ("datetime64[ns]", datetime(2011, 11, 1)),
+        ],
+    )
+    def test_nanminmax(self, opname, dtype, val, index_or_series):
         # GH#7261
         klass = index_or_series
-        arg_op = "arg" + opname if klass is Index else "idx" + opname
 
-        obj = klass([np.nan, 2.0])
-        assert getattr(obj, opname)() == 2.0
+        if dtype == "Int64" and klass == pd.Index:
+            pytest.skip("EAs can't yet be stored in an index")
 
-        obj = klass([np.nan])
-        assert pd.isna(getattr(obj, opname)())
-        assert pd.isna(getattr(obj, opname)(skipna=False))
+        def check_missing(res):
+            if dtype == "datetime64[ns]":
+                return res is pd.NaT
+            elif dtype == "Int64":
+                return res is pd.NA
+            else:
+                return pd.isna(res)
 
-        obj = klass([], dtype=object)
-        assert pd.isna(getattr(obj, opname)())
-        assert pd.isna(getattr(obj, opname)(skipna=False))
+        obj = klass([None], dtype=dtype)
+        assert check_missing(getattr(obj, opname)())
+        assert check_missing(getattr(obj, opname)(skipna=False))
 
-        obj = klass([pd.NaT, datetime(2011, 11, 1)])
-        # check DatetimeIndex monotonic path
-        assert getattr(obj, opname)() == datetime(2011, 11, 1)
-        assert getattr(obj, opname)(skipna=False) is pd.NaT
+        obj = klass([], dtype=dtype)
+        assert check_missing(getattr(obj, opname)())
+        assert check_missing(getattr(obj, opname)(skipna=False))
+
+        if dtype == "object":
+            # generic test with object only works for empty / all NaN
+            return
+
+        obj = klass([None, val], dtype=dtype)
+        assert getattr(obj, opname)() == val
+        assert check_missing(getattr(obj, opname)(skipna=False))
 
+        obj = klass([None, val, None], dtype=dtype)
+        assert getattr(obj, opname)() == val
+        assert check_missing(getattr(obj, opname)(skipna=False))
+
+    @pytest.mark.parametrize("opname", ["max", "min"])
+    def test_nanargminmax(self, opname, index_or_series):
+        # GH#7261
+        klass = index_or_series
+        arg_op = "arg" + opname if klass is Index else "idx" + opname
+
+        obj = klass([pd.NaT, datetime(2011, 11, 1)])
         assert getattr(obj, arg_op)() == 1
         result = getattr(obj, arg_op)(skipna=False)
         if klass is Series:
@@ -95,9 +125,6 @@ def test_nanops(self, opname, index_or_series):
 
         obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT])
         # check DatetimeIndex non-monotonic path
-        assert getattr(obj, opname)(), datetime(2011, 11, 1)
-        assert getattr(obj, opname)(skipna=False) is pd.NaT
-
         assert getattr(obj, arg_op)() == 1
         result = getattr(obj, arg_op)(skipna=False)
         if klass is Series:

From d93e811d25edee51cf703f05d5b079e31250f41f Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 3 Apr 2020 17:27:04 +0200
Subject: [PATCH 2/7] fix test

---
 pandas/tests/arrays/integer/test_dtypes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py
index ee1ec86745246..515013e95c717 100644
--- a/pandas/tests/arrays/integer/test_dtypes.py
+++ b/pandas/tests/arrays/integer/test_dtypes.py
@@ -34,7 +34,7 @@ def test_preserve_dtypes(op):
 
     # op
     result = getattr(df.C, op)()
-    if op == "sum":
+    if op in {"sum", "min", "max"}:
         assert isinstance(result, np.int64)
     else:
         assert isinstance(result, int)

From 899913d253a0f9f3f1e98fe1111b732f8b7eb802 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 3 Apr 2020 17:28:00 +0200
Subject: [PATCH 3/7] add whatsnew

---
 doc/source/whatsnew/v1.1.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 19e8acdaa7384..d232b68e3c450 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -274,7 +274,7 @@ Performance improvements
   sparse values from ``scipy.sparse`` matrices using the
   :meth:`DataFrame.sparse.from_spmatrix` constructor (:issue:`32821`,
   :issue:`32825`,  :issue:`32826`, :issue:`32856`, :issue:`32858`).
-- Performance improvement in :meth:`Series.sum` for nullable (integer and boolean) dtypes (:issue:`30982`).
+- Performance improvement in reductions (sum, min, max) for nullable (integer and boolean) dtypes (:issue:`30982`, :issue:`33261`).
 
 
 .. ---------------------------------------------------------------------------

From 9d496ebe004565b7c208a4e2fa1c8dd65e9f5553 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 3 Apr 2020 21:29:32 +0200
Subject: [PATCH 4/7] also add for boolean

---
 pandas/core/arrays/boolean.py              | 8 +++-----
 pandas/tests/reductions/test_reductions.py | 5 +++--
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
index 442d4ca8cef6d..e85534def6b97 100644
--- a/pandas/core/arrays/boolean.py
+++ b/pandas/core/arrays/boolean.py
@@ -696,8 +696,9 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
         data = self._data
         mask = self._mask
 
-        if name == "sum":
-            return masked_reductions.sum(data, mask, skipna=skipna, **kwargs)
+        if name in {"sum", "min", "max"}:
+            op = getattr(masked_reductions, name)
+            return op(data, mask, skipna=skipna, **kwargs)
 
         # coerce to a nan-aware float if needed
         if self._hasna:
@@ -715,9 +716,6 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
             if int_result == result:
                 result = int_result
 
-        elif name in ["min", "max"] and notna(result):
-            result = np.bool_(result)
-
         return result
 
     def _maybe_mask_result(self, result, mask, other, op_name: str):
diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
index 1c0eee2226a6d..8fb035e085d40 100644
--- a/pandas/tests/reductions/test_reductions.py
+++ b/pandas/tests/reductions/test_reductions.py
@@ -70,15 +70,16 @@ def test_ops(self, opname, obj):
         [
             ("object", 2.0),
             ("float64", 2.0),
-            ("Int64", 2),
             ("datetime64[ns]", datetime(2011, 11, 1)),
+            ("Int64", 2),
+            ("boolean", True),
         ],
     )
     def test_nanminmax(self, opname, dtype, val, index_or_series):
         # GH#7261
         klass = index_or_series
 
-        if dtype == "Int64" and klass == pd.Index:
+        if dtype in ["Int64", "boolean"] and klass == pd.Index:
             pytest.skip("EAs can't yet be stored in an index")
 
         def check_missing(res):

From cd1e2913d58428f8316627c286bf85df3623364a Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 3 Apr 2020 21:29:56 +0200
Subject: [PATCH 5/7] Update pandas/core/arrays/integer.py

Co-Authored-By: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com>
---
 pandas/core/arrays/integer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 8966d7ef48844..541ecdeb45830 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -582,7 +582,7 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
 
         # if we have a preservable numeric op,
         # provide coercion back to an integer type if possible
-        elif name in ["prod"]:
+        elif name == "prod":
             # GH#31409 more performant than casting-then-checking
             result = com.cast_scalar_indexer(result)
 

From cb992a22982d2dcdff1b33b283030d638338c764 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 3 Apr 2020 21:36:30 +0200
Subject: [PATCH 6/7] different nesting to be mypy friendlier

---
 pandas/core/array_algos/masked_reductions.py | 59 ++++++++++----------
 1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py
index 32a61dd7c39ad..58325aac9ab80 100644
--- a/pandas/core/array_algos/masked_reductions.py
+++ b/pandas/core/array_algos/masked_reductions.py
@@ -47,40 +47,41 @@ def sum(
             return np.sum(values, where=~mask)
 
 
-def _minmax(func):
-    def reduction(values: np.ndarray, mask: np.ndarray, skipna: bool = True):
-        """
-        Reduction for 1D masked array.
+def _minmax(func, values: np.ndarray, mask: np.ndarray, skipna: bool = True):
+    """
+    Reduction for 1D masked array.
 
-        Parameters
-        ----------
-        values : np.ndarray
-            Numpy array with the values (can be of any dtype that support the
-            operation).
-        mask : np.ndarray
-            Boolean numpy array (True values indicate missing values).
-        skipna : bool, default True
-            Whether to skip NA.
-        """
-        if not skipna:
-            if mask.any():
-                return libmissing.NA
-            else:
-                if values.size:
-                    return func(values)
-                else:
-                    # min/max with empty array raise in numpy, pandas returns NA
-                    return libmissing.NA
+    Parameters
+    ----------
+    values : np.ndarray
+        Numpy array with the values (can be of any dtype that support the
+        operation).
+    mask : np.ndarray
+        Boolean numpy array (True values indicate missing values).
+    skipna : bool, default True
+        Whether to skip NA.
+    """
+    if not skipna:
+        if mask.any():
+            return libmissing.NA
         else:
-            subset = values[~mask]
-            if subset.size:
-                return func(values[~mask])
+            if values.size:
+                return func(values)
             else:
                 # min/max with empty array raise in numpy, pandas returns NA
                 return libmissing.NA
+    else:
+        subset = values[~mask]
+        if subset.size:
+            return func(values[~mask])
+        else:
+            # min/max with empty array raise in numpy, pandas returns NA
+            return libmissing.NA
+
 
-    return reduction
+def min(values: np.ndarray, mask: np.ndarray, skipna: bool = True):
+    return _minmax(np.min, values=values, mask=mask, skipna=skipna)
 
 
-min = _minmax(np.min)
-max = _minmax(np.max)
+def max(values: np.ndarray, mask: np.ndarray, skipna: bool = True):
+    return _minmax(np.max, values=values, mask=mask, skipna=skipna)

From eb00ad00f69d6848b38070c3edc62e86198caed5 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 3 Apr 2020 21:37:58 +0200
Subject: [PATCH 7/7] update docstring

---
 pandas/core/array_algos/masked_reductions.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py
index 58325aac9ab80..b3723340cefd6 100644
--- a/pandas/core/array_algos/masked_reductions.py
+++ b/pandas/core/array_algos/masked_reductions.py
@@ -53,6 +53,7 @@ def _minmax(func, values: np.ndarray, mask: np.ndarray, skipna: bool = True):
 
     Parameters
     ----------
+    func : np.min or np.max
     values : np.ndarray
         Numpy array with the values (can be of any dtype that support the
         operation).