pandas-dev · phofl · Nov 18, 2023 · Sep 30, 2023 · Sep 30, 2023 · Sep 30, 2023
diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
@@ -77,7 +77,7 @@ Other enhancements
 - :func:`read_csv` now supports ``on_bad_lines`` parameter with ``engine="pyarrow"``. (:issue:`54480`)
 - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
 - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
-- Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`)
+- Implement masked algorithms for :meth:`Series.value_counts` and :meth:`Series.mode` (:issue:`54984`, :issue:`55340`)
 -
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -404,12 +404,13 @@ def mode(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None):
     cdef:
         ndarray[htfunc_t] keys
         ndarray[htfunc_t] modes
+        ndarray[uint8_t] res_mask = None
 
         int64_t[::1] counts
         int64_t count, _, max_count = -1
-        Py_ssize_t nkeys, k, j = 0
+        Py_ssize_t nkeys, k, na_counter, j = 0
 
-    keys, counts, _ = value_count(values, dropna, mask=mask)
+    keys, counts, na_counter = value_count(values, dropna, mask=mask)
     nkeys = len(keys)
 
     modes = np.empty(nkeys, dtype=values.dtype)
@@ -440,7 +441,10 @@ def mode(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None):
 
             modes[j] = keys[k]
 
-    return modes[:j + 1]
+    if na_counter > 0:
+        res_mask = np.zeros(j+1, dtype=np.bool_)
+        res_mask[j] = True
+    return modes[:j + 1], res_mask
 
 
 {{py:

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -1039,7 +1039,10 @@ def mode(
 
     values = _ensure_data(values)
 
-    npresult = htable.mode(values, dropna=dropna, mask=mask)
+    npresult, res_mask = htable.mode(values, dropna=dropna, mask=mask)
+    if res_mask is not None:
+        return npresult, res_mask  # type: ignore[return-value]
+
     try:
         npresult = np.sort(npresult)
     except TypeError as err:

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -69,6 +69,7 @@
 from pandas.core.algorithms import (
     factorize_array,
     isin,
+    mode,
     take,
 )
 from pandas.core.array_algos import (
@@ -1061,6 +1062,29 @@ def value_counts(self, dropna: bool = True) -> Series:
         )
         return Series(arr, index=index, name="count", copy=False)
 
+    def _mode(self, dropna: bool = True) -> Self:
+        """
+        Return the mode(s) of the array, sorted.
+
+        Parameters
+        ----------
+        dropna : bool, default True
+            If False, masked (NA) entries are counted and NA can be a mode.
+
+        Returns
+        -------
+        Self
+        """
+        # ``mode`` hands back ``(values, mask)`` only when NA entries were
+        # counted; otherwise it returns a bare array and nothing is masked.
+        result = mode(self._data, dropna=dropna, mask=self._mask)
+        if isinstance(result, tuple):
+            result, res_mask = result
+        else:
+            res_mask = np.zeros(result.shape, dtype=np.bool_)
+        result = type(self)(result, res_mask)  # type: ignore[arg-type]
+        return result[result.argsort()]
+
     @doc(ExtensionArray.equals)
     def equals(self, other) -> bool:
         if type(self) != type(other):

diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
@@ -644,21 +644,21 @@ def test_mode(self, dtype, writable):
         values = np.repeat(np.arange(N).astype(dtype), 5)
         values[0] = 42
         values.flags.writeable = writable
-        result = ht.mode(values, False)
+        result = ht.mode(values, False)[0]
         assert result == 42
 
     def test_mode_stable(self, dtype, writable):
         values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype)
         values.flags.writeable = writable
-        keys = ht.mode(values, False)
+        keys = ht.mode(values, False)[0]  # ht.mode returns (modes, mask); keep modes
         tm.assert_numpy_array_equal(keys, values)
 
 
 def test_modes_with_nans():
     # GH42688, nans aren't mangled
     nulls = [pd.NA, np.nan, pd.NaT, None]
     values = np.array([True] + nulls * 2, dtype=np.object_)
-    modes = ht.mode(values, False)
+    modes = ht.mode(values, False)[0]  # first element of the (modes, mask) pair
     assert modes.size == len(nulls)
 
 
@@ -724,8 +724,8 @@ def test_ismember_no(self, dtype):
 
     def test_mode(self, dtype):
         values = np.array([42, np.nan, np.nan, np.nan], dtype=dtype)
-        assert ht.mode(values, True) == 42
-        assert np.isnan(ht.mode(values, False))
+        assert ht.mode(values, True)[0] == 42  # [0]: modes of the (modes, mask) pair
+        assert np.isnan(ht.mode(values, False)[0])
 
 
 def test_ismember_tuple_with_nans():

diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py
@@ -29,6 +29,28 @@ def test_mode_extension_dtype(as_period):
     tm.assert_series_equal(res, ser)
 
 
+def test_mode_nullable_dtype(any_numeric_ea_dtype):
+    # GH#55340: NA tied with other modes first, then NA as the only mode
+    ser = Series([1, 3, 2, pd.NA, 3, 2, pd.NA], dtype=any_numeric_ea_dtype)
+    result = ser.mode(dropna=False)  # 2, 3 and NA are tied at two occurrences each
+    expected = Series([2, 3, pd.NA], dtype=any_numeric_ea_dtype)
+    tm.assert_series_equal(result, expected)
+
+    result = ser.mode(dropna=True)
+    expected = Series([2, 3], dtype=any_numeric_ea_dtype)
+    tm.assert_series_equal(result, expected)
+
+    ser[-1] = pd.NA  # label -1 is absent from the index, so this appends a third NA
+
+    result = ser.mode(dropna=True)
+    expected = Series([2, 3], dtype=any_numeric_ea_dtype)
+    tm.assert_series_equal(result, expected)
+
+    result = ser.mode(dropna=False)  # NA is now the single most frequent entry
+    expected = Series([pd.NA], dtype=any_numeric_ea_dtype)
+    tm.assert_series_equal(result, expected)
+
+
 def test_reductions_td64_with_nat():
     # GH#8617
     ser = Series([0, pd.NaT], dtype="m8[ns]")