From b12d51ded08d4fca97231a7d47f3d894a20d11fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?=
 <8431159+mtsokol@users.noreply.github.com>
Date: Tue, 2 Jan 2024 10:29:00 +0100
Subject: [PATCH] API: Refactor argmax and argmin (#614)

---
 sparse/_coo/common.py    | 193 ++++++++++++++++++++++++++-------------
 sparse/tests/test_coo.py |  43 +++++----
 2 files changed, 153 insertions(+), 83 deletions(-)

diff --git a/sparse/_coo/common.py b/sparse/_coo/common.py
index 62500c26..f9f49a5f 100644
--- a/sparse/_coo/common.py
+++ b/sparse/_coo/common.py
@@ -2,7 +2,7 @@
 import operator
 import warnings
 from collections.abc import Iterable
-from typing import Callable, Optional
+from typing import Optional, Tuple
 
 import numpy as np
 import scipy.sparse
@@ -613,65 +613,6 @@ def argwhere(a):
     return np.transpose(a.nonzero())
 
 
-def _arg_minmax_common(
-    x: SparseArray,
-    axis: Optional[int],
-    keepdims: bool,
-    comp_op: Callable,
-    np_arg_func: Callable,
-):
-    """ """
-    assert comp_op in (operator.lt, operator.gt)
-    assert np_arg_func in (np.argmax, np.argmin)
-
-    if not isinstance(axis, (int, type(None))):
-        raise ValueError(f"axis must be int or None, but it's: {type(axis)}")
-
-    if isinstance(axis, int) and axis >= x.ndim:
-        raise ValueError(
-            f"axis {axis} is out of bounds for array of dimension {x.ndim}"
-        )
-
-    if x.fill_value != 0.0:
-        raise ValueError(
-            f"Only 0.0 fill value is supported, but found: {x.fill_value}."
-        )
-
-    if np.any(comp_op(x.data, 0.0)):
-        raise ValueError(
-            f"None of the non-zero values can be {comp_op.__name__} the fill value."
-        )
-
-    # fast path
-    if axis is None or x.ndim == 1:
-        x_flat = x.reshape(-1)
-        result = x_flat.coords[0, np_arg_func(x_flat.data)]
-        return np.array(result).reshape([1] * x.ndim) if keepdims else result
-
-    # search for min/max value & index for each retained axis
-    minmax_indexes = {}
-    minmax_values = {}
-
-    for idx, coord in enumerate(x.coords.T):
-        coord = list(coord)
-        axis_index = coord[axis]
-        coord[axis] = 0
-        coord = tuple(coord)
-        if not coord in minmax_values or comp_op(minmax_values[coord], x.data[idx]):
-            minmax_values[coord] = x.data[idx]
-            minmax_indexes[coord] = axis_index
-
-    new_shape = list(x.shape)
-    new_shape[axis] = 1
-    new_shape = tuple(new_shape)
-
-    result = np.zeros(shape=new_shape, dtype=np.intp)
-    for idx, minmax_index in minmax_indexes.items():
-        result[idx] = minmax_index
-
-    return result if keepdims else result.squeeze()
-
-
 def argmax(x, /, *, axis=None, keepdims=False):
     """
     Returns the indices of the maximum values along a specified axis.
@@ -697,9 +638,7 @@ def argmax(x, /, *, axis=None, keepdims=False):
         the first occurrence of the maximum value. Otherwise, a non-zero-dimensional
         array containing the indices of the maximum values.
     """
-    return _arg_minmax_common(
-        x, axis=axis, keepdims=keepdims, comp_op=operator.lt, np_arg_func=np.argmax
-    )
+    return _arg_minmax_common(x, axis=axis, keepdims=keepdims, mode="max")
 
 
 def argmin(x, /, *, axis=None, keepdims=False):
@@ -727,9 +666,7 @@ def argmin(x, /, *, axis=None, keepdims=False):
         the first occurrence of the minimum value. Otherwise, a non-zero-dimensional
         array containing the indices of the minimum values.
     """
-    return _arg_minmax_common(
-        x, axis=axis, keepdims=keepdims, comp_op=operator.gt, np_arg_func=np.argmin
-    )
+    return _arg_minmax_common(x, axis=axis, keepdims=keepdims, mode="min")
 
 
 def _replace_nan(array, value):
@@ -1144,3 +1081,127 @@ def clip(a, a_min=None, a_max=None, out=None):
     """
     a = asCOO(a, name="clip")
     return a.clip(a_min, a_max)
+
+
+@numba.jit(nopython=True, nogil=True)
+def _compute_minmax_args(
+    coords: np.ndarray,
+    data: np.ndarray,
+    reduce_size: int,
+    fill_value: float,
+    max_mode_flag: bool,
+) -> Tuple[np.ndarray, np.ndarray]:
+    assert coords.shape[0] == 2
+    reduce_coords = coords[0, :]
+    index_coords = coords[1, :]
+
+    result_indices = np.unique(index_coords)
+    result_data = []
+
+    # we iterate through each trace
+    for result_index in np.nditer(result_indices):
+        mask = index_coords == result_index
+        masked_reduce_coords = reduce_coords[mask]
+        masked_data = data[mask]
+
+        if max_mode_flag:
+            compared_data = operator.gt(masked_data, fill_value)
+        else:
+            compared_data = operator.lt(masked_data, fill_value)
+
+        if np.any(compared_data) or len(masked_data) == reduce_size:
+            # best value is a non-fill value
+            best_arg = (
+                np.argmax(masked_data) if max_mode_flag else np.argmin(masked_data)
+            )
+            result_data.append(masked_reduce_coords[best_arg])
+        else:
+            # best value is a fill value, find the first occurrence of it
+            current_coord = np.array(-1, dtype=coords.dtype)
+            found = False
+            for idx, new_coord in enumerate(np.nditer(np.sort(masked_reduce_coords))):
+                # there is at least one fill value between consecutive non-fill values
+                if new_coord - current_coord > 1:
+                    result_data.append(idx)
+                    found = True
+                    break
+                current_coord = new_coord
+            # get the first fill value after all non-fill values
+            if not found:
+                result_data.append(current_coord + 1)
+
+    return (result_indices, result_data)
+
+
+def _arg_minmax_common(
+    x: SparseArray,
+    axis: Optional[int],
+    keepdims: bool,
+    mode: str,
+):
+    """
+    Internal implementation for argmax and argmin functions.
+    """
+    assert mode in ("max", "min")
+    max_mode_flag = mode == "max"
+
+    if not isinstance(axis, (int, type(None))):
+        raise ValueError(f"axis must be int or None, but it's: {type(axis)}")
+    if isinstance(axis, int) and axis >= x.ndim:
+        raise ValueError(
+            f"axis {axis} is out of bounds for array of dimension {x.ndim}"
+        )
+    if x.ndim == 0:
+        raise ValueError("Input array must be at least 1-D, but it's 0-D.")
+
+    # If `axis` is None then we need to flatten the input array and memorize
+    # the original dimensionality for the final reshape operation.
+    axis_none_original_ndim: Optional[int] = None
+    if axis is None:
+        axis_none_original_ndim = x.ndim
+        x = x.reshape(-1)[:, None]
+        axis = 0
+
+    # A 1-D array must have one more singleton dimension.
+    if axis == 0 and x.ndim == 1:
+        x = x[:, None]
+
+    # We need to move `axis` to the front.
+    new_transpose = list(range(x.ndim))
+    new_transpose.insert(0, new_transpose.pop(axis))
+    new_transpose = tuple(new_transpose)
+
+    # And reshape it to 2-D (reduce axis, the rest of axes flattened)
+    new_shape = list(x.shape)
+    new_shape.insert(0, new_shape.pop(axis))
+    new_shape = tuple(new_shape)
+
+    x = x.transpose(new_transpose)
+    x = x.reshape((new_shape[0], np.prod(new_shape[1:])))
+
+    # Compute max/min arguments
+    result_indices, result_data = _compute_minmax_args(
+        x.coords.copy(),
+        x.data.copy(),
+        reduce_size=x.shape[0],
+        fill_value=x.fill_value,
+        max_mode_flag=max_mode_flag,
+    )
+
+    from .core import COO
+
+    result = COO(
+        result_indices, result_data, shape=(x.shape[1],), fill_value=0, prune=True
+    )
+
+    # Let's reshape the result to the original shape.
+    result = result.reshape((1, *new_shape[1:]))
+    new_transpose = list(range(result.ndim))
+    new_transpose.insert(axis, new_transpose.pop(0))
+    result = result.transpose(new_transpose)
+
+    # If `axis=None` we need to reshape flattened array into original dimensionality.
+    if not axis_none_original_ndim is None:
+        result = result.reshape([1 for _ in range(axis_none_original_ndim)])
+
+    return result if keepdims else result.squeeze()
diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py
index bc1460a7..1f811768 100644
--- a/sparse/tests/test_coo.py
+++ b/sparse/tests/test_coo.py
@@ -1694,42 +1694,51 @@ def test_array_as_shape():
 
 @pytest.mark.parametrize(
     "arr",
-    [np.array([[0, 3, 0], [1, 2, 0]]), np.array([[[0, 0], [1, 0]], [[5, 0], [0, 3]]])],
+    [np.array([[0, 3, 0], [1, 2, 0]]), np.array([[[0, 0], [1, 0]], [[5, 0], [0, -3]]])],
 )
 @pytest.mark.parametrize("axis", [None, 0, 1])
 @pytest.mark.parametrize("keepdims", [True, False])
 @pytest.mark.parametrize(
-    "mode",
-    [(sparse.argmax, np.argmax, lambda x: x), (sparse.argmin, np.argmin, lambda x: -x)],
+    "mode", [(sparse.argmax, np.argmax), (sparse.argmin, np.argmin)]
 )
 def test_argmax_argmin(arr, axis, keepdims, mode):
-    sparse_func, np_func, transform = mode
-    arr = transform(arr)
+    sparse_func, np_func = mode
 
     s_arr = sparse.COO.from_numpy(arr)
 
-    result = sparse_func(s_arr, axis=axis, keepdims=keepdims)
+    result = sparse_func(s_arr, axis=axis, keepdims=keepdims).todense()
     expected = np_func(arr, axis=axis, keepdims=keepdims)
 
     np.testing.assert_equal(result, expected)
 
 
-@pytest.mark.parametrize("func", [np.argmax, np.argmin])
-def test_argmax_argmin_value_constraint(func):
-    s = sparse.COO.from_numpy(np.full((2, 2), 2), fill_value=2)
+@pytest.mark.parametrize("axis", [None, 0, 1, 2])
+@pytest.mark.parametrize(
+    "mode", [(sparse.argmax, np.argmax), (sparse.argmin, np.argmin)]
+)
+def test_argmax_argmin_3D(axis, mode):
+    sparse_func, np_func = mode
 
-    with pytest.raises(
-        ValueError, match="Only 0.0 fill value is supported, but found: 2."
-    ):
-        func(s)
+    s_arr = sparse.zeros(shape=(1000, 550, 3), format="dok")
+    s_arr[100, 100, 0] = 3
+    s_arr[100, 100, 1] = 3
+    s_arr[100, 99, 0] = -2
+    s_arr = s_arr.to_coo()
 
-    arr = np.array([[-2, 0], [0, 2]])
-    s = sparse.COO.from_numpy(arr)
+    result = sparse_func(s_arr, axis=axis).todense()
+    expected = np_func(s_arr.todense(), axis=axis)
+
+    np.testing.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize("func", [sparse.argmax, sparse.argmin])
+def test_argmax_argmin_constraint(func):
+    s = sparse.COO.from_numpy(np.full((2, 2), 2), fill_value=2)
 
     with pytest.raises(
-        ValueError, match=r"None of the non-zero values can be (lt|gt) the fill value"
+        ValueError, match="axis 2 is out of bounds for array of dimension 2"
     ):
-        func(s, axis=0)
+        func(s, axis=2)
 
 
 @pytest.mark.parametrize("config", [(np.inf, "isinf"), (np.nan, "isnan")])