Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement dpnp.digitize() #1847

Merged
merged 14 commits into from
May 23, 2024
Merged
95 changes: 94 additions & 1 deletion dpnp/dpnp_iface_histograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import dpnp

# Names exported by ``from <this module> import *``.
__all__ = [
"digitize",
"histogram",
"histogram_bin_edges",
]
Expand Down Expand Up @@ -208,6 +209,98 @@ def _search_sorted_inclusive(a, v):
)


def digitize(x, bins, right=False):
    """
    Return the indices of the bins to which each value in input array belongs.

    For full documentation refer to :obj:`numpy.digitize`.

    Parameters
    ----------
    x : {dpnp.ndarray, usm_ndarray}
        Input array to be binned. It may not have a complex data type.
    bins : {dpnp.ndarray, usm_ndarray}
        Array of bins. It has to be 1-dimensional and monotonic
        increasing or decreasing.
    right : bool, optional
        Indicates whether the intervals include the right or the left bin edge.
        Default: ``False``.

    Returns
    -------
    indices : dpnp.ndarray
        Array of indices with the same shape as `x`.

    Raises
    ------
    TypeError
        If `x` has a complex data type.
    ValueError
        If `bins` is not 1-dimensional.

    Notes
    -----
    This will not raise an exception when `bins` is not monotonic.
    The direction of `bins` is inferred only from its first and last
    elements, so the result for non-monotonic `bins` is unspecified.

    See Also
    --------
    :obj:`dpnp.bincount` : Count number of occurrences of each value in array
                           of non-negative integers.
    :obj:`dpnp.histogram` : Compute the histogram of a data set.
    :obj:`dpnp.unique` : Find the unique elements of an array.
    :obj:`dpnp.searchsorted` : Find indices where elements should be inserted
                               to maintain order.

    Examples
    --------
    >>> import dpnp as np
    >>> x = np.array([0.2, 6.4, 3.0, 1.6])
    >>> bins = np.array([0.0, 1.0, 2.5, 4.0, 10.0])
    >>> inds = np.digitize(x, bins)
    >>> inds
    array([1, 4, 3, 2])
    >>> for n in range(x.size):
    ...     print(bins[inds[n]-1], "<=", x[n], "<", bins[inds[n]])
    ...
    0. <= 0.2 < 1.
    4. <= 6.4 < 10.
    2.5 <= 3. < 4.
    1. <= 1.6 < 2.5

    >>> x = np.array([1.2, 10.0, 12.4, 15.5, 20.])
    >>> bins = np.array([0, 5, 10, 15, 20])
    >>> np.digitize(x, bins, right=True)
    array([1, 2, 3, 4, 4])
    >>> np.digitize(x, bins, right=False)
    array([1, 3, 3, 4, 5])

    """

    # NOTE(review): presumably rejects inputs that are not dpnp/usm arrays;
    # confirm against the helper's definition.
    dpnp.check_supported_arrays_type(x, bins)

    if dpnp.issubdtype(x.dtype, dpnp.complexfloating):
        raise TypeError("x may not be complex")

    if bins.ndim > 1:
        raise ValueError("object too deep for desired array")
    if bins.ndim < 1:
        raise ValueError("object of too small depth for desired array")

    # This is backwards because the arguments below are swapped:
    # `bins` is the sorted array being searched and `x` holds the values
    # to locate, so ``right=True`` maps to ``side="left"``.
    side = "left" if right else "right"

    # Check if bins are monotonically increasing.
    # If bins is empty, the array is considered to be increasing.
    # If all bins are NaN, the array is considered to be decreasing.
    if bins.size == 0:
        bins_increasing = True
    else:
        # A trailing NaN after a non-NaN first element still counts as
        # increasing, since the `<=` comparison with NaN is False.
        bins_increasing = bins[0] <= bins[-1] or (
            not dpnp.isnan(bins[0]) and dpnp.isnan(bins[-1])
        )

    if bins_increasing:
        # Use dpnp.searchsorted directly if bins are increasing
        return dpnp.searchsorted(bins, x, side=side)

    # Reverse bins and adjust indices if bins are decreasing
    return bins.size - dpnp.searchsorted(bins[::-1], x, side=side)


def histogram(a, bins=10, range=None, density=None, weights=None):
"""
Compute the histogram of a data set.
Expand Down Expand Up @@ -335,8 +428,8 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
n = dpnp.diff(cum_n)

if density:
db = dpnp.diff(bin_edges).astype(dpnp.default_float_type())
# pylint: disable=possibly-used-before-assignment
db = dpnp.diff(bin_edges).astype(dpnp.default_float_type())
return n / db / n.sum(), bin_edges

return n, bin_edges
Expand Down
55 changes: 0 additions & 55 deletions tests/skipped_tests.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -613,61 +613,6 @@ tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_rowvar
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_y

tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeInvalid::test_digitize_complex
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeInvalid::test_digitize_nd_bins
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_0_{right=True}::test_digitize_all_nan_bins
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_0_{right=True}::test_digitize_nan
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_0_{right=True}::test_digitize_nan_bins
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_0_{right=True}::test_digitize_nan_bins_decreasing
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_0_{right=True}::test_digitize_nan_bins_decreasing_repeated
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_0_{right=True}::test_digitize_nan_bins_repeated
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_0_{right=True}::test_searchsorted_inf
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_0_{right=True}::test_searchsorted_minf
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_1_{right=False}::test_digitize_all_nan_bins
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_1_{right=False}::test_digitize_nan
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_1_{right=False}::test_digitize_nan_bins
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_1_{right=False}::test_digitize_nan_bins_decreasing
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_1_{right=False}::test_digitize_nan_bins_decreasing_repeated
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_1_{right=False}::test_digitize_nan_bins_repeated
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_1_{right=False}::test_searchsorted_inf
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_1_{right=False}::test_searchsorted_minf
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_0_{bins=[1.5, 2.5, 4.0, 6.0], increasing=True, right=True, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_10_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=False, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_11_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=False, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_12_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=True, right=True, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_13_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=True, right=True, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_14_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=True, right=True, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_15_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=True, right=False, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_16_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=True, right=False, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_17_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=True, right=False, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_18_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=False, right=True, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_19_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=False, right=True, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_1_{bins=[1.5, 2.5, 4.0, 6.0], increasing=True, right=True, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_20_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=False, right=True, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_21_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=False, right=False, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_22_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=False, right=False, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_23_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=False, right=False, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_24_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=True, right=True, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_25_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=True, right=True, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_26_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=True, right=True, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_27_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=True, right=False, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_28_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=True, right=False, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_29_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=True, right=False, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_2_{bins=[1.5, 2.5, 4.0, 6.0], increasing=True, right=True, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_30_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=False, right=True, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_31_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=False, right=True, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_32_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=False, right=True, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_33_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=False, right=False, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_34_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=False, right=False, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_35_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=False, right=False, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_3_{bins=[1.5, 2.5, 4.0, 6.0], increasing=True, right=False, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_4_{bins=[1.5, 2.5, 4.0, 6.0], increasing=True, right=False, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_5_{bins=[1.5, 2.5, 4.0, 6.0], increasing=True, right=False, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_6_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=True, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_7_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=True, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_8_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=True, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_9_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=False, shape=()}::test_digitize

tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[linear]
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[lower]
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[higher]
Expand Down
Loading
Loading