Skip to content

Commit

Permalink
Implement dpnp.digitize() (#1847)
Browse files Browse the repository at this point in the history
* Implement dpnp.digitize

* Update cupy tests for digitize func

* Update skipped_tests files

* Add tests in test_sycl_queue and test_usm_type

* Return pylint disable

* Handle empty bins

* Small update cupy tests

* Add dpnp tests for dpnp.digitize

* Increase code coverage

* Apply remarks

* Move tests from test_statistic to test_histogram

* Add test with different dtypes
  • Loading branch information
vlad-perevezentsev authored May 23, 2024
1 parent 9021941 commit 6c41c4f
Show file tree
Hide file tree
Showing 7 changed files with 196 additions and 119 deletions.
95 changes: 94 additions & 1 deletion dpnp/dpnp_iface_histograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import dpnp

__all__ = [
"digitize",
"histogram",
"histogram_bin_edges",
]
Expand Down Expand Up @@ -208,6 +209,98 @@ def _search_sorted_inclusive(a, v):
)


def digitize(x, bins, right=False):
"""
Return the indices of the bins to which each value in input array belongs.
For full documentation refer to :obj:`numpy.digitize`.
Parameters
----------
a : {dpnp.ndarray, usm_ndarray}
Input array to be binned.
bins : {dpnp.ndarray, usm_ndarray}
Array of bins. It has to be 1-dimensional and monotonic
increasing or decreasing.
right : bool, optional
Indicates whether the intervals include the right or the left bin edge.
Default: ``False``.
Returns
-------
indices : dpnp.ndarray
Array of indices with the same shape as `x`.
Notes
-----
This will not raise an exception when the input array is
not monotonic.
See Also
--------
:obj:`dpnp.bincount` : Count number of occurrences of each value in array
of non-negative integers.
:obj:`dpnp.histogram` : Compute the histogram of a data set.
:obj:`dpnp.unique` : Find the unique elements of an array.
:obj:`dpnp.searchsorted` : Find indices where elements should be inserted
to maintain order.
Examples
--------
>>> import dpnp as np
>>> x = np.array([0.2, 6.4, 3.0, 1.6])
>>> bins = np.array([0.0, 1.0, 2.5, 4.0, 10.0])
>>> inds = np.digitize(x, bins)
>>> inds
array([1, 4, 3, 2])
>>> for n in range(x.size):
... print(bins[inds[n]-1], "<=", x[n], "<", bins[inds[n]])
...
0. <= 0.2 < 1.
4. <= 6.4 < 10.
2.5 <= 3. < 4.
1. <= 1.6 < 2.5
>>> x = np.array([1.2, 10.0, 12.4, 15.5, 20.])
>>> bins = np.array([0, 5, 10, 15, 20])
>>> np.digitize(x, bins, right=True)
array([1, 2, 3, 4, 4])
>>> np.digitize(x, bins, right=False)
array([1, 3, 3, 4, 5])
"""

dpnp.check_supported_arrays_type(x, bins)

if dpnp.issubdtype(x.dtype, dpnp.complexfloating):
raise TypeError("x may not be complex")

if bins.ndim > 1:
raise ValueError("object too deep for desired array")
if bins.ndim < 1:
raise ValueError("object of too small depth for desired array")

# This is backwards because the arguments below are swapped
side = "left" if right else "right"

# Check if bins are monotonically increasing.
# If bins is empty, the array is considered to be increasing.
# If all bins are NaN, the array is considered to be decreasing.
if bins.size == 0:
bins_increasing = True
else:
bins_increasing = bins[0] <= bins[-1] or (
not dpnp.isnan(bins[0]) and dpnp.isnan(bins[-1])
)

if bins_increasing:
# Use dpnp.searchsorted directly if bins are increasing
return dpnp.searchsorted(bins, x, side=side)

# Reverse bins and adjust indices if bins are decreasing
return bins.size - dpnp.searchsorted(bins[::-1], x, side=side)


def histogram(a, bins=10, range=None, density=None, weights=None):
"""
Compute the histogram of a data set.
Expand Down Expand Up @@ -335,8 +428,8 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
n = dpnp.diff(cum_n)

if density:
db = dpnp.diff(bin_edges).astype(dpnp.default_float_type())
# pylint: disable=possibly-used-before-assignment
db = dpnp.diff(bin_edges).astype(dpnp.default_float_type())
return n / db / n.sum(), bin_edges

return n, bin_edges
Expand Down
55 changes: 0 additions & 55 deletions tests/skipped_tests.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -613,61 +613,6 @@ tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_rowvar
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_y

tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeInvalid::test_digitize_complex
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeInvalid::test_digitize_nd_bins
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_0_{right=True}::test_digitize_all_nan_bins
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_0_{right=True}::test_digitize_nan
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_0_{right=True}::test_digitize_nan_bins
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_0_{right=True}::test_digitize_nan_bins_decreasing
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_0_{right=True}::test_digitize_nan_bins_decreasing_repeated
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_0_{right=True}::test_digitize_nan_bins_repeated
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_0_{right=True}::test_searchsorted_inf
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_0_{right=True}::test_searchsorted_minf
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_1_{right=False}::test_digitize_all_nan_bins
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_1_{right=False}::test_digitize_nan
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_1_{right=False}::test_digitize_nan_bins
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_1_{right=False}::test_digitize_nan_bins_decreasing
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_1_{right=False}::test_digitize_nan_bins_decreasing_repeated
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_1_{right=False}::test_digitize_nan_bins_repeated
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_1_{right=False}::test_searchsorted_inf
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitizeNanInf_param_1_{right=False}::test_searchsorted_minf
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_0_{bins=[1.5, 2.5, 4.0, 6.0], increasing=True, right=True, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_10_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=False, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_11_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=False, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_12_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=True, right=True, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_13_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=True, right=True, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_14_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=True, right=True, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_15_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=True, right=False, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_16_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=True, right=False, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_17_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=True, right=False, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_18_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=False, right=True, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_19_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=False, right=True, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_1_{bins=[1.5, 2.5, 4.0, 6.0], increasing=True, right=True, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_20_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=False, right=True, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_21_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=False, right=False, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_22_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=False, right=False, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_23_{bins=[-1.0, 1.0, 2.5, 4.0, 20.0], increasing=False, right=False, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_24_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=True, right=True, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_25_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=True, right=True, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_26_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=True, right=True, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_27_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=True, right=False, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_28_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=True, right=False, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_29_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=True, right=False, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_2_{bins=[1.5, 2.5, 4.0, 6.0], increasing=True, right=True, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_30_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=False, right=True, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_31_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=False, right=True, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_32_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=False, right=True, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_33_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=False, right=False, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_34_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=False, right=False, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_35_{bins=[0.0, 1.0, 1.0, 4.0, 4.0, 10.0], increasing=False, right=False, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_3_{bins=[1.5, 2.5, 4.0, 6.0], increasing=True, right=False, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_4_{bins=[1.5, 2.5, 4.0, 6.0], increasing=True, right=False, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_5_{bins=[1.5, 2.5, 4.0, 6.0], increasing=True, right=False, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_6_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=True, shape=()}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_7_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=True, shape=(10,)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_8_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=True, shape=(6, 3, 3)}::test_digitize
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_9_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=False, shape=()}::test_digitize

tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[linear]
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[lower]
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[higher]
Expand Down
Loading

0 comments on commit 6c41c4f

Please sign in to comment.