From 87fa69a4f5edbf0c194225c45b82770e87ff319c Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 25 Mar 2025 23:22:14 -0700 Subject: [PATCH 01/13] Update integer advanced indexing for array API 2024 array API 2024 spec requires advanced indexing with integer arrays, and when advanced indexing behavior is triggered, integral scalars are to be converted to arrays and broadcast with the other indices --- dpctl/tensor/_copy_utils.py | 75 +++++++++++++++++++----- dpctl/tensor/_slicing.pxi | 44 ++++++++------ dpctl/tensor/_usmarray.pyx | 45 +++++++------- dpctl/tests/test_usm_ndarray_indexing.py | 10 +++- 4 files changed, 117 insertions(+), 57 deletions(-) diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index a3e925bc1c..94c8ae0ab8 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -15,6 +15,7 @@ # limitations under the License. import builtins import operator +from numbers import Integral import numpy as np @@ -819,15 +820,26 @@ def _take_multi_index(ary, inds, p, mode=0): ] if not isinstance(inds, (list, tuple)): inds = (inds,) + any_usmarray = False for ind in inds: - if not isinstance(ind, dpt.usm_ndarray): - raise TypeError("all elements of `ind` expected to be usm_ndarrays") - queues_.append(ind.sycl_queue) - usm_types_.append(ind.usm_type) - if ind.dtype.kind not in "ui": - raise IndexError( - "arrays used as indices must be of integer (or boolean) type" + if isinstance(ind, dpt.usm_ndarray): + any_usmarray = True + if ind.dtype.kind not in "ui": + raise IndexError( + "arrays used as indices must be of integer (or boolean) " + "type" + ) + queues_.append(ind.sycl_queue) + usm_types_.append(ind.usm_type) + elif not isinstance(ind, Integral): + raise TypeError( + "all elements of `ind` expected to be usm_ndarrays " + "or integers" ) + if not any_usmarray: + raise TypeError( + "at least one element of `ind` expected to be a usm_ndarray" + ) res_usm_type = dpctl.utils.get_coerced_usm_type(usm_types_) exec_q = dpctl.utils.get_execution_queue(queues_) if exec_q is None: @@ -838,6 +850,18 @@ def _take_multi_index(ary, inds, p, mode=0): "be associated with the same queue." ) if len(inds) > 1: + inds = tuple( + map( + lambda ind: ( + ind + if isinstance(ind, dpt.usm_ndarray) + else dpt.asarray( + ind, usm_type=res_usm_type, sycl_queue=exec_q + ) + ), + inds, + ) + ) ind_dt = dpt.result_type(*inds) # ind arrays have been checked to be of integer dtype if ind_dt.kind not in "ui": @@ -968,15 +992,26 @@ def _put_multi_index(ary, inds, p, vals, mode=0): ] if not isinstance(inds, (list, tuple)): inds = (inds,) + any_usmarray = False for ind in inds: - if not isinstance(ind, dpt.usm_ndarray): - raise TypeError("all elements of `ind` expected to be usm_ndarrays") - queues_.append(ind.sycl_queue) - usm_types_.append(ind.usm_type) - if ind.dtype.kind not in "ui": - raise IndexError( - "arrays used as indices must be of integer (or boolean) type" + if isinstance(ind, dpt.usm_ndarray): + any_usmarray = True + if ind.dtype.kind not in "ui": + raise IndexError( + "arrays used as indices must be of integer (or boolean) " + "type" + ) + queues_.append(ind.sycl_queue) + usm_types_.append(ind.usm_type) + elif not isinstance(ind, Integral): + raise TypeError( + "all elements of `ind` expected to be usm_ndarrays " + "or integers" ) + if not any_usmarray: + raise TypeError( + "at least one element of `ind` expected to be a usm_ndarray" + ) vals_usm_type = dpctl.utils.get_coerced_usm_type(usm_types_) exec_q = dpctl.utils.get_execution_queue(queues_) if exec_q is not None: @@ -994,6 +1029,18 @@ def _put_multi_index(ary, inds, p, vals, mode=0): "be associated with the same queue." ) if len(inds) > 1: + inds = tuple( + map( + lambda ind: ( + ind + if isinstance(ind, dpt.usm_ndarray) + else dpt.asarray( + ind, usm_type=vals_usm_type, sycl_queue=exec_q + ) + ), + inds, + ) + ) ind_dt = dpt.result_type(*inds) # ind arrays have been checked to be of integer dtype if ind_dt.kind not in "ui": diff --git a/dpctl/tensor/_slicing.pxi b/dpctl/tensor/_slicing.pxi index 240e42a61d..b5bd3ab565 100644 --- a/dpctl/tensor/_slicing.pxi +++ b/dpctl/tensor/_slicing.pxi @@ -46,11 +46,7 @@ cdef Py_ssize_t _slice_len( cdef bint _is_integral(object x) except *: """Gives True if x is an integral slice spec""" if isinstance(x, usm_ndarray): - if x.ndim > 0: - return False - if x.dtype.kind not in "ui": - return False - return True + return False if isinstance(x, bool): return False if isinstance(x, int): @@ -179,10 +175,12 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): if array_streak_started: array_streak_interrupted = True elif _is_integral(i): - explicit_index += 1 axes_referenced += 1 if array_streak_started: - array_streak_interrupted = True + # integers converted to arrays in this case + array_count += 1 + else: + explicit_index += 1 elif isinstance(i, usm_ndarray): if not seen_arrays_yet: seen_arrays_yet = True @@ -196,7 +194,7 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): dt_k = i.dtype.kind if dt_k == "b" and i.ndim > 0: axes_referenced += i.ndim - elif dt_k in "ui" and i.ndim > 0: + elif dt_k in "ui": axes_referenced += 1 else: raise IndexError( @@ -260,20 +258,28 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): new_strides.append(0) elif _is_integral(ind_i): ind_i = ind_i.__index__() - if 0 <= ind_i < shape[k]: + if advanced_start_pos_set: + # integers converted to arrays in this case + new_advanced_ind.append(ind_i) k_new = k + 1 - if not is_empty: - new_offset = new_offset + ind_i * strides[k] - k = k_new - elif -shape[k] <= ind_i < 0: - k_new = k + 1 - if not is_empty: - new_offset = new_offset + (shape[k] + ind_i) * strides[k] + new_shape.extend(shape[k:k_new]) + new_strides.extend(strides[k:k_new]) k = k_new else: - raise IndexError( - ("Index {0} is out of range for " - "axes {1} with size {2}").format(ind_i, k, shape[k])) + if 0 <= ind_i < shape[k]: + k_new = k + 1 + if not is_empty: + new_offset = new_offset + ind_i * strides[k] + k = k_new + elif -shape[k] <= ind_i < 0: + k_new = k + 1 + if not is_empty: + new_offset = new_offset + (shape[k] + ind_i) * strides[k] + k = k_new + else: + raise IndexError( + ("Index {0} is out of range for " + "axes {1} with size {2}").format(ind_i, k, shape[k])) elif isinstance(ind_i, usm_ndarray): if not advanced_start_pos_set: new_advanced_start_pos = len(new_shape) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 91f14660fb..47b072d2b0 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -161,7 +161,6 @@ cdef void _validate_and_use_stream(object stream, c_dpctl.SyclQueue self_queue) ev = self_queue.submit_barrier() stream.submit_barrier(dependent_events=[ev]) - cdef class usm_ndarray: """ usm_ndarray(shape, dtype=None, strides=None, buffer="device", \ offset=0, order="C", buffer_ctor_kwargs=dict(), \ @@ -962,28 +961,30 @@ cdef class usm_ndarray: return res from ._copy_utils import _extract_impl, _nonzero_impl, _take_multi_index - if len(adv_ind) == 1 and adv_ind[0].dtype == dpt_bool: - key_ = adv_ind[0] - adv_ind_end_p = key_.ndim + adv_ind_start_p - if adv_ind_end_p > res.ndim: - raise IndexError("too many indices for the array") - key_shape = key_.shape - arr_shape = res.shape[adv_ind_start_p:adv_ind_end_p] - for i in range(key_.ndim): - if matching: - if not key_shape[i] == arr_shape[i] and key_shape[i] > 0: - matching = 0 - if not matching: - raise IndexError("boolean index did not match indexed array in dimensions") - res = _extract_impl(res, key_, axis=adv_ind_start_p) - res.flags_ = _copy_writable(res.flags_, self.flags_) - return res - if any(ind.dtype == dpt_bool for ind in adv_ind): + # if len(adv_ind == 1), the (only) element is always an array + if len(adv_ind) == 1 and adv_ind[0].dtype == dpt_bool: + key_ = adv_ind[0] + adv_ind_end_p = key_.ndim + adv_ind_start_p + if adv_ind_end_p > res.ndim: + raise IndexError("too many indices for the array") + key_shape = key_.shape + arr_shape = res.shape[adv_ind_start_p:adv_ind_end_p] + for i in range(key_.ndim): + if matching: + if not key_shape[i] == arr_shape[i] and key_shape[i] > 0: + matching = 0 + if not matching: + raise IndexError("boolean index did not match indexed array in dimensions") + res = _extract_impl(res, key_, axis=adv_ind_start_p) + res.flags_ = _copy_writable(res.flags_, self.flags_) + return res + + if any((isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool) for ind in adv_ind): adv_ind_int = list() for ind in adv_ind: - if ind.dtype == dpt_bool: - adv_ind_int.extend(_nonzero_impl(ind)) + if isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool: + adv_ind_int.extend(_nonzero_impl(ind)) else: adv_ind_int.append(ind) res = _take_multi_index(res, tuple(adv_ind_int), adv_ind_start_p) @@ -1433,10 +1434,10 @@ cdef class usm_ndarray: _place_impl(Xv, adv_ind[0], rhs, axis=adv_ind_start_p) return - if any(ind.dtype == dpt_bool for ind in adv_ind): + if any((isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool) for ind in adv_ind): adv_ind_int = list() for ind in adv_ind: - if ind.dtype == dpt_bool: + if isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool: adv_ind_int.extend(_nonzero_impl(ind)) else: adv_ind_int.append(ind) diff --git a/dpctl/tests/test_usm_ndarray_indexing.py b/dpctl/tests/test_usm_ndarray_indexing.py index 78501580a8..4d8a394340 100644 --- a/dpctl/tests/test_usm_ndarray_indexing.py +++ b/dpctl/tests/test_usm_ndarray_indexing.py @@ -252,8 +252,14 @@ def test_advanced_slice5(): q = get_queue_or_skip() ii = dpt.asarray([1, 2], sycl_queue=q) x = _make_3d("i4", q) - with pytest.raises(IndexError): - x[ii, 0, ii] + y = x[ii, 0, ii] + assert isinstance(y, dpt.usm_ndarray) + # 0 broadcast to [0, 0] per array API + assert y.shape == ii.shape + assert _all_equal( + (x[ii[i], 0, ii[i]] for i in range(ii.shape[0])), + (y[i] for i in range(ii.shape[0])), + ) def test_advanced_slice6(): From 47639da35df63b20ac7469125e57768c19acbbd3 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 26 Mar 2025 14:39:25 -0700 Subject: [PATCH 02/13] only treat integers as arrays when arrays have been consecutive this fixes cases like `x[..., arr1, int1, arr2, int2, :, int3]`, the final int will be treated as an integer instead of an array, and `arr1` through `int2` will be treated as arrays --- dpctl/tensor/_slicing.pxi | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/dpctl/tensor/_slicing.pxi b/dpctl/tensor/_slicing.pxi index b5bd3ab565..bd4e909f27 100644 --- a/dpctl/tensor/_slicing.pxi +++ b/dpctl/tensor/_slicing.pxi @@ -176,7 +176,7 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): array_streak_interrupted = True elif _is_integral(i): axes_referenced += 1 - if array_streak_started: + if array_streak_started and not array_streak_interrupted: # integers converted to arrays in this case array_count += 1 else: @@ -227,6 +227,7 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): advanced_start_pos_set = False new_offset = offset is_empty = False + array_streak = False for i in range(len(ind)): ind_i = ind[i] if (ind_i is Ellipsis): @@ -237,9 +238,13 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): is_empty = True new_offset = offset k = k_new + if array_streak: + array_streak = False elif ind_i is None: new_shape.append(1) new_strides.append(0) + if array_streak: + array_streak = False elif isinstance(ind_i, slice): k_new = k + 1 sl_start, sl_stop, sl_step = ind_i.indices(shape[k]) @@ -253,13 +258,16 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): is_empty = True new_offset = offset k = k_new + if array_streak: + array_streak = False elif _is_boolean(ind_i): new_shape.append(1 if ind_i else 0) new_strides.append(0) + if array_streak: + array_streak = False elif _is_integral(ind_i): ind_i = ind_i.__index__() - if advanced_start_pos_set: - # integers converted to arrays in this case + if array_streak: new_advanced_ind.append(ind_i) k_new = k + 1 new_shape.extend(shape[k:k_new]) @@ -281,6 +289,8 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): ("Index {0} is out of range for " "axes {1} with size {2}").format(ind_i, k, shape[k])) elif isinstance(ind_i, usm_ndarray): + if not array_streak: + array_streak = True if not advanced_start_pos_set: new_advanced_start_pos = len(new_shape) advanced_start_pos_set = True From b18fe9d796b3a13091f7b126ee36907589b75245 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 26 Mar 2025 16:10:52 -0700 Subject: [PATCH 03/13] raise for OOB indices even if they will be converted to arrays since the index is on the host, we can raise like we normally would, which follows NumPy behavior --- dpctl/tensor/_slicing.pxi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dpctl/tensor/_slicing.pxi b/dpctl/tensor/_slicing.pxi index bd4e909f27..2d45a5c91b 100644 --- a/dpctl/tensor/_slicing.pxi +++ b/dpctl/tensor/_slicing.pxi @@ -268,6 +268,11 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): elif _is_integral(ind_i): ind_i = ind_i.__index__() if array_streak: + # integer will be converted to an array, still raise if OOB + if not (0 <= ind_i < shape[k] or -shape[k] <= ind_i < 0): + raise IndexError( + ("Index {0} is out of range for " + "axes {1} with size {2}").format(ind_i, k, shape[k])) new_advanced_ind.append(ind_i) k_new = k + 1 new_shape.extend(shape[k:k_new]) From 9a7705eefbcbc09bfe39b6a07c80d14c4abdae36 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 2 Apr 2025 18:45:12 -0700 Subject: [PATCH 04/13] Add common utilities for _put_multi_index and _take_multi_index Reduces code duplication --- dpctl/tensor/_copy_utils.py | 178 +++++++++++++++++------------------- 1 file changed, 84 insertions(+), 94 deletions(-) diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index 94c8ae0ab8..a2189defe7 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -800,6 +800,79 @@ def _nonzero_impl(ary): return res +def _validate_indices(inds, queue_list, usm_type_list): + """ + Utility for validating indices are usm_ndarray of integral dtype or Python + integers. At least one must be an array. + + For each array, the queue and usm type are appended to `queue_list` and + `usm_type_list`, respectively. + """ + any_usmarray = False + for ind in inds: + if isinstance(ind, dpt.usm_ndarray): + any_usmarray = True + if ind.dtype.kind not in "ui": + raise IndexError( + "arrays used as indices must be of integer (or boolean) " + "type" + ) + queue_list.append(ind.sycl_queue) + usm_type_list.append(ind.usm_type) + elif not isinstance(ind, Integral): + raise TypeError( + "all elements of `ind` expected to be usm_ndarrays " + f"or integers, found {type(ind)}" + ) + if not any_usmarray: + raise TypeError( + "at least one element of `inds` expected to be a usm_ndarray" + ) + return inds + + +def _prepare_indices_arrays(inds, q, usm_type): + """ + Utility taking a mix of usm_ndarray and possibly Python int scalar indices, + a queue (assumed to be common to arrays in inds), and a usm type. + + Python scalar integers are promoted to arrays on the provided queue and + with the provided usm type. All arrays are then promoted to a common + integral type (if possible) before being broadcast to a common shape. + """ + # scalar integers -> arrays + inds = tuple( + map( + lambda ind: ( + ind + if isinstance(ind, dpt.usm_ndarray) + else dpt.asarray(ind, usm_type=usm_type, sycl_queue=q) + ), + inds, + ) + ) + + # promote to a common integral type if possible + ind_dt = dpt.result_type(*inds) + if ind_dt.kind not in "ui": + raise ValueError( + "cannot safely promote indices to an integer data type" + ) + inds = tuple( + map( + lambda ind: ( + ind if ind.dtype == ind_dt else dpt.astype(ind, ind_dt) + ), + inds, + ) + ) + + # broadcast + inds = dpt.broadcast_arrays(*inds) + + return inds + + def _take_multi_index(ary, inds, p, mode=0): if not isinstance(ary, dpt.usm_ndarray): raise TypeError( @@ -820,26 +893,8 @@ def _take_multi_index(ary, inds, p, mode=0): ] if not isinstance(inds, (list, tuple)): inds = (inds,) - any_usmarray = False - for ind in inds: - if isinstance(ind, dpt.usm_ndarray): - any_usmarray = True - if ind.dtype.kind not in "ui": - raise IndexError( - "arrays used as indices must be of integer (or boolean) " - "type" - ) - queues_.append(ind.sycl_queue) - usm_types_.append(ind.usm_type) - elif not isinstance(ind, Integral): - raise TypeError( - "all elements of `ind` expected to be usm_ndarrays " - "or integers" - ) - if not any_usmarray: - raise TypeError( - "at least one element of `ind` expected to be a usm_ndarray" - ) + + _validate_indices(inds, queues_, usm_types_) res_usm_type = dpctl.utils.get_coerced_usm_type(usm_types_) exec_q = dpctl.utils.get_execution_queue(queues_) if exec_q is None: @@ -849,34 +904,10 @@ def _take_multi_index(ary, inds, p, mode=0): "Use `usm_ndarray.to_device` method to migrate data to " "be associated with the same queue." ) + if len(inds) > 1: - inds = tuple( - map( - lambda ind: ( - ind - if isinstance(ind, dpt.usm_ndarray) - else dpt.asarray( - ind, usm_type=res_usm_type, sycl_queue=exec_q - ) - ), - inds, - ) - ) - ind_dt = dpt.result_type(*inds) - # ind arrays have been checked to be of integer dtype - if ind_dt.kind not in "ui": - raise ValueError( - "cannot safely promote indices to an integer data type" - ) - inds = tuple( - map( - lambda ind: ( - ind if ind.dtype == ind_dt else dpt.astype(ind, ind_dt) - ), - inds, - ) - ) - inds = dpt.broadcast_arrays(*inds) + inds = _prepare_indices_arrays(inds, exec_q, res_usm_type) + ind0 = inds[0] ary_sh = ary.shape p_end = p + len(inds) @@ -992,26 +1023,9 @@ def _put_multi_index(ary, inds, p, vals, mode=0): ] if not isinstance(inds, (list, tuple)): inds = (inds,) - any_usmarray = False - for ind in inds: - if isinstance(ind, dpt.usm_ndarray): - any_usmarray = True - if ind.dtype.kind not in "ui": - raise IndexError( - "arrays used as indices must be of integer (or boolean) " - "type" - ) - queues_.append(ind.sycl_queue) - usm_types_.append(ind.usm_type) - elif not isinstance(ind, Integral): - raise TypeError( - "all elements of `ind` expected to be usm_ndarrays " - "or integers" - ) - if not any_usmarray: - raise TypeError( - "at least one element of `ind` expected to be a usm_ndarray" - ) + + _validate_indices(inds, queues_, usm_types_) + vals_usm_type = dpctl.utils.get_coerced_usm_type(usm_types_) exec_q = dpctl.utils.get_execution_queue(queues_) if exec_q is not None: @@ -1028,34 +1042,10 @@ def _put_multi_index(ary, inds, p, vals, mode=0): "Use `usm_ndarray.to_device` method to migrate data to " "be associated with the same queue." ) + if len(inds) > 1: - inds = tuple( - map( - lambda ind: ( - ind - if isinstance(ind, dpt.usm_ndarray) - else dpt.asarray( - ind, usm_type=vals_usm_type, sycl_queue=exec_q - ) - ), - inds, - ) - ) - ind_dt = dpt.result_type(*inds) - # ind arrays have been checked to be of integer dtype - if ind_dt.kind not in "ui": - raise ValueError( - "cannot safely promote indices to an integer data type" - ) - inds = tuple( - map( - lambda ind: ( - ind if ind.dtype == ind_dt else dpt.astype(ind, ind_dt) - ), - inds, - ) - ) - inds = dpt.broadcast_arrays(*inds) + inds = _prepare_indices_arrays(inds, exec_q, vals_usm_type) + ind0 = inds[0] ary_sh = ary.shape p_end = p + len(inds) From 25fb1a2a3e1d5371355d72d361080b25b7e2fa7a Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 2 Apr 2025 18:52:00 -0700 Subject: [PATCH 05/13] Remove redundant IndexError in _slicing.pxi --- dpctl/tensor/_slicing.pxi | 2 -- 1 file changed, 2 deletions(-) diff --git a/dpctl/tensor/_slicing.pxi b/dpctl/tensor/_slicing.pxi index 2d45a5c91b..358b3e7f26 100644 --- a/dpctl/tensor/_slicing.pxi +++ b/dpctl/tensor/_slicing.pxi @@ -308,8 +308,6 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): new_shape.extend(shape[k:k_new]) new_strides.extend(strides[k:k_new]) k = k_new - else: - raise IndexError new_shape.extend(shape[k:]) new_strides.extend(strides[k:]) new_shape_len += len(shape) - k From 14a55d004d3e6e0a43002a9d2ee0b6dd3cb7b6c1 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 2 Apr 2025 18:56:26 -0700 Subject: [PATCH 06/13] Fix over-indentation Typos from advanced indexing updates --- dpctl/tensor/_usmarray.pyx | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 47b072d2b0..a375bf93fe 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -964,27 +964,27 @@ cdef class usm_ndarray: # if len(adv_ind == 1), the (only) element is always an array if len(adv_ind) == 1 and adv_ind[0].dtype == dpt_bool: - key_ = adv_ind[0] - adv_ind_end_p = key_.ndim + adv_ind_start_p - if adv_ind_end_p > res.ndim: - raise IndexError("too many indices for the array") - key_shape = key_.shape - arr_shape = res.shape[adv_ind_start_p:adv_ind_end_p] - for i in range(key_.ndim): - if matching: - if not key_shape[i] == arr_shape[i] and key_shape[i] > 0: - matching = 0 - if not matching: - raise IndexError("boolean index did not match indexed array in dimensions") - res = _extract_impl(res, key_, axis=adv_ind_start_p) - res.flags_ = _copy_writable(res.flags_, self.flags_) - return res + key_ = adv_ind[0] + adv_ind_end_p = key_.ndim + adv_ind_start_p + if adv_ind_end_p > res.ndim: + raise IndexError("too many indices for the array") + key_shape = key_.shape + arr_shape = res.shape[adv_ind_start_p:adv_ind_end_p] + for i in range(key_.ndim): + if matching: + if not key_shape[i] == arr_shape[i] and key_shape[i] > 0: + matching = 0 + if not matching: + raise IndexError("boolean index did not match indexed array in dimensions") + res = _extract_impl(res, key_, axis=adv_ind_start_p) + res.flags_ = _copy_writable(res.flags_, self.flags_) + return res if any((isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool) for ind in adv_ind): adv_ind_int = list() for ind in adv_ind: if isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool: - adv_ind_int.extend(_nonzero_impl(ind)) + adv_ind_int.extend(_nonzero_impl(ind)) else: adv_ind_int.append(ind) res = _take_multi_index(res, tuple(adv_ind_int), adv_ind_start_p) From 72f89387306ce729f6152daa2f7552616959e054 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 2 Apr 2025 22:30:15 -0700 Subject: [PATCH 07/13] Add tests for uncovered branches in advanced indexing --- dpctl/tests/test_usm_ndarray_indexing.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/dpctl/tests/test_usm_ndarray_indexing.py b/dpctl/tests/test_usm_ndarray_indexing.py index 4d8a394340..a4f097cefb 100644 --- a/dpctl/tests/test_usm_ndarray_indexing.py +++ b/dpctl/tests/test_usm_ndarray_indexing.py @@ -22,6 +22,7 @@ import dpctl import dpctl.tensor as dpt import dpctl.tensor._tensor_impl as ti +from dpctl.tensor._copy_utils import _take_multi_index from dpctl.utils import ExecutionPlacementError from .helper import get_queue_or_skip, skip_if_dtype_not_supported @@ -1962,3 +1963,17 @@ def test_take_out_errors(): out_bad_q = dpt.empty(ind.shape, dtype=x.dtype, sycl_queue=q2) with pytest.raises(dpctl.utils.ExecutionPlacementError): dpt.take(x, ind, out=out_bad_q) + + +def test_getitem_impl_fn_invalid_inp(): + get_queue_or_skip() + + x = dpt.ones((10, 10), dtype="i4") + + bad_ind_type = (dpt.ones((), dtype="i4"), 2.0) + with pytest.raises(TypeError): + _take_multi_index(x, bad_ind_type, 0, 0) + + no_array_inds = (2, 3) + with pytest.raises(TypeError): + _take_multi_index(x, no_array_inds, 0, 0) From b38478bca93d5cc9b17e1e186b9ffa5a32c2218c Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 7 Apr 2025 18:31:18 -0700 Subject: [PATCH 08/13] Add another test for advanced indexing Test for the case where an basic integral index appears between two integral arrays, followed by a basic index, and then followed by `:` --- dpctl/tests/test_usm_ndarray_indexing.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/dpctl/tests/test_usm_ndarray_indexing.py b/dpctl/tests/test_usm_ndarray_indexing.py index a4f097cefb..911964a257 100644 --- a/dpctl/tests/test_usm_ndarray_indexing.py +++ b/dpctl/tests/test_usm_ndarray_indexing.py @@ -402,6 +402,24 @@ def test_advanced_slice13(): assert (dpt.asnumpy(y) == dpt.asnumpy(expected)).all() +def test_advanced_slice14(): + q = get_queue_or_skip() + ii = dpt.asarray([1, 2], sycl_queue=q) + x = dpt.reshape(dpt.arange(3**5, dtype="i4", sycl_queue=q), (3,) * 5) + y = x[ii, 0, ii, 1, :] + assert isinstance(y, dpt.usm_ndarray) + # integers broadcast to ii.shape per array API + assert y.shape == ii.shape + x.shape[-1:] + assert _all_equal( + ( + x[ii[i], 0, ii[i], 1, k] + for i in range(ii.shape[0]) + for k in range(x.shape[-1]) + ), + (y[i, k] for i in range(ii.shape[0]) for k in range(x.shape[-1])), + ) + + def test_boolean_indexing_validation(): get_queue_or_skip() x = dpt.zeros(10, dtype="i4") From d873f350d5b51543dde961eea8ebd571849ee39e Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 7 Apr 2025 18:34:05 -0700 Subject: [PATCH 09/13] Add another test for advanced indexing errors --- dpctl/tests/test_usm_ndarray_indexing.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dpctl/tests/test_usm_ndarray_indexing.py b/dpctl/tests/test_usm_ndarray_indexing.py index 911964a257..0a6b28394d 100644 --- a/dpctl/tests/test_usm_ndarray_indexing.py +++ b/dpctl/tests/test_usm_ndarray_indexing.py @@ -420,6 +420,15 @@ def test_advanced_slice14(): ) +def test_advanced_slice15(): + q = get_queue_or_skip() + ii = dpt.asarray([1, 2], sycl_queue=q) + x = dpt.reshape(dpt.arange(3**5, dtype="i4", sycl_queue=q), (3,) * 5) + # : cannot appear between two integral arrays + with pytest.raises(IndexError): + x[ii, 0, ii, :, ii] + + def test_boolean_indexing_validation(): get_queue_or_skip() x = dpt.zeros(10, dtype="i4") From 0784d706245930945cf0373d5c272d2eab20c84c Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 8 Apr 2025 20:26:43 -0700 Subject: [PATCH 10/13] Partial reversion of advanced indexing changes 0D array indices are treated as Python scalars and moved to the host when not part of other, consecutive array indices This means that 0D arrays also can't start a series of consecutive advanced indices --- dpctl/tensor/_slicing.pxi | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/dpctl/tensor/_slicing.pxi b/dpctl/tensor/_slicing.pxi index 358b3e7f26..c2de903cb6 100644 --- a/dpctl/tensor/_slicing.pxi +++ b/dpctl/tensor/_slicing.pxi @@ -46,7 +46,11 @@ cdef Py_ssize_t _slice_len( cdef bint _is_integral(object x) except *: """Gives True if x is an integral slice spec""" if isinstance(x, usm_ndarray): - return False + if x.ndim > 0: + return False + if x.dtype.kind not in "ui": + return False + return True if isinstance(x, bool): return False if isinstance(x, int): @@ -194,7 +198,7 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): dt_k = i.dtype.kind if dt_k == "b" and i.ndim > 0: axes_referenced += i.ndim - elif dt_k in "ui": + elif dt_k in "ui" and i.ndim > 0: axes_referenced += 1 else: raise IndexError( @@ -266,19 +270,21 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): if array_streak: array_streak = False elif _is_integral(ind_i): - ind_i = ind_i.__index__() if array_streak: - # integer will be converted to an array, still raise if OOB - if not (0 <= ind_i < shape[k] or -shape[k] <= ind_i < 0): - raise IndexError( - ("Index {0} is out of range for " - "axes {1} with size {2}").format(ind_i, k, shape[k])) + if not isinstance(ind_i, usm_ndarray): + ind_i = ind_i.__index__() + # integer will be converted to an array, still raise if OOB + if not (0 <= ind_i < shape[k] or -shape[k] <= ind_i < 0): + raise IndexError( + ("Index {0} is out of range for " + "axes {1} with size {2}").format(ind_i, k, shape[k])) new_advanced_ind.append(ind_i) k_new = k + 1 new_shape.extend(shape[k:k_new]) new_strides.extend(strides[k:k_new]) k = k_new else: + ind_i = ind_i.__index__() if 0 <= ind_i < shape[k]: k_new = k + 1 if not is_empty: From 87db03a4b8cff575f934c8c019cc87e03deea11c Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 9 Apr 2025 09:03:31 -0700 Subject: [PATCH 11/13] Use operator.index in _slicing.pxi --- dpctl/tensor/_slicing.pxi | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dpctl/tensor/_slicing.pxi b/dpctl/tensor/_slicing.pxi index c2de903cb6..bc7b13f7c7 100644 --- a/dpctl/tensor/_slicing.pxi +++ b/dpctl/tensor/_slicing.pxi @@ -15,6 +15,7 @@ # limitations under the License. import numbers +from operator import index from cpython.buffer cimport PyObject_CheckBuffer @@ -64,7 +65,7 @@ cdef bint _is_integral(object x) except *: return False if callable(getattr(x, "__index__", None)): try: - x.__index__() + index(x) except (TypeError, ValueError): return False return True @@ -136,7 +137,7 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): else: return ((0,) + shape, (0,) + strides, offset, _no_advanced_ind, _no_advanced_pos) elif _is_integral(ind): - ind = ind.__index__() + ind = index(ind) new_shape = shape[1:] new_strides = strides[1:] is_empty = any(sh_i == 0 for sh_i in new_shape) @@ -272,7 +273,7 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): elif _is_integral(ind_i): if array_streak: if not isinstance(ind_i, usm_ndarray): - ind_i = ind_i.__index__() + ind_i = index(ind_i) # integer will be converted to an array, still raise if OOB if not (0 <= ind_i < shape[k] or -shape[k] <= ind_i < 0): raise IndexError( @@ -284,7 +285,7 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): new_strides.extend(strides[k:k_new]) k = k_new else: - ind_i = ind_i.__index__() + ind_i = index(ind_i) if 0 <= ind_i < shape[k]: k_new = k + 1 if not is_empty: From 2fdac4cb555aa3c82f9a6d036076d3128bc7d9d0 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 9 Apr 2025 09:07:42 -0700 Subject: [PATCH 12/13] Adds a test that 0D integral indices are treated as Python scalars when not among consecutive integer arrays --- dpctl/tests/test_usm_ndarray_indexing.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/dpctl/tests/test_usm_ndarray_indexing.py b/dpctl/tests/test_usm_ndarray_indexing.py index 0a6b28394d..eeb97461fd 100644 --- a/dpctl/tests/test_usm_ndarray_indexing.py +++ b/dpctl/tests/test_usm_ndarray_indexing.py @@ -429,6 +429,17 @@ def test_advanced_slice15(): x[ii, 0, ii, :, ii] +def test_advanced_slice16(): + q = get_queue_or_skip() + ii = dpt.asarray(1, sycl_queue=q) + i0 = dpt.asarray(False, sycl_queue=q) + i1 = dpt.asarray(True, sycl_queue=q) + x = dpt.reshape(dpt.arange(3**5, dtype="i4", sycl_queue=q), (3,) * 5) + y = x[ii, i0, ii, i1, :] + # TODO: add a shape check here when discrepancy with NumPy is investigated + assert isinstance(y, dpt.usm_ndarray) + + def test_boolean_indexing_validation(): get_queue_or_skip() x = dpt.zeros(10, dtype="i4") From 8a210c0a2907a5617ffac6bf635b3be0573739e9 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Fri, 28 Feb 2025 19:47:16 -0800 Subject: [PATCH 13/13] Add ARRAY_API_TESTS_VERSION to array API conformity job and set to 2024.12 --- .github/workflows/conda-package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 5102ccbdcb..e0a8145056 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -771,6 +771,7 @@ jobs: cd /home/runner/work/array-api-tests ${CONDA_PREFIX}/bin/python -c "import dpctl; dpctl.lsplatform()" export ARRAY_API_TESTS_MODULE=dpctl.tensor + export ARRAY_API_TESTS_VERSION=2024.12 ${CONDA_PREFIX}/bin/python -m pytest --json-report --json-report-file=$FILE --disable-deadline --skips-file ${GITHUB_WORKSPACE}/.github/workflows/array-api-skips.txt array_api_tests/ || true - name: Set Github environment variables shell: bash -l {0}