From 2224ce2f06b2e9a6e01fd3c3a0a7723310018c82 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Sun, 5 Mar 2023 11:42:56 +0100 Subject: [PATCH] Add parameter out in dpnp.dot() (#1327) --- dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx | 35 +++++++--- dpnp/dpnp_array.py | 3 +- dpnp/dpnp_iface_linearalgebra.py | 67 +++++++++++-------- tests/skipped_tests.tbl | 4 -- tests/skipped_tests_gpu.tbl | 5 -- tests/test_dot.py | 23 +++---- tests/test_sycl_queue.py | 28 +++++--- tests/test_usm_type.py | 19 ++++++ .../cupy/linalg_tests/test_product.py | 1 - 9 files changed, 113 insertions(+), 72 deletions(-) diff --git a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx index c738cc75b70..91c1da88405 100644 --- a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -65,8 +65,9 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_matmul_t)(c_dpctl.DPCTLSyclQue const shape_elem_type *, const shape_elem_type * , const c_dpctl.DPCTLEventVectorRef) -cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, utils.dpnp_descriptor in_array2): - +cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, + utils.dpnp_descriptor in_array2, + utils.dpnp_descriptor out=None): cdef shape_type_c shape1, shape2 shape1 = in_array1.shape @@ -78,6 +79,7 @@ cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, utils.dpnp # get the FPTR data structure cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DOT_EXT, param1_type, param2_type) + cdef utils.dpnp_descriptor result ndim1 = in_array1.ndim ndim2 = in_array2.ndim @@ -89,7 +91,7 @@ cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, utils.dpnp elif ndim1 == 1 and ndim2 == 1: result_shape = () elif ndim1 == 1: # ndim2 > 1 - result_shape = shape2[:-1] + result_shape = shape2[::-2] if ndim2 == 2 else shape2[::2] elif ndim2 == 1: # ndim1 > 1 result_shape = shape1[:-1] else: @@ -101,13 +103,24 @@ cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, utils.dpnp result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(in_array1, in_array2) - # create result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) + if out is None: + # create result array with type given by FPTR data + result = utils.create_output_descriptor(result_shape, + kernel_data.return_type, + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue) + else: + result_type = dpnp_DPNPFuncType_to_dtype(< size_t > 
kernel_data.return_type) + if out.dtype != result_type: + utils.checker_throw_value_error('dot', 'out.dtype', out.dtype, result_type) + if out.shape != result_shape: + utils.checker_throw_value_error('dot', 'out.shape', out.shape, result_shape) + + result = out + + utils.get_common_usm_allocation(in_array1, result) # check USM allocation is common cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result.shape) cdef shape_type_c in_array1_shape = in_array1.shape diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index b3dfe7a339e..6c743f4e1c9 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -592,7 +592,8 @@ def diagonal(input, offset=0, axis1=0, axis2=1): return dpnp.diagonal(input, offset, axis1, axis2) - # 'dot', + def dot(self, other, out=None): + return dpnp.dot(self, other, out) @property def dtype(self): diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index 117a1b9b61d..a989f745c0a 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -44,9 +44,9 @@ from dpnp.dpnp_algo import * from dpnp.dpnp_utils import * import dpnp -import dpnp.config as config import numpy +import dpctl.tensor as dpt __all__ = [ @@ -62,18 +62,25 @@ ] -def dot(x1, x2, **kwargs): +def dot(x1, x2, out=None, **kwargs): """ - Returns the dot product of `x1` and `x2`. + Dot product of `x1` and `x2`. For full documentation refer to :obj:`numpy.dot`. + Returns + ------- + y : dpnp.ndarray + Returns the dot product of `x1` and `x2`. + If `out` is given, then it is returned. + Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as :obj:`dpnp.ndarray` of the same type. - Keyword arguments ``kwargs`` are currently unsupported. - Otherwise the functions will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. 
+ Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` cannot be scalars at the same time. + Keyword argument ``kwargs`` is currently unsupported. + Otherwise the functions will be executed sequentially on CPU. + Input array data types are limited by supported DPNP :ref:`Data types`. See Also -------- @@ -82,31 +89,37 @@ def dot(x1, x2, **kwargs): Examples -------- - >>> import dpnp as np - >>> np.dot(3, 4) - 12 - >>> a = np.array([1, 2, 3]) - >>> b = np.array([1, 2, 3]) - >>> np.dot(a, b) + >>> import dpnp as dp + >>> a = dp.array([1, 2, 3]) + >>> b = dp.array([1, 2, 3]) + >>> dp.dot(a, b) 14 """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) - if x1_desc and x2_desc and not kwargs: - # TODO: remove fallback with scalars when muliply backend func will support strides - if(x1_desc.ndim == 0 and x2_desc.strides is not None - or x2_desc.ndim == 0 and x1_desc.strides is not None): - pass - elif (x1_desc.ndim >= 1 and x2_desc.ndim > 1 and x1_desc.shape[-1] != x2_desc.shape[-2]): - pass - elif (x1_desc.ndim > 0 and x2_desc.ndim == 1 and x1_desc.shape[-1] != x2_desc.shape[0]): - pass - else: - return dpnp_dot(x1_desc, x2_desc).get_pyobj() + if kwargs: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least one of x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, 
alloc_queue=queue) + if x1_desc and x2_desc: + if out is not None: + if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): + raise TypeError("return array must be of supported array type") + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) + else: + out_desc = None + return dpnp_dot(x1_desc, x2_desc, out=out_desc).get_pyobj() - return call_origin(numpy.dot, x1, x2, **kwargs) + return call_origin(numpy.dot, x1, x2, out=out, **kwargs) def einsum(*args, **kwargs): diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 57a7d393e09..995ddd682c7 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -610,10 +610,6 @@ tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_9_{opt tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_float tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_int tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_invalid_sub1 -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_64_{shape=((2,), (2, 4)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_65_{shape=((2,), (2, 4)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_66_{shape=((2,), (2, 4)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_67_{shape=((2,), (2, 4)), trans_a=False, trans_b=False}::test_dot tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_invlarge tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_large tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_of_two diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 79debec15a2..e3e0f889b2d 100644 --- 
a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -812,10 +812,6 @@ tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWith tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_int tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_invalid_sub1 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_64_{shape=((2,), (2, 4)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_65_{shape=((2,), (2, 4)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_66_{shape=((2,), (2, 4)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_67_{shape=((2,), (2, 4)), trans_a=False, trans_b=False}::test_dot tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_invlarge tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_large tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_of_two @@ -827,7 +823,6 @@ tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transpose tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_int_axes tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_list_axes -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct:: tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_tensordot_zero_dim tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_dot_with_out_f_contiguous 
tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_multidim_vdot diff --git a/tests/test_dot.py b/tests/test_dot.py index ae6341ea909..b9cb5659973 100644 --- a/tests/test_dot.py +++ b/tests/test_dot.py @@ -1,13 +1,16 @@ import pytest +from .helper import get_all_dtypes import dpnp as inp import numpy +from numpy.testing import ( + assert_allclose, + assert_array_equal +) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) def test_dot_ones(type): n = 10**5 a = numpy.ones(n, dtype=type) @@ -17,12 +20,10 @@ def test_dot_ones(type): result = inp.dot(ia, ib) expected = numpy.dot(a, b) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) def test_dot_arange(type): n = 10**2 m = 10**3 @@ -33,12 +34,10 @@ def test_dot_arange(type): result = inp.dot(ia, ib) expected = numpy.dot(a, b) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) def test_multi_dot(type): n = 16 a = inp.reshape(inp.arange(n, dtype=type), (4, 4)) @@ -53,4 +52,4 @@ def test_multi_dot(type): result = inp.linalg.multi_dot([a, b, c, d]) expected = numpy.linalg.multi_dot([a1, b1, c1, d1]) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index ea36a0992b2..42cbe745951 100644 --- 
a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -297,7 +297,7 @@ def test_1in_1out(func, data, device): x = dpnp.array(data, device=device) result = getattr(dpnp, func)(x) - numpy.testing.assert_array_equal(result, expected) + assert_array_equal(result, expected) expected_queue = x.get_array().sycl_queue result_queue = result.get_array().sycl_queue @@ -320,6 +320,9 @@ def test_1in_1out(func, data, device): pytest.param("divide", [0., 1., 2., 3., 4.], [4., 4., 4., 4., 4.]), + pytest.param("dot", + [[0., 1., 2.], [3., 4., 5.]], + [[4., 4.], [4., 4.], [4., 4.]]), pytest.param("floor_divide", [1., 2., 3., 4.], [2.5, 2.5, 2.5, 2.5]), @@ -364,7 +367,7 @@ def test_2in_1out(func, data1, data2, device): x2 = dpnp.array(data2, device=device) result = getattr(dpnp, func)(x1, x2) - numpy.testing.assert_array_equal(result, expected) + assert_array_equal(result, expected) assert_sycl_queue_equal(result.sycl_queue, x1.sycl_queue) assert_sycl_queue_equal(result.sycl_queue, x2.sycl_queue) @@ -539,6 +542,9 @@ def test_random_state(func, args, kwargs, device, usm_type): pytest.param("divide", [0., 1., 2., 3., 4.], [4., 4., 4., 4., 4.]), + pytest.param("dot", + [[0., 1., 2.], [3., 4., 5.]], + [[4., 4.], [4., 4.], [4., 4.]]), pytest.param("floor_divide", [1., 2., 3., 4.], [2.5, 2.5, 2.5, 2.5]), @@ -571,20 +577,20 @@ def test_random_state(func, args, kwargs, device, usm_type): def test_out(func, data1, data2, device): x1_orig = numpy.array(data1) x2_orig = numpy.array(data2) - expected = numpy.empty(x1_orig.size) - numpy.add(x1_orig, x2_orig, out=expected) + np_out = getattr(numpy, func)(x1_orig, x2_orig) + expected = numpy.empty_like(np_out) + getattr(numpy, func)(x1_orig, x2_orig, out=expected) x1 = dpnp.array(data1, device=device) x2 = dpnp.array(data2, device=device) - result = dpnp.empty(x1.size, device=device) - dpnp.add(x1, x2, out=result) + dp_out = getattr(dpnp, func)(x1, x2) + result = dpnp.empty_like(dp_out) + getattr(dpnp, func)(x1, x2, out=result) - 
numpy.testing.assert_array_equal(result, expected) + assert_array_equal(result, expected) - expected_queue = x1.get_array().sycl_queue - result_queue = result.get_array().sycl_queue - - assert_sycl_queue_equal(result_queue, expected_queue) + assert_sycl_queue_equal(result.sycl_queue, x1.sycl_queue) + assert_sycl_queue_equal(result.sycl_queue, x2.sycl_queue) @pytest.mark.parametrize("device", diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 5fec346a000..96d55f6875c 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -154,3 +154,22 @@ def test_meshgrid(usm_type_x, usm_type_y): z = dp.meshgrid(x, y) assert z[0].usm_type == usm_type_x assert z[1].usm_type == usm_type_y + +@pytest.mark.parametrize( + "func,data1,data2", + [ + pytest.param("dot", + [[0., 1., 2.], [3., 4., 5.]], + [[4., 4.], [4., 4.], [4., 4.]]), + ], +) +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +def test_2in_1out(func, data1, data2, usm_type_x, usm_type_y): + x = dp.array(data1, usm_type = usm_type_x) + y = dp.array(data2, usm_type = usm_type_y) + z = getattr(dp, func)(x, y) + + assert x.usm_type == usm_type_x + assert y.usm_type == usm_type_y + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) diff --git a/tests/third_party/cupy/linalg_tests/test_product.py b/tests/third_party/cupy/linalg_tests/test_product.py index 2a97fa79b7c..d25cebbfa67 100644 --- a/tests/third_party/cupy/linalg_tests/test_product.py +++ b/tests/third_party/cupy/linalg_tests/test_product.py @@ -31,7 +31,6 @@ 'trans_a': [True, False], 'trans_b': [True, False], })) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestDot(unittest.TestCase):