From 6c2036fbf716287f3429671a9da35b9f9cea7cfb Mon Sep 17 00:00:00 2001
From: vtavana <120411540+vtavana@users.noreply.github.com>
Date: Fri, 23 Feb 2024 13:49:37 -0600
Subject: [PATCH] update `dpnp.linalg.multi_dot` implementation (#1729)

* update dpnp.multi_dot

* fix pre-commit

* update check limitation calls

* address comments

* use sycl_queue and usm_type for m and s
---
 dpnp/dpnp_array.py               |   2 +-
 dpnp/dpnp_iface.py               |  53 +++++++
 dpnp/dpnp_iface_arraycreation.py |  77 +++--------
 dpnp/dpnp_iface_linearalgebra.py |   8 +-
 dpnp/dpnp_iface_manipulation.py  |  19 ++-
 dpnp/dpnp_iface_mathematical.py  |   8 +-
 dpnp/dpnp_iface_nanfunctions.py  |  57 +++-----
 dpnp/dpnp_iface_searching.py     |   6 +-
 dpnp/dpnp_iface_statistics.py    |  66 +++------
 dpnp/dpnp_iface_trigonometric.py |  32 ++---
 dpnp/linalg/dpnp_iface_linalg.py |  58 ++++++--
 dpnp/linalg/dpnp_utils_linalg.py | 111 +++++++++++++++
 tests/test_product.py            | 230 ++++++++++++++++++++++++++++---
 tests/test_sycl_queue.py         |  51 +++++++
 tests/test_usm_type.py           |  22 +++
 15 files changed, 584 insertions(+), 216 deletions(-)

diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index 6b4ccb4e33a..8f784201a2b 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -540,7 +540,7 @@ def astype(self, dtype, order="K", casting="unsafe", subok=True, copy=True):
             Array data type casting.
         dtype : dtype
             Target data type.
-        order : {'C', 'F', 'A', 'K'}
+        order : {"C", "F", "A", "K"}, optional
             Row-major (C-style) or column-major (Fortran-style) order.
             When ``order`` is 'A', it uses 'F' if ``a`` is column-major and uses 'C' otherwise.
             And when ``order`` is 'K', it keeps strides as closely as possible.
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index 49c6ab5c179..b5d4b8022a9 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -56,6 +56,7 @@
     "array_equal",
     "asnumpy",
     "astype",
+    "check_limitations",
     "check_supported_arrays_type",
     "convert_single_elem_array_to_scalar",
     "default_float_type",
@@ -232,6 +233,58 @@ def astype(x1, dtype, order="K", casting="unsafe", copy=True):
     return dpnp_array._create_from_usm_ndarray(array_obj)
 
 
+def check_limitations(
+    order=None, subok=False, like=None, initial=None, where=True
+):
+    """
+    Check limitation kwargs for their supported values.
+
+    Parameter `order` is only supported with ``"C"``, ``"F"`` and ``None``.
+    Parameter `subok` is only supported with default value ``False``.
+    Parameters `like` and `initial` are only supported with default ``None``.
+    Parameter `where` is only supported with default value ``True``.
+
+    Raises
+    ------
+    NotImplementedError
+        If any keyword argument has a recognized but unsupported value.
+    ValueError
+        If `order` has an unrecognized value.
+
+    """
+
+    if order in ("A", "a", "K", "k"):
+        raise NotImplementedError(
+            "Keyword argument `order` is supported only with "
+            f"values ``'C'`` and ``'F'``, but got {order}"
+        )
+    if order not in ("C", "c", "F", "f", None):
+        raise ValueError(
+            "Unrecognized `order` keyword value, expecting "
+            f"``'C'`` or ``'F'``, but got {order}"
+        )
+    if like is not None:
+        raise NotImplementedError(
+            "Keyword argument `like` is supported only with "
+            f"default value ``None``, but got {like}"
+        )
+    if subok is not False:
+        raise NotImplementedError(
+            "Keyword argument `subok` is supported only with "
+            f"default value ``False``, but got {subok}"
+        )
+    if initial is not None:
+        raise NotImplementedError(
+            "Keyword argument `initial` is only supported with "
+            f"default value ``None``, but got {initial}"
+        )
+    if where is not True:
+        raise NotImplementedError(
+            "Keyword argument `where` is supported only with "
+            f"default value ``True``, but got {where}"
+        )
+
+
 def check_supported_arrays_type(*arrays, scalar_type=False, all_scalars=False):
     """
     Return ``True`` if each array has either type of scalar,
diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py
index c3999594fe3..ebfca03ec7f 100644
--- a/dpnp/dpnp_iface_arraycreation.py
+++ b/dpnp/dpnp_iface_arraycreation.py
@@ -99,43 +99,6 @@
 ]
 
 
-def _check_limitations(order=None, subok=False, like=None):
-    """
-    Checking limitation kwargs for their supported values.
-
-    Parameter `order` is supported only with values ``C``, ``F`` and ``None``.
-    Parameter `subok` is supported only with default value ``False``.
-    Parameter `like` is supported only with default value ``None``.
-
-    Raises
-    ------
-    NotImplementedError
-        If any input kwargs is of unsupported value.
-
-    """
-
-    if order in ("A", "a", "K", "k"):
-        raise NotImplementedError(
-            "Keyword argument `order` is supported only with "
-            f"values ``'C'`` and ``'F'``, but got {order}"
-        )
-    if order not in ("C", "c", "F", "f", None):
-        raise ValueError(
-            "Unrecognized `order` keyword value, expecting "
-            f"``'C'`` or ``'F'``, but got {order}"
-        )
-    if like is not None:
-        raise NotImplementedError(
-            "Keyword argument `like` is supported only with "
-            f"default value ``None``, but got {like}"
-        )
-    if subok is not False:
-        raise NotImplementedError(
-            "Keyword argument `subok` is supported only with "
-            f"default value ``False``, but got {subok}"
-        )
-
-
 def arange(
     start,
     /,
@@ -223,7 +186,7 @@ def arange(
 
     """
 
-    _check_limitations(like=like)
+    dpnp.check_limitations(like=like)
 
     return dpnp_container.arange(
         start,
@@ -343,7 +306,7 @@ def array(
 
     """
 
-    _check_limitations(subok=subok, like=like)
+    dpnp.check_limitations(subok=subok, like=like)
     if ndmin != 0:
         raise NotImplementedError(
             "Keyword argument `ndmin` is supported only with "
@@ -451,7 +414,7 @@ def asanyarray(
 
     """
 
-    _check_limitations(like=like)
+    dpnp.check_limitations(like=like)
 
     return asarray(
         a,
@@ -548,7 +511,7 @@ def asarray(
 
     """
 
-    _check_limitations(like=like)
+    dpnp.check_limitations(like=like)
 
     return dpnp_container.asarray(
         a,
@@ -654,7 +617,7 @@ def ascontiguousarray(
 
     """
 
-    _check_limitations(like=like)
+    dpnp.check_limitations(like=like)
 
     # at least 1-d array has to be returned
     if dpnp.isscalar(a) or hasattr(a, "ndim") and a.ndim == 0:
@@ -768,7 +731,7 @@ def asfortranarray(
 
     """
 
-    _check_limitations(like=like)
+    dpnp.check_limitations(like=like)
 
     # at least 1-d array has to be returned
     if dpnp.isscalar(a) or hasattr(a, "ndim") and a.ndim == 0:
@@ -867,7 +830,7 @@ def copy(
 
     """
 
-    _check_limitations(subok=subok)
+    dpnp.check_limitations(subok=subok)
 
     if dpnp.is_supported_array_type(a):
         sycl_queue_normalized = dpnp.get_normalized_queue_device(
@@ -1176,7 +1139,7 @@ def empty(
 
     """
 
-    _check_limitations(order=order, like=like)
+    dpnp.check_limitations(order=order, like=like)
     return dpnp_container.empty(
         shape,
         dtype=dtype,
@@ -1276,7 +1239,7 @@ def empty_like(
     """
 
     dpnp.check_supported_arrays_type(a)
-    _check_limitations(order=order, subok=subok)
+    dpnp.check_limitations(order=order, subok=subok)
 
     _shape = a.shape if shape is None else shape
     _dtype = a.dtype if dtype is None else dtype
@@ -1385,7 +1348,7 @@ def eye(
 
     """
 
-    _check_limitations(order=order, like=like)
+    dpnp.check_limitations(order=order, like=like)
 
     return dpnp_container.eye(
         N,
@@ -1485,7 +1448,7 @@ def frombuffer(
 
     """
 
-    _check_limitations(like=like)
+    dpnp.check_limitations(like=like)
     return asarray(
         numpy.frombuffer(buffer, dtype=dtype, count=count, offset=offset),
         device=device,
@@ -1609,7 +1572,7 @@ def fromfile(
 
     """
 
-    _check_limitations(like=like)
+    dpnp.check_limitations(like=like)
     return asarray(
         numpy.fromfile(file, dtype=dtype, count=count, sep=sep, offset=offset),
         device=device,
@@ -1725,7 +1688,7 @@ def fromstring(
 
     """
 
-    _check_limitations(like=like)
+    dpnp.check_limitations(like=like)
     return asarray(
         numpy.fromstring(string, dtype=dtype, count=count, sep=sep),
         device=device,
@@ -1819,7 +1782,7 @@ def full(
 
     """
 
-    _check_limitations(order=order, like=like)
+    dpnp.check_limitations(order=order, like=like)
 
     return dpnp_container.full(
         shape,
@@ -1926,7 +1889,7 @@ def full_like(
     """
 
     dpnp.check_supported_arrays_type(a)
-    _check_limitations(order=order, subok=subok)
+    dpnp.check_limitations(order=order, subok=subok)
 
     _shape = a.shape if shape is None else shape
     _dtype = a.dtype if dtype is None else dtype
@@ -2155,7 +2118,7 @@ def identity(
     if n < 0:
         raise ValueError("negative dimensions are not allowed")
 
-    _check_limitations(like=like)
+    dpnp.check_limitations(like=like)
 
     _dtype = dpnp.default_float_type() if dtype is None else dtype
     return dpnp.eye(
@@ -2759,7 +2722,7 @@ def ones(
 
     """
 
-    _check_limitations(order=order, like=like)
+    dpnp.check_limitations(order=order, like=like)
 
     return dpnp_container.ones(
         shape,
@@ -2861,7 +2824,7 @@ def ones_like(
 
     """
     dpnp.check_supported_arrays_type(a)
-    _check_limitations(order=order, subok=subok)
+    dpnp.check_limitations(order=order, subok=subok)
 
     _shape = a.shape if shape is None else shape
     _dtype = a.dtype if dtype is None else dtype
@@ -3347,7 +3310,7 @@ def zeros(
 
     """
 
-    _check_limitations(order=order, like=like)
+    dpnp.check_limitations(order=order, like=like)
 
     return dpnp_container.zeros(
         shape,
@@ -3450,7 +3413,7 @@ def zeros_like(
     """
 
     dpnp.check_supported_arrays_type(a)
-    _check_limitations(order=order, subok=subok)
+    dpnp.check_limitations(order=order, subok=subok)
 
     _shape = a.shape if shape is None else shape
     _dtype = a.dtype if dtype is None else dtype
diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py
index 366a91afbd1..312a101524b 100644
--- a/dpnp/dpnp_iface_linearalgebra.py
+++ b/dpnp/dpnp_iface_linearalgebra.py
@@ -82,7 +82,7 @@ def dot(a, b, out=None):
     b : {dpnp.ndarray, usm_ndarray, scalar}
         Second input array. Both inputs `a` and `b` can not be scalars
         at the same time.
-    out : {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         Alternative output array in which to place the result. It must have
         the same shape and data type as the expected output and should be
         C-contiguous. If these conditions are not met, an exception is
@@ -345,11 +345,11 @@ def matmul(
 
     Parameters
     ----------
-    x1 : {dpnp_array, usm_ndarray}
+    x1 : {dpnp.ndarray, usm_ndarray}
         First input array.
-    x2 : {dpnp_array, usm_ndarray}
+    x2 : {dpnp.ndarray, usm_ndarray}
         Second input array.
-    out : {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         Alternative output array in which to place the result. It must have
         a shape that matches the signature `(n,k),(k,m)->(n,m)` but the type
         (of the calculated values) will be cast if necessary. Default: ``None``.
diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py
index a3516eeaf00..2cbf6f98b49 100644
--- a/dpnp/dpnp_iface_manipulation.py
+++ b/dpnp/dpnp_iface_manipulation.py
@@ -1202,14 +1202,13 @@ def ravel(a, order="C"):
     x : {dpnp.ndarray, usm_ndarray}
         Input array. The elements in `a` are read in the order specified by
         order, and packed as a 1-D array.
-    order : {'C', 'F'}, optional
-        The elements of `a` are read using this index order. ``C`` means to
+    order : {"C", "F"}, optional
+        The elements of `a` are read using this index order. ``"C"`` means to
         index the elements in row-major, C-style order, with the last axis
         index changing fastest, back to the first axis index changing slowest.
-        ``F`` means to index the elements in column-major, Fortran-style order,
-        with the first index changing fastest, and the last index changing
-        slowest.
-        By default, ``C`` index order is used.
+        ``"F"`` means to index the elements in column-major, Fortran-style
+        order, with the first index changing fastest, and the last index
+        changing slowest. By default, ``"C"`` index order is used.
 
     Returns
     -------
@@ -1313,15 +1312,15 @@ def reshape(a, /, newshape, order="C", copy=None):
         an integer, then the result will be a 1-D array of that length.
         One shape dimension can be -1. In this case, the value is
         inferred from the length of the array and remaining dimensions.
-    order : {'C', 'F'}, optional
+    order : {"C", "F"}, optional
         Read the elements of `a` using this index order, and place the
-        elements into the reshaped array using this index order. 'C'
+        elements into the reshaped array using this index order. ``"C"``
         means to read / write the elements using C-like index order,
         with the last axis index changing fastest, back to the first
-        axis index changing slowest. 'F' means to read / write the
+        axis index changing slowest. ``"F"`` means to read / write the
         elements using Fortran-like index order, with the first index
         changing fastest, and the last index changing slowest. Note that
-        the 'C' and 'F' options take no account of the memory layout of
+        the ``"C"`` and ``"F"`` options take no account of the memory layout of
         the underlying array, and only refer to the order of indexing.
     copy : bool, optional
         Boolean indicating whether or not to copy the input array.
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index eb985ea6a65..1e56782c7d4 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -449,7 +449,7 @@ def clip(a, a_min, a_max, *, out=None, order="K", **kwargs):
     a_min, a_max : {dpnp.ndarray, usm_ndarray, None}
         Minimum and maximum value. If ``None``, clipping is not performed on the corresponding edge.
         Only one of `a_min` and `a_max` may be ``None``. Both are broadcast against `a`.
-    out : {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         The results will be placed in this array. It may be the input array for in-place clipping.
         `out` must be of the right shape to hold the output. Its type is preserved.
     order : {"C", "F", "A", "K", None}, optional
@@ -614,8 +614,8 @@ def copysign(
     out : ({None, dpnp.ndarray, usm_ndarray}, optional):
         Output array to populate.
         Array must have the correct shape and the expected data type.
-    order : ({'C', 'F', 'A', 'K'}, optional):
-        Memory layout of the newly output array, if parameter `out` is `None`.
+    order : {"C", "F", "A", "K"}, optional
+        Memory layout of the new output array, if parameter `out` is ``None``.
         Default: "K".
 
     Returns
@@ -2848,7 +2848,7 @@ def sum(
         data type of `a`, the input array elements are cast to the
         specified data type before computing the sum.
         Default: ``None``.
-    out : {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         Alternative output array in which to place the result. It must
         have the same shape as the expected output, but the type of
         the output values will be cast if necessary.
diff --git a/dpnp/dpnp_iface_nanfunctions.py b/dpnp/dpnp_iface_nanfunctions.py
index 354b872aae6..c3ba4a60e6e 100644
--- a/dpnp/dpnp_iface_nanfunctions.py
+++ b/dpnp/dpnp_iface_nanfunctions.py
@@ -116,13 +116,13 @@ def nanargmax(a, axis=None, out=None, *, keepdims=False):
 
     Parameters
     ----------
-    a :  {dpnp.ndarray, usm_ndarray}
+    a : {dpnp.ndarray, usm_ndarray}
         Input array.
     axis : int, optional
         Axis along which to search. If ``None``, the function must return
         the index of the maximum value of the flattened array.
         Default: ``None``.
-    out :  {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         If provided, the result will be inserted into this array. It should
         be of the appropriate shape and dtype.
     keepdims : bool
@@ -194,7 +194,7 @@ def nanargmin(a, axis=None, out=None, *, keepdims=False):
         Axis along which to search. If ``None``, the function must return
         the index of the minimum value of the flattened array.
         Default: ``None``.
-    out : {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         If provided, the result will be inserted into this array. It should
         be of the appropriate shape and dtype.
     keepdims : bool
@@ -339,14 +339,14 @@ def nanmax(a, axis=None, out=None, keepdims=False, initial=None, where=True):
 
     Parameters
     ----------
-    a :  {dpnp.ndarray, usm_ndarray}
+    a : {dpnp.ndarray, usm_ndarray}
         Input array.
     axis : int or tuple of ints, optional
         Axis or axes along which maximum values must be computed. By default,
         the maximum value must be computed over the entire array. If a tuple
         of integers, maximum values must be computed over multiple axes.
         Default: ``None``.
-    out :  {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         If provided, the result will be inserted into this array. It should
         be of the appropriate shape and dtype.
     keepdims : bool
@@ -407,14 +407,7 @@ def nanmax(a, axis=None, out=None, keepdims=False, initial=None, where=True):
 
     """
 
-    if initial is not None:
-        raise NotImplementedError(
-            "initial keyword argument is only supported with its default value."
-        )
-    if where is not True:
-        raise NotImplementedError(
-            "where keyword argument is only supported with its default value."
-        )
+    dpnp.check_limitations(initial=initial, where=where)
 
     a, mask = _replace_nan(a, -dpnp.inf)
     res = dpnp.max(a, axis=axis, out=out, keepdims=keepdims)
@@ -450,7 +443,7 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False, *, where=True):
         If `a` has a boolean or integral data type, the returned array
         will have the default floating point data type for the device
         where input array `a` is allocated.
-    out : {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         Alternative output array in which to place the result. It must have
         the same shape as the expected output but the type (of the calculated
         values) will be cast if necessary. Default: ``None``.
@@ -498,10 +491,7 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False, *, where=True):
 
     """
 
-    if where is not True:
-        raise NotImplementedError(
-            "where keyword argument is only supported with its default value."
-        )
+    dpnp.check_limitations(where=where)
 
     arr, mask = _replace_nan(a, 0)
     if mask is None:
@@ -550,14 +540,14 @@ def nanmin(a, axis=None, out=None, keepdims=False, initial=None, where=True):
 
     Parameters
     ----------
-    a :  {dpnp.ndarray, usm_ndarray}
+    a : {dpnp.ndarray, usm_ndarray}
         Input array.
     axis : int or tuple of ints, optional
         Axis or axes along which minimum values must be computed. By default,
         the minimum value must be computed over the entire array. If a tuple
         of integers, minimum values must be computed over multiple axes.
         Default: ``None``.
-    out :  {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         If provided, the result will be inserted into this array. It should
         be of the appropriate shape and dtype.
     keepdims : bool, optional
@@ -618,14 +608,7 @@ def nanmin(a, axis=None, out=None, keepdims=False, initial=None, where=True):
 
     """
 
-    if initial is not None:
-        raise NotImplementedError(
-            "initial keyword argument is only supported with its default value."
-        )
-    if where is not True:
-        raise NotImplementedError(
-            "where keyword argument is only supported with its default value."
-        )
+    dpnp.check_limitations(initial=initial, where=where)
 
     a, mask = _replace_nan(a, +dpnp.inf)
     res = dpnp.min(a, axis=axis, out=out, keepdims=keepdims)
@@ -754,7 +737,7 @@ def nansum(
         data type of `a`, the input array elements are cast to the
         specified data type before computing the sum.
         Default: ``None``.
-    out : {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         Alternative output array in which to place the result. It must have
         the same shape as the expected output but the type (of the calculated
         values) will be cast if necessary. Default: ``None``.
@@ -850,7 +833,7 @@ def nanstd(
         If `a` has a boolean or integral data type, the returned array
         will have the default floating point data type for the device
         where input array `a` is allocated.
-    out : {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         Alternative output array in which to place the result. It must have
         the same shape as the expected output but the type (of the calculated
         values) will be cast if necessary.
@@ -907,10 +890,7 @@ def nanstd(
 
     """
 
-    if where is not True:
-        raise NotImplementedError(
-            "where keyword argument is only supported with its default value."
-        )
+    dpnp.check_limitations(where=where)
     if not isinstance(ddof, (int, float)):
         raise TypeError(
             f"An integer or float is required, but got {type(ddof)}"
@@ -939,7 +919,7 @@ def nanvar(
 
     Parameters
     ----------
-    a : {dpnp_array, usm_ndarray}
+    a : {dpnp.ndarray, usm_ndarray}
         Input array.
     axis : int or tuple of ints, optional
         axis or axes along which the variances must be computed. If a tuple
@@ -953,7 +933,7 @@ def nanvar(
         If `a` has a boolean or integral data type, the returned array
         will have the default floating point data type for the device
         where input array `a` is allocated.
-    out : {dpnp_array, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         Alternative output array in which to place the result. It must have
         the same shape as the expected output but the type (of the calculated
         values) will be cast if necessary.
@@ -1008,10 +988,7 @@ def nanvar(
 
     """
 
-    if where is not True:
-        raise NotImplementedError(
-            "where keyword argument is only supported with its default value."
-        )
+    dpnp.check_limitations(where=where)
     if not isinstance(ddof, (int, float)):
         raise TypeError(
             f"An integer or float is required, but got {type(ddof)}"
diff --git a/dpnp/dpnp_iface_searching.py b/dpnp/dpnp_iface_searching.py
index ea0dc7ccdfd..3e1fb4c4d98 100644
--- a/dpnp/dpnp_iface_searching.py
+++ b/dpnp/dpnp_iface_searching.py
@@ -62,13 +62,13 @@ def argmax(a, axis=None, out=None, *, keepdims=False):
 
     Parameters
     ----------
-    a :  {dpnp.ndarray, usm_ndarray}
+    a : {dpnp.ndarray, usm_ndarray}
         Input array.
     axis : int, optional
         Axis along which to search. If ``None``, the function must return
         the index of the maximum value of the flattened array.
         Default: ``None``.
-    out :  {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         If provided, the result will be inserted into this array. It should
         be of the appropriate shape and dtype.
     keepdims : bool
@@ -154,7 +154,7 @@ def argmin(a, axis=None, out=None, *, keepdims=False):
         Axis along which to search. If ``None``, the function must return
         the index of the minimum value of the flattened array.
         Default: ``None``.
-    out : {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         If provided, the result will be inserted into this array. It should
         be of the appropriate shape and dtype.
     keepdims : bool, optional
diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py
index b5d16e710ad..db323456bf4 100644
--- a/dpnp/dpnp_iface_statistics.py
+++ b/dpnp/dpnp_iface_statistics.py
@@ -497,14 +497,14 @@ def max(a, axis=None, out=None, keepdims=False, initial=None, where=True):
 
     Parameters
     ----------
-    a :  {dpnp.ndarray, usm_ndarray}
+    a : {dpnp.ndarray, usm_ndarray}
         Input array.
     axis : int or tuple of ints, optional
         Axis or axes along which maximum values must be computed. By default,
         the maximum value must be computed over the entire array. If a tuple of
         integers, maximum values must be computed over multiple axes.
         Default: ``None``.
-    out :  {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         If provided, the result will be inserted into this array. It should
         be of the appropriate shape and dtype.
     keepdims : bool
@@ -523,13 +523,9 @@ def max(a, axis=None, out=None, keepdims=False, initial=None, where=True):
         The returned array must have the same data type as `a`.
 
     Limitations
-    -----------
-    Input array is only supported as either :class:`dpnp.ndarray`
-    or :class:`dpctl.tensor.usm_ndarray`.
+    -----------
     Parameters `where`, and `initial` are only supported with their default
-    values.
-    Otherwise ``NotImplementedError`` exception will be raised.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
+    values. Otherwise ``NotImplementedError`` exception will be raised.
 
     See Also
     --------
@@ -563,14 +559,7 @@ def max(a, axis=None, out=None, keepdims=False, initial=None, where=True):
 
     """
 
-    if initial is not None:
-        raise NotImplementedError(
-            "initial keyword argument is only supported with its default value."
-        )
-    if where is not True:
-        raise NotImplementedError(
-            "where keyword argument is only supported with its default value."
-        )
+    dpnp.check_limitations(initial=initial, where=where)
 
     dpt_array = dpnp.get_usm_ndarray(a)
     result = dpnp_array._create_from_usm_ndarray(
@@ -602,7 +591,7 @@ def mean(a, /, axis=None, dtype=None, out=None, keepdims=False, *, where=True):
         If `a` has a boolean or integral data type, the returned array
         will have the default floating point data type for the device
         where input array `a` is allocated.
-    out : {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         Alternative output array in which to place the result. It must have
         the same shape as the expected output but the type (of the calculated
         values) will be cast if necessary. Default: ``None``.
@@ -650,10 +639,7 @@ def mean(a, /, axis=None, dtype=None, out=None, keepdims=False, *, where=True):
 
     """
 
-    if where is not True:
-        raise NotImplementedError(
-            "where keyword argument is only supported with its default value."
-        )
+    dpnp.check_limitations(where=where)
 
     dpt_array = dpnp.get_usm_ndarray(a)
     result = dpnp_array._create_from_usm_ndarray(
@@ -722,14 +708,14 @@ def min(a, axis=None, out=None, keepdims=False, initial=None, where=True):
 
     Parameters
     ----------
-    a :  {dpnp.ndarray, usm_ndarray}
+    a : {dpnp.ndarray, usm_ndarray}
         Input array.
     axis : int or tuple of ints, optional
         Axis or axes along which minimum values must be computed. By default,
         the minimum value must be computed over the entire array. If a tuple
         of integers, minimum values must be computed over multiple axes.
         Default: ``None``.
-    out :  {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         If provided, the result will be inserted into this array. It should
         be of the appropriate shape and dtype.
     keepdims : bool, optional
@@ -749,12 +735,8 @@ def min(a, axis=None, out=None, keepdims=False, initial=None, where=True):
 
     Limitations
     -----------
-    Input array is only supported as either :class:`dpnp.ndarray`
-    or :class:`dpctl.tensor.usm_ndarray`.
     Parameters `where`, and `initial` are only supported with their default
-    values.
-    Otherwise ``NotImplementedError`` exception will be raised.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
+    values. Otherwise ``NotImplementedError`` exception will be raised.
 
     See Also
     --------
@@ -788,14 +770,7 @@ def min(a, axis=None, out=None, keepdims=False, initial=None, where=True):
 
     """
 
-    if initial is not None:
-        raise NotImplementedError(
-            "initial keyword argument is only supported with its default value."
-        )
-    if where is not True:
-        raise NotImplementedError(
-            "where keyword argument is only supported with its default value."
-        )
+    dpnp.check_limitations(initial=initial, where=where)
 
     dpt_array = dpnp.get_usm_ndarray(a)
     result = dpnp_array._create_from_usm_ndarray(
@@ -859,7 +834,7 @@ def std(
 
     Parameters
     ----------
-    a : {dpnp_array, usm_ndarray}
+    a : {dpnp.ndarray, usm_ndarray}
         Input array.
     axis : int or tuple of ints, optional
         Axis or axes along which the standard deviations must be computed.
@@ -874,7 +849,7 @@ def std(
         If `a` has a boolean or integral data type, the returned array
         will have the default floating point data type for the device
         where input array `a` is allocated.
-    out : {dpnp_array, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         Alternative output array in which to place the result. It must have
         the same shape as the expected output but the type (of the calculated
         values) will be cast if necessary.
@@ -933,11 +908,8 @@ def std(
     """
 
     dpnp.check_supported_arrays_type(a)
+    dpnp.check_limitations(where=where)
 
-    if where is not True:
-        raise NotImplementedError(
-            "where keyword argument is only supported with its default value."
-        )
     if not isinstance(ddof, (int, float)):
         raise TypeError(
             f"An integer or float is required, but got {type(ddof)}"
@@ -976,7 +948,7 @@ def var(
 
     Parameters
     ----------
-    a : {dpnp_array, usm_ndarray}
+    a : {dpnp.ndarray, usm_ndarray}
         Input array.
     axis : int or tuple of ints, optional
         axis or axes along which the variances must be computed. If a tuple
@@ -990,7 +962,7 @@ def var(
         If `a` has a boolean or integral data type, the returned array
         will have the default floating point data type for the device
         where input array `a` is allocated.
-    out : {dpnp_array, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         Alternative output array in which to place the result. It must have
         the same shape as the expected output but the type (of the calculated
         values) will be cast if necessary.
@@ -1048,10 +1020,8 @@ def var(
     """
 
     dpnp.check_supported_arrays_type(a)
-    if where is not True:
-        raise NotImplementedError(
-            "where keyword argument is only supported with its default value."
-        )
+    dpnp.check_limitations(where=where)
+
     if not isinstance(ddof, (int, float)):
         raise TypeError(
             f"An integer or float is required, but got {type(ddof)}"
diff --git a/dpnp/dpnp_iface_trigonometric.py b/dpnp/dpnp_iface_trigonometric.py
index 2be5c96217a..6e1b27f640e 100644
--- a/dpnp/dpnp_iface_trigonometric.py
+++ b/dpnp/dpnp_iface_trigonometric.py
@@ -558,12 +558,12 @@ def cbrt(
     Parameters
     ----------
     x : {dpnp.ndarray, usm_ndarray}
-        Input array, expected to have a real-valued data type.
+        Input array, must have a real-valued data type.
     out : ({None, dpnp.ndarray, usm_ndarray}, optional):
         Output array to populate.
         Array must have the correct shape and the expected data type.
-    order : ({'C', 'F', 'A', 'K'}, optional):
-        Memory layout of the newly output array, if parameter `out` is `None`.
+    order : {"C", "F", "A", "K"}, optional
+        Memory layout of the newly output array, if parameter `out` is ``None``.
         Default: "K".
 
     Returns
@@ -573,11 +573,9 @@ def cbrt(
 
     Limitations
     -----------
-    Parameter `x` is only supported as either :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`.
     Parameters `where`, `dtype` and `subok` are supported with their default values.
     Keyword argument `kwargs` is currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by real-valued data types.
 
     See Also
     --------
@@ -849,12 +847,12 @@ def exp2(
     Parameters
     ----------
     x : {dpnp.ndarray, usm_ndarray}
-        Input array, expected to have a floating-point data type.
+        Input array.
     out : ({None, dpnp.ndarray, usm_ndarray}, optional):
         Output array to populate.
         Array must have the correct shape and the expected data type.
-    order : ({'C', 'F', 'A', 'K'}, optional):
-        Memory layout of the newly output array, if parameter `out` is `None`.
+    order : {"C", "F", "A", "K"}, optional
+        Memory layout of the newly output array, if parameter `out` is ``None``.
         Default: "K".
 
     Returns
@@ -864,11 +862,9 @@ def exp2(
 
     Limitations
     -----------
-    Parameter `x` is only supported as either :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`.
     Parameters `where`, `dtype` and `subok` are supported with their default values.
     Keyword argument `kwargs` is currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by supported DPNP :ref:`Data types`.
 
     See Also
     --------
@@ -1352,7 +1348,7 @@ def logsumexp(x, axis=None, out=None, dtype=None, keepdims=False):
         of unique integers, values are computed over multiple axes.
         If ``None``, the result is computed over the entire array.
         Default: ``None``.
-    out : {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         If provided, the result will be inserted into this array. It should
         be of the appropriate shape and dtype.
     dtype : data type, optional
@@ -1434,7 +1430,7 @@ def reciprocal(
     ----------
     x : {dpnp.ndarray, usm_ndarray}
         Input array.
-    out : {None, dpnp.ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         Output array to populate.
         Array must have the correct shape and the expected data type.
     order : {"C", "F", "A", "K"}, optional
@@ -1498,7 +1494,7 @@ def reduce_hypot(x, axis=None, out=None, dtype=None, keepdims=False):
         of unique integers, values are computed over multiple axes.
         If ``None``, the result is computed over the entire array.
         Default: ``None``.
-    out : {dpnp.ndarray, usm_ndarray}, optional
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
         If provided, the result will be inserted into this array. It should
         be of the appropriate shape and dtype.
     dtype : data type, optional
@@ -1575,13 +1571,13 @@ def rsqrt(
     Parameters
     ----------
     x : {dpnp.ndarray, usm_ndarray}
-        Input array, expected to have a real floating-point data type.
+        Input array, must have a real-valued data type.
     out : ({None, dpnp.ndarray, usm_ndarray}, optional):
         Output array to populate.
         Array must have the correct shape and the expected data type.
-    order : ({'C', 'F', 'A', 'K'}, optional):
-        Memory layout of the newly output array, if parameter `out` is `None`.
-        Default: "K"
+    order : {"C", "F", "A", "K"}, optional
+        Memory layout of the newly output array, if parameter `out` is ``None``.
+        Default: "K".
 
     Returns
     -------
@@ -1590,11 +1586,9 @@ def rsqrt(
 
     Limitations
     -----------
-    Parameter `x` is only supported as either :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`.
     Parameters `where`, `dtype` and `subok` are supported with their default values.
     Keyword argument `kwargs` is currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
-    Input array data types are limited by real-valued data types.
 
     See Also
     --------
diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py
index 33abe38c4e2..81680cbe028 100644
--- a/dpnp/linalg/dpnp_iface_linalg.py
+++ b/dpnp/linalg/dpnp_iface_linalg.py
@@ -52,6 +52,7 @@
     dpnp_eigh,
     dpnp_inv,
     dpnp_matrix_rank,
+    dpnp_multi_dot,
     dpnp_pinv,
     dpnp_qr,
     dpnp_slogdet,
@@ -451,40 +452,69 @@ def matrix_rank(A, tol=None, hermitian=False):
     return dpnp_matrix_rank(A, tol=tol, hermitian=hermitian)
 
 
-def multi_dot(arrays, out=None):
+def multi_dot(arrays, *, out=None):
     """
-    Compute the dot product of two or more arrays in a single function call
+    Compute the dot product of two or more arrays in a single function call.
+
+    For full documentation refer to :obj:`numpy.linalg.multi_dot`.
 
     Parameters
     ----------
-    arrays : sequence of array_like
+    arrays : sequence of dpnp.ndarray or usm_ndarray
         If the first argument is 1-D it is treated as row vector.
         If the last argument is 1-D it is treated as column vector.
         The other arguments must be 2-D.
-    out : ndarray, optional
-        unsupported
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
+        Output argument. This must have the exact kind that would be returned
+        if it was not used. In particular, it must have the right type, must be
+        C-contiguous, and its dtype must be the dtype that would be returned
+        for `dot(a, b)`. If these conditions are not met, an exception is
+        raised, instead of attempting to be flexible.
 
     Returns
     -------
-    output : ndarray
+    out : dpnp.ndarray
         Returns the dot product of the supplied arrays.
 
     See Also
     --------
-    :obj:`numpy.multi_dot`
+    :obj:`dpnp.dot` : Returns the dot product of two arrays.
+    :obj:`dpnp.inner` : Returns the inner product of two arrays.
+
+    Examples
+    --------
+    >>> import dpnp as np
+    >>> from dpnp.linalg import multi_dot
+    >>> A = np.random.random((10000, 100))
+    >>> B = np.random.random((100, 1000))
+    >>> C = np.random.random((1000, 5))
+    >>> D = np.random.random((5, 333))
+
+    the actual dot multiplication
+
+    >>> multi_dot([A, B, C, D]).shape
+    (10000, 333)
+
+    instead of
+
+    >>> np.dot(np.dot(np.dot(A, B), C), D).shape
+    (10000, 333)
+
+    or
+
+    >>> A.dot(B).dot(C).dot(D).shape
+    (10000, 333)
 
     """
 
+    dpnp.check_supported_arrays_type(*arrays)
     n = len(arrays)
-
     if n < 2:
-        checker_throw_value_error("multi_dot", "arrays", n, ">1")
-
-    result = arrays[0]
-    for id in range(1, n):
-        result = dpnp.dot(result, arrays[id])
+        raise ValueError("Expecting at least two arrays.")
+    if n == 2:  # a single product needs no ordering, call dot directly
+        return dpnp.dot(arrays[0], arrays[1], out=out)
 
-    return result
+    return dpnp_multi_dot(n, arrays, out)
 
 
 def pinv(a, rcond=1e-15, hermitian=False):
diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py
index b8c366f5413..e6564c70a71 100644
--- a/dpnp/linalg/dpnp_utils_linalg.py
+++ b/dpnp/linalg/dpnp_utils_linalg.py
@@ -26,6 +26,7 @@
 
 import dpctl
 import dpctl.tensor._tensor_impl as ti
+import numpy
 from numpy import prod
 
 import dpnp
@@ -40,6 +41,7 @@
     "dpnp_eigh",
     "dpnp_inv",
     "dpnp_matrix_rank",
+    "dpnp_multi_dot",
     "dpnp_pinv",
     "dpnp_qr",
     "dpnp_slogdet",
@@ -383,6 +385,86 @@ def _lu_factor(a, res_type):
         return (a_h, ipiv_h, dev_info_array)
 
 
+def _multi_dot(arrays, order, i, j, out=None):
+    """Multiply ``arrays[i..j]`` using the split points stored in `order`."""
+    if i == j:
+        # the initial call with non-None out should never get here
+        assert out is None
+        return arrays[i]
+
+    # read the split index once, as a host int, instead of indexing twice
+    k = int(order[i, j])
+    left = _multi_dot(arrays, order, i, k)
+    right = _multi_dot(arrays, order, k + 1, j)
+    return dpnp.dot(left, right, out=out)
+
+
+def _multi_dot_matrix_chain_order(n, arrays, return_costs=False):
+    """
+    Return a dpnp.ndarray that encodes the optimal order of multiplications.
+
+    The optimal order array is then used by `_multi_dot()` to do the
+    multiplication.
+
+    Also return the cost matrix if `return_costs` is ``True``.
+
+    The implementation CLOSELY follows Cormen, "Introduction to Algorithms",
+    Chapter 15.2, p. 370-378.  Note that Cormen uses 1-based indices.
+
+        cost[i, j] = min([
+            cost[prefix] + cost[suffix] + cost_mult(prefix, suffix)
+            for k in range(i, j)])
+
+    """
+
+    usm_type, exec_q = get_usm_allocations(arrays)
+    # p stores the dimensions of the matrices
+    # Example for p: A_{10x100}, B_{100x5}, C_{5x50} --> p = [10, 100, 5, 50]
+    p = [1 if arrays[0].ndim == 1 else arrays[0].shape[0]]
+    p += [a.shape[0] for a in arrays[1:-1]]
+    p += (
+        [arrays[-1].shape[0], 1]
+        if arrays[-1].ndim == 1
+        else [arrays[-1].shape[0], arrays[-1].shape[1]]
+    )
+    # m is a matrix of costs of the subproblems
+    # m[i,j]: min number of scalar multiplications needed to compute A_{i..j}
+    m = dpnp.zeros((n, n), usm_type=usm_type, sycl_queue=exec_q)
+    # s is the actual ordering
+    # s[i, j] is the value of k at which we split the product A_i..A_j
+    s = dpnp.zeros(
+        (n, n), dtype=dpnp.intp, usm_type=usm_type, sycl_queue=exec_q
+    )
+
+    for ll in range(1, n):  # ll == j - i, the span of the sub-chain
+        for i in range(n - ll):
+            j = i + ll
+            m[i, j] = dpnp.inf  # sentinel, replaced by the first candidate
+            for k in range(i, j):
+                q = m[i, k] + m[k + 1, j] + p[i] * p[k + 1] * p[j + 1]
+                if q < m[i, j]:
+                    m[i, j] = q
+                    s[i, j] = k  # Note that Cormen uses 1-based index
+
+    return (s, m) if return_costs else s
+
+
+def _multi_dot_three(A, B, C, out=None):
+    """Multiply three arrays, choosing the cheaper of (AB)C and A(BC)."""
+
+    # a 1-D `A` counts as a row vector and a 1-D `C` as a column vector
+    rows_a, inner_ab = A.shape if A.ndim != 1 else (1, A.shape[0])
+    inner_bc, cols_c = C.shape if C.ndim != 1 else (C.shape[0], 1)
+    # scalar multiplications needed by (AB)C
+    left_first = rows_a * inner_bc * (inner_ab + cols_c)
+    # scalar multiplications needed by A(BC)
+    right_first = inner_ab * cols_c * (rows_a + inner_bc)
+
+    if right_first <= left_first:
+        return dpnp.dot(A, dpnp.dot(B, C), out=out)
+    return dpnp.dot(dpnp.dot(A, B), C, out=out)
+
+
 def _real_type(dtype, device=None):
     """
     Returns the real data type corresponding to a given dpnp data type.
@@ -1023,6 +1105,35 @@ def dpnp_matrix_rank(A, tol=None, hermitian=False):
     return dpnp.count_nonzero(S > tol, axis=-1)
 
 
+def dpnp_multi_dot(n, arrays, out=None):
+    """Compute the dot product of two or more arrays in a single function call."""
+    # only the two ends of the chain may be 1-D; everything between is 2-D
+    if arrays[0].ndim not in (1, 2):
+        raise numpy.linalg.LinAlgError(
+            f"{arrays[0].ndim}-dimensional array given. First array must be 1-D or 2-D."
+        )
+
+    if arrays[-1].ndim not in (1, 2):
+        raise numpy.linalg.LinAlgError(
+            f"{arrays[-1].ndim}-dimensional array given. Last array must be 1-D or 2-D."
+        )
+
+    for arr in arrays[1:-1]:
+        if arr.ndim != 2:
+            raise numpy.linalg.LinAlgError(
+                f"{arr.ndim}-dimensional array given. Inner arrays must be 2-D."
+            )
+
+    # _multi_dot_three is much faster than _multi_dot_matrix_chain_order
+    if n == 3:
+        result = _multi_dot_three(arrays[0], arrays[1], arrays[2], out=out)
+    else:
+        order = _multi_dot_matrix_chain_order(n, arrays)
+        result = _multi_dot(arrays, order, 0, n - 1, out=out)
+
+    return result
+
+
 def dpnp_pinv(a, rcond=1e-15, hermitian=False):
     """
     dpnp_pinv(a, rcond=1e-15, hermitian=False):
diff --git a/tests/test_product.py b/tests/test_product.py
index 48a7119e7f3..e1b95b939f9 100644
--- a/tests/test_product.py
+++ b/tests/test_product.py
@@ -660,22 +660,220 @@ def test_inner_error(self):
             dpnp.inner(a, b)
 
 
-@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
-def test_multi_dot(type):
-    n = 16
-    a = dpnp.reshape(dpnp.arange(n, dtype=type), (4, 4))
-    b = dpnp.reshape(dpnp.arange(n, dtype=type), (4, 4))
-    c = dpnp.reshape(dpnp.arange(n, dtype=type), (4, 4))
-    d = dpnp.reshape(dpnp.arange(n, dtype=type), (4, 4))
-
-    a1 = numpy.arange(n, dtype=type).reshape((4, 4))
-    b1 = numpy.arange(n, dtype=type).reshape((4, 4))
-    c1 = numpy.arange(n, dtype=type).reshape((4, 4))
-    d1 = numpy.arange(n, dtype=type).reshape((4, 4))
-
-    result = dpnp.linalg.multi_dot([a, b, c, d])
-    expected = numpy.linalg.multi_dot([a1, b1, c1, d1])
-    assert_array_equal(expected, result)
+class TestMultiDot:
+    def setup_method(self):
+        numpy.random.seed(70)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True))
+    def test_multi_dot_all_2d(self, dtype):
+        n = 16
+        a = dpnp.reshape(dpnp.arange(n, dtype=dtype), (4, 4))
+        b = dpnp.reshape(dpnp.arange(n, dtype=dtype), (4, 4))
+        c = dpnp.reshape(dpnp.arange(n, dtype=dtype), (4, 4))
+        d = dpnp.reshape(dpnp.arange(n, dtype=dtype), (4, 4))
+
+        a1 = numpy.arange(n, dtype=dtype).reshape((4, 4))
+        b1 = numpy.arange(n, dtype=dtype).reshape((4, 4))
+        c1 = numpy.arange(n, dtype=dtype).reshape((4, 4))
+        d1 = numpy.arange(n, dtype=dtype).reshape((4, 4))
+
+        result = dpnp.linalg.multi_dot([a, b, c, d])
+        expected = numpy.linalg.multi_dot([a1, b1, c1, d1])
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True))
+    @pytest.mark.parametrize(
+        "shapes",
+        [
+            ((4, 5), (5, 4)),
+            ((4,), (4, 6), (6, 8)),
+            ((4, 8), (8, 6), (6,)),
+            ((6,), (6, 8), (8,)),
+            ((2, 10), (10, 5), (5, 8)),
+            ((8, 5), (5, 10), (10, 2)),
+            ((4, 6), (6, 9), (9, 7), (7, 8)),
+            ((6,), (6, 10), (10, 7), (7, 8)),
+            ((4, 6), (6, 10), (10, 7), (7,)),
+            ((6,), (6, 10), (10, 7), (7,)),
+            ((4, 6), (6, 9), (9, 7), (7, 8), (8, 3)),
+        ],
+        ids=[
+            "two_arrays",
+            "three_arrays_1st_1D",
+            "three_arrays_last_1D",
+            "three_arrays_1st_last_1D",
+            "three_arrays_cost1",
+            "three_arrays_cost2",
+            "four_arrays",
+            "four_arrays_1st_1D",
+            "four_arrays_last_1D",
+            "four_arrays_1st_last_1D",
+            "five_arrays",
+        ],
+    )
+    def test_multi_dot(self, shapes, dtype):
+        numpy_array_list = []
+        dpnp_array_list = []
+        for shape in shapes:
+            a = numpy.array(
+                numpy.random.uniform(-5, 5, numpy.prod(shape)), dtype=dtype
+            ).reshape(shape)
+            ia = dpnp.array(a)
+
+            numpy_array_list.append(a)
+            dpnp_array_list.append(ia)
+
+        result = dpnp.linalg.multi_dot(dpnp_array_list)
+        expected = numpy.linalg.multi_dot(numpy_array_list)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("dtype", get_complex_dtypes())
+    @pytest.mark.parametrize(
+        "shapes",
+        [
+            ((4, 5), (5, 4)),
+            ((4,), (4, 6), (6, 8)),
+            ((4, 8), (8, 6), (6,)),
+            ((6,), (6, 8), (8,)),
+            ((2, 10), (10, 5), (5, 8)),
+            ((8, 5), (5, 10), (10, 2)),
+            ((4, 6), (6, 9), (9, 7), (7, 8)),
+            ((6,), (6, 10), (10, 7), (7, 8)),
+            ((4, 6), (6, 10), (10, 7), (7,)),
+            ((6,), (6, 10), (10, 7), (7,)),
+            ((4, 6), (6, 9), (9, 7), (7, 8), (8, 3)),
+        ],
+        ids=[
+            "two_arrays",
+            "three_arrays_1st_1D",
+            "three_arrays_last_1D",
+            "three_arrays_1st_last_1D",
+            "three_arrays_cost1",
+            "three_arrays_cost2",
+            "four_arrays",
+            "four_arrays_1st_1D",
+            "four_arrays_last_1D",
+            "four_arrays_1st_last_1D",
+            "five_arrays",
+        ],
+    )
+    def test_multi_dot_complex(self, shapes, dtype):
+        numpy_array_list = []
+        dpnp_array_list = []
+        for shape in shapes:
+            x1 = numpy.random.uniform(-5, 5, numpy.prod(shape))
+            x2 = numpy.random.uniform(-5, 5, numpy.prod(shape))
+            a = numpy.array(x1 + 1j * x2, dtype=dtype).reshape(shape)
+            ia = dpnp.array(a)
+
+            numpy_array_list.append(a)
+            dpnp_array_list.append(ia)
+
+        result = dpnp.linalg.multi_dot(dpnp_array_list)
+        expected = numpy.linalg.multi_dot(numpy_array_list)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes())
+    @pytest.mark.parametrize(
+        "shapes",
+        [
+            ((4, 5), (5, 4), (4, 4)),
+            ((4,), (4, 6), (6, 8), (8,)),
+            ((4, 8), (8, 6), (6,), (4,)),
+            ((6,), (6, 8), (8,), ()),
+            ((2, 10), (10, 5), (5, 8), (2, 8)),
+            ((8, 5), (5, 10), (10, 2), (8, 2)),
+            ((4, 6), (6, 9), (9, 7), (7, 8), (4, 8)),
+            ((6,), (6, 10), (10, 7), (7, 8), (8,)),
+            ((4, 6), (6, 10), (10, 7), (7,), (4,)),
+            ((6,), (6, 10), (10, 7), (7,), ()),
+            ((4, 6), (6, 9), (9, 7), (7, 8), (8, 3), (4, 3)),
+        ],
+        ids=[
+            "two_arrays",
+            "three_arrays_1st_1D",
+            "three_arrays_last_1D",
+            "three_arrays_1st_last_1D",
+            "three_arrays_cost1",
+            "three_arrays_cost2",
+            "four_arrays",
+            "four_arrays_1st_1D",
+            "four_arrays_last_1D",
+            "four_arrays_1st_last_1D",
+            "five_arrays",
+        ],
+    )
+    def test_multi_dot_out(self, shapes, dtype):
+        numpy_array_list = []
+        dpnp_array_list = []
+        for shape in shapes[:-1]:
+            a = numpy.array(
+                numpy.random.uniform(-5, 5, numpy.prod(shape)), dtype=dtype
+            ).reshape(shape)
+            ia = dpnp.array(a)
+
+            numpy_array_list.append(a)
+            dpnp_array_list.append(ia)
+
+        dp_out = dpnp.empty(shapes[-1], dtype=dtype)
+        result = dpnp.linalg.multi_dot(dpnp_array_list, out=dp_out)
+        assert result is dp_out
+        expected = numpy.linalg.multi_dot(numpy_array_list)
+        assert_dtype_allclose(result, expected)
+
+    def test_multi_dot_strides(self):
+        for num_array in [2, 3, 4, 5]:  # number of arrays in multi_dot
+            # reset per iteration so exactly `num_array` arrays are multiplied
+            numpy_array_list, dpnp_array_list = [], []
+            for _ in range(num_array):  # create arrays one by one
+                A = numpy.random.rand(20, 20)
+                B = dpnp.array(A)
+
+                slices = (slice(None, None, 2), slice(None, None, 2))
+                a = A[slices]
+                b = B[slices]
+
+                numpy_array_list.append(a)
+                dpnp_array_list.append(b)
+
+            result = dpnp.linalg.multi_dot(dpnp_array_list)
+            expected = numpy.linalg.multi_dot(numpy_array_list)
+            assert_dtype_allclose(result, expected)
+
+    def test_multi_dot_error(self):
+        a = dpnp.ones(25)
+        # Expecting at least two arrays
+        with pytest.raises(ValueError):
+            dpnp.linalg.multi_dot([a])
+
+        a = dpnp.ones((5, 8, 10))
+        b = dpnp.ones((10, 5))
+        c = dpnp.ones((8, 15))
+        # First array must be 1-D or 2-D
+        with pytest.raises(numpy.linalg.LinAlgError):
+            dpnp.linalg.multi_dot([a, b, c])
+
+        a = dpnp.ones((5, 8))
+        b = dpnp.ones((10, 5))
+        c = dpnp.ones((8, 15, 6))
+        # Last array must be 1-D or 2-D
+        with pytest.raises(numpy.linalg.LinAlgError):
+            dpnp.linalg.multi_dot([a, b, c])
+
+        a = dpnp.ones((5, 10))
+        b = dpnp.ones((10, 5, 8))
+        c = dpnp.ones((8, 15))
+        # Inner array must be 2-D
+        with pytest.raises(numpy.linalg.LinAlgError):
+            dpnp.linalg.multi_dot([a, b, c])
+
+        a = dpnp.ones((5, 10))
+        b = dpnp.ones((10, 8))
+        c = dpnp.ones((8, 15))
+        # output should be C-contiguous
+        dp_out = dpnp.empty((5, 15), order="F")
+        with pytest.raises(ValueError):
+            dpnp.linalg.multi_dot([a, b, c], out=dp_out)
 
 
 class TestTensordot:
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index b0bc2c32ba1..a7b3e896d3e 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -9,6 +9,7 @@
 
 import dpnp
 from dpnp.dpnp_array import dpnp_array
+from dpnp.dpnp_utils import get_usm_allocations
 
 from .helper import assert_dtype_allclose, get_all_dtypes, is_win_platform
 
@@ -965,6 +966,56 @@ def test_modf(device):
     assert_sycl_queue_equal(result2_queue, expected_queue)
 
 
+@pytest.mark.parametrize(
+    "device",
+    valid_devices,
+    ids=[device.filter_string for device in valid_devices],
+)
+def test_multi_dot(device):
+    for num_array in [3, 5]:  # number of arrays in multi_dot
+        # reset per iteration so exactly `num_array` arrays are multiplied
+        numpy_array_list, dpnp_array_list = [], []
+        for _ in range(num_array):  # create arrays one by one
+            a = numpy.random.rand(10, 10)
+            b = dpnp.array(a, device=device)
+
+            numpy_array_list.append(a)
+            dpnp_array_list.append(b)
+
+        result = dpnp.linalg.multi_dot(dpnp_array_list)
+        expected = numpy.linalg.multi_dot(numpy_array_list)
+        assert_dtype_allclose(result, expected)
+
+        _, exec_q = get_usm_allocations(dpnp_array_list)
+        assert_sycl_queue_equal(result.sycl_queue, exec_q)
+
+
+@pytest.mark.parametrize(
+    "device",
+    valid_devices,
+    ids=[device.filter_string for device in valid_devices],
+)
+def test_out_multi_dot(device):
+    for num_array in [3, 5]:  # number of arrays in multi_dot
+        # reset per iteration so exactly `num_array` arrays are multiplied
+        numpy_array_list, dpnp_array_list = [], []
+        for _ in range(num_array):  # create arrays one by one
+            a = numpy.random.rand(10, 10)
+            b = dpnp.array(a, device=device)
+
+            numpy_array_list.append(a)
+            dpnp_array_list.append(b)
+
+        dp_out = dpnp.empty((10, 10), device=device)
+        result = dpnp.linalg.multi_dot(dpnp_array_list, out=dp_out)
+        assert result is dp_out
+        expected = numpy.linalg.multi_dot(numpy_array_list)
+        assert_dtype_allclose(result, expected)
+
+        _, exec_q = get_usm_allocations(dpnp_array_list)
+        assert_sycl_queue_equal(result.sycl_queue, exec_q)
+
+
 @pytest.mark.parametrize("type", ["complex128"])
 @pytest.mark.parametrize(
     "device",
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
index 82db42c9bcc..b8d35e80fd6 100644
--- a/tests/test_usm_type.py
+++ b/tests/test_usm_type.py
@@ -7,6 +7,7 @@
 import pytest
 
 import dpnp as dp
+from dpnp.dpnp_utils import get_usm_allocations
 
 from .helper import assert_dtype_allclose
 
@@ -582,6 +583,27 @@ def test_concat_stack(func, data1, data2, usm_type_x, usm_type_y):
     assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
 
 
+@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types)
+def test_multi_dot(usm_type):
+    for num_array in [3, 5]:  # number of arrays in multi_dot
+        # reset per iteration so exactly `num_array` arrays are multiplied
+        numpy_array_list, dpnp_array_list = [], []
+        for _ in range(num_array):  # create arrays one by one
+            a = numpy.random.rand(10, 10)
+            b = dp.array(a, usm_type=usm_type)
+
+            numpy_array_list.append(a)
+            dpnp_array_list.append(b)
+
+        result = dp.linalg.multi_dot(dpnp_array_list)
+        expected = numpy.linalg.multi_dot(numpy_array_list)
+        assert_dtype_allclose(result, expected)
+
+        input_usm_type, _ = get_usm_allocations(dpnp_array_list)
+        assert input_usm_type == usm_type
+        assert result.usm_type == usm_type
+
+
 @pytest.mark.parametrize("func", ["take", "take_along_axis"])
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
 @pytest.mark.parametrize(