diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 5f7f9f2f9eca..1902da37493f 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -223,6 +223,7 @@ jobs: PR_NUM: ${{ github.event.number }} uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2.8.2 with: + message-id: url_to_docs message: | View rendered docs @ https://intelpython.github.io/dpnp/pull/${{ env.PR_NUM }}/index.html allow-repeats: false diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 7e6ce46f28a0..0e83675ba57e 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -600,9 +600,9 @@ jobs: if: ${{ github.event.pull_request && !github.event.pull_request.head.repo.fork }} uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2.8.2 with: + message-id: array_api_results message: | ${{ env.MESSAGE }} - refresh-message-position: true cleanup_packages: name: Clean up anaconda packages diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index ab9b29e3bad0..10bad7a18d58 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -1731,6 +1731,7 @@ def std( keepdims=False, *, where=True, + mean=None, ): """ Returns the standard deviation of the array elements, along given axis. @@ -1739,7 +1740,9 @@ def std( """ - return dpnp.std(self, axis, dtype, out, ddof, keepdims, where=where) + return dpnp.std( + self, axis, dtype, out, ddof, keepdims, where=where, mean=mean + ) @property def strides(self): @@ -1938,6 +1941,7 @@ def var( keepdims=False, *, where=True, + mean=None, ): """ Returns the variance of the array elements, along given axis. @@ -1946,7 +1950,9 @@ def var( """ - return dpnp.var(self, axis, dtype, out, ddof, keepdims, where=where) + return dpnp.var( + self, axis, dtype, out, ddof, keepdims, where=where, mean=mean + ) # 'view' diff --git a/dpnp/dpnp_iface_nanfunctions.py b/dpnp/dpnp_iface_nanfunctions.py index ae0d6cbb1218..281f72a89bf0 100644 --- a/dpnp/dpnp_iface_nanfunctions.py +++ b/dpnp/dpnp_iface_nanfunctions.py @@ -37,6 +37,8 @@ """ +# pylint: disable=duplicate-code + import warnings import dpnp @@ -955,7 +957,15 @@ def nansum( def nanstd( - a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, *, where=True + a, + axis=None, + dtype=None, + out=None, + ddof=0, + keepdims=False, + *, + where=True, + mean=None, ): """ Compute the standard deviation along the specified axis, @@ -969,40 +979,52 @@ def nanstd( Input array. axis : {None, int, tuple of ints}, optional Axis or axes along which the standard deviations must be computed. - If a tuple of unique integers is given, the standard deviations - are computed over multiple axes. If ``None``, the standard deviation - is computed over the entire array. + If a tuple of unique integers is given, the standard deviations are + computed over multiple axes. If ``None``, the standard deviation is + computed over the entire array. + Default: ``None``. dtype : {None, dtype}, optional - Type to use in computing the standard deviation. By default, - if `a` has a floating-point data type, the returned array - will have the same data type as `a`. - If `a` has a boolean or integral data type, the returned array - will have the default floating point data type for the device + Type to use in computing the standard deviation. By default, if `a` has + a floating-point data type, the returned array will have the same data + type as `a`. 
If `a` has a boolean or integral data type, the returned + array will have the default floating point data type for the device where input array `a` is allocated. + + Default: ``None``. out : {None, dpnp.ndarray, usm_ndarray}, optional Alternative output array in which to place the result. It must have the same shape as the expected output but the type (of the calculated values) will be cast if necessary. + + Default: ``None``. ddof : {int, float}, optional - Means Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` the number of non-NaN elements. - Default: `0.0`. + Means Delta Degrees of Freedom. The divisor used in calculations is + ``N - ddof``, where ``N`` the number of non-NaN elements. + + Default: ``0.0``. keepdims : {None, bool}, optional If ``True``, the reduced axes (dimensions) are included in the result - as singleton dimensions, so that the returned array remains - compatible with the input array according to Array Broadcasting - rules. Otherwise, if ``False``, the reduced axes are not included in - the returned array. Default: ``False``. + as singleton dimensions, so that the returned array remains compatible + with the input array according to Array Broadcasting rules. Otherwise, + if ``False``, the reduced axes are not included in the returned array. + + Default: ``False``. + mean : {dpnp.ndarray, usm_ndarray}, optional + Provide the mean to prevent its recalculation. The mean should have + a shape as if it was calculated with ``keepdims=True``. + The axis for the calculation of the mean should be the same as used in + the call to this `nanstd` function. + + Default: ``None``. Returns ------- out : dpnp.ndarray - An array containing the standard deviations. If the standard - deviation was computed over the entire array, a zero-dimensional - array is returned. If `ddof` is >= the number of non-NaN elements - in a slice or the slice contains only NaNs, then the result for - that slice is NaN. + An array containing the standard deviations. If the standard deviation + was computed over the entire array, a zero-dimensional array is + returned. If `ddof` is >= the number of non-NaN elements in a slice or + the slice contains only NaNs, then the result for that slice is NaN. Limitations ----------- @@ -1011,6 +1033,19 @@ def nanstd( Notes ----- + The standard deviation is the square root of the average of the squared + deviations from the mean: ``std = sqrt(mean(abs(x - x.mean())**2))``. + + The average squared deviation is normally calculated as ``x.sum() / N``, + where ``N = len(x)``. If, however, `ddof` is specified, the divisor + ``N - ddof`` is used instead. In standard statistical practice, ``ddof=1`` + provides an unbiased estimator of the variance of the infinite population. + ``ddof=0`` provides a maximum likelihood estimate of the variance for + normally distributed variables. + The standard deviation computed in this function is the square root of + the estimated variance, so even with ``ddof=1``, it will not be an unbiased + estimate of the standard deviation per se. + Note that, for complex numbers, the absolute value is taken before squaring, so that the result is always real and non-negative. @@ -1029,11 +1064,18 @@ def nanstd( >>> import dpnp as np >>> a = np.array([[1, np.nan], [3, 4]]) >>> np.nanstd(a) - array(1.247219128924647) + array(1.24721913) >>> np.nanstd(a, axis=0) - array([1., 0.]) + array([1., 0.]) >>> np.nanstd(a, axis=1) - array([0., 0.5]) # may vary + array([0. 
, 0.5]) # may vary + + Using the mean keyword to save computation time: + + >>> a = np.array([[14, 8, np.nan, 10], [7, 9, 10, 11], [np.nan, 15, 5, 10]]) + >>> mean = np.nanmean(a, axis=1, keepdims=True) + >>> np.nanstd(a, axis=1, mean=mean) + array([2.49443826, 1.47901995, 4.0824829 ]) """ @@ -1051,13 +1093,21 @@ def nanstd( ddof=ddof, keepdims=keepdims, where=where, + mean=mean, ) - dpnp.sqrt(res, out=res) - return res + return dpnp.sqrt(res, out=res) def nanvar( - a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, *, where=True + a, + axis=None, + dtype=None, + out=None, + ddof=0, + keepdims=False, + *, + where=True, + mean=None, ): """ Compute the variance along the specified axis, while ignoring NaNs. @@ -1069,39 +1119,52 @@ def nanvar( a : {dpnp.ndarray, usm_ndarray} Input array. axis : {None, int, tuple of ints}, optional - axis or axes along which the variances must be computed. If a tuple + Axis or axes along which the variances must be computed. If a tuple of unique integers is given, the variances are computed over multiple axes. If ``None``, the variance is computed over the entire array. + Default: ``None``. dtype : {None, dtype}, optional Type to use in computing the variance. By default, if `a` has a floating-point data type, the returned array will have - the same data type as `a`. - If `a` has a boolean or integral data type, the returned array - will have the default floating point data type for the device - where input array `a` is allocated. + the same data type as `a`. If `a` has a boolean or integral data type, + the returned array will have the default floating point data type for + the device where input array `a` is allocated. + + Default: ``None``. out : {None, dpnp.ndarray, usm_ndarray}, optional Alternative output array in which to place the result. It must have the same shape as the expected output but the type (of the calculated values) will be cast if necessary. + + Default: ``None``. ddof : {int, float}, optional - Means Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of non-NaN elements. - Default: `0.0`. + Means Delta Degrees of Freedom. The divisor used in calculations is + ``N - ddof``, where ``N`` represents the number of non-NaN elements. + + Default: ``0.0``. keepdims : {None, bool}, optional If ``True``, the reduced axes (dimensions) are included in the result - as singleton dimensions, so that the returned array remains - compatible with the input array according to Array Broadcasting - rules. Otherwise, if ``False``, the reduced axes are not included in - the returned array. Default: ``False``. + as singleton dimensions, so that the returned array remains compatible + with the input array according to Array Broadcasting rules. Otherwise, + if ``False``, the reduced axes are not included in the returned array. + + Default: ``False``. + mean : {dpnp.ndarray, usm_ndarray}, optional + Provide the mean to prevent its recalculation. The mean should have + a shape as if it was calculated with ``keepdims=True``. + The axis for the calculation of the mean should be the same as used in + the call to this `nanvar` function. + + Default: ``None``. Returns ------- out : dpnp.ndarray - An array containing the variances. If the variance was computed - over the entire array, a zero-dimensional array is returned. - If `ddof` is >= the number of non-NaN elements in a slice or the - slice contains only NaNs, then the result for that slice is NaN. + An array containing the variances. 
If the variance was computed over + the entire array, a zero-dimensional array is returned. If `ddof` is >= + the number of non-NaN elements in a slice or the slice contains only + NaNs, then the result for that slice is NaN. Limitations ----------- @@ -1110,6 +1173,16 @@ def nanvar( Notes ----- + The variance is the average of the squared deviations from the mean, + that is ``var = mean(abs(x - x.mean())**2)``. + + The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``. + If, however, `ddof` is specified, the divisor ``N - ddof`` is used instead. + In standard statistical practice, ``ddof=1`` provides an unbiased estimator + of the variance of a hypothetical infinite population. ``ddof=0`` provides + a maximum likelihood estimate of the variance for normally distributed + variables. + Note that, for complex numbers, the absolute value is taken before squaring, so that the result is always real and non-negative. @@ -1127,11 +1200,18 @@ def nanvar( >>> import dpnp as np >>> a = np.array([[1, np.nan], [3, 4]]) >>> np.nanvar(a) - array(1.5555555555555554) + array(1.55555556) >>> np.nanvar(a, axis=0) - array([1., 0.]) + array([1., 0.]) >>> np.nanvar(a, axis=1) - array([0., 0.25]) # may vary + array([0. , 0.25]) # may vary + + Using the mean keyword to save computation time: + + >>> a = np.array([[14, 8, np.nan, 10], [7, 9, 10, 11], [np.nan, 15, 5, 10]]) + >>> mean = np.nanmean(a, axis=1, keepdims=True) + >>> np.nanvar(a, axis=1, mean=mean) + array([ 6.22222222, 2.1875 , 16.66666667]) """ @@ -1157,35 +1237,40 @@ def nanvar( dtype = dpnp.dtype(dtype) if not dpnp.issubdtype(dtype, dpnp.inexact): raise TypeError("If input is inexact, then dtype must be inexact.") + if out is not None: dpnp.check_supported_arrays_type(out) if not dpnp.issubdtype(out.dtype, dpnp.inexact): raise TypeError("If input is inexact, then out must be inexact.") # Compute mean - var_dtype = a.real.dtype if dtype is None else dtype cnt = dpnp.sum( - ~mask, axis=axis, dtype=var_dtype, keepdims=True, where=where + ~mask, axis=axis, dtype=dpnp.intp, keepdims=True, where=where ) - avg = dpnp.sum(arr, axis=axis, dtype=dtype, keepdims=True, where=where) - avg = dpnp.divide(avg, cnt, out=avg) - # Compute squared deviation from mean. + if mean is not None: + avg = mean + else: + avg = dpnp.sum(arr, axis=axis, dtype=dtype, keepdims=True, where=where) + avg = dpnp.divide(avg, cnt, out=avg) + + # Compute squared deviation from mean if arr.dtype == avg.dtype: arr = dpnp.subtract(arr, avg, out=arr) else: arr = dpnp.subtract(arr, avg) dpnp.copyto(arr, 0.0, where=mask) + if dpnp.issubdtype(arr.dtype, dpnp.complexfloating): sqr = dpnp.multiply(arr, arr.conj(), out=arr).real else: - sqr = dpnp.multiply(arr, arr, out=arr) + sqr = dpnp.square(arr, out=arr) # Compute variance var = dpnp.sum( sqr, axis=axis, - dtype=var_dtype, + dtype=dtype, out=out, keepdims=keepdims, where=where, @@ -1193,10 +1278,10 @@ def nanvar( if var.ndim < cnt.ndim: cnt = cnt.squeeze(axis) - cnt -= ddof - dpnp.divide(var, cnt, out=var) + dof = cnt - ddof + dpnp.divide(var, dof, out=var) - isbad = cnt <= 0 + isbad = dof <= 0 if dpnp.any(isbad): # NaN, inf, or negative numbers are all possible bad # values, so explicitly replace them with NaN. 
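(Editorial note, not part of the patch.) A minimal usage sketch of the ``mean`` keyword that the reworked ``nanstd``/``nanvar`` above introduce: passing a mean precomputed with ``keepdims=True`` along the same axis should only skip the internal mean computation, not change the result. The array values below are arbitrary, and ``dpnp`` is assumed to be installed with a default SYCL device available.

import dpnp as np

a = np.array([[14.0, 8.0, np.nan, 10.0], [7.0, 9.0, 10.0, 11.0]])
m = np.nanmean(a, axis=1, keepdims=True)

# Same result with and without the precomputed mean; only the internal
# call to nanmean is avoided when ``mean`` is supplied.
assert np.allclose(np.nanvar(a, axis=1, mean=m), np.nanvar(a, axis=1))
assert np.allclose(np.nanstd(a, axis=1, mean=m), np.nanstd(a, axis=1))
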
diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 49a87a99c202..857f576b1c42 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -38,6 +38,7 @@ """ import dpctl.tensor as dpt +import dpctl.tensor._tensor_elementwise_impl as ti import dpctl.utils as dpu import numpy from dpctl.tensor._numpy_helper import normalize_axis_index @@ -86,7 +87,8 @@ def _count_reduce_items(arr, axis, where=True): operation must be counted. If a tuple of unique integers is given, the items are counted over multiple axes. If ``None``, the variance is computed over the entire array. - Default: `None`. + + Default: ``None``. Returns ------- @@ -115,6 +117,42 @@ def _count_reduce_items(arr, axis, where=True): return items +def _divide_by_scalar(a, v): + """ + Divide input array by a scalar. + + The division is implemented through dedicated ``ti._divide_by_scalar`` + function which has a better performance comparing to standard ``divide`` + function, because there is no need to have internal call of ``asarray`` + for the denominator `v` and so it avoids allocating extra device memory. + + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + Input array. + v : scalar + The scalar denominator. + + Returns + ------- + out : dpnp.ndarray + An array containing the result of division. + + """ + + usm_a = dpnp.get_usm_ndarray(a) + queue = usm_a.sycl_queue + _manager = dpu.SequentialOrderManager[queue] + dep_evs = _manager.submitted_events + + # pylint: disable=protected-access + ht_ev, div_ev = ti._divide_by_scalar( + src=usm_a, scalar=v, dst=usm_a, sycl_queue=queue, depends=dep_evs + ) + _manager.add_event_pair(ht_ev, div_ev) + return a + + def amax(a, axis=None, out=None, keepdims=False, initial=None, where=True): """ Return the maximum of an array or maximum along an axis. @@ -1072,10 +1110,19 @@ def ptp( ) +# pylint: disable=redefined-outer-name def std( - a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, *, where=True + a, + axis=None, + dtype=None, + out=None, + ddof=0, + keepdims=False, + *, + where=True, + mean=None, ): - """ + r""" Compute the standard deviation along the specified axis. For full documentation refer to :obj:`numpy.std`. @@ -1089,36 +1136,49 @@ def std( If a tuple of unique integers is given, the standard deviations are computed over multiple axes. If ``None``, the standard deviation is computed over the entire array. + Default: ``None``. dtype : {None, dtype}, optional - Type to use in computing the standard deviation. By default, - if `a` has a floating-point data type, the returned array - will have the same data type as `a`. - If `a` has a boolean or integral data type, the returned array - will have the default floating point data type for the device + Type to use in computing the standard deviation. By default, if `a` has + a floating-point data type, the returned array will have the same data + type as `a`. If `a` has a boolean or integral data type, the returned + array will have the default floating point data type for the device where input array `a` is allocated. + + Default: ``None``. out : {None, dpnp.ndarray, usm_ndarray}, optional Alternative output array in which to place the result. It must have the same shape as the expected output but the type (of the calculated values) will be cast if necessary. + + Default: ``None``. ddof : {int, float}, optional - Means Delta Degrees of Freedom. 
The divisor used in calculations - is ``N - ddof``, where ``N`` corresponds to the total - number of elements over which the standard deviation is calculated. - Default: `0.0`. + Means Delta Degrees of Freedom. The divisor used in calculations is + ``N - ddof``, where ``N`` corresponds to the total number of elements + over which the standard deviation is calculated. + + Default: ``0.0``. keepdims : {None, bool}, optional If ``True``, the reduced axes (dimensions) are included in the result - as singleton dimensions, so that the returned array remains - compatible with the input array according to Array Broadcasting - rules. Otherwise, if ``False``, the reduced axes are not included in - the returned array. Default: ``False``. + as singleton dimensions, so that the returned array remains compatible + with the input array according to Array Broadcasting rules. Otherwise, + if ``False``, the reduced axes are not included in the returned array. + + Default: ``False``. + mean : {dpnp.ndarray, usm_ndarray}, optional + Provide the mean to prevent its recalculation. The mean should have + a shape as if it was calculated with ``keepdims=True``. + The axis for the calculation of the mean should be the same as used in + the call to this `std` function. + + Default: ``None``. Returns ------- out : dpnp.ndarray - An array containing the standard deviations. If the standard - deviation was computed over the entire array, a zero-dimensional - array is returned. + An array containing the standard deviations. If the standard deviation + was computed over the entire array, a zero-dimensional array is + returned. Limitations ----------- @@ -1127,6 +1187,45 @@ def std( Notes ----- + There are several common variants of the array standard deviation + calculation. Assuming the input `a` is a one-dimensional array and `mean` + is either provided as an argument or computed as ``a.mean()``, DPNP + computes the standard deviation of an array as:: + + N = len(a) + d2 = abs(a - mean)**2 # abs is for complex `a` + var = d2.sum() / (N - ddof) # note use of `ddof` + std = var**0.5 + + Different values of the argument `ddof` are useful in different contexts. + The default ``ddof=0`` corresponds with the expression: + + .. math:: + + \sqrt{\frac{\sum_i{|a_i - \bar{a}|^2 }}{N}} + + which is sometimes called the "population standard deviation" in the field + of statistics because it applies the definition of standard deviation to `a` + as if `a` were a complete population of possible observations. + + Many other libraries define the standard deviation of an array + differently, e.g.: + + .. math:: + + \sqrt{\frac{\sum_i{|a_i - \bar{a}|^2 }}{N - 1}} + + In statistics, the resulting quantity is sometimes called the "sample + standard deviation" because if `a` is a random sample from a larger + population, this calculation provides the square root of an unbiased + estimate of the variance of the population. The use of :math:`N-1` in the + denominator is often called "Bessel's correction" because it corrects for + bias (toward lower values) in the variance estimate introduced when the + sample mean of `a` is used in place of the true mean of the population. + The resulting estimate of the standard deviation is still biased, but less + than it would have been without the correction. For this quantity, use + ``ddof=1``. + Note that, for complex numbers, the absolute value is taken before squaring, so that the result is always real and non-negative. 
@@ -1147,11 +1246,18 @@ def std( >>> import dpnp as np >>> a = np.array([[1, 2], [3, 4]]) >>> np.std(a) - array(1.118033988749895) + array(1.11803399) >>> np.std(a, axis=0) - array([1., 1.]) + array([1., 1.]) >>> np.std(a, axis=1) - array([0.5, 0.5]) + array([0.5, 0.5]) + + Using the mean keyword to save computation time: + + >>> a = np.array([[14, 8, 11, 10], [7, 9, 10, 11], [10, 15, 5, 10]]) + >>> mean = np.mean(a, axis=1, keepdims=True) + >>> np.std(a, axis=1, mean=mean) + array([2.16506351, 1.47901995, 3.53553391]) """ @@ -1163,7 +1269,7 @@ def std( f"An integer or float is required, but got {type(ddof)}" ) - if dpnp.issubdtype(a.dtype, dpnp.complexfloating): + if dpnp.issubdtype(a.dtype, dpnp.complexfloating) or mean is not None: result = dpnp.var( a, axis=axis, @@ -1172,6 +1278,7 @@ def std( ddof=ddof, keepdims=keepdims, where=where, + mean=mean, ) dpnp.sqrt(result, out=result) else: @@ -1184,10 +1291,19 @@ def std( return result +# pylint: disable=redefined-outer-name def var( - a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, *, where=True + a, + axis=None, + dtype=None, + out=None, + ddof=0, + keepdims=False, + *, + where=True, + mean=None, ): - """ + r""" Compute the variance along the specified axis. For full documentation refer to :obj:`numpy.var`. @@ -1197,38 +1313,51 @@ def var( a : {dpnp.ndarray, usm_ndarray} Input array. axis : {None, int, tuple of ints}, optional - axis or axes along which the variances must be computed. If a tuple + Axis or axes along which the variances must be computed. If a tuple of unique integers is given, the variances are computed over multiple axes. If ``None``, the variance is computed over the entire array. + Default: ``None``. dtype : {None, dtype}, optional Type to use in computing the variance. By default, if `a` has a - floating-point data type, the returned array will have - the same data type as `a`. - If `a` has a boolean or integral data type, the returned array - will have the default floating point data type for the device + floating-point data type, the returned array will have the same data + type as `a`. If `a` has a boolean or integral data type, the returned + array will have the default floating point data type for the device where input array `a` is allocated. + + Default: ``None``. out : {None, dpnp.ndarray, usm_ndarray}, optional Alternative output array in which to place the result. It must have the same shape as the expected output but the type (of the calculated values) will be cast if necessary. + + Default: ``None``. ddof : {int, float}, optional - Means Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` corresponds to the total - number of elements over which the variance is calculated. - Default: `0.0`. + Means Delta Degrees of Freedom. The divisor used in calculations is + ``N - ddof``, where ``N`` corresponds to the total number of elements + over which the variance is calculated. + + Default: ``0.0``. keepdims : {None, bool}, optional If ``True``, the reduced axes (dimensions) are included in the result - as singleton dimensions, so that the returned array remains - compatible with the input array according to Array Broadcasting - rules. Otherwise, if ``False``, the reduced axes are not included in - the returned array. Default: ``False``. + as singleton dimensions, so that the returned array remains compatible + with the input array according to Array Broadcasting rules. Otherwise, + if ``False``, the reduced axes are not included in the returned array. 
+ + Default: ``False``. + mean : {dpnp.ndarray, usm_ndarray}, optional + Provide the mean to prevent its recalculation. The mean should have + a shape as if it was calculated with ``keepdims=True``. + The axis for the calculation of the mean should be the same as used in + the call to this `var` function. + + Default: ``None``. Returns ------- out : dpnp.ndarray - An array containing the variances. If the variance was computed - over the entire array, a zero-dimensional array is returned. + An array containing the variances. If the variance was computed over + the entire array, a zero-dimensional array is returned. Limitations ----------- @@ -1237,6 +1366,40 @@ def var( Notes ----- + There are several common variants of the array variance calculation. + Assuming the input `a` is a one-dimensional array and `mean` is either + provided as an argument or computed as ``a.mean()``, DPNP computes the + variance of an array as:: + + N = len(a) + d2 = abs(a - mean)**2 # abs is for complex `a` + var = d2.sum() / (N - ddof) # note use of `ddof` + + Different values of the argument `ddof` are useful in different contexts. + The default ``ddof=0`` corresponds with the expression: + + .. math:: + + \frac{\sum_i{|a_i - \bar{a}|^2 }}{N} + + which is sometimes called the "population variance" in the field of + statistics because it applies the definition of variance to `a` as if `a` + were a complete population of possible observations. + + Many other libraries define the variance of an array differently, e.g.: + + .. math:: + + \frac{\sum_i{|a_i - \bar{a}|^2}}{N - 1} + + In statistics, the resulting quantity is sometimes called the "sample + variance" because if `a` is a random sample from a larger population, this + calculation provides an unbiased estimate of the variance of the population. + The use of :math:`N-1` in the denominator is often called "Bessel's + correction" because it corrects for bias (toward lower values) in the + variance estimate introduced when the sample mean of `a` is used in place + of the true mean of the population. For this quantity, use ``ddof=1``. + Note that, for complex numbers, the absolute value is taken before squaring, so that the result is always real and non-negative. @@ -1259,9 +1422,16 @@ def var( >>> np.var(a) array(1.25) >>> np.var(a, axis=0) - array([1., 1.]) + array([1., 1.]) >>> np.var(a, axis=1) - array([0.25, 0.25]) + array([0.25, 0.25]) + + Using the mean keyword to save computation time: + + >>> a = np.array([[14, 8, 11, 10], [7, 9, 10, 11], [10, 15, 5, 10]]) + >>> mean = np.mean(a, axis=1, keepdims=True) + >>> np.var(a, axis=1, mean=mean) + array([ 4.6875, 2.1875, 12.5 ]) """ @@ -1273,29 +1443,46 @@ def var( f"An integer or float is required, but got {type(ddof)}" ) - if dpnp.issubdtype(a.dtype, dpnp.complexfloating): - # Note that if dtype is not of inexact type then arrmean - # will not be either. - arrmean = dpnp.mean( - a, axis=axis, dtype=dtype, keepdims=True, where=where - ) + if dpnp.issubdtype(a.dtype, dpnp.complexfloating) or mean is not None: + # cast bool and integer types to default floating type + if dtype is None and not dpnp.issubdtype(a.dtype, dpnp.inexact): + dtype = dpnp.default_float_type(device=a.device) + + if mean is not None: + arrmean = mean + else: + # Compute the mean. + # Note that if dtype is not of inexact type + # then `arrmean` will not be either. + arrmean = dpnp.mean( + a, axis=axis, dtype=dtype, keepdims=True, where=where + ) + + # Compute sum of squared deviations from mean. + # Note that `x` may not be inexact. 
x = dpnp.subtract(a, arrmean) - x = dpnp.multiply(x, x.conj(), out=x).real + if dpnp.issubdtype(x.dtype, dpnp.complexfloating): + x = dpnp.multiply(x, x.conj(), out=x).real + else: + x = dpnp.square(x, out=x) + result = dpnp.sum( x, axis=axis, - dtype=a.real.dtype, + dtype=dtype, out=out, keepdims=keepdims, where=where, ) + # compute degrees of freedom and make sure it is not negative cnt = _count_reduce_items(a, axis, where) cnt = numpy.max(cnt - ddof, 0).astype(result.dtype, casting="same_kind") if not cnt: cnt = dpnp.nan - dpnp.divide(result, cnt, out=result) + # divide by degrees of freedom + result = _divide_by_scalar(result, cnt) else: usm_a = dpnp.get_usm_ndarray(a) usm_res = dpt.var(usm_a, axis=axis, correction=ddof, keepdims=keepdims) diff --git a/dpnp/tests/test_nanfunctions.py b/dpnp/tests/test_nanfunctions.py index 5645586219e4..92dda789b78b 100644 --- a/dpnp/tests/test_nanfunctions.py +++ b/dpnp/tests/test_nanfunctions.py @@ -15,11 +15,13 @@ from .helper import ( assert_dtype_allclose, + generate_random_numpy_array, get_all_dtypes, get_complex_dtypes, get_float_complex_dtypes, get_float_dtypes, has_support_aspect64, + numpy_version, ) from .third_party.cupy import testing @@ -604,7 +606,14 @@ def test_nanprod_Error(self): dpnp.nanprod(dpnp.asnumpy(ia)) -class TestNanStd: +@testing.parameterize( + *testing.product( + { + "func": ("nanstd", "nanvar"), + } + ) +) +class TestNanStdVar: @pytest.mark.parametrize( "array", [ @@ -646,27 +655,26 @@ class TestNanStd: @pytest.mark.parametrize( "dtype", get_all_dtypes(no_none=True, no_bool=True) ) - def test_nanstd(self, array, dtype): + def test_basic(self, array, dtype): try: a = numpy.array(array, dtype=dtype) except: pytest.skip("floating datat type is needed to store NaN") ia = dpnp.array(a) + for ddof in range(a.ndim): - expected = numpy.nanstd(a, ddof=ddof) - result = dpnp.nanstd(ia, ddof=ddof) + expected = getattr(numpy, self.func)(a, ddof=ddof) + result = getattr(dpnp, self.func)(ia, ddof=ddof) assert_dtype_allclose(result, expected) @pytest.mark.parametrize("dtype", get_complex_dtypes()) - def test_nanstd_complex(self, dtype): - x1 = numpy.random.rand(10) - x2 = numpy.random.rand(10) - a = numpy.array(x1 + 1j * x2, dtype=dtype) + def test_complex_dtype(self, dtype): + a = generate_random_numpy_array(10, dtype=dtype) a[::3] = numpy.nan ia = dpnp.array(a) - expected = numpy.nanstd(a) - result = dpnp.nanstd(ia) + expected = getattr(numpy, self.func)(a) + result = getattr(dpnp, self.func)(ia) assert_dtype_allclose(result, expected) @pytest.mark.usefixtures("suppress_dof_numpy_warnings") @@ -674,61 +682,94 @@ def test_nanstd_complex(self, dtype): @pytest.mark.parametrize("axis", [None, 0, 1, 2, (0, 1), (1, 2)]) @pytest.mark.parametrize("keepdims", [True, False]) @pytest.mark.parametrize("ddof", [0, 0.5, 1, 1.5, 2, 3]) - def test_nanstd_out(self, dtype, axis, keepdims, ddof): + def test_out_keyword(self, dtype, axis, keepdims, ddof): a = numpy.arange(4 * 3 * 5, dtype=dtype) a[::2] = numpy.nan a = a.reshape(4, 3, 5) ia = dpnp.array(a) - expected = numpy.nanstd(a, axis=axis, ddof=ddof, keepdims=keepdims) + expected = getattr(numpy, self.func)( + a, axis=axis, ddof=ddof, keepdims=keepdims + ) if has_support_aspect64(): res_dtype = expected.dtype else: res_dtype = dpnp.default_float_type(ia.device) out = dpnp.empty(expected.shape, dtype=res_dtype) - result = dpnp.nanstd( + result = getattr(dpnp, self.func)( ia, out=out, axis=axis, ddof=ddof, keepdims=keepdims ) assert result is out assert_dtype_allclose(result, expected) 
@pytest.mark.parametrize("dtype", get_float_complex_dtypes()) - def test_nanstd_strided(self, dtype): - dp_array = dpnp.arange(20, dtype=dtype) - dp_array[::3] = dpnp.nan - np_array = dpnp.asnumpy(dp_array) + def test_strided_array(self, dtype): + a = numpy.arange(20, dtype=dtype) + a[::3] = numpy.nan + ia = dpnp.array(a) - result = dpnp.nanstd(dp_array[::-1]) - expected = numpy.nanstd(np_array[::-1]) + result = getattr(dpnp, self.func)(ia[::-1]) + expected = getattr(numpy, self.func)(a[::-1]) assert_dtype_allclose(result, expected) - result = dpnp.nanstd(dp_array[::2]) - expected = numpy.nanstd(np_array[::2]) + result = getattr(dpnp, self.func)(ia[::2]) + expected = getattr(numpy, self.func)(a[::2]) assert_dtype_allclose(result, expected) @pytest.mark.usefixtures("suppress_complex_warning") @pytest.mark.parametrize("dt_in", get_float_complex_dtypes()) @pytest.mark.parametrize("dt_out", get_float_complex_dtypes()) - def test_nanstd_dtype(self, dt_in, dt_out): + def test_dtype_keyword(self, dt_in, dt_out): a = numpy.arange(4 * 3 * 5, dtype=dt_in) a[::2] = numpy.nan a = a.reshape(4, 3, 5) ia = dpnp.array(a) - expected = numpy.nanstd(a, dtype=dt_out) - result = dpnp.nanstd(ia, dtype=dt_out) + expected = getattr(numpy, self.func)(a, dtype=dt_out) + result = getattr(dpnp, self.func)(ia, dtype=dt_out) assert_dtype_allclose(result, expected) - def test_nanstd_error(self): + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + @pytest.mark.parametrize("axis", [1, (0, 2), None]) + @pytest.mark.parametrize("keepdims", [True, False]) + def test_mean_keyword(self, dtype, axis, keepdims): + a = generate_random_numpy_array((10, 20, 5), dtype) + mask = numpy.random.choice([True, False], size=a.size, p=[0.3, 0.7]) + numpy.place(a, mask, numpy.nan) + ia = dpnp.array(a) + + mean = numpy.nanmean(a, axis=axis, keepdims=True) + imean = dpnp.nanmean(ia, axis=axis, keepdims=True) + + mean_kw = {"mean": mean} if numpy_version() >= "2.0.0" else {} + expected = getattr(numpy, self.func)( + a, axis=axis, keepdims=keepdims, **mean_kw + ) + result = getattr(dpnp, self.func)( + ia, axis=axis, keepdims=keepdims, mean=imean + ) + assert_dtype_allclose(result, expected) + + def test_error(self): ia = dpnp.arange(5, dtype=dpnp.float32) ia[0] = dpnp.nan + # where keyword is not implemented with pytest.raises(NotImplementedError): - dpnp.nanstd(ia, where=False) + getattr(dpnp, self.func)(ia, where=False) + + # dtype should be floating + with pytest.raises(TypeError): + getattr(dpnp, self.func)(ia, dtype=dpnp.int32) + + # out dtype should be inexact + res = dpnp.empty((1,), dtype=dpnp.int32) + with pytest.raises(TypeError): + getattr(dpnp, self.func)(ia, out=res) # ddof should be an integer or float with pytest.raises(TypeError): - dpnp.nanstd(ia, ddof="1") + getattr(dpnp, self.func)(ia, ddof="1") class TestNanSum: @@ -789,139 +830,3 @@ def test_nansum_strided(self, dtype): result = dpnp.nansum(dp_array[::2]) expected = numpy.nansum(np_array[::2]) assert_allclose(result, expected) - - -class TestNanVar: - @pytest.mark.parametrize( - "array", - [ - [2, 0, 6, 2], - [2, 0, 6, 2, 5, 6, 7, 8], - [], - [2, 1, numpy.nan, 5, 3], - [-1, numpy.nan, 1, numpy.inf], - [3, 6, 0, 1], - [3, 6, 0, 1, 8], - [3, 2, 9, 6, numpy.nan], - [numpy.nan, numpy.nan, numpy.inf, numpy.nan], - [[2, 0], [6, 2]], - [[2, 0, 6, 2], [5, 6, 7, 8]], - [[[2, 0], [6, 2]], [[5, 6], [7, 8]]], - [[-1, numpy.nan], [1, numpy.inf]], - [[numpy.nan, numpy.nan], [numpy.inf, numpy.nan]], - ], - ids=[ - "[2, 0, 6, 2]", - "[2, 0, 6, 2, 5, 6, 7, 8]", - "[]", - "[2, 
1, np.nan, 5, 3]", - "[-1, np.nan, 1, np.inf]", - "[3, 6, 0, 1]", - "[3, 6, 0, 1, 8]", - "[3, 2, 9, 6, np.nan]", - "[np.nan, np.nan, np.inf, np.nan]", - "[[2, 0], [6, 2]]", - "[[2, 0, 6, 2], [5, 6, 7, 8]]", - "[[[2, 0], [6, 2]], [[5, 6], [7, 8]]]", - "[[-1, np.nan], [1, np.inf]]", - "[[np.nan, np.nan], [np.inf, np.nan]]", - ], - ) - @pytest.mark.usefixtures( - "suppress_invalid_numpy_warnings", "suppress_dof_numpy_warnings" - ) - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_none=True, no_bool=True) - ) - def test_nanvar(self, array, dtype): - try: - a = numpy.array(array, dtype=dtype) - except: - pytest.skip("floating datat type is needed to store NaN") - ia = dpnp.array(a) - for ddof in range(a.ndim): - expected = numpy.nanvar(a, ddof=ddof) - result = dpnp.nanvar(ia, ddof=ddof) - assert_dtype_allclose(result, expected) - - @pytest.mark.parametrize("dtype", get_complex_dtypes()) - def test_nanvar_complex(self, dtype): - x1 = numpy.random.rand(10) - x2 = numpy.random.rand(10) - a = numpy.array(x1 + 1j * x2, dtype=dtype) - a[::3] = numpy.nan - ia = dpnp.array(a) - - expected = numpy.nanvar(a) - result = dpnp.nanvar(ia) - assert_dtype_allclose(result, expected) - - @pytest.mark.usefixtures("suppress_dof_numpy_warnings") - @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) - @pytest.mark.parametrize("axis", [None, 0, 1, 2, (0, 1), (1, 2)]) - @pytest.mark.parametrize("keepdims", [True, False]) - @pytest.mark.parametrize("ddof", [0, 0.5, 1, 1.5, 2, 3]) - def test_nanvar_out(self, dtype, axis, keepdims, ddof): - a = numpy.arange(4 * 3 * 5, dtype=dtype) - a[::2] = numpy.nan - a = a.reshape(4, 3, 5) - ia = dpnp.array(a) - - expected = numpy.nanvar(a, axis=axis, ddof=ddof, keepdims=keepdims) - if has_support_aspect64(): - res_dtype = expected.dtype - else: - res_dtype = dpnp.default_float_type(ia.device) - out = dpnp.empty(expected.shape, dtype=res_dtype) - result = dpnp.nanvar( - ia, out=out, axis=axis, ddof=ddof, keepdims=keepdims - ) - assert result is out - assert_dtype_allclose(result, expected) - - @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) - def test_nanvar_strided(self, dtype): - dp_array = dpnp.arange(20, dtype=dtype) - dp_array[::3] = dpnp.nan - np_array = dpnp.asnumpy(dp_array) - - result = dpnp.nanvar(dp_array[::-1]) - expected = numpy.nanvar(np_array[::-1]) - assert_dtype_allclose(result, expected) - - result = dpnp.nanvar(dp_array[::2]) - expected = numpy.nanvar(np_array[::2]) - assert_dtype_allclose(result, expected) - - @pytest.mark.usefixtures("suppress_complex_warning") - @pytest.mark.parametrize("dt_in", get_float_complex_dtypes()) - @pytest.mark.parametrize("dt_out", get_float_complex_dtypes()) - def test_nanvar_dtype(self, dt_in, dt_out): - a = numpy.arange(4 * 3 * 5, dtype=dt_in) - a[::2] = numpy.nan - a = a.reshape(4, 3, 5) - ia = dpnp.array(a) - - expected = numpy.nanvar(a, dtype=dt_out) - result = dpnp.nanvar(ia, dtype=dt_out) - assert_dtype_allclose(result, expected) - - def test_nanvar_error(self): - ia = dpnp.arange(5, dtype=dpnp.float32) - ia[0] = dpnp.nan - # where keyword is not implemented - with pytest.raises(NotImplementedError): - dpnp.nanvar(ia, where=False) - - # dtype should be floating - with pytest.raises(TypeError): - dpnp.nanvar(ia, dtype=dpnp.int32) - - # out dtype should be inexact - res = dpnp.empty((1,), dtype=dpnp.int32) - with pytest.raises(TypeError): - dpnp.nanvar(ia, out=res) - - # ddof should be an integer or float - with pytest.raises(TypeError): - dpnp.nanvar(ia, ddof="1") diff --git 
a/dpnp/tests/test_statistics.py b/dpnp/tests/test_statistics.py index 58c2bda24030..23317236367d 100644 --- a/dpnp/tests/test_statistics.py +++ b/dpnp/tests/test_statistics.py @@ -13,10 +13,11 @@ assert_dtype_allclose, generate_random_numpy_array, get_all_dtypes, - get_complex_dtypes, get_float_complex_dtypes, has_support_aspect64, + numpy_version, ) +from .third_party.cupy import testing from .third_party.cupy.testing import with_requires @@ -123,6 +124,168 @@ def test_avg_error(self): dpnp.average(a, axis=0, weights=w) +class TestCorrcoef: + @pytest.mark.usefixtures( + "suppress_divide_invalid_numpy_warnings", + "suppress_dof_numpy_warnings", + ) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + @pytest.mark.parametrize("rowvar", [True, False]) + def test_corrcoef(self, dtype, rowvar): + ia = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dtype) + a = dpnp.asnumpy(ia) + + expected = numpy.corrcoef(a, rowvar=rowvar) + result = dpnp.corrcoef(ia, rowvar=rowvar) + + assert_dtype_allclose(result, expected) + + @pytest.mark.usefixtures( + "suppress_divide_invalid_numpy_warnings", + "suppress_dof_numpy_warnings", + "suppress_mean_empty_slice_numpy_warnings", + ) + @pytest.mark.parametrize("shape", [(2, 0), (0, 2)]) + def test_corrcoef_empty(self, shape): + ia = dpnp.empty(shape, dtype=dpnp.int64) + a = dpnp.asnumpy(ia) + + result = dpnp.corrcoef(ia) + expected = numpy.corrcoef(a) + assert_dtype_allclose(result, expected) + + @pytest.mark.usefixtures("suppress_complex_warning") + @pytest.mark.parametrize("dt_in", get_all_dtypes(no_bool=True)) + @pytest.mark.parametrize("dt_out", get_float_complex_dtypes()) + def test_corrcoef_dtype(self, dt_in, dt_out): + ia = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dt_in) + a = dpnp.asnumpy(ia) + + expected = numpy.corrcoef(a, dtype=dt_out) + result = dpnp.corrcoef(ia, dtype=dt_out) + assert expected.dtype == result.dtype + assert_allclose(result, expected, rtol=1e-6) + + @pytest.mark.usefixtures( + "suppress_divide_invalid_numpy_warnings", + "suppress_dof_numpy_warnings", + ) + def test_corrcoef_scalar(self): + ia = dpnp.array(5) + a = dpnp.asnumpy(ia) + + result = dpnp.corrcoef(ia) + expected = numpy.corrcoef(a) + assert_dtype_allclose(result, expected) + + +class TestCorrelate: + @pytest.mark.parametrize( + "a, v", [([1], [1, 2, 3]), ([1, 2, 3], [1]), ([1, 2, 3], [1, 2])] + ) + @pytest.mark.parametrize("mode", [None, "full", "valid", "same"]) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_correlate(self, a, v, mode, dtype): + an = numpy.array(a, dtype=dtype) + vn = numpy.array(v, dtype=dtype) + ad = dpnp.array(an) + vd = dpnp.array(vn) + + if mode is None: + expected = numpy.correlate(an, vn) + result = dpnp.correlate(ad, vd) + else: + expected = numpy.correlate(an, vn, mode=mode) + result = dpnp.correlate(ad, vd, mode=mode) + + assert_dtype_allclose(result, expected) + + def test_correlate_mode_error(self): + a = dpnp.arange(5) + v = dpnp.arange(3) + + # invalid mode + with pytest.raises(ValueError): + dpnp.correlate(a, v, mode="unknown") + + @pytest.mark.parametrize("a, v", [([], [1]), ([1], []), ([], [])]) + def test_correlate_empty(self, a, v): + a = dpnp.asarray(a) + v = dpnp.asarray(v) + + with pytest.raises(ValueError): + dpnp.correlate(a, v) + + @pytest.mark.parametrize( + "a, v", + [ + ([[1, 2], [2, 3]], [1]), + ([1], [[1, 2], [2, 3]]), + ([[1, 2], [2, 3]], [[1, 2], [2, 3]]), + ], + ) + def test_correlate_shape_error(self, a, v): + a = dpnp.asarray(a) + v = dpnp.asarray(v) + + with 
pytest.raises(ValueError): + dpnp.correlate(a, v) + + @pytest.mark.parametrize("size", [2, 10**1, 10**2, 10**3, 10**4, 10**5]) + def test_correlate_different_sizes(self, size): + a = numpy.random.rand(size).astype(numpy.float32) + v = numpy.random.rand(size // 2).astype(numpy.float32) + + ad = dpnp.array(a) + vd = dpnp.array(v) + + expected = numpy.correlate(a, v) + result = dpnp.correlate(ad, vd) + + assert_dtype_allclose(result, expected, factor=20) + + def test_correlate_another_sycl_queue(self): + a = dpnp.arange(5, sycl_queue=dpctl.SyclQueue()) + v = dpnp.arange(3, sycl_queue=dpctl.SyclQueue()) + + with pytest.raises(ValueError): + dpnp.correlate(a, v) + + +class TestCov: + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_bool=True, no_none=True, no_complex=True) + ) + def test_false_rowvar_dtype(self, dtype): + a = numpy.array([[0, 2], [1, 1], [2, 0]], dtype=dtype) + ia = dpnp.array(a) + + assert_allclose(dpnp.cov(ia.T), dpnp.cov(ia, rowvar=False)) + assert_allclose(dpnp.cov(ia, rowvar=False), numpy.cov(a, rowvar=False)) + + # numpy 2.2 properly transposes 2d array when rowvar=False + @with_requires("numpy>=2.2") + @pytest.mark.filterwarnings("ignore::RuntimeWarning") + def test_false_rowvar_1x3(self): + a = numpy.array([[0, 1, 2]]) + ia = dpnp.array(a) + + expected = numpy.cov(a, rowvar=False) + result = dpnp.cov(ia, rowvar=False) + assert_allclose(expected, result) + + # numpy 2.2 properly transposes 2d array when rowvar=False + @with_requires("numpy>=2.2") + @pytest.mark.usefixtures("allow_fall_back_on_numpy") + def test_true_rowvar(self): + a = numpy.ones((3, 1)) + ia = dpnp.array(a) + + expected = numpy.cov(a, ddof=0, rowvar=True) + result = dpnp.cov(ia, ddof=0, rowvar=True) + assert_allclose(expected, result) + + class TestMaxMin: @pytest.mark.parametrize("func", ["max", "min"]) @pytest.mark.parametrize("axis", [None, 0, 1, -1, 2, -2, (1, 2), (0, -2)]) @@ -360,91 +523,69 @@ def test_usm_ndarray(self, axis, overwrite_input): assert_dtype_allclose(result, expected) -class TestVar: - @pytest.mark.usefixtures( - "suppress_divide_invalid_numpy_warnings", "suppress_dof_numpy_warnings" +class TestPtp: + @pytest.mark.parametrize("axis", [None, 0, 1]) + @pytest.mark.parametrize( + "v", + [ + [[0, 0], [0, 0]], + [[1, 2], [1, 2]], + [[1, 2], [3, 4]], + [[0, 1, 2], [3, 4, 5], [6, 7, 8]], + [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], + ], + ids=[ + "[[0, 0], [0, 0]]", + "[[1, 2], [1, 2]]", + "[[1, 2], [3, 4]]", + "[[0, 1, 2], [3, 4, 5], [6, 7, 8]]", + "[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]", + ], ) - @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) - @pytest.mark.parametrize("axis", [None, 0, 1, (0, 1)]) - @pytest.mark.parametrize("keepdims", [True, False]) - @pytest.mark.parametrize("ddof", [0, 0.5, 1, 1.5, 2]) - def test_var(self, dtype, axis, keepdims, ddof): - a = generate_random_numpy_array((2, 3), dtype) + def test_basic(self, v, axis): + a = numpy.array(v) ia = dpnp.array(a) - expected = numpy.var(a, axis=axis, keepdims=keepdims, ddof=ddof) - result = dpnp.var(ia, axis=axis, keepdims=keepdims, ddof=ddof) - - if axis == 0 and ddof == 2: - assert dpnp.all(dpnp.isnan(result)) - else: - assert_dtype_allclose(result, expected) + expected = numpy.ptp(a, axis) + result = dpnp.ptp(ia, axis) + assert_array_equal(result, expected) - @pytest.mark.usefixtures( - "suppress_divide_invalid_numpy_warnings", "suppress_dof_numpy_warnings" - ) - @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) @pytest.mark.parametrize("axis", [None, 0, 1]) - @pytest.mark.parametrize("ddof", [0, 
1]) - def test_var_out(self, dtype, axis, ddof): - a = generate_random_numpy_array((2, 3), dtype) - ia = dpnp.array(a) - - expected = numpy.var(a, axis=axis, ddof=ddof) - if has_support_aspect64(): - res_dtype = expected.dtype - else: - res_dtype = dpnp.default_float_type(ia.device) - out = dpnp.empty(expected.shape, dtype=res_dtype) - result = dpnp.var(ia, axis=axis, out=out, ddof=ddof) - assert result is out - assert_dtype_allclose(result, expected) - - @pytest.mark.usefixtures( - "suppress_invalid_numpy_warnings", "suppress_dof_numpy_warnings" + @pytest.mark.parametrize( + "v", + [ + [[0, 0], [0, 0]], + [[1, 2], [1, 2]], + [[1, 2], [3, 4]], + [[0, 1, 2], [3, 4, 5], [6, 7, 8]], + [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], + ], + ids=[ + "[[0, 0], [0, 0]]", + "[[1, 2], [1, 2]]", + "[[1, 2], [3, 4]]", + "[[0, 1, 2], [3, 4, 5], [6, 7, 8]]", + "[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]", + ], ) - @pytest.mark.parametrize("axis", [0, 1, (0, 1)]) - @pytest.mark.parametrize("shape", [(2, 3), (2, 0), (0, 3)]) - def test_var_empty(self, axis, shape): - ia = dpnp.empty(shape, dtype=dpnp.int64) - a = dpnp.asnumpy(ia) - - result = dpnp.var(ia, axis=axis) - expected = numpy.var(a, axis=axis) - assert_dtype_allclose(result, expected) - - @pytest.mark.usefixtures("suppress_complex_warning") - @pytest.mark.parametrize("dt_in", get_all_dtypes(no_bool=True)) - @pytest.mark.parametrize("dt_out", get_float_complex_dtypes()) - def test_var_dtype(self, dt_in, dt_out): - ia = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dt_in) - a = dpnp.asnumpy(ia) - - expected = numpy.var(a, dtype=dt_out) - result = dpnp.var(ia, dtype=dt_out) - assert expected.dtype == result.dtype - assert_allclose(result, expected, rtol=1e-06) - - def test_var_scalar(self): - ia = dpnp.array(5) - a = dpnp.asnumpy(ia) - - result = ia.var() - expected = a.var() - assert_allclose(result, expected) - - def test_var_error(self): - ia = dpnp.arange(5) - # where keyword is not implemented - with pytest.raises(NotImplementedError): - dpnp.var(ia, where=False) + def test_out(self, v, axis): + a = numpy.array(v) + ia = dpnp.array(a) - # ddof should be an integer or float - with pytest.raises(TypeError): - dpnp.var(ia, ddof="1") + expected = numpy.ptp(a, axis) + result = dpnp.array(numpy.empty_like(expected)) + dpnp.ptp(ia, axis, out=result) + assert_array_equal(result, expected) -class TestStd: +@testing.parameterize( + *testing.product( + { + "func": ("std", "var"), + } + ) +) +class TestStdVar: @pytest.mark.usefixtures( "suppress_divide_invalid_numpy_warnings", "suppress_dof_numpy_warnings" ) @@ -452,12 +593,16 @@ class TestStd: @pytest.mark.parametrize("axis", [0, 1, (0, 1)]) @pytest.mark.parametrize("keepdims", [True, False]) @pytest.mark.parametrize("ddof", [0, 0.5, 1, 1.5, 2]) - def test_std(self, dtype, axis, keepdims, ddof): + def test_basic(self, dtype, axis, keepdims, ddof): a = generate_random_numpy_array((2, 3), dtype) ia = dpnp.array(a) - expected = numpy.std(a, axis=axis, keepdims=keepdims, ddof=ddof) - result = dpnp.std(ia, axis=axis, keepdims=keepdims, ddof=ddof) + expected = getattr(numpy, self.func)( + a, axis=axis, keepdims=keepdims, ddof=ddof + ) + result = getattr(dpnp, self.func)( + ia, axis=axis, keepdims=keepdims, ddof=ddof + ) if axis == 0 and ddof == 2: assert dpnp.all(dpnp.isnan(result)) else: @@ -469,18 +614,19 @@ def test_std(self, dtype, axis, keepdims, ddof): @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("ddof", [0, 1]) - def test_std_out(self, dtype, 
axis, ddof): + def test_out_keyword(self, dtype, axis, ddof): a = generate_random_numpy_array((2, 3), dtype) ia = dpnp.array(a) - expected = numpy.std(a, axis=axis, ddof=ddof) + expected = getattr(numpy, self.func)(a, axis=axis, ddof=ddof) if has_support_aspect64(): res_dtype = expected.dtype else: res_dtype = dpnp.default_float_type(ia.device) + out = dpnp.empty(expected.shape, dtype=res_dtype) - result = dpnp.std(ia, axis=axis, out=out, ddof=ddof) - assert out is result + result = getattr(dpnp, self.func)(ia, axis=axis, out=out, ddof=ddof) + assert result is out assert_dtype_allclose(result, expected) @pytest.mark.usefixtures( @@ -488,255 +634,59 @@ def test_std_out(self, dtype, axis, ddof): ) @pytest.mark.parametrize("axis", [None, 0, 1, (0, 1)]) @pytest.mark.parametrize("shape", [(2, 3), (2, 0), (0, 3)]) - def test_std_empty(self, axis, shape): + def test_empty_array(self, axis, shape): ia = dpnp.empty(shape, dtype=dpnp.int64) a = dpnp.asnumpy(ia) - result = dpnp.std(ia, axis=axis) - expected = numpy.std(a, axis=axis) + expected = getattr(numpy, self.func)(a, axis=axis) + result = getattr(dpnp, self.func)(ia, axis=axis) assert_dtype_allclose(result, expected) @pytest.mark.usefixtures("suppress_complex_warning") @pytest.mark.parametrize("dt_in", get_all_dtypes(no_bool=True)) @pytest.mark.parametrize("dt_out", get_float_complex_dtypes()) - def test_std_dtype(self, dt_in, dt_out): + def test_dtype_keyword(self, dt_in, dt_out): ia = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dt_in) a = dpnp.asnumpy(ia) - expected = numpy.std(a, dtype=dt_out) - result = dpnp.std(ia, dtype=dt_out) + expected = getattr(numpy, self.func)(a, dtype=dt_out) + result = getattr(dpnp, self.func)(ia, dtype=dt_out) assert expected.dtype == result.dtype assert_allclose(result, expected, rtol=1e-6) - def test_std_scalar(self): - ia = dpnp.array(5) - a = dpnp.asnumpy(ia) - - result = ia.std() - expected = a.std() - assert_dtype_allclose(result, expected) - - def test_std_error(self): - ia = dpnp.arange(5) - # where keyword is not implemented - with pytest.raises(NotImplementedError): - dpnp.std(ia, where=False) - - # ddof should be an integer or float - with pytest.raises(TypeError): - dpnp.std(ia, ddof="1") - - -class TestCorrcoef: - @pytest.mark.usefixtures( - "suppress_divide_invalid_numpy_warnings", - "suppress_dof_numpy_warnings", - ) @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) - @pytest.mark.parametrize("rowvar", [True, False]) - def test_corrcoef(self, dtype, rowvar): - ia = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dtype) - a = dpnp.asnumpy(ia) - - expected = numpy.corrcoef(a, rowvar=rowvar) - result = dpnp.corrcoef(ia, rowvar=rowvar) - - assert_dtype_allclose(result, expected) + @pytest.mark.parametrize("axis", [1, (0, 2), None]) + @pytest.mark.parametrize("keepdims", [True, False]) + def test_mean_keyword(self, dtype, axis, keepdims): + a = generate_random_numpy_array((10, 20, 5), dtype) + ia = dpnp.array(a) - @pytest.mark.usefixtures( - "suppress_divide_invalid_numpy_warnings", - "suppress_dof_numpy_warnings", - "suppress_mean_empty_slice_numpy_warnings", - ) - @pytest.mark.parametrize("shape", [(2, 0), (0, 2)]) - def test_corrcoef_empty(self, shape): - ia = dpnp.empty(shape, dtype=dpnp.int64) - a = dpnp.asnumpy(ia) + mean = numpy.mean(a, axis=axis, keepdims=True) + imean = dpnp.mean(ia, axis=axis, keepdims=True) - result = dpnp.corrcoef(ia) - expected = numpy.corrcoef(a) + mean_kw = {"mean": mean} if numpy_version() >= "2.0.0" else {} + expected = getattr(a, self.func)( + axis=axis, 
keepdims=keepdims, **mean_kw + ) + result = getattr(ia, self.func)( + axis=axis, keepdims=keepdims, mean=imean + ) assert_dtype_allclose(result, expected) - @pytest.mark.usefixtures("suppress_complex_warning") - @pytest.mark.parametrize("dt_in", get_all_dtypes(no_bool=True)) - @pytest.mark.parametrize("dt_out", get_float_complex_dtypes()) - def test_corrcoef_dtype(self, dt_in, dt_out): - ia = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dt_in) - a = dpnp.asnumpy(ia) - - expected = numpy.corrcoef(a, dtype=dt_out) - result = dpnp.corrcoef(ia, dtype=dt_out) - assert expected.dtype == result.dtype - assert_allclose(result, expected, rtol=1e-6) - - @pytest.mark.usefixtures( - "suppress_divide_invalid_numpy_warnings", - "suppress_dof_numpy_warnings", - ) - def test_corrcoef_scalar(self): + def test_scalar(self): ia = dpnp.array(5) a = dpnp.asnumpy(ia) - result = dpnp.corrcoef(ia) - expected = numpy.corrcoef(a) + expected = getattr(a, self.func)() + result = getattr(ia, self.func)() assert_dtype_allclose(result, expected) + def test_error(self): + ia = dpnp.arange(5) + # where keyword is not implemented + with pytest.raises(NotImplementedError): + getattr(dpnp, self.func)(ia, where=False) -class TestCorrelate: - @pytest.mark.parametrize( - "a, v", [([1], [1, 2, 3]), ([1, 2, 3], [1]), ([1, 2, 3], [1, 2])] - ) - @pytest.mark.parametrize("mode", [None, "full", "valid", "same"]) - @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) - def test_correlate(self, a, v, mode, dtype): - an = numpy.array(a, dtype=dtype) - vn = numpy.array(v, dtype=dtype) - ad = dpnp.array(an) - vd = dpnp.array(vn) - - if mode is None: - expected = numpy.correlate(an, vn) - result = dpnp.correlate(ad, vd) - else: - expected = numpy.correlate(an, vn, mode=mode) - result = dpnp.correlate(ad, vd, mode=mode) - - assert_dtype_allclose(result, expected) - - def test_correlate_mode_error(self): - a = dpnp.arange(5) - v = dpnp.arange(3) - - # invalid mode - with pytest.raises(ValueError): - dpnp.correlate(a, v, mode="unknown") - - @pytest.mark.parametrize("a, v", [([], [1]), ([1], []), ([], [])]) - def test_correlate_empty(self, a, v): - a = dpnp.asarray(a) - v = dpnp.asarray(v) - - with pytest.raises(ValueError): - dpnp.correlate(a, v) - - @pytest.mark.parametrize( - "a, v", - [ - ([[1, 2], [2, 3]], [1]), - ([1], [[1, 2], [2, 3]]), - ([[1, 2], [2, 3]], [[1, 2], [2, 3]]), - ], - ) - def test_correlate_shape_error(self, a, v): - a = dpnp.asarray(a) - v = dpnp.asarray(v) - - with pytest.raises(ValueError): - dpnp.correlate(a, v) - - @pytest.mark.parametrize("size", [2, 10**1, 10**2, 10**3, 10**4, 10**5]) - def test_correlate_different_sizes(self, size): - a = numpy.random.rand(size).astype(numpy.float32) - v = numpy.random.rand(size // 2).astype(numpy.float32) - - ad = dpnp.array(a) - vd = dpnp.array(v) - - expected = numpy.correlate(a, v) - result = dpnp.correlate(ad, vd) - - assert_dtype_allclose(result, expected, factor=20) - - def test_correlate_another_sycl_queue(self): - a = dpnp.arange(5, sycl_queue=dpctl.SyclQueue()) - v = dpnp.arange(3, sycl_queue=dpctl.SyclQueue()) - - with pytest.raises(ValueError): - dpnp.correlate(a, v) - - -class TestCov: - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_bool=True, no_none=True, no_complex=True) - ) - def test_false_rowvar_dtype(self, dtype): - a = numpy.array([[0, 2], [1, 1], [2, 0]], dtype=dtype) - ia = dpnp.array(a) - - assert_allclose(dpnp.cov(ia.T), dpnp.cov(ia, rowvar=False)) - assert_allclose(dpnp.cov(ia, rowvar=False), numpy.cov(a, rowvar=False)) - - # numpy 2.2 
properly transposes 2d array when rowvar=False - @with_requires("numpy>=2.2") - @pytest.mark.filterwarnings("ignore::RuntimeWarning") - def test_false_rowvar_1x3(self): - a = numpy.array([[0, 1, 2]]) - ia = dpnp.array(a) - - expected = numpy.cov(a, rowvar=False) - result = dpnp.cov(ia, rowvar=False) - assert_allclose(expected, result) - - # numpy 2.2 properly transposes 2d array when rowvar=False - @with_requires("numpy>=2.2") - @pytest.mark.usefixtures("allow_fall_back_on_numpy") - def test_true_rowvar(self): - a = numpy.ones((3, 1)) - ia = dpnp.array(a) - - expected = numpy.cov(a, ddof=0, rowvar=True) - result = dpnp.cov(ia, ddof=0, rowvar=True) - assert_allclose(expected, result) - - -@pytest.mark.parametrize("axis", [None, 0, 1]) -@pytest.mark.parametrize( - "v", - [ - [[0, 0], [0, 0]], - [[1, 2], [1, 2]], - [[1, 2], [3, 4]], - [[0, 1, 2], [3, 4, 5], [6, 7, 8]], - [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], - ], - ids=[ - "[[0, 0], [0, 0]]", - "[[1, 2], [1, 2]]", - "[[1, 2], [3, 4]]", - "[[0, 1, 2], [3, 4, 5], [6, 7, 8]]", - "[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]", - ], -) -def test_ptp(v, axis): - a = numpy.array(v) - ia = dpnp.array(a) - expected = numpy.ptp(a, axis) - result = dpnp.ptp(ia, axis) - assert_array_equal(result, expected) - - -@pytest.mark.parametrize("axis", [None, 0, 1]) -@pytest.mark.parametrize( - "v", - [ - [[0, 0], [0, 0]], - [[1, 2], [1, 2]], - [[1, 2], [3, 4]], - [[0, 1, 2], [3, 4, 5], [6, 7, 8]], - [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], - ], - ids=[ - "[[0, 0], [0, 0]]", - "[[1, 2], [1, 2]]", - "[[1, 2], [3, 4]]", - "[[0, 1, 2], [3, 4, 5], [6, 7, 8]]", - "[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]", - ], -) -def test_ptp_out(v, axis): - a = numpy.array(v) - ia = dpnp.array(a) - expected = numpy.ptp(a, axis) - result = dpnp.array(numpy.empty_like(expected)) - dpnp.ptp(ia, axis, out=result) - assert_array_equal(result, expected) + # ddof should be an integer or float + with pytest.raises(TypeError): + getattr(dpnp, self.func)(ia, ddof="1")
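
(Editorial note, not part of the patch.) A short sketch of the ``ddof`` behavior described in the new Notes sections of ``std``/``var``: with ``ddof=0`` the divisor is ``N`` (population variance), with ``ddof=1`` it is ``N - 1`` (Bessel's correction), and the standard deviation is the square root of the variance. The values below are arbitrary and serve only to check the documented identities.

import dpnp as np

a = np.array([14.0, 8.0, 11.0, 10.0, 7.0, 9.0])
n = a.size
d2 = abs(a - a.mean()) ** 2  # squared deviations from the mean

# ddof=0 divides by N, ddof=1 divides by N - 1, and std is sqrt(var),
# matching the formulas spelled out in the docstring Notes above.
assert np.allclose(np.var(a), d2.sum() / n)
assert np.allclose(np.var(a, ddof=1), d2.sum() / (n - 1))
assert np.allclose(np.std(a, ddof=1), (d2.sum() / (n - 1)) ** 0.5)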