Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

impelment dpnp.std, dpnp.var and dpnp.nanvar #1635

Merged
merged 15 commits into from
Dec 16, 2023
109 changes: 58 additions & 51 deletions dpnp/dpnp_iface_nanfunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def nanvar(
Alternative output array in which to place the result. It must have
the same shape as the expected output but the type (of the calculated
values) will be cast if necessary.
ddof : int, optional
ddof : {int, float}, optional
Means Delta Degrees of Freedom. The divisor used in calculations
is ``N - ddof``, where ``N`` corresponds to the total
number of elements over which the variance is calculated.
Expand Down Expand Up @@ -319,6 +319,7 @@ def nanvar(
See Also
--------
:obj:`dpnp.var` : Compute the variance along the specified axis.
:obj:`dpnp.std` : Compute the standard deviation along the specified axis.
:obj:`dpnp.nanmean` : Compute the arithmetic mean along the specified axis,
vtavana marked this conversation as resolved.
Show resolved Hide resolved
ignoring NaNs.
:obj:`dpnp.nanstd` : Compute the standard deviation along
Expand All @@ -341,62 +342,68 @@ def nanvar(
raise NotImplementedError(
"where keyword argument is only supported with its default value."
)
elif not isinstance(ddof, (int, float)):
raise TypeError(
"An integer or float is required, but got {}".format(type(ddof))
)
else:
arr, mask = _replace_nan(a, 0)
if mask is None:
return dpnp.var(
arr,
axis=axis,
dtype=dtype,
out=out,
ddof=ddof,
keepdims=keepdims,
where=where,
)

if dtype is not None:
dtype = dpnp.dtype(dtype)
if not issubclass(dtype.type, dpnp.inexact):
raise TypeError(
"If input is inexact, then dtype must be inexact."
)
if out is not None and not issubclass(out.dtype.type, dpnp.inexact):
vtavana marked this conversation as resolved.
Show resolved Hide resolved
raise TypeError("If input is inexact, then out must be inexact.")

# Compute mean
cnt = dpnp.sum(
~mask, axis=axis, dtype=dpnp.intp, keepdims=True, where=where
)
avg = dpnp.sum(arr, axis=axis, dtype=dtype, keepdims=True, where=where)
vtavana marked this conversation as resolved.
Show resolved Hide resolved
avg = dpnp.divide(avg, cnt)
vtavana marked this conversation as resolved.
Show resolved Hide resolved

# Compute squared deviation from mean.
arr = dpnp.subtract(arr, avg)
dpnp.copyto(arr, 0.0, where=mask, casting="safe")
if dpnp.issubdtype(arr.dtype, dpnp.complexfloating):
sqr = dpnp.multiply(arr, arr.conj(), out=arr).real
else:
sqr = dpnp.multiply(arr, arr, out=arr)

arr, mask = _replace_nan(a, 0)
if mask is None:
return dpnp.var(
arr,
# Compute variance
var_dtype = a.real.dtype if dtype is None else dtype
var = dpnp.sum(
sqr,
axis=axis,
dtype=dtype,
dtype=var_dtype,
out=out,
ddof=ddof,
keepdims=keepdims,
where=where,
)

if dtype is not None:
dtype = dpnp.dtype(dtype)
if not issubclass(dtype.type, dpnp.inexact):
raise TypeError("If input is inexact, then dtype must be inexact")
if out is not None and not issubclass(out.dtype.type, dpnp.inexact):
raise TypeError("If input is inexact, then out must be inexact")
if var.ndim < cnt.ndim:
cnt = cnt.squeeze(axis)
cnt = cnt.astype(var.dtype, casting="same_kind")
vtavana marked this conversation as resolved.
Show resolved Hide resolved
cnt -= ddof
dpnp.divide(var, cnt, out=var)

# Compute mean
cnt = dpnp.sum(
~mask, axis=axis, dtype=dpnp.intp, keepdims=True, where=where
)
avg = dpnp.sum(arr, axis=axis, dtype=dtype, keepdims=True, where=where)
avg = dpnp.divide(avg, cnt)

# Compute squared deviation from mean.
arr = dpnp.subtract(arr, avg, where=where)
dpnp.copyto(arr, 0.0, where=mask, casting="safe")
if issubclass(arr.dtype.type, dpnp.complexfloating):
sqr = dpnp.multiply(arr, arr.conj(), out=arr, where=where).real
else:
sqr = dpnp.multiply(arr, arr, out=arr, where=where)

# Compute variance
var = dpnp.sum(sqr, axis=axis, dtype=dtype, keepdims=keepdims, where=where)

if var.ndim < cnt.ndim:
cnt = cnt.squeeze(axis)
dof = cnt - ddof

var = dpnp.divide(var, dof)

isbad = dof <= 0
if dpnp.any(isbad):
# NaN, inf, or negative numbers are all possible bad
# values, so explicitly replace them with NaN.
dpnp.copyto(var, dpnp.nan, where=isbad, casting="safe")

res = dpnp.get_result_array(var, out, casting="same_kind")

if out is None:
if dtype is not None:
res = res.astype(dtype, casting="same_kind")
elif res.dtype != a.real.dtype:
res = res.astype(a.real.dtype, casting="same_kind")
isbad = cnt <= 0
if dpnp.any(isbad):
# NaN, inf, or negative numbers are all possible bad
# values, so explicitly replace them with NaN.
dpnp.copyto(var, dpnp.nan, where=isbad, casting="same_kind")
vtavana marked this conversation as resolved.
Show resolved Hide resolved

return res
return var
151 changes: 80 additions & 71 deletions dpnp/dpnp_iface_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,47 @@
]


def _count_reduce_items(arr, axis, where=True):
"""
Calculates the number of items used in a reduction operation along the specified axis or axes

Parameters
----------
arr : {dpnp_array, usm_ndarray}
Input array.
axis : int or tuple of ints, optional
axis or axes along which the number of items used in a reduction operation must be counted.
If a tuple of unique integers is given, the items are counted over multiple axes.
If ``None``, the variance is computed over the entire array.
Default: `None`.

Returns
-------
out : int
The number of items should be used in a reduction operation.

Limitations
-----------
Parameters `where` is only supported with its default value.

"""
if where is True:
# no boolean mask given, calculate items according to axis
if axis is None:
axis = tuple(range(arr.ndim))
elif not isinstance(axis, tuple):
axis = (axis,)
items = 1
for ax in axis:
items *= arr.shape[normalize_axis_index(ax, arr.ndim)]
items = dpnp.intp(items)
else:
raise NotImplementedError(
"where keyword argument is only supported with its default value."
)
return items


def amax(a, axis=None, out=None, keepdims=False, initial=None, where=True):
"""
Return the maximum of an array or maximum along an axis.
Expand Down Expand Up @@ -636,7 +677,7 @@ def std(
Alternative output array in which to place the result. It must have
the same shape as the expected output but the type (of the calculated
values) will be cast if necessary.
ddof : int, optional
ddof : {int, float}, optional
Means Delta Degrees of Freedom. The divisor used in calculations
is ``N - ddof``, where ``N`` corresponds to the total
number of elements over which the variance is calculated.
Expand Down Expand Up @@ -697,33 +738,39 @@ def std(

"""

dpnp.check_supported_arrays_type(a)
if where is not True:
raise NotImplementedError(
"where keyword argument is only supported with its default value."
)
elif not isinstance(ddof, (int, float)):
raise TypeError(
"An integer or float is required, but got {}".format(type(ddof))
)
else:
if issubclass(a.dtype.type, dpnp.complexfloating):
var = dpnp.var(
if dpnp.issubdtype(a.dtype, dpnp.complexfloating):
result = dpnp.var(
a,
axis=axis,
dtype=dtype,
dtype=None,
out=out,
ddof=ddof,
keepdims=keepdims,
where=where,
)
res = dpnp.sqrt(var, out=out, where=where)
dpnp.sqrt(result, out=result)
else:
dpt_array = dpnp.get_usm_ndarray(a)
result = dpnp_array._create_from_usm_ndarray(
dpt.std(
dpt_array, axis=axis, correction=ddof, keepdims=keepdims
)
)
res = dpnp.get_result_array(result, out)
result = dpnp.get_result_array(result, out)

if dtype is not None and out is None:
res = res.astype(dtype, casting="same_kind")
return res
result = result.astype(dtype, casting="same_kind")
return result


def var(
Expand Down Expand Up @@ -751,7 +798,7 @@ def var(
Alternative output array in which to place the result. It must have
the same shape as the expected output but the type (of the calculated
values) will be cast if necessary.
ddof : int, optional
ddof : {int, float}, optional
Means Delta Degrees of Freedom. The divisor used in calculations
is ``N - ddof``, where ``N`` corresponds to the total
number of elements over which the variance is calculated.
Expand Down Expand Up @@ -811,87 +858,49 @@ def var(

"""

dpnp.check_supported_arrays_type(a)
if where is not True:
raise NotImplementedError(
"where keyword argument is only supported with its default value."
)
elif not isinstance(ddof, (int, float)):
raise TypeError(
"An integer or float is required, but got {}".format(type(ddof))
)
else:
if issubclass(a.dtype.type, dpnp.complexfloating):
if dpnp.issubdtype(a.dtype, dpnp.complexfloating):
# Note that if dtype is not of inexact type then arrmean will not be either.
arrmean = dpnp.mean(
a, axis=axis, dtype=dtype, keepdims=True, where=where
)
x = dpnp.subtract(a, arrmean, where=where)
x = dpnp.multiply(x, x.conj(), where=where).real
res = dpnp.sum(
x, axis=axis, dtype=dtype, keepdims=keepdims, where=where
x = dpnp.subtract(a, arrmean)
x = dpnp.multiply(x, x.conj(), out=x).real
result = dpnp.sum(
x,
axis=axis,
dtype=a.real.dtype,
out=out,
keepdims=keepdims,
where=where,
)
if dtype is None and a.real.dtype != res.dtype:
res = res.astype(a.real.dtype, casting="same_kind")

cnt = _count_reduce_items(a, axis, where)
dof = dpnp.max(
dpnp.array(
[cnt - ddof, 0],
dtype=res.dtype,
sycl_queue=res.sycl_queue,
device=res.device,
)
cnt = numpy.max(cnt - ddof, 0).astype(
result.dtype, casting="same_kind"
)
if not dof:
dof = dpnp.nan
if not cnt:
cnt = dpnp.nan

res = dpnp.divide(res, dof, out=out)
dpnp.divide(result, cnt, out=result)
else:
dpt_array = dpnp.get_usm_ndarray(a)
result = dpnp_array._create_from_usm_ndarray(
dpt.var(
dpt_array, axis=axis, correction=ddof, keepdims=keepdims
)
)
res = dpnp.get_result_array(result, out)

if dtype is not None and out is None:
res = res.astype(dtype, casting="same_kind")
return res


def _count_reduce_items(arr, axis, where=True):
"""
Calculates the number of items used in a reduction operation along the specified axis or axes

Parameters
----------
arr : {dpnp_array, usm_ndarray}
Input array.
axis : int or tuple of ints, optional
axis or axes along which the number of items used in a reduction operation must be counted.
If a tuple of unique integers is given, the items are counted over multiple axes.
If ``None``, the variance is computed over the entire array.
Default: `None`.

Returns
-------
out : int
The number of items should be used in a reduction operation.

Limitations
-----------
Parameters `where` is only supported with its default value.
result = dpnp.get_result_array(result, out)

"""
if where is True:
# no boolean mask given, calculate items according to axis
if axis is None:
axis = tuple(range(arr.ndim))
elif not isinstance(axis, tuple):
axis = (axis,)
items = 1
for ax in axis:
items *= arr.shape[normalize_axis_index(ax, arr.ndim)]
items = dpnp.intp(items)
else:
raise NotImplementedError(
"where keyword argument is only supported with its default value."
)
return items
if out is None and dtype is not None:
result = result.astype(dtype, casting="same_kind")
return result
Loading
Loading