Skip to content

Commit

Permalink
Add ddof for var, std
Browse files Browse the repository at this point in the history
  • Loading branch information
dcherian committed Nov 8, 2021
1 parent 6b9a81a commit 569c67f
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 2 deletions.
112 changes: 112 additions & 0 deletions xarray/core/_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,7 @@ def std(
self: DatasetReduce,
dim: Union[None, Hashable, Sequence[Hashable]] = None,
skipna: bool = True,
ddof: int = 0,
keep_attrs: bool = None,
**kwargs,
) -> T_Dataset:
Expand All @@ -751,6 +752,9 @@ def std(
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
ddof : int, default: 0
“Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
where ``N`` represents the number of elements.
keep_attrs : bool, optional
If True, ``attrs`` will be copied from the original
object to the new one. If False (default), the new object will be
Expand Down Expand Up @@ -803,6 +807,16 @@ def std(
Data variables:
da (labels) float64 nan 0.0 1.0
Specify ``ddof=1`` (Bessel's correction) for an unbiased estimate of the variance.
>>> ds.groupby("labels").std(skipna=True, ddof=1)
<xarray.Dataset>
Dimensions: (labels: 3)
Coordinates:
* labels (labels) object 'a' 'b' 'c'
Data variables:
da (labels) float64 nan 0.0 1.414
See Also
--------
numpy.std
Expand All @@ -814,6 +828,7 @@ def std(
duck_array_ops.std,
dim=dim,
skipna=skipna,
ddof=ddof,
numeric_only=True,
keep_attrs=keep_attrs,
**kwargs,
Expand All @@ -823,6 +838,7 @@ def var(
self: DatasetReduce,
dim: Union[None, Hashable, Sequence[Hashable]] = None,
skipna: bool = True,
ddof: int = 0,
keep_attrs: bool = None,
**kwargs,
) -> T_Dataset:
Expand All @@ -839,6 +855,9 @@ def var(
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
ddof : int, default: 0
“Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
where ``N`` represents the number of elements.
keep_attrs : bool, optional
If True, ``attrs`` will be copied from the original
object to the new one. If False (default), the new object will be
Expand Down Expand Up @@ -891,6 +910,16 @@ def var(
Data variables:
da (labels) float64 nan 0.0 1.0
Specify ``ddof=1`` (Bessel's correction) for an unbiased estimate of the variance.
>>> ds.groupby("labels").var(skipna=True, ddof=1)
<xarray.Dataset>
Dimensions: (labels: 3)
Coordinates:
* labels (labels) object 'a' 'b' 'c'
Data variables:
da (labels) float64 nan 0.0 2.0
See Also
--------
numpy.var
Expand All @@ -902,6 +931,7 @@ def var(
duck_array_ops.var,
dim=dim,
skipna=skipna,
ddof=ddof,
numeric_only=True,
keep_attrs=keep_attrs,
**kwargs,
Expand Down Expand Up @@ -1692,6 +1722,7 @@ def std(
self: DatasetReduce,
dim: Union[None, Hashable, Sequence[Hashable]] = None,
skipna: bool = True,
ddof: int = 0,
keep_attrs: bool = None,
**kwargs,
) -> T_Dataset:
Expand All @@ -1708,6 +1739,9 @@ def std(
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
ddof : int, default: 0
“Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
where ``N`` represents the number of elements.
keep_attrs : bool, optional
If True, ``attrs`` will be copied from the original
object to the new one. If False (default), the new object will be
Expand Down Expand Up @@ -1760,6 +1794,16 @@ def std(
Data variables:
da (time) float64 0.0 0.8165 nan
Specify ``ddof=1`` (Bessel's correction) for an unbiased estimate of the variance.
>>> ds.resample(time="3M").std(skipna=True, ddof=1)
<xarray.Dataset>
Dimensions: (time: 3)
Coordinates:
* time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
Data variables:
da (time) float64 nan 1.0 nan
See Also
--------
numpy.std
Expand All @@ -1771,6 +1815,7 @@ def std(
duck_array_ops.std,
dim=dim,
skipna=skipna,
ddof=ddof,
numeric_only=True,
keep_attrs=keep_attrs,
**kwargs,
Expand All @@ -1780,6 +1825,7 @@ def var(
self: DatasetReduce,
dim: Union[None, Hashable, Sequence[Hashable]] = None,
skipna: bool = True,
ddof: int = 0,
keep_attrs: bool = None,
**kwargs,
) -> T_Dataset:
Expand All @@ -1796,6 +1842,9 @@ def var(
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
ddof : int, default: 0
“Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
where ``N`` represents the number of elements.
keep_attrs : bool, optional
If True, ``attrs`` will be copied from the original
object to the new one. If False (default), the new object will be
Expand Down Expand Up @@ -1848,6 +1897,16 @@ def var(
Data variables:
da (time) float64 0.0 0.6667 nan
Specify ``ddof=1`` (Bessel's correction) for an unbiased estimate of the variance.
>>> ds.resample(time="3M").var(skipna=True, ddof=1)
<xarray.Dataset>
Dimensions: (time: 3)
Coordinates:
* time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
Data variables:
da (time) float64 nan 1.0 nan
See Also
--------
numpy.var
Expand All @@ -1859,6 +1918,7 @@ def var(
duck_array_ops.var,
dim=dim,
skipna=skipna,
ddof=ddof,
numeric_only=True,
keep_attrs=keep_attrs,
**kwargs,
Expand Down Expand Up @@ -2587,6 +2647,7 @@ def std(
self: DataArrayReduce,
dim: Union[None, Hashable, Sequence[Hashable]] = None,
skipna: bool = True,
ddof: int = 0,
keep_attrs: bool = None,
**kwargs,
) -> T_DataArray:
Expand All @@ -2603,6 +2664,9 @@ def std(
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
ddof : int, default: 0
“Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
where ``N`` represents the number of elements.
keep_attrs : bool, optional
If True, ``attrs`` will be copied from the original
object to the new one. If False (default), the new object will be
Expand Down Expand Up @@ -2648,6 +2712,14 @@ def std(
Coordinates:
* labels (labels) object 'a' 'b' 'c'
Specify ``ddof=1`` (Bessel's correction) for an unbiased estimate of the variance.
>>> da.groupby("labels").std(skipna=True, ddof=1)
<xarray.DataArray (labels: 3)>
array([ nan, 0. , 1.41421356])
Coordinates:
* labels (labels) object 'a' 'b' 'c'
See Also
--------
numpy.std
Expand All @@ -2659,6 +2731,7 @@ def std(
duck_array_ops.std,
dim=dim,
skipna=skipna,
ddof=ddof,
keep_attrs=keep_attrs,
**kwargs,
)
Expand All @@ -2667,6 +2740,7 @@ def var(
self: DataArrayReduce,
dim: Union[None, Hashable, Sequence[Hashable]] = None,
skipna: bool = True,
ddof: int = 0,
keep_attrs: bool = None,
**kwargs,
) -> T_DataArray:
Expand All @@ -2683,6 +2757,9 @@ def var(
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
ddof : int, default: 0
“Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
where ``N`` represents the number of elements.
keep_attrs : bool, optional
If True, ``attrs`` will be copied from the original
object to the new one. If False (default), the new object will be
Expand Down Expand Up @@ -2728,6 +2805,14 @@ def var(
Coordinates:
* labels (labels) object 'a' 'b' 'c'
Specify ``ddof=1`` (Bessel's correction) for an unbiased estimate of the variance.
>>> da.groupby("labels").var(skipna=True, ddof=1)
<xarray.DataArray (labels: 3)>
array([nan, 0., 2.])
Coordinates:
* labels (labels) object 'a' 'b' 'c'
See Also
--------
numpy.var
Expand All @@ -2739,6 +2824,7 @@ def var(
duck_array_ops.var,
dim=dim,
skipna=skipna,
ddof=ddof,
keep_attrs=keep_attrs,
**kwargs,
)
Expand Down Expand Up @@ -3458,6 +3544,7 @@ def std(
self: DataArrayReduce,
dim: Union[None, Hashable, Sequence[Hashable]] = None,
skipna: bool = True,
ddof: int = 0,
keep_attrs: bool = None,
**kwargs,
) -> T_DataArray:
Expand All @@ -3474,6 +3561,9 @@ def std(
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
ddof : int, default: 0
“Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
where ``N`` represents the number of elements.
keep_attrs : bool, optional
If True, ``attrs`` will be copied from the original
object to the new one. If False (default), the new object will be
Expand Down Expand Up @@ -3519,6 +3609,14 @@ def std(
Coordinates:
* time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
Specify ``ddof=1`` (Bessel's correction) for an unbiased estimate of the variance.
>>> da.resample(time="3M").std(skipna=True, ddof=1)
<xarray.DataArray (time: 3)>
array([nan, 1., nan])
Coordinates:
* time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
See Also
--------
numpy.std
Expand All @@ -3530,6 +3628,7 @@ def std(
duck_array_ops.std,
dim=dim,
skipna=skipna,
ddof=ddof,
keep_attrs=keep_attrs,
**kwargs,
)
Expand All @@ -3538,6 +3637,7 @@ def var(
self: DataArrayReduce,
dim: Union[None, Hashable, Sequence[Hashable]] = None,
skipna: bool = True,
ddof: int = 0,
keep_attrs: bool = None,
**kwargs,
) -> T_DataArray:
Expand All @@ -3554,6 +3654,9 @@ def var(
skips missing values for float dtypes; other dtypes either do not
have a sentinel missing value (int) or skipna=True has not been
implemented (object, datetime64 or timedelta64).
ddof : int, default: 0
“Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
where ``N`` represents the number of elements.
keep_attrs : bool, optional
If True, ``attrs`` will be copied from the original
object to the new one. If False (default), the new object will be
Expand Down Expand Up @@ -3599,6 +3702,14 @@ def var(
Coordinates:
* time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
Specify ``ddof=1`` (Bessel's correction) for an unbiased estimate of the variance.
>>> da.resample(time="3M").var(skipna=True, ddof=1)
<xarray.DataArray (time: 3)>
array([nan, 1., nan])
Coordinates:
* time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
See Also
--------
numpy.var
Expand All @@ -3610,6 +3721,7 @@ def var(
duck_array_ops.var,
dim=dim,
skipna=skipna,
ddof=ddof,
keep_attrs=keep_attrs,
**kwargs,
)
Expand Down
17 changes: 15 additions & 2 deletions xarray/util/generate_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ def {method}(
array's dtype. Changed in version 0.17.0: if specified on an integer
array and skipna=True, the result will be a float array."""

# Parameter entry describing the ``ddof`` keyword; passed as ``docs=`` to the
# ``ddof`` extra kwarg that the ``std`` and ``var`` methods declare.
_DDOF_DOCSTRING = """ddof : int, default: 0
“Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
where ``N`` represents the number of elements."""

_KEEP_ATTRS_DOCSTRING = """keep_attrs : bool, optional
If True, ``attrs`` will be copied from the original
object to the new one. If False (default), the new object will be
Expand Down Expand Up @@ -132,6 +136,15 @@ def {method}(
>>> {calculation}(skipna=True, min_count=2)""",
)
# Extra keyword description for ``ddof``; listed in ``extra_kwargs`` of the
# ``std`` and ``var`` methods only.
# The example sentence is shared by both methods, so it names the variance
# explicitly: with ``ddof=1`` the variance estimate is unbiased, but its square
# root is not an unbiased estimate of the standard deviation.
ddof = extra_kwarg(
    docs=_DDOF_DOCSTRING,
    kwarg="ddof: int = 0,",
    call="ddof=ddof,",
    example="""\n
Specify ``ddof=1`` (Bessel's correction) for an unbiased estimate of the variance.
>>> {calculation}(skipna=True, ddof=1)""",
)


class Method:
Expand Down Expand Up @@ -275,8 +288,8 @@ def generate_code(self, method):
Method("mean", extra_kwargs=(skip_na,), numeric_only=True),
Method("prod", extra_kwargs=(skip_na, min_count), numeric_only=True),
Method("sum", extra_kwargs=(skip_na, min_count), numeric_only=True),
Method("std", extra_kwargs=(skip_na,), numeric_only=True),
Method("var", extra_kwargs=(skip_na,), numeric_only=True),
Method("std", extra_kwargs=(skip_na, ddof), numeric_only=True),
Method("var", extra_kwargs=(skip_na, ddof), numeric_only=True),
Method("median", extra_kwargs=(skip_na,), numeric_only=True),
)

Expand Down

0 comments on commit 569c67f

Please sign in to comment.