Add ddof for var, std

andersy005 committed on Nov 8, 2021 · 569c67f
1 parent 6b9a81a
commit 569c67f
Show file tree

Hide file tree

Showing 2 changed files with 127 additions and 2 deletions.
diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py
@@ -735,6 +735,7 @@ def std(
         self: DatasetReduce,
         dim: Union[None, Hashable, Sequence[Hashable]] = None,
         skipna: bool = True,
+        ddof: int = 0,
         keep_attrs: bool = None,
         **kwargs,
     ) -> T_Dataset:
@@ -751,6 +752,9 @@ def std(
             skips missing values for float dtypes; other dtypes either do not
             have a sentinel missing value (int) or skipna=True has not been
             implemented (object, datetime64 or timedelta64).
+        ddof : int, default: 0
+            “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
+            where ``N`` represents the number of elements.
         keep_attrs : bool, optional
             If True, ``attrs`` will be copied from the original
             object to the new one.  If False (default), the new object will be
@@ -803,6 +807,16 @@ def std(
         Data variables:
             da       (labels) float64 nan 0.0 1.0
 
+        Specify ``ddof=1`` for the sample estimate (divisor ``N - 1``, unbiased for the variance).
+
+        >>> ds.groupby("labels").std(skipna=True, ddof=1)
+        <xarray.Dataset>
+        Dimensions:  (labels: 3)
+        Coordinates:
+          * labels   (labels) object 'a' 'b' 'c'
+        Data variables:
+            da       (labels) float64 nan 0.0 1.414
+
         See Also
         --------
         numpy.std
@@ -814,6 +828,7 @@ def std(
             duck_array_ops.std,
             dim=dim,
             skipna=skipna,
+            ddof=ddof,
             numeric_only=True,
             keep_attrs=keep_attrs,
             **kwargs,
@@ -823,6 +838,7 @@ def var(
         self: DatasetReduce,
         dim: Union[None, Hashable, Sequence[Hashable]] = None,
         skipna: bool = True,
+        ddof: int = 0,
         keep_attrs: bool = None,
         **kwargs,
     ) -> T_Dataset:
@@ -839,6 +855,9 @@ def var(
             skips missing values for float dtypes; other dtypes either do not
             have a sentinel missing value (int) or skipna=True has not been
             implemented (object, datetime64 or timedelta64).
+        ddof : int, default: 0
+            “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
+            where ``N`` represents the number of elements.
         keep_attrs : bool, optional
             If True, ``attrs`` will be copied from the original
             object to the new one.  If False (default), the new object will be
@@ -891,6 +910,16 @@ def var(
         Data variables:
             da       (labels) float64 nan 0.0 1.0
 
+        Specify ``ddof=1`` for the sample estimate (divisor ``N - 1``, unbiased for the variance).
+
+        >>> ds.groupby("labels").var(skipna=True, ddof=1)
+        <xarray.Dataset>
+        Dimensions:  (labels: 3)
+        Coordinates:
+          * labels   (labels) object 'a' 'b' 'c'
+        Data variables:
+            da       (labels) float64 nan 0.0 2.0
+
         See Also
         --------
         numpy.var
@@ -902,6 +931,7 @@ def var(
             duck_array_ops.var,
             dim=dim,
             skipna=skipna,
+            ddof=ddof,
             numeric_only=True,
             keep_attrs=keep_attrs,
             **kwargs,
@@ -1692,6 +1722,7 @@ def std(
         self: DatasetReduce,
         dim: Union[None, Hashable, Sequence[Hashable]] = None,
         skipna: bool = True,
+        ddof: int = 0,
         keep_attrs: bool = None,
         **kwargs,
     ) -> T_Dataset:
@@ -1708,6 +1739,9 @@ def std(
             skips missing values for float dtypes; other dtypes either do not
             have a sentinel missing value (int) or skipna=True has not been
             implemented (object, datetime64 or timedelta64).
+        ddof : int, default: 0
+            “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
+            where ``N`` represents the number of elements.
         keep_attrs : bool, optional
             If True, ``attrs`` will be copied from the original
             object to the new one.  If False (default), the new object will be
@@ -1760,6 +1794,16 @@ def std(
         Data variables:
             da       (time) float64 0.0 0.8165 nan
 
+        Specify ``ddof=1`` for the sample estimate (divisor ``N - 1``, unbiased for the variance).
+
+        >>> ds.resample(time="3M").std(skipna=True, ddof=1)
+        <xarray.Dataset>
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 nan 1.0 nan
+
         See Also
         --------
         numpy.std
@@ -1771,6 +1815,7 @@ def std(
             duck_array_ops.std,
             dim=dim,
             skipna=skipna,
+            ddof=ddof,
             numeric_only=True,
             keep_attrs=keep_attrs,
             **kwargs,
@@ -1780,6 +1825,7 @@ def var(
         self: DatasetReduce,
         dim: Union[None, Hashable, Sequence[Hashable]] = None,
         skipna: bool = True,
+        ddof: int = 0,
         keep_attrs: bool = None,
         **kwargs,
     ) -> T_Dataset:
@@ -1796,6 +1842,9 @@ def var(
             skips missing values for float dtypes; other dtypes either do not
             have a sentinel missing value (int) or skipna=True has not been
             implemented (object, datetime64 or timedelta64).
+        ddof : int, default: 0
+            “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
+            where ``N`` represents the number of elements.
         keep_attrs : bool, optional
             If True, ``attrs`` will be copied from the original
             object to the new one.  If False (default), the new object will be
@@ -1848,6 +1897,16 @@ def var(
         Data variables:
             da       (time) float64 0.0 0.6667 nan
 
+        Specify ``ddof=1`` for the sample estimate (divisor ``N - 1``, unbiased for the variance).
+
+        >>> ds.resample(time="3M").var(skipna=True, ddof=1)
+        <xarray.Dataset>
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 nan 1.0 nan
+
         See Also
         --------
         numpy.var
@@ -1859,6 +1918,7 @@ def var(
             duck_array_ops.var,
             dim=dim,
             skipna=skipna,
+            ddof=ddof,
             numeric_only=True,
             keep_attrs=keep_attrs,
             **kwargs,
@@ -2587,6 +2647,7 @@ def std(
         self: DataArrayReduce,
         dim: Union[None, Hashable, Sequence[Hashable]] = None,
         skipna: bool = True,
+        ddof: int = 0,
         keep_attrs: bool = None,
         **kwargs,
     ) -> T_DataArray:
@@ -2603,6 +2664,9 @@ def std(
             skips missing values for float dtypes; other dtypes either do not
             have a sentinel missing value (int) or skipna=True has not been
             implemented (object, datetime64 or timedelta64).
+        ddof : int, default: 0
+            “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
+            where ``N`` represents the number of elements.
         keep_attrs : bool, optional
             If True, ``attrs`` will be copied from the original
             object to the new one.  If False (default), the new object will be
@@ -2648,6 +2712,14 @@ def std(
         Coordinates:
           * labels   (labels) object 'a' 'b' 'c'
 
+        Specify ``ddof=1`` for the sample estimate (divisor ``N - 1``, unbiased for the variance).
+
+        >>> da.groupby("labels").std(skipna=True, ddof=1)
+        <xarray.DataArray (labels: 3)>
+        array([       nan, 0.        , 1.41421356])
+        Coordinates:
+          * labels   (labels) object 'a' 'b' 'c'
+
         See Also
         --------
         numpy.std
@@ -2659,6 +2731,7 @@ def std(
             duck_array_ops.std,
             dim=dim,
             skipna=skipna,
+            ddof=ddof,
             keep_attrs=keep_attrs,
             **kwargs,
         )
@@ -2667,6 +2740,7 @@ def var(
         self: DataArrayReduce,
         dim: Union[None, Hashable, Sequence[Hashable]] = None,
         skipna: bool = True,
+        ddof: int = 0,
         keep_attrs: bool = None,
         **kwargs,
     ) -> T_DataArray:
@@ -2683,6 +2757,9 @@ def var(
             skips missing values for float dtypes; other dtypes either do not
             have a sentinel missing value (int) or skipna=True has not been
             implemented (object, datetime64 or timedelta64).
+        ddof : int, default: 0
+            “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
+            where ``N`` represents the number of elements.
         keep_attrs : bool, optional
             If True, ``attrs`` will be copied from the original
             object to the new one.  If False (default), the new object will be
@@ -2728,6 +2805,14 @@ def var(
         Coordinates:
           * labels   (labels) object 'a' 'b' 'c'
 
+        Specify ``ddof=1`` for the sample estimate (divisor ``N - 1``, unbiased for the variance).
+
+        >>> da.groupby("labels").var(skipna=True, ddof=1)
+        <xarray.DataArray (labels: 3)>
+        array([nan,  0.,  2.])
+        Coordinates:
+          * labels   (labels) object 'a' 'b' 'c'
+
         See Also
         --------
         numpy.var
@@ -2739,6 +2824,7 @@ def var(
             duck_array_ops.var,
             dim=dim,
             skipna=skipna,
+            ddof=ddof,
             keep_attrs=keep_attrs,
             **kwargs,
         )
@@ -3458,6 +3544,7 @@ def std(
         self: DataArrayReduce,
         dim: Union[None, Hashable, Sequence[Hashable]] = None,
         skipna: bool = True,
+        ddof: int = 0,
         keep_attrs: bool = None,
         **kwargs,
     ) -> T_DataArray:
@@ -3474,6 +3561,9 @@ def std(
             skips missing values for float dtypes; other dtypes either do not
             have a sentinel missing value (int) or skipna=True has not been
             implemented (object, datetime64 or timedelta64).
+        ddof : int, default: 0
+            “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
+            where ``N`` represents the number of elements.
         keep_attrs : bool, optional
             If True, ``attrs`` will be copied from the original
             object to the new one.  If False (default), the new object will be
@@ -3519,6 +3609,14 @@ def std(
         Coordinates:
           * time     (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
 
+        Specify ``ddof=1`` for the sample estimate (divisor ``N - 1``, unbiased for the variance).
+
+        >>> da.resample(time="3M").std(skipna=True, ddof=1)
+        <xarray.DataArray (time: 3)>
+        array([nan,  1., nan])
+        Coordinates:
+          * time     (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
+
         See Also
         --------
         numpy.std
@@ -3530,6 +3628,7 @@ def std(
             duck_array_ops.std,
             dim=dim,
             skipna=skipna,
+            ddof=ddof,
             keep_attrs=keep_attrs,
             **kwargs,
         )
@@ -3538,6 +3637,7 @@ def var(
         self: DataArrayReduce,
         dim: Union[None, Hashable, Sequence[Hashable]] = None,
         skipna: bool = True,
+        ddof: int = 0,
         keep_attrs: bool = None,
         **kwargs,
     ) -> T_DataArray:
@@ -3554,6 +3654,9 @@ def var(
             skips missing values for float dtypes; other dtypes either do not
             have a sentinel missing value (int) or skipna=True has not been
             implemented (object, datetime64 or timedelta64).
+        ddof : int, default: 0
+            “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
+            where ``N`` represents the number of elements.
         keep_attrs : bool, optional
             If True, ``attrs`` will be copied from the original
             object to the new one.  If False (default), the new object will be
@@ -3599,6 +3702,14 @@ def var(
         Coordinates:
           * time     (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
 
+        Specify ``ddof=1`` for the sample estimate (divisor ``N - 1``, unbiased for the variance).
+
+        >>> da.resample(time="3M").var(skipna=True, ddof=1)
+        <xarray.DataArray (time: 3)>
+        array([nan,  1., nan])
+        Coordinates:
+          * time     (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
+
         See Also
         --------
         numpy.var
@@ -3610,6 +3721,7 @@ def var(
             duck_array_ops.var,
             dim=dim,
             skipna=skipna,
+            ddof=ddof,
             keep_attrs=keep_attrs,
             **kwargs,
         )

diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py
@@ -97,6 +97,10 @@ def {method}(
     array's dtype. Changed in version 0.17.0: if specified on an integer
     array and skipna=True, the result will be a float array."""
 
+_DDOF_DOCSTRING = """ddof : int, default: 0
+    “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
+    where ``N`` represents the number of elements."""
+
 _KEEP_ATTRS_DOCSTRING = """keep_attrs : bool, optional
     If True, ``attrs`` will be copied from the original
     object to the new one.  If False (default), the new object will be
@@ -132,6 +136,15 @@ def {method}(
 
         >>> {calculation}(skipna=True, min_count=2)""",
 )
+ddof = extra_kwarg(
+    docs=_DDOF_DOCSTRING,
+    kwarg="ddof: int = 0,",
+    call="ddof=ddof,",
+    example="""\n
+        Specify ``ddof=1`` for the sample estimate (divisor ``N - 1``, unbiased for the variance).
+
+        >>> {calculation}(skipna=True, ddof=1)""",
+)
 
 
 class Method:
@@ -275,8 +288,8 @@ def generate_code(self, method):
     Method("mean", extra_kwargs=(skip_na,), numeric_only=True),
     Method("prod", extra_kwargs=(skip_na, min_count), numeric_only=True),
     Method("sum", extra_kwargs=(skip_na, min_count), numeric_only=True),
-    Method("std", extra_kwargs=(skip_na,), numeric_only=True),
-    Method("var", extra_kwargs=(skip_na,), numeric_only=True),
+    Method("std", extra_kwargs=(skip_na, ddof), numeric_only=True),
+    Method("var", extra_kwargs=(skip_na, ddof), numeric_only=True),
     Method("median", extra_kwargs=(skip_na,), numeric_only=True),
 )