From 80936fd4a561f7ad4ca6a15fdbd774bc4fc33825 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 30 Jan 2020 11:40:22 -0500 Subject: [PATCH 01/20] [WIP] Implementation of polyfit and polyval - minimum testing - no docs --- xarray/core/dask_array_ops.py | 12 +++++ xarray/core/dataarray.py | 97 ++++++++++++++++++++++++++++++++++ xarray/core/duck_array_ops.py | 9 ++++ xarray/core/nputils.py | 18 +++++++ xarray/tests/test_dataarray.py | 31 +++++++++++ 5 files changed, 167 insertions(+) diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index 37f261cc3ad..811483c1d70 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -95,3 +95,15 @@ def func(x, window, axis=-1): # crop boundary. index = (slice(None),) * axis + (slice(drop_size, drop_size + orig_shape[axis]),) return out[index] + + +def least_squares(lhs, rhs, rcond=None, skipna=False): + import dask.array as da + lhs_da = da.from_array(lhs, chunks=(rhs.chunks[0], lhs.shape[1])) + if skipna: + results = da.apply_along_axis(nputils._nanpolyfit_1d, 0, rhs.data, lhs_da.data, dtype=float, shape=(lhs.shape[1] + 1,), rcond=rcond) + coeffs = results[:-1, ...] + residuals = results[-1, ...] + else: + coeffs, residuals, _, _ = da.linalg.lstsq(lhs_da.data, rhs.data) + return coeffs, residuals diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index a658f125054..02a4df59794 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -26,6 +26,7 @@ from . import ( computation, dtypes, + duck_array_ops, groupby, indexing, ops, @@ -54,6 +55,7 @@ from .indexes import Indexes, default_indexes, propagate_indexes from .indexing import is_fancy_indexer from .merge import PANDAS_TYPES, _extract_indexes_from_coords +from .missing import get_clean_interp_index from .options import OPTIONS from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs from .variable import ( @@ -3234,10 +3236,105 @@ def map_blocks( return map_blocks(func, self, args, kwargs) + def polyfit( + self, + dim : Hashable, + deg : int, + rcond : float = None, + w : Union[Hashable, Any] = None, + full : bool = False, + cov : bool = False, + skipna : bool = False + ): + x = get_clean_interp_index(self, dim) + order = int(deg) + 1 + lhs = np.vander(x, order, increasing=True) + + dims_to_stack = [dimname for dimname in self.dims if dimname != dim] + stacked_dim = utils.get_temp_dimname(dims_to_stack, 'stacked') + rhs = self.transpose(dim, *dims_to_stack).stack({stacked_dim: dims_to_stack}) + + if rcond is None: + rcond = x.shape[0] * np.core.finfo(x.dtype).eps + + # Weights: + if w is not None: + if isinstance(w, Hashable): + w = self[w] + w = np.asarray(w) + if w.ndim != 1: + raise TypeError("Expected a 1-d array for weights.") + if w.shape[0] != lhs.shape[0]: + raise TypeError("Expected w and {} to have the same length".format(dim)) + lhs *= w[:, np.newaxis] + rhs *= w[:, np.newaxis] + + # Scaling + scale = np.sqrt((lhs * lhs).sum(axis=0)) + lhs /= scale + + coeffs, residuals = duck_array_ops.least_squares(lhs, rhs, rcond=rcond, skipna=skipna) + + degree_dim = utils.get_temp_dimname(dims_to_stack, 'degree') + coeffs = DataArray(coeffs / scale[:, np.newaxis], dims=(degree_dim, stacked_dim), + coords={degree_dim: np.arange(order), + stacked_dim: rhs[stacked_dim]}, + name=(self.name or '') + '_polyfit_coefficients' + ).unstack(stacked_dim) + + rank = np.linalg.matrix_rank(lhs) + if rank != order and not full: + warnings.warn("Polyfit may be poorly conditioned", np.RankWarning, 
stacklevel=4) + + if full or (cov is True): + residuals = DataArray(residuals, dims=(stacked_dim), coords={stacked_dim: rhs[stacked_dim]}, + name=(self.name or '') + '_polyfit_residuals' + ).unstack(stacked_dim) + if full: + sing = np.linalg.svd(lhs, compute_uv=False) + sing = DataArray(sing, dims=(degree_dim,), coords={degree_dim: np.arange(order)}, + name=(self.name or '') + '_polyfit_singular_values') + return coeffs, residuals, rank, sing, rcond + if cov: + Vbase = np.linalg.inv(np.dot(lhs.T, lhs)) + Vbase /= np.outer(scale, scale) + if cov == 'unscaled': + fac = 1 + else: + if x.shape[0] <= order: + raise ValueError("The number of data points must exceed order to scale the covariance matrix.") + fac = residuals / (x.shape[0] - order) + covariance = DataArray(Vbase, dims=('cov_i', 'cov_j'), name=(self.name or '') + '_polyfit_covariance') * fac + return coeffs, covariance + return coeffs + # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names str = property(StringAccessor) +def polyval( + coord : Union["DataArray", Any], + coefficients : Union["DataArray", Any], + degree_dim : Union[str, int] = 'degree' +) -> "DataArray" : + if not isinstance(coord, DataArray): + coord = DataArray(coord, dims=('x',), name='x') + x = get_clean_interp_index(coord, coord.name) + + if isinstance(degree_dim, str): + deg_coord = coefficients[degree_dim] + else: # Axis number + if isinstance(coefficients, DataArray): + deg_coord = coefficients[coefficients.dims[degree_dim]] + else: + deg_coord = np.arange(coefficients.shape[degree_dim]) + + lhs = DataArray(np.vander(x, int(deg_coord.max()) + 1, increasing=True), + dims=(coord.name, degree_dim), + coords={coord.name: coord, degree_dim: np.arange(deg_coord.max() + 1)}) + return (lhs * coefficients).sum(degree_dim) + + # priority most be higher than Variable to properly work with binary ufuncs ops.inject_all_ops_and_reduce_methods(DataArray, priority=60) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index c2fe604a9d3..df81d93f9a0 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -600,3 +600,12 @@ def rolling_window(array, axis, window, center, fill_value): return dask_array_ops.rolling_window(array, axis, window, center, fill_value) else: # np.ndarray return nputils.rolling_window(array, axis, window, center, fill_value) + + +def least_squares(lhs, rhs, rcond=None, skipna=False): + """Return the coefficients and residuals of a least-squares fit. 
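+
+    The computation is dispatched to the dask implementation when ``rhs`` is a
+    dask array, and to the numpy implementation otherwise.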
+ """ + if isinstance(rhs, dask_array_type): + return dask_array_ops.least_squares(lhs, rhs, rcond=rcond, skipna=skipna) + else: + return nputils.least_squares(lhs, rhs, rcond=rcond, skipna=skipna) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index dba67174fc1..1513f9055dd 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -221,6 +221,24 @@ def f(values, axis=None, **kwargs): return f +def _nanpolyfit_1d(arr, x, rcond=None): + out = np.full((x.shape[1] + 1,), np.nan) + mask = np.isnan(arr) + if not np.all(mask): + out[:-1], out[-1], _, _ = np.linalg.lstsq(x[~mask, :], arr[~mask], rcond=rcond) + return out + + +def least_squares(lhs, rhs, rcond=None, skipna=False): + coeffs, residuals, _, _ = np.linalg.lstsq(lhs, rhs.data, rcond=rcond) + if skipna: + nan_cols = np.nonzero(np.isnan(coeffs[0, :]))[0] + out = np.apply_along_axis(_nanpolyfit_1d, 0, rhs.data[:, nan_cols], lhs, rcond=rcond) + coeffs[:, nan_cols] = out[:-1, :] + residuals[nan_cols] = out[-1, :] + return coeffs, residuals + + nanmin = _create_bottleneck_method("nanmin") nanmax = _create_bottleneck_method("nanmax") nanmean = _create_bottleneck_method("nanmean") diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b9b719e8af9..c41330c4ba8 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4152,6 +4152,37 @@ def test_rank(self): y = DataArray([0.75, 0.25, np.nan, 0.5, 1.0], dims=("z",)) assert_equal(y.rank("z", pct=True), y) + def test_polyfit(self): + x = np.arange(10) + da = DataArray(np.stack((1. + x + 2. * x**2, 1. + 2. * x + 3. * x**2)), + dims=('d', 'x'), coords={'x': x, 'd': [0, 1]}) + coeffs = da.polyfit('x', 2) + expected = DataArray([[1, 1, 2], [1, 2, 3]], dims=('d', 'degree'), + coords={'degree': [0, 1, 2], 'd': [0, 1]}).T + assert_allclose(coeffs, expected) + + # With NaN + da[0, 1] = np.nan + coeffs = da.polyfit('x', 2) + expected_na = DataArray([[np.nan, np.nan, np.nan], [1, 2, 3]], dims=('d', 'degree'), + coords={'degree': [0, 1, 2], 'd': [0, 1]}).T + assert_allclose(coeffs, expected_na) + + # Skipna + Full output + coeffs, resids, rank, sing, rcond = da.polyfit('x', 2, skipna=True, full=True) + assert_allclose(coeffs, expected) + assert rank == np.linalg.matrix_rank(np.vander(x, 3)) + np.testing.assert_almost_equal(resids, [0, 0]) + + +def test_polyval(): + x = np.arange(10) + da = DataArray(np.stack((1. + x + 2. * x**2, 1. + 2. * x + 3. 
* x**2)), + dims=('d', 'x'), coords={'x': x, 'd': [0, 1]}) + coeffs = da.polyfit('x', 2) + da_pv = xr.core.dataarray.polyval(da.x, coeffs) + assert_allclose(da, da_pv) + @pytest.fixture(params=[1]) def da(request): From 05355af618ccfc68c061c20ec3318e974f71e497 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 30 Jan 2020 11:49:45 -0500 Subject: [PATCH 02/20] Formatting with black, flake8 --- xarray/core/dask_array_ops.py | 11 +++- xarray/core/dataarray.py | 92 ++++++++++++++++++++++------------ xarray/core/nputils.py | 4 +- xarray/tests/test_dataarray.py | 36 ++++++++----- 4 files changed, 97 insertions(+), 46 deletions(-) diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index 811483c1d70..961de63f664 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -99,9 +99,18 @@ def func(x, window, axis=-1): def least_squares(lhs, rhs, rcond=None, skipna=False): import dask.array as da + lhs_da = da.from_array(lhs, chunks=(rhs.chunks[0], lhs.shape[1])) if skipna: - results = da.apply_along_axis(nputils._nanpolyfit_1d, 0, rhs.data, lhs_da.data, dtype=float, shape=(lhs.shape[1] + 1,), rcond=rcond) + results = da.apply_along_axis( + nputils._nanpolyfit_1d, + 0, + rhs.data, + lhs_da.data, + dtype=float, + shape=(lhs.shape[1] + 1,), + rcond=rcond, + ) coeffs = results[:-1, ...] residuals = results[-1, ...] else: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 02a4df59794..b59d87f5f9e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3238,20 +3238,20 @@ def map_blocks( def polyfit( self, - dim : Hashable, - deg : int, - rcond : float = None, - w : Union[Hashable, Any] = None, - full : bool = False, - cov : bool = False, - skipna : bool = False + dim: Hashable, + deg: int, + rcond: float = None, + w: Union[Hashable, Any] = None, + full: bool = False, + cov: bool = False, + skipna: bool = False, ): x = get_clean_interp_index(self, dim) order = int(deg) + 1 lhs = np.vander(x, order, increasing=True) dims_to_stack = [dimname for dimname in self.dims if dimname != dim] - stacked_dim = utils.get_temp_dimname(dims_to_stack, 'stacked') + stacked_dim = utils.get_temp_dimname(dims_to_stack, "stacked") rhs = self.transpose(dim, *dims_to_stack).stack({stacked_dim: dims_to_stack}) if rcond is None: @@ -3273,38 +3273,64 @@ def polyfit( scale = np.sqrt((lhs * lhs).sum(axis=0)) lhs /= scale - coeffs, residuals = duck_array_ops.least_squares(lhs, rhs, rcond=rcond, skipna=skipna) + coeffs, residuals = duck_array_ops.least_squares( + lhs, rhs, rcond=rcond, skipna=skipna + ) + + if self.name: + name = "{}_".format(self.name) + else: + name = "" - degree_dim = utils.get_temp_dimname(dims_to_stack, 'degree') - coeffs = DataArray(coeffs / scale[:, np.newaxis], dims=(degree_dim, stacked_dim), - coords={degree_dim: np.arange(order), - stacked_dim: rhs[stacked_dim]}, - name=(self.name or '') + '_polyfit_coefficients' - ).unstack(stacked_dim) + degree_dim = utils.get_temp_dimname(dims_to_stack, "degree") + coeffs = DataArray( + coeffs / scale[:, np.newaxis], + dims=(degree_dim, stacked_dim), + coords={degree_dim: np.arange(order), stacked_dim: rhs[stacked_dim]}, + name=name + "_polyfit_coefficients", + ).unstack(stacked_dim) rank = np.linalg.matrix_rank(lhs) if rank != order and not full: - warnings.warn("Polyfit may be poorly conditioned", np.RankWarning, stacklevel=4) + warnings.warn( + "Polyfit may be poorly conditioned", np.RankWarning, stacklevel=4 + ) if full or (cov is True): - residuals = DataArray(residuals, 
dims=(stacked_dim), coords={stacked_dim: rhs[stacked_dim]}, - name=(self.name or '') + '_polyfit_residuals' - ).unstack(stacked_dim) + residuals = DataArray( + residuals, + dims=(stacked_dim), + coords={stacked_dim: rhs[stacked_dim]}, + name=name + "_polyfit_residuals", + ).unstack(stacked_dim) if full: sing = np.linalg.svd(lhs, compute_uv=False) - sing = DataArray(sing, dims=(degree_dim,), coords={degree_dim: np.arange(order)}, - name=(self.name or '') + '_polyfit_singular_values') + sing = DataArray( + sing, + dims=(degree_dim,), + coords={degree_dim: np.arange(order)}, + name=name + "_polyfit_singular_values", + ) return coeffs, residuals, rank, sing, rcond if cov: Vbase = np.linalg.inv(np.dot(lhs.T, lhs)) Vbase /= np.outer(scale, scale) - if cov == 'unscaled': + if cov == "unscaled": fac = 1 else: if x.shape[0] <= order: - raise ValueError("The number of data points must exceed order to scale the covariance matrix.") + raise ValueError( + "The number of data points must exceed order to scale the covariance matrix." + ) fac = residuals / (x.shape[0] - order) - covariance = DataArray(Vbase, dims=('cov_i', 'cov_j'), name=(self.name or '') + '_polyfit_covariance') * fac + covariance = ( + DataArray( + Vbase, + dims=("cov_i", "cov_j"), + name=name + "_polyfit_covariance", + ) + * fac + ) return coeffs, covariance return coeffs @@ -3314,12 +3340,12 @@ def polyfit( def polyval( - coord : Union["DataArray", Any], - coefficients : Union["DataArray", Any], - degree_dim : Union[str, int] = 'degree' -) -> "DataArray" : + coord: Union["DataArray", Any], + coefficients: Union["DataArray", Any], + degree_dim: Union[str, int] = "degree", +) -> "DataArray": if not isinstance(coord, DataArray): - coord = DataArray(coord, dims=('x',), name='x') + coord = DataArray(coord, dims=("x",), name="x") x = get_clean_interp_index(coord, coord.name) if isinstance(degree_dim, str): @@ -3330,9 +3356,11 @@ def polyval( else: deg_coord = np.arange(coefficients.shape[degree_dim]) - lhs = DataArray(np.vander(x, int(deg_coord.max()) + 1, increasing=True), - dims=(coord.name, degree_dim), - coords={coord.name: coord, degree_dim: np.arange(deg_coord.max() + 1)}) + lhs = DataArray( + np.vander(x, int(deg_coord.max()) + 1, increasing=True), + dims=(coord.name, degree_dim), + coords={coord.name: coord, degree_dim: np.arange(deg_coord.max() + 1)}, + ) return (lhs * coefficients).sum(degree_dim) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 1513f9055dd..201dcd9ffb9 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -233,7 +233,9 @@ def least_squares(lhs, rhs, rcond=None, skipna=False): coeffs, residuals, _, _ = np.linalg.lstsq(lhs, rhs.data, rcond=rcond) if skipna: nan_cols = np.nonzero(np.isnan(coeffs[0, :]))[0] - out = np.apply_along_axis(_nanpolyfit_1d, 0, rhs.data[:, nan_cols], lhs, rcond=rcond) + out = np.apply_along_axis( + _nanpolyfit_1d, 0, rhs.data[:, nan_cols], lhs, rcond=rcond + ) coeffs[:, nan_cols] = out[:-1, :] residuals[nan_cols] = out[-1, :] return coeffs, residuals diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index c41330c4ba8..09b80bdd4d3 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4154,22 +4154,31 @@ def test_rank(self): def test_polyfit(self): x = np.arange(10) - da = DataArray(np.stack((1. + x + 2. * x**2, 1. + 2. * x + 3. 
* x**2)), - dims=('d', 'x'), coords={'x': x, 'd': [0, 1]}) - coeffs = da.polyfit('x', 2) - expected = DataArray([[1, 1, 2], [1, 2, 3]], dims=('d', 'degree'), - coords={'degree': [0, 1, 2], 'd': [0, 1]}).T + da = DataArray( + np.stack((1.0 + x + 2.0 * x ** 2, 1.0 + 2.0 * x + 3.0 * x ** 2)), + dims=("d", "x"), + coords={"x": x, "d": [0, 1]}, + ) + coeffs = da.polyfit("x", 2) + expected = DataArray( + [[1, 1, 2], [1, 2, 3]], + dims=("d", "degree"), + coords={"degree": [0, 1, 2], "d": [0, 1]}, + ).T assert_allclose(coeffs, expected) # With NaN da[0, 1] = np.nan - coeffs = da.polyfit('x', 2) - expected_na = DataArray([[np.nan, np.nan, np.nan], [1, 2, 3]], dims=('d', 'degree'), - coords={'degree': [0, 1, 2], 'd': [0, 1]}).T + coeffs = da.polyfit("x", 2) + expected_na = DataArray( + [[np.nan, np.nan, np.nan], [1, 2, 3]], + dims=("d", "degree"), + coords={"degree": [0, 1, 2], "d": [0, 1]}, + ).T assert_allclose(coeffs, expected_na) # Skipna + Full output - coeffs, resids, rank, sing, rcond = da.polyfit('x', 2, skipna=True, full=True) + coeffs, resids, rank, sing, rcond = da.polyfit("x", 2, skipna=True, full=True) assert_allclose(coeffs, expected) assert rank == np.linalg.matrix_rank(np.vander(x, 3)) np.testing.assert_almost_equal(resids, [0, 0]) @@ -4177,9 +4186,12 @@ def test_polyfit(self): def test_polyval(): x = np.arange(10) - da = DataArray(np.stack((1. + x + 2. * x**2, 1. + 2. * x + 3. * x**2)), - dims=('d', 'x'), coords={'x': x, 'd': [0, 1]}) - coeffs = da.polyfit('x', 2) + da = DataArray( + np.stack((1.0 + x + 2.0 * x ** 2, 1.0 + 2.0 * x + 3.0 * x ** 2)), + dims=("d", "x"), + coords={"x": x, "d": [0, 1]}, + ) + coeffs = da.polyfit("x", 2) da_pv = xr.core.dataarray.polyval(da.x, coeffs) assert_allclose(da, da_pv) From 7f26cb17df807f5b9c8e851903a9592e37be7435 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 30 Jan 2020 12:43:37 -0500 Subject: [PATCH 03/20] Fix failing test --- xarray/tests/test_dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 09b80bdd4d3..f7863083cb7 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4193,7 +4193,7 @@ def test_polyval(): ) coeffs = da.polyfit("x", 2) da_pv = xr.core.dataarray.polyval(da.x, coeffs) - assert_allclose(da, da_pv) + assert_allclose(da, da_pv.T) @pytest.fixture(params=[1]) From fdc181985a47de84d6ddc9d738bd893863210029 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 30 Jan 2020 14:27:46 -0500 Subject: [PATCH 04/20] More intelligent skipna switching --- xarray/core/dataarray.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b59d87f5f9e..41fa27615fe 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -31,6 +31,7 @@ indexing, ops, pdcompat, + pycompat, resample, rolling, utils, @@ -3244,8 +3245,14 @@ def polyfit( w: Union[Hashable, Any] = None, full: bool = False, cov: bool = False, - skipna: bool = False, + skipna: bool = None, ): + if skipna is None: + if isinstance(self.data, pycompat.dask_array_type): + skipna = True + else: + skipna = np.any(self.isnull()) + x = get_clean_interp_index(self, dim) order = int(deg) + 1 lhs = np.vander(x, order, increasing=True) @@ -3325,9 +3332,7 @@ def polyfit( fac = residuals / (x.shape[0] - order) covariance = ( DataArray( - Vbase, - dims=("cov_i", "cov_j"), - name=name + "_polyfit_covariance", + Vbase, dims=("cov_i", "cov_j"), name=name + 
"_polyfit_covariance", ) * fac ) From 924f4ebdf5fb8e6af33d6d512b421c4e15302d8f Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 31 Jan 2020 12:20:17 -0500 Subject: [PATCH 05/20] Add docs | Change coeff order to fit numpy | move polyval --- xarray/__init__.py | 3 +- xarray/core/computation.py | 46 ++++++++++++++++++++++++++++++ xarray/core/dataarray.py | 48 ++++++++++++++------------------ xarray/tests/test_computation.py | 12 ++++++++ xarray/tests/test_dataarray.py | 27 ++++-------------- 5 files changed, 86 insertions(+), 50 deletions(-) diff --git a/xarray/__init__.py b/xarray/__init__.py index 44dc66411c4..28bb901b251 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -15,7 +15,7 @@ from .core.alignment import align, broadcast from .core.combine import auto_combine, combine_by_coords, combine_nested from .core.common import ALL_DIMS, full_like, ones_like, zeros_like -from .core.computation import apply_ufunc, dot, where +from .core.computation import apply_ufunc, dot, where, polyval from .core.concat import concat from .core.dataarray import DataArray from .core.dataset import Dataset @@ -65,6 +65,7 @@ "open_mfdataset", "open_rasterio", "open_zarr", + "polyval", "register_dataarray_accessor", "register_dataset_accessor", "save_mfdataset", diff --git a/xarray/core/computation.py b/xarray/core/computation.py index d2c5c32bc00..d27b1562857 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1299,3 +1299,49 @@ def where(cond, x, y): dataset_join="exact", dask="allowed", ) + + +def polyval(coord, coeffs, degree_dim="degree"): + """Evaluate a polynomial at specific values + + Parameters + ---------- + coord : DataArray or array-like + The 1D coordinate along which to evaluate the polynomial. + coeffs : DataArray or array-like + Coefficients of the polynomials. + degree_dim : str or int, optional + Name of dimension or axis number along which the degree increases in `coeffs`. + + See also + -------- + `xarray.DataArray.polyfit` + `numpy.polyval` + """ + from .dataarray import DataArray + from .missing import get_clean_interp_index + + if not isinstance(coord, DataArray): + coord = DataArray(coord, dims=("x",), name="x") + x = get_clean_interp_index(coord, coord.name) + + if isinstance(degree_dim, str): + deg_coord = coeffs[degree_dim] + else: # Axis number + degree_axis = degree_dim + if isinstance(coeffs, DataArray): + degree_dim = coeffs.dims[degree_axis] + deg_coord = coeffs[degree_dim] + else: + degree_dim = "degree" + deg_coord = np.arange(coeffs.shape[degree_axis])[::-1] + coeffs = DataArray(coeffs) + coeffs.coords[coeffs.dims[degree_axis]] = deg_coord + coeffs = coeffs.rename({coeffs.dims[degree_axis]: degree_dim}) + + lhs = DataArray( + np.vander(x, int(deg_coord.max()) + 1), + dims=(coord.name, degree_dim), + coords={coord.name: coord, degree_dim: np.arange(deg_coord.max() + 1)[::-1]}, + ) + return (lhs * coeffs).sum(degree_dim) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 41fa27615fe..6e7c86c05fd 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3241,12 +3241,28 @@ def polyfit( self, dim: Hashable, deg: int, + skipna: bool = None, rcond: float = None, w: Union[Hashable, Any] = None, full: bool = False, cov: bool = False, - skipna: bool = None, ): + """Least squares polynomial fit. + + This replicates the behaviour of `numpy.polyfit` but differs by skipping + invalid values when `skipna = True`. + + Parameters + ---------- + dim : hashable + Coordinate along which to fit the polynomials. 
+ skipna : bool, optional + If True, removes all invalid values before fitting each 1D slices of the array + Defaults is True if data is stored in as dask.array or if there is any + invalid values, False otherwise. + + Documentation of `numpy.polyfit` follows: + """ if skipna is None: if isinstance(self.data, pycompat.dask_array_type): skipna = True @@ -3255,7 +3271,7 @@ def polyfit( x = get_clean_interp_index(self, dim) order = int(deg) + 1 - lhs = np.vander(x, order, increasing=True) + lhs = np.vander(x, order) dims_to_stack = [dimname for dimname in self.dims if dimname != dim] stacked_dim = utils.get_temp_dimname(dims_to_stack, "stacked") @@ -3293,7 +3309,7 @@ def polyfit( coeffs = DataArray( coeffs / scale[:, np.newaxis], dims=(degree_dim, stacked_dim), - coords={degree_dim: np.arange(order), stacked_dim: rhs[stacked_dim]}, + coords={degree_dim: np.arange(order)[::-1], stacked_dim: rhs[stacked_dim]}, name=name + "_polyfit_coefficients", ).unstack(stacked_dim) @@ -3315,7 +3331,7 @@ def polyfit( sing = DataArray( sing, dims=(degree_dim,), - coords={degree_dim: np.arange(order)}, + coords={degree_dim: np.arange(order)[::-1]}, name=name + "_polyfit_singular_values", ) return coeffs, residuals, rank, sing, rcond @@ -3344,29 +3360,7 @@ def polyfit( str = property(StringAccessor) -def polyval( - coord: Union["DataArray", Any], - coefficients: Union["DataArray", Any], - degree_dim: Union[str, int] = "degree", -) -> "DataArray": - if not isinstance(coord, DataArray): - coord = DataArray(coord, dims=("x",), name="x") - x = get_clean_interp_index(coord, coord.name) - - if isinstance(degree_dim, str): - deg_coord = coefficients[degree_dim] - else: # Axis number - if isinstance(coefficients, DataArray): - deg_coord = coefficients[coefficients.dims[degree_dim]] - else: - deg_coord = np.arange(coefficients.shape[degree_dim]) - - lhs = DataArray( - np.vander(x, int(deg_coord.max()) + 1, increasing=True), - dims=(coord.name, degree_dim), - coords={coord.name: coord, degree_dim: np.arange(deg_coord.max() + 1)}, - ) - return (lhs * coefficients).sum(degree_dim) +DataArray.polyfit.__doc__ += np.polyfit.__doc__ # priority most be higher than Variable to properly work with binary ufuncs diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 369903552ad..f41b87db594 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1120,3 +1120,15 @@ def test_where(): actual = xr.where(cond, 1, 0) expected = xr.DataArray([1, 0], dims="x") assert_identical(expected, actual) + + +def test_polyval(): + x = np.arange(10) + da = xr.DataArray( + np.stack((1.0 + x + 2.0 * x ** 2, 1.0 + 2.0 * x + 3.0 * x ** 2)), + dims=("d", "x"), + coords={"x": x, "d": [0, 1]}, + ) + coeffs = da.polyfit("x", 2) + da_pv = xr.polyval(da.x, coeffs) + xr.testing.assert_allclose(da, da_pv.T) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index f7863083cb7..755d57d6216 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4155,27 +4155,22 @@ def test_rank(self): def test_polyfit(self): x = np.arange(10) da = DataArray( - np.stack((1.0 + x + 2.0 * x ** 2, 1.0 + 2.0 * x + 3.0 * x ** 2)), + np.stack((1.0 + 1.0 * x + 2.0 * x ** 2, 1.0 + 2.0 * x + 3.0 * x ** 2)), dims=("d", "x"), coords={"x": x, "d": [0, 1]}, ) coeffs = da.polyfit("x", 2) expected = DataArray( - [[1, 1, 2], [1, 2, 3]], + [[2, 1, 1], [3, 2, 1]], dims=("d", "degree"), - coords={"degree": [0, 1, 2], "d": [0, 1]}, + coords={"degree": [2, 1, 0], "d": [0, 1]}, ).T 
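+        # Coefficients are returned with the highest degree first, matching numpy.polyfit.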
assert_allclose(coeffs, expected) # With NaN da[0, 1] = np.nan - coeffs = da.polyfit("x", 2) - expected_na = DataArray( - [[np.nan, np.nan, np.nan], [1, 2, 3]], - dims=("d", "degree"), - coords={"degree": [0, 1, 2], "d": [0, 1]}, - ).T - assert_allclose(coeffs, expected_na) + coeffs = da.polyfit("x", 2, skipna=True) + assert_allclose(coeffs, expected) # Skipna + Full output coeffs, resids, rank, sing, rcond = da.polyfit("x", 2, skipna=True, full=True) @@ -4184,18 +4179,6 @@ def test_polyfit(self): np.testing.assert_almost_equal(resids, [0, 0]) -def test_polyval(): - x = np.arange(10) - da = DataArray( - np.stack((1.0 + x + 2.0 * x ** 2, 1.0 + 2.0 * x + 3.0 * x ** 2)), - dims=("d", "x"), - coords={"x": x, "d": [0, 1]}, - ) - coeffs = da.polyfit("x", 2) - da_pv = xr.core.dataarray.polyval(da.x, coeffs) - assert_allclose(da, da_pv.T) - - @pytest.fixture(params=[1]) def da(request): if request.param == 1: From 1f6030a09a113f8c119d1593797a74ce62561efd Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 31 Jan 2020 13:16:39 -0500 Subject: [PATCH 06/20] Move doc patching to class --- xarray/__init__.py | 2 +- xarray/core/dataarray.py | 10 +++++----- xarray/core/dataset.py | 2 +- xarray/core/missing.py | 2 +- xarray/core/nputils.py | 1 - xarray/tests/test_duck_array_ops.py | 10 +++++----- xarray/tests/test_missing.py | 5 ++--- 7 files changed, 15 insertions(+), 17 deletions(-) diff --git a/xarray/__init__.py b/xarray/__init__.py index 28bb901b251..0f6a759e356 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -15,7 +15,7 @@ from .core.alignment import align, broadcast from .core.combine import auto_combine, combine_by_coords, combine_nested from .core.common import ALL_DIMS, full_like, ones_like, zeros_like -from .core.computation import apply_ufunc, dot, where, polyval +from .core.computation import apply_ufunc, dot, polyval, where from .core.concat import concat from .core.dataarray import DataArray from .core.dataset import Dataset diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 6e7c86c05fd..0938b930ea9 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1,3 +1,4 @@ +import datetime import functools import warnings from numbers import Number @@ -18,7 +19,6 @@ cast, ) -import datetime import numpy as np import pandas as pd @@ -3247,7 +3247,8 @@ def polyfit( full: bool = False, cov: bool = False, ): - """Least squares polynomial fit. + """ + Least squares polynomial fit. This replicates the behaviour of `numpy.polyfit` but differs by skipping invalid values when `skipna = True`. 
@@ -3355,13 +3356,12 @@ def polyfit( return coeffs, covariance return coeffs + polyfit.__doc__ += np.polyfit.__doc__ + # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names str = property(StringAccessor) -DataArray.polyfit.__doc__ += np.polyfit.__doc__ - - # priority most be higher than Variable to properly work with binary ufuncs ops.inject_all_ops_and_reduce_methods(DataArray, priority=60) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c72ed6cc7d6..07bea6dac19 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1,4 +1,5 @@ import copy +import datetime import functools import sys import warnings @@ -27,7 +28,6 @@ cast, ) -import datetime import numpy as np import pandas as pd diff --git a/xarray/core/missing.py b/xarray/core/missing.py index b20441e993c..40f010b3514 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -1,8 +1,8 @@ +import datetime as dt import warnings from functools import partial from numbers import Number from typing import Any, Callable, Dict, Hashable, Sequence, Union -import datetime as dt import numpy as np import pandas as pd diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 201dcd9ffb9..394b130e93f 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -2,7 +2,6 @@ import numpy as np import pandas as pd - from numpy.core.multiarray import normalize_axis_index try: diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 96c883baa67..497a1348be1 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -1,7 +1,7 @@ +import datetime as dt import warnings from textwrap import dedent -import datetime as dt import numpy as np import pandas as pd import pytest @@ -17,13 +17,13 @@ gradient, last, mean, - rolling_window, - stack, - where, - py_timedelta_to_float, np_timedelta64_to_float, pd_timedelta_to_float, + py_timedelta_to_float, + rolling_window, + stack, timedelta_to_numeric, + where, ) from xarray.core.pycompat import dask_array_type from xarray.testing import assert_allclose, assert_equal diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 8d70d9a0fcc..35c71c2854c 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -14,16 +14,15 @@ ) from xarray.core.pycompat import dask_array_type from xarray.tests import ( + assert_allclose, assert_array_equal, assert_equal, - assert_allclose, raises_regex, requires_bottleneck, + requires_cftime, requires_dask, requires_scipy, - requires_cftime, ) - from xarray.tests.test_cftime_offsets import _CFTIME_CALENDARS From 0f922e7afbfc628ee0c28e4c1678aefd7c59a02c Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 31 Jan 2020 15:58:59 -0500 Subject: [PATCH 07/20] conditional doc patching --- xarray/core/dataarray.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 0938b930ea9..8ba3c5536ea 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3356,7 +3356,8 @@ def polyfit( return coeffs, covariance return coeffs - polyfit.__doc__ += np.polyfit.__doc__ + if polyfit.__doc__ is not None: + polyfit.__doc__ += np.polyfit.__doc__ # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names From bfec9c8b58258cbbfdc0f7739ee7cf3c28ed858a Mon Sep 17 
00:00:00 2001 From: Pascal Bourgault Date: Fri, 31 Jan 2020 16:48:29 -0500 Subject: [PATCH 08/20] Fix windows fail - more efficient nan skipping --- xarray/core/nputils.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 394b130e93f..b7e9fa3c9b0 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -229,14 +229,19 @@ def _nanpolyfit_1d(arr, x, rcond=None): def least_squares(lhs, rhs, rcond=None, skipna=False): - coeffs, residuals, _, _ = np.linalg.lstsq(lhs, rhs.data, rcond=rcond) if skipna: - nan_cols = np.nonzero(np.isnan(coeffs[0, :]))[0] - out = np.apply_along_axis( - _nanpolyfit_1d, 0, rhs.data[:, nan_cols], lhs, rcond=rcond + nan_cols = np.any(np.isnan(rhs.data), axis=0) + out = np.empty((lhs.shape + 1, rhs.data.shape[1])) + out[:, nan_cols] = np.apply_along_axis( + _nanpolyfit_1d, 0, rhs.data[:, nan_cols], lhs ) - coeffs[:, nan_cols] = out[:-1, :] - residuals[nan_cols] = out[-1, :] + out[:-1, ~nan_cols], out[-1, ~nan_cols], _, _ = np.linalg.lstsq( + lhs, rhs.data[:, ~nan_cols], rcond=rcond + ) + coeffs = out[:-1, :] + residuals = out[-1, :] + else: + coeffs, residuals, _, _ = np.linalg.lstsq(lhs, rhs.data, rcond=rcond) return coeffs, residuals From 2d3235a84a034d5106b7bd7f020896a18b575af8 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 31 Jan 2020 17:31:57 -0500 Subject: [PATCH 09/20] Fix typo in least_squares --- xarray/core/nputils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index b7e9fa3c9b0..a069d6d9a18 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -231,7 +231,7 @@ def _nanpolyfit_1d(arr, x, rcond=None): def least_squares(lhs, rhs, rcond=None, skipna=False): if skipna: nan_cols = np.any(np.isnan(rhs.data), axis=0) - out = np.empty((lhs.shape + 1, rhs.data.shape[1])) + out = np.empty((lhs.shape[1] + 1, rhs.data.shape[1])) out[:, nan_cols] = np.apply_along_axis( _nanpolyfit_1d, 0, rhs.data[:, nan_cols], lhs ) From 31440aec91afe47c32c9af266ae4751b17d51f05 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 11 Feb 2020 17:53:10 -0500 Subject: [PATCH 10/20] Move polyfit to dataset --- xarray/core/dataarray.py | 106 ++-------------------- xarray/core/dataset.py | 150 +++++++++++++++++++++++++++++++ xarray/tests/test_computation.py | 2 +- xarray/tests/test_dataarray.py | 16 ++-- 4 files changed, 168 insertions(+), 106 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 8ba3c5536ea..e2d698d61fa 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -26,12 +26,10 @@ from . import ( computation, dtypes, - duck_array_ops, groupby, indexing, ops, pdcompat, - pycompat, resample, rolling, utils, @@ -56,7 +54,6 @@ from .indexes import Indexes, default_indexes, propagate_indexes from .indexing import is_fancy_indexer from .merge import PANDAS_TYPES, _extract_indexes_from_coords -from .missing import get_clean_interp_index from .options import OPTIONS from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs from .variable import ( @@ -3262,103 +3259,18 @@ def polyfit( Defaults is True if data is stored in as dask.array or if there is any invalid values, False otherwise. - Documentation of `numpy.polyfit` follows: + See documentation of `numpy.polyfit`. + + Returns + ------- + A single dataset with the coefficients of the best fit for this DataArray. 
+ The residuals, singular values and rank are included if `full` is True. + The covariance matrix is included if `full` is False and `cov` is True. """ - if skipna is None: - if isinstance(self.data, pycompat.dask_array_type): - skipna = True - else: - skipna = np.any(self.isnull()) - - x = get_clean_interp_index(self, dim) - order = int(deg) + 1 - lhs = np.vander(x, order) - - dims_to_stack = [dimname for dimname in self.dims if dimname != dim] - stacked_dim = utils.get_temp_dimname(dims_to_stack, "stacked") - rhs = self.transpose(dim, *dims_to_stack).stack({stacked_dim: dims_to_stack}) - - if rcond is None: - rcond = x.shape[0] * np.core.finfo(x.dtype).eps - - # Weights: - if w is not None: - if isinstance(w, Hashable): - w = self[w] - w = np.asarray(w) - if w.ndim != 1: - raise TypeError("Expected a 1-d array for weights.") - if w.shape[0] != lhs.shape[0]: - raise TypeError("Expected w and {} to have the same length".format(dim)) - lhs *= w[:, np.newaxis] - rhs *= w[:, np.newaxis] - - # Scaling - scale = np.sqrt((lhs * lhs).sum(axis=0)) - lhs /= scale - - coeffs, residuals = duck_array_ops.least_squares( - lhs, rhs, rcond=rcond, skipna=skipna + return self._to_temp_dataset().polyfit( + dim, deg, skipna=skipna, rcond=rcond, w=w, full=full, cov=cov ) - if self.name: - name = "{}_".format(self.name) - else: - name = "" - - degree_dim = utils.get_temp_dimname(dims_to_stack, "degree") - coeffs = DataArray( - coeffs / scale[:, np.newaxis], - dims=(degree_dim, stacked_dim), - coords={degree_dim: np.arange(order)[::-1], stacked_dim: rhs[stacked_dim]}, - name=name + "_polyfit_coefficients", - ).unstack(stacked_dim) - - rank = np.linalg.matrix_rank(lhs) - if rank != order and not full: - warnings.warn( - "Polyfit may be poorly conditioned", np.RankWarning, stacklevel=4 - ) - - if full or (cov is True): - residuals = DataArray( - residuals, - dims=(stacked_dim), - coords={stacked_dim: rhs[stacked_dim]}, - name=name + "_polyfit_residuals", - ).unstack(stacked_dim) - if full: - sing = np.linalg.svd(lhs, compute_uv=False) - sing = DataArray( - sing, - dims=(degree_dim,), - coords={degree_dim: np.arange(order)[::-1]}, - name=name + "_polyfit_singular_values", - ) - return coeffs, residuals, rank, sing, rcond - if cov: - Vbase = np.linalg.inv(np.dot(lhs.T, lhs)) - Vbase /= np.outer(scale, scale) - if cov == "unscaled": - fac = 1 - else: - if x.shape[0] <= order: - raise ValueError( - "The number of data points must exceed order to scale the covariance matrix." 
- ) - fac = residuals / (x.shape[0] - order) - covariance = ( - DataArray( - Vbase, dims=("cov_i", "cov_j"), name=name + "_polyfit_covariance", - ) - * fac - ) - return coeffs, covariance - return coeffs - - if polyfit.__doc__ is not None: - polyfit.__doc__ += np.polyfit.__doc__ - # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names str = property(StringAccessor) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 07bea6dac19..0d56fb0ed90 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -75,6 +75,7 @@ merge_coordinates_without_align, merge_data_and_coords, ) +from .missing import get_clean_interp_index from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_array_type from .utils import ( @@ -5690,5 +5691,154 @@ def map_blocks( return map_blocks(func, self, args, kwargs) + def polyfit( + self, + dim: Hashable, + deg: int, + skipna: bool = None, + rcond: float = None, + w: Union[Hashable, Any] = None, + full: bool = False, + cov: bool = False, + ): + """ + Least squares polynomial fit. + + This replicates the behaviour of `numpy.polyfit` but differs by skipping + invalid values when `skipna = True`. + + Parameters + ---------- + dim : hashable + Coordinate along which to fit the polynomials. + skipna : bool, optional + If True, removes all invalid values before fitting each 1D slices of the array + Default is True if data is stored in as dask.array or if there is any + invalid values, False otherwise. + + See documentation of `numpy.polyfit`. + + Returns + ------- + A single dataset with the coefficients of the best fit for each variable in this dataset. + The residuals, singular values and rank are included if `full` is True. + The covariance matrices are included if `full` is False and `cov` is True. 
+ """ + variables = {} + skipna_da = skipna + + x = get_clean_interp_index(self, dim) + xname = "{}_".format(self[dim].name) + order = int(deg) + 1 + lhs = np.vander(x, order) + + if rcond is None: + rcond = x.shape[0] * np.core.finfo(x.dtype).eps + + # Weights: + if w is not None: + if isinstance(w, Hashable): + w = self.coords[w] + w = np.asarray(w) + if w.ndim != 1: + raise TypeError("Expected a 1-d array for weights.") + if w.shape[0] != lhs.shape[0]: + raise TypeError("Expected w and {} to have the same length".format(dim)) + lhs *= w[:, np.newaxis] + + # Scaling + scale = np.sqrt((lhs * lhs).sum(axis=0)) + lhs /= scale + + degree_dim = utils.get_temp_dimname(self.dims, "degree") + + rank = np.linalg.matrix_rank(lhs) + if rank != order and not full: + warnings.warn( + "Polyfit may be poorly conditioned", np.RankWarning, stacklevel=4 + ) + + if full: + rank = xr.DataArray(rank, name=xname + "matrix_rank") + variables[rank.name] = rank + sing = np.linalg.svd(lhs, compute_uv=False) + sing = xr.DataArray( + sing, + dims=(degree_dim,), + coords={degree_dim: np.arange(order)[::-1]}, + name=xname + "singular_values", + ) + variables[sing.name] = sing + + for name, da in self.data_vars.items(): + if dim not in da.coords: + continue + + if skipna is None: + if isinstance(da.data, dask_array_type): + skipna_da = True + else: + skipna_da = np.any(da.isnull()) + + dims_to_stack = [dimname for dimname in da.dims if dimname != dim] + stacked_dim = utils.get_temp_dimname(dims_to_stack, "stacked") + rhs = da.transpose(dim, *dims_to_stack).stack({stacked_dim: dims_to_stack}) + + if w is not None: + rhs *= w[:, np.newaxis] + + coeffs, residuals = duck_array_ops.least_squares( + lhs, rhs, rcond=rcond, skipna=skipna_da + ) + + if isinstance(name, str): + name = "{}_".format(name) + else: + # Thus a ReprObject => polyfit was called on a DataArray + name = "" + + coeffs = xr.DataArray( + coeffs / scale[:, np.newaxis], + dims=(degree_dim, stacked_dim), + coords={ + degree_dim: np.arange(order)[::-1], + stacked_dim: rhs[stacked_dim], + }, + name=name + "polyfit_coefficients", + ).unstack(stacked_dim) + variables[coeffs.name] = coeffs + + if full or (cov is True): + residuals = xr.DataArray( + residuals, + dims=(stacked_dim), + coords={stacked_dim: rhs[stacked_dim]}, + name=name + "polyfit_residuals", + ).unstack(stacked_dim) + variables[residuals.name] = residuals + + if cov: + Vbase = np.linalg.inv(np.dot(lhs.T, lhs)) + Vbase /= np.outer(scale, scale) + if cov == "unscaled": + fac = 1 + else: + if x.shape[0] <= order: + raise ValueError( + "The number of data points must exceed order to scale the covariance matrix." 
+ ) + fac = residuals / (x.shape[0] - order) + covariance = ( + xr.DataArray( + Vbase, + dims=("cov_i", "cov_j"), + name=name + "polyfit_covariance", + ) + * fac + ) + variables[covariance.name] = covariance + + return Dataset(data_vars=variables, attrs=self.attrs.copy()) + ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index f41b87db594..8d9e72db8bb 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1130,5 +1130,5 @@ def test_polyval(): coords={"x": x, "d": [0, 1]}, ) coeffs = da.polyfit("x", 2) - da_pv = xr.polyval(da.x, coeffs) + da_pv = xr.polyval(da.x, coeffs.polyfit_coefficients) xr.testing.assert_allclose(da, da_pv.T) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 755d57d6216..5b50d1d9adb 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4159,24 +4159,24 @@ def test_polyfit(self): dims=("d", "x"), coords={"x": x, "d": [0, 1]}, ) - coeffs = da.polyfit("x", 2) + out = da.polyfit("x", 2) expected = DataArray( [[2, 1, 1], [3, 2, 1]], dims=("d", "degree"), coords={"degree": [2, 1, 0], "d": [0, 1]}, ).T - assert_allclose(coeffs, expected) + assert_allclose(out.polyfit_coefficients, expected) # With NaN da[0, 1] = np.nan - coeffs = da.polyfit("x", 2, skipna=True) - assert_allclose(coeffs, expected) + out = da.polyfit("x", 2, skipna=True) + assert_allclose(out.polyfit_coefficients, expected) # Skipna + Full output - coeffs, resids, rank, sing, rcond = da.polyfit("x", 2, skipna=True, full=True) - assert_allclose(coeffs, expected) - assert rank == np.linalg.matrix_rank(np.vander(x, 3)) - np.testing.assert_almost_equal(resids, [0, 0]) + out = da.polyfit("x", 2, skipna=True, full=True) + assert_allclose(out.polyfit_coefficients, expected) + assert out.x_matrix_rank == np.linalg.matrix_rank(np.vander(x, 3)) + np.testing.assert_almost_equal(out.polyfit_residuals, [0, 0]) @pytest.fixture(params=[1]) From da831a5de27a35b15ac2d144bc4dc557036742a9 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 12 Feb 2020 12:34:53 -0500 Subject: [PATCH 11/20] Add more tests | fix some edge cases --- xarray/core/dask_array_ops.py | 12 ++++++-- xarray/core/dataset.py | 15 +++------- xarray/core/nputils.py | 26 +++++++++++------ xarray/tests/test_computation.py | 31 ++++++++++++++++---- xarray/tests/test_dataarray.py | 45 +++++++++++++++++++++-------- xarray/tests/test_dataset.py | 13 +++++++++ xarray/tests/test_duck_array_ops.py | 18 ++++++++++++ 7 files changed, 120 insertions(+), 40 deletions(-) diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index 961de63f664..87f646352eb 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -102,17 +102,23 @@ def least_squares(lhs, rhs, rcond=None, skipna=False): lhs_da = da.from_array(lhs, chunks=(rhs.chunks[0], lhs.shape[1])) if skipna: + added_dim = rhs.ndim == 1 + if added_dim: + rhs = rhs.reshape(rhs.shape[0], 1) results = da.apply_along_axis( nputils._nanpolyfit_1d, 0, - rhs.data, - lhs_da.data, + rhs, + lhs_da, dtype=float, shape=(lhs.shape[1] + 1,), rcond=rcond, ) coeffs = results[:-1, ...] residuals = results[-1, ...] 
+ if added_dim: + coeffs = coeffs.reshape(coeffs.shape[0]) + residuals = residuals.reshape(residuals.shape[0]) else: - coeffs, residuals, _, _ = da.linalg.lstsq(lhs_da.data, rhs.data) + coeffs, residuals, _, _ = da.linalg.lstsq(lhs_da, rhs) return coeffs, residuals diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 0d56fb0ed90..4ebd2fc6f0f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5771,7 +5771,7 @@ def polyfit( variables[sing.name] = sing for name, da in self.data_vars.items(): - if dim not in da.coords: + if dim not in da.dims: continue if skipna is None: @@ -5788,7 +5788,7 @@ def polyfit( rhs *= w[:, np.newaxis] coeffs, residuals = duck_array_ops.least_squares( - lhs, rhs, rcond=rcond, skipna=skipna_da + lhs, rhs.data, rcond=rcond, skipna=skipna_da ) if isinstance(name, str): @@ -5828,15 +5828,8 @@ def polyfit( "The number of data points must exceed order to scale the covariance matrix." ) fac = residuals / (x.shape[0] - order) - covariance = ( - xr.DataArray( - Vbase, - dims=("cov_i", "cov_j"), - name=name + "polyfit_covariance", - ) - * fac - ) - variables[covariance.name] = covariance + covariance = xr.DataArray(Vbase, dims=("cov_i", "cov_j"),) * fac + variables[name + "polyfit_covariance"] = covariance return Dataset(data_vars=variables, attrs=self.attrs.copy()) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index a069d6d9a18..b2cd0ba9b72 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -230,18 +230,26 @@ def _nanpolyfit_1d(arr, x, rcond=None): def least_squares(lhs, rhs, rcond=None, skipna=False): if skipna: - nan_cols = np.any(np.isnan(rhs.data), axis=0) - out = np.empty((lhs.shape[1] + 1, rhs.data.shape[1])) - out[:, nan_cols] = np.apply_along_axis( - _nanpolyfit_1d, 0, rhs.data[:, nan_cols], lhs - ) - out[:-1, ~nan_cols], out[-1, ~nan_cols], _, _ = np.linalg.lstsq( - lhs, rhs.data[:, ~nan_cols], rcond=rcond - ) + added_dim = rhs.ndim == 1 + if added_dim: + rhs = rhs.reshape(rhs.shape[0], 1) + nan_cols = np.any(np.isnan(rhs), axis=0) + out = np.empty((lhs.shape[1] + 1, rhs.shape[1])) + if np.any(nan_cols): + out[:, nan_cols] = np.apply_along_axis( + _nanpolyfit_1d, 0, rhs[:, nan_cols], lhs + ) + if np.any(~nan_cols): + out[:-1, ~nan_cols], out[-1, ~nan_cols], _, _ = np.linalg.lstsq( + lhs, rhs[:, ~nan_cols], rcond=rcond + ) coeffs = out[:-1, :] residuals = out[-1, :] + if added_dim: + coeffs = coeffs.reshape(coeffs.shape[0]) + residuals = residuals.reshape(residuals.shape[0]) else: - coeffs, residuals, _, _ = np.linalg.lstsq(lhs, rhs.data, rcond=rcond) + coeffs, residuals, _, _ = np.linalg.lstsq(lhs, rhs, rcond=rcond) return coeffs, residuals diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 8d9e72db8bb..01cd4d526fb 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1122,13 +1122,34 @@ def test_where(): assert_identical(expected, actual) -def test_polyval(): - x = np.arange(10) +@pytest.mark.parametrize("use_dask", [True, False]) +@pytest.mark.parametrize("use_datetime", [True, False]) +@pytest.mark.parametrize("provide_coord", [True, False]) +def test_polyval(use_dask, use_datetime, provide_coord): + if use_datetime: + xcoord = xr.DataArray( + pd.date_range("2000-01-01", freq="D", periods=10), dims=("x",), name="x" + ) + x = xr.core.missing.get_clean_interp_index(xcoord, "x") + else: + xcoord = x = np.arange(10) + da = xr.DataArray( np.stack((1.0 + x + 2.0 * x ** 2, 1.0 + 2.0 * x + 3.0 * x ** 2)), dims=("d", "x"), - coords={"x": x, 
"d": [0, 1]}, + coords={"x": xcoord, "d": [0, 1]}, ) - coeffs = da.polyfit("x", 2) - da_pv = xr.polyval(da.x, coeffs.polyfit_coefficients) + coeffs = xr.DataArray( + [[2, 1, 1], [3, 2, 1]], + dims=("d", "degree"), + coords={"d": [0, 1], "degree": [2, 1, 0]}, + ) + if use_dask: + coeffs = coeffs.chunk({"d": 2}) + if provide_coord: + da_pv = xr.polyval(da.x, coeffs) + else: + da_pv = xr.polyval(da.x, coeffs.values, degree_dim=1) + da_pv = da_pv.rename(dim_0="d") + da_pv["d"] = [0, 1] xr.testing.assert_allclose(da, da_pv.T) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 5b50d1d9adb..c707b625ab6 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4152,30 +4152,51 @@ def test_rank(self): y = DataArray([0.75, 0.25, np.nan, 0.5, 1.0], dims=("z",)) assert_equal(y.rank("z", pct=True), y) - def test_polyfit(self): - x = np.arange(10) - da = DataArray( - np.stack((1.0 + 1.0 * x + 2.0 * x ** 2, 1.0 + 2.0 * x + 3.0 * x ** 2)), + @pytest.mark.parametrize("use_dask", [True, False]) + @pytest.mark.parametrize("use_datetime", [True, False]) + def test_polyfit(self, use_dask, use_datetime): + xcoord = xr.DataArray( + pd.date_range("1970-01-01", freq="D", periods=10), dims=("x",), name="x" + ) + x = xr.core.missing.get_clean_interp_index(xcoord, "x") + if not use_datetime: + xcoord = x + + da_raw = DataArray( + np.stack( + (10 + 1e-15 * x + 2e-28 * x ** 2, 30 + 2e-14 * x + 1e-29 * x ** 2) + ), dims=("d", "x"), - coords={"x": x, "d": [0, 1]}, + coords={"x": xcoord, "d": [0, 1]}, ) + + if use_dask: + da = da_raw.chunk({"d": 1}) + else: + da = da_raw + out = da.polyfit("x", 2) expected = DataArray( - [[2, 1, 1], [3, 2, 1]], + [[2e-28, 1e-15, 10], [1e-29, 2e-14, 30]], dims=("d", "degree"), coords={"degree": [2, 1, 0], "d": [0, 1]}, ).T - assert_allclose(out.polyfit_coefficients, expected) + assert_allclose(out.polyfit_coefficients, expected, rtol=1e-3) # With NaN - da[0, 1] = np.nan - out = da.polyfit("x", 2, skipna=True) - assert_allclose(out.polyfit_coefficients, expected) + da_raw[0, 1] = np.nan + if use_dask: + da = da_raw.chunk({"d": 1}) + else: + da = da_raw + out = da.polyfit("x", 2, skipna=True, cov=True) + assert_allclose(out.polyfit_coefficients, expected, rtol=1e-3) + assert "polyfit_covariance" in out # Skipna + Full output out = da.polyfit("x", 2, skipna=True, full=True) - assert_allclose(out.polyfit_coefficients, expected) - assert out.x_matrix_rank == np.linalg.matrix_rank(np.vander(x, 3)) + assert_allclose(out.polyfit_coefficients, expected, rtol=1e-3) + assert out.x_matrix_rank == 3 np.testing.assert_almost_equal(out.polyfit_residuals, [0, 0]) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 4e51e229b29..aaa3e8bb645 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5438,6 +5438,19 @@ def test_ipython_key_completion(self): ds.data_vars[item] # should not raise assert sorted(actual) == sorted(expected) + def test_polyfit_output(self): + ds = create_test_data(seed=1) + + out = ds.polyfit("dim2", 2, full=False) + assert "var1_polyfit_coefficients" in out + + out = ds.polyfit("dim1", 2, full=True) + assert "var1_polyfit_coefficients" in out + assert "dim1_matrix_rank" in out + + out = ds.polyfit("time", 2) + assert len(out.data_vars) == 0 + # Py.test tests diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 497a1348be1..9d8a8cbc27f 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -16,6 
+16,7 @@ first, gradient, last, + least_squares, mean, np_timedelta64_to_float, pd_timedelta_to_float, @@ -760,3 +761,20 @@ def test_timedelta_to_numeric(td): out = timedelta_to_numeric(td, "ns") np.testing.assert_allclose(out, 86400 * 1e9) assert isinstance(out, float) + + +@pytest.mark.parametrize("use_dask", [True, False]) +@pytest.mark.parametrize("skipna", [True, False]) +def test_least_squares(use_dask, skipna): + if use_dask and not has_dask: + pytest.skip("requires dask") + lhs = np.array([[1, 2], [1, 2], [3, 2]]) + rhs = DataArray(np.array([3, 5, 7]), dims=("y",)) + + if use_dask: + rhs = rhs.chunk({"y": 1}) + + coeffs, residuals = least_squares(lhs, rhs.data, skipna=skipna) + + np.testing.assert_allclose(coeffs, [1.5, 1.25]) + np.testing.assert_allclose(residuals, [2.0]) From f13123fff2beedb087136e342d8574873a16733d Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 12 Feb 2020 13:06:01 -0500 Subject: [PATCH 12/20] Skip test without dask --- xarray/tests/test_dataarray.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index c707b625ab6..fc6e999b1fe 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -23,6 +23,7 @@ assert_array_equal, assert_equal, assert_identical, + has_dask, raises_regex, requires_bottleneck, requires_dask, @@ -4155,6 +4156,8 @@ def test_rank(self): @pytest.mark.parametrize("use_dask", [True, False]) @pytest.mark.parametrize("use_datetime", [True, False]) def test_polyfit(self, use_dask, use_datetime): + if use_dask and not has_dask: + pytest.skip("requires dask") xcoord = xr.DataArray( pd.date_range("1970-01-01", freq="D", periods=10), dims=("x",), name="x" ) From 896313fa4d94fd18510be2a8255721cf9815ca2b Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 12 Feb 2020 13:57:54 -0500 Subject: [PATCH 13/20] Fix 1D case | add docs --- doc/api.rst | 3 +++ doc/computation.rst | 26 ++++++++++++++++++++++++++ doc/whats-new.rst | 2 ++ xarray/core/dataset.py | 36 +++++++++++++++++++++++------------- 4 files changed, 54 insertions(+), 13 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 9735eb0c708..857e12d4227 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -30,6 +30,7 @@ Top-level functions zeros_like ones_like dot + polyval map_blocks show_versions set_options @@ -171,6 +172,7 @@ Computation Dataset.quantile Dataset.differentiate Dataset.integrate + Dataset.polyfit **Aggregation**: :py:attr:`~Dataset.all` @@ -349,6 +351,7 @@ Computation DataArray.quantile DataArray.differentiate DataArray.integrate + DataArray.polyfit DataArray.str **Aggregation**: diff --git a/doc/computation.rst b/doc/computation.rst index 1ac30f55ee7..3c6d90d413e 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -317,6 +317,32 @@ trapezoidal rule using their coordinates, and integration along multidimensional coordinate are not supported. +.. _compute.polyfit: + +Fitting polynomials +=================== + +Xarray objects provide an interface for performing linear or polynomial regressions +using the least-squares method. :py:meth:`~xarray.DataArray.polyfit` computes the +best fitting coefficients along a given dimension and for a given order, + +.. ipython:: python + + x = np.arange(10) + a = xr.DataArray(3 + 4 * x, dims=['x'], coords={'x': x}) + out = a.polyfit('x', 1, full=True) + out + +The method outputs a dataset containing the coefficients (and more if `full = True`). +The inverse operation is done with :py:meth:`~xarray.polyval`, + +.. 
ipython::python + + xr.polyval(x, out.polyfit_coefficients) + +.. note:: + These methods replicate the behaviour of :py:func:`numpy.polyfit` and :py:func:`numpy.polyval`. + .. _compute.broadcasting: Broadcasting by dimension name diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4aad6ad3701..926681c8f6d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -40,6 +40,8 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`) + By `Pascal Bourgault `_. - :py:meth:`DataArray.sel` and :py:meth:`Dataset.sel` now support :py:class:`pandas.CategoricalIndex`. (:issue:`3669`) By `Keisuke Fujii `_. - Support using an existing, opened h5netcdf ``File`` with diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 4ebd2fc6f0f..f76ccf41e29 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5781,8 +5781,17 @@ def polyfit( skipna_da = np.any(da.isnull()) dims_to_stack = [dimname for dimname in da.dims if dimname != dim] - stacked_dim = utils.get_temp_dimname(dims_to_stack, "stacked") - rhs = da.transpose(dim, *dims_to_stack).stack({stacked_dim: dims_to_stack}) + stacked_coords = {} + if dims_to_stack: + stacked_dim = utils.get_temp_dimname(dims_to_stack, "stacked") + rhs = da.transpose(dim, *dims_to_stack).stack( + {stacked_dim: dims_to_stack} + ) + stacked_coords = {stacked_dim: rhs[stacked_dim]} + scale_da = scale[:, np.newaxis] + else: + rhs = da + scale_da = scale if w is not None: rhs *= w[:, np.newaxis] @@ -5798,23 +5807,24 @@ def polyfit( name = "" coeffs = xr.DataArray( - coeffs / scale[:, np.newaxis], - dims=(degree_dim, stacked_dim), - coords={ - degree_dim: np.arange(order)[::-1], - stacked_dim: rhs[stacked_dim], - }, + coeffs / scale_da, + dims=[degree_dim] + list(stacked_coords.keys()), + coords={degree_dim: np.arange(order)[::-1], **stacked_coords}, name=name + "polyfit_coefficients", - ).unstack(stacked_dim) + ) + if dims_to_stack: + coeffs = coeffs.unstack(stacked_dim) variables[coeffs.name] = coeffs if full or (cov is True): residuals = xr.DataArray( - residuals, - dims=(stacked_dim), - coords={stacked_dim: rhs[stacked_dim]}, + residuals if dims_to_stack else residuals.squeeze(), + dims=list(stacked_coords.keys()), + coords=stacked_coords, name=name + "polyfit_residuals", - ).unstack(stacked_dim) + ) + if dims_to_stack: + residuals = residuals.unstack(stacked_dim) variables[residuals.name] = residuals if cov: From 5b709716c79b03776aae9b3b25a3df0cb6d68f34 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 12 Feb 2020 14:33:28 -0500 Subject: [PATCH 14/20] skip polyval test without dask --- xarray/tests/test_computation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 01cd4d526fb..f01ebb5759a 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1126,6 +1126,8 @@ def test_where(): @pytest.mark.parametrize("use_datetime", [True, False]) @pytest.mark.parametrize("provide_coord", [True, False]) def test_polyval(use_dask, use_datetime, provide_coord): + if use_dask and not has_dask: + pytest.skip("requires dask") if use_datetime: xcoord = xr.DataArray( pd.date_range("2000-01-01", freq="D", periods=10), dims=("x",), name="x" From 5af3c64190ace1d6216d06473bc672851ddc7858 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 13 Feb 2020 13:55:54 -0500 Subject: [PATCH 15/20] Explicit docs | More restrictive polyval --- doc/computation.rst 
From 5af3c64190ace1d6216d06473bc672851ddc7858 Mon Sep 17 00:00:00 2001
From: Pascal Bourgault
Date: Thu, 13 Feb 2020 13:55:54 -0500
Subject: [PATCH 15/20] Explicit docs | More restrictive polyval

---
 doc/computation.rst              |  4 ++--
 xarray/core/computation.py       | 24 +++++-------------------
 xarray/core/dataarray.py         | 24 ++++++++++++++++++++----
 xarray/core/dataset.py           | 24 ++++++++++++++++++++----
 xarray/tests/test_computation.py | 13 +++++--------
 5 files changed, 52 insertions(+), 37 deletions(-)

diff --git a/doc/computation.rst b/doc/computation.rst
index 3c6d90d413e..84b4d6002a6 100644
--- a/doc/computation.rst
+++ b/doc/computation.rst
@@ -330,7 +330,7 @@ best fitting coefficients along a given dimension and for a given order,

     x = np.arange(10)
     a = xr.DataArray(3 + 4 * x, dims=['x'], coords={'x': x})
-    out = a.polyfit('x', 1, full=True)
+    out = a.polyfit(dim='x', deg=1, full=True)
     out

 The method outputs a dataset containing the coefficients (and more if `full = True`).
@@ -338,7 +338,7 @@ The inverse operation is done with :py:meth:`~xarray.polyval`,

 .. ipython::python

-    xr.polyval(x, out.polyfit_coefficients)
+    xr.polyval(coord=x, coeffs=out.polyfit_coefficients)

 .. note::
     These methods replicate the behaviour of :py:func:`numpy.polyfit` and :py:func:`numpy.polyval`.
diff --git a/xarray/core/computation.py b/xarray/core/computation.py
index d27b1562857..9096b9fa1d4 100644
--- a/xarray/core/computation.py
+++ b/xarray/core/computation.py
@@ -1306,12 +1306,12 @@ def polyval(coord, coeffs, degree_dim="degree"):

     Parameters
     ----------
-    coord : DataArray or array-like
+    coord : DataArray
        The 1D coordinate along which to evaluate the polynomial.
-    coeffs : DataArray or array-like
+    coeffs : DataArray
        Coefficients of the polynomials.
-    degree_dim : str or int, optional
-        Name of dimension or axis number along which the degree increases in `coeffs`.
+    degree_dim : str, optional
+        Name of the polynomial degree dimension in `coeffs`.

     See also
     --------
@@ -1321,23 +1321,9 @@ def polyval(coord, coeffs, degree_dim="degree"):
     from .dataarray import DataArray
     from .missing import get_clean_interp_index

-    if not isinstance(coord, DataArray):
-        coord = DataArray(coord, dims=("x",), name="x")
     x = get_clean_interp_index(coord, coord.name)

-    if isinstance(degree_dim, str):
-        deg_coord = coeffs[degree_dim]
-    else:  # Axis number
-        degree_axis = degree_dim
-        if isinstance(coeffs, DataArray):
-            degree_dim = coeffs.dims[degree_axis]
-            deg_coord = coeffs[degree_dim]
-        else:
-            degree_dim = "degree"
-            deg_coord = np.arange(coeffs.shape[degree_axis])[::-1]
-            coeffs = DataArray(coeffs)
-            coeffs.coords[coeffs.dims[degree_axis]] = deg_coord
-        coeffs = coeffs.rename({coeffs.dims[degree_axis]: degree_dim})
+    deg_coord = coeffs[degree_dim]

     lhs = DataArray(
         np.vander(x, int(deg_coord.max()) + 1),
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index e2d698d61fa..c6a786a9351 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -3254,18 +3254,34 @@ def polyfit(
         ----------
         dim : hashable
             Coordinate along which to fit the polynomials.
+        deg : int
+            Degree of the fitting polynomial.
         skipna : bool, optional
             If True, removes all invalid values before fitting each 1D slices of the array
-            Defaults is True if data is stored in as dask.array or if there is any
+            Default is True if data is stored in as dask.array or if there is any
             invalid values, False otherwise.
+        rcond : float, optional
+            Relative condition number to the fit.
+        w : Union[Hashable, Any], optional
+            Weights to apply to the y-coordinate of the sample points.
+            Can be an array-like object or the name of a coordinate in the dataset.
+        full : bool, optional
+            Whether to return the residuals, matrix rank and singular values in addition
+            to the coefficients.
+        cov : Union[bool, str], optional
+            Whether to return to the covariance matrix in addition to the coefficients.
+            The matrix is not scaled if `cov = 'unscaled'`.

         See documentation of `numpy.polyfit`.

         Returns
         -------
-        A single dataset with the coefficients of the best fit for this DataArray.
-        The residuals, singular values and rank are included if `full` is True.
-        The covariance matrix is included if `full` is False and `cov` is True.
+        A single dataset which containts:
+            polyfit_coefficients : The coefficients of the best fit.
+            polyfit_residuals : The residuals of the least-square computation (only included if `full=True`)
+            [dim]_matrix_rank : The effective rank of the scaled Vandermonde coefficient matrix (only included if `full=True`)
+            [dim]_singular_values : The singular values of the scaled Vandermonde coefficient matrix (only included if `full=True`)
+            polyfit_covariance : The covariance matrix of the polynomial coefficient estimates (only included if `full=False` and `cov=True`)
         """
         return self._to_temp_dataset().polyfit(
             dim, deg, skipna=skipna, rcond=rcond, w=w, full=full, cov=cov
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index f76ccf41e29..3675c76e705 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -5699,7 +5699,7 @@ def polyfit(
         rcond: float = None,
         w: Union[Hashable, Any] = None,
         full: bool = False,
-        cov: bool = False,
+        cov: Union[bool, str] = False,
     ):
         """
         Least squares polynomial fit.
@@ -5711,18 +5711,34 @@ def polyfit(
         ----------
         dim : hashable
             Coordinate along which to fit the polynomials.
+        deg : int
+            Degree of the fitting polynomial.
         skipna : bool, optional
             If True, removes all invalid values before fitting each 1D slices of the array
             Default is True if data is stored in as dask.array or if there is any
             invalid values, False otherwise.
+        rcond : float, optional
+            Relative condition number to the fit.
+        w : Union[Hashable, Any], optional
+            Weights to apply to the y-coordinate of the sample points.
+            Can be an array-like object or the name of a coordinate in the dataset.
+        full : bool, optional
+            Whether to return the residuals, matrix rank and singular values in addition
+            to the coefficients.
+        cov : Union[bool, str], optional
+            Whether to return to the covariance matrix in addition to the coefficients.
+            The matrix is not scaled if `cov = 'unscaled'`.

         See documentation of `numpy.polyfit`.

         Returns
         -------
-        A single dataset with the coefficients of the best fit for each variable in this dataset.
-        The residuals, singular values and rank are included if `full` is True.
-        The covariance matrices are included if `full` is False and `cov` is True.
+        A single dataset which containts:
+            [var_]polyfit_coefficients : The coefficients of the best fit for each variable in this dataset.
+            [var_]polyfit_residuals : The residuals of the least-square computation for each variable (only included if `full=True`)
+            [dim]_matrix_rank : The effective rank of the scaled Vandermonde coefficient matrix (only included if `full=True`)
+            [dim]_singular_values : The singular values of the scaled Vandermonde coefficient matrix (only included if `full=True`)
+            [var_]polyfit_covariance : The covariance matrix of the polynomial coefficient estimates (only included if `full=False` and `cov=True`)
         """
         variables = {}
         skipna_da = skipna
diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py
index f01ebb5759a..4eed464d2dc 100644
--- a/xarray/tests/test_computation.py
+++ b/xarray/tests/test_computation.py
@@ -1124,10 +1124,10 @@ def test_where():

 @pytest.mark.parametrize("use_dask", [True, False])
 @pytest.mark.parametrize("use_datetime", [True, False])
-@pytest.mark.parametrize("provide_coord", [True, False])
-def test_polyval(use_dask, use_datetime, provide_coord):
+def test_polyval(use_dask, use_datetime):
     if use_dask and not has_dask:
         pytest.skip("requires dask")
+
     if use_datetime:
         xcoord = xr.DataArray(
             pd.date_range("2000-01-01", freq="D", periods=10), dims=("x",), name="x"
@@ -1148,10 +1148,7 @@ def test_polyval(use_dask, use_datetime, provide_coord):
     )
     if use_dask:
         coeffs = coeffs.chunk({"d": 2})
-    if provide_coord:
-        da_pv = xr.polyval(da.x, coeffs)
-    else:
-        da_pv = xr.polyval(da.x, coeffs.values, degree_dim=1)
-        da_pv = da_pv.rename(dim_0="d")
-        da_pv["d"] = [0, 1]
+
+    da_pv = xr.polyval(da.x, coeffs)
+
     xr.testing.assert_allclose(da, da_pv.T)

From 3400dea560267f39d51eedc5159b6728b4d68d71 Mon Sep 17 00:00:00 2001
From: Pascal Bourgault
Date: Fri, 13 Mar 2020 12:39:24 -0400
Subject: [PATCH 16/20] Small typo in polyfit docstrings

---
 xarray/core/dataarray.py | 2 +-
 xarray/core/dataset.py   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index ec77a666894..8668f7fab89 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -3269,7 +3269,7 @@ def polyfit(
             Degree of the fitting polynomial.
         skipna : bool, optional
             If True, removes all invalid values before fitting each 1D slices of the array
-            Default is True if data is stored in as dask.array or if there is any
+            Default is True if data is stored in a dask.array or if there is any
             invalid values, False otherwise.
         rcond : float, optional
             Relative condition number to the fit.
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index eda08eea08a..168f1755f47 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -5749,7 +5749,7 @@ def polyfit(
             Degree of the fitting polynomial.
         skipna : bool, optional
             If True, removes all invalid values before fitting each 1D slices of the array
-            Default is True if data is stored in as dask.array or if there is any
+            Default is True if data is stored in a dask.array or if there is any
             invalid values, False otherwise.
         rcond : float, optional
             Relative condition number to the fit.
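With [PATCH 15/20] applied, ``xr.polyval`` only accepts DataArray arguments whose coefficients carry the polynomial degree as a named dimension, exactly as ``polyfit`` produces them. A hedged usage sketch (the coefficient values are made up; only the call signature comes from the patch):

    import numpy as np
    import xarray as xr

    x = xr.DataArray(np.arange(10), dims=["x"], name="x")

    # Coefficients with an explicit "degree" coordinate, highest degree first,
    # mirroring polyfit's output layout: this encodes 2*x**2 + 3*x + 5.
    coeffs = xr.DataArray(
        [2.0, 3.0, 5.0], dims=["degree"], coords={"degree": [2, 1, 0]}
    )

    # Passing a bare numpy array for `coeffs`, or an axis number for
    # `degree_dim`, is no longer supported after this change.
    da_pv = xr.polyval(x, coeffs)
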
From 198ff81eaa540f3f0cb119dba6225ee0b9c7d223 Mon Sep 17 00:00:00 2001
From: Pascal Bourgault
Date: Fri, 13 Mar 2020 13:53:30 -0400
Subject: [PATCH 17/20] Apply suggestions from code review

Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
---
 doc/computation.rst        | 4 ++--
 xarray/core/computation.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/computation.rst b/doc/computation.rst
index 84b4d6002a6..291172edca1 100644
--- a/doc/computation.rst
+++ b/doc/computation.rst
@@ -333,10 +333,10 @@ best fitting coefficients along a given dimension and for a given order,
     out = a.polyfit(dim='x', deg=1, full=True)
     out

-The method outputs a dataset containing the coefficients (and more if `full = True`).
+The method outputs a dataset containing the coefficients (and more if `full=True`).
 The inverse operation is done with :py:meth:`~xarray.polyval`,

-.. ipython::python
+.. ipython:: python

     xr.polyval(coord=x, coeffs=out.polyfit_coefficients)

diff --git a/xarray/core/computation.py b/xarray/core/computation.py
index 9096b9fa1d4..9aefd8e5d5e 100644
--- a/xarray/core/computation.py
+++ b/xarray/core/computation.py
@@ -1310,13 +1310,13 @@ def polyval(coord, coeffs, degree_dim="degree"):
         The 1D coordinate along which to evaluate the polynomial.
     coeffs : DataArray
         Coefficients of the polynomials.
-    degree_dim : str, optional
+    degree_dim : str, default "degree"
         Name of the polynomial degree dimension in `coeffs`.

     See also
     --------
-    `xarray.DataArray.polyfit`
-    `numpy.polyval`
+    xarray.DataArray.polyfit
+    numpy.polyval
     """
     from .dataarray import DataArray
     from .missing import get_clean_interp_index

From 34726a14127107db93dbfa8e165dfa0227043886 Mon Sep 17 00:00:00 2001
From: Pascal Bourgault
Date: Fri, 13 Mar 2020 16:09:51 -0400
Subject: [PATCH 18/20] Polyfit : fix style in docstring | add see also section

---
 xarray/core/dataarray.py | 6 +++++-
 xarray/core/dataset.py   | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 8668f7fab89..aeb16eed46f 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -3281,7 +3281,7 @@ def polyfit(
             to the coefficients.
         cov : Union[bool, str], optional
             Whether to return to the covariance matrix in addition to the coefficients.
-            The matrix is not scaled if `cov = 'unscaled'`.
+            The matrix is not scaled if `cov='unscaled'`.

         See documentation of `numpy.polyfit`.

@@ -3293,6 +3293,10 @@ def polyfit(
             [dim]_matrix_rank : The effective rank of the scaled Vandermonde coefficient matrix (only included if `full=True`)
             [dim]_singular_values : The singular values of the scaled Vandermonde coefficient matrix (only included if `full=True`)
             polyfit_covariance : The covariance matrix of the polynomial coefficient estimates (only included if `full=False` and `cov=True`)
+
+        See also
+        --------
+        numpy.polyfit
         """
         return self._to_temp_dataset().polyfit(
             dim, deg, skipna=skipna, rcond=rcond, w=w, full=full, cov=cov
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 168f1755f47..0e883979ddf 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -5761,7 +5761,7 @@ def polyfit(
             to the coefficients.
         cov : Union[bool, str], optional
             Whether to return to the covariance matrix in addition to the coefficients.
-            The matrix is not scaled if `cov = 'unscaled'`.
+            The matrix is not scaled if `cov='unscaled'`.

         See documentation of `numpy.polyfit`.
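The `cov` option documented in the docstring hunks above adds the covariance of the estimated coefficients to the returned dataset, and `cov='unscaled'` skips the scaling by the residuals. A small sketch of how it might be used (synthetic noisy data; behaviour as described by the docstrings, not by a doc example in the patches):

    import numpy as np
    import xarray as xr

    rng = np.random.default_rng(0)
    x = np.arange(50)
    noisy = xr.DataArray(
        3 + 4 * x + rng.normal(scale=0.1, size=x.size),
        dims=["x"],
        coords={"x": x},
    )

    out = noisy.polyfit(dim="x", deg=1, cov=True)
    # "polyfit_covariance" holds the (deg + 1) x (deg + 1) covariance of the
    # fitted coefficients; with cov="unscaled" it is left unscaled.
    print(out.polyfit_covariance)
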
@@ -5773,6 +5773,10 @@ def polyfit(
             [dim]_matrix_rank : The effective rank of the scaled Vandermonde coefficient matrix (only included if `full=True`)
             [dim]_singular_values : The singular values of the scaled Vandermonde coefficient matrix (only included if `full=True`)
             [var_]polyfit_covariance : The covariance matrix of the polynomial coefficient estimates (only included if `full=False` and `cov=True`)
+
+        See also
+        --------
+        numpy.polyfit
         """
         variables = {}
         skipna_da = skipna

From 3f7fe4c43972883c3e7e18eeb68d833c70a57697 Mon Sep 17 00:00:00 2001
From: Pascal Bourgault
Date: Fri, 13 Mar 2020 17:49:31 -0400
Subject: [PATCH 19/20] Clean up docstrings and documentation.

---
 doc/computation.rst      |  2 +-
 xarray/core/dataarray.py | 23 ++++++++++++++---------
 xarray/core/dataset.py   | 22 ++++++++++++++--------
 3 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/doc/computation.rst b/doc/computation.rst
index 291172edca1..520a2261e31 100644
--- a/doc/computation.rst
+++ b/doc/computation.rst
@@ -328,7 +328,7 @@ best fitting coefficients along a given dimension and for a given order,

 .. ipython:: python

-    x = np.arange(10)
+    x = xr.DataArray(np.arange(10), dims=['x'], name='x')
     a = xr.DataArray(3 + 4 * x, dims=['x'], coords={'x': x})
     out = a.polyfit(dim='x', deg=1, full=True)
     out
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index aeb16eed46f..b923f9b15e3 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -3268,7 +3268,7 @@ def polyfit(
         deg : int
             Degree of the fitting polynomial.
         skipna : bool, optional
-            If True, removes all invalid values before fitting each 1D slices of the array
+            If True, removes all invalid values before fitting each 1D slices of the array.
             Default is True if data is stored in a dask.array or if there is any
             invalid values, False otherwise.
         rcond : float, optional
@@ -3283,16 +3283,21 @@ def polyfit(
             Whether to return to the covariance matrix in addition to the coefficients.
             The matrix is not scaled if `cov='unscaled'`.

-        See documentation of `numpy.polyfit`.
-
         Returns
         -------
-        A single dataset which containts:
-            polyfit_coefficients : The coefficients of the best fit.
-            polyfit_residuals : The residuals of the least-square computation (only included if `full=True`)
-            [dim]_matrix_rank : The effective rank of the scaled Vandermonde coefficient matrix (only included if `full=True`)
-            [dim]_singular_values : The singular values of the scaled Vandermonde coefficient matrix (only included if `full=True`)
-            polyfit_covariance : The covariance matrix of the polynomial coefficient estimates (only included if `full=False` and `cov=True`)
+        polyfit_results : Dataset
+            A single dataset which contains:
+
+            polyfit_coefficients
+                The coefficients of the best fit.
+            polyfit_residuals
+                The residuals of the least-square computation (only included if `full=True`)
+            [dim]_matrix_rank
+                The effective rank of the scaled Vandermonde coefficient matrix (only included if `full=True`)
+            [dim]_singular_value
+                The singular values of the scaled Vandermonde coefficient matrix (only included if `full=True`)
+            polyfit_covariance
+                The covariance matrix of the polynomial coefficient estimates (only included if `full=False` and `cov=True`)

         See also
         --------
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 0e883979ddf..b40773c75f3 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -5748,7 +5748,7 @@ def polyfit(
         deg : int
             Degree of the fitting polynomial.
         skipna : bool, optional
-            If True, removes all invalid values before fitting each 1D slices of the array
+            If True, removes all invalid values before fitting each 1D slices of the array.
             Default is True if data is stored in a dask.array or if there is any
             invalid values, False otherwise.
         rcond : float, optional
@@ -5763,16 +5763,22 @@ def polyfit(
             Whether to return to the covariance matrix in addition to the coefficients.
             The matrix is not scaled if `cov='unscaled'`.

-        See documentation of `numpy.polyfit`.

         Returns
         -------
-        A single dataset which containts:
-            [var_]polyfit_coefficients : The coefficients of the best fit for each variable in this dataset.
-            [var_]polyfit_residuals : The residuals of the least-square computation for each variable (only included if `full=True`)
-            [dim]_matrix_rank : The effective rank of the scaled Vandermonde coefficient matrix (only included if `full=True`)
-            [dim]_singular_values : The singular values of the scaled Vandermonde coefficient matrix (only included if `full=True`)
-            [var_]polyfit_covariance : The covariance matrix of the polynomial coefficient estimates (only included if `full=False` and `cov=True`)
+        polyfit_results : Dataset
+            A single dataset which contains (for each "var" in the input dataset):
+
+            [var]_polyfit_coefficients
+                The coefficients of the best fit for each variable in this dataset.
+            [var]_polyfit_residuals
+                The residuals of the least-square computation for each variable (only included if `full=True`)
+            [dim]_matrix_rank
+                The effective rank of the scaled Vandermonde coefficient matrix (only included if `full=True`)
+            [dim]_singular_values
+                The singular values of the scaled Vandermonde coefficient matrix (only included if `full=True`)
+            [var]_polyfit_covariance
+                The covariance matrix of the polynomial coefficient estimates (only included if `full=False` and `cov=True`)

         See also
         --------
         numpy.polyfit
         """
         variables = {}
         skipna_da = skipna

From 7eeba59ff487d5bc51809da4ae824e7283b5b2aa Mon Sep 17 00:00:00 2001
From: Phobos
Date: Wed, 25 Mar 2020 10:37:46 -0400
Subject: [PATCH 20/20] Move whats new entry to 0.16 | fix PEP8 issue in test_dataarray

---
 doc/whats-new.rst              | 4 ++--
 xarray/tests/test_dataarray.py | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 3f1a3c6e993..2a9ee846b28 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -24,6 +24,8 @@ Breaking changes

 New Features
 ~~~~~~~~~~~~
+- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`)
+  By `Pascal Bourgault `_.
 - Control over attributes of result in :py:func:`merge`, :py:func:`concat`,
   :py:func:`combine_by_coords` and :py:func:`combine_nested` using
   combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`)
@@ -212,8 +214,6 @@ Breaking changes

 New Features
 ~~~~~~~~~~~~
-- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`)
-  By `Pascal Bourgault `_.
 - Implement :py:meth:`DataArray.pad` and :py:meth:`Dataset.pad`. (:issue:`2605`, :pull:`3596`).
   By `Mark Boer `_.
 - :py:meth:`DataArray.sel` and :py:meth:`Dataset.sel` now support :py:class:`pandas.CategoricalIndex`.
   (:issue:`3669`)
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 6416ca877bc..e23ff2f7e31 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -4240,6 +4240,7 @@ def test_polyfit(self, use_dask, use_datetime):
         assert_allclose(out.polyfit_coefficients, expected, rtol=1e-3)
         assert out.x_matrix_rank == 3
         np.testing.assert_almost_equal(out.polyfit_residuals, [0, 0])
+
     def test_pad_constant(self):
         ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5))
         actual = ar.pad(dim_0=(1, 3))
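
For reference, a sketch of what the `full=True` assertions in the test above verify, on made-up data (the real fixtures of `test_polyfit` are not reproduced here):

    import numpy as np
    import xarray as xr

    x = np.arange(10)
    da = xr.DataArray(
        np.stack([1 + 2 * x + 3 * x ** 2, 4 + 5 * x + 6 * x ** 2]),
        dims=("d", "x"),
        coords={"x": x, "d": [0, 1]},
    )

    out = da.polyfit("x", 2, full=True)
    # Exact quadratic fits: the residuals are essentially zero and the scaled
    # Vandermonde matrix has full rank (deg + 1 = 3).
    np.testing.assert_almost_equal(out.polyfit_residuals, [0, 0])
    assert out.x_matrix_rank == 3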