Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of polyfit and polyval #3733

Merged
merged 23 commits into from
Mar 25, 2020
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
80936fd
[WIP] Implementation of polyfit and polyval - minimum testing - no docs
aulemahal Jan 30, 2020
05355af
Formatting with black, flake8
aulemahal Jan 30, 2020
7f26cb1
Fix failing test
aulemahal Jan 30, 2020
fdc1819
More intelligent skipna switching
aulemahal Jan 30, 2020
924f4eb
Add docs | Change coeff order to fit numpy | move polyval
aulemahal Jan 31, 2020
1f6030a
Move doc patching to class
aulemahal Jan 31, 2020
0f922e7
conditional doc patching
aulemahal Jan 31, 2020
bfec9c8
Fix windows fail - more efficient nan skipping
aulemahal Jan 31, 2020
2d3235a
Fix typo in least_squares
aulemahal Jan 31, 2020
31440ae
Move polyfit to dataset
aulemahal Feb 11, 2020
da831a5
Add more tests | fix some edge cases
aulemahal Feb 12, 2020
f13123f
Skip test without dask
aulemahal Feb 12, 2020
896313f
Fix 1D case | add docs
aulemahal Feb 12, 2020
5b70971
skip polyval test without dask
aulemahal Feb 12, 2020
5af3c64
Explicit docs | More restrictive polyval
aulemahal Feb 13, 2020
d731fd9
Merge branch 'master' into implement-polyfit-polyval
aulemahal Mar 13, 2020
3400dea
Small typo in polyfit docstrings
aulemahal Mar 13, 2020
198ff81
Apply suggestions from code review
aulemahal Mar 13, 2020
34726a1
Polyfit : fix style in docstring | add see also section
aulemahal Mar 13, 2020
860a892
Merge branch 'implement-polyfit-polyval' of github.com:aulemahal/xarray
aulemahal Mar 13, 2020
3f7fe4c
Clean up docstrings and documentation.
aulemahal Mar 13, 2020
31e92bb
Merge master
aulemahal Mar 25, 2020
7eeba59
Move whats new entry to 0.16 | fix PEP8 issue in test_dataarray
aulemahal Mar 25, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from .core.alignment import align, broadcast
from .core.combine import auto_combine, combine_by_coords, combine_nested
from .core.common import ALL_DIMS, full_like, ones_like, zeros_like
from .core.computation import apply_ufunc, dot, where
from .core.computation import apply_ufunc, dot, polyval, where
from .core.concat import concat
from .core.dataarray import DataArray
from .core.dataset import Dataset
Expand Down Expand Up @@ -65,6 +65,7 @@
"open_mfdataset",
"open_rasterio",
"open_zarr",
"polyval",
"register_dataarray_accessor",
"register_dataset_accessor",
"save_mfdataset",
Expand Down
46 changes: 46 additions & 0 deletions xarray/core/computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1299,3 +1299,49 @@ def where(cond, x, y):
dataset_join="exact",
dask="allowed",
)


def polyval(coord, coeffs, degree_dim="degree"):
    """Evaluate a polynomial at specific values.

    Parameters
    ----------
    coord : DataArray or array-like
        The 1D coordinate along which to evaluate the polynomial.
    coeffs : DataArray or array-like
        Coefficients of the polynomials.
    degree_dim : str or int, optional
        Name of dimension or axis number along which the degree increases in `coeffs`.

    See also
    --------
    `xarray.DataArray.polyfit`
    `numpy.polyval`
    """
    from .dataarray import DataArray
    from .missing import get_clean_interp_index

    # Promote a bare array to a DataArray so it carries a usable index.
    if not isinstance(coord, DataArray):
        coord = DataArray(coord, dims=("x",), name="x")
    x = get_clean_interp_index(coord, coord.name)

    if isinstance(degree_dim, str):
        deg_coord = coeffs[degree_dim]
    else:
        # degree_dim was given as an axis number rather than a name.
        axis = degree_dim
        if isinstance(coeffs, DataArray):
            degree_dim = coeffs.dims[axis]
            deg_coord = coeffs[degree_dim]
        else:
            # Raw array of coefficients: wrap it and attach a decreasing
            # degree coordinate (numpy's highest-degree-first convention).
            degree_dim = "degree"
            deg_coord = np.arange(coeffs.shape[axis])[::-1]
            coeffs = DataArray(coeffs)
            coeffs.coords[coeffs.dims[axis]] = deg_coord
            coeffs = coeffs.rename({coeffs.dims[axis]: degree_dim})

    # Vandermonde matrix of x aligned on the degree dimension; the product
    # with the coefficients summed over `degree_dim` evaluates the polynomial.
    top = deg_coord.max()
    lhs = DataArray(
        np.vander(x, int(top) + 1),
        dims=(coord.name, degree_dim),
        coords={coord.name: coord, degree_dim: np.arange(top + 1)[::-1]},
    )
    return (lhs * coeffs).sum(degree_dim)
21 changes: 21 additions & 0 deletions xarray/core/dask_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,24 @@ def func(x, window, axis=-1):
# crop boundary.
index = (slice(None),) * axis + (slice(drop_size, drop_size + orig_shape[axis]),)
return out[index]


def least_squares(lhs, rhs, rcond=None, skipna=False):
    """Least-squares fit of ``lhs @ coeffs = rhs`` backed by dask.

    Parameters
    ----------
    lhs : np.ndarray
        Design matrix of shape (n, ncoeffs), e.g. a Vandermonde matrix.
    rhs : object
        Right-hand side; assumed to be dask-backed with ``.data`` and
        ``.chunks`` attributes (looks like an xarray DataArray wrapping a
        dask array — TODO confirm against callers in duck_array_ops).
    rcond : float, optional
        Cut-off ratio forwarded to the NaN-skipping 1D solver.
    skipna : bool, optional
        If True, fit each 1D slice independently, ignoring NaNs.

    Returns
    -------
    coeffs, residuals : dask arrays
    """
    import dask.array as da

    # Chunk lhs along the fitting dimension to match rhs so blocks align.
    lhs_da = da.from_array(lhs, chunks=(rhs.chunks[0], lhs.shape[1]))
    if skipna:
        # _nanpolyfit_1d returns the coefficients with the residual appended,
        # hence the output slice length of ncoeffs + 1.
        results = da.apply_along_axis(
            nputils._nanpolyfit_1d,
            0,
            rhs.data,
            lhs_da,  # BUGFIX: was `lhs_da.data` — dask arrays have no `.data` attribute
            dtype=float,
            shape=(lhs.shape[1] + 1,),
            rcond=rcond,
        )
        coeffs = results[:-1, ...]
        residuals = results[-1, ...]
    else:
        # BUGFIX: was `da.linalg.lstsq(lhs_da.data, rhs.data)` — same `.data` issue.
        coeffs, residuals, _, _ = da.linalg.lstsq(lhs_da, rhs.data)
    return coeffs, residuals
127 changes: 126 additions & 1 deletion xarray/core/dataarray.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import datetime
import functools
import warnings
from numbers import Number
Expand All @@ -18,18 +19,19 @@
cast,
)

import datetime
import numpy as np
import pandas as pd

from ..plot.plot import _PlotMethods
from . import (
computation,
dtypes,
duck_array_ops,
groupby,
indexing,
ops,
pdcompat,
pycompat,
resample,
rolling,
utils,
Expand All @@ -54,6 +56,7 @@
from .indexes import Indexes, default_indexes, propagate_indexes
from .indexing import is_fancy_indexer
from .merge import PANDAS_TYPES, _extract_indexes_from_coords
from .missing import get_clean_interp_index
from .options import OPTIONS
from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs
from .variable import (
Expand Down Expand Up @@ -3234,6 +3237,128 @@ def map_blocks(

return map_blocks(func, self, args, kwargs)

def polyfit(
    self,
    dim: Hashable,
    deg: int,
    skipna: bool = None,
    rcond: float = None,
    w: Union[Hashable, Any] = None,
    full: bool = False,
    cov: bool = False,
):
    """
    Least squares polynomial fit.

    This replicates the behaviour of `numpy.polyfit` but differs by skipping
    invalid values when `skipna = True`.

    Parameters
    ----------
    dim : hashable
        Coordinate along which to fit the polynomials.
    deg : int
        Degree of the fitting polynomial.
    skipna : bool, optional
        If True, removes all invalid values before fitting each 1D slices of the array
        Defaults is True if data is stored in as dask.array or if there is any
        invalid values, False otherwise.
    rcond : float, optional
        Relative condition number of the fit.
    w : hashable or array-like, optional
        Weights to apply to the y-coordinates of the sample points.
        Can be the name of a 1D coordinate of this array, or a 1D array-like.
    full : bool, optional
        If True, also return residuals, rank, singular values and rcond.
    cov : bool or "unscaled", optional
        If truthy, also return the covariance matrix of the coefficients.
        With ``cov="unscaled"`` the matrix is not scaled by the residuals.

    Documentation of `numpy.polyfit` follows:
    """
    if skipna is None:
        # Dask data can't be cheaply scanned for NaNs, so always skip;
        # otherwise only pay the NaN-skipping cost when NaNs are present.
        if isinstance(self.data, pycompat.dask_array_type):
            skipna = True
        else:
            skipna = np.any(self.isnull())

    x = get_clean_interp_index(self, dim)
    order = int(deg) + 1
    lhs = np.vander(x, order)

    # Collapse all non-fit dimensions into one stacked dimension so the
    # solver only ever sees a 2D right-hand side.
    # NOTE(review): dims_to_stack may be empty for 1-D input — verify that
    # stack({...: []}) behaves as intended in that case.
    dims_to_stack = [dimname for dimname in self.dims if dimname != dim]
    stacked_dim = utils.get_temp_dimname(dims_to_stack, "stacked")
    rhs = self.transpose(dim, *dims_to_stack).stack({stacked_dim: dims_to_stack})

    if rcond is None:
        rcond = x.shape[0] * np.core.finfo(x.dtype).eps

    # Weights:
    if w is not None:
        if isinstance(w, Hashable):
            w = self[w]
        w = np.asarray(w)
        if w.ndim != 1:
            raise TypeError("Expected a 1-d array for weights.")
        if w.shape[0] != lhs.shape[0]:
            raise TypeError("Expected w and {} to have the same length".format(dim))
        lhs *= w[:, np.newaxis]
        rhs *= w[:, np.newaxis]

    # Scale lhs columns to unit norm to improve conditioning; undone below.
    scale = np.sqrt((lhs * lhs).sum(axis=0))
    lhs /= scale

    coeffs, residuals = duck_array_ops.least_squares(
        lhs, rhs, rcond=rcond, skipna=skipna
    )

    # Prefix output names with the array's name, if any.
    if self.name:
        name = "{}_".format(self.name)
    else:
        name = ""

    degree_dim = utils.get_temp_dimname(dims_to_stack, "degree")
    coeffs = DataArray(
        coeffs / scale[:, np.newaxis],  # undo the column scaling
        dims=(degree_dim, stacked_dim),
        coords={degree_dim: np.arange(order)[::-1], stacked_dim: rhs[stacked_dim]},
        # BUGFIX: suffixes previously started with "_", yielding a double
        # underscore for named arrays ("foo__polyfit_coefficients") and a
        # leading underscore for unnamed ones.
        name=name + "polyfit_coefficients",
    ).unstack(stacked_dim)

    rank = np.linalg.matrix_rank(lhs)
    if rank != order and not full:
        warnings.warn(
            "Polyfit may be poorly conditioned", np.RankWarning, stacklevel=4
        )

    if full or (cov is True):
        residuals = DataArray(
            residuals,
            dims=(stacked_dim,),
            coords={stacked_dim: rhs[stacked_dim]},
            name=name + "polyfit_residuals",
        ).unstack(stacked_dim)
    if full:
        sing = np.linalg.svd(lhs, compute_uv=False)
        sing = DataArray(
            sing,
            dims=(degree_dim,),
            coords={degree_dim: np.arange(order)[::-1]},
            name=name + "polyfit_singular_values",
        )
        return coeffs, residuals, rank, sing, rcond
    if cov:
        Vbase = np.linalg.inv(np.dot(lhs.T, lhs))
        Vbase /= np.outer(scale, scale)
        if cov == "unscaled":
            fac = 1
        else:
            if x.shape[0] <= order:
                raise ValueError(
                    "The number of data points must exceed order to scale the covariance matrix."
                )
            fac = residuals / (x.shape[0] - order)
        covariance = (
            DataArray(
                Vbase, dims=("cov_i", "cov_j"), name=name + "polyfit_covariance",
            )
            * fac
        )
        return coeffs, covariance
    return coeffs

# Append numpy's docstring; skipped under `python -OO`, where __doc__ is None.
if polyfit.__doc__ is not None:
    polyfit.__doc__ += np.polyfit.__doc__

# this needs to be at the end, or mypy will confuse with `str`
# https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names
str = property(StringAccessor)
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import copy
import datetime
import functools
import sys
import warnings
Expand Down Expand Up @@ -27,7 +28,6 @@
cast,
)

import datetime
import numpy as np
import pandas as pd

Expand Down
9 changes: 9 additions & 0 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,3 +600,12 @@ def rolling_window(array, axis, window, center, fill_value):
return dask_array_ops.rolling_window(array, axis, window, center, fill_value)
else: # np.ndarray
return nputils.rolling_window(array, axis, window, center, fill_value)


def least_squares(lhs, rhs, rcond=None, skipna=False):
    """Return the coefficients and residuals of a least-squares fit.

    Dispatches to the dask implementation when ``rhs`` is dask-backed,
    and to the numpy implementation otherwise.
    """
    if isinstance(rhs, dask_array_type):
        backend = dask_array_ops
    else:
        backend = nputils
    return backend.least_squares(lhs, rhs, rcond=rcond, skipna=skipna)
2 changes: 1 addition & 1 deletion xarray/core/missing.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import datetime as dt
import warnings
from functools import partial
from numbers import Number
from typing import Any, Callable, Dict, Hashable, Sequence, Union
import datetime as dt

import numpy as np
import pandas as pd
Expand Down
26 changes: 25 additions & 1 deletion xarray/core/nputils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import numpy as np
import pandas as pd

from numpy.core.multiarray import normalize_axis_index

try:
Expand Down Expand Up @@ -221,6 +220,31 @@ def f(values, axis=None, **kwargs):
return f


def _nanpolyfit_1d(arr, x, rcond=None):
out = np.full((x.shape[1] + 1,), np.nan)
mask = np.isnan(arr)
if not np.all(mask):
out[:-1], out[-1], _, _ = np.linalg.lstsq(x[~mask, :], arr[~mask], rcond=rcond)
return out


def least_squares(lhs, rhs, rcond=None, skipna=False):
    """Numpy least-squares fit of ``lhs @ coeffs = rhs.data``.

    With ``skipna=True``, columns containing NaNs are fit one at a time via
    ``_nanpolyfit_1d`` while clean columns go through a single batched
    ``np.linalg.lstsq`` call. Returns ``(coeffs, residuals)``.
    """
    data = rhs.data
    if not skipna:
        coeffs, residuals, _, _ = np.linalg.lstsq(lhs, data, rcond=rcond)
        return coeffs, residuals

    # Stack coefficients and residual in one buffer: rows 0..n-1 are the
    # coefficients, the last row holds the residual of each column.
    has_nan = np.any(np.isnan(data), axis=0)
    stacked = np.empty((lhs.shape[1] + 1, data.shape[1]))
    stacked[:, has_nan] = np.apply_along_axis(
        _nanpolyfit_1d, 0, data[:, has_nan], lhs
    )
    clean = ~has_nan
    stacked[:-1, clean], stacked[-1, clean], _, _ = np.linalg.lstsq(
        lhs, data[:, clean], rcond=rcond
    )
    return stacked[:-1, :], stacked[-1, :]


nanmin = _create_bottleneck_method("nanmin")
nanmax = _create_bottleneck_method("nanmax")
nanmean = _create_bottleneck_method("nanmean")
Expand Down
12 changes: 12 additions & 0 deletions xarray/tests/test_computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1120,3 +1120,15 @@ def test_where():
actual = xr.where(cond, 1, 0)
expected = xr.DataArray([1, 0], dims="x")
assert_identical(expected, actual)


def test_polyval():
x = np.arange(10)
da = xr.DataArray(
np.stack((1.0 + x + 2.0 * x ** 2, 1.0 + 2.0 * x + 3.0 * x ** 2)),
dims=("d", "x"),
coords={"x": x, "d": [0, 1]},
)
coeffs = da.polyfit("x", 2)
da_pv = xr.polyval(da.x, coeffs)
xr.testing.assert_allclose(da, da_pv.T)
26 changes: 26 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -4152,6 +4152,32 @@ def test_rank(self):
y = DataArray([0.75, 0.25, np.nan, 0.5, 1.0], dims=("z",))
assert_equal(y.rank("z", pct=True), y)

def test_polyfit(self):
x = np.arange(10)
da = DataArray(
np.stack((1.0 + 1.0 * x + 2.0 * x ** 2, 1.0 + 2.0 * x + 3.0 * x ** 2)),
dims=("d", "x"),
coords={"x": x, "d": [0, 1]},
)
coeffs = da.polyfit("x", 2)
expected = DataArray(
[[2, 1, 1], [3, 2, 1]],
dims=("d", "degree"),
coords={"degree": [2, 1, 0], "d": [0, 1]},
).T
assert_allclose(coeffs, expected)

# With NaN
da[0, 1] = np.nan
coeffs = da.polyfit("x", 2, skipna=True)
assert_allclose(coeffs, expected)

# Skipna + Full output
coeffs, resids, rank, sing, rcond = da.polyfit("x", 2, skipna=True, full=True)
assert_allclose(coeffs, expected)
assert rank == np.linalg.matrix_rank(np.vander(x, 3))
np.testing.assert_almost_equal(resids, [0, 0])


@pytest.fixture(params=[1])
def da(request):
Expand Down
Loading