Feature/rolling #668

Merged
merged 1 commit into from Feb 20, 2016
7 changes: 7 additions & 0 deletions doc/Makefile
@@ -4,6 +4,7 @@
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
SPHINXAUTOBUILD = sphinx-autobuild
PAPER =
BUILDDIR = _build

@@ -24,6 +25,7 @@ I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " livehtml to make and auto-rebuild standalone HTML files, requires sphinx-autorebuild"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@@ -55,6 +57,11 @@ html:
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

livehtml:
$(SPHINXAUTOBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
2 changes: 2 additions & 0 deletions doc/api.rst
@@ -17,6 +17,7 @@ Top-level functions
align
broadcast
concat
empty_like
set_options

Dataset
@@ -245,6 +246,7 @@ Computation

DataArray.reduce
DataArray.groupby
DataArray.rolling
DataArray.resample
DataArray.get_axis_num
DataArray.diff
43 changes: 43 additions & 0 deletions doc/computation.rst
@@ -99,6 +99,49 @@ These operations automatically skip missing values, like in pandas:
If desired, you can disable this behavior by invoking the aggregation method
with ``skipna=False``.

Rolling window operations
=========================

``DataArray`` objects include a :py:meth:`~xarray.DataArray.rolling` method. This
method supports rolling window aggregation:

.. ipython:: python

arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5),
dims=('x', 'y'))
arr

:py:meth:`~xarray.DataArray.rolling` is applied along one dimension using the
name of the dimension as a key (e.g. ``y``) and the window size as the value
(e.g. ``3``). We get back a ``Rolling`` object:

.. ipython:: python

arr.rolling(y=3)

The label position and minimum number of periods in the rolling window are
controlled by the ``center`` and ``min_periods`` arguments:

.. ipython:: python

arr.rolling(y=3, min_periods=2, center=True)

Aggregation and summary methods can be applied directly to the ``Rolling`` object:

.. ipython:: python

r = arr.rolling(y=3)
r.mean()
r.reduce(np.std)

Finally, we can manually iterate through ``Rolling`` objects:

.. ipython:: python

@verbatim
for label, arr_window in r:
# arr_window is a view of arr

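Filled in, the loop above might look like the following minimal sketch. It assumes iteration yields a coordinate label together with a ``DataArray`` window (as the snippet suggests) and that each window can be reduced like any other ``DataArray``; the printed values are only illustrative:

.. ipython:: python

    import numpy as np
    import xarray as xr

    arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=('x', 'y'))
    r = arr.rolling(y=3)
    for label, arr_window in r:
        # arr_window spans at most 3 points along y; reduce it manually
        print(label, float(arr_window.mean()))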
Broadcasting by dimension name
==============================

Expand Down
31 changes: 31 additions & 0 deletions doc/whats-new.rst
@@ -19,6 +19,37 @@ v0.7.2 (unreleased)
Enhancements
~~~~~~~~~~~~

- Rolling window operations on DataArray objects are now supported via a new
:py:meth:`xarray.DataArray.rolling` method.

.. ipython::
:verbatim:

In [1]: import xarray as xr; import numpy as np

In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5),
dims=('x', 'y'))

In [3]: arr
Out[3]:
<xarray.DataArray (x: 3, y: 5)>
array([[ 0. , 0.5, 1. , 1.5, 2. ],
[ 2.5, 3. , 3.5, 4. , 4.5],
[ 5. , 5.5, 6. , 6.5, 7. ]])
Coordinates:
* x (x) int64 0 1 2
* y (y) int64 0 1 2 3 4

In [4]: arr.rolling(y=3, min_periods=2).mean()
Out[4]:
<xarray.DataArray (x: 3, y: 5)>
array([[ nan, 0.25, 0.5 , 1. , 1.5 ],
[ nan, 2.75, 3. , 3.5 , 4. ],
[ nan, 5.25, 5.5 , 6. , 6.5 ]])
Coordinates:
* x (x) int64 0 1 2
* y (y) int64 0 1 2 3 4

Bug fixes
~~~~~~~~~

2 changes: 1 addition & 1 deletion xarray/core/combine.py
@@ -110,7 +110,7 @@ def concat(objs, dim=None, data_vars='all', coords='different',
f = _dataset_concat
else:
raise TypeError('can only concatenate xarray Dataset and DataArray '
'objects')
'objects, got %s' % type(first_obj))
return f(objs, dim, data_vars, coords, compat, positions)


145 changes: 144 additions & 1 deletion xarray/core/common.py
@@ -1,7 +1,7 @@
import numpy as np
import pandas as pd

from .pycompat import basestring, iteritems, suppress
from .pycompat import basestring, iteritems, suppress, dask_array_type
from . import formatting
from .utils import SortedKeysDict

@@ -52,6 +52,63 @@ def wrapped_func(self, dim=None, keep_attrs=False, **kwargs):
applied over all dimensions."""


class ImplementsRollingArrayReduce(object):
@classmethod
def _reduce_method(cls, func):
def wrapped_func(self, **kwargs):
return self.reduce(func, **kwargs)
return wrapped_func

@classmethod
def _bottleneck_reduce(cls, func):
def wrapped_func(self, **kwargs):
from .dataarray import DataArray

if isinstance(self.obj.data, dask_array_type):
raise NotImplementedError(
'Rolling window operation does not work with dask arrays')

# bottleneck doesn't allow min_count to be 0, although it should
# work the same as if min_count = 1
if self.min_periods is not None and self.min_periods == 0:
min_count = self.min_periods + 1
else:
min_count = self.min_periods

values = func(self.obj.data, window=self.window,
min_count=min_count, axis=self._axis_num)

result = DataArray(values, self.obj.coords)

if self.center:
result = self._center_result(result)

return result
return wrapped_func

@classmethod
def _bottleneck_reduce_without_min_count(cls, func):
def wrapped_func(self, **kwargs):
from .dataarray import DataArray

if self.min_periods is not None:
raise ValueError('Rolling.median does not accept min_periods')

if isinstance(self.obj.data, dask_array_type):
raise NotImplementedError(
'Rolling window operation does not work with dask arrays')

values = func(self.obj.data, window=self.window, axis=self._axis_num)

result = DataArray(values, self.obj.coords)

if self.center:
result = self._center_result(result)

return result
return wrapped_func


class AbstractArray(ImplementsArrayReduce):
def __bool__(self):
return bool(self.values)
@@ -286,6 +343,31 @@ def groupby(self, group, squeeze=True):
group = self[group]
return self.groupby_cls(self, group, squeeze=squeeze)

def rolling(self, min_periods=None, center=False, **kwarg):
"""Returns a Rolling object for performing moving window operations.

Parameters
----------
min_periods : int, default None
Minimum number of observations in window required to have a value
(otherwise result is NA).
center : boolean, default False
Set the labels at the center of the window.
kwarg : dim=window
dim : str
Name of the dimension to create the rolling iterator
along (e.g., `time`).
window : int
Size of the moving window.

Returns
-------
rolling : type of input argument
"""

return self.rolling_cls(self, min_periods=min_periods,
center=center, **kwarg)

def resample(self, freq, dim, how='mean', skipna=None, closed=None,
label=None, base=0):
"""Resample this object to a new temporal resolution.
@@ -435,3 +517,64 @@ def _possibly_convert_objects(values):
datetime64 and timedelta64, according to the pandas convention.
"""
return np.asarray(pd.Series(values.ravel())).reshape(values.shape)


def _get_fill_value(dtype):
"""Return a fill value that appropriately promotes types when used with
np.concatenate
"""
_, fill_value = _maybe_promote(dtype)
return fill_value


def _full_like_dataarray(arr, keep_attrs=False, fill_value=None):
"""empty DataArray"""
from .dataarray import DataArray

attrs = arr.attrs if keep_attrs else {}

if fill_value is None:
values = np.empty_like(arr)
elif fill_value is True:
dtype, fill_value = _maybe_promote(arr.dtype)
values = np.full_like(arr, fill_value=fill_value, dtype=dtype)
else:
dtype, _ = _maybe_promote(np.array(fill_value).dtype)
values = np.full_like(arr, fill_value=fill_value, dtype=dtype)

return DataArray(values, dims=arr.dims, coords=arr.coords, attrs=attrs)


def _full_like(xray_obj, keep_attrs=False, fill_value=None):
"""Return a new object with the same shape and type as a given object.

Parameters
----------
xray_obj : DataArray or Dataset
Return a full object with the same shape/dims/coords/attrs.
keep_attrs : bool, optional
If True, the dataset's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
fill_value : scalar, optional
Member: This seems like a weird argument to have here -- shouldn't this always be ignored on empty_like? This would make sense on full_like....

Member Author: good point. Renaming to full_like...

Value to fill DataArray(s) with before returning.

Returns
-------
out : same as xray_obj
New object with the same shape and type as a given object.
"""
from .dataarray import DataArray
from .dataset import Dataset

if isinstance(xray_obj, Dataset):
attrs = xray_obj.attrs if keep_attrs else {}

return Dataset(dict((k, _full_like_dataarray(v, keep_attrs=keep_attrs,
fill_value=fill_value))
for k, v in iteritems(xray_obj.data_vars)),
name=xray_obj.name, attrs=attrs)
elif isinstance(xray_obj, DataArray):
return _full_like_dataarray(xray_obj, keep_attrs=keep_attrs,
fill_value=fill_value)
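For intuition, the fill-and-promote behaviour that ``_full_like_dataarray`` aims for can be sketched with plain numpy. This is only an approximation of the promotion rules, which actually live in ``_maybe_promote`` (not shown in this diff):

    import numpy as np

    x = np.arange(6, dtype='int64').reshape(2, 3)

    # fill_value=True: promote the dtype so a sentinel fill (NaN) is representable
    np.full_like(x, fill_value=np.nan, dtype=np.float64)

    # explicit scalar fill: use a dtype wide enough to hold that scalar
    np.full_like(x, fill_value=0.5, dtype=np.array(0.5).dtype)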
2 changes: 2 additions & 0 deletions xarray/core/dataarray.py
@@ -9,6 +9,7 @@

from . import indexing
from . import groupby
from . import rolling
from . import ops
from . import utils
from .alignment import align
@@ -152,6 +153,7 @@ class DataArray(AbstractArray, BaseDataObject):
Dictionary for holding arbitrary metadata.
"""
groupby_cls = groupby.DataArrayGroupBy
rolling_cls = rolling.DataArrayRolling

def __init__(self, data, coords=None, dims=None, name=None,
attrs=None, encoding=None, fastpath=False):
31 changes: 31 additions & 0 deletions xarray/core/ops.py
@@ -17,9 +17,11 @@

try:
import bottleneck as bn
has_bottleneck = True
except ImportError:
# use numpy methods instead
bn = np
has_bottleneck = False

try:
import dask.array as da
@@ -46,6 +48,9 @@
REDUCE_METHODS = ['all', 'any']
NAN_REDUCE_METHODS = ['argmax', 'argmin', 'max', 'min', 'mean', 'prod', 'sum',
'std', 'var', 'median']
BOTTLENECK_ROLLING_METHODS = {'move_sum': 'sum', 'move_mean': 'mean',
'move_std': 'std', 'move_min': 'min',
'move_max': 'max'}
# TODO: wrap cumprod/cumsum, take, dot, sort


@@ -465,3 +470,29 @@ def inject_all_ops_and_reduce_methods(cls, priority=50, array_only=True):
setattr(cls, name, _values_method_wrapper(name))

inject_reduce_methods(cls)


def inject_bottleneck_rolling_methods(cls):
# standard numpy reduce methods
for name in NAN_REDUCE_METHODS:
f = getattr(np, name)
func = cls._reduce_method(f)
func.__name__ = name
func.__doc__ = 'todo'
setattr(cls, name, func)

# bottleneck rolling methods
if has_bottleneck:
for bn_name, method_name in BOTTLENECK_ROLLING_METHODS.items():
f = getattr(bn, bn_name)
func = cls._bottleneck_reduce(f)
func.__name__ = method_name
func.__doc__ = 'todo'
setattr(cls, method_name, func)

# bottleneck rolling methods without min_count
f = getattr(bn, 'move_median')
func = cls._bottleneck_reduce_without_min_count(f)
func.__name__ = 'median'
func.__doc__ = 'todo'
setattr(cls, 'median', func)
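To make the injection pattern concrete, here is a self-contained, stripped-down sketch of the same idea. ``MiniRolling`` and ``_inject_reduce_methods`` are hypothetical names used only for illustration and are not part of xarray:

    import numpy as np

    class MiniRolling(object):
        """Toy stand-in for a Rolling class that only knows how to reduce()."""
        def __init__(self, values, window):
            self.values = values
            self.window = window

        def reduce(self, func):
            # apply func over each trailing window; positions without a
            # full window are left as NaN
            out = np.full(self.values.shape, np.nan)
            for i in range(self.window - 1, len(self.values)):
                out[i] = func(self.values[i - self.window + 1:i + 1])
            return out

    def _inject_reduce_methods(cls, names):
        # mirrors inject_bottleneck_rolling_methods: wrap each numpy
        # reduction and attach it to the class under the method name
        for name in names:
            np_func = getattr(np, name)
            def wrapped(self, _f=np_func):
                return self.reduce(_f)
            wrapped.__name__ = name
            setattr(cls, name, wrapped)

    _inject_reduce_methods(MiniRolling, ['mean', 'sum', 'max'])

    r = MiniRolling(np.arange(5.0), window=2)
    r.mean()  # windowed means: nan, 0.5, 1.5, 2.5, 3.5
    r.sum()   # windowed sums: nan, 1, 3, 5, 7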