Feature/rolling #668

Merged
merged 1 commit into from Feb 20, 2016
7 changes: 7 additions & 0 deletions doc/Makefile
@@ -4,6 +4,7 @@
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
SPHINXAUTOBUILD = sphinx-autobuild
PAPER =
BUILDDIR = _build

@@ -24,6 +25,7 @@ I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " livehtml to make and auto-rebuild standalone HTML files, requires sphinx-autorebuild"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@@ -55,6 +57,11 @@ html:
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

livehtml:
$(SPHINXAUTOBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
2 changes: 2 additions & 0 deletions doc/api.rst
@@ -17,6 +17,7 @@ Top-level functions
align
broadcast
concat
empty_like
set_options

Dataset
@@ -245,6 +246,7 @@ Computation

DataArray.reduce
DataArray.groupby
DataArray.rolling
DataArray.resample
DataArray.get_axis_num
DataArray.diff
43 changes: 43 additions & 0 deletions doc/computation.rst
@@ -99,6 +99,49 @@ These operations automatically skip missing values, like in pandas:
If desired, you can disable this behavior by invoking the aggregation method
with ``skipna=False``.

Rolling window operations
=========================

``DataArray`` objects include a :py:meth:`~xarray.DataArray.rolling` method. This
method supports rolling window aggregation:

.. ipython:: python

arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5),
dims=('x', 'y'))
arr

:py:meth:`~xarray.DataArray.rolling` is applied along one dimension using the
name of the dimension as a key (e.g. ``y``) and the window size as the value
(e.g. ``3``). We get back a ``Rolling`` object:

.. ipython:: python

arr.rolling(y=3)

The label position and minimum number of periods in the rolling window are
controlled by the ``center`` and ``min_periods`` arguments:

.. ipython:: python

arr.rolling(y=3, min_periods=2, center=True)

Aggregation and summary methods can be applied directly to the ``Rolling`` object:

.. ipython:: python

r = arr.rolling(y=3)
r.mean()
r.reduce(np.std)

Finally, we can manually iterate through ``Rolling`` objects:

.. ipython:: python

@verbatim
for label, arr_window in r:
# arr_window is a view of arr

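Filled in, the loop above might look like the following minimal sketch. It assumes iteration yields a coordinate label together with a ``DataArray`` window (as the snippet suggests) and that each window can be reduced like any other ``DataArray``; the printed values are only illustrative:

.. ipython:: python

    import numpy as np
    import xarray as xr

    arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=('x', 'y'))
    r = arr.rolling(y=3)
    for label, arr_window in r:
        # arr_window spans at most 3 points along y; reduce it manually
        print(label, float(arr_window.mean()))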
Broadcasting by dimension name
==============================

Expand Down
31 changes: 31 additions & 0 deletions doc/whats-new.rst
@@ -19,6 +19,37 @@ v0.7.2 (unreleased)
Enhancements
~~~~~~~~~~~~

- Rolling window operations on DataArray objects are now supported via a new
:py:meth:`xarray.DataArray.rolling` method.

.. ipython::
:verbatim:

In [1]: import xarray as xr; import numpy as np

In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5),
dims=('x', 'y'))

In [3]: arr
Out[3]:
<xarray.DataArray (x: 3, y: 5)>
array([[ 0. , 0.5, 1. , 1.5, 2. ],
[ 2.5, 3. , 3.5, 4. , 4.5],
[ 5. , 5.5, 6. , 6.5, 7. ]])
Coordinates:
* x (x) int64 0 1 2
* y (y) int64 0 1 2 3 4

In [4]: arr.rolling(y=3, min_periods=2).mean()
Out[4]:
<xarray.DataArray (x: 3, y: 5)>
array([[ nan, 0.25, 0.5 , 1. , 1.5 ],
[ nan, 2.75, 3. , 3.5 , 4. ],
[ nan, 5.25, 5.5 , 6. , 6.5 ]])
Coordinates:
* x (x) int64 0 1 2
* y (y) int64 0 1 2 3 4

Bug fixes
~~~~~~~~~

2 changes: 1 addition & 1 deletion xarray/core/combine.py
@@ -110,7 +110,7 @@ def concat(objs, dim=None, data_vars='all', coords='different',
f = _dataset_concat
else:
raise TypeError('can only concatenate xarray Dataset and DataArray '
'objects')
'objects, got %s' % type(first_obj))
return f(objs, dim, data_vars, coords, compat, positions)


145 changes: 144 additions & 1 deletion xarray/core/common.py
@@ -1,7 +1,7 @@
import numpy as np
import pandas as pd

from .pycompat import basestring, iteritems, suppress
from .pycompat import basestring, iteritems, suppress, dask_array_type
from . import formatting
from .utils import SortedKeysDict

@@ -52,6 +52,63 @@ def wrapped_func(self, dim=None, keep_attrs=False, **kwargs):
applied over all dimensions."""


class ImplementsRollingArrayReduce(object):
@classmethod
def _reduce_method(cls, func):
def wrapped_func(self, **kwargs):
return self.reduce(func, **kwargs)
return wrapped_func

@classmethod
def _bottleneck_reduce(cls, func):
def wrapped_func(self, **kwargs):
from .dataarray import DataArray

if isinstance(self.obj.data, dask_array_type):
raise NotImplementedError(
'Rolling window operation does not work with dask arrays')

# bottleneck doesn't allow min_count to be 0, although it should
# work the same as if min_count = 1
if self.min_periods is not None and self.min_periods == 0:
min_count = self.min_periods + 1
else:
min_count = self.min_periods

values = func(self.obj.data, window=self.window,
min_count=min_count, axis=self._axis_num)

result = DataArray(values, self.obj.coords)

if self.center:
result = self._center_result(result)

return result
return wrapped_func

@classmethod
def _bottleneck_reduce_without_min_count(cls, func):
def wrapped_func(self, **kwargs):
from .dataarray import DataArray

if self.min_periods is not None:
raise ValueError('Rolling.median does not accept min_periods')

if isinstance(self.obj.data, dask_array_type):
raise NotImplementedError(
'Rolling window operation does not work with dask arrays')

values = func(self.obj.data, window=self.window, axis=self._axis_num)

result = DataArray(values, self.obj.coords)

if self.center:
result = self._center_result(result)

return result
return wrapped_func


class AbstractArray(ImplementsArrayReduce):
def __bool__(self):
return bool(self.values)
@@ -286,6 +343,31 @@ def groupby(self, group, squeeze=True):
group = self[group]
return self.groupby_cls(self, group, squeeze=squeeze)

def rolling(self, min_periods=None, center=False, **kwarg):
"""Returns a Rolling object for performing moving window operations.

Parameters
----------
min_periods : int, default None
Minimum number of observations in window required to have a value
(otherwise result is NA).
center : boolean, default False
Set the labels at the center of the window.
kwarg : dim=window
dim : str
Name of the dimension to create the rolling iterator
along (e.g., `time`).
window : int
Size of the moving window.

Returns
-------
rolling : type of input argument
"""

return self.rolling_cls(self, min_periods=min_periods,
center=center, **kwarg)

def resample(self, freq, dim, how='mean', skipna=None, closed=None,
label=None, base=0):
"""Resample this object to a new temporal resolution.
@@ -435,3 +517,64 @@ def _possibly_convert_objects(values):
datetime64 and timedelta64, according to the pandas convention.
"""
return np.asarray(pd.Series(values.ravel())).reshape(values.shape)


def _get_fill_value(dtype):
"""Return a fill value that appropriately promotes types when used with
np.concatenate
"""
_, fill_value = _maybe_promote(dtype)
return fill_value


def _full_like_dataarray(arr, keep_attrs=False, fill_value=None):
"""empty DataArray"""
from .dataarray import DataArray

attrs = arr.attrs if keep_attrs else {}

if fill_value is None:
values = np.empty_like(arr)
elif fill_value is True:
dtype, fill_value = _maybe_promote(arr.dtype)
values = np.full_like(arr, fill_value=fill_value, dtype=dtype)
else:
dtype, _ = _maybe_promote(np.array(fill_value).dtype)
values = np.full_like(arr, fill_value=fill_value, dtype=dtype)

return DataArray(values, dims=arr.dims, coords=arr.coords, attrs=attrs)


def _full_like(xray_obj, keep_attrs=False, fill_value=None):
"""Return a new object with the same shape and type as a given object.

Parameters
----------
xray_obj : DataArray or Dataset
Return a full object with the same shape/dims/coords/attrs.
keep_attrs : bool, optional
If True, the dataset's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
fill_value : scalar, optional
Member: This seems like a weird argument to have here -- shouldn't this always be ignored on empty_like? This would make sense on full_like....

Member Author: good point. Renaming to full_like...

Value to fill DataArray(s) with before returning.

Returns
-------
out : same as xray_obj
New object with the same shape and type as a given object.
"""
from .dataarray import DataArray
from .dataset import Dataset

if isinstance(xray_obj, Dataset):
attrs = xray_obj.attrs if keep_attrs else {}

return Dataset(dict((k, _full_like_dataarray(v, keep_attrs=keep_attrs,
fill_value=fill_value))
for k, v in iteritems(xray_obj.data_vars)),
name=xray_obj.name, attrs=attrs)
elif isinstance(xray_obj, DataArray):
return _full_like_dataarray(xray_obj, keep_attrs=keep_attrs,
fill_value=fill_value)
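For intuition, the fill-and-promote behaviour that ``_full_like_dataarray`` aims for can be sketched with plain numpy. This is only an approximation of the promotion rules, which actually live in ``_maybe_promote`` (not shown in this diff):

    import numpy as np

    x = np.arange(6, dtype='int64').reshape(2, 3)

    # fill_value=True: promote the dtype so a sentinel fill (NaN) is representable
    np.full_like(x, fill_value=np.nan, dtype=np.float64)

    # explicit scalar fill: use a dtype wide enough to hold that scalar
    np.full_like(x, fill_value=0.5, dtype=np.array(0.5).dtype)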
2 changes: 2 additions & 0 deletions xarray/core/dataarray.py
@@ -9,6 +9,7 @@

from . import indexing
from . import groupby
from . import rolling
from . import ops
from . import utils
from .alignment import align
@@ -152,6 +153,7 @@ class DataArray(AbstractArray, BaseDataObject):
Dictionary for holding arbitrary metadata.
"""
groupby_cls = groupby.DataArrayGroupBy
rolling_cls = rolling.DataArrayRolling

def __init__(self, data, coords=None, dims=None, name=None,
attrs=None, encoding=None, fastpath=False):
31 changes: 31 additions & 0 deletions xarray/core/ops.py
@@ -17,9 +17,11 @@

try:
import bottleneck as bn
has_bottleneck = True
except ImportError:
# use numpy methods instead
bn = np
has_bottleneck = False

try:
import dask.array as da
@@ -46,6 +48,9 @@
REDUCE_METHODS = ['all', 'any']
NAN_REDUCE_METHODS = ['argmax', 'argmin', 'max', 'min', 'mean', 'prod', 'sum',
'std', 'var', 'median']
BOTTLENECK_ROLLING_METHODS = {'move_sum': 'sum', 'move_mean': 'mean',
'move_std': 'std', 'move_min': 'min',
'move_max': 'max'}
# TODO: wrap cumprod/cumsum, take, dot, sort


@@ -465,3 +470,29 @@ def inject_all_ops_and_reduce_methods(cls, priority=50, array_only=True):
setattr(cls, name, _values_method_wrapper(name))

inject_reduce_methods(cls)


def inject_bottleneck_rolling_methods(cls):
# standard numpy reduce methods
for name in NAN_REDUCE_METHODS:
f = getattr(np, name)
func = cls._reduce_method(f)
func.__name__ = name
func.__doc__ = 'todo'
setattr(cls, name, func)

# bottleneck rolling methods
if has_bottleneck:
for bn_name, method_name in BOTTLENECK_ROLLING_METHODS.items():
f = getattr(bn, bn_name)
func = cls._bottleneck_reduce(f)
func.__name__ = method_name
func.__doc__ = 'todo'
setattr(cls, method_name, func)

# bottleneck rolling methods without min_count
f = getattr(bn, 'move_median')
func = cls._bottleneck_reduce_without_min_count(f)
func.__name__ = 'median'
func.__doc__ = 'todo'
setattr(cls, 'median', func)
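To make the injection pattern concrete, here is a self-contained, stripped-down sketch of the same idea. ``MiniRolling`` and ``_inject_reduce_methods`` are hypothetical names used only for illustration and are not part of xarray:

    import numpy as np

    class MiniRolling(object):
        """Toy stand-in for a Rolling class that only knows how to reduce()."""
        def __init__(self, values, window):
            self.values = values
            self.window = window

        def reduce(self, func):
            # apply func over each trailing window; positions without a
            # full window are left as NaN
            out = np.full(self.values.shape, np.nan)
            for i in range(self.window - 1, len(self.values)):
                out[i] = func(self.values[i - self.window + 1:i + 1])
            return out

    def _inject_reduce_methods(cls, names):
        # mirrors inject_bottleneck_rolling_methods: wrap each numpy
        # reduction and attach it to the class under the method name
        for name in names:
            np_func = getattr(np, name)
            def wrapped(self, _f=np_func):
                return self.reduce(_f)
            wrapped.__name__ = name
            setattr(cls, name, wrapped)

    _inject_reduce_methods(MiniRolling, ['mean', 'sum', 'max'])

    r = MiniRolling(np.arange(5.0), window=2)
    r.mean()  # windowed means: nan, 0.5, 1.5, 2.5, 3.5
    r.sum()   # windowed sums: nan, 1, 3, 5, 7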