Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/average #650

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ Computation
:py:attr:`~DataArray.max`
:py:attr:`~DataArray.mean`
:py:attr:`~DataArray.median`
:py:attr:`~DataArray.average`
:py:attr:`~DataArray.min`
:py:attr:`~DataArray.prod`
:py:attr:`~DataArray.sum`
Expand Down
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ v0.7.2 (unreleased)
Enhancements
~~~~~~~~~~~~

- Add :py:meth:`~xarray.DataArray.average` and :py:meth:`~xarray.Dataset.average`
methods for computing weighted averages (:issue:`650`).

Bug fixes
~~~~~~~~~

Expand Down
78 changes: 78 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1364,6 +1364,84 @@ def roll(self, **shifts):
ds = self._to_temp_dataset().roll(**shifts)
return self._from_temp_dataset(ds)

def average(self, dim=None, axis=None, weights=None, returned=False,
            keep_attrs=False, **kwargs):
    """
    Reduce this DataArray's data by applying a (weighted) `average` along
    some dimension(s).

    Parameters
    ----------
    dim : str or sequence of str, optional
        Dimension(s) over which to apply `average`.
    axis : int or sequence of int, optional
        Axis(es) over which to apply `average`. Only one of the 'dim'
        and 'axis' arguments can be supplied. If neither is supplied, then
        `average` is calculated over all axes.
    weights : DataArray, optional
        Weights associated with the values in this DataArray. Each value
        contributes to the average according to its associated weight.
        Every dimension of `weights` must also be a dimension of this
        DataArray. If weights=None, all data in this DataArray are
        assumed to have a weight equal to one and the result is the same
        as `mean`.
    returned : bool, optional
        Default is False. If True, the tuple (average, sum_of_weights) is
        returned, otherwise only the average is returned. If weights=None,
        sum_of_weights is the number of non-null elements over which the
        average is taken.
    keep_attrs : bool, optional
        If True, the attributes (`attrs`) will be copied from the original
        object to the new one. If False (default), the new object will be
        returned without attributes.
    **kwargs : dict
        Additional keyword arguments passed on to the underlying
        reduction (`mean` when weights=None, `sum` otherwise).

    Returns
    -------
    reduced, [sum_of_weights] : DataArray, [DataArray]
        New DataArray object with `average` applied to its data and the
        indicated dimension(s) removed. When `returned` is True, a tuple
        with the average as the first element and the sum of the weights
        as the second element.

    Raises
    ------
    TypeError
        If `weights` is given but is not a DataArray.
    ValueError
        If `weights` has a dimension that this DataArray does not have.

    See Also
    --------
    Dataset.average
    DataArray.mean
    """
    if weights is None:
        mean = self.mean(dim=dim, axis=axis, keep_attrs=keep_attrs,
                         **kwargs)
        if not returned:
            return mean
        # With unit weights the "sum of weights" is the number of
        # non-null values along the reduced dimension(s), not the total
        # size of the array.
        count = self.notnull().sum(dim=dim, axis=axis)
        return mean, count

    if not isinstance(weights, DataArray):
        # TODO: coerce weights of other types to dataarray?
        raise TypeError("weights must be a DataArray")

    # Every dimension of the weights must exist on this array. Names are
    # stringified so that non-string dimension names cannot break the
    # error message itself.
    invalid = sorted({str(d) for d in weights.dims if d not in self.dims})
    if invalid:
        raise ValueError("Invalid dims in weights: %s" % " ".join(invalid))

    # If NaNs are present, their weights must not count towards the
    # normalisation, so mask the weights with the valid-data mask first.
    valid = self.notnull()
    sum_of_weights = weights.where(valid).sum(dim=dim, axis=axis, **kwargs)

    normalized = weights / sum_of_weights

    average = (self * normalized).sum(dim=dim, axis=axis,
                                      keep_attrs=keep_attrs, **kwargs)

    if not returned:
        return average
    return average, sum_of_weights

@property
def real(self):
    """Return a new object built from the real part of this array's variable."""
    real_variable = self.variable.real
    return self._replace(real_variable)
Expand Down
49 changes: 49 additions & 0 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2171,6 +2171,55 @@ def roll(self, **shifts):

return self._replace_vars_and_dims(variables)

def average(self, **kwargs):
    """
    Reduce this Dataset's data by applying a (weighted) `average` along
    some dimension(s).

    Parameters
    ----------
    dim : str or sequence of str, optional
        Dimension(s) over which to apply `average`.
    axis : int or sequence of int, optional
        Axis(es) over which to apply `average`. Only one of the 'dim'
        and 'axis' arguments can be supplied. If neither is supplied, then
        `average` is calculated over all axes.
    weights : DataArray, optional
        Weights applied to every data variable through
        `DataArray.average`, so they must be given as a DataArray. If
        weights is omitted or None, all data are assumed to have a weight
        equal to one and the result is the same as `Dataset.mean`.
    keep_attrs : bool, optional
        If True, the attributes (`attrs`) will be copied from the original
        object to the new one. If False (default), the new object will be
        returned without attributes.
    **kwargs : dict
        Additional keyword arguments passed on to the underlying
        reduction.

    Returns
    -------
    reduced : Dataset
        New Dataset object with `average` applied to its data and the
        indicated dimension(s) removed.

    Raises
    ------
    ValueError
        If a true `returned` argument is supplied; returning the sum of
        weights is only supported on `DataArray.average`.

    See Also
    --------
    DataArray.average
    Dataset.mean
    """
    # Only reject `returned` when it is actually requested; an explicit
    # `returned=False` is harmless and is simply dropped.
    if kwargs.pop('returned', False):
        raise ValueError('returned argument is not supported on '
                         'Dataset.average')

    if kwargs.get('weights') is None:
        # No weights (or an explicit None): plain mean. Drop the key so
        # that it is not forwarded to `mean`.
        kwargs.pop('weights', None)
        return self.mean(**kwargs)

    # Imported here rather than at module level, as in the original code
    # (presumably to avoid a circular import between the two modules).
    from .dataarray import DataArray
    return self.apply(DataArray.average, **kwargs)

@property
def real(self):
    """Return the result of applying a real-part unary op (attrs kept) to this object."""
    take_real = self._unary_op(lambda x: x.real, keep_attrs=True)
    return take_real(self)
Expand Down
36 changes: 36 additions & 0 deletions xarray/test/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,42 @@ def test_reduce(self):
expected = DataArray(5, {'c': -999})
self.assertDataArrayIdentical(expected, actual)

def test_average(self):
    """Check DataArray.average against mean, weights, and error paths."""
    # Without weights, average must be identical to mean.
    a = np.array([1.0, 2.0, 3.0])
    da = DataArray(a, dims=('dim', ))
    self.assertDataArrayIdentical(da.mean(), da.average())

    # Weighted average: 0.5 * 1 + 0.25 * 2 + 0.25 * 3 == 1.75.
    # Compared with a tolerance because the result is a float.
    weights = np.array([0.5, 0.25, 0.25])
    dweights = DataArray(weights, dims=('dim', ))
    actual = da.average(weights=dweights)
    np.testing.assert_allclose(actual.values, 1.75)

    # returned=True also yields the sum of the weights.
    dweights = DataArray([0.5, 1.25, 0.25], dims=('dim', ))
    sum_of_weights = da.average(weights=dweights, returned=True)[1]
    np.testing.assert_allclose(sum_of_weights.values, 2)

    # raises if weights are not a DataArray
    with self.assertRaisesRegexp(TypeError, 'weights must be a DataArray'):
        da.average(weights=[1, 2, 3])

    # Weights whose dims are not on the DataArray are rejected. The
    # weights are built outside the context manager so that only the
    # call under test can satisfy the assertion.
    bad_weights = DataArray([0.5, 0.25, 0.25], dims=('notadim', ))
    with self.assertRaisesRegexp(ValueError, 'Invalid dims'):
        da.average(weights=bad_weights)

    # case when weights has fewer dims than the DataArray
    a = np.random.random(size=(3, 4, 5))
    da = DataArray(a, dims=('time', 'y', 'x'))
    weights = np.array([1, 2, 3])
    dweights = DataArray(weights, dims=('time', ))
    expected = np.average(a, weights=weights, axis=0)
    actual = da.average(weights=dweights, dim='time')
    np.testing.assert_allclose(actual.values, expected)

def test_reduce_keep_attrs(self):
# Test dropped attrs
vm = self.va.mean()
Expand Down
18 changes: 18 additions & 0 deletions xarray/test/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2087,6 +2087,24 @@ def mean_only_one_axis(x, axis):
with self.assertRaisesRegexp(TypeError, 'non-integer axis'):
ds.reduce(mean_only_one_axis, ['x', 'y'])

def test_average(self):
    """Check Dataset.average against mean, weights, and `returned`."""
    # Without weights, average must be identical to mean.
    a = np.array([1.0, 2.0, 3.0])
    da = DataArray(a, dims=('dim', ))
    ds = da.to_dataset(name='x')
    self.assertDatasetIdentical(ds.mean(), ds.average())

    # Weighted average: 0.5 * 1 + 0.25 * 2 + 0.25 * 3 == 1.75.
    # Compare the variable's values, not the Dataset itself: comparing a
    # Dataset with a scalar does not reliably check the computed value.
    weights = np.array([0.5, 0.25, 0.25])
    dweights = DataArray(weights, dims=('dim', ))
    actual = ds.average(weights=dweights)
    np.testing.assert_allclose(actual['x'].values, 1.75)

    # raise error if trying to return sum of weights
    with self.assertRaisesRegexp(ValueError, 'returned argument is not'):
        ds.average(weights=dweights, returned=True)

def test_count(self):
ds = Dataset({'x': ('a', [np.nan, 1]), 'y': 0, 'z': np.nan})
expected = Dataset({'x': 1, 'y': 1, 'z': 0})
Expand Down