pydata · jhamman · Oct 21, 2015
diff --git a/doc/api.rst b/doc/api.rst
@@ -257,6 +257,7 @@ Computation
 :py:attr:`~DataArray.max`
 :py:attr:`~DataArray.mean`
 :py:attr:`~DataArray.median`
+:py:attr:`~DataArray.average`
 :py:attr:`~DataArray.min`
 :py:attr:`~DataArray.prod`
 :py:attr:`~DataArray.sum`

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -19,6 +19,9 @@ v0.7.2 (unreleased)
 Enhancements
 ~~~~~~~~~~~~
 
+- Add :py:meth:`~xarray.DataArray.average` and
+  :py:meth:`~xarray.Dataset.average` methods (:issue:`650`).
+
 Bug fixes
 ~~~~~~~~~
 

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -1364,6 +1364,84 @@ def roll(self, **shifts):
         ds = self._to_temp_dataset().roll(**shifts)
         return self._from_temp_dataset(ds)
 
+    def average(self, dim=None, axis=None, weights=None, returned=False,
+                keep_attrs=False, **kwargs):
+        """
+        Reduce this DataArray's data by applying a (weighted) `average`
+        along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str or sequence of str, optional
+            Dimension(s) over which to apply `average`.
+        axis : int or sequence of int, optional
+            Axis(es) of this DataArray over which to apply `average`. Only
+            one of the 'dim' and 'axis' arguments can be supplied. If neither
+            is supplied, then `average` is calculated over all axes.
+        weights : DataArray, optional
+            Weights for the values in this DataArray. Each dimension of
+            `weights` must also be a dimension of this DataArray. If
+            weights=None, all values are given a weight equal to one.
+        returned : bool, optional
+            If True, return the tuple (average, sum_of_weights) instead of
+            only the average. If weights=None, sum_of_weights is the number
+            of non-null elements that were averaged. Default is False.
+        keep_attrs : bool, optional
+            If True, the attributes (`attrs`) of this object are copied to
+            the result. If False (default), the result has no attributes.
+        **kwargs : dict
+            Additional keyword arguments passed on to `mean` (if weights=None)
+            or to `sum` (otherwise).
+
+        Returns
+        -------
+        reduced, [sum_of_weights] : DataArray, [DataArray]
+            New DataArray object with `average` applied to its data and the
+            indicated dimension(s) removed. When returned is True, a tuple
+            with the average first and the sum of the weights second.
+
+        Raises
+        ------
+        TypeError
+            If `weights` is neither None nor a DataArray.
+        ValueError
+            If both `dim` and `axis` are supplied, or if `weights` has a
+            dimension that this DataArray lacks.
+
+        See Also
+        --------
+        Dataset.average
+        DataArray.mean
+        """
+        if axis is not None:
+            if dim is not None:
+                raise ValueError("cannot supply both 'axis' and 'dim'")
+            # Resolve axes to names now: arrays broadcast against `weights`
+            # below need not share this array's axis order.
+            try:
+                dim = [self.dims[i] for i in axis]
+            except TypeError:
+                dim = self.dims[axis]
+
+        if weights is None:
+            average = self.mean(dim=dim, keep_attrs=keep_attrs, **kwargs)
+            if not returned:
+                return average
+            return average, self.notnull().sum(dim=dim)
+        if not isinstance(weights, DataArray):
+            raise TypeError("weights must be a DataArray")
+        invalid = set(weights.dims) - set(self.dims)
+        if invalid:
+            raise ValueError("Invalid dims in weights: %s"
+                             % " ".join(str(d) for d in invalid))
+        # Mask the weights where data are missing so NaNs carry no weight.
+        sum_of_weights = weights.where(self.notnull()).sum(dim=dim, **kwargs)
+        average = (self * weights).sum(dim=dim, **kwargs) / sum_of_weights
+        if keep_attrs:
+            # `sum` only sees the product's attrs, so copy ours explicitly.
+            average.attrs.update(self.attrs)
+        return (average, sum_of_weights) if returned else average
+
     @property
     def real(self):
         return self._replace(self.variable.real)

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -2171,6 +2171,55 @@ def roll(self, **shifts):
 
         return self._replace_vars_and_dims(variables)
 
+    def average(self, **kwargs):
+        """
+        Reduce this Dataset's data by applying a (weighted) `average` along
+        some dimension(s).
+
+        Parameters
+        ----------
+        dim : str or sequence of str, optional
+            Dimension(s) over which to apply `average`.
+        axis : int or sequence of int, optional
+            Axis(es) over which to apply `average`. Only one of the 'dim'
+            and 'axis' arguments can be supplied. If neither is supplied,
+            then `average` is calculated over all axes.
+        weights : DataArray, optional
+            Weights applied to every data variable in this Dataset; they are
+            handed unchanged to `DataArray.average` for each variable. If
+            weights is omitted or None, this is the same as `Dataset.mean`.
+        keep_attrs : bool, optional
+            If True, the attributes (`attrs`) will be copied from the original
+            object to the new one.  If False (default), the new object will be
+            returned without attributes.
+        returned : bool, optional
+            Not supported on Dataset; any true value raises ValueError.
+        **kwargs : dict
+            Additional keyword arguments passed on to `Dataset.mean` or
+            `DataArray.average`.
+
+        Returns
+        -------
+        reduced : Dataset
+            New Dataset object with `average` applied to its data and the
+            indicated dimension(s) removed.
+
+        See Also
+        --------
+        DataArray.average
+        Dataset.mean
+        """
+        # An explicit returned=False is harmless, so only reject true values.
+        if kwargs.pop('returned', False):
+            raise ValueError('returned argument is not supported on '
+                             'Dataset.average')
+        # weights=None means "unweighted": drop it so `mean` never sees it.
+        if kwargs.get('weights') is None:
+            kwargs.pop('weights', None)
+            return self.mean(**kwargs)
+        from .dataarray import DataArray
+        return self.apply(DataArray.average, **kwargs)
+
     @property
     def real(self):
         return self._unary_op(lambda x: x.real, keep_attrs=True)(self)

diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py
@@ -990,6 +990,42 @@ def test_reduce(self):
         expected = DataArray(5, {'c': -999})
         self.assertDataArrayIdentical(expected, actual)
 
+    def test_average(self):
+        """Cover unweighted, weighted, error and broadcasting cases."""
+        values = np.array([1.0, 2.0, 3.0])
+        arr = DataArray(values, dims=('dim', ))
+
+        # without weights, average is just the mean
+        self.assertDataArrayIdentical(arr.mean(), arr.average())
+
+        # weighted average
+        w = DataArray(np.array([0.5, 0.25, 0.25]), dims=('dim', ))
+        result = arr.average(weights=w)
+        self.assertEqual(result, 1.75)
+
+        # returned=True also yields the sum of the weights
+        w = DataArray([0.5, 1.25, 0.25], dims=('dim', ))
+        _, total = arr.average(weights=w, returned=True)
+        self.assertEqual(total, 2)
+
+        # weights that are not a DataArray are rejected
+        with self.assertRaisesRegexp(TypeError, 'weights must be a DataArray'):
+            arr.average(weights=[1, 2, 3])
+
+        # weights may only use dims that exist on the array
+        bad = DataArray([0.5, 0.25, 0.25], dims=('notadim', ))
+        with self.assertRaisesRegexp(ValueError, 'Invalid dims'):
+            arr.average(weights=bad)
+
+        # weights with fewer dims than the array
+        values = np.random.random(size=(3, 4, 5))
+        arr = DataArray(values, dims=('time', 'y', 'x'))
+        raw = np.array([1, 2, 3])
+        w = DataArray(raw, dims=('time', ))
+        expected = np.average(values, weights=raw, axis=0)
+        result = arr.average(weights=w, dim='time')
+        np.testing.assert_allclose(result.values, expected)
+
     def test_reduce_keep_attrs(self):
         # Test dropped attrs
         vm = self.va.mean()

diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py
@@ -2087,6 +2087,24 @@ def mean_only_one_axis(x, axis):
         with self.assertRaisesRegexp(TypeError, 'non-integer axis'):
             ds.reduce(mean_only_one_axis, ['x', 'y'])
 
+    def test_average(self):
+        # same as mean without weights
+        a = np.array([1.0, 2.0, 3.0])
+        da = DataArray(a, dims=('dim', ))
+        ds = da.to_dataset(name='x')
+        self.assertDatasetIdentical(ds.mean(), ds.average())
+
+        # using weights; check the variable's value, since `Dataset == 1.75`
+        # is itself a non-empty Dataset and would pass assertEqual regardless
+        weights = np.array([0.5, 0.25, 0.25])
+        dweights = DataArray(weights, dims=('dim', ))
+        actual = ds.average(weights=dweights)
+        self.assertEqual(float(actual['x']), 1.75)
+
+        # raise error if trying to return sum of weights
+        with self.assertRaisesRegexp(ValueError, 'returned argument is not'):
+            ds.average(weights=dweights, returned=True)
+
     def test_count(self):
         ds = Dataset({'x': ('a', [np.nan, 1]), 'y': 0, 'z': np.nan})
         expected = Dataset({'x': 1, 'y': 1, 'z': 0})