From dcd1b2400d120125a8e9fb284cbe46cb60b3526d Mon Sep 17 00:00:00 2001 From: Christian Jauvin Date: Thu, 9 Dec 2021 19:19:40 -0500 Subject: [PATCH 01/41] Add weighted quantile --- xarray/core/weighted.py | 140 ++++++++++++++++++++++++++++- xarray/tests/test_weighted.py | 162 +++++++++++++++++++++++++++++++--- 2 files changed, 285 insertions(+), 17 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 0676d351b6f..d6e49475a41 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -1,9 +1,18 @@ -from typing import TYPE_CHECKING, Generic, Hashable, Iterable, Optional, Union, cast +from typing import ( + TYPE_CHECKING, + Generic, + Hashable, + Iterable, + Optional, + Sequence, + Union, + cast, +) import numpy as np -from . import duck_array_ops -from .computation import dot +from . import duck_array_ops, utils +from .computation import apply_ufunc, dot from .pycompat import is_duck_dask_array from .types import T_Xarray @@ -54,6 +63,42 @@ New {cls} object with the sum of the weights over the given dimension. """ +_WEIGHTED_QUANTILE_DOCSTRING_TEMPLATE = """ + Apply a a weighted ``quantile`` to this {cls}'s data along some dimension(s). + For compatibility with NumPy's non-weighted ``quantile`` (which is used by + ``DataArray.quantile`` and ``Dataset.quantile``), the only interpolation + method supported by this weighted version is the Type 7, described in + https://doi.org/10.2307/2684934, which corresponds to the default "linear" + option of ``numpy.quantile``. + + Parameters + ---------- + q : float or sequence of float + Quantile to compute, which must be between 0 and 1 inclusive. + dim : str or sequence of str, optional + Dimension(s) over which to apply the weighted ``quantile``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, the attributes (``attrs``) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + + Returns + ------- + quantiles : {cls} + New {cls} object with weighted ``quantile`` applied to its data and + the indicated dimension(s) removed. + + See Also + -------- + numpy.nanquantile, pandas.Series.quantile, Dataset.quantile + DataArray.quantile + """ + if TYPE_CHECKING: from .dataarray import DataArray @@ -239,6 +284,81 @@ def _weighted_std( return cast("DataArray", np.sqrt(self._weighted_var(da, dim, skipna))) + def _weighted_quantile( + self, + da: "DataArray", + q, + dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + ) -> "DataArray": + """Apply a weighted ``quantile`` to a DataArray along some dimension(s).""" + + def _weighted_quantile_type7_1d(a, weights, q, skipna): + # This algorithm has been adapted from: + # https://aakinshin.net/posts/weighted-quantiles/#reference-implementation + if skipna: + # Remove nans from a and weights + nan_mask = np.isnan(a) + a = a[~nan_mask] + weights = weights[~nan_mask] + elif np.isnan(a).any(): + return np.full(len(q), np.nan) + # Flatten input values because this function is 1d + a = a.flatten() + weights = weights.flatten() + n = len(a) + assert n == len(weights) + weights = weights / weights.sum() + sorter = np.argsort(a) + a = a[sorter] + weights = weights[sorter] + weights_cum = np.append(0, weights.cumsum()) + res = [] + for p in q: + h = p * (n - 1) + 1 + u = np.maximum((h - 1) / n, np.minimum(h / n, weights_cum)) + v = u * n - h + 1 + w = np.diff(v) + res.append(sum(a * w)) + + return np.asarray(res) + + scalar = utils.is_scalar(q) + q = np.atleast_1d(np.asarray(q, dtype=np.float64)) + + if q.ndim > 1: + raise ValueError("q must be a scalar or 1d") + + if dim is None: + dim = da.dims + + if utils.is_scalar(dim): + dim = [dim] + + dim = cast(Sequence, dim) + + result = apply_ufunc( + _weighted_quantile_type7_1d, + da, + self.weights, + input_core_dims=[dim, dim], + output_core_dims=[["quantile"]], + output_dtypes=[np.float64], + join="override", + dask_gufunc_kwargs=dict(output_sizes={"quantile": len(q)}), + dask="parallelized", + vectorize=True, + kwargs={"q": q, "skipna": skipna}, + ) + + # for backward compatibility + result = result.transpose("quantile", ...) + result = result.assign_coords(quantile=q) + if scalar: + result = result.squeeze("quantile") + return result + def _implementation(self, func, dim, **kwargs): raise NotImplementedError("Use `Dataset.weighted` or `DataArray.weighted`") @@ -308,6 +428,18 @@ def std( self._weighted_std, dim=dim, skipna=skipna, keep_attrs=keep_attrs ) + def quantile( + self, + q, + dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + ) -> T_Xarray: + + return self._implementation( + self._weighted_quantile, q=q, dim=dim, skipna=skipna, keep_attrs=keep_attrs + ) + def __repr__(self): """provide a nice str repr of our Weighted object""" @@ -358,6 +490,8 @@ def _inject_docstring(cls, cls_name): cls=cls_name, fcn="std", on_zero="NaN" ) + cls.quantile.__doc__ = _WEIGHTED_QUANTILE_DOCSTRING_TEMPLATE.format(cls=cls_name) + _inject_docstring(DataArrayWeighted, "DataArray") _inject_docstring(DatasetWeighted, "Dataset") diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index 36923ed49c3..dcb6b89e524 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -194,6 +194,91 @@ def test_weighted_mean_no_nan(weights, expected): assert_equal(expected, result) +@pytest.mark.parametrize( + ("weights", "expected"), + ( + ([0.25, 0.05, 0.15, 0.25, 0.15, 0.1, 0.05], [1.435, 2.4, 3, 3.63]), + ([0.05, 0.05, 0.1, 0.15, 0.15, 0.25, 0.25], [2.84, 3.665, 4.1, 4.595]), + ), +) +def test_weighted_quantile_no_nan(weights, expected): + + da = DataArray([1, 1.9, 2.2, 3, 3.7, 4.1, 5]) + q = [0.2, 0.4, 0.6, 0.8] + weights = DataArray(weights) + + expected = DataArray(expected, coords={"quantile": q}) + result = da.weighted(weights).quantile(q) + + assert_allclose(expected, result) + + +@pytest.mark.parametrize( + ("weights", "expected"), + ( + ([0.25, 0.05, 0.15, 0.25, 0.15, 0.1, 0.05], [1.410526, 2.326316, 3, 3.405263]), + ([0.05, 0.05, 0.1, 0.15, 0.15, 0.25, 0.25], [2.52, 3.14, 3.7, 4.1]), + ), +) +@pytest.mark.parametrize("skipna", (True, False)) +def test_weighted_quantile_nan(weights, expected, skipna): + + da = DataArray([1, 1.9, 2.2, 3, 3.7, 4.1, np.nan]) + q = [0.2, 0.4, 0.6, 0.8] + weights = DataArray(weights) + + result = da.weighted(weights).quantile(q, skipna=skipna) + + if skipna: + expected = DataArray(expected, coords={"quantile": q}) + else: + expected = DataArray(np.full(len(q), np.nan), coords={"quantile": q}) + + assert_allclose(expected, result) + + +@pytest.mark.parametrize( + "da", ([1, 1.9, 2.2, 3, 3.7, 4.1, 5], [1, 1.9, 2.2, 3, 3.7, 4.1, np.nan]) +) +@pytest.mark.parametrize("q", (0.5, (0.1, 0.9), (0.2, 0.4, 0.6, 0.8))) +@pytest.mark.parametrize("skipna", (True, False)) +@pytest.mark.parametrize("factor", [1, 2, 3.14]) +def test_weighted_quantile_equal_weights(da, q, skipna, factor): + # if all weights are equal (!= 0), should yield the same result as quantile + + da = DataArray(da) + + # all weights as 1. + weights = xr.full_like(da, factor) + + expected = da.quantile(q, skipna=skipna) + result = da.weighted(weights).quantile(q, skipna=skipna) + + assert_allclose(expected, result) + + +def test_weighted_quantile_bool(): + # https://github.com/pydata/xarray/issues/4074 + da = DataArray([1, 1]) + weights = DataArray([True, True]) + q = 0.5 + + expected = DataArray([1], coords={"quantile": [q]}).squeeze() + result = da.weighted(weights).quantile(q) + + assert_equal(expected, result) + + +def test_weighted_quantile_with_2d_q(): + + da = DataArray([1, 1.9, 2.2, 3, 3.7, 4.1, 5]) + q = np.array([0.2, 0.4, 0.6, 0.8]).reshape(2, 2) + weights = DataArray(np.ones(len(da))) + + with pytest.raises(ValueError): + da.weighted(weights).quantile(q) + + @pytest.mark.parametrize( ("weights", "expected"), (([4, 6], 2.0), ([1, 0], np.nan), ([0, 0], np.nan)) ) @@ -465,6 +550,38 @@ def test_weighted_operations_3D(dim, add_nans, skipna): check_weighted_operations(data, weights, dim, skipna) +@pytest.mark.parametrize("dim", ("a", "b", "c", ("a", "b"), ("a", "b", "c"), None)) +@pytest.mark.parametrize("q", (0.5, (0.1, 0.9), (0.2, 0.4, 0.6, 0.8))) +@pytest.mark.parametrize("add_nans", (True, False)) +@pytest.mark.parametrize("skipna", (None, True, False)) +def test_weighted_quantile_3D(dim, q, add_nans, skipna): + + dims = ("a", "b", "c") + coords = dict(a=[0, 1, 2], b=[0, 1, 2, 3], c=[0, 1, 2, 3, 4]) + + # Weights are all ones, because we will compare against DataArray.quantile (non-weighted) + weights = DataArray(np.ones((3, 4, 5)), dims=dims, coords=coords) + + data = np.arange(60).reshape(3, 4, 5).astype(float) + + # add approximately 25 % NaNs (https://stackoverflow.com/a/32182680/3010700) + if add_nans: + c = int(data.size * 0.25) + data.ravel()[np.random.choice(data.size, c, replace=False)] = np.NaN + + da = DataArray(data, dims=dims, coords=coords) + + result = da.weighted(weights).quantile(q, dim=dim, skipna=skipna) + expected = da.quantile(q, dim=dim, skipna=skipna) + + assert_allclose(expected, result) + + ds = da.to_dataset(name="data") + result2 = ds.weighted(weights).quantile(q, dim=dim, skipna=skipna) + + assert_allclose(expected, result2.data) + + def test_weighted_operations_nonequal_coords(): weights = DataArray(np.random.randn(4), dims=("a",), coords=dict(a=[0, 1, 2, 3])) @@ -472,9 +589,17 @@ def test_weighted_operations_nonequal_coords(): check_weighted_operations(data, weights, dim="a", skipna=None) + q = 0.5 + result = data.weighted(weights).quantile(q, dim="a") + expected = DataArray([0.440994], coords={"quantile": [q]}).squeeze() + assert_allclose(result, expected) + data = data.to_dataset(name="data") check_weighted_operations(data, weights, dim="a", skipna=None) + result = data.weighted(weights).quantile(q, dim="a") + assert_allclose(result, expected.to_dataset(name="data")) + @pytest.mark.parametrize("shape_data", ((4,), (4, 4), (4, 4, 4))) @pytest.mark.parametrize("shape_weights", ((4,), (4, 4), (4, 4, 4))) @@ -504,7 +629,8 @@ def test_weighted_operations_different_shapes( @pytest.mark.parametrize( - "operation", ("sum_of_weights", "sum", "mean", "sum_of_squares", "var", "std") + "operation", + ("sum_of_weights", "sum", "mean", "sum_of_squares", "var", "std", "quantile"), ) @pytest.mark.parametrize("as_dataset", (True, False)) @pytest.mark.parametrize("keep_attrs", (True, False, None)) @@ -518,22 +644,21 @@ def test_weighted_operations_keep_attr(operation, as_dataset, keep_attrs): data.attrs = dict(attr="weights") - result = getattr(data.weighted(weights), operation)(keep_attrs=True) + kwargs = {"keep_attrs": keep_attrs} + if operation == "quantile": + kwargs["q"] = 0.5 + + result = getattr(data.weighted(weights), operation)(**kwargs) if operation == "sum_of_weights": - assert weights.attrs == result.attrs + assert weights.attrs == result.attrs if keep_attrs else not result.attrs else: - assert data.attrs == result.attrs - - result = getattr(data.weighted(weights), operation)(keep_attrs=None) - assert not result.attrs - - result = getattr(data.weighted(weights), operation)(keep_attrs=False) - assert not result.attrs + assert data.attrs == result.attrs if keep_attrs else not result.attrs @pytest.mark.parametrize( - "operation", ("sum_of_weights", "sum", "mean", "sum_of_squares", "var", "std") + "operation", + ("sum_of_weights", "sum", "mean", "sum_of_squares", "var", "std", "quantile"), ) def test_weighted_operations_keep_attr_da_in_ds(operation): # GH #3595 @@ -542,22 +667,31 @@ def test_weighted_operations_keep_attr_da_in_ds(operation): data = DataArray(np.random.randn(2, 2), attrs=dict(attr="data")) data = data.to_dataset(name="a") - result = getattr(data.weighted(weights), operation)(keep_attrs=True) + kwargs = {"keep_attrs": True} + if operation == "quantile": + kwargs["q"] = 0.5 + + result = getattr(data.weighted(weights), operation)(**kwargs) assert data.a.attrs == result.a.attrs +@pytest.mark.parametrize("operation", ("sum_of_weights", "sum", "mean", "quantile")) @pytest.mark.parametrize("as_dataset", (True, False)) -def test_weighted_bad_dim(as_dataset): +def test_weighted_bad_dim(operation, as_dataset): data = DataArray(np.random.randn(2, 2)) weights = xr.ones_like(data) if as_dataset: data = data.to_dataset(name="data") + kwargs = {"dim": "bad_dim"} + if operation == "quantile": + kwargs["q"] = 0.5 + error_msg = ( f"{data.__class__.__name__}Weighted" " does not contain the dimensions: {'bad_dim'}" ) with pytest.raises(ValueError, match=error_msg): - data.weighted(weights).mean("bad_dim") + getattr(data.weighted(weights), operation)(**kwargs) From 875f766731344a1f4ae880ee895326b8f3d3d938 Mon Sep 17 00:00:00 2001 From: Christian Jauvin Date: Thu, 9 Dec 2021 19:32:27 -0500 Subject: [PATCH 02/41] Add weighted quantile to documentation --- doc/api.rst | 2 ++ doc/user-guide/computation.rst | 8 +++++++- doc/whats-new.rst | 3 ++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 9433ecfa56d..78c79418ecb 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -937,6 +937,7 @@ Dataset DatasetWeighted DatasetWeighted.mean + DatasetWeighted.quantile DatasetWeighted.sum DatasetWeighted.std DatasetWeighted.var @@ -951,6 +952,7 @@ DataArray DataArrayWeighted DataArrayWeighted.mean + DataArrayWeighted.quantile DataArrayWeighted.sum DataArrayWeighted.std DataArrayWeighted.var diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst index f58767efb29..0725cb263a3 100644 --- a/doc/user-guide/computation.rst +++ b/doc/user-guide/computation.rst @@ -263,7 +263,7 @@ Weighted array reductions :py:class:`DataArray` and :py:class:`Dataset` objects include :py:meth:`DataArray.weighted` and :py:meth:`Dataset.weighted` array reduction methods. They currently -support weighted ``sum``, ``mean``, ``std`` and ``var``. +support weighted ``sum``, ``mean``, ``std``, ``var`` and ``quantile``. .. ipython:: python @@ -291,6 +291,12 @@ Calculate the weighted mean: weighted_prec.mean(dim="month") +Calculate the weighted quantile: + +.. ipython:: python + + weighted_prec.quantile(q=0.5, dim="month") + The weighted sum corresponds to: .. ipython:: python diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9a6f6e21e5e..51ce8806ec3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,7 +22,8 @@ v0.20.2 (unreleased) New Features ~~~~~~~~~~~~ - +- Add ``quantile`` to :py:class:`~core.weighted.DatasetWeighted` and :py:class:`~core.weighted.DataArrayWeighted`. + By `Christian Jauvin `_. Breaking changes ~~~~~~~~~~~~~~~~ From 217d2f0181593ce4eaa1e98180c71ac524a3cdb9 Mon Sep 17 00:00:00 2001 From: Christian Jauvin Date: Fri, 10 Dec 2021 15:53:18 -0500 Subject: [PATCH 03/41] Apply suggestions from code review Co-authored-by: Mathias Hauser --- xarray/core/weighted.py | 13 +++++++------ xarray/tests/test_weighted.py | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index d6e49475a41..fa5ddf1c594 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -65,6 +65,7 @@ _WEIGHTED_QUANTILE_DOCSTRING_TEMPLATE = """ Apply a a weighted ``quantile`` to this {cls}'s data along some dimension(s). + For compatibility with NumPy's non-weighted ``quantile`` (which is used by ``DataArray.quantile`` and ``Dataset.quantile``), the only interpolation method supported by this weighted version is the Type 7, described in @@ -299,15 +300,15 @@ def _weighted_quantile_type7_1d(a, weights, q, skipna): # https://aakinshin.net/posts/weighted-quantiles/#reference-implementation if skipna: # Remove nans from a and weights - nan_mask = np.isnan(a) - a = a[~nan_mask] - weights = weights[~nan_mask] + not_nan = ~np.isnan(a) + a = a[not_nan] + weights = weights[not_nan] elif np.isnan(a).any(): return np.full(len(q), np.nan) # Flatten input values because this function is 1d - a = a.flatten() - weights = weights.flatten() - n = len(a) + a = a.ravel() + weights = weights.ravel() + n = a.size assert n == len(weights) weights = weights / weights.sum() sorter = np.argsort(a) diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index dcb6b89e524..e561d5eafe5 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -273,9 +273,9 @@ def test_weighted_quantile_with_2d_q(): da = DataArray([1, 1.9, 2.2, 3, 3.7, 4.1, 5]) q = np.array([0.2, 0.4, 0.6, 0.8]).reshape(2, 2) - weights = DataArray(np.ones(len(da))) + weights = xr.ones_like(da) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="q must be a scalar or 1d"): da.weighted(weights).quantile(q) From 278bed9c9d3202e307d6cf5ea2832303cf5fc1c7 Mon Sep 17 00:00:00 2001 From: Christian Jauvin Date: Fri, 10 Dec 2021 15:57:49 -0500 Subject: [PATCH 04/41] Apply suggestions from code review Co-authored-by: Mathias Hauser --- xarray/core/weighted.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index fa5ddf1c594..0cf18cf8138 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -68,9 +68,9 @@ For compatibility with NumPy's non-weighted ``quantile`` (which is used by ``DataArray.quantile`` and ``Dataset.quantile``), the only interpolation - method supported by this weighted version is the Type 7, described in - https://doi.org/10.2307/2684934, which corresponds to the default "linear" - option of ``numpy.quantile``. + method supported by this weighted version corresponds to the default "linear" + option of ``numpy.quantile``. This is "Type 7" option, described in Hyndman + and Fan (1996): https://doi.org/10.2307/2684934. Parameters ---------- @@ -98,6 +98,11 @@ -------- numpy.nanquantile, pandas.Series.quantile, Dataset.quantile DataArray.quantile + + Notes + ----- + Returns NaN if the ``weights`` sum to 0.0 along the reduced + dimension(s). """ From 80ae2292cfced652ed4a53766db5a05b948f08ec Mon Sep 17 00:00:00 2001 From: Christian Jauvin Date: Sat, 11 Dec 2021 12:01:25 -0500 Subject: [PATCH 05/41] Improve _weighted_quantile_type7_1d ufunc with suggestions --- xarray/core/weighted.py | 51 +++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 0cf18cf8138..4709c12d7eb 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -98,7 +98,7 @@ -------- numpy.nanquantile, pandas.Series.quantile, Dataset.quantile DataArray.quantile - + Notes ----- Returns NaN if the ``weights`` sum to 0.0 along the reduced @@ -300,42 +300,45 @@ def _weighted_quantile( ) -> "DataArray": """Apply a weighted ``quantile`` to a DataArray along some dimension(s).""" - def _weighted_quantile_type7_1d(a, weights, q, skipna): + def _weighted_quantile_type7_1d(data, weights, q, skipna): # This algorithm has been adapted from: # https://aakinshin.net/posts/weighted-quantiles/#reference-implementation + not_nan = ~np.isnan(data) if skipna: - # Remove nans from a and weights - not_nan = ~np.isnan(a) - a = a[not_nan] + # Remove nans from data and weights + data = data[not_nan] weights = weights[not_nan] - elif np.isnan(a).any(): - return np.full(len(q), np.nan) + elif ~not_nan.any(): + return np.full(q.size, np.nan) # Flatten input values because this function is 1d - a = a.ravel() + data = data.ravel() weights = weights.ravel() - n = a.size - assert n == len(weights) + n = data.size weights = weights / weights.sum() - sorter = np.argsort(a) - a = a[sorter] + sorter = np.argsort(data) + data = data[sorter] weights = weights[sorter] weights_cum = np.append(0, weights.cumsum()) - res = [] - for p in q: - h = p * (n - 1) + 1 - u = np.maximum((h - 1) / n, np.minimum(h / n, weights_cum)) - v = u * n - h + 1 - w = np.diff(v) - res.append(sum(a * w)) + q = np.atleast_2d(q).T + h = q * (n - 1) + 1 + u = np.maximum((h - 1) / n, np.minimum(h / n, weights_cum)) + v = u * n - h + 1 + w = np.diff(v) + r = (data * w).sum(axis=1) + + return r - return np.asarray(res) + if da.shape != self.weights.shape: + raise ValueError("da and weights must have the same shape") - scalar = utils.is_scalar(q) q = np.atleast_1d(np.asarray(q, dtype=np.float64)) if q.ndim > 1: raise ValueError("q must be a scalar or 1d") + if np.any((q < 0) | (q > 1)): + raise ValueError("q values must be between 0 and 1") + if dim is None: dim = da.dims @@ -358,11 +361,9 @@ def _weighted_quantile_type7_1d(a, weights, q, skipna): kwargs={"q": q, "skipna": skipna}, ) - # for backward compatibility result = result.transpose("quantile", ...) - result = result.assign_coords(quantile=q) - if scalar: - result = result.squeeze("quantile") + result = result.assign_coords(quantile=q).squeeze() + return result def _implementation(self, func, dim, **kwargs): From 58af567608e163a325f5ae85bfbc50235e190ea6 Mon Sep 17 00:00:00 2001 From: Christian Jauvin Date: Sat, 11 Dec 2021 12:22:20 -0500 Subject: [PATCH 06/41] Expand scope of invalid q value test --- xarray/tests/test_weighted.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index e561d5eafe5..a49be288023 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -269,14 +269,19 @@ def test_weighted_quantile_bool(): assert_equal(expected, result) -def test_weighted_quantile_with_2d_q(): +@pytest.mark.parametrize("q", (-1, 1.1, ((0.2, 0.4), (0.6, 0.8)))) +def test_weighted_quantile_with_invalid_q(q): da = DataArray([1, 1.9, 2.2, 3, 3.7, 4.1, 5]) - q = np.array([0.2, 0.4, 0.6, 0.8]).reshape(2, 2) + q = np.asarray(q) weights = xr.ones_like(da) - with pytest.raises(ValueError, match="q must be a scalar or 1d"): - da.weighted(weights).quantile(q) + if q.ndim <= 1: + with pytest.raises(ValueError, match="q values must be between 0 and 1"): + da.weighted(weights).quantile(q) + else: + with pytest.raises(ValueError, match="q must be a scalar or 1d"): + da.weighted(weights).quantile(q) @pytest.mark.parametrize( From 15e3834d05c8a34be062a1647e9854eb7f1ebbd5 Mon Sep 17 00:00:00 2001 From: Christian Jauvin Date: Mon, 13 Dec 2021 12:35:24 -0500 Subject: [PATCH 07/41] Fix weighted quantile with zero weights --- xarray/core/weighted.py | 7 ++++--- xarray/tests/test_weighted.py | 14 +++++++++++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 4709c12d7eb..6e99c9eeafb 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -310,9 +310,10 @@ def _weighted_quantile_type7_1d(data, weights, q, skipna): weights = weights[not_nan] elif ~not_nan.any(): return np.full(q.size, np.nan) - # Flatten input values because this function is 1d - data = data.ravel() - weights = weights.ravel() + # Filter out data (and weights) associated with zero weights, which also flattens them + nonzero_weights = weights != 0 + data = data[nonzero_weights] + weights = weights[nonzero_weights] n = data.size weights = weights / weights.sum() sorter = np.argsort(data) diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index a49be288023..776f9e7279e 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -213,6 +213,18 @@ def test_weighted_quantile_no_nan(weights, expected): assert_allclose(expected, result) +def test_weighted_quantile_zero_weights(): + + da = DataArray([0, 1, 2, 3]) + weights = DataArray([1, 0, 1, 0]) + q = 0.75 + + result = da.weighted(weights).quantile(q) + expected = DataArray([0, 2]).quantile(0.75) + + assert_allclose(expected, result) + + @pytest.mark.parametrize( ("weights", "expected"), ( @@ -269,7 +281,7 @@ def test_weighted_quantile_bool(): assert_equal(expected, result) -@pytest.mark.parametrize("q", (-1, 1.1, ((0.2, 0.4), (0.6, 0.8)))) +@pytest.mark.parametrize("q", (-1, 1.1, (0.5, 1.1), ((0.2, 0.4), (0.6, 0.8)))) def test_weighted_quantile_with_invalid_q(q): da = DataArray([1, 1.9, 2.2, 3, 3.7, 4.1, 5]) From 83e4210b795697c6b44c8e2a5400e382d32b3ac5 Mon Sep 17 00:00:00 2001 From: Christian Jauvin Date: Mon, 13 Dec 2021 12:50:39 -0500 Subject: [PATCH 08/41] Replace np.ones by xr.ones_like in weighted quantile test --- xarray/tests/test_weighted.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index 776f9e7279e..5c484658289 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -576,9 +576,6 @@ def test_weighted_quantile_3D(dim, q, add_nans, skipna): dims = ("a", "b", "c") coords = dict(a=[0, 1, 2], b=[0, 1, 2, 3], c=[0, 1, 2, 3, 4]) - # Weights are all ones, because we will compare against DataArray.quantile (non-weighted) - weights = DataArray(np.ones((3, 4, 5)), dims=dims, coords=coords) - data = np.arange(60).reshape(3, 4, 5).astype(float) # add approximately 25 % NaNs (https://stackoverflow.com/a/32182680/3010700) @@ -588,6 +585,9 @@ def test_weighted_quantile_3D(dim, q, add_nans, skipna): da = DataArray(data, dims=dims, coords=coords) + # Weights are all ones, because we will compare against DataArray.quantile (non-weighted) + weights = xr.ones_like(da) + result = da.weighted(weights).quantile(q, dim=dim, skipna=skipna) expected = da.quantile(q, dim=dim, skipna=skipna) From 2237399e54e896895e467db1bda0d5876c57248d Mon Sep 17 00:00:00 2001 From: Christian Jauvin Date: Mon, 13 Dec 2021 14:57:44 -0500 Subject: [PATCH 09/41] Process weighted quantile data with all nans --- xarray/core/weighted.py | 4 ++++ xarray/tests/test_weighted.py | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 6e99c9eeafb..09d8b0e8239 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -309,12 +309,16 @@ def _weighted_quantile_type7_1d(data, weights, q, skipna): data = data[not_nan] weights = weights[not_nan] elif ~not_nan.any(): + # Return nan if data contains any nan return np.full(q.size, np.nan) # Filter out data (and weights) associated with zero weights, which also flattens them nonzero_weights = weights != 0 data = data[nonzero_weights] weights = weights[nonzero_weights] n = data.size + if n == 0: + # Possibly empty after nan or zero weight filtering above + return np.full(q.size, np.nan) weights = weights / weights.sum() sorter = np.argsort(data) data = data[sorter] diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index 5c484658289..b85e3d05dd9 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -250,7 +250,12 @@ def test_weighted_quantile_nan(weights, expected, skipna): @pytest.mark.parametrize( - "da", ([1, 1.9, 2.2, 3, 3.7, 4.1, 5], [1, 1.9, 2.2, 3, 3.7, 4.1, np.nan]) + "da", + ( + [1, 1.9, 2.2, 3, 3.7, 4.1, 5], + [1, 1.9, 2.2, 3, 3.7, 4.1, np.nan], + [np.nan, np.nan, np.nan], + ), ) @pytest.mark.parametrize("q", (0.5, (0.1, 0.9), (0.2, 0.4, 0.6, 0.8))) @pytest.mark.parametrize("skipna", (True, False)) From b936e21b731bb4f8c0db0a4e6bca4b082eb6a6ac Mon Sep 17 00:00:00 2001 From: Christian Jauvin Date: Mon, 13 Dec 2021 16:20:35 -0500 Subject: [PATCH 10/41] Fix operator precedence bug --- xarray/core/weighted.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 09d8b0e8239..98a322b22d4 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -303,12 +303,13 @@ def _weighted_quantile( def _weighted_quantile_type7_1d(data, weights, q, skipna): # This algorithm has been adapted from: # https://aakinshin.net/posts/weighted-quantiles/#reference-implementation - not_nan = ~np.isnan(data) + is_nan = np.isnan(data) if skipna: # Remove nans from data and weights + not_nan = ~is_nan data = data[not_nan] weights = weights[not_nan] - elif ~not_nan.any(): + elif is_nan.any(): # Return nan if data contains any nan return np.full(q.size, np.nan) # Filter out data (and weights) associated with zero weights, which also flattens them From ab810d7838b06db60526393f31fba07703ac3882 Mon Sep 17 00:00:00 2001 From: David Huard Date: Tue, 11 Jan 2022 17:29:22 -0500 Subject: [PATCH 11/41] Used effective sample size. Generalize to different quantile types supporting weighted quantiles (4-9, but only 7 is exposed and tested). Fixed unit tests --- xarray/core/weighted.py | 46 ++++++++++++++++++++++++++-------- xarray/tests/test_weighted.py | 47 +++++++++++++++++++++-------------- 2 files changed, 64 insertions(+), 29 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 98a322b22d4..30fba09b22b 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -15,6 +15,7 @@ from .computation import apply_ufunc, dot from .pycompat import is_duck_dask_array from .types import T_Xarray +from .alignment import align _WEIGHTED_REDUCE_DOCSTRING_TEMPLATE = """ Reduce this {cls}'s data by a weighted ``{fcn}`` along some dimension(s). @@ -300,7 +301,22 @@ def _weighted_quantile( ) -> "DataArray": """Apply a weighted ``quantile`` to a DataArray along some dimension(s).""" - def _weighted_quantile_type7_1d(data, weights, q, skipna): + def _get_h(n, q, htype=7): + if htype == 4: + h = n * q + if htype == 5: + h = n * q + 0.5 + if htype == 6: + h = (n + 1) * q + if htype == 7: + h = (n - 1) * q + 1 + if htype == 8: + h = (n + 1/3) * q + 1/3 + if htype == 9: + h = (n + 1/4) * q + 3/8 + return h.clip(1, n) + + def _weighted_quantile_1d(data, weights, q, skipna, htype=7): # This algorithm has been adapted from: # https://aakinshin.net/posts/weighted-quantiles/#reference-implementation is_nan = np.isnan(data) @@ -312,27 +328,36 @@ def _weighted_quantile_type7_1d(data, weights, q, skipna): elif is_nan.any(): # Return nan if data contains any nan return np.full(q.size, np.nan) + # Filter out data (and weights) associated with zero weights, which also flattens them nonzero_weights = weights != 0 data = data[nonzero_weights] weights = weights[nonzero_weights] n = data.size + + # Kish's effective sample size + nw = weights.sum() ** 2 / (weights ** 2).sum() + if n == 0: # Possibly empty after nan or zero weight filtering above return np.full(q.size, np.nan) - weights = weights / weights.sum() + + # Sort data and weights sorter = np.argsort(data) data = data[sorter] weights = weights[sorter] + + # Normalize and sum the weights + weights = weights / weights.sum() weights_cum = np.append(0, weights.cumsum()) + + # Vectorize the computation for q q = np.atleast_2d(q).T - h = q * (n - 1) + 1 - u = np.maximum((h - 1) / n, np.minimum(h / n, weights_cum)) - v = u * n - h + 1 + h = _get_h(nw, q, htype) + u = np.maximum((h - 1) / nw, np.minimum(h / nw, weights_cum)) + v = u * nw - h + 1 w = np.diff(v) - r = (data * w).sum(axis=1) - - return r + return (data * w).sum(axis=1) if da.shape != self.weights.shape: raise ValueError("da and weights must have the same shape") @@ -353,10 +378,11 @@ def _weighted_quantile_type7_1d(data, weights, q, skipna): dim = cast(Sequence, dim) + da, w = align(da, self.weights) result = apply_ufunc( - _weighted_quantile_type7_1d, + _weighted_quantile_1d, da, - self.weights, + w, input_core_dims=[dim, dim], output_core_dims=[["quantile"]], output_dtypes=[np.float64], diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index b85e3d05dd9..f85895af153 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -197,11 +197,13 @@ def test_weighted_mean_no_nan(weights, expected): @pytest.mark.parametrize( ("weights", "expected"), ( - ([0.25, 0.05, 0.15, 0.25, 0.15, 0.1, 0.05], [1.435, 2.4, 3, 3.63]), - ([0.05, 0.05, 0.1, 0.15, 0.15, 0.25, 0.25], [2.84, 3.665, 4.1, 4.595]), + ([0.25, 0.05, 0.15, 0.25, 0.15, 0.1, 0.05], [1.554595, 2.463784, 3.000000, 3.518378]), + ([0.05, 0.05, 0.1, 0.15, 0.15, 0.25, 0.25], [2.840000, 3.632973, 4.076216, 4.523243]), ), ) def test_weighted_quantile_no_nan(weights, expected): + # Expected values are defined by running the reference implementation proposed in + # https://aakinshin.net/posts/weighted-quantiles/ da = DataArray([1, 1.9, 2.2, 3, 3.7, 4.1, 5]) q = [0.2, 0.4, 0.6, 0.8] @@ -225,24 +227,31 @@ def test_weighted_quantile_zero_weights(): assert_allclose(expected, result) -@pytest.mark.parametrize( - ("weights", "expected"), - ( - ([0.25, 0.05, 0.15, 0.25, 0.15, 0.1, 0.05], [1.410526, 2.326316, 3, 3.405263]), - ([0.05, 0.05, 0.1, 0.15, 0.15, 0.25, 0.25], [2.52, 3.14, 3.7, 4.1]), - ), -) -@pytest.mark.parametrize("skipna", (True, False)) -def test_weighted_quantile_nan(weights, expected, skipna): +def test_weighted_quantile_simple(): + # Check that weighted quantiles return the same value as numpy quantiles + da = DataArray([0, 1, 2, 3]) + w = DataArray([1, 0, 1, 0]) - da = DataArray([1, 1.9, 2.2, 3, 3.7, 4.1, np.nan]) - q = [0.2, 0.4, 0.6, 0.8] - weights = DataArray(weights) + weps = DataArray([1, 0.0001, 1, 0.0001]) + q = .75 - result = da.weighted(weights).quantile(q, skipna=skipna) + expected = DataArray(np.quantile([0, 2], q), coords={"quantile": q}) # 1.5 + + assert_equal(expected, da.weighted(w).quantile(q)) + assert_allclose(expected, da.weighted(weps).quantile(q), rtol=.001) + + +@pytest.mark.parametrize("skipna", (True, False)) +def test_weighted_quantile_nan(skipna): + # Check skipna behavior + da = DataArray([0, 1, 2, 3, np.nan]) + w = DataArray([1, 0, 1, 0, 1]) + q = [.5, .75] + + result = da.weighted(w).quantile(q, skipna=skipna) if skipna: - expected = DataArray(expected, coords={"quantile": q}) + expected = DataArray(np.quantile([0, 2], q), coords={"quantile": q}) # 1.5 else: expected = DataArray(np.full(len(q), np.nan), coords={"quantile": q}) @@ -605,15 +614,15 @@ def test_weighted_quantile_3D(dim, q, add_nans, skipna): def test_weighted_operations_nonequal_coords(): - + # There are no weights for a == 4, so that data point is ignored. weights = DataArray(np.random.randn(4), dims=("a",), coords=dict(a=[0, 1, 2, 3])) data = DataArray(np.random.randn(4), dims=("a",), coords=dict(a=[1, 2, 3, 4])) - check_weighted_operations(data, weights, dim="a", skipna=None) q = 0.5 result = data.weighted(weights).quantile(q, dim="a") - expected = DataArray([0.440994], coords={"quantile": [q]}).squeeze() + # Expected value computed using code from https://aakinshin.net/posts/weighted-quantiles/ with values at a=1,2,3 + expected = DataArray([0.9308707], coords={"quantile": [q]}).squeeze() assert_allclose(result, expected) data = data.to_dataset(name="data") From 7bcf09eb53d398b3b3a57dabc6a6845be8ab9c03 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 11 Jan 2022 22:30:43 +0000 Subject: [PATCH 12/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/weighted.py | 6 +++--- xarray/tests/test_weighted.py | 16 +++++++++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 30fba09b22b..a4b73e622a1 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -12,10 +12,10 @@ import numpy as np from . import duck_array_ops, utils +from .alignment import align from .computation import apply_ufunc, dot from .pycompat import is_duck_dask_array from .types import T_Xarray -from .alignment import align _WEIGHTED_REDUCE_DOCSTRING_TEMPLATE = """ Reduce this {cls}'s data by a weighted ``{fcn}`` along some dimension(s). @@ -311,9 +311,9 @@ def _get_h(n, q, htype=7): if htype == 7: h = (n - 1) * q + 1 if htype == 8: - h = (n + 1/3) * q + 1/3 + h = (n + 1 / 3) * q + 1 / 3 if htype == 9: - h = (n + 1/4) * q + 3/8 + h = (n + 1 / 4) * q + 3 / 8 return h.clip(1, n) def _weighted_quantile_1d(data, weights, q, skipna, htype=7): diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index f85895af153..615a9ccbca1 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -197,8 +197,14 @@ def test_weighted_mean_no_nan(weights, expected): @pytest.mark.parametrize( ("weights", "expected"), ( - ([0.25, 0.05, 0.15, 0.25, 0.15, 0.1, 0.05], [1.554595, 2.463784, 3.000000, 3.518378]), - ([0.05, 0.05, 0.1, 0.15, 0.15, 0.25, 0.25], [2.840000, 3.632973, 4.076216, 4.523243]), + ( + [0.25, 0.05, 0.15, 0.25, 0.15, 0.1, 0.05], + [1.554595, 2.463784, 3.000000, 3.518378], + ), + ( + [0.05, 0.05, 0.1, 0.15, 0.15, 0.25, 0.25], + [2.840000, 3.632973, 4.076216, 4.523243], + ), ), ) def test_weighted_quantile_no_nan(weights, expected): @@ -233,12 +239,12 @@ def test_weighted_quantile_simple(): w = DataArray([1, 0, 1, 0]) weps = DataArray([1, 0.0001, 1, 0.0001]) - q = .75 + q = 0.75 expected = DataArray(np.quantile([0, 2], q), coords={"quantile": q}) # 1.5 assert_equal(expected, da.weighted(w).quantile(q)) - assert_allclose(expected, da.weighted(weps).quantile(q), rtol=.001) + assert_allclose(expected, da.weighted(weps).quantile(q), rtol=0.001) @pytest.mark.parametrize("skipna", (True, False)) @@ -246,7 +252,7 @@ def test_weighted_quantile_nan(skipna): # Check skipna behavior da = DataArray([0, 1, 2, 3, np.nan]) w = DataArray([1, 0, 1, 0, 1]) - q = [.5, .75] + q = [0.5, 0.75] result = da.weighted(w).quantile(q, skipna=skipna) From 8427637274e0b791cad6c0671dd44d7808dc1e3e Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 12 Jan 2022 15:55:29 -0500 Subject: [PATCH 13/41] Apply suggestions from code review Co-authored-by: Mathias Hauser Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- xarray/core/weighted.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index a4b73e622a1..f9cf5b799df 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -295,28 +295,30 @@ def _weighted_quantile( self, da: "DataArray", q, - dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, + dim: Union[Hashable, Iterable[Hashable]] = None, skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, ) -> "DataArray": """Apply a weighted ``quantile`` to a DataArray along some dimension(s).""" - def _get_h(n, q, htype=7): - if htype == 4: + def _get_h(n, q, htype: Literal[7, 4, 5, 6, 8, 9]=7): + if htype == 7: + h = (n - 1) * q + 1 + elif htype == 4: h = n * q - if htype == 5: + elif htype == 5: h = n * q + 0.5 - if htype == 6: + elif htype == 6: h = (n + 1) * q - if htype == 7: - h = (n - 1) * q + 1 - if htype == 8: + elif htype == 8: h = (n + 1 / 3) * q + 1 / 3 - if htype == 9: + elif htype == 9: h = (n + 1 / 4) * q + 3 / 8 + else: + raise ValueError(f"Invalid htype.") return h.clip(1, n) - def _weighted_quantile_1d(data, weights, q, skipna, htype=7): + def _weighted_quantile_1d(data, weights, q, skipna: bool, htype: Literal[7, 4, 5, 6, 8, 9] =7): # This algorithm has been adapted from: # https://aakinshin.net/posts/weighted-quantiles/#reference-implementation is_nan = np.isnan(data) @@ -470,6 +472,7 @@ def std( def quantile( self, q, + *, dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, From 321796245b8fb4beb4454bf0305c10407f570e77 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Jan 2022 20:56:48 +0000 Subject: [PATCH 14/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/weighted.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index f9cf5b799df..cb8becc69d6 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -301,7 +301,7 @@ def _weighted_quantile( ) -> "DataArray": """Apply a weighted ``quantile`` to a DataArray along some dimension(s).""" - def _get_h(n, q, htype: Literal[7, 4, 5, 6, 8, 9]=7): + def _get_h(n, q, htype: Literal[7, 4, 5, 6, 8, 9] = 7): if htype == 7: h = (n - 1) * q + 1 elif htype == 4: @@ -318,7 +318,9 @@ def _get_h(n, q, htype: Literal[7, 4, 5, 6, 8, 9]=7): raise ValueError(f"Invalid htype.") return h.clip(1, n) - def _weighted_quantile_1d(data, weights, q, skipna: bool, htype: Literal[7, 4, 5, 6, 8, 9] =7): + def _weighted_quantile_1d( + data, weights, q, skipna: bool, htype: Literal[7, 4, 5, 6, 8, 9] = 7 + ): # This algorithm has been adapted from: # https://aakinshin.net/posts/weighted-quantiles/#reference-implementation is_nan = np.isnan(data) From 7379d22f803acc278ba9e041fc6b444a153c80df Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 12 Jan 2022 16:26:22 -0500 Subject: [PATCH 15/41] added missing Typing hints --- xarray/core/weighted.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index cb8becc69d6..3dbd251476d 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -3,6 +3,7 @@ Generic, Hashable, Iterable, + Literal, Optional, Sequence, Union, @@ -294,14 +295,14 @@ def _weighted_std( def _weighted_quantile( self, da: "DataArray", - q, + q: Union[float, Iterable[float]], dim: Union[Hashable, Iterable[Hashable]] = None, skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, ) -> "DataArray": """Apply a weighted ``quantile`` to a DataArray along some dimension(s).""" - def _get_h(n, q, htype: Literal[7, 4, 5, 6, 8, 9] = 7): + def _get_h(n: float, q: np.ndarray, htype: Literal[7, 4, 5, 6, 8, 9] = 7) -> np.ndarray: if htype == 7: h = (n - 1) * q + 1 elif htype == 4: @@ -319,8 +320,8 @@ def _get_h(n, q, htype: Literal[7, 4, 5, 6, 8, 9] = 7): return h.clip(1, n) def _weighted_quantile_1d( - data, weights, q, skipna: bool, htype: Literal[7, 4, 5, 6, 8, 9] = 7 - ): + data: np.ndarray, weights: np.ndarray, q: np.ndarray, skipna: bool, htype: Literal[7, 4, 5, 6, 8, 9] = 7 + ) -> np.ndarray: # This algorithm has been adapted from: # https://aakinshin.net/posts/weighted-quantiles/#reference-implementation is_nan = np.isnan(data) @@ -473,7 +474,7 @@ def std( def quantile( self, - q, + q: Union[float, Iterable[float]], *, dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, skipna: Optional[bool] = None, From c8871d1df28415aa366b9bd8c11301f88a437d4b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Jan 2022 21:27:43 +0000 Subject: [PATCH 16/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/weighted.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 3dbd251476d..76c9ac4159e 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -302,7 +302,9 @@ def _weighted_quantile( ) -> "DataArray": """Apply a weighted ``quantile`` to a DataArray along some dimension(s).""" - def _get_h(n: float, q: np.ndarray, htype: Literal[7, 4, 5, 6, 8, 9] = 7) -> np.ndarray: + def _get_h( + n: float, q: np.ndarray, htype: Literal[7, 4, 5, 6, 8, 9] = 7 + ) -> np.ndarray: if htype == 7: h = (n - 1) * q + 1 elif htype == 4: @@ -320,7 +322,11 @@ def _get_h(n: float, q: np.ndarray, htype: Literal[7, 4, 5, 6, 8, 9] = 7) -> np. return h.clip(1, n) def _weighted_quantile_1d( - data: np.ndarray, weights: np.ndarray, q: np.ndarray, skipna: bool, htype: Literal[7, 4, 5, 6, 8, 9] = 7 + data: np.ndarray, + weights: np.ndarray, + q: np.ndarray, + skipna: bool, + htype: Literal[7, 4, 5, 6, 8, 9] = 7, ) -> np.ndarray: # This algorithm has been adapted from: # https://aakinshin.net/posts/weighted-quantiles/#reference-implementation From b26a5fcc7114a3fc16b072254813ee787a1bd0ba Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 12 Jan 2022 16:41:17 -0500 Subject: [PATCH 17/41] update what's new and pep8 fixes --- doc/whats-new.rst | 5 ++--- xarray/core/weighted.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e97a5cb25c6..75bea683634 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,7 +23,8 @@ New Features ~~~~~~~~~~~~ - New top-level function :py:func:`cross`. (:issue:`3279`, :pull:`5365`). By `Jimmy Westling `_. - +- Add ``quantile`` method to :py:class:`~core.weighted.DatasetWeighted` and :py:class:`~core.weighted.DataArrayWeighted` (:pull:`6059`). + By `Christian Jauvin `_ and `David Huard `_. Breaking changes ~~~~~~~~~~~~~~~~ @@ -55,8 +56,6 @@ v0.20.2 (9 December 2021) New Features ~~~~~~~~~~~~ -- Add ``quantile`` to :py:class:`~core.weighted.DatasetWeighted` and :py:class:`~core.weighted.DataArrayWeighted`. - By `Christian Jauvin `_. This is a bugfix release to resolve (:issue:`3391`, :issue:`5715`). It also includes performance improvements in unstacking to a ``sparse`` array and a number of documentation improvements. diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 3dbd251476d..3cf13c0b42c 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -316,11 +316,11 @@ def _get_h(n: float, q: np.ndarray, htype: Literal[7, 4, 5, 6, 8, 9] = 7) -> np. elif htype == 9: h = (n + 1 / 4) * q + 3 / 8 else: - raise ValueError(f"Invalid htype.") + raise ValueError(f"Invalid htype: {htype}.") return h.clip(1, n) def _weighted_quantile_1d( - data: np.ndarray, weights: np.ndarray, q: np.ndarray, skipna: bool, htype: Literal[7, 4, 5, 6, 8, 9] = 7 + data: np.ndarray, weights: np.ndarray, q: np.ndarray, skipna: bool, htype: Literal[7, 4, 5, 6, 8, 9] = 7 ) -> np.ndarray: # This algorithm has been adapted from: # https://aakinshin.net/posts/weighted-quantiles/#reference-implementation From abe253e8b53aadb7bbdfe58fa4c2aac504779ff9 Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 19 Jan 2022 11:37:37 -0500 Subject: [PATCH 18/41] add docstring paragraph discussing weight interpretation --- xarray/core/weighted.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 12689b9106d..b0622008dd5 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -68,6 +68,12 @@ _WEIGHTED_QUANTILE_DOCSTRING_TEMPLATE = """ Apply a a weighted ``quantile`` to this {cls}'s data along some dimension(s). + Weights are interpreted as *sampling weights* (or probability weights) and + describe how a sample is scaled to the whole population. Although there are + other possible interpretations for weights, *precision weights* describing the + precision of observations, or *frequency weights* counting the number of identical + observations, they are not implemented here. + For compatibility with NumPy's non-weighted ``quantile`` (which is used by ``DataArray.quantile`` and ``Dataset.quantile``), the only interpolation method supported by this weighted version corresponds to the default "linear" From 82147aa5e6078e39891343ff4e4b84c04fc9f1ff Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 19 Jan 2022 11:58:19 -0500 Subject: [PATCH 19/41] recognize numpy names for quantile interpolation methods --- xarray/core/weighted.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index b0622008dd5..1c1c074a13d 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -18,6 +18,13 @@ from .pycompat import is_duck_dask_array from .types import T_Xarray +QUANTILE_METHODS = Literal[7, "linear", + 4, "interpolated_inverted_cdf", + 5, "hazen", + 6, "weibull", + 8, "median_unbiased", + 9, "normal_unbiased"] + _WEIGHTED_REDUCE_DOCSTRING_TEMPLATE = """ Reduce this {cls}'s data by a weighted ``{fcn}`` along some dimension(s). @@ -309,19 +316,19 @@ def _weighted_quantile( """Apply a weighted ``quantile`` to a DataArray along some dimension(s).""" def _get_h( - n: float, q: np.ndarray, htype: Literal[7, 4, 5, 6, 8, 9] = 7 + n: float, q: np.ndarray, htype: "QUANTILE_METHODS" ) -> np.ndarray: - if htype == 7: + if htype in ["linear", 7]: h = (n - 1) * q + 1 - elif htype == 4: + elif htype in ["interpolated_inverted_cdf", 4]: h = n * q - elif htype == 5: + elif htype in ["hazen", 5]: h = n * q + 0.5 - elif htype == 6: + elif htype in ["weibull", 6]: h = (n + 1) * q - elif htype == 8: + elif htype in ["median_unbiased", 8]: h = (n + 1 / 3) * q + 1 / 3 - elif htype == 9: + elif htype in ["normal_unbiased", 9]: h = (n + 1 / 4) * q + 3 / 8 else: raise ValueError(f"Invalid htype: {htype}.") @@ -332,7 +339,7 @@ def _weighted_quantile_1d( weights: np.ndarray, q: np.ndarray, skipna: bool, - htype: Literal[7, 4, 5, 6, 8, 9] = 7, + htype: "QUANTILE_METHODS" = "linear", ) -> np.ndarray: # This algorithm has been adapted from: # https://aakinshin.net/posts/weighted-quantiles/#reference-implementation From 784ceddb6959a670fe0dfc02f21bf0cde0dbb752 Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 19 Jan 2022 12:08:46 -0500 Subject: [PATCH 20/41] tweak to avoid warning with all nans data. simplify test --- xarray/core/weighted.py | 6 +++--- xarray/tests/test_weighted.py | 6 ++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 1c1c074a13d..fd33af9da60 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -359,13 +359,13 @@ def _weighted_quantile_1d( weights = weights[nonzero_weights] n = data.size - # Kish's effective sample size - nw = weights.sum() ** 2 / (weights ** 2).sum() - if n == 0: # Possibly empty after nan or zero weight filtering above return np.full(q.size, np.nan) + # Kish's effective sample size + nw = weights.sum() ** 2 / (weights ** 2).sum() + # Sort data and weights sorter = np.argsort(data) data = data[sorter] diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index 615a9ccbca1..75f0cc63496 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -272,15 +272,13 @@ def test_weighted_quantile_nan(skipna): [np.nan, np.nan, np.nan], ), ) -@pytest.mark.parametrize("q", (0.5, (0.1, 0.9), (0.2, 0.4, 0.6, 0.8))) +@pytest.mark.parametrize("q", (0.5, (0.2, 0.8))) @pytest.mark.parametrize("skipna", (True, False)) -@pytest.mark.parametrize("factor", [1, 2, 3.14]) +@pytest.mark.parametrize("factor", [1, 3.14]) def test_weighted_quantile_equal_weights(da, q, skipna, factor): # if all weights are equal (!= 0), should yield the same result as quantile da = DataArray(da) - - # all weights as 1. weights = xr.full_like(da, factor) expected = da.quantile(q, skipna=skipna) From 3ee62fdcb380c47bc816d019699deaf161ce78ab Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 19 Jan 2022 17:12:11 +0000 Subject: [PATCH 21/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/weighted.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index fd33af9da60..2d0aa176a60 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -18,12 +18,20 @@ from .pycompat import is_duck_dask_array from .types import T_Xarray -QUANTILE_METHODS = Literal[7, "linear", - 4, "interpolated_inverted_cdf", - 5, "hazen", - 6, "weibull", - 8, "median_unbiased", - 9, "normal_unbiased"] +QUANTILE_METHODS = Literal[ + 7, + "linear", + 4, + "interpolated_inverted_cdf", + 5, + "hazen", + 6, + "weibull", + 8, + "median_unbiased", + 9, + "normal_unbiased", +] _WEIGHTED_REDUCE_DOCSTRING_TEMPLATE = """ Reduce this {cls}'s data by a weighted ``{fcn}`` along some dimension(s). @@ -75,12 +83,12 @@ _WEIGHTED_QUANTILE_DOCSTRING_TEMPLATE = """ Apply a a weighted ``quantile`` to this {cls}'s data along some dimension(s). - Weights are interpreted as *sampling weights* (or probability weights) and - describe how a sample is scaled to the whole population. Although there are + Weights are interpreted as *sampling weights* (or probability weights) and + describe how a sample is scaled to the whole population. Although there are other possible interpretations for weights, *precision weights* describing the precision of observations, or *frequency weights* counting the number of identical - observations, they are not implemented here. - + observations, they are not implemented here. + For compatibility with NumPy's non-weighted ``quantile`` (which is used by ``DataArray.quantile`` and ``Dataset.quantile``), the only interpolation method supported by this weighted version corresponds to the default "linear" @@ -315,9 +323,7 @@ def _weighted_quantile( ) -> "DataArray": """Apply a weighted ``quantile`` to a DataArray along some dimension(s).""" - def _get_h( - n: float, q: np.ndarray, htype: "QUANTILE_METHODS" - ) -> np.ndarray: + def _get_h(n: float, q: np.ndarray, htype: "QUANTILE_METHODS") -> np.ndarray: if htype in ["linear", 7]: h = (n - 1) * q + 1 elif htype in ["interpolated_inverted_cdf", 4]: From 4d6a4fd8516dc1d541364074d0281a9a4af5b92d Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 19 Jan 2022 16:38:08 -0500 Subject: [PATCH 22/41] remove integers from quantile interpolation available methods --- xarray/core/weighted.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index fd33af9da60..f86a97d2100 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -18,12 +18,12 @@ from .pycompat import is_duck_dask_array from .types import T_Xarray -QUANTILE_METHODS = Literal[7, "linear", - 4, "interpolated_inverted_cdf", - 5, "hazen", - 6, "weibull", - 8, "median_unbiased", - 9, "normal_unbiased"] +QUANTILE_METHODS = Literal["linear", + "interpolated_inverted_cdf", + "hazen", + "weibull", + "median_unbiased", + "normal_unbiased"] _WEIGHTED_REDUCE_DOCSTRING_TEMPLATE = """ Reduce this {cls}'s data by a weighted ``{fcn}`` along some dimension(s). @@ -318,17 +318,19 @@ def _weighted_quantile( def _get_h( n: float, q: np.ndarray, htype: "QUANTILE_METHODS" ) -> np.ndarray: - if htype in ["linear", 7]: + """Return the interpolation parameter.""" + # Note that these options are not yet exposed in the public API. + if htype == "linear": h = (n - 1) * q + 1 - elif htype in ["interpolated_inverted_cdf", 4]: + elif htype == "interpolated_inverted_cdf": h = n * q - elif htype in ["hazen", 5]: + elif htype == "hazen": h = n * q + 0.5 - elif htype in ["weibull", 6]: + elif htype == "weibull": h = (n + 1) * q - elif htype in ["median_unbiased", 8]: + elif htype == "median_unbiased": h = (n + 1 / 3) * q + 1 / 3 - elif htype in ["normal_unbiased", 9]: + elif htype == "normal_unbiased": h = (n + 1 / 4) * q + 3 / 8 else: raise ValueError(f"Invalid htype: {htype}.") From 4d7f5f5dcc8f2a5f0b1777e2c4be63992deccc36 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 19 Jan 2022 23:35:26 +0100 Subject: [PATCH 23/41] remove merge artifacts --- xarray/core/weighted.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 6d5b1767b11..9f39fb42e48 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -18,29 +18,12 @@ from .pycompat import is_duck_dask_array from .types import T_Xarray -<<<<<<< HEAD QUANTILE_METHODS = Literal["linear", "interpolated_inverted_cdf", "hazen", "weibull", "median_unbiased", "normal_unbiased"] -======= -QUANTILE_METHODS = Literal[ - 7, - "linear", - 4, - "interpolated_inverted_cdf", - 5, - "hazen", - 6, - "weibull", - 8, - "median_unbiased", - 9, - "normal_unbiased", -] ->>>>>>> 3ee62fdcb380c47bc816d019699deaf161ce78ab _WEIGHTED_REDUCE_DOCSTRING_TEMPLATE = """ Reduce this {cls}'s data by a weighted ``{fcn}`` along some dimension(s). From c268dddde0f10e087d26846ca49e0af7680450dd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 19 Jan 2022 22:37:18 +0000 Subject: [PATCH 24/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/weighted.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 9f39fb42e48..19978e93e50 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -18,12 +18,14 @@ from .pycompat import is_duck_dask_array from .types import T_Xarray -QUANTILE_METHODS = Literal["linear", - "interpolated_inverted_cdf", - "hazen", - "weibull", - "median_unbiased", - "normal_unbiased"] +QUANTILE_METHODS = Literal[ + "linear", + "interpolated_inverted_cdf", + "hazen", + "weibull", + "median_unbiased", + "normal_unbiased", +] _WEIGHTED_REDUCE_DOCSTRING_TEMPLATE = """ Reduce this {cls}'s data by a weighted ``{fcn}`` along some dimension(s). @@ -315,9 +317,7 @@ def _weighted_quantile( ) -> "DataArray": """Apply a weighted ``quantile`` to a DataArray along some dimension(s).""" - def _get_h( - n: float, q: np.ndarray, htype: "QUANTILE_METHODS" - ) -> np.ndarray: + def _get_h(n: float, q: np.ndarray, htype: "QUANTILE_METHODS") -> np.ndarray: """Return the interpolation parameter.""" # Note that options are not yet exposed in the public API. if htype == "linear": From db706aaf7b30f1307ee88ae32626a07955a8bf2b Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 19 Jan 2022 19:00:16 -0700 Subject: [PATCH 25/41] [skip-ci] fix bad merge in whats-new --- doc/whats-new.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3cbb2637d3f..b9820ca1435 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,7 +30,6 @@ New Features - Enable the limit option for dask array in the following methods :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`Dataset.ffill` and :py:meth:`Dataset.bfill` (:issue:`6112`) By `Joseph Nowak `_. - Breaking changes ~~~~~~~~~~~~~~~~ - Rely on matplotlib's default datetime converters instead of pandas' (:issue:`6102`, :pull:`6109`). @@ -89,8 +88,6 @@ Internal Changes v0.20.2 (9 December 2021) ------------------------- -New Features -~~~~~~~~~~~~ This is a bugfix release to resolve (:issue:`3391`, :issue:`5715`). It also includes performance improvements in unstacking to a ``sparse`` array and a number of documentation improvements. From 33ee96c2ad7766e6cfb113214d2db24737edc482 Mon Sep 17 00:00:00 2001 From: David Huard Date: Thu, 20 Jan 2022 11:31:18 -0500 Subject: [PATCH 26/41] Add references --- xarray/core/weighted.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 19978e93e50..15dd0f5f3e8 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -78,7 +78,7 @@ Apply a a weighted ``quantile`` to this {cls}'s data along some dimension(s). Weights are interpreted as *sampling weights* (or probability weights) and - describe how a sample is scaled to the whole population. Although there are + describe how a sample is scaled to the whole population [1]_. Although there are other possible interpretations for weights, *precision weights* describing the precision of observations, or *frequency weights* counting the number of identical observations, they are not implemented here. @@ -87,11 +87,11 @@ ``DataArray.quantile`` and ``Dataset.quantile``), the only interpolation method supported by this weighted version corresponds to the default "linear" option of ``numpy.quantile``. This is "Type 7" option, described in Hyndman - and Fan (1996): https://doi.org/10.2307/2684934. + and Fan (1996) [2]_. The implementation is largely inspired from A. Akinshin's [3]_. Parameters ---------- - q : float or sequence of float + q : float or sequence of float Quantile to compute, which must be between 0 and 1 inclusive. dim : str or sequence of str, optional Dimension(s) over which to apply the weighted ``quantile``. @@ -120,6 +120,13 @@ ----- Returns NaN if the ``weights`` sum to 0.0 along the reduced dimension(s). + + References + ---------- + .. [1] https://notstatschat.rbind.io/2020/08/04/weights-in-statistics/ + .. [2] Hyndman, R. J. & Fan, Y. (1996). Sample Quantiles in Statistical Packages. + The American Statistician, 50(4), 361–365. https://doi.org/10.2307/2684934 + .. [3] https://aakinshin.net/posts/weighted-quantiles """ From 2ffd3d3082c4c159b88e0c3516517807559d9640 Mon Sep 17 00:00:00 2001 From: David Huard Date: Tue, 8 Feb 2022 12:07:47 -0500 Subject: [PATCH 27/41] renamed htype argument to method in private functions --- xarray/core/weighted.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 15dd0f5f3e8..293849a4c0d 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -18,6 +18,7 @@ from .pycompat import is_duck_dask_array from .types import T_Xarray +# Weighted quantile methods are a subset of the numpy supported quantile methods. QUANTILE_METHODS = Literal[ "linear", "interpolated_inverted_cdf", @@ -324,23 +325,23 @@ def _weighted_quantile( ) -> "DataArray": """Apply a weighted ``quantile`` to a DataArray along some dimension(s).""" - def _get_h(n: float, q: np.ndarray, htype: "QUANTILE_METHODS") -> np.ndarray: + def _get_h(n: float, q: np.ndarray, method: "QUANTILE_METHODS") -> np.ndarray: """Return the interpolation parameter.""" # Note that options are not yet exposed in the public API. - if htype == "linear": + if method == "linear": h = (n - 1) * q + 1 - elif htype == "interpolated_inverted_cdf": + elif method == "interpolated_inverted_cdf": h = n * q - elif htype == "hazen": + elif method == "hazen": h = n * q + 0.5 - elif htype == "weibull": + elif method == "weibull": h = (n + 1) * q - elif htype == "median_unbiased": + elif method == "median_unbiased": h = (n + 1 / 3) * q + 1 / 3 - elif htype == "normal_unbiased": + elif method == "normal_unbiased": h = (n + 1 / 4) * q + 3 / 8 else: - raise ValueError(f"Invalid htype: {htype}.") + raise ValueError(f"Invalid method: {method}.") return h.clip(1, n) def _weighted_quantile_1d( @@ -348,7 +349,7 @@ def _weighted_quantile_1d( weights: np.ndarray, q: np.ndarray, skipna: bool, - htype: "QUANTILE_METHODS" = "linear", + method: QUANTILE_METHODS = "linear", ) -> np.ndarray: # This algorithm has been adapted from: # https://aakinshin.net/posts/weighted-quantiles/#reference-implementation @@ -386,7 +387,7 @@ def _weighted_quantile_1d( # Vectorize the computation for q q = np.atleast_2d(q).T - h = _get_h(nw, q, htype) + h = _get_h(nw, q, method) u = np.maximum((h - 1) / nw, np.minimum(h / nw, weights_cum)) v = u * nw - h + 1 w = np.diff(v) From 15ee99905142c0dc6cff14187c41d80cf5ec75aa Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 8 Feb 2022 17:12:04 +0000 Subject: [PATCH 28/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/weighted.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 3fa4ce1e14e..59aa3c96007 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -374,7 +374,7 @@ def _weighted_quantile_1d( return np.full(q.size, np.nan) # Kish's effective sample size - nw = weights.sum() ** 2 / (weights ** 2).sum() + nw = weights.sum() ** 2 / (weights**2).sum() # Sort data and weights sorter = np.argsort(data) From 9559c876fc10a30e8f0648c17868d1ce9c9c9fdf Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 9 Feb 2022 07:05:15 -0500 Subject: [PATCH 29/41] Update xarray/core/weighted.py Co-authored-by: Abel Aoun --- xarray/core/weighted.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 59aa3c96007..67dd5fd8d84 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -325,7 +325,7 @@ def _weighted_quantile( ) -> "DataArray": """Apply a weighted ``quantile`` to a DataArray along some dimension(s).""" - def _get_h(n: float, q: np.ndarray, method: "QUANTILE_METHODS") -> np.ndarray: + def _get_h(n: float, q: np.ndarray, method: QUANTILE_METHODS) -> np.ndarray: """Return the interpolation parameter.""" # Note that options are not yet exposed in the public API. if method == "linear": From 73dde79a0648cfa6d9a83d4f07be41fbe8a026bb Mon Sep 17 00:00:00 2001 From: Christian Jauvin Date: Thu, 17 Feb 2022 13:12:49 -0500 Subject: [PATCH 30/41] Add skipped test to verify equal weights quantile with methods --- xarray/tests/test_weighted.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index 83947f3eee3..105a7e5426c 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -287,6 +287,41 @@ def test_weighted_quantile_equal_weights(da, q, skipna, factor): assert_allclose(expected, result) +@pytest.mark.skip(reason="Method argument is not currently exposed") +@pytest.mark.parametrize( + "da", + ( + [1, 1.9, 2.2, 3, 3.7, 4.1, 5], + [1, 1.9, 2.2, 3, 3.7, 4.1, np.nan], + [np.nan, np.nan, np.nan], + ), +) +@pytest.mark.parametrize("q", (0.5, (0.2, 0.8))) +@pytest.mark.parametrize("skipna", (True, False)) +@pytest.mark.parametrize("factor", [1, 3.14]) +@pytest.mark.parametrize( + "method", + [ + "linear", + "interpolated_inverted_cdf", + "hazen", + "weibull", + "median_unbiased", + "normal_unbiased2", + ], +) +def test_weighted_quantile_equal_weights_all_methods(da, q, skipna, factor, method): + # if all weights are equal (!= 0), should yield the same result as quantile + + da = DataArray(da) + weights = xr.full_like(da, factor) + + expected = da.quantile(q, skipna=skipna, interpolation=method) + result = da.weighted(weights).quantile(q, skipna=skipna, method=method) + + assert_allclose(expected, result) + + def test_weighted_quantile_bool(): # https://github.com/pydata/xarray/issues/4074 da = DataArray([1, 1]) From 59714afe7ac44a0ea4f828a191786210005ac027 Mon Sep 17 00:00:00 2001 From: David Huard Date: Thu, 17 Feb 2022 13:53:44 -0500 Subject: [PATCH 31/41] Apply suggestions from code review Co-authored-by: Mathias Hauser --- xarray/core/weighted.py | 3 ++- xarray/tests/test_weighted.py | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 67dd5fd8d84..d4fa733f914 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -88,7 +88,8 @@ ``DataArray.quantile`` and ``Dataset.quantile``), the only interpolation method supported by this weighted version corresponds to the default "linear" option of ``numpy.quantile``. This is "Type 7" option, described in Hyndman - and Fan (1996) [2]_. The implementation is largely inspired from A. Akinshin's [3]_. + and Fan (1996) [2]_. The implementation is largely inspired by a blog post + from A. Akinshin's [3]_. Parameters ---------- diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index 105a7e5426c..2d4edb770a9 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -208,8 +208,8 @@ def test_weighted_mean_no_nan(weights, expected): ), ) def test_weighted_quantile_no_nan(weights, expected): - # Expected values are defined by running the reference implementation proposed in - # https://aakinshin.net/posts/weighted-quantiles/ + # Expected values were calculated by running the reference implementation + # proposed in https://aakinshin.net/posts/weighted-quantiles/ da = DataArray([1, 1.9, 2.2, 3, 3.7, 4.1, 5]) q = [0.2, 0.4, 0.6, 0.8] @@ -238,13 +238,13 @@ def test_weighted_quantile_simple(): da = DataArray([0, 1, 2, 3]) w = DataArray([1, 0, 1, 0]) - weps = DataArray([1, 0.0001, 1, 0.0001]) + w_eps = DataArray([1, 0.0001, 1, 0.0001]) q = 0.75 expected = DataArray(np.quantile([0, 2], q), coords={"quantile": q}) # 1.5 assert_equal(expected, da.weighted(w).quantile(q)) - assert_allclose(expected, da.weighted(weps).quantile(q), rtol=0.001) + assert_allclose(expected, da.weighted(w_eps).quantile(q), rtol=0.001) @pytest.mark.parametrize("skipna", (True, False)) @@ -257,7 +257,7 @@ def test_weighted_quantile_nan(skipna): result = da.weighted(w).quantile(q, skipna=skipna) if skipna: - expected = DataArray(np.quantile([0, 2], q), coords={"quantile": q}) # 1.5 + expected = DataArray(np.quantile([0, 2], q), coords={"quantile": q}) else: expected = DataArray(np.full(len(q), np.nan), coords={"quantile": q}) From 585b705bfbe0f415ad2d2394cb7c2046e113fd69 Mon Sep 17 00:00:00 2001 From: David Huard Date: Thu, 17 Feb 2022 14:35:22 -0500 Subject: [PATCH 32/41] Update xarray/core/weighted.py Co-authored-by: Mathias Hauser --- xarray/core/weighted.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index d4fa733f914..75a9ae84f1c 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -76,7 +76,7 @@ """ _WEIGHTED_QUANTILE_DOCSTRING_TEMPLATE = """ - Apply a a weighted ``quantile`` to this {cls}'s data along some dimension(s). + Apply a weighted ``quantile`` to this {cls}'s data along some dimension(s). Weights are interpreted as *sampling weights* (or probability weights) and describe how a sample is scaled to the whole population [1]_. Although there are From 7443b826362841121f33bc9969ac0dbc9e6057b7 Mon Sep 17 00:00:00 2001 From: David Huard Date: Thu, 17 Feb 2022 14:53:13 -0500 Subject: [PATCH 33/41] modifications suggested by review: comments, remove align, clarify test logic --- xarray/core/weighted.py | 15 +++++++++++---- xarray/tests/test_weighted.py | 7 ++++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 75a9ae84f1c..46c8ef3e16d 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -386,12 +386,20 @@ def _weighted_quantile_1d( weights = weights / weights.sum() weights_cum = np.append(0, weights.cumsum()) - # Vectorize the computation for q + # Vectorize the computation by transposing q with respect to weights q = np.atleast_2d(q).T + + # Get the interpolation parameter for each q h = _get_h(nw, q, method) + + # Find the samples contributing to the quantile computation (at *positions* between (h-1)/nw and h/nw) u = np.maximum((h - 1) / nw, np.minimum(h / nw, weights_cum)) + + # Compute their relative weight v = u * nw - h + 1 w = np.diff(v) + + # Apply the weights return (data * w).sum(axis=1) if da.shape != self.weights.shape: @@ -413,15 +421,14 @@ def _weighted_quantile_1d( dim = cast(Sequence, dim) - da, w = align(da, self.weights) result = apply_ufunc( _weighted_quantile_1d, da, - w, + self.weights, input_core_dims=[dim, dim], output_core_dims=[["quantile"]], output_dtypes=[np.float64], - join="override", + join="inner", dask_gufunc_kwargs=dict(output_sizes={"quantile": len(q)}), dask="parallelized", vectorize=True, diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index 2d4edb770a9..23465dcadaa 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -298,7 +298,6 @@ def test_weighted_quantile_equal_weights(da, q, skipna, factor): ) @pytest.mark.parametrize("q", (0.5, (0.2, 0.8))) @pytest.mark.parametrize("skipna", (True, False)) -@pytest.mark.parametrize("factor", [1, 3.14]) @pytest.mark.parametrize( "method", [ @@ -311,10 +310,10 @@ def test_weighted_quantile_equal_weights(da, q, skipna, factor): ], ) def test_weighted_quantile_equal_weights_all_methods(da, q, skipna, factor, method): - # if all weights are equal (!= 0), should yield the same result as quantile + # If all weights are equal (!= 0), should yield the same result as numpy quantile da = DataArray(da) - weights = xr.full_like(da, factor) + weights = xr.full_like(da, 3.14) expected = da.quantile(q, skipna=skipna, interpolation=method) result = da.weighted(weights).quantile(q, skipna=skipna, method=method) @@ -724,8 +723,10 @@ def test_weighted_operations_keep_attr(operation, as_dataset, keep_attrs): if operation == "sum_of_weights": assert weights.attrs == result.attrs if keep_attrs else not result.attrs + assert result.attrs == (weights.attrs if keep_attrs else {}) else: assert data.attrs == result.attrs if keep_attrs else not result.attrs + assert result.attrs == (data.attrs if keep_attrs else {}) @pytest.mark.parametrize( From 2e0c16e84e59ae4bc9eaa9bbb0089c54c6e00d1d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 17 Feb 2022 20:22:38 +0000 Subject: [PATCH 34/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/weighted.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index eebb7f34a1a..ba0b239c8bf 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -1,7 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Generic, Hashable, Iterable, cast, Literal, Sequence - +from typing import TYPE_CHECKING, Generic, Hashable, Iterable, Literal, Sequence, cast import numpy as np From 4186a244d83f7b7dd089a07b8007cbbbbd9bd1e1 Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 21 Feb 2022 16:50:15 -0500 Subject: [PATCH 35/41] Apply suggestions from code review Co-authored-by: Mathias Hauser --- doc/whats-new.rst | 2 +- xarray/core/weighted.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6f2f4fa6bcb..5c01eac4f19 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -92,7 +92,7 @@ New Features ~~~~~~~~~~~~ - New top-level function :py:func:`cross`. (:issue:`3279`, :pull:`5365`). By `Jimmy Westling `_. -- Add ``quantile`` method to :py:class:`~core.weighted.DatasetWeighted` and :py:class:`~core.weighted.DataArrayWeighted` (:pull:`6059`). +- Add a weighted ``quantile`` method to :py:class:`~core.weighted.DatasetWeighted` and :py:class:`~core.weighted.DataArrayWeighted` (:pull:`6059`). By `Christian Jauvin `_ and `David Huard `_. - ``keep_attrs`` support for :py:func:`where` (:issue:`4141`, :issue:`4682`, :pull:`4687`). By `Justus Magin `_. diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index ba0b239c8bf..078dd3101a8 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -71,10 +71,10 @@ Apply a weighted ``quantile`` to this {cls}'s data along some dimension(s). Weights are interpreted as *sampling weights* (or probability weights) and - describe how a sample is scaled to the whole population [1]_. Although there are + describe how a sample is scaled to the whole population [1]_. There are other possible interpretations for weights, *precision weights* describing the precision of observations, or *frequency weights* counting the number of identical - observations, they are not implemented here. + observations, however, they are not implemented here. For compatibility with NumPy's non-weighted ``quantile`` (which is used by ``DataArray.quantile`` and ``Dataset.quantile``), the only interpolation @@ -107,8 +107,7 @@ See Also -------- - numpy.nanquantile, pandas.Series.quantile, Dataset.quantile - DataArray.quantile + numpy.nanquantile, pandas.Series.quantile, Dataset.quantile, DataArray.quantile Notes ----- @@ -312,7 +311,7 @@ def _weighted_quantile( self, da: DataArray, q: ArrayLike, - dim: Hashable | Iterable[Hashable] = None, + dim: Hashable | Iterable[Hashable] | None = None, skipna: bool = None, ) -> DataArray: """Apply a weighted ``quantile`` to a DataArray along some dimension(s).""" From 1be1f92ad7ee657fb339e1321eb86e74c4a66c79 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 21 Feb 2022 21:51:59 +0000 Subject: [PATCH 36/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/contributing.rst | 4 ++-- doc/whats-new.rst | 4 ++-- xarray/core/computation.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/contributing.rst b/doc/contributing.rst index df279caa54f..0913702fd83 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -274,13 +274,13 @@ Some other important things to know about the docs: .. ipython:: python x = 2 - x ** 3 + x**3 will be rendered as:: In [1]: x = 2 - In [2]: x ** 3 + In [2]: x**3 Out[2]: 8 Almost all code examples in the docs are run (and the output saved) during the diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 5c01eac4f19..61a40b1af65 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -5135,7 +5135,7 @@ Enhancements .. ipython:: python ds = xray.Dataset(coords={"x": range(100), "y": range(100)}) - ds["distance"] = np.sqrt(ds.x ** 2 + ds.y ** 2) + ds["distance"] = np.sqrt(ds.x**2 + ds.y**2) @savefig where_example.png width=4in height=4in ds.distance.where(ds.distance < 100).plot() @@ -5343,7 +5343,7 @@ Enhancements .. ipython:: python ds = xray.Dataset({"y": ("x", [1, 2, 3])}) - ds.assign(z=lambda ds: ds.y ** 2) + ds.assign(z=lambda ds: ds.y**2) ds.assign_coords(z=("x", ["a", "b", "c"])) These methods return a new Dataset (or DataArray) with updated data or diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 88eefbdc441..c11bd1a78a4 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -944,7 +944,7 @@ def apply_ufunc( Calculate the vector magnitude of two arguments: >>> def magnitude(a, b): - ... func = lambda x, y: np.sqrt(x ** 2 + y ** 2) + ... func = lambda x, y: np.sqrt(x**2 + y**2) ... return xr.apply_ufunc(func, a, b) ... From 5c251e02ef1dbe5f8582ef1a2ec1843dfbeb9a13 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 25 Feb 2022 10:25:54 +0100 Subject: [PATCH 37/41] use broadcast --- xarray/core/weighted.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 078dd3101a8..b77e97c3d9c 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -5,6 +5,7 @@ import numpy as np from . import duck_array_ops, utils +from .alignment import broadcast from .computation import apply_ufunc, dot from .npcompat import ArrayLike from .pycompat import is_duck_dask_array @@ -342,6 +343,7 @@ def _weighted_quantile_1d( skipna: bool, method: QUANTILE_METHODS = "linear", ) -> np.ndarray: + # This algorithm has been adapted from: # https://aakinshin.net/posts/weighted-quantiles/#reference-implementation is_nan = np.isnan(data) @@ -392,8 +394,8 @@ def _weighted_quantile_1d( # Apply the weights return (data * w).sum(axis=1) - if da.shape != self.weights.shape: - raise ValueError("da and weights must have the same shape") + if skipna is None and da.dtype.kind in "cfO": + skipna = True q = np.atleast_1d(np.asarray(q, dtype=np.float64)) @@ -412,14 +414,16 @@ def _weighted_quantile_1d( # To satisfy mypy dim = cast(Sequence, dim) + # need to ensure da & weights have the same shape + da, weights = broadcast(da, self.weights) + result = apply_ufunc( _weighted_quantile_1d, da, - self.weights, + weights, input_core_dims=[dim, dim], output_core_dims=[["quantile"]], output_dtypes=[np.float64], - join="inner", dask_gufunc_kwargs=dict(output_sizes={"quantile": len(q)}), dask="parallelized", vectorize=True, From c112d2cb158f2f68d158673ace1cf0d5e7f8688c Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 4 Mar 2022 10:06:57 +0100 Subject: [PATCH 38/41] move whatsnew entry --- doc/whats-new.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2adc36f42b5..66db305fd78 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,9 @@ v2022.03.1 (unreleased) New Features ~~~~~~~~~~~~ +- Add a weighted ``quantile`` method to :py:class:`~core.weighted.DatasetWeighted` and + :py:class:`~core.weighted.DataArrayWeighted` (:pull:`6059`). By + `Christian Jauvin `_ and `David Huard `_. Breaking changes ~~~~~~~~~~~~~~~~ @@ -136,8 +139,6 @@ New Features ~~~~~~~~~~~~ - New top-level function :py:func:`cross`. (:issue:`3279`, :pull:`5365`). By `Jimmy Westling `_. -- Add a weighted ``quantile`` method to :py:class:`~core.weighted.DatasetWeighted` and :py:class:`~core.weighted.DataArrayWeighted` (:pull:`6059`). - By `Christian Jauvin `_ and `David Huard `_. - ``keep_attrs`` support for :py:func:`where` (:issue:`4141`, :issue:`4682`, :pull:`4687`). By `Justus Magin `_. - Enable the limit option for dask array in the following methods :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`Dataset.ffill` and :py:meth:`Dataset.bfill` (:issue:`6112`) From d4ba8ee9dbde184b60acbb556ee10433f8be4740 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 4 Mar 2022 10:10:15 +0100 Subject: [PATCH 39/41] Apply suggestions from code review --- xarray/tests/test_weighted.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index 54f43d14c71..63dd1ec0c94 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -722,10 +722,10 @@ def test_weighted_operations_keep_attr(operation, as_dataset, keep_attrs): result = getattr(data.weighted(weights), operation)(**kwargs) if operation == "sum_of_weights": - assert weights.attrs == result.attrs if keep_attrs else not result.attrs + assert result.attrs == (weights.attrs if keep_attrs else {}) assert result.attrs == (weights.attrs if keep_attrs else {}) else: - assert data.attrs == result.attrs if keep_attrs else not result.attrs + assert result.attrs == (weights.attrs if keep_attrs else {}) assert result.attrs == (data.attrs if keep_attrs else {}) From fd0a54ec4b58a871106107de66e87d066a76ebc5 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 4 Mar 2022 10:41:02 +0100 Subject: [PATCH 40/41] switch skipna determination --- xarray/core/weighted.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index b77e97c3d9c..be43883db4b 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -394,9 +394,6 @@ def _weighted_quantile_1d( # Apply the weights return (data * w).sum(axis=1) - if skipna is None and da.dtype.kind in "cfO": - skipna = True - q = np.atleast_1d(np.asarray(q, dtype=np.float64)) if q.ndim > 1: @@ -415,8 +412,13 @@ def _weighted_quantile_1d( dim = cast(Sequence, dim) # need to ensure da & weights have the same shape + # TODO: use join="inner", see https://github.com/pydata/xarray/issues/6304 da, weights = broadcast(da, self.weights) + # after broadcast because it can change the dtype (when padding with NaN) + if skipna is None and da.dtype.kind in "cfO": + skipna = True + result = apply_ufunc( _weighted_quantile_1d, da, From 343b47e52aff7e122bec3424152cf2cebda00ca2 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 4 Mar 2022 11:04:25 +0100 Subject: [PATCH 41/41] use align and broadcast --- xarray/core/weighted.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index be43883db4b..2e944eab1e0 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -5,7 +5,7 @@ import numpy as np from . import duck_array_ops, utils -from .alignment import broadcast +from .alignment import align, broadcast from .computation import apply_ufunc, dot from .npcompat import ArrayLike from .pycompat import is_duck_dask_array @@ -394,6 +394,9 @@ def _weighted_quantile_1d( # Apply the weights return (data * w).sum(axis=1) + if skipna is None and da.dtype.kind in "cfO": + skipna = True + q = np.atleast_1d(np.asarray(q, dtype=np.float64)) if q.ndim > 1: @@ -411,13 +414,16 @@ def _weighted_quantile_1d( # To satisfy mypy dim = cast(Sequence, dim) - # need to ensure da & weights have the same shape - # TODO: use join="inner", see https://github.com/pydata/xarray/issues/6304 - da, weights = broadcast(da, self.weights) + # need to align *and* broadcast + # - `_weighted_quantile_1d` requires arrays with the same shape + # - broadcast does an outer join, which can introduce NaN to weights + # - therefore we first need to do align(..., join="inner") - # after broadcast because it can change the dtype (when padding with NaN) - if skipna is None and da.dtype.kind in "cfO": - skipna = True + # TODO: use broadcast(..., join="inner") once available + # see https://github.com/pydata/xarray/issues/6304 + + da, weights = align(da, self.weights, join="inner") + da, weights = broadcast(da, weights) result = apply_ufunc( _weighted_quantile_1d,