diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c70dfd4f3f6..b9c81ad3474 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -45,6 +45,9 @@ New Features Bug fixes ~~~~~~~~~ +- Fix renaming of coords when one or more stacked coords are not in + sorted order during stack+groupby+apply operations. (:issue:`3287`, + :pull:`3906`) By `Spencer Hill `_ - Fix a regression where deleting a coordinate from a copied :py:class:`DataArray` can affect the original :py:class:`Dataarray`. (:issue:`3899`, :pull:`3871`) By `Todd Jennings `_ @@ -96,13 +99,13 @@ New Features - Added support for :py:class:`pandas.DatetimeIndex`-style rounding of ``cftime.datetime`` objects directly via a :py:class:`CFTimeIndex` or via the :py:class:`~core.accessor_dt.DatetimeAccessor`. - By `Spencer Clark `_ + By `Spencer Clark `_ - Support new h5netcdf backend keyword `phony_dims` (available from h5netcdf v0.8.0 for :py:class:`~xarray.backends.H5NetCDFStore`. By `Kai Mühlbauer `_. - Add partial support for unit aware arrays with pint. (:pull:`3706`, :pull:`3611`) By `Justus Magin `_. -- :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a +- :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a `TypeError` on multiple string arguments. Receiving multiple string arguments often means a user is attempting to pass multiple dimensions as separate arguments and should instead pass a single list of dimensions. @@ -120,7 +123,7 @@ New Features By `Maximilian Roos `_. - ``skipna`` is available in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile` - (:issue:`3843`, :pull:`3844`) + (:issue:`3843`, :pull:`3844`) By `Aaron Spring `_. Bug fixes @@ -814,7 +817,7 @@ Bug fixes Documentation ~~~~~~~~~~~~~ -- Created a `PR checklist `_ +- Created a `PR checklist `_ as a quick reference for tasks before creating a new PR or pushing new commits. 
By `Gregory Gundersen `_. diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 67e8f0588b3..5a5f4c0d296 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -370,8 +370,10 @@ def __init__( group = group.dropna(group_dim) # look through group to find the unique values + group_as_index = safe_cast_to_index(group) + sort = bins is None and (not isinstance(group_as_index, pd.MultiIndex)) unique_values, group_indices = unique_value_groups( - safe_cast_to_index(group), sort=(bins is None) + group_as_index, sort=sort ) unique_coord = IndexVariable(group.name, unique_values) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 5b3e122bf72..ced72d1bc06 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1202,6 +1202,25 @@ def test_selection_multiindex_from_level(self): expected = data.isel(xy=[0, 1]).unstack("xy").squeeze("y").drop_vars("y") assert_equal(actual, expected) + def test_stack_groupby_unsorted_coord(self): + data = [[0, 1], [2, 3]] + data_flat = [0, 1, 2, 3] + dims = ["x", "y"] + y_vals = [2, 3] + + arr = xr.DataArray(data, dims=dims, coords={"y": y_vals}) + actual1 = arr.stack(z=dims).groupby("z").first() + midx1 = pd.MultiIndex.from_product([[0, 1], [2, 3]], names=dims) + expected1 = xr.DataArray(data_flat, dims=["z"], coords={"z": midx1}) + xr.testing.assert_equal(actual1, expected1) + + # GH: 3287. Note that y coord values are not in sorted order. + arr = xr.DataArray(data, dims=dims, coords={"y": y_vals[::-1]}) + actual2 = arr.stack(z=dims).groupby("z").first() + midx2 = pd.MultiIndex.from_product([[0, 1], [3, 2]], names=dims) + expected2 = xr.DataArray(data_flat, dims=["z"], coords={"z": midx2}) + xr.testing.assert_equal(actual2, expected2) + def test_virtual_default_coords(self): array = DataArray(np.zeros((5,)), dims="x") expected = DataArray(range(5), dims="x", name="x")