From 65a5bff79479c4b56d6f733236fe544b7f4120a8 Mon Sep 17 00:00:00 2001 From: Eric Jansen Date: Tue, 17 Mar 2020 17:34:36 +0100 Subject: [PATCH] Fix recombination in groupby when changing size along the grouped dimension (#3807) * Fix recombination in groupby when changing size along the grouped dimension * cleanup tests * minor test rename * minor fix Co-authored-by: dcherian Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 6 ++++-- xarray/core/groupby.py | 8 +++++--- xarray/tests/test_groupby.py | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9b78d046148..aad0e083a8c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -57,8 +57,10 @@ Bug fixes - Fix :py:meth:`Dataset.interp` when indexing array shares coordinates with the indexed variable (:issue:`3252`). By `David Huard `_. - - +- Fix recombination of groups in :py:meth:`Dataset.groupby` and + :py:meth:`DataArray.groupby` when performing an operation that changes the + size of the groups along the grouped dimension. By `Eric Jansen + `_. - Fix use of multi-index with categorical values (:issue:`3674`). By `Matthieu Ancellin `_. - Fix alignment with ``join="override"`` when some dimensions are unindexed. (:issue:`3681`). diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 4223d9dc255..67e8f0588b3 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -720,7 +720,7 @@ def assign_coords(self, coords=None, **coords_kwargs): def _maybe_reorder(xarray_obj, dim, positions): order = _inverse_permutation_indices(positions) - if order is None: + if order is None or len(order) != xarray_obj.sizes[dim]: return xarray_obj else: return xarray_obj[{dim: order}] @@ -838,7 +838,8 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False): if isinstance(combined, type(self._obj)): # only restore dimension order for arrays combined = self._restore_dim_order(combined) - if coord is not None: + # assign coord when the applied function does not return that coord + if coord is not None and dim not in applied_example.dims: if shortcut: coord_var = as_variable(coord) combined._coords[coord.name] = coord_var @@ -954,7 +955,8 @@ def _combine(self, applied): coord, dim, positions = self._infer_concat_args(applied_example) combined = concat(applied, dim) combined = _maybe_reorder(combined, dim, positions) - if coord is not None: + # assign coord when the applied function does not return that coord + if coord is not None and dim not in applied_example.dims: combined[coord.name] = coord combined = self._maybe_restore_empty_groups(combined) combined = self._maybe_unstack(combined) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 77558e741be..8ab4b7b2f80 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -107,6 +107,39 @@ def test_groupby_input_mutation(): assert_identical(array, array_copy) # should not modify inputs +@pytest.mark.parametrize( + "obj", + [ + xr.DataArray([1, 2, 3, 4, 5, 6], [("x", [1, 1, 1, 2, 2, 2])]), + xr.Dataset({"foo": ("x", [1, 2, 3, 4, 5, 6])}, {"x": [1, 1, 1, 2, 2, 2]}), + ], +) +def test_groupby_map_shrink_groups(obj): + expected = obj.isel(x=[0, 1, 3, 4]) + actual = obj.groupby("x").map(lambda f: f.isel(x=[0, 1])) + assert_identical(expected, actual) + + +@pytest.mark.parametrize( + "obj", + [ + xr.DataArray([1, 2, 3], [("x", [1, 2, 2])]), + xr.Dataset({"foo": ("x", [1, 2, 3])}, {"x": [1, 2, 2]}), + ], +) +def test_groupby_map_change_group_size(obj): + def func(group): + if group.sizes["x"] == 1: + result = group.isel(x=[0, 0]) + else: + result = group.isel(x=[0]) + return result + + expected = obj.isel(x=[0, 0, 1]) + actual = obj.groupby("x").map(func) + assert_identical(expected, actual) + + def test_da_groupby_map_func_args(): def func(arg1, arg2, arg3=0): return arg1 + arg2 + arg3