Skip to content

Commit

Permalink
Fix recombination in groupby when changing size along the grouped dim…
Browse files Browse the repository at this point in the history
…ension (#3807)

* Fix recombination in groupby when changing size along the grouped dimension

* cleanup tests

* minor test rename

* minor fix

Co-authored-by: dcherian <deepak@cherian.net>
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
  • Loading branch information
3 people authored Mar 17, 2020
1 parent cafab46 commit 65a5bff
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 5 deletions.
6 changes: 4 additions & 2 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,10 @@ Bug fixes
- Fix :py:meth:`Dataset.interp` when indexing array shares coordinates with the
indexed variable (:issue:`3252`).
By `David Huard <https://github.com/huard>`_.


- Fix recombination of groups in :py:meth:`Dataset.groupby` and
:py:meth:`DataArray.groupby` when performing an operation that changes the
size of the groups along the grouped dimension. By `Eric Jansen
<https://github.com/ej81>`_.
- Fix use of multi-index with categorical values (:issue:`3674`).
By `Matthieu Ancellin <https://github.com/mancellin>`_.
- Fix alignment with ``join="override"`` when some dimensions are unindexed. (:issue:`3681`).
Expand Down
8 changes: 5 additions & 3 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,7 @@ def assign_coords(self, coords=None, **coords_kwargs):
def _maybe_reorder(xarray_obj, dim, positions):
order = _inverse_permutation_indices(positions)

if order is None:
if order is None or len(order) != xarray_obj.sizes[dim]:
return xarray_obj
else:
return xarray_obj[{dim: order}]
Expand Down Expand Up @@ -838,7 +838,8 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False):
if isinstance(combined, type(self._obj)):
# only restore dimension order for arrays
combined = self._restore_dim_order(combined)
if coord is not None:
# assign coord when the applied function does not return that coord
if coord is not None and dim not in applied_example.dims:
if shortcut:
coord_var = as_variable(coord)
combined._coords[coord.name] = coord_var
Expand Down Expand Up @@ -954,7 +955,8 @@ def _combine(self, applied):
coord, dim, positions = self._infer_concat_args(applied_example)
combined = concat(applied, dim)
combined = _maybe_reorder(combined, dim, positions)
if coord is not None:
# assign coord when the applied function does not return that coord
if coord is not None and dim not in applied_example.dims:
combined[coord.name] = coord
combined = self._maybe_restore_empty_groups(combined)
combined = self._maybe_unstack(combined)
Expand Down
33 changes: 33 additions & 0 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,39 @@ def test_groupby_input_mutation():
assert_identical(array, array_copy) # should not modify inputs


@pytest.mark.parametrize(
"obj",
[
xr.DataArray([1, 2, 3, 4, 5, 6], [("x", [1, 1, 1, 2, 2, 2])]),
xr.Dataset({"foo": ("x", [1, 2, 3, 4, 5, 6])}, {"x": [1, 1, 1, 2, 2, 2]}),
],
)
def test_groupby_map_shrink_groups(obj):
expected = obj.isel(x=[0, 1, 3, 4])
actual = obj.groupby("x").map(lambda f: f.isel(x=[0, 1]))
assert_identical(expected, actual)


@pytest.mark.parametrize(
"obj",
[
xr.DataArray([1, 2, 3], [("x", [1, 2, 2])]),
xr.Dataset({"foo": ("x", [1, 2, 3])}, {"x": [1, 2, 2]}),
],
)
def test_groupby_map_change_group_size(obj):
def func(group):
if group.sizes["x"] == 1:
result = group.isel(x=[0, 0])
else:
result = group.isel(x=[0])
return result

expected = obj.isel(x=[0, 0, 1])
actual = obj.groupby("x").map(func)
assert_identical(expected, actual)


def test_da_groupby_map_func_args():
def func(arg1, arg2, arg3=0):
return arg1 + arg2 + arg3
Expand Down

0 comments on commit 65a5bff

Please sign in to comment.