From ad938c1fbb69e95696cdaf3689f0ddcffe9a780f Mon Sep 17 00:00:00 2001 From: Cars Chandler Date: Wed, 18 Oct 2023 08:29:12 -0500 Subject: [PATCH] Add invert option to DataArray/Dataset.stack() --- xarray/core/dataarray.py | 57 ++++++++++++++++++++++++++++------ xarray/core/dataset.py | 34 +++++++++++++++++++- xarray/tests/test_dataarray.py | 7 +++++ 3 files changed, 88 insertions(+), 10 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 2bcc5ab85e2..5842a78636e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2731,6 +2731,7 @@ def stack( dimensions: Mapping[Any, Sequence[Hashable]] | None = None, create_index: bool | None = True, index_cls: type[Index] = PandasMultiIndex, + invert: bool = False, **dimensions_kwargs: Sequence[Hashable], ) -> Self: """ @@ -2752,9 +2753,15 @@ def stack( If False, don't create any index. If None, create a multi-index only if exactly one single (1-d) coordinate index is found for every dimension to stack. - index_cls: class, optional + index_cls : class, optional Can be used to pass a custom multi-index type. Must be an Xarray index that implements `.stack()`. By default, a pandas multi-index wrapper is used. + invert : bool, default: False + When `True`, all dimensions of the DataArray except for the sequence of + dimensions specified in the `dimensions` parameter will be stacked. + `dimensions` must have a length of 1 in this case, all dimensions listed + must exist in the current DataArray. Neither the `**dimensions_kwargs` nor + the ellipsis syntaxes may be used. **dimensions_kwargs The keyword arguments form of ``dimensions``. One of dimensions or dimensions_kwargs must be provided. @@ -2767,25 +2774,56 @@ def stack( Examples -------- >>> arr = xr.DataArray( - ... np.arange(6).reshape(2, 3), - ... coords=[("x", ["a", "b"]), ("y", [0, 1, 2])], + ... np.arange(12).reshape(2, 3, 2), + ... coords=[("x", ["a", "b"]), ("y", [0, 1, 2]), ("z", ["alpha", "beta"])], ... 
) >>> arr - - array([[0, 1, 2], - [3, 4, 5]]) + + array([[[ 0, 1], + [ 2, 3], + [ 4, 5]], + + [[ 6, 7], + [ 8, 9], + [10, 11]]]) Coordinates: * x (x) >> stacked = arr.stack(z=("x", "y")) - >>> stacked.indexes["z"] + * z (z) >> stacked = arr.stack(newdim=("x", "y")) + >>> stacked + + array([[ 0, 2, 4, 6, 8, 10], + [ 1, 3, 5, 7, 9, 11]]) + Coordinates: + * z (z) >> stacked.indexes["newdim"] MultiIndex([('a', 0), ('a', 1), ('a', 2), ('b', 0), ('b', 1), ('b', 2)], - name='z') + name='newdim') + >>> inverted_stack = arr.stack({"newdim": "z"}, invert=True) + >>> inverted_stack + + array([[ 0, 2, 4, 6, 8, 10], + [ 1, 3, 5, 7, 9, 11]]) + Coordinates: + * z (z) >> np.all(inverted_stack.indexes["newdim"] == stacked.indexes["newdim"]) + True + >>> np.all(inverted_stack == stacked) + + array(True) + See Also -------- @@ -2795,6 +2833,7 @@ def stack( dimensions, create_index=create_index, index_cls=index_cls, + invert=invert, **dimensions_kwargs, ) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index e49c981b827..48e8816866e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5169,6 +5169,7 @@ def stack( dimensions: Mapping[Any, Sequence[Hashable | ellipsis]] | None = None, create_index: bool | None = True, index_cls: type[Index] = PandasMultiIndex, + invert: bool = False, **dimensions_kwargs: Sequence[Hashable | ellipsis], ) -> Self: """ @@ -5192,9 +5193,15 @@ def stack( - None. create a multi-index only if exactly one single (1-d) coordinate index is found for every dimension to stack. - index_cls: Index-class, default: PandasMultiIndex + index_cls : Index-class, default: PandasMultiIndex Can be used to pass a custom multi-index type (must be an Xarray index that implements `.stack()`). By default, a pandas multi-index wrapper is used. + invert : bool, default: False + When `True`, all dimensions of the Dataset except for the sequence of + dimensions specified in the `dimensions` parameter will be stacked. 
+ `dimensions` must have a length of 1 in this case; all dimensions listed + must exist in the current Dataset. Neither the `**dimensions_kwargs` nor + the ellipsis syntaxes may be used. **dimensions_kwargs The keyword arguments form of ``dimensions``. One of dimensions or dimensions_kwargs must be provided. @@ -5210,8 +5217,33 @@ def stack( """ dimensions = either_dict_or_kwargs(dimensions, dimensions_kwargs, "stack") result = self + + if invert: + if len(dimensions) > 1: + raise ValueError( + "The dimensions argument must have length 1 when invert=True" + ) + + for new_dim, dims in dimensions.items(): + # When inverting, all dims listed should be in the current + # DataArray's dims + for dim in dims: + if dim is Ellipsis: + raise ValueError( + "Ellipsis syntax cannot be used when invert=True" + ) + if dim not in self.dims: + raise ValueError(f'Dimension "{dim}" does not exist') + + # Subtract specified dimensions from the DataArray's current + # dimensions and stack the resulting dimensions + dimensions = { + new_dim: tuple(dim for dim in self.dims if dim not in dims) + } + for new_dim, dims in dimensions.items(): result = result._stack_once(dims, new_dim, index_cls, create_index) + return result def to_stacked_array( diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index d497cd5a54d..36f9aaa702a 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2547,6 +2547,13 @@ def test_stack_nonunique_consistency(self, da) -> None: expected = DataArray(da.to_pandas().stack(), dims="z") assert_identical(expected, actual) + def test_stack_inverted_consistency(self) -> None: + da = DataArray( + [[0, 1], [2, 3]], + dims=["x", "y"], + ) + assert_identical(da.stack(z=["x"]), da.stack({"z": ["y"]}, invert=True)) + def test_to_unstacked_dataset_raises_value_error(self) -> None: data = DataArray([0, 1], dims="x", coords={"x": [0, 1]}) with pytest.raises(ValueError, match="'x' is not a stacked coordinate"):