From 45c0a114e2b7b27b83c9618bc05b36afac82183c Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 27 Sep 2022 11:33:40 +0200 Subject: [PATCH] Raise UserWarning when rename creates a new dimension coord (#6999) * warn when rename creates a new dimension coord UseWarning: no index is created anymore. * update what's new --- doc/whats-new.rst | 4 +++ xarray/core/dataarray.py | 4 +-- xarray/core/dataset.py | 55 ++++++++++++++++++++++++++-------- xarray/tests/test_dataarray.py | 17 +++++++++++ xarray/tests/test_dataset.py | 17 +++++++++++ 5 files changed, 83 insertions(+), 14 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index efc03bb19da..36f523df240 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -65,6 +65,10 @@ Bug fixes By `András Gunyhó `_. - Avoid use of random numbers in `test_weighted.test_weighted_operations_nonequal_coords` (:issue:`6504`, :pull:`6961`). By `Luke Conibear `_. +- Raise a ``UserWarning`` when renaming a coordinate or a dimension creates a + non-indexed dimension coordinate, and suggest the user creating an index + either with ``swap_dims`` or ``set_index`` (:issue:`6607`, :pull:`6999`). By + `Benoît Bovy `_. - Use ``keep_attrs=True`` in grouping and resampling operations by default (:issue:`7012`). This means :py:attr:`Dataset.attrs` and :py:attr:`DataArray.attrs` are now preserved by default. By `Deepak Cherian `_. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index bfb4d570a5d..6c09d8c15b4 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2063,11 +2063,11 @@ def rename( if utils.is_dict_like(new_name_or_name_dict) or new_name_or_name_dict is None: # change dims/coords name_dict = either_dict_or_kwargs(new_name_or_name_dict, names, "rename") - dataset = self._to_temp_dataset().rename(name_dict) + dataset = self._to_temp_dataset()._rename(name_dict) return self._from_temp_dataset(dataset) if utils.hashable(new_name_or_name_dict) and names: # change name + dims/coords - dataset = self._to_temp_dataset().rename(names) + dataset = self._to_temp_dataset()._rename(names) dataarray = self._from_temp_dataset(dataset) return dataarray._replace(name=new_name_or_name_dict) # only change name diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index eda5a727a51..620f32f8396 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3560,6 +3560,48 @@ def _rename_all( return variables, coord_names, dims, indexes + def _rename( + self: T_Dataset, + name_dict: Mapping[Any, Hashable] | None = None, + **names: Hashable, + ) -> T_Dataset: + """Also used internally by DataArray so that the warning (if any) + is raised at the right stack level. + """ + name_dict = either_dict_or_kwargs(name_dict, names, "rename") + for k in name_dict.keys(): + if k not in self and k not in self.dims: + raise ValueError( + f"cannot rename {k!r} because it is not a " + "variable or dimension in this dataset" + ) + + create_dim_coord = False + new_k = name_dict[k] + + if k in self.dims and new_k in self._coord_names: + coord_dims = self._variables[name_dict[k]].dims + if coord_dims == (k,): + create_dim_coord = True + elif k in self._coord_names and new_k in self.dims: + coord_dims = self._variables[k].dims + if coord_dims == (new_k,): + create_dim_coord = True + + if create_dim_coord: + warnings.warn( + f"rename {k!r} to {name_dict[k]!r} does not create an index " + "anymore. Try using swap_dims instead or use set_index " + "after rename to create an indexed coordinate.", + UserWarning, + stacklevel=3, + ) + + variables, coord_names, dims, indexes = self._rename_all( + name_dict=name_dict, dims_dict=name_dict + ) + return self._replace(variables, coord_names, dims=dims, indexes=indexes) + def rename( self: T_Dataset, name_dict: Mapping[Any, Hashable] | None = None, @@ -3588,18 +3630,7 @@ def rename( Dataset.rename_dims DataArray.rename """ - name_dict = either_dict_or_kwargs(name_dict, names, "rename") - for k in name_dict.keys(): - if k not in self and k not in self.dims: - raise ValueError( - f"cannot rename {k!r} because it is not a " - "variable or dimension in this dataset" - ) - - variables, coord_names, dims, indexes = self._rename_all( - name_dict=name_dict, dims_dict=name_dict - ) - return self._replace(variables, coord_names, dims=dims, indexes=indexes) + return self._rename(name_dict=name_dict, **names) def rename_dims( self: T_Dataset, diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index ab6e5763248..f27a6c07280 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1742,6 +1742,23 @@ def test_rename(self) -> None: ) assert_identical(renamed_all, expected_all) + def test_rename_dimension_coord_warnings(self) -> None: + # create a dimension coordinate by renaming a dimension or coordinate + # should raise a warning (no index created) + da = DataArray([0, 0], coords={"x": ("y", [0, 1])}, dims="y") + + with pytest.warns( + UserWarning, match="rename 'x' to 'y' does not create an index.*" + ): + da.rename(x="y") + + da = xr.DataArray([0, 0], coords={"y": ("x", [0, 1])}, dims="x") + + with pytest.warns( + UserWarning, match="rename 'x' to 'y' does not create an index.*" + ): + da.rename(x="y") + def test_init_value(self) -> None: expected = DataArray( np.full((3, 4), 3), dims=["x", "y"], coords=[range(3), range(4)] diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index e0bc73ec044..fc2806c79c5 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2892,6 +2892,23 @@ def test_rename_dimension_coord(self) -> None: actual_2 = original.rename_dims({"x": "x_new"}) assert "x" in actual_2.xindexes + def test_rename_dimension_coord_warnings(self) -> None: + # create a dimension coordinate by renaming a dimension or coordinate + # should raise a warning (no index created) + ds = Dataset(coords={"x": ("y", [0, 1])}) + + with pytest.warns( + UserWarning, match="rename 'x' to 'y' does not create an index.*" + ): + ds.rename(x="y") + + ds = Dataset(coords={"y": ("x", [0, 1])}) + + with pytest.warns( + UserWarning, match="rename 'x' to 'y' does not create an index.*" + ): + ds.rename(x="y") + def test_rename_multiindex(self) -> None: mindex = pd.MultiIndex.from_tuples([([1, 2]), ([3, 4])], names=["a", "b"]) original = Dataset({}, {"x": mindex})