diff --git a/doc/gallery/plot_cartopy_facetgrid.py b/doc/gallery/plot_cartopy_facetgrid.py index d8f5e73ee56..a4ab23c42a6 100644 --- a/doc/gallery/plot_cartopy_facetgrid.py +++ b/doc/gallery/plot_cartopy_facetgrid.py @@ -30,7 +30,7 @@ transform=ccrs.PlateCarree(), # the data's projection col="time", col_wrap=1, # multiplot settings - aspect=ds.dims["lon"] / ds.dims["lat"], # for a sensible figsize + aspect=ds.sizes["lon"] / ds.sizes["lat"], # for a sensible figsize subplot_kws={"projection": map_proj}, # the plot's projection ) diff --git a/doc/user-guide/interpolation.rst b/doc/user-guide/interpolation.rst index 7b40962e826..311e1bf0129 100644 --- a/doc/user-guide/interpolation.rst +++ b/doc/user-guide/interpolation.rst @@ -292,8 +292,8 @@ Let's see how :py:meth:`~xarray.DataArray.interp` works on real data. axes[0].set_title("Raw data") # Interpolated data - new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims["lon"] * 4) - new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims["lat"] * 4) + new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.sizes["lon"] * 4) + new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.sizes["lat"] * 4) dsi = ds.interp(lat=new_lat, lon=new_lon) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample3.png width=8in diff --git a/doc/user-guide/terminology.rst b/doc/user-guide/terminology.rst index ce7e55546a4..55937310827 100644 --- a/doc/user-guide/terminology.rst +++ b/doc/user-guide/terminology.rst @@ -47,9 +47,9 @@ complete examples, please consult the relevant documentation.* all but one of these degrees of freedom is fixed. We can think of each dimension axis as having a name, for example the "x dimension". In xarray, a ``DataArray`` object's *dimensions* are its named dimension - axes, and the name of the ``i``-th dimension is ``arr.dims[i]``. If an - array is created without dimension names, the default dimension names are - ``dim_0``, ``dim_1``, and so forth. + axes ``da.dims``, and the name of the ``i``-th dimension is ``da.dims[i]``. 
+ If an array is created without specifying dimension names, the default dimension + names will be ``dim_0``, ``dim_1``, and so forth. Coordinate An array that labels a dimension or set of dimensions of another @@ -61,8 +61,7 @@ complete examples, please consult the relevant documentation.* ``arr.coords[x]``. A ``DataArray`` can have more coordinates than dimensions because a single dimension can be labeled by multiple coordinate arrays. However, only one coordinate array can be a assigned - as a particular dimension's dimension coordinate array. As a - consequence, ``len(arr.dims) <= len(arr.coords)`` in general. + as a particular dimension's dimension coordinate array. Dimension coordinate A one-dimensional coordinate array assigned to ``arr`` with both a name diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cda6d6f1d74..2dd1fbea64c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -66,10 +66,17 @@ Deprecations currently ``PendingDeprecationWarning``, which are silenced by default. We'll convert these to ``DeprecationWarning`` in a future release. By `Maximilian Roos <https://github.com/max-sixty>`_. -- :py:meth:`Dataset.drop` & - :py:meth:`DataArray.drop` are now deprecated, since pending deprecation for +- Raise a ``FutureWarning`` announcing that the type of :py:meth:`Dataset.dims` will be changed + from a mapping of dimension names to lengths to a set of dimension names. + This is to increase consistency with :py:meth:`DataArray.dims`. + To access a mapping of dimension names to lengths, please use :py:meth:`Dataset.sizes`. + The same change also applies to ``DatasetGroupBy.dims``. + (:issue:`8496`, :pull:`8500`) + By `Tom Nicholas <https://github.com/TomNicholas>`_. +- :py:meth:`Dataset.drop` & :py:meth:`DataArray.drop` are now deprecated, since pending deprecation for several years. :py:meth:`DataArray.drop_sel` & :py:meth:`DataArray.drop_var` - replace them for labels & variables respectively. + replace them for labels & variables respectively. (:pull:`8497`) + By `Maximilian Roos <https://github.com/max-sixty>`_.
Bug fixes ~~~~~~~~~ diff --git a/xarray/core/common.py b/xarray/core/common.py index c2c0a79edb6..6dff9cc4024 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1167,7 +1167,7 @@ def _dataset_indexer(dim: Hashable) -> DataArray: cond_wdim = cond.drop_vars( var for var in cond if dim not in cond[var].dims ) - keepany = cond_wdim.any(dim=(d for d in cond.dims.keys() if d != dim)) + keepany = cond_wdim.any(dim=(d for d in cond.dims if d != dim)) return keepany.to_dataarray().any("variable") _get_indexer = ( diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 8c558b38848..c7a7c178bd8 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -315,7 +315,7 @@ def _calc_concat_over(datasets, dim, dim_names, data_vars: T_DataVars, coords, c if dim in ds: ds = ds.set_coords(dim) concat_over.update(k for k, v in ds.variables.items() if dim in v.dims) - concat_dim_lengths.append(ds.dims.get(dim, 1)) + concat_dim_lengths.append(ds.sizes.get(dim, 1)) def process_subset_opt(opt, subset): if isinstance(opt, str): @@ -431,7 +431,7 @@ def _parse_datasets( variables_order: dict[Hashable, Variable] = {} # variables in order of appearance for ds in datasets: - dims_sizes.update(ds.dims) + dims_sizes.update(ds.sizes) all_coord_names.update(ds.coords) data_vars.update(ds.data_vars) variables_order.update(ds.variables) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index cfa72ab557e..cd143d96e7c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -105,6 +105,7 @@ from xarray.core.utils import ( Default, Frozen, + FrozenMappingWarningOnValuesAccess, HybridMappingProxy, OrderedSet, _default, @@ -778,14 +779,15 @@ def dims(self) -> Frozen[Hashable, int]: Note that type of this object differs from `DataArray.dims`. See `Dataset.sizes` and `DataArray.sizes` for consistently named - properties. + properties. This property will be changed to return a type more consistent with + `DataArray.dims` in the future, i.e. 
a set of dimension names. See Also -------- Dataset.sizes DataArray.dims """ - return Frozen(self._dims) + return FrozenMappingWarningOnValuesAccess(self._dims) @property def sizes(self) -> Frozen[Hashable, int]: @@ -800,7 +802,7 @@ def sizes(self) -> Frozen[Hashable, int]: -------- DataArray.sizes """ - return self.dims + return Frozen(self._dims) @property def dtypes(self) -> Frozen[Hashable, np.dtype]: @@ -1411,7 +1413,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> Self: variables[name] = self._variables[name] except KeyError: ref_name, var_name, var = _get_virtual_variable( - self._variables, name, self.dims + self._variables, name, self.sizes ) variables[var_name] = var if ref_name in self._coord_names or ref_name in self.dims: @@ -1426,7 +1428,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> Self: for v in variables.values(): needed_dims.update(v.dims) - dims = {k: self.dims[k] for k in needed_dims} + dims = {k: self.sizes[k] for k in needed_dims} # preserves ordering of coordinates for k in self._variables: @@ -1448,7 +1450,7 @@ def _construct_dataarray(self, name: Hashable) -> DataArray: try: variable = self._variables[name] except KeyError: - _, name, variable = _get_virtual_variable(self._variables, name, self.dims) + _, name, variable = _get_virtual_variable(self._variables, name, self.sizes) needed_dims = set(variable.dims) @@ -1475,7 +1477,7 @@ def _item_sources(self) -> Iterable[Mapping[Hashable, Any]]: yield HybridMappingProxy(keys=self._coord_names, mapping=self.coords) # virtual coordinates - yield HybridMappingProxy(keys=self.dims, mapping=self) + yield HybridMappingProxy(keys=self.sizes, mapping=self) def __contains__(self, key: object) -> bool: """The 'in' operator will return true or false depending on whether @@ -2569,7 +2571,7 @@ def info(self, buf: IO | None = None) -> None: lines = [] lines.append("xarray.Dataset {") lines.append("dimensions:") - for name, size in self.dims.items(): + for name, size in 
self.sizes.items(): lines.append(f"\t{name} = {size} ;") lines.append("\nvariables:") for name, da in self.variables.items(): @@ -2697,10 +2699,10 @@ def chunk( else: chunks_mapping = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") - bad_dims = chunks_mapping.keys() - self.dims.keys() + bad_dims = chunks_mapping.keys() - self.sizes.keys() if bad_dims: raise ValueError( - f"chunks keys {tuple(bad_dims)} not found in data dimensions {tuple(self.dims)}" + f"chunks keys {tuple(bad_dims)} not found in data dimensions {tuple(self.sizes.keys())}" ) chunkmanager = guess_chunkmanager(chunked_array_type) @@ -3952,7 +3954,7 @@ def maybe_variable(obj, k): try: return obj._variables[k] except KeyError: - return as_variable((k, range(obj.dims[k]))) + return as_variable((k, range(obj.sizes[k]))) def _validate_interp_indexer(x, new_x): # In the case of datetimes, the restrictions placed on indexers @@ -4176,7 +4178,7 @@ def _rename_vars( return variables, coord_names def _rename_dims(self, name_dict: Mapping[Any, Hashable]) -> dict[Hashable, int]: - return {name_dict.get(k, k): v for k, v in self.dims.items()} + return {name_dict.get(k, k): v for k, v in self.sizes.items()} def _rename_indexes( self, name_dict: Mapping[Any, Hashable], dims_dict: Mapping[Any, Hashable] @@ -5168,7 +5170,7 @@ def _get_stack_index( if dim in self._variables: var = self._variables[dim] else: - _, _, var = _get_virtual_variable(self._variables, dim, self.dims) + _, _, var = _get_virtual_variable(self._variables, dim, self.sizes) # dummy index (only `stack_coords` will be used to construct the multi-index) stack_index = PandasIndex([0], dim) stack_coords = {dim: var} @@ -5195,7 +5197,7 @@ def _stack_once( if any(d in var.dims for d in dims): add_dims = [d for d in dims if d not in var.dims] vdims = list(var.dims) + add_dims - shape = [self.dims[d] for d in vdims] + shape = [self.sizes[d] for d in vdims] exp_var = var.set_dims(vdims, shape) stacked_var = exp_var.stack(**{new_dim: dims}) 
new_variables[name] = stacked_var @@ -6351,7 +6353,7 @@ def dropna( if subset is None: subset = iter(self.data_vars) - count = np.zeros(self.dims[dim], dtype=np.int64) + count = np.zeros(self.sizes[dim], dtype=np.int64) size = np.int_(0) # for type checking for k in subset: @@ -6359,7 +6361,7 @@ def dropna( if dim in array.dims: dims = [d for d in array.dims if d != dim] count += np.asarray(array.count(dims)) - size += math.prod([self.dims[d] for d in dims]) + size += math.prod([self.sizes[d] for d in dims]) if thresh is not None: mask = count >= thresh @@ -7136,7 +7138,7 @@ def _normalize_dim_order( f"Dataset: {list(self.dims)}" ) - ordered_dims = {k: self.dims[k] for k in dim_order} + ordered_dims = {k: self.sizes[k] for k in dim_order} return ordered_dims @@ -7396,7 +7398,7 @@ def to_dask_dataframe( var = self.variables[name] except KeyError: # dimension without a matching coordinate - size = self.dims[name] + size = self.sizes[name] data = da.arange(size, chunks=size, dtype=np.int64) var = Variable((name,), data) @@ -7469,7 +7471,7 @@ def to_dict( d: dict = { "coords": {}, "attrs": decode_numpy_dict_values(self.attrs), - "dims": dict(self.dims), + "dims": dict(self.sizes), "data_vars": {}, } for k in self.coords: diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 11b60f3d1fe..92bfe2fbfc4 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -739,7 +739,7 @@ def dataset_repr(ds): def diff_dim_summary(a, b): - if a.dims != b.dims: + if a.sizes != b.sizes: return f"Differing dimensions:\n ({dim_summary(a)}) != ({dim_summary(b)})" else: return "" diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 3627554cf57..efd74111823 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -37,17 +37,18 @@ def short_data_repr_html(array) -> str: return f"
{text}
" -def format_dims(dims, dims_with_index) -> str: - if not dims: +def format_dims(dim_sizes, dims_with_index) -> str: + if not dim_sizes: return "" dim_css_map = { - dim: " class='xr-has-index'" if dim in dims_with_index else "" for dim in dims + dim: " class='xr-has-index'" if dim in dims_with_index else "" + for dim in dim_sizes } dims_li = "".join( f"
  • " f"{escape(str(dim))}: {size}
  • " - for dim, size in dims.items() + for dim, size in dim_sizes.items() ) return f"
      {dims_li}
    " @@ -204,7 +205,7 @@ def _mapping_section( def dim_section(obj) -> str: - dim_list = format_dims(obj.dims, obj.xindexes.dims) + dim_list = format_dims(obj.sizes, obj.xindexes.dims) return collapsible_section( "Dimensions", inline_details=dim_list, enabled=False, collapsed=True diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 8c81d3e6a96..15bd8d1e35b 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -36,6 +36,7 @@ from xarray.core.pycompat import integer_types from xarray.core.types import Dims, QuantileMethods, T_DataArray, T_Xarray from xarray.core.utils import ( + FrozenMappingWarningOnValuesAccess, either_dict_or_kwargs, hashable, is_scalar, @@ -1519,7 +1520,7 @@ def dims(self) -> Frozen[Hashable, int]: if self._dims is None: self._dims = self._obj.isel({self._group_dim: self._group_indices[0]}).dims - return self._dims + return FrozenMappingWarningOnValuesAccess(self._dims) def map( self, diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 9ba4a43f6d9..61372513b2a 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -50,12 +50,15 @@ Collection, Container, Hashable, + ItemsView, Iterable, Iterator, + KeysView, Mapping, MutableMapping, MutableSet, Sequence, + ValuesView, ) from enum import Enum from typing import ( @@ -473,6 +476,57 @@ def FrozenDict(*args, **kwargs) -> Frozen: return Frozen(dict(*args, **kwargs)) +class FrozenMappingWarningOnValuesAccess(Frozen[K, V]): + """ + Class which behaves like a Mapping but warns if the values are accessed. + + Temporary object to aid in deprecation cycle of `Dataset.dims` (see GH issue #8496). + `Dataset.dims` is being changed from returning a mapping of dimension names to lengths to just + returning a frozen set of dimension names (to increase consistency with `DataArray.dims`). + This class retains backwards compatibility but raises a warning only if the return value + of ds.dims is used like a dictionary (i.e. 
it doesn't raise a warning if used in a way that + would also be valid for a FrozenSet, e.g. iteration). + """ + + __slots__ = ("mapping",) + + def _warn(self) -> None: + warnings.warn( + "The return type of `Dataset.dims` will be changed to return a set of dimension names in future, " + "in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, " + "please use `Dataset.sizes`.", + FutureWarning, + ) + + def __getitem__(self, key: K) -> V: + self._warn() + return super().__getitem__(key) + + @overload + def get(self, key: K, /) -> V | None: + ... + + @overload + def get(self, key: K, /, default: V | T) -> V | T: + ... + + def get(self, key: K, default: T | None = None) -> V | T | None: + self._warn() + return super().get(key, default) + + def keys(self) -> KeysView[K]: + self._warn() + return super().keys() + + def items(self) -> ItemsView[K, V]: + self._warn() + return super().items() + + def values(self) -> ValuesView[V]: + self._warn() + return super().values() + + class HybridMappingProxy(Mapping[K, V]): """Implements the Mapping interface. Uses the wrapped mapping for item lookup and a separate wrapped keys collection for iteration. 
diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index ffcae0fc664..b3a31b28016 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -223,11 +223,18 @@ def source_ndarray(array): return base +def format_record(record) -> str: + """Format warning record like `FutureWarning('Function will be deprecated...')`""" + return f"{str(record.category)[8:-2]}('{record.message}'))" + + @contextmanager def assert_no_warnings(): with warnings.catch_warnings(record=True) as record: yield record - assert len(record) == 0, "got unexpected warning(s)" + assert ( + len(record) == 0 + ), f"Got {len(record)} unexpected warning(s): {[format_record(r) for r in record]}" # Internal versions of xarray's test functions that validate additional diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 396507652c6..0d9b7c88ae1 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1378,7 +1378,7 @@ def func(da): expected = extract(ds) actual = extract(ds.chunk()) - assert actual.dims == {"lon_new": 3, "lat_new": 6} + assert actual.sizes == {"lon_new": 3, "lat_new": 6} assert_identical(expected.chunk(), actual) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 92415631748..d1fc085bf0f 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -509,7 +509,7 @@ def test_concat_coords_kwarg(self, data, dim, coords) -> None: actual = concat(datasets, data[dim], coords=coords) if coords == "all": - expected = np.array([data["extra"].values for _ in range(data.dims[dim])]) + expected = np.array([data["extra"].values for _ in range(data.sizes[dim])]) assert_array_equal(actual["extra"].values, expected) else: @@ -1214,7 +1214,7 @@ def test_concat_preserve_coordinate_order() -> None: # check dimension order for act, exp in zip(actual.dims, expected.dims): assert act == exp - assert actual.dims[act] == expected.dims[exp] + assert actual.sizes[act] == 
expected.sizes[exp] # check coordinate order for act, exp in zip(actual.coords, expected.coords): diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index ef73371dfe4..68ce55b05da 100644 --- a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -79,9 +79,10 @@ def test_from_pandas_multiindex(self) -> None: for name in ("x", "one", "two"): assert_identical(expected[name], coords.variables[name]) + @pytest.mark.filterwarnings("ignore:return type") def test_dims(self) -> None: coords = Coordinates(coords={"x": [0, 1, 2]}) - assert coords.dims == {"x": 3} + assert set(coords.dims) == {"x"} def test_sizes(self) -> None: coords = Coordinates(coords={"x": [0, 1, 2]}) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 37ddcf2786a..17515744a31 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -687,6 +687,7 @@ class CustomIndex(Index): # test coordinate variables copied assert ds.variables["x"] is not coords.variables["x"] + @pytest.mark.filterwarnings("ignore:return type") def test_properties(self) -> None: ds = create_test_data() @@ -694,10 +695,11 @@ def test_properties(self) -> None: # These exact types aren't public API, but this makes sure we don't # change them inadvertently: assert isinstance(ds.dims, utils.Frozen) + # TODO change after deprecation cycle in GH #8500 is complete assert isinstance(ds.dims.mapping, dict) assert type(ds.dims.mapping) is dict # noqa: E721 - assert ds.dims == {"dim1": 8, "dim2": 9, "dim3": 10, "time": 20} - assert ds.sizes == ds.dims + assert ds.dims == ds.sizes + assert ds.sizes == {"dim1": 8, "dim2": 9, "dim3": 10, "time": 20} # dtypes assert isinstance(ds.dtypes, utils.Frozen) @@ -749,6 +751,27 @@ def test_properties(self) -> None: == 16 ) + def test_warn_ds_dims_deprecation(self) -> None: + # TODO remove after deprecation cycle in GH #8500 is complete + ds = create_test_data() + + with pytest.warns(FutureWarning, match="return 
type"): + ds.dims["dim1"] + + with pytest.warns(FutureWarning, match="return type"): + ds.dims.keys() + + with pytest.warns(FutureWarning, match="return type"): + ds.dims.values() + + with pytest.warns(FutureWarning, match="return type"): + ds.dims.items() + + with assert_no_warnings(): + len(ds.dims) + ds.dims.__iter__() + "dim1" in ds.dims + def test_asarray(self) -> None: ds = Dataset({"x": 0}) with pytest.raises(TypeError, match=r"cannot directly convert"): @@ -804,7 +827,7 @@ def test_modify_inplace(self) -> None: b = Dataset() b["x"] = ("x", vec, attributes) assert_identical(a["x"], b["x"]) - assert a.dims == b.dims + assert a.sizes == b.sizes # this should work a["x"] = ("x", vec[:5]) a["z"] = ("x", np.arange(5)) @@ -865,7 +888,7 @@ def test_coords_properties(self) -> None: assert expected == actual # dims - assert coords.dims == {"x": 2, "y": 3} + assert coords.sizes == {"x": 2, "y": 3} # dtypes assert coords.dtypes == { @@ -1215,9 +1238,9 @@ def test_isel(self) -> None: assert list(data.dims) == list(ret.dims) for d in data.dims: if d in slicers: - assert ret.dims[d] == np.arange(data.dims[d])[slicers[d]].size + assert ret.sizes[d] == np.arange(data.sizes[d])[slicers[d]].size else: - assert data.dims[d] == ret.dims[d] + assert data.sizes[d] == ret.sizes[d] # Verify that the data is what we expect for v in data.variables: assert data[v].dims == ret[v].dims @@ -1251,19 +1274,19 @@ def test_isel(self) -> None: assert_identical(data, data.isel(not_a_dim=slice(0, 2), missing_dims="ignore")) ret = data.isel(dim1=0) - assert {"time": 20, "dim2": 9, "dim3": 10} == ret.dims + assert {"time": 20, "dim2": 9, "dim3": 10} == ret.sizes assert set(data.data_vars) == set(ret.data_vars) assert set(data.coords) == set(ret.coords) assert set(data.xindexes) == set(ret.xindexes) ret = data.isel(time=slice(2), dim1=0, dim2=slice(5)) - assert {"time": 2, "dim2": 5, "dim3": 10} == ret.dims + assert {"time": 2, "dim2": 5, "dim3": 10} == ret.sizes assert set(data.data_vars) == 
set(ret.data_vars) assert set(data.coords) == set(ret.coords) assert set(data.xindexes) == set(ret.xindexes) ret = data.isel(time=0, dim1=0, dim2=slice(5)) - assert {"dim2": 5, "dim3": 10} == ret.dims + assert {"dim2": 5, "dim3": 10} == ret.sizes assert set(data.data_vars) == set(ret.data_vars) assert set(data.coords) == set(ret.coords) assert set(data.xindexes) == set(list(ret.xindexes) + ["time"]) @@ -4971,7 +4994,7 @@ def test_pickle(self) -> None: roundtripped = pickle.loads(pickle.dumps(data)) assert_identical(data, roundtripped) # regression test for #167: - assert data.dims == roundtripped.dims + assert data.sizes == roundtripped.sizes def test_lazy_load(self) -> None: store = InaccessibleVariableDataStore() @@ -5429,7 +5452,7 @@ def test_reduce_non_numeric(self) -> None: data2 = create_test_data(seed=44) add_vars = {"var4": ["dim1", "dim2"], "var5": ["dim1"]} for v, dims in sorted(add_vars.items()): - size = tuple(data1.dims[d] for d in dims) + size = tuple(data1.sizes[d] for d in dims) data = np.random.randint(0, 100, size=size).astype(np.str_) data1[v] = (dims, data, {"foo": "variable"}) @@ -6478,7 +6501,7 @@ def test_pad(self) -> None: assert padded["var1"].shape == (8, 11) assert padded["var2"].shape == (8, 11) assert padded["var3"].shape == (10, 8) - assert dict(padded.dims) == {"dim1": 8, "dim2": 11, "dim3": 10, "time": 20} + assert dict(padded.sizes) == {"dim1": 8, "dim2": 11, "dim3": 10, "time": 20} np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42) np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan) @@ -7173,7 +7196,7 @@ def test_clip(ds) -> None: assert all((result.max(...) 
<= 0.75).values()) result = ds.clip(min=ds.mean("y"), max=ds.mean("y")) - assert result.dims == ds.dims + assert result.sizes == ds.sizes class TestDropDuplicates: diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index b166992deb1..84820d56c45 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -59,6 +59,7 @@ def test_consolidate_slices() -> None: _consolidate_slices([slice(3), 4]) # type: ignore[list-item] +@pytest.mark.filterwarnings("ignore:return type") def test_groupby_dims_property(dataset) -> None: assert dataset.groupby("x").dims == dataset.isel(x=1).dims assert dataset.groupby("y").dims == dataset.isel(y=1).dims @@ -67,6 +68,14 @@ def test_groupby_dims_property(dataset) -> None: assert stacked.groupby("xy").dims == stacked.isel(xy=0).dims +def test_groupby_sizes_property(dataset) -> None: + assert dataset.groupby("x").sizes == dataset.isel(x=1).sizes + assert dataset.groupby("y").sizes == dataset.isel(y=1).sizes + + stacked = dataset.stack({"xy": ("x", "y")}) + assert stacked.groupby("xy").sizes == stacked.isel(xy=0).sizes + + def test_multi_index_groupby_map(dataset) -> None: # regression test for GH873 ds = dataset.isel(z=1, drop=True)[["foo"]] diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 20a54c3ed53..45a649605f3 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -548,7 +548,7 @@ def test_ffill_limit(): def test_interpolate_dataset(ds): actual = ds.interpolate_na(dim="time") # no missing values in var1 - assert actual["var1"].count("time") == actual.dims["time"] + assert actual["var1"].count("time") == actual.sizes["time"] # var2 should be the same as it was assert_array_equal(actual["var2"], ds["var2"]) diff --git a/xarray/tests/test_rolling.py b/xarray/tests/test_rolling.py index 0ea373e3ab0..db5a76f5b7d 100644 --- a/xarray/tests/test_rolling.py +++ b/xarray/tests/test_rolling.py @@ -237,7 +237,7 @@ def test_rolling_reduce( actual = 
rolling_obj.reduce(getattr(np, "nan%s" % name)) expected = getattr(rolling_obj, name)() assert_allclose(actual, expected) - assert actual.dims == expected.dims + assert actual.sizes == expected.sizes @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("min_periods", (None, 1, 2, 3)) @@ -259,7 +259,7 @@ def test_rolling_reduce_nonnumeric( actual = rolling_obj.reduce(getattr(np, "nan%s" % name)) expected = getattr(rolling_obj, name)() assert_allclose(actual, expected) - assert actual.dims == expected.dims + assert actual.sizes == expected.sizes def test_rolling_count_correct(self, compute_backend) -> None: da = DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time") @@ -315,7 +315,7 @@ def test_ndrolling_reduce( )() assert_allclose(actual, expected) - assert actual.dims == expected.dims + assert actual.sizes == expected.sizes if name in ["mean"]: # test our reimplementation of nanmean using np.nanmean @@ -724,7 +724,7 @@ def test_rolling_reduce(self, ds, center, min_periods, window, name) -> None: actual = rolling_obj.reduce(getattr(np, "nan%s" % name)) expected = getattr(rolling_obj, name)() assert_allclose(actual, expected) - assert ds.dims == actual.dims + assert ds.sizes == actual.sizes # make sure the order of data_var are not changed. 
assert list(ds.data_vars.keys()) == list(actual.data_vars.keys()) @@ -751,7 +751,7 @@ def test_ndrolling_reduce(self, ds, center, min_periods, name, dask) -> None: name, )() assert_allclose(actual, expected) - assert actual.dims == expected.dims + assert actual.sizes == expected.sizes # Do it in the opposite order expected = getattr( @@ -762,7 +762,7 @@ def test_ndrolling_reduce(self, ds, center, min_periods, name, dask) -> None: )() assert_allclose(actual, expected) - assert actual.dims == expected.dims + assert actual.sizes == expected.sizes @pytest.mark.parametrize("center", (True, False, (True, False))) @pytest.mark.parametrize("fill_value", (np.nan, 0.0)) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 0bea3f63673..a2ae1e61cf2 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1705,6 +1705,7 @@ def test_broadcasting_math(self): v * w[0], Variable(["a", "b", "c", "d"], np.einsum("ab,cd->abcd", x, y[0])) ) + @pytest.mark.filterwarnings("ignore:Duplicate dimension names") def test_broadcasting_failures(self): a = Variable(["x"], np.arange(10)) b = Variable(["x"], np.arange(5))