From f045401ca79ecd1b80a0da67f44404c4e208fe31 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Sat, 16 Jul 2022 19:02:55 +0200 Subject: [PATCH 01/12] Fix typos found by codespell (#6794) --- doc/user-guide/io.rst | 2 +- doc/user-guide/plotting.rst | 2 +- xarray/coding/calendar_ops.py | 2 +- xarray/coding/cftime_offsets.py | 2 +- xarray/core/alignment.py | 2 +- xarray/core/concat.py | 2 +- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 2 +- xarray/core/indexes.py | 2 +- xarray/core/rolling.py | 2 +- xarray/tests/test_cftime_offsets.py | 4 ++-- xarray/tests/test_dataset.py | 4 ++-- xarray/tests/test_groupby.py | 2 +- 13 files changed, 15 insertions(+), 15 deletions(-) diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 5b3d7a324d2..beab5fc050b 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -790,7 +790,7 @@ Chunk sizes may be specified in one of three ways when writing to a zarr store: The resulting chunks will be determined based on the order of the above list; dask chunks will be overridden by manually-specified chunks in the encoding argument, and the presence of either dask chunks or chunks in the ``encoding`` attribute will -supercede the default chunking heuristics in zarr. +supersede the default chunking heuristics in zarr. Importantly, this logic applies to every array in the zarr store individually, including coordinate arrays. Therefore, if a dataset contains one or more dask diff --git a/doc/user-guide/plotting.rst b/doc/user-guide/plotting.rst index 78182ed265f..9fb34712f32 100644 --- a/doc/user-guide/plotting.rst +++ b/doc/user-guide/plotting.rst @@ -585,7 +585,7 @@ Faceting here refers to splitting an array along one or two dimensions and plotting each group. Xarray's basic plotting is useful for plotting two dimensional arrays. What about three or four dimensional arrays? That's where facets become helpful. -The general approach to plotting here is called “small multiples”, where the same kind of plot is repeated multiple times, and the specific use of small multiples to display the same relationship conditioned on one ore more other variables is often called a “trellis plot”. +The general approach to plotting here is called “small multiples”, where the same kind of plot is repeated multiple times, and the specific use of small multiples to display the same relationship conditioned on one or more other variables is often called a “trellis plot”. Consider the temperature data set. There are 4 observations per day for two years which makes for 2920 values along the time dimension. diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index a78ce3052bb..04e46e942a1 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -98,7 +98,7 @@ def convert_calendar( Notes ----- Passing a value to `missing` is only usable if the source's time coordinate as an - inferrable frequencies (see :py:func:`~xarray.infer_freq`) and is only appropriate + inferable frequencies (see :py:func:`~xarray.infer_freq`) and is only appropriate if the target coordinate, generated from this frequency, has dates equivalent to the source. 
It is usually **not** appropriate to use this mode with: diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index a4e2870650d..a029f39c7b8 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -1200,7 +1200,7 @@ def date_range_like(source, calendar, use_cftime=None): freq = infer_freq(source) if freq is None: raise ValueError( - "`date_range_like` was unable to generate a range as the source frequency was not inferrable." + "`date_range_like` was unable to generate a range as the source frequency was not inferable." ) use_cftime = _should_cftime_be_used(source, calendar, use_cftime) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index aed41e05777..303eb6c0ef0 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -285,7 +285,7 @@ def find_matching_unindexed_dims(self) -> None: self.unindexed_dim_sizes = unindexed_dim_sizes def assert_no_index_conflict(self) -> None: - """Check for uniqueness of both coordinate and dimension names accross all sets + """Check for uniqueness of both coordinate and dimension names across all sets of matching indexes. We need to make sure that all indexes used for re-indexing or alignment diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 92e81dca4e3..34cd2c82d92 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -535,7 +535,7 @@ def ensure_common_dims(vars): # get the indexes to concatenate together, create a PandasIndex # for any scalar coordinate variable found with ``name`` matching ``dim``. - # TODO: depreciate concat a mix of scalar and dimensional indexed coodinates? + # TODO: depreciate concat a mix of scalar and dimensional indexed coordinates? # TODO: (benbovy - explicit indexes): check index types and/or coordinates # of all datasets? def get_indexes(name): diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b4acdad9f1c..1f79d048379 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -5215,7 +5215,7 @@ def convert_calendar( Notes ----- Passing a value to `missing` is only usable if the source's time coordinate as an - inferrable frequencies (see :py:func:`~xarray.infer_freq`) and is only appropriate + inferable frequencies (see :py:func:`~xarray.infer_freq`) and is only appropriate if the target coordinate, generated from this frequency, has dates equivalent to the source. It is usually **not** appropriate to use this mode with: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 80725b1bc11..dc147fa921d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -8464,7 +8464,7 @@ def convert_calendar( Notes ----- Passing a value to `missing` is only usable if the source's time coordinate as an - inferrable frequencies (see :py:func:`~xarray.infer_freq`) and is only appropriate + inferable frequencies (see :py:func:`~xarray.infer_freq`) and is only appropriate if the target coordinate, generated from this frequency, has dates equivalent to the source. It is usually **not** appropriate to use this mode with: diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 8589496b5eb..d7133683d83 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -958,7 +958,7 @@ def create_default_index_implicit( Create a PandasMultiIndex if the given variable wraps a pandas.MultiIndex, otherwise create a PandasIndex (note that this will become obsolete once we - depreciate implcitly passing a pandas.MultiIndex as a coordinate). 
+ depreciate implicitly passing a pandas.MultiIndex as a coordinate). """ if all_variables is None: diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 43a941b90d3..fc297f33cd9 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -834,7 +834,7 @@ def __init__( multiple of window size. If 'trim', the excess indexes are trimmed. If 'pad', NA will be padded. side : 'left' or 'right' or mapping from dimension to 'left' or 'right' - coord_func : function (name) or mapping from coordinate name to funcion (name). + coord_func : function (name) or mapping from coordinate name to function (name). Returns ------- diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 246be9d3514..075393e84e7 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1358,11 +1358,11 @@ def test_date_range_like_same_calendar(): def test_date_range_like_errors(): src = date_range("1899-02-03", periods=20, freq="D", use_cftime=False) - src = src[np.arange(20) != 10] # Remove 1 day so the frequency is not inferrable. + src = src[np.arange(20) != 10] # Remove 1 day so the frequency is not inferable. with pytest.raises( ValueError, - match="`date_range_like` was unable to generate a range as the source frequency was not inferrable.", + match="`date_range_like` was unable to generate a range as the source frequency was not inferable.", ): date_range_like(src, "gregorian") diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index fb7cf9430f3..459acfd87fa 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2834,7 +2834,7 @@ def test_rename_dims(self) -> None: {"x": ("x_new", [0, 1, 2]), "y": ("x_new", [10, 11, 12]), "z": 42} ) # TODO: (benbovy - explicit indexes) update when set_index supports - # seeting index for non-dimension variables + # setting index for non-dimension variables expected = expected.set_coords("x") actual = original.rename_dims({"x": "x_new"}) assert_identical(expected, actual, check_default_indexes=False) @@ -2855,7 +2855,7 @@ def test_rename_vars(self) -> None: {"x_new": ("x", [0, 1, 2]), "y": ("x", [10, 11, 12]), "z": 42} ) # TODO: (benbovy - explicit indexes) update when set_index supports - # seeting index for non-dimension variables + # setting index for non-dimension variables expected = expected.set_coords("x_new") actual = original.rename_vars({"x": "x_new"}) assert_identical(expected, actual, check_default_indexes=False) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index fc3e1434684..801dc7c6156 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1175,7 +1175,7 @@ def test_groupby_keep_attrs(self, keep_attrs): with xr.set_options(use_flox=True): actual = array.groupby("abc").mean(keep_attrs=keep_attrs) - # values are tested elsewhere, here we jsut check data + # values are tested elsewhere, here we just check data # TODO: add check_attrs kwarg to assert_allclose actual.data = expected.data assert_identical(expected, actual) From 8f983f1664954c669c329dcd20e4384323dafa3c Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 18 Jul 2022 16:46:07 +0200 Subject: [PATCH 02/12] Switch to T_DataArray and T_Dataset in concat (#6784) * Switch to T_DataArray in concat * Switch tp T_Dataset in concat * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update concat.py * [pre-commit.ci] auto fixes from 
pre-commit.com hooks for more information, see https://pre-commit.ci * cast types * Update concat.py * Update concat.py * Update concat.py Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- xarray/core/concat.py | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 34cd2c82d92..f7cc30b9eab 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Hashable, Iterable, overload +from typing import TYPE_CHECKING, Any, Hashable, Iterable, cast, overload import pandas as pd @@ -14,19 +14,18 @@ merge_attrs, merge_collected, ) +from .types import T_DataArray, T_Dataset from .variable import Variable from .variable import concat as concat_vars if TYPE_CHECKING: - from .dataarray import DataArray - from .dataset import Dataset from .types import CombineAttrsOptions, CompatOptions, ConcatOptions, JoinOptions @overload def concat( - objs: Iterable[Dataset], - dim: Hashable | DataArray | pd.Index, + objs: Iterable[T_Dataset], + dim: Hashable | T_DataArray | pd.Index, data_vars: ConcatOptions | list[Hashable] = "all", coords: ConcatOptions | list[Hashable] = "different", compat: CompatOptions = "equals", @@ -34,14 +33,14 @@ def concat( fill_value: object = dtypes.NA, join: JoinOptions = "outer", combine_attrs: CombineAttrsOptions = "override", -) -> Dataset: +) -> T_Dataset: ... @overload def concat( - objs: Iterable[DataArray], - dim: Hashable | DataArray | pd.Index, + objs: Iterable[T_DataArray], + dim: Hashable | T_DataArray | pd.Index, data_vars: ConcatOptions | list[Hashable] = "all", coords: ConcatOptions | list[Hashable] = "different", compat: CompatOptions = "equals", @@ -49,7 +48,7 @@ def concat( fill_value: object = dtypes.NA, join: JoinOptions = "outer", combine_attrs: CombineAttrsOptions = "override", -) -> DataArray: +) -> T_DataArray: ... 
@@ -402,7 +401,7 @@ def process_subset_opt(opt, subset): # determine dimensional coordinate names and a dict mapping name to DataArray def _parse_datasets( - datasets: Iterable[Dataset], + datasets: Iterable[T_Dataset], ) -> tuple[dict[Hashable, Variable], dict[Hashable, int], set[Hashable], set[Hashable]]: dims: set[Hashable] = set() @@ -429,8 +428,8 @@ def _parse_datasets( def _dataset_concat( - datasets: list[Dataset], - dim: str | DataArray | pd.Index, + datasets: list[T_Dataset], + dim: str | T_DataArray | pd.Index, data_vars: str | list[str], coords: str | list[str], compat: CompatOptions, @@ -438,7 +437,7 @@ def _dataset_concat( fill_value: object = dtypes.NA, join: JoinOptions = "outer", combine_attrs: CombineAttrsOptions = "override", -) -> Dataset: +) -> T_Dataset: """ Concatenate a sequence of datasets along a new or existing dimension """ @@ -482,7 +481,8 @@ def _dataset_concat( # case where concat dimension is a coordinate or data_var but not a dimension if (dim in coord_names or dim in data_names) and dim not in dim_names: - datasets = [ds.expand_dims(dim) for ds in datasets] + # TODO: Overriding type because .expand_dims has incorrect typing: + datasets = [cast(T_Dataset, ds.expand_dims(dim)) for ds in datasets] # determine which variables to concatenate concat_over, equals, concat_dim_lengths = _calc_concat_over( @@ -590,7 +590,7 @@ def get_indexes(name): # preserves original variable order result_vars[name] = result_vars.pop(name) - result = Dataset(result_vars, attrs=result_attrs) + result = type(datasets[0])(result_vars, attrs=result_attrs) absent_coord_names = coord_names - set(result.variables) if absent_coord_names: @@ -618,8 +618,8 @@ def get_indexes(name): def _dataarray_concat( - arrays: Iterable[DataArray], - dim: str | DataArray | pd.Index, + arrays: Iterable[T_DataArray], + dim: str | T_DataArray | pd.Index, data_vars: str | list[str], coords: str | list[str], compat: CompatOptions, @@ -627,7 +627,7 @@ def _dataarray_concat( fill_value: object = dtypes.NA, join: JoinOptions = "outer", combine_attrs: CombineAttrsOptions = "override", -) -> DataArray: +) -> T_DataArray: from .dataarray import DataArray arrays = list(arrays) @@ -650,7 +650,8 @@ def _dataarray_concat( if compat == "identical": raise ValueError("array names not identical") else: - arr = arr.rename(name) + # TODO: Overriding type because .rename has incorrect typing: + arr = cast(T_DataArray, arr.rename(name)) datasets.append(arr._to_temp_dataset()) ds = _dataset_concat( From 392a61484e80e6ccfd5774b68be51578077d4292 Mon Sep 17 00:00:00 2001 From: Mick <43316012+headtr1ck@users.noreply.github.com> Date: Mon, 18 Jul 2022 16:48:01 +0200 Subject: [PATCH 03/12] Update DataArray.rename + docu (#6665) * update dataarray rename incl docu * update whats-new * add support for changing name and dims/coords at the same time * fix runtime typing issue * Revert "add support for changing name and dims/coords at the same time" This reverts commit 31d852137916b72e3c1965a0d9c7fbfe3bfd2831. * enable rename to None again * fix a typing problem --- doc/whats-new.rst | 3 ++ xarray/core/dataarray.py | 27 +++++----- xarray/core/dataset.py | 6 +-- xarray/core/types.py | 2 +- xarray/tests/test_dataarray.py | 90 ++++++++++++++++++++++++++++++---- 5 files changed, 103 insertions(+), 25 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9f6f3622f71..df7509ccda7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -229,6 +229,9 @@ Documentation sizes. 
In particular, correct the syntax and replace lists with tuples in the examples. (:issue:`6333`, :pull:`6334`) By `Stan West `_. +- Mention that ``xr.DataArray.rename`` can rename coordinates. + (:issue:`5458`, :pull:`6665`) + By `Michael Niklas `_. - Added examples to :py:meth:`Dataset.thin` and :py:meth:`DataArray.thin` By `Emma Marshall `_. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 1f79d048379..8ef05361193 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1996,17 +1996,17 @@ def rename( new_name_or_name_dict: Hashable | Mapping[Any, Hashable] | None = None, **names: Hashable, ) -> DataArray: - """Returns a new DataArray with renamed coordinates or a new name. + """Returns a new DataArray with renamed coordinates, dimensions or a new name. Parameters ---------- new_name_or_name_dict : str or dict-like, optional If the argument is dict-like, it used as a mapping from old - names to new names for coordinates. Otherwise, use the argument - as the new name for this array. + names to new names for coordinates or dimensions. Otherwise, + use the argument as the new name for this array. **names : Hashable, optional The keyword arguments form of a mapping from old names to - new names for coordinates. + new names for coordinates or dimensions. One of new_name_or_name_dict or names must be provided. Returns @@ -2019,16 +2019,21 @@ def rename( Dataset.rename DataArray.swap_dims """ - if names or utils.is_dict_like(new_name_or_name_dict): - new_name_or_name_dict = cast( - Mapping[Hashable, Hashable], new_name_or_name_dict - ) + if new_name_or_name_dict is None and not names: + # change name to None? + return self._replace(name=None) + if utils.is_dict_like(new_name_or_name_dict) or new_name_or_name_dict is None: + # change dims/coords name_dict = either_dict_or_kwargs(new_name_or_name_dict, names, "rename") dataset = self._to_temp_dataset().rename(name_dict) return self._from_temp_dataset(dataset) - else: - new_name_or_name_dict = cast(Hashable, new_name_or_name_dict) - return self._replace(name=new_name_or_name_dict) + if utils.hashable(new_name_or_name_dict) and names: + # change name + dims/coords + dataset = self._to_temp_dataset().rename(names) + dataarray = self._from_temp_dataset(dataset) + return dataarray._replace(name=new_name_or_name_dict) + # only change name + return self._replace(name=new_name_or_name_dict) def swap_dims( self: T_DataArray, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index dc147fa921d..4849738f453 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3558,12 +3558,12 @@ def rename( name_dict: Mapping[Any, Hashable] | None = None, **names: Hashable, ) -> T_Dataset: - """Returns a new object with renamed variables and dimensions. + """Returns a new object with renamed variables, coordinates and dimensions. Parameters ---------- name_dict : dict-like, optional - Dictionary whose keys are current variable or dimension names and + Dictionary whose keys are current variable, coordinate or dimension names and whose values are the desired names. **names : optional Keyword form of ``name_dict``. @@ -3572,7 +3572,7 @@ def rename( Returns ------- renamed : Dataset - Dataset with renamed variables and dimensions. + Dataset with renamed variables, coordinates and dimensions. 
See Also -------- diff --git a/xarray/core/types.py b/xarray/core/types.py index 5604c5365dd..477cc4c4820 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -56,7 +56,7 @@ InterpOptions = Union[Interp1dOptions, InterpolantOptions] DatetimeUnitOptions = Literal[ - "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "ns", "ps", "fs", "as", None + "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", None ] QueryEngineOptions = Literal["python", "numexpr", None] diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index f26fc1c9d5e..db3c9824ba3 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1653,17 +1653,87 @@ def test_reindex_str_dtype(self, dtype) -> None: assert actual.dtype == expected.dtype def test_rename(self) -> None: - renamed = self.dv.rename("bar") - assert_identical(renamed.to_dataset(), self.ds.rename({"foo": "bar"})) - assert renamed.name == "bar" - renamed = self.dv.x.rename({"x": "z"}).rename("z") - assert_identical(renamed, self.ds.rename({"x": "z"}).z) - assert renamed.name == "z" - assert renamed.dims == ("z",) - - renamed_kwargs = self.dv.x.rename(x="z").rename("z") - assert_identical(renamed, renamed_kwargs) + da = xr.DataArray( + [1, 2, 3], dims="dim", name="name", coords={"coord": ("dim", [5, 6, 7])} + ) + + # change name + renamed_name = da.rename("name_new") + assert renamed_name.name == "name_new" + expected_name = da.copy() + expected_name.name = "name_new" + assert_identical(renamed_name, expected_name) + + # change name to None? + renamed_noname = da.rename(None) + assert renamed_noname.name is None + expected_noname = da.copy() + expected_noname.name = None + assert_identical(renamed_noname, expected_noname) + renamed_noname = da.rename() + assert renamed_noname.name is None + assert_identical(renamed_noname, expected_noname) + + # change dim + renamed_dim = da.rename({"dim": "dim_new"}) + assert renamed_dim.dims == ("dim_new",) + expected_dim = xr.DataArray( + [1, 2, 3], + dims="dim_new", + name="name", + coords={"coord": ("dim_new", [5, 6, 7])}, + ) + assert_identical(renamed_dim, expected_dim) + + # change dim with kwargs + renamed_dimkw = da.rename(dim="dim_new") + assert renamed_dimkw.dims == ("dim_new",) + assert_identical(renamed_dimkw, expected_dim) + + # change coords + renamed_coord = da.rename({"coord": "coord_new"}) + assert "coord_new" in renamed_coord.coords + expected_coord = xr.DataArray( + [1, 2, 3], dims="dim", name="name", coords={"coord_new": ("dim", [5, 6, 7])} + ) + assert_identical(renamed_coord, expected_coord) + + # change coords with kwargs + renamed_coordkw = da.rename(coord="coord_new") + assert "coord_new" in renamed_coordkw.coords + assert_identical(renamed_coordkw, expected_coord) + + # change coord and dim + renamed_both = da.rename({"dim": "dim_new", "coord": "coord_new"}) + assert renamed_both.dims == ("dim_new",) + assert "coord_new" in renamed_both.coords + expected_both = xr.DataArray( + [1, 2, 3], + dims="dim_new", + name="name", + coords={"coord_new": ("dim_new", [5, 6, 7])}, + ) + assert_identical(renamed_both, expected_both) + + # change coord and dim with kwargs + renamed_bothkw = da.rename(dim="dim_new", coord="coord_new") + assert renamed_bothkw.dims == ("dim_new",) + assert "coord_new" in renamed_bothkw.coords + assert_identical(renamed_bothkw, expected_both) + + # change all + renamed_all = da.rename("name_new", dim="dim_new", coord="coord_new") + assert renamed_all.name == "name_new" + assert renamed_all.dims == ("dim_new",) 
+ assert "coord_new" in renamed_all.coords + expected_all = xr.DataArray( + [1, 2, 3], + dims="dim_new", + name="name_new", + coords={"coord_new": ("dim_new", [5, 6, 7])}, + ) + assert_identical(renamed_all, expected_all) def test_init_value(self) -> None: expected = DataArray( From 9b54b44aa0068ec2e1a4a5195e19c7ae08447bed Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 19 Jul 2022 19:30:09 -0600 Subject: [PATCH 04/12] Refactor groupby binary ops code. (#6789) --- xarray/core/groupby.py | 71 ++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 38 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 5fa78ae76de..7119332405b 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -23,6 +23,7 @@ from . import dtypes, duck_array_ops, nputils, ops from ._reductions import DataArrayGroupByReductions, DatasetGroupByReductions +from .alignment import align from .arithmetic import DataArrayGroupbyArithmetic, DatasetGroupbyArithmetic from .concat import concat from .formatting import format_array_flat @@ -309,7 +310,7 @@ class GroupBy(Generic[T_Xarray]): "_squeeze", # Save unstacked object for flox "_original_obj", - "_unstacked_group", + "_original_group", "_bins", ) _obj: T_Xarray @@ -374,7 +375,7 @@ def __init__( group.name = "group" self._original_obj: T_Xarray = obj - self._unstacked_group = group + self._original_group = group self._bins = bins group, obj, stacked_dim, inserted_dims = _ensure_1d(group, obj) @@ -571,11 +572,22 @@ def _binary_op(self, other, f, reflexive=False): g = f if not reflexive else lambda x, y: f(y, x) - obj = self._obj - group = self._group - dim = self._group_dim + if self._bins is None: + obj = self._original_obj + group = self._original_group + dims = group.dims + else: + obj = self._maybe_unstack(self._obj) + group = self._maybe_unstack(self._group) + dims = (self._group_dim,) + if isinstance(group, _DummyGroup): - group = obj[dim] + group = obj[group.name] + coord = group + else: + coord = self._unique_coord + if not isinstance(coord, DataArray): + coord = DataArray(self._unique_coord) name = group.name if not isinstance(other, (Dataset, DataArray)): @@ -592,37 +604,19 @@ def _binary_op(self, other, f, reflexive=False): "is not a dimension on the other argument" ) - try: - expanded = other.sel({name: group}) - except KeyError: - # some labels are absent i.e. other is not aligned - # so we align by reindexing and then rename dimensions. - - # Broadcast out scalars for backwards compatibility - # TODO: get rid of this when fixing GH2145 - for var in other.coords: - if other[var].ndim == 0: - other[var] = ( - other[var].drop_vars(var).expand_dims({name: other.sizes[name]}) - ) - expanded = ( - other.reindex({name: group.data}) - .rename({name: dim}) - .assign_coords({dim: obj[dim]}) - ) + # Broadcast out scalars for backwards compatibility + # TODO: get rid of this when fixing GH2145 + for var in other.coords: + if other[var].ndim == 0: + other[var] = ( + other[var].drop_vars(var).expand_dims({name: other.sizes[name]}) + ) - if self._bins is not None and name == dim and dim not in obj.xindexes: - # When binning by unindexed coordinate we need to reindex obj. - # _full_index is IntervalIndex, so idx will be -1 where - # a value does not belong to any bin. 
Using IntervalIndex - # accounts for any non-default cut_kwargs passed to the constructor - idx = pd.cut(group, bins=self._full_index).codes - obj = obj.isel({dim: np.arange(group.size)[idx != -1]}) + other, _ = align(other, coord, join="outer") + expanded = other.sel({name: group}) result = g(obj, expanded) - result = self._maybe_unstack(result) - group = self._maybe_unstack(group) if group.ndim > 1: # backcompat: # TODO: get rid of this when fixing GH2145 @@ -632,8 +626,9 @@ def _binary_op(self, other, f, reflexive=False): if isinstance(result, Dataset) and isinstance(obj, Dataset): for var in set(result): - if dim not in obj[var].dims: - result[var] = result[var].transpose(dim, ...) + for d in dims: + if d not in obj[var].dims: + result[var] = result[var].transpose(d, ...) return result def _maybe_restore_empty_groups(self, combined): @@ -695,10 +690,10 @@ def _flox_reduce(self, dim, keep_attrs=None, **kwargs): # group is only passed by resample group = kwargs.pop("group", None) if group is None: - if isinstance(self._unstacked_group, _DummyGroup): - group = self._unstacked_group.name + if isinstance(self._original_group, _DummyGroup): + group = self._original_group.name else: - group = self._unstacked_group + group = self._original_group unindexed_dims = tuple() if isinstance(group, str): From dabd9779bebe811e04fa71546baf70564174aeaa Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 19 Jul 2022 19:31:36 -0600 Subject: [PATCH 05/12] Add cumsum to DatasetGroupBy (#6525) * Add cumsum to DatasetGroupBy Fixes #3141 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix * More fix. * Add whats-new * [skip-ci] Add to api.rst * Update xarray/tests/test_groupby.py Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> * Update xarray/core/groupby.py * Update xarray/core/groupby.py * Update xarray/tests/test_groupby.py Co-authored-by: Vlad Skripniuk Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> Co-authored-by: Anderson Banihirwe --- doc/api.rst | 2 ++ doc/whats-new.rst | 4 ++++ xarray/core/groupby.py | 16 ++++++++++++++-- xarray/tests/test_groupby.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 1036b476c83..840fa32bf43 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -736,6 +736,7 @@ Dataset DatasetGroupBy.all DatasetGroupBy.any DatasetGroupBy.count + DatasetGroupBy.cumsum DatasetGroupBy.max DatasetGroupBy.mean DatasetGroupBy.median @@ -765,6 +766,7 @@ DataArray DataArrayGroupBy.all DataArrayGroupBy.any DataArrayGroupBy.count + DataArrayGroupBy.cumsum DataArrayGroupBy.max DataArrayGroupBy.mean DataArrayGroupBy.median diff --git a/doc/whats-new.rst b/doc/whats-new.rst index df7509ccda7..8fee57893b5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -123,6 +123,10 @@ New Features - Allow passing chunks in ``**kwargs`` form to :py:meth:`Dataset.chunk`, :py:meth:`DataArray.chunk`, and :py:meth:`Variable.chunk`. (:pull:`6471`) By `Tom Nicholas `_. +- Add :py:meth:`core.groupby.DatasetGroupBy.cumsum` and :py:meth:`core.groupby.DataArrayGroupBy.cumsum`. + By `Vladislav Skripniuk `_ and `Deepak Cherian `_. 
(:pull:`3147`, :pull:`6525`, :issue:`3141`) +- Expose `inline_array` kwarg from `dask.array.from_array` in :py:func:`open_dataset`, :py:meth:`Dataset.chunk`, + :py:meth:`DataArray.chunk`, and :py:meth:`Variable.chunk`. (:pull:`6471`) - Expose the ``inline_array`` kwarg from :py:func:`dask.array.from_array` in :py:func:`open_dataset`, :py:meth:`Dataset.chunk`, :py:meth:`DataArray.chunk`, and :py:meth:`Variable.chunk`. (:pull:`6471`) By `Tom Nicholas `_. diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 7119332405b..9216248a945 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -25,10 +25,12 @@ from ._reductions import DataArrayGroupByReductions, DatasetGroupByReductions from .alignment import align from .arithmetic import DataArrayGroupbyArithmetic, DatasetGroupbyArithmetic +from .common import ImplementsArrayReduce, ImplementsDatasetReduce from .concat import concat from .formatting import format_array_flat from .indexes import create_default_index_implicit, filter_indexes_from_coords from .npcompat import QUANTILE_METHODS, ArrayLike +from .ops import IncludeCumMethods from .options import _get_keep_attrs from .pycompat import integer_types from .types import T_Xarray @@ -1187,7 +1189,12 @@ def reduce_array(ar: DataArray) -> DataArray: # https://github.com/python/mypy/issues/9031 -class DataArrayGroupBy(DataArrayGroupByBase, DataArrayGroupByReductions): # type: ignore[misc] +class DataArrayGroupBy( # type: ignore[misc] + DataArrayGroupByBase, + DataArrayGroupByReductions, + ImplementsArrayReduce, + IncludeCumMethods, +): __slots__ = () @@ -1341,5 +1348,10 @@ def assign(self, **kwargs: Any) -> Dataset: # https://github.com/python/mypy/issues/9031 -class DatasetGroupBy(DatasetGroupByBase, DatasetGroupByReductions): # type: ignore[misc] +class DatasetGroupBy( # type: ignore[misc] + DatasetGroupByBase, + DatasetGroupByReductions, + ImplementsDatasetReduce, + IncludeCumMethods, +): __slots__ = () diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 801dc7c6156..3d096daedbc 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -2002,3 +2002,31 @@ def func(arg1, arg2, arg3=0.0): expected = xr.Dataset({"foo": ("time", [3.0, 3.0, 3.0]), "time": times}) actual = ds.resample(time="D").map(func, args=(1.0,), arg3=1.0) assert_identical(expected, actual) + + +def test_groupby_cumsum() -> None: + ds = xr.Dataset( + {"foo": (("x",), [7, 3, 1, 1, 1, 1, 1])}, + coords={"x": [0, 1, 2, 3, 4, 5, 6], "group_id": ("x", [0, 0, 1, 1, 2, 2, 2])}, + ) + actual = ds.groupby("group_id").cumsum(dim="x") # type: ignore[attr-defined] # TODO: move cumsum to generate_reductions.py + expected = xr.Dataset( + { + "foo": (("x",), [7, 10, 1, 2, 1, 2, 3]), + }, + coords={ + "x": [0, 1, 2, 3, 4, 5, 6], + "group_id": ds.group_id, + }, + ) + # TODO: Remove drop_vars when GH6528 is fixed + # when Dataset.cumsum propagates indexes, and the group variable? 
+ assert_identical(expected.drop_vars(["x", "group_id"]), actual) + + actual = ds.foo.groupby("group_id").cumsum(dim="x") + expected.coords["group_id"] = ds.group_id + expected.coords["x"] = np.arange(7) + assert_identical(expected.foo, actual) + + +# TODO: move other groupby tests from test_dataset and test_dataarray over here From 9f8d47c8acfaa925b3499e824a0807d7f20424c7 Mon Sep 17 00:00:00 2001 From: Tom White Date: Wed, 20 Jul 2022 07:09:14 +0100 Subject: [PATCH 06/12] Support NumPy array API (experimental) (#6804) * Support NumPy array API (experimental) * Address feedback * Update xarray/core/indexing.py Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> * Update xarray/core/indexing.py Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> * Update xarray/tests/test_array_api.py Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> * Update xarray/tests/test_array_api.py Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> * Update xarray/tests/test_array_api.py Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> * Update xarray/tests/test_array_api.py Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> * Update xarray/tests/test_array_api.py Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> * Fix import order * Fix import order * update whatsnew Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> Co-authored-by: Deepak Cherian Co-authored-by: Thomas Nicholas --- doc/whats-new.rst | 3 ++ xarray/core/duck_array_ops.py | 6 +++- xarray/core/indexing.py | 45 ++++++++++++++++++++++++++++++ xarray/core/utils.py | 7 +++-- xarray/core/variable.py | 4 ++- xarray/tests/test_array_api.py | 51 ++++++++++++++++++++++++++++++++++ 6 files changed, 112 insertions(+), 4 deletions(-) create mode 100644 xarray/tests/test_array_api.py diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8fee57893b5..9e6a4a3ceac 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,6 +30,9 @@ New Features :py:meth:`coarsen`, :py:meth:`weighted`, :py:meth:`resample`, (:pull:`6702`) By `Michael Niklas `_. +- Experimental support for wrapping any array type that conforms to the python array api standard. + (:pull:`6804`) + By `Tom White `_. 
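
A minimal usage sketch of the experimental array API support above, adapted from the
tests this patch adds in ``xarray/tests/test_array_api.py`` (it assumes a NumPy version
that ships the experimental ``numpy.array_api`` module, i.e. numpy >= 1.22):

    import numpy.array_api as xp  # experimental reference implementation of the standard

    import xarray as xr

    # xarray now wraps any object exposing __array_namespace__ directly,
    # rather than coercing it to a numpy.ndarray:
    da = xr.DataArray(xp.ones((2, 3)), dims=("x", "y"), coords={"x": [10, 20]})

    # Arithmetic and reductions stay within the wrapped array type:
    result = (da + 7).sum(skipna=False)
    print(type(result.data))  # numpy.array_api._array_object.Array
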
Deprecations ~~~~~~~~~~~~ diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 6e73ee41b40..2cd2fb3af04 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -329,7 +329,11 @@ def f(values, axis=None, skipna=None, **kwargs): if name in ["sum", "prod"]: kwargs.pop("min_count", None) - func = getattr(np, name) + if hasattr(values, "__array_namespace__"): + xp = values.__array_namespace__() + func = getattr(xp, name) + else: + func = getattr(np, name) try: with warnings.catch_warnings(): diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 9a29b63f4d0..72ca60d4d5e 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -679,6 +679,8 @@ def as_indexable(array): return DaskIndexingAdapter(array) if hasattr(array, "__array_function__"): return NdArrayLikeIndexingAdapter(array) + if hasattr(array, "__array_namespace__"): + return ArrayApiIndexingAdapter(array) raise TypeError(f"Invalid array type: {type(array)}") @@ -1288,6 +1290,49 @@ def __init__(self, array): self.array = array +class ArrayApiIndexingAdapter(ExplicitlyIndexedNDArrayMixin): + """Wrap an array API array to use explicit indexing.""" + + __slots__ = ("array",) + + def __init__(self, array): + if not hasattr(array, "__array_namespace__"): + raise TypeError( + "ArrayApiIndexingAdapter must wrap an object that " + "implements the __array_namespace__ protocol" + ) + self.array = array + + def __getitem__(self, key): + if isinstance(key, BasicIndexer): + return self.array[key.tuple] + elif isinstance(key, OuterIndexer): + # manual orthogonal indexing (implemented like DaskIndexingAdapter) + key = key.tuple + value = self.array + for axis, subkey in reversed(list(enumerate(key))): + value = value[(slice(None),) * axis + (subkey, Ellipsis)] + return value + else: + if isinstance(key, VectorizedIndexer): + raise TypeError("Vectorized indexing is not supported") + else: + raise TypeError(f"Unrecognized indexer: {key}") + + def __setitem__(self, key, value): + if isinstance(key, (BasicIndexer, OuterIndexer)): + self.array[key.tuple] = value + else: + if isinstance(key, VectorizedIndexer): + raise TypeError("Vectorized indexing is not supported") + else: + raise TypeError(f"Unrecognized indexer: {key}") + + def transpose(self, order): + xp = self.array.__array_namespace__() + return xp.permute_dims(self.array, order) + + class DaskIndexingAdapter(ExplicitlyIndexedNDArrayMixin): """Wrap a dask array to support explicit indexing.""" diff --git a/xarray/core/utils.py b/xarray/core/utils.py index ab3f8d3a282..51bf1346506 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -263,8 +263,10 @@ def is_duck_array(value: Any) -> bool: hasattr(value, "ndim") and hasattr(value, "shape") and hasattr(value, "dtype") - and hasattr(value, "__array_function__") - and hasattr(value, "__array_ufunc__") + and ( + (hasattr(value, "__array_function__") and hasattr(value, "__array_ufunc__")) + or hasattr(value, "__array_namespace__") + ) ) @@ -298,6 +300,7 @@ def _is_scalar(value, include_0d): or not ( isinstance(value, (Iterable,) + NON_NUMPY_SUPPORTED_ARRAY_TYPES) or hasattr(value, "__array_function__") + or hasattr(value, "__array_namespace__") ) ) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 90edf652284..502bf8482f2 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -237,7 +237,9 @@ def as_compatible_data(data, fastpath=False): else: data = np.asarray(data) - if not isinstance(data, np.ndarray) and hasattr(data, 
"__array_function__"): + if not isinstance(data, np.ndarray) and ( + hasattr(data, "__array_function__") or hasattr(data, "__array_namespace__") + ): return data # validate whether the data is valid data types. diff --git a/xarray/tests/test_array_api.py b/xarray/tests/test_array_api.py new file mode 100644 index 00000000000..8e378054c29 --- /dev/null +++ b/xarray/tests/test_array_api.py @@ -0,0 +1,51 @@ +from typing import Tuple + +import pytest + +import xarray as xr +from xarray.testing import assert_equal + +np = pytest.importorskip("numpy", minversion="1.22") + +import numpy.array_api as xp # isort:skip +from numpy.array_api._array_object import Array # isort:skip + + +@pytest.fixture +def arrays() -> Tuple[xr.DataArray, xr.DataArray]: + np_arr = xr.DataArray(np.ones((2, 3)), dims=("x", "y"), coords={"x": [10, 20]}) + xp_arr = xr.DataArray(xp.ones((2, 3)), dims=("x", "y"), coords={"x": [10, 20]}) + assert isinstance(xp_arr.data, Array) + return np_arr, xp_arr + + +def test_arithmetic(arrays) -> None: + np_arr, xp_arr = arrays + expected = np_arr + 7 + actual = xp_arr + 7 + assert isinstance(actual.data, Array) + assert_equal(actual, expected) + + +def test_aggregation(arrays) -> None: + np_arr, xp_arr = arrays + expected = np_arr.sum(skipna=False) + actual = xp_arr.sum(skipna=False) + assert isinstance(actual.data, Array) + assert_equal(actual, expected) + + +def test_indexing(arrays) -> None: + np_arr, xp_arr = arrays + expected = np_arr[:, 0] + actual = xp_arr[:, 0] + assert isinstance(actual.data, Array) + assert_equal(actual, expected) + + +def test_reorganizing_operation(arrays) -> None: + np_arr, xp_arr = arrays + expected = np_arr.transpose() + actual = xp_arr.transpose() + assert isinstance(actual.data, Array) + assert_equal(actual, expected) From 4a52799620aea129e17d6b6cf9fde7d510b0f7ed Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 21 Jul 2022 08:46:57 -0600 Subject: [PATCH 07/12] Drop multi-indexes when assigning to a multi-indexed variable (#6798) Co-authored-by: Anderson Banihirwe Co-authored-by: Benoit Bovy --- xarray/core/coordinates.py | 55 +++++++++++++++++++++++++++++++++- xarray/core/dataset.py | 1 + xarray/core/indexes.py | 3 ++ xarray/tests/test_dataarray.py | 7 +++++ xarray/tests/test_dataset.py | 12 ++++++++ 5 files changed, 77 insertions(+), 1 deletion(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 65949a24369..42cc8130810 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -1,5 +1,6 @@ from __future__ import annotations +import warnings from contextlib import contextmanager from typing import TYPE_CHECKING, Any, Hashable, Iterator, Mapping, Sequence, cast @@ -7,7 +8,7 @@ import pandas as pd from . 
import formatting -from .indexes import Index, Indexes, assert_no_index_corrupted +from .indexes import Index, Indexes, PandasMultiIndex, assert_no_index_corrupted from .merge import merge_coordinates_without_align, merge_coords from .utils import Frozen, ReprObject from .variable import Variable, calculate_dimensions @@ -57,6 +58,9 @@ def variables(self): def _update_coords(self, coords, indexes): raise NotImplementedError() + def _maybe_drop_multiindex_coords(self, coords): + raise NotImplementedError() + def __iter__(self) -> Iterator[Hashable]: # needs to be in the same order as the dataset variables for k in self.variables: @@ -154,6 +158,7 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index: def update(self, other: Mapping[Any, Any]) -> None: other_vars = getattr(other, "variables", other) + self._maybe_drop_multiindex_coords(set(other_vars)) coords, indexes = merge_coords( [self.variables, other_vars], priority_arg=1, indexes=self.xindexes ) @@ -304,6 +309,15 @@ def _update_coords( original_indexes.update(indexes) self._data._indexes = original_indexes + def _maybe_drop_multiindex_coords(self, coords: set[Hashable]) -> None: + """Drops variables in coords, and any associated variables as well.""" + assert self._data.xindexes is not None + variables, indexes = drop_coords( + coords, self._data._variables, self._data.xindexes + ) + self._data._variables = variables + self._data._indexes = indexes + def __delitem__(self, key: Hashable) -> None: if key in self: del self._data[key] @@ -372,6 +386,14 @@ def _update_coords( original_indexes.update(indexes) self._data._indexes = original_indexes + def _maybe_drop_multiindex_coords(self, coords: set[Hashable]) -> None: + """Drops variables in coords, and any associated variables as well.""" + variables, indexes = drop_coords( + coords, self._data._coords, self._data.xindexes + ) + self._data._coords = variables + self._data._indexes = indexes + @property def variables(self): return Frozen(self._data._coords) @@ -397,6 +419,37 @@ def _ipython_key_completions_(self): return self._data._ipython_key_completions_() +def drop_coords( + coords_to_drop: set[Hashable], variables, indexes: Indexes +) -> tuple[dict, dict]: + """Drop index variables associated with variables in coords_to_drop.""" + # Only warn when we're dropping the dimension with the multi-indexed coordinate + # If asked to drop a subset of the levels in a multi-index, we raise an error + # later but skip the warning here. + new_variables = dict(variables.copy()) + new_indexes = dict(indexes.copy()) + for key in coords_to_drop & set(indexes): + maybe_midx = indexes[key] + idx_coord_names = set(indexes.get_all_coords(key)) + if ( + isinstance(maybe_midx, PandasMultiIndex) + and key == maybe_midx.dim + and (idx_coord_names - coords_to_drop) + ): + warnings.warn( + f"Updating MultiIndexed coordinate {key!r} would corrupt indices for " + f"other variables: {list(maybe_midx.index.names)!r}. " + f"This will raise an error in the future. 
Use `.drop_vars({idx_coord_names!r})` before " + "assigning new coordinate values.", + DeprecationWarning, + stacklevel=4, + ) + for k in idx_coord_names: + del new_variables[k] + del new_indexes[k] + return new_variables, new_indexes + + def assert_coordinate_consistent( obj: DataArray | Dataset, coords: Mapping[Any, Variable] ) -> None: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 4849738f453..c677ee13c3d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5764,6 +5764,7 @@ def assign( data = self.copy() # do all calculations first... results: CoercibleMapping = data._calc_assign_results(variables) + data.coords._maybe_drop_multiindex_coords(set(results.keys())) # ... and then assign data.update(results) return data diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index d7133683d83..8ff0d40ff07 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1085,6 +1085,9 @@ def dims(self) -> Mapping[Hashable, int]: return Frozen(self._dims) + def copy(self): + return type(self)(dict(self._indexes), dict(self._variables)) + def get_unique(self) -> list[T_PandasOrXarrayIndex]: """Return a list of unique indexes, preserving order.""" diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index db3c9824ba3..298840f3f66 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1499,6 +1499,13 @@ def test_assign_coords(self) -> None: with pytest.raises(ValueError): da.coords["x"] = ("y", [1, 2, 3]) # no new dimension to a DataArray + def test_assign_coords_existing_multiindex(self) -> None: + data = self.mda + with pytest.warns( + DeprecationWarning, match=r"Updating MultiIndexed coordinate" + ): + data.assign_coords(x=range(4)) + def test_coords_alignment(self) -> None: lhs = DataArray([1, 2, 3], [("x", [0, 1, 2])]) rhs = DataArray([2, 3, 4], [("x", [1, 2, 3])]) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 459acfd87fa..9ea47163d05 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3967,6 +3967,18 @@ def test_assign_multiindex_level(self) -> None: data.assign(level_1=range(4)) data.assign_coords(level_1=range(4)) + def test_assign_coords_existing_multiindex(self) -> None: + data = create_test_multiindex() + with pytest.warns( + DeprecationWarning, match=r"Updating MultiIndexed coordinate" + ): + data.assign_coords(x=range(4)) + + with pytest.warns( + DeprecationWarning, match=r"Updating MultiIndexed coordinate" + ): + data.assign(x=range(4)) + def test_assign_all_multiindex_coords(self) -> None: data = create_test_multiindex() actual = data.assign(x=range(4), level_1=range(4), level_2=range(4)) From 4ad706fc4ef102c525555d55b20bc7ccc72d7045 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 22 Jul 2022 09:44:58 -0600 Subject: [PATCH 08/12] Release notes for v2022.06.0 (#6815) --- doc/api.rst | 15 ++++++++++- doc/whats-new.rst | 67 ++++++++++++++++++++++++----------------------- 2 files changed, 48 insertions(+), 34 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 840fa32bf43..f9770090e5e 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -648,11 +648,23 @@ DataArray methods Coordinates objects =================== +Dataset +------- + .. autosummary:: :toctree: generated/ - core.coordinates.DataArrayCoordinates core.coordinates.DatasetCoordinates + core.coordinates.DatasetCoordinates.dtypes + +DataArray +--------- + +.. 
autosummary:: + :toctree: generated/ + + core.coordinates.DataArrayCoordinates + core.coordinates.DataArrayCoordinates.dtypes Plotting ======== @@ -812,6 +824,7 @@ DataArray :toctree: generated/ DataArrayRolling + DataArrayRolling.__iter__ DataArrayRolling.construct DataArrayRolling.reduce DataArrayRolling.argmax diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9e6a4a3ceac..c7a2a50a73f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -16,33 +16,44 @@ What's New .. _whats-new.2022.06.0: -v2022.06.0 (unreleased) ------------------------ +v2022.06.0 (July 21, 2022) +-------------------------- + +This release brings a number of bug fixes and improvements, most notably a major internal +refactor of the indexing functionality, the use of `flox`_ in ``groupby`` operations, +and experimental support for the new Python `Array API standard `_. +It also stops testing support for the abandoned PyNIO. + +Much effort has been made to preserve backwards compatibility as part of the indexing refactor. +We are aware of one `unfixed issue `_. + +Please also see the `whats-new.2022.06.0rc0`_ for a full list of changes. + +Many thanks to our 18 contributors: +Bane Sullivan, Deepak Cherian, Dimitri Papadopoulos Orfanos, Emma Marshall, Hauke Schulz, Illviljan, +Julia Signell, Justus Magin, Keewis, Mathias Hauser, Michael Delgado, Mick, Pierre Manchon, Ray Bell, +Spencer Clark, Stefaan Lippens, Tom White, Travis A. O'Brien, New Features ~~~~~~~~~~~~ -- Add :py:meth:`Dataset.dtypes`, :py:meth:`DatasetCoordinates.dtypes`, - :py:meth:`DataArrayCoordinates.dtypes` properties: Mapping from variable names to dtypes. +- Add :py:attr:`Dataset.dtypes`, :py:attr:`core.coordinates.DatasetCoordinates.dtypes`, + :py:attr:`core.coordinates.DataArrayCoordinates.dtypes` properties: Mapping from variable names to dtypes. (:pull:`6706`) By `Michael Niklas `_. - Initial typing support for :py:meth:`groupby`, :py:meth:`rolling`, :py:meth:`rolling_exp`, :py:meth:`coarsen`, :py:meth:`weighted`, :py:meth:`resample`, (:pull:`6702`) By `Michael Niklas `_. -- Experimental support for wrapping any array type that conforms to the python array api standard. - (:pull:`6804`) +- Experimental support for wrapping any array type that conforms to the python + `array api standard `_. (:pull:`6804`) By `Tom White `_. -Deprecations -~~~~~~~~~~~~ - - Bug fixes ~~~~~~~~~ -- :py:meth:`xarray.save_mfdataset` now passes ``**kwargs`` on to ``to_netcdf``, - allowing the ``encoding`` and ``unlimited_dims`` options with ``save_mfdataset``. +- :py:meth:`save_mfdataset` now passes ``**kwargs`` on to :py:meth:`Dataset.to_netcdf`, + allowing the ``encoding`` and ``unlimited_dims`` options with :py:meth:`save_mfdataset`. (:issue:`6684`) By `Travis A. O'Brien `_. - Fix backend support of pydap versions <3.3.0 (:issue:`6648`, :pull:`6656`). @@ -61,16 +72,12 @@ Bug fixes (:issue:`6739`, :pull:`6744`) By `Michael Niklas `_. -Documentation -~~~~~~~~~~~~~ - - Internal Changes ~~~~~~~~~~~~~~~~ -- :py:meth:`xarray.core.groupby`, :py:meth:`xarray.core.rolling`, - :py:meth:`xarray.core.rolling_exp`, :py:meth:`xarray.core.weighted` - and :py:meth:`xarray.core.resample` modules are no longer imported by default. +- ``xarray.core.groupby``, ``xarray.core.rolling``, + ``xarray.core.rolling_exp``, ``xarray.core.weighted`` + and ``xarray.core.resample`` modules are no longer imported by default. (:pull:`6702`) .. 
_whats-new.2022.06.0rc0: @@ -123,7 +130,7 @@ New Features elements which trigger summarization rather than full repr in (numpy) array detailed views of the html repr (:pull:`6400`). By `Benoît Bovy `_. -- Allow passing chunks in ``**kwargs`` form to :py:meth:`Dataset.chunk`, :py:meth:`DataArray.chunk`, and +- Allow passing chunks in ``kwargs`` form to :py:meth:`Dataset.chunk`, :py:meth:`DataArray.chunk`, and :py:meth:`Variable.chunk`. (:pull:`6471`) By `Tom Nicholas `_. - Add :py:meth:`core.groupby.DatasetGroupBy.cumsum` and :py:meth:`core.groupby.DataArrayGroupBy.cumsum`. @@ -133,7 +140,7 @@ New Features - Expose the ``inline_array`` kwarg from :py:func:`dask.array.from_array` in :py:func:`open_dataset`, :py:meth:`Dataset.chunk`, :py:meth:`DataArray.chunk`, and :py:meth:`Variable.chunk`. (:pull:`6471`) By `Tom Nicholas `_. -- :py:meth:`xr.polyval` now supports :py:class:`Dataset` and :py:class:`DataArray` args of any shape, +- :py:func:`polyval` now supports :py:class:`Dataset` and :py:class:`DataArray` args of any shape, is faster and requires less memory. (:pull:`6548`) By `Michael Niklas `_. - Improved overall typing. @@ -166,7 +173,7 @@ Breaking changes zarr 2.5 2.8 =============== ===== ==== -- The Dataset and DataArray ``rename*`` methods do not implicitly add or drop +- The Dataset and DataArray ``rename```` methods do not implicitly add or drop indexes. (:pull:`5692`). By `Benoît Bovy `_. - Many arguments like ``keep_attrs``, ``axis``, and ``skipna`` are now keyword @@ -179,11 +186,6 @@ Breaking changes (:pull:`6548`) By `Michael Niklas `_. - -Deprecations -~~~~~~~~~~~~ - - Bug fixes ~~~~~~~~~ @@ -211,8 +213,8 @@ Bug fixes By `Stan West `_. - Fix bug in :py:func:`where` when passing non-xarray objects with ``keep_attrs=True``. (:issue:`6444`, :pull:`6461`) By `Sam Levang `_. -- Allow passing both ``other`` and ``drop=True`` arguments to ``xr.DataArray.where`` - and ``xr.Dataset.where`` (:pull:`6466`, :pull:`6467`). +- Allow passing both ``other`` and ``drop=True`` arguments to :py:meth:`DataArray.where` + and :py:meth:`Dataset.where` (:pull:`6466`, :pull:`6467`). By `Michael Delgado `_. - Ensure dtype encoding attributes are not added or modified on variables that contain datetime-like values prior to being passed to :py:func:`xarray.conventions.decode_cf_variable` (:issue:`6453`, @@ -220,7 +222,7 @@ Bug fixes By `Spencer Clark `_. - Dark themes are now properly detected in Furo-themed Sphinx documents (:issue:`6500`, :pull:`6501`). By `Kevin Paul `_. -- :py:meth:`isel` with `drop=True` works as intended with scalar :py:class:`DataArray` indexers. +- :py:meth:`Dataset.isel`, :py:meth:`DataArray.isel` with `drop=True` works as intended with scalar :py:class:`DataArray` indexers. (:issue:`6554`, :pull:`6579`) By `Michael Niklas `_. - Fixed silent overflow issue when decoding times encoded with 32-bit and below @@ -236,10 +238,9 @@ Documentation sizes. In particular, correct the syntax and replace lists with tuples in the examples. (:issue:`6333`, :pull:`6334`) By `Stan West `_. -- Mention that ``xr.DataArray.rename`` can rename coordinates. +- Mention that :py:meth:`DataArray.rename` can rename coordinates. (:issue:`5458`, :pull:`6665`) By `Michael Niklas `_. - - Added examples to :py:meth:`Dataset.thin` and :py:meth:`DataArray.thin` By `Emma Marshall `_. @@ -247,7 +248,7 @@ Performance ~~~~~~~~~~~ - GroupBy binary operations are now vectorized. - Previously this involved looping over all groups. (:issue:`5804`,:pull:`6160`) + Previously this involved looping over all groups. 
(:issue:`5804`, :pull:`6160`) By `Deepak Cherian `_. - Substantially improved GroupBy operations using `flox `_. This is auto-enabled when ``flox`` is installed. Use ``xr.set_options(use_flox=False)`` to use From ed56df2428780cc5db268bb0f8064947ac06946a Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 22 Jul 2022 10:25:42 -0600 Subject: [PATCH 09/12] Update whats-new --- doc/whats-new.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c7a2a50a73f..efecc469106 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,6 +14,35 @@ What's New np.random.seed(123456) +.. _whats-new.2022.07.0: + +v2022.07.0 (unreleased) +----------------------- + +New Features +~~~~~~~~~~~~ + + +Breaking changes +~~~~~~~~~~~~~~~~ + + +Deprecations +~~~~~~~~~~~~ + + +Bug fixes +~~~~~~~~~ + + +Documentation +~~~~~~~~~~~~~ + + +Internal Changes +~~~~~~~~~~~~~~~~ + + .. _whats-new.2022.06.0: v2022.06.0 (July 21, 2022) From 60f8c3d3488d377b0b21009422c6121e1c8f1f70 Mon Sep 17 00:00:00 2001 From: Max Jones Date: Fri, 22 Jul 2022 13:25:32 -0400 Subject: [PATCH 10/12] Pull xarray's nbytes from nbytes attribute on arrays (#6797) * Pull xarray's nbytes from nbytes attribute on arrays * Calculate nbytes if it doesn't exist * Add test * Add docstrings * Apply suggestions from code review Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> * Add sparse variable test * Add whats-new note Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> Co-authored-by: dcherian Co-authored-by: Deepak Cherian --- doc/api.rst | 1 - doc/whats-new.rst | 2 ++ xarray/core/dataarray.py | 6 ++++++ xarray/core/dataset.py | 6 ++++++ xarray/core/variable.py | 10 ++++++++-- xarray/tests/test_array_api.py | 6 ++++++ xarray/tests/test_sparse.py | 3 +++ 7 files changed, 31 insertions(+), 3 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index f9770090e5e..11ae5de8531 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -282,7 +282,6 @@ ndarray attributes DataArray.shape DataArray.size DataArray.dtype - DataArray.nbytes DataArray.chunks diff --git a/doc/whats-new.rst b/doc/whats-new.rst index efecc469106..67f697597cf 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,6 +34,8 @@ Deprecations Bug fixes ~~~~~~~~~ +- :py:attr:`DataArray.nbytes` now uses the ``nbytes`` property of the underlying array if available. + By `Max Jones `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 8ef05361193..4a841f0fbdc 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -646,6 +646,12 @@ def size(self) -> int: @property def nbytes(self) -> int: + """ + Total bytes consumed by the elements of this DataArray's data. + + If the backend data array does not include ``nbytes``, estimates + the bytes consumed based on the ``size`` and ``dtype``. + """ return self.variable.nbytes @property diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c677ee13c3d..d55e32dd7b9 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1378,6 +1378,12 @@ def __array__(self, dtype=None): @property def nbytes(self) -> int: + """ + Total bytes consumed by the data arrays of all variables in this dataset. + + If the backend array for any variable does not include ``nbytes``, estimates + the total bytes for that array based on the ``size`` and ``dtype``. 
+ """ return sum(v.nbytes for v in self.variables.values()) @property diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 502bf8482f2..5827b90ad75 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -334,8 +334,14 @@ def shape(self): return self._data.shape @property - def nbytes(self): - return self.size * self.dtype.itemsize + def nbytes(self) -> int: + """ + Total bytes consumed by the elements of the data array. + """ + if hasattr(self.data, "nbytes"): + return self.data.nbytes + else: + return self.size * self.dtype.itemsize @property def _in_memory(self): diff --git a/xarray/tests/test_array_api.py b/xarray/tests/test_array_api.py index 8e378054c29..649bf3eec2b 100644 --- a/xarray/tests/test_array_api.py +++ b/xarray/tests/test_array_api.py @@ -43,6 +43,12 @@ def test_indexing(arrays) -> None: assert_equal(actual, expected) +def test_properties(arrays) -> None: + np_arr, xp_arr = arrays + assert np_arr.nbytes == np_arr.data.nbytes + assert xp_arr.nbytes == np_arr.data.nbytes + + def test_reorganizing_operation(arrays) -> None: np_arr, xp_arr = arrays expected = np_arr.transpose() diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 5501d38fc48..5395845d63a 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -274,6 +274,9 @@ def setUp(self): self.data = sparse.random((4, 6), random_state=0, density=0.5) self.var = xr.Variable(("x", "y"), self.data) + def test_nbytes(self): + assert self.var.nbytes == self.data.nbytes + def test_unary_op(self): assert_sparse_equal(-self.var.data, -self.data) assert_sparse_equal(abs(self.var).data, abs(self.data)) From cbd1d4069e1afc5471deb4b427d0f68f0f038469 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 25 Jul 2022 12:11:36 -0700 Subject: [PATCH 11/12] [pre-commit.ci] pre-commit autoupdate (#6824) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f4b300b676c..9a183d512a4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ repos: hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v2.37.1 + rev: v2.37.2 hooks: - id: pyupgrade args: @@ -46,7 +46,7 @@ repos: # - id: velin # args: ["--write", "--compact"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.961 + rev: v0.971 hooks: - id: mypy # Copied from setup.cfg From 491fb16073ba72e5774387d0e94a3077459249f2 Mon Sep 17 00:00:00 2001 From: Riley Brady Date: Mon, 25 Jul 2022 15:28:25 -0600 Subject: [PATCH 12/12] Add docstring example for xr.open_mfdataset (#6825) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Deepak Cherian --- xarray/backends/api.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index b80d498ec3a..ca040306bf0 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -895,6 +895,22 @@ def open_mfdataset( combine_nested open_dataset + Examples + -------- + A user might want to pass additional arguments into ``preprocess`` when + applying some operation to many individual files that are being opened. One route + to do this is through the use of ``functools.partial``. 
+
+        >>> from functools import partial
+        >>> def _preprocess(x, lon_bnds, lat_bnds):
+        ...     return x.sel(lon=slice(*lon_bnds), lat=slice(*lat_bnds))
+        ...
+        >>> lon_bnds, lat_bnds = (-110, -105), (40, 45)
+        >>> partial_func = partial(_preprocess, lon_bnds=lon_bnds, lat_bnds=lat_bnds)
+        >>> ds = xr.open_mfdataset(
+        ...     "file_*.nc", concat_dim="time", combine="nested", preprocess=partial_func
+        ... )  # doctest: +SKIP
+
     References
     ----------
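
As a usage note on the ``preprocess`` example above: the same selection can be written
with a closure instead of ``functools.partial``. A minimal sketch (the file pattern and
bounds are illustrative placeholders; ``combine="nested"`` is required whenever
``concat_dim`` is passed):

    import xarray as xr

    lon_bnds, lat_bnds = (-110, -105), (40, 45)

    def _preprocess(ds):
        # The bounds are captured from the enclosing scope rather than being
        # bound as arguments with functools.partial.
        return ds.sel(lon=slice(*lon_bnds), lat=slice(*lat_bnds))

    ds = xr.open_mfdataset(
        "file_*.nc", concat_dim="time", combine="nested", preprocess=_preprocess
    )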