From 70ac10a567cb22e8ece9c39778111eaaedb8b404 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 16 Oct 2023 11:13:39 -0400 Subject: [PATCH 01/54] move files across --- xarray/{core => namedarray}/daskmanager.py | 0 xarray/{core => namedarray}/parallelcompat.py | 0 xarray/{core => namedarray}/pycompat.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename xarray/{core => namedarray}/daskmanager.py (100%) rename xarray/{core => namedarray}/parallelcompat.py (100%) rename xarray/{core => namedarray}/pycompat.py (100%) diff --git a/xarray/core/daskmanager.py b/xarray/namedarray/daskmanager.py similarity index 100% rename from xarray/core/daskmanager.py rename to xarray/namedarray/daskmanager.py diff --git a/xarray/core/parallelcompat.py b/xarray/namedarray/parallelcompat.py similarity index 100% rename from xarray/core/parallelcompat.py rename to xarray/namedarray/parallelcompat.py diff --git a/xarray/core/pycompat.py b/xarray/namedarray/pycompat.py similarity index 100% rename from xarray/core/pycompat.py rename to xarray/namedarray/pycompat.py From 6b9cd7e63820041bcb48ef442fe7907fdb9bf7d8 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 16 Oct 2023 11:31:07 -0400 Subject: [PATCH 02/54] redirect imports from parallelcompat module --- xarray/backends/api.py | 2 +- xarray/backends/common.py | 2 +- xarray/backends/zarr.py | 2 +- xarray/coding/strings.py | 2 +- xarray/coding/variables.py | 2 +- xarray/core/common.py | 2 +- xarray/core/computation.py | 2 +- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 4 ++-- xarray/core/duck_array_ops.py | 2 +- xarray/core/indexing.py | 2 +- xarray/core/missing.py | 2 +- xarray/core/variable.py | 4 ++-- xarray/namedarray/daskmanager.py | 2 +- xarray/namedarray/parallelcompat.py | 2 +- 15 files changed, 17 insertions(+), 17 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 27e155872de..ee7e0713118 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -37,8 +37,8 @@ from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset, _get_chunk, _maybe_chunk from xarray.core.indexes import Index -from xarray.core.parallelcompat import guess_chunkmanager from xarray.core.utils import is_remote_uri +from xarray.namedarray.parallelcompat import guess_chunkmanager if TYPE_CHECKING: try: diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 1ac988c6b4f..e4dfc2bf478 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -12,9 +12,9 @@ from xarray.conventions import cf_encoder from xarray.core import indexing -from xarray.core.parallelcompat import get_chunked_array_type from xarray.core.pycompat import is_chunked_array from xarray.core.utils import FrozenDict, NdimSizeLenMixin, is_remote_uri +from xarray.namedarray.parallelcompat import get_chunked_array_type if TYPE_CHECKING: from io import BufferedIOBase diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index d6ad15f4f87..aa38eba1455 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -19,7 +19,6 @@ ) from xarray.backends.store import StoreBackendEntrypoint from xarray.core import indexing -from xarray.core.parallelcompat import guess_chunkmanager from xarray.core.pycompat import integer_types from xarray.core.utils import ( FrozenDict, @@ -27,6 +26,7 @@ close_on_error, ) from xarray.core.variable import Variable +from xarray.namedarray.parallelcompat import guess_chunkmanager if TYPE_CHECKING: from io import BufferedIOBase diff --git 
a/xarray/coding/strings.py b/xarray/coding/strings.py index 89ceaddd93b..359c6949380 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -14,8 +14,8 @@ unpack_for_encoding, ) from xarray.core import indexing -from xarray.core.parallelcompat import get_chunked_array_type, is_chunked_array from xarray.core.variable import Variable +from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array def create_vlen_dtype(element_type): diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index c583afc93c2..028216a5c39 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -10,9 +10,9 @@ import pandas as pd from xarray.core import dtypes, duck_array_ops, indexing -from xarray.core.parallelcompat import get_chunked_array_type from xarray.core.pycompat import is_chunked_array from xarray.core.variable import Variable +from xarray.namedarray.parallelcompat import get_chunked_array_type if TYPE_CHECKING: T_VarTuple = tuple[tuple[Hashable, ...], Any, dict, dict] diff --git a/xarray/core/common.py b/xarray/core/common.py index ab8a4d84261..a32d5a79e98 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -13,7 +13,6 @@ from xarray.core import dtypes, duck_array_ops, formatting, formatting_html, ops from xarray.core.indexing import BasicIndexer, ExplicitlyIndexed from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.parallelcompat import get_chunked_array_type, guess_chunkmanager from xarray.core.pycompat import is_chunked_array from xarray.core.utils import ( Frozen, @@ -21,6 +20,7 @@ emit_user_level_warning, is_scalar, ) +from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager try: import cftime diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 9cb60e0c424..e59995f0943 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -21,11 +21,11 @@ from xarray.core.indexes import Index, filter_indexes_from_coords from xarray.core.merge import merge_attrs, merge_coordinates_without_align from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.parallelcompat import get_chunked_array_type from xarray.core.pycompat import is_chunked_array, is_duck_dask_array from xarray.core.types import Dims, T_DataArray from xarray.core.utils import is_dict_like, is_scalar from xarray.core.variable import Variable +from xarray.namedarray.parallelcompat import get_chunked_array_type if TYPE_CHECKING: from xarray.core.coordinates import Coordinates diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 391b4ed9412..cec08628484 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -93,7 +93,6 @@ from xarray.backends import ZarrStore from xarray.backends.api import T_NetcdfEngine, T_NetcdfTypes from xarray.core.groupby import DataArrayGroupBy - from xarray.core.parallelcompat import ChunkManagerEntrypoint from xarray.core.resample import DataArrayResample from xarray.core.rolling import DataArrayCoarsen, DataArrayRolling from xarray.core.types import ( @@ -116,6 +115,7 @@ T_Xarray, ) from xarray.core.weighted import DataArrayWeighted + from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint T_XarrayOther = TypeVar("T_XarrayOther", bound=Union["DataArray", Dataset]) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ebd6fb6f51f..d4b57a75652 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -86,7 +86,6 @@ ) from xarray.core.missing import 
get_clean_interp_index from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.parallelcompat import get_chunked_array_type, guess_chunkmanager from xarray.core.pycompat import ( array_type, is_chunked_array, @@ -122,6 +121,7 @@ broadcast_variables, calculate_dimensions, ) +from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager from xarray.plot.accessor import DatasetPlotAccessor from xarray.util.deprecation_helpers import _deprecate_positional_args @@ -133,7 +133,6 @@ from xarray.core.dataarray import DataArray from xarray.core.groupby import DatasetGroupBy from xarray.core.merge import CoercibleMapping, CoercibleValue, _MergeResult - from xarray.core.parallelcompat import ChunkManagerEntrypoint from xarray.core.resample import DatasetResample from xarray.core.rolling import DatasetCoarsen, DatasetRolling from xarray.core.types import ( @@ -159,6 +158,7 @@ T_Xarray, ) from xarray.core.weighted import DatasetWeighted + from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint try: from dask.delayed import Delayed diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 51b6ff5f59b..2f9b18aba54 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -34,9 +34,9 @@ from numpy.lib.stride_tricks import sliding_window_view # noqa from xarray.core import dask_array_ops, dtypes, nputils -from xarray.core.parallelcompat import get_chunked_array_type, is_chunked_array from xarray.core.pycompat import array_type, is_duck_dask_array from xarray.core.utils import is_duck_array, module_available +from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array dask_available = module_available("dask") diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 6e6ce01a41f..349d105e9b9 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -17,7 +17,6 @@ from xarray.core import duck_array_ops from xarray.core.nputils import NumpyVIndexAdapter from xarray.core.options import OPTIONS -from xarray.core.parallelcompat import get_chunked_array_type, is_chunked_array from xarray.core.pycompat import ( array_type, integer_types, @@ -32,6 +31,7 @@ is_scalar, to_0d_array, ) +from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array if TYPE_CHECKING: from numpy.typing import DTypeLike diff --git a/xarray/core/missing.py b/xarray/core/missing.py index e77ec34b307..01409859875 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -15,10 +15,10 @@ from xarray.core.computation import apply_ufunc from xarray.core.duck_array_ops import datetime_to_numeric, push, timedelta_to_numeric from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.parallelcompat import get_chunked_array_type, is_chunked_array from xarray.core.types import Interp1dOptions, InterpOptions from xarray.core.utils import OrderedSet, is_scalar from xarray.core.variable import Variable, broadcast_variables +from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array if TYPE_CHECKING: from xarray.core.dataarray import DataArray diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 4207d31913d..240288bd707 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -26,7 +26,6 @@ as_indexable, ) from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.parallelcompat import get_chunked_array_type, guess_chunkmanager from xarray.core.pycompat import ( array_type, 
integer_types, @@ -46,6 +45,7 @@ maybe_coerce_to_str, ) from xarray.namedarray.core import NamedArray +from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, @@ -55,7 +55,6 @@ BASIC_INDEXING_TYPES = integer_types + (slice,) if TYPE_CHECKING: - from xarray.core.parallelcompat import ChunkManagerEntrypoint from xarray.core.types import ( Dims, ErrorOptionsWithWarn, @@ -65,6 +64,7 @@ Self, T_DuckArray, ) + from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint NON_NANOSECOND_WARNING = ( "Converting non-nanosecond precision {case} values to nanosecond precision. " diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index 56d8dc9e23a..e8a01a4f4ca 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -8,8 +8,8 @@ from xarray.core.duck_array_ops import dask_available from xarray.core.indexing import ImplicitToExplicitIndexingAdapter -from xarray.core.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray from xarray.core.pycompat import is_duck_dask_array +from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray if TYPE_CHECKING: from xarray.core.types import DaskArray, T_Chunks, T_NormalizedChunks diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index 333059e00ae..78b966df10d 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -20,7 +20,7 @@ import numpy as np -from xarray.core.pycompat import is_chunked_array +from xarray.namedarray.pycompat import is_chunked_array T_ChunkedArray = TypeVar("T_ChunkedArray") From bb89c7e9dbf374958d0c7b1f3332dabc2a4e0d34 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 16 Oct 2023 12:26:29 -0400 Subject: [PATCH 03/54] fix imports --- xarray/core/utils.py | 88 ------------------- xarray/core/variable.py | 156 +-------------------------------- xarray/namedarray/core.py | 175 +++++++++++++++++++++++++++++++++++-- xarray/namedarray/utils.py | 92 ++++++++++++++++++- 4 files changed, 259 insertions(+), 252 deletions(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index ad86b2c7fec..b9111099233 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -65,7 +65,6 @@ Generic, Literal, TypeVar, - cast, overload, ) @@ -240,11 +239,6 @@ def remove_incompatible_items( del first_dict[k] -# It's probably OK to give this as a TypeGuard; though it's not perfectly robust. 
-def is_dict_like(value: Any) -> TypeGuard[Mapping]: - return hasattr(value, "keys") and hasattr(value, "__getitem__") - - def is_full_slice(value: Any) -> bool: return isinstance(value, slice) and value == slice(None) @@ -267,25 +261,6 @@ def is_duck_array(value: Any) -> TypeGuard[T_DuckArray]: ) -def either_dict_or_kwargs( - pos_kwargs: Mapping[Any, T] | None, - kw_kwargs: Mapping[str, T], - func_name: str, -) -> Mapping[Hashable, T]: - if pos_kwargs is None or pos_kwargs == {}: - # Need an explicit cast to appease mypy due to invariance; see - # https://github.com/python/mypy/issues/6228 - return cast(Mapping[Hashable, T], kw_kwargs) - - if not is_dict_like(pos_kwargs): - raise ValueError(f"the first argument to .{func_name} must be a dictionary") - if kw_kwargs: - raise ValueError( - f"cannot specify both keyword and positional arguments to .{func_name}" - ) - return pos_kwargs - - def _is_scalar(value, include_0d): from xarray.core.variable import NON_NUMPY_SUPPORTED_ARRAY_TYPES @@ -1201,66 +1176,3 @@ def emit_user_level_warning(message, category=None): """Emit a warning at the user level by inspecting the stack trace.""" stacklevel = find_stack_level() warnings.warn(message, category=category, stacklevel=stacklevel) - - -def consolidate_dask_from_array_kwargs( - from_array_kwargs: dict, - name: str | None = None, - lock: bool | None = None, - inline_array: bool | None = None, -) -> dict: - """ - Merge dask-specific kwargs with arbitrary from_array_kwargs dict. - - Temporary function, to be deleted once explicitly passing dask-specific kwargs to .chunk() is deprecated. - """ - - from_array_kwargs = _resolve_doubly_passed_kwarg( - from_array_kwargs, - kwarg_name="name", - passed_kwarg_value=name, - default=None, - err_msg_dict_name="from_array_kwargs", - ) - from_array_kwargs = _resolve_doubly_passed_kwarg( - from_array_kwargs, - kwarg_name="lock", - passed_kwarg_value=lock, - default=False, - err_msg_dict_name="from_array_kwargs", - ) - from_array_kwargs = _resolve_doubly_passed_kwarg( - from_array_kwargs, - kwarg_name="inline_array", - passed_kwarg_value=inline_array, - default=False, - err_msg_dict_name="from_array_kwargs", - ) - - return from_array_kwargs - - -def _resolve_doubly_passed_kwarg( - kwargs_dict: dict, - kwarg_name: str, - passed_kwarg_value: str | bool | None, - default: bool | None, - err_msg_dict_name: str, -) -> dict: - # if in kwargs_dict but not passed explicitly then just pass kwargs_dict through unaltered - if kwarg_name in kwargs_dict and passed_kwarg_value is None: - pass - # if passed explicitly but not in kwargs_dict then use that - elif kwarg_name not in kwargs_dict and passed_kwarg_value is not None: - kwargs_dict[kwarg_name] = passed_kwarg_value - # if in neither then use default - elif kwarg_name not in kwargs_dict and passed_kwarg_value is None: - kwargs_dict[kwarg_name] = default - # if in both then raise - else: - raise ValueError( - f"argument {kwarg_name} cannot be passed both as a keyword argument and within " - f"the {err_msg_dict_name} dictionary" - ) - - return kwargs_dict diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 240288bd707..934ed9273de 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -8,7 +8,7 @@ from collections.abc import Hashable, Iterable, Mapping, Sequence from datetime import timedelta from functools import partial -from typing import TYPE_CHECKING, Any, Callable, Literal, NoReturn, cast +from typing import TYPE_CHECKING, Any, Callable, NoReturn, cast import numpy as np import pandas as pd 
@@ -27,7 +27,6 @@ ) from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.core.pycompat import ( - array_type, integer_types, is_0d_dask_array, is_chunked_array, @@ -45,7 +44,7 @@ maybe_coerce_to_str, ) from xarray.namedarray.core import NamedArray -from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager +from xarray.namedarray.parallelcompat import get_chunked_array_type NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, @@ -64,7 +63,6 @@ Self, T_DuckArray, ) - from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint NON_NANOSECOND_WARNING = ( "Converting non-nanosecond precision {case} values to nanosecond precision. " @@ -943,156 +941,6 @@ def _replace( encoding = copy.copy(self._encoding) return type(self)(dims, data, attrs, encoding, fastpath=True) - def chunk( - self, - chunks: ( - int - | Literal["auto"] - | tuple[int, ...] - | tuple[tuple[int, ...], ...] - | Mapping[Any, None | int | tuple[int, ...]] - ) = {}, - name: str | None = None, - lock: bool | None = None, - inline_array: bool | None = None, - chunked_array_type: str | ChunkManagerEntrypoint | None = None, - from_array_kwargs=None, - **chunks_kwargs: Any, - ) -> Self: - """Coerce this array's data into a dask array with the given chunks. - - If this variable is a non-dask array, it will be converted to dask - array. If it's a dask array, it will be rechunked to the given chunk - sizes. - - If neither chunks is not provided for one or more dimensions, chunk - sizes along that dimension will not be updated; non-dask arrays will be - converted into dask arrays with a single block. - - Parameters - ---------- - chunks : int, tuple or dict, optional - Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or - ``{'x': 5, 'y': 5}``. - name : str, optional - Used to generate the name for this array in the internal dask - graph. Does not need not be unique. - lock : bool, default: False - Passed on to :py:func:`dask.array.from_array`, if the array is not - already as dask array. - inline_array : bool, default: False - Passed on to :py:func:`dask.array.from_array`, if the array is not - already as dask array. - chunked_array_type: str, optional - Which chunked array type to coerce this datasets' arrays to. - Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system. - Experimental API that should not be relied upon. - from_array_kwargs: dict, optional - Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create - chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg. - For example, with dask as the default chunked array type, this method would pass additional kwargs - to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon. - **chunks_kwargs : {dim: chunks, ...}, optional - The keyword arguments form of ``chunks``. - One of chunks or chunks_kwargs must be provided. - - Returns - ------- - chunked : xarray.Variable - - See Also - -------- - Variable.chunks - Variable.chunksizes - xarray.unify_chunks - dask.array.from_array - """ - - if chunks is None: - warnings.warn( - "None value for 'chunks' is deprecated. " - "It will raise an error in the future. Use instead '{}'", - category=FutureWarning, - ) - chunks = {} - - if isinstance(chunks, (float, str, int, tuple, list)): - # TODO we shouldn't assume here that other chunkmanagers can handle these types - # TODO should we call normalize_chunks here? 
- pass # dask.array.from_array can handle these directly - else: - chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") - - if utils.is_dict_like(chunks): - chunks = {self.get_axis_num(dim): chunk for dim, chunk in chunks.items()} - - chunkmanager = guess_chunkmanager(chunked_array_type) - - if from_array_kwargs is None: - from_array_kwargs = {} - - # TODO deprecate passing these dask-specific arguments explicitly. In future just pass everything via from_array_kwargs - _from_array_kwargs = utils.consolidate_dask_from_array_kwargs( - from_array_kwargs, - name=name, - lock=lock, - inline_array=inline_array, - ) - - data_old = self._data - if chunkmanager.is_chunked_array(data_old): - data_chunked = chunkmanager.rechunk(data_old, chunks) - else: - if not isinstance(data_old, indexing.ExplicitlyIndexed): - ndata = data_old - else: - # Unambiguously handle array storage backends (like NetCDF4 and h5py) - # that can't handle general array indexing. For example, in netCDF4 you - # can do "outer" indexing along two dimensions independent, which works - # differently from how NumPy handles it. - # da.from_array works by using lazy indexing with a tuple of slices. - # Using OuterIndexer is a pragmatic choice: dask does not yet handle - # different indexing types in an explicit way: - # https://github.com/dask/dask/issues/2883 - ndata = indexing.ImplicitToExplicitIndexingAdapter( - data_old, indexing.OuterIndexer - ) - - if utils.is_dict_like(chunks): - chunks = tuple(chunks.get(n, s) for n, s in enumerate(ndata.shape)) - - data_chunked = chunkmanager.from_array( - ndata, - chunks, - **_from_array_kwargs, - ) - - return self._replace(data=data_chunked) - - def to_numpy(self) -> np.ndarray: - """Coerces wrapped data to numpy and returns a numpy.ndarray""" - # TODO an entrypoint so array libraries can choose coercion method? 
- data = self.data - - # TODO first attempt to call .to_numpy() once some libraries implement it - if hasattr(data, "chunks"): - chunkmanager = get_chunked_array_type(data) - data, *_ = chunkmanager.compute(data) - if isinstance(data, array_type("cupy")): - data = data.get() - # pint has to be imported dynamically as pint imports xarray - if isinstance(data, array_type("pint")): - data = data.magnitude - if isinstance(data, array_type("sparse")): - data = data.todense() - data = np.asarray(data) - - return data - - def as_numpy(self) -> Self: - """Coerces wrapped data into a numpy array, returning a Variable.""" - return self._replace(data=self.to_numpy()) - def isel( self, indexers: Mapping[Any, Any] | None = None, diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 92cc742e131..4e99370748c 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -2,26 +2,37 @@ import copy import math +import warnings from collections.abc import Hashable, Iterable, Mapping, Sequence -from typing import TYPE_CHECKING, Any, Callable, Generic, Union, cast +from typing import TYPE_CHECKING, Any, Callable, Generic, Literal, Union, cast import numpy as np # TODO: get rid of this after migrating this class to array API from xarray.core import dtypes -from xarray.core.indexing import ExplicitlyIndexed +from xarray.core.indexing import ( + ExplicitlyIndexed, + ImplicitToExplicitIndexingAdapter, + OuterIndexer, +) +from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager +from xarray.namedarray.pycompat import array_type from xarray.namedarray.utils import ( Default, T_DuckArray, _default, astype, + consolidate_dask_from_array_kwargs, + either_dict_or_kwargs, is_chunked_duck_array, + is_dict_like, is_duck_array, is_duck_dask_array, to_0d_object_array, ) if TYPE_CHECKING: + from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint from xarray.namedarray.utils import Self # type: ignore[attr-defined] try: @@ -446,12 +457,153 @@ def copy( """ return self._copy(deep=deep, data=data) - def _nonzero(self) -> tuple[Self, ...]: - """Equivalent numpy's nonzero but returns a tuple of NamedArrays.""" - # TODO we should replace dask's native nonzero - # after https://github.com/dask/dask/issues/1076 is implemented. - nonzeros = np.nonzero(self.data) - return tuple(type(self)((dim,), nz) for nz, dim in zip(nonzeros, self.dims)) + def chunk( + self, + chunks: ( + int + | Literal["auto"] + | tuple[int, ...] + | tuple[tuple[int, ...], ...] + | Mapping[Any, None | int | tuple[int, ...]] + ) = {}, + name: str | None = None, + lock: bool | None = None, + inline_array: bool | None = None, + chunked_array_type: str | ChunkManagerEntrypoint | None = None, + from_array_kwargs=None, + **chunks_kwargs: Any, + ) -> Self: + """Coerce this array's data into a dask array with the given chunks. + + If this variable is a non-dask array, it will be converted to dask + array. If it's a dask array, it will be rechunked to the given chunk + sizes. + + If chunks is not provided for one or more dimensions, chunk + sizes along that dimension will not be updated; non-dask arrays will be + converted into dask arrays with a single block. + + Parameters + ---------- + chunks : int, tuple or dict, optional + Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or + ``{'x': 5, 'y': 5}``. + name : str, optional + Used to generate the name for this array in the internal dask + graph. Does not need to be unique. 
+ lock : bool, default: False + Passed on to :py:func:`dask.array.from_array`, if the array is not + already a dask array. + inline_array : bool, default: False + Passed on to :py:func:`dask.array.from_array`, if the array is not + already a dask array. + chunked_array_type: str, optional + Which chunked array type to coerce this dataset's arrays to. + Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system. + Experimental API that should not be relied upon. + from_array_kwargs: dict, optional + Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create + chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg. + For example, with dask as the default chunked array type, this method would pass additional kwargs + to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon. + **chunks_kwargs : {dim: chunks, ...}, optional + The keyword arguments form of ``chunks``. + One of chunks or chunks_kwargs must be provided. + + Returns + ------- + chunked : xarray.Variable + + See Also + -------- + Variable.chunks + Variable.chunksizes + xarray.unify_chunks + dask.array.from_array + """ + + if chunks is None: + warnings.warn( + "None value for 'chunks' is deprecated. " + "It will raise an error in the future. Use instead '{}'", + category=FutureWarning, + ) + chunks = {} + + if isinstance(chunks, (float, str, int, tuple, list)): + # TODO we shouldn't assume here that other chunkmanagers can handle these types + # TODO should we call normalize_chunks here? + pass # dask.array.from_array can handle these directly + else: + chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") + + if is_dict_like(chunks): + chunks = {self.get_axis_num(dim): chunk for dim, chunk in chunks.items()} + + chunkmanager = guess_chunkmanager(chunked_array_type) + + if from_array_kwargs is None: + from_array_kwargs = {} + + # TODO deprecate passing these dask-specific arguments explicitly. In future just pass everything via from_array_kwargs + _from_array_kwargs = consolidate_dask_from_array_kwargs( + from_array_kwargs, + name=name, + lock=lock, + inline_array=inline_array, + ) + + data_old = self._data + if chunkmanager.is_chunked_array(data_old): + data_chunked = chunkmanager.rechunk(data_old, chunks) + else: + if not isinstance(data_old, ExplicitlyIndexed): + ndata = data_old + else: + # Unambiguously handle array storage backends (like NetCDF4 and h5py) + # that can't handle general array indexing. For example, in netCDF4 you + # can do "outer" indexing along two dimensions independently, which works + # differently from how NumPy handles it. + # da.from_array works by using lazy indexing with a tuple of slices. + # Using OuterIndexer is a pragmatic choice: dask does not yet handle + # different indexing types in an explicit way: + # https://github.com/dask/dask/issues/2883 + ndata = ImplicitToExplicitIndexingAdapter(data_old, OuterIndexer) + + if is_dict_like(chunks): + chunks = tuple(chunks.get(n, s) for n, s in enumerate(ndata.shape)) + + data_chunked = chunkmanager.from_array( + ndata, + chunks, + **_from_array_kwargs, + ) + + return self._replace(data=data_chunked) + + def to_numpy(self) -> np.ndarray: + """Coerces wrapped data to numpy and returns a numpy.ndarray""" + # TODO an entrypoint so array libraries can choose coercion method? 
+ data = self.data + + # TODO first attempt to call .to_numpy() once some libraries implement it + if hasattr(data, "chunks"): + chunkmanager = get_chunked_array_type(data) + data, *_ = chunkmanager.compute(data) + if isinstance(data, array_type("cupy")): + data = data.get() + # pint has to be imported dynamically as pint imports xarray + if isinstance(data, array_type("pint")): + data = data.magnitude + if isinstance(data, array_type("sparse")): + data = data.todense() + data = np.asarray(data) + + return data + + def as_numpy(self) -> Self: + """Coerces wrapped data into a numpy array, returning a Variable.""" + return self._replace(data=self.to_numpy()) def _as_sparse( self, @@ -486,3 +638,10 @@ def _to_dense(self) -> Self: if hasattr(self._data, "todense"): return self._replace(data=self._data.todense()) return self.copy(deep=False) + + def _nonzero(self) -> tuple[Self, ...]: + """Equivalent to numpy's nonzero but returns a tuple of NamedArrays.""" + # TODO we should replace dask's native nonzero + # after https://github.com/dask/dask/issues/1076 is implemented. + nonzeros = np.nonzero(self.data) + return tuple(type(self)((dim,), nz) for nz, dim in zip(nonzeros, self.dims)) diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 8c598a9a3b3..d4bd0d9bc8d 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -2,10 +2,10 @@ import importlib import sys -from collections.abc import Hashable +from collections.abc import Hashable, Mapping from enum import Enum from types import ModuleType -from typing import TYPE_CHECKING, Any, Final, Protocol, TypeVar +from typing import TYPE_CHECKING, Any, Final, Protocol, TypeVar, cast import numpy as np @@ -27,6 +27,7 @@ DaskArray = np.ndarray # type: ignore DaskCollection: Any = np.ndarray # type: ignore +T = TypeVar("T") # https://stackoverflow.com/questions/74633074/how-to-type-hint-a-generic-numpy-array T_DType_co = TypeVar("T_DType_co", bound=np.dtype[np.generic], covariant=True) @@ -178,3 +179,90 @@ def imag(x: _Array[Any], /) -> _Array[Any]: def real(x: _Array[Any], /) -> _Array[Any]: xp = get_array_namespace(x) return xp.real(x) # type: ignore[no-any-return] + + +# It's probably OK to give this as a TypeGuard; though it's not perfectly robust. +def is_dict_like(value: Any) -> TypeGuard[Mapping]: + return hasattr(value, "keys") and hasattr(value, "__getitem__") + + +def either_dict_or_kwargs( + pos_kwargs: Mapping[Any, T] | None, + kw_kwargs: Mapping[str, T], + func_name: str, +) -> Mapping[Hashable, T]: + if pos_kwargs is None or pos_kwargs == {}: + # Need an explicit cast to appease mypy due to invariance; see + # https://github.com/python/mypy/issues/6228 + return cast(Mapping[Hashable, T], kw_kwargs) + + if not is_dict_like(pos_kwargs): + raise ValueError(f"the first argument to .{func_name} must be a dictionary") + if kw_kwargs: + raise ValueError( + f"cannot specify both keyword and positional arguments to .{func_name}" + ) + return pos_kwargs + + +def consolidate_dask_from_array_kwargs( + from_array_kwargs: dict, + name: str | None = None, + lock: bool | None = None, + inline_array: bool | None = None, +) -> dict: + """ + Merge dask-specific kwargs with arbitrary from_array_kwargs dict. + + Temporary function, to be deleted once explicitly passing dask-specific kwargs to .chunk() is deprecated. 
+ """ + + from_array_kwargs = _resolve_doubly_passed_kwarg( + from_array_kwargs, + kwarg_name="name", + passed_kwarg_value=name, + default=None, + err_msg_dict_name="from_array_kwargs", + ) + from_array_kwargs = _resolve_doubly_passed_kwarg( + from_array_kwargs, + kwarg_name="lock", + passed_kwarg_value=lock, + default=False, + err_msg_dict_name="from_array_kwargs", + ) + from_array_kwargs = _resolve_doubly_passed_kwarg( + from_array_kwargs, + kwarg_name="inline_array", + passed_kwarg_value=inline_array, + default=False, + err_msg_dict_name="from_array_kwargs", + ) + + return from_array_kwargs + + +def _resolve_doubly_passed_kwarg( + kwargs_dict: dict, + kwarg_name: str, + passed_kwarg_value: str | bool | None, + default: bool | None, + err_msg_dict_name: str, +) -> dict: + # if in kwargs_dict but not passed explicitly then just pass kwargs_dict through unaltered + if kwarg_name in kwargs_dict and passed_kwarg_value is None: + pass + # if passed explicitly but not in kwargs_dict then use that + elif kwarg_name not in kwargs_dict and passed_kwarg_value is not None: + kwargs_dict[kwarg_name] = passed_kwarg_value + # if in neither then use default + elif kwarg_name not in kwargs_dict and passed_kwarg_value is None: + kwargs_dict[kwarg_name] = default + # if in both then raise + else: + raise ValueError( + f"argument {kwarg_name} cannot be passed both as a keyword argument and within " + f"the {err_msg_dict_name} dictionary" + ) + + return kwargs_dict From f3af5b5552c7e0dd34e4e3cca27b8ecd71f8f235 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 16 Oct 2023 17:46:04 -0400 Subject: [PATCH 04/54] move compute/load --- xarray/core/variable.py | 50 --------------------------------------- xarray/namedarray/core.py | 50 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 51 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 934ed9273de..0ac45a9e937 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -29,7 +29,6 @@ from xarray.core.pycompat import ( integer_types, is_0d_dask_array, - is_chunked_array, is_duck_dask_array, ) from xarray.core.utils import ( @@ -44,7 +43,6 @@ maybe_coerce_to_str, ) from xarray.namedarray.core import NamedArray -from xarray.namedarray.parallelcompat import get_chunked_array_type NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, @@ -476,54 +474,6 @@ def astype( dask="allowed", ) - def load(self, **kwargs): - """Manually trigger loading of this variable's data from disk or a - remote source into memory and return this variable. - - Normally, it should not be necessary to call this method in user code, - because all xarray functions should either work on deferred data or - load data automatically. - - Parameters - ---------- - **kwargs : dict - Additional keyword arguments passed on to ``dask.array.compute``. - - See Also - -------- - dask.array.compute - """ - if is_chunked_array(self._data): - chunkmanager = get_chunked_array_type(self._data) - loaded_data, *_ = chunkmanager.compute(self._data, **kwargs) - self._data = as_compatible_data(loaded_data) - elif isinstance(self._data, indexing.ExplicitlyIndexed): - self._data = self._data.get_duck_array() - elif not is_duck_array(self._data): - self._data = np.asarray(self._data) - return self - - def compute(self, **kwargs): - """Manually trigger loading of this variable's data from disk or a - remote source into memory and return a new variable. The original is - left unaltered. 
- - Normally, it should not be necessary to call this method in user code, - because all xarray functions should either work on deferred data or - load data automatically. - - Parameters - ---------- - **kwargs : dict - Additional keyword arguments passed on to ``dask.array.compute``. - - See Also - -------- - dask.array.compute - """ - new = self.copy(deep=False) - return new.load(**kwargs) - def _dask_finalize(self, results, array_func, *args, **kwargs): data = array_func(results, *args, **kwargs) return Variable(self._dims, data, attrs=self._attrs, encoding=self._encoding) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 4e99370748c..9bffb0be18a 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -16,7 +16,7 @@ OuterIndexer, ) from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager -from xarray.namedarray.pycompat import array_type +from xarray.namedarray.pycompat import array_type, is_chunked_array from xarray.namedarray.utils import ( Default, T_DuckArray, @@ -338,6 +338,54 @@ def _dask_finalize( data = array_func(results, *args, **kwargs) return type(self)(self._dims, data, attrs=self._attrs) + def load(self, **kwargs): + """Manually trigger loading of this variable's data from disk or a + remote source into memory and return this variable. + + Normally, it should not be necessary to call this method in user code, + because all xarray functions should either work on deferred data or + load data automatically. + + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.array.compute``. + + See Also + -------- + dask.array.compute + """ + if is_chunked_array(self._data): + chunkmanager = get_chunked_array_type(self._data) + loaded_data, *_ = chunkmanager.compute(self._data, **kwargs) + self._data = as_compatible_data(loaded_data) + elif isinstance(self._data, ExplicitlyIndexed): + self._data = self._data.get_duck_array() + elif not is_duck_array(self._data): + self._data = np.asarray(self._data) + return self + + def compute(self, **kwargs): + """Manually trigger loading of this variable's data from disk or a + remote source into memory and return a new variable. The original is + left unaltered. + + Normally, it should not be necessary to call this method in user code, + because all xarray functions should either work on deferred data or + load data automatically. + + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.array.compute``. + + See Also + -------- + dask.array.compute + """ + new = self.copy(deep=False) + return new.load(**kwargs) + @property def chunks(self) -> tuple[tuple[int, ...], ...] 
| None: """ From 4b4afab19f1cd9d9730e35fc089bf44f8efff320 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 16 Oct 2023 18:10:49 -0400 Subject: [PATCH 05/54] redirect utils imports --- xarray/backends/api.py | 2 +- xarray/backends/common.py | 2 +- xarray/backends/pydap_.py | 4 ++-- xarray/backends/zarr.py | 2 +- xarray/coding/times.py | 2 +- xarray/coding/variables.py | 2 +- xarray/conventions.py | 2 +- xarray/convert.py | 2 +- xarray/core/accessor_dt.py | 2 +- xarray/core/alignment.py | 3 ++- xarray/core/arithmetic.py | 2 +- xarray/core/common.py | 4 ++-- xarray/core/computation.py | 5 +++-- xarray/core/coordinates.py | 2 +- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 17 ++++++++--------- xarray/core/duck_array_ops.py | 5 +++-- xarray/core/formatting.py | 4 ++-- xarray/core/groupby.py | 4 ++-- xarray/core/indexes.py | 2 +- xarray/core/indexing.py | 14 +++++++------- xarray/core/nputils.py | 2 +- xarray/core/parallel.py | 2 +- xarray/core/rolling.py | 4 ++-- xarray/core/utils.py | 2 +- xarray/core/variable.py | 13 ++++++------- xarray/core/weighted.py | 2 +- xarray/namedarray/daskmanager.py | 2 +- xarray/namedarray/pycompat.py | 3 ++- xarray/plot/utils.py | 2 +- xarray/tests/test_backends.py | 2 +- xarray/tests/test_dataset.py | 2 +- xarray/tests/test_duck_array_ops.py | 2 +- xarray/tests/test_missing.py | 2 +- xarray/tests/test_parallelcompat.py | 6 +++--- xarray/tests/test_sparse.py | 2 +- xarray/tests/test_utils.py | 3 ++- xarray/tests/test_variable.py | 2 +- 38 files changed, 70 insertions(+), 67 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index ee7e0713118..0529a7f1f49 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -33,11 +33,11 @@ _nested_combine, combine_by_coords, ) -from xarray.core.daskmanager import DaskManager from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset, _get_chunk, _maybe_chunk from xarray.core.indexes import Index from xarray.core.utils import is_remote_uri +from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import guess_chunkmanager if TYPE_CHECKING: diff --git a/xarray/backends/common.py b/xarray/backends/common.py index e4dfc2bf478..ca393937886 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -12,9 +12,9 @@ from xarray.conventions import cf_encoder from xarray.core import indexing -from xarray.core.pycompat import is_chunked_array from xarray.core.utils import FrozenDict, NdimSizeLenMixin, is_remote_uri from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import is_chunked_array if TYPE_CHECKING: from io import BufferedIOBase diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 9b5bcc82e6f..f14a32a5dde 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -14,15 +14,15 @@ ) from xarray.backends.store import StoreBackendEntrypoint from xarray.core import indexing -from xarray.core.pycompat import integer_types from xarray.core.utils import ( Frozen, FrozenDict, close_on_error, - is_dict_like, is_remote_uri, ) from xarray.core.variable import Variable +from xarray.namedarray.pycompat import integer_types +from xarray.namedarray.utils import is_dict_like if TYPE_CHECKING: import os diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index aa38eba1455..fea4459bac3 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -19,7 +19,6 @@ ) from xarray.backends.store import StoreBackendEntrypoint 
from xarray.core import indexing -from xarray.core.pycompat import integer_types from xarray.core.utils import ( FrozenDict, HiddenKeyDict, @@ -27,6 +26,7 @@ ) from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import guess_chunkmanager +from xarray.namedarray.pycompat import integer_types if TYPE_CHECKING: from io import BufferedIOBase diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 039fe371100..b312e797133 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -24,9 +24,9 @@ from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like from xarray.core.formatting import first_n_items, format_timestamp, last_item from xarray.core.pdcompat import nanosecond_precision_timestamp -from xarray.core.pycompat import is_duck_dask_array from xarray.core.utils import emit_user_level_warning from xarray.core.variable import Variable +from xarray.namedarray.pycompat import is_duck_dask_array try: import cftime diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 028216a5c39..d0fc808c324 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -10,9 +10,9 @@ import pandas as pd from xarray.core import dtypes, duck_array_ops, indexing -from xarray.core.pycompat import is_chunked_array from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import is_chunked_array if TYPE_CHECKING: T_VarTuple = tuple[tuple[Hashable, ...], Any, dict, dict] diff --git a/xarray/conventions.py b/xarray/conventions.py index cf207f0c37a..1b6b0107953 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -14,9 +14,9 @@ _contains_datetime_like_objects, contains_cftime_datetimes, ) -from xarray.core.pycompat import is_duck_dask_array from xarray.core.utils import emit_user_level_warning from xarray.core.variable import IndexVariable, Variable +from xarray.namedarray.pycompat import is_duck_dask_array CF_RELATED_DATA = ( "bounds", diff --git a/xarray/convert.py b/xarray/convert.py index 5863352ae41..53bd88484ae 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -10,7 +10,7 @@ from xarray.core import duck_array_ops from xarray.core.dataarray import DataArray from xarray.core.dtypes import get_fill_value -from xarray.core.pycompat import array_type +from xarray.namedarray.pycompat import array_type cdms2_ignored_attrs = {"name", "tileIndex"} iris_forbidden_keys = { diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 8255e2a5232..9de6f371ef4 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -13,9 +13,9 @@ is_np_datetime_like, is_np_timedelta_like, ) -from xarray.core.pycompat import is_duck_dask_array from xarray.core.types import T_DataArray from xarray.core.variable import IndexVariable +from xarray.namedarray.pycompat import is_duck_dask_array if TYPE_CHECKING: from numpy.typing import DTypeLike diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 732ec5d3ea6..62820c7f601 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -20,8 +20,9 @@ safe_cast_to_index, ) from xarray.core.types import T_Alignable -from xarray.core.utils import is_dict_like, is_full_slice +from xarray.core.utils import is_full_slice from xarray.core.variable import Variable, as_compatible_data, calculate_dimensions +from xarray.namedarray.utils import is_dict_like if TYPE_CHECKING: from xarray.core.dataarray import DataArray diff --git 
a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 5cdbc732741..46263b7b5a3 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -20,7 +20,7 @@ IncludeReduceMethods, ) from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.pycompat import is_duck_array +from xarray.namedarray.pycompat import is_duck_array class SupportsArithmetic: diff --git a/xarray/core/common.py b/xarray/core/common.py index a32d5a79e98..c7f82764f29 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -13,14 +13,14 @@ from xarray.core import dtypes, duck_array_ops, formatting, formatting_html, ops from xarray.core.indexing import BasicIndexer, ExplicitlyIndexed from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.pycompat import is_chunked_array from xarray.core.utils import ( Frozen, - either_dict_or_kwargs, emit_user_level_warning, is_scalar, ) from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager +from xarray.namedarray.pycompat import is_chunked_array +from xarray.namedarray.utils import either_dict_or_kwargs try: import cftime diff --git a/xarray/core/computation.py b/xarray/core/computation.py index e59995f0943..99b0f951919 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -21,11 +21,12 @@ from xarray.core.indexes import Index, filter_indexes_from_coords from xarray.core.merge import merge_attrs, merge_coordinates_without_align from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.pycompat import is_chunked_array, is_duck_dask_array from xarray.core.types import Dims, T_DataArray -from xarray.core.utils import is_dict_like, is_scalar +from xarray.core.utils import is_scalar from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import is_chunked_array, is_duck_dask_array +from xarray.namedarray.utils import is_dict_like if TYPE_CHECKING: from xarray.core.coordinates import Coordinates diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 0c85b2a2d69..8e950698ffd 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -27,10 +27,10 @@ from xarray.core.utils import ( Frozen, ReprObject, - either_dict_or_kwargs, emit_user_level_warning, ) from xarray.core.variable import Variable, as_variable, calculate_dimensions +from xarray.namedarray.utils import either_dict_or_kwargs if TYPE_CHECKING: from xarray.core.common import DataWithCoords diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index cec08628484..e076b238797 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -55,7 +55,6 @@ HybridMappingProxy, ReprObject, _default, - either_dict_or_kwargs, emit_user_level_warning, ) from xarray.core.variable import ( @@ -64,6 +63,7 @@ as_compatible_data, as_variable, ) +from xarray.namedarray.utils import either_dict_or_kwargs from xarray.plot.accessor import DataArrayPlotAccessor from xarray.plot.utils import _get_units_from_attrs from xarray.util.deprecation_helpers import _deprecate_positional_args diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d4b57a75652..a2110a35230 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -63,7 +63,6 @@ assert_coordinate_consistent, create_coords_with_default_indexes, ) -from xarray.core.daskmanager import DaskManager from xarray.core.duck_array_ops import datetime_to_numeric from xarray.core.indexes import ( Index, @@ -86,12 +85,6 
@@ ) from xarray.core.missing import get_clean_interp_index from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.pycompat import ( - array_type, - is_chunked_array, - is_duck_array, - is_duck_dask_array, -) from xarray.core.types import ( QuantileMethods, Self, @@ -108,9 +101,7 @@ _default, decode_numpy_dict_values, drop_dims_from_indexers, - either_dict_or_kwargs, infix_dims, - is_dict_like, is_scalar, maybe_wrap_array, ) @@ -121,7 +112,15 @@ broadcast_variables, calculate_dimensions, ) +from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager +from xarray.namedarray.pycompat import ( + array_type, + is_chunked_array, + is_duck_array, + is_duck_dask_array, +) +from xarray.namedarray.utils import either_dict_or_kwargs, is_dict_like from xarray.plot.accessor import DatasetPlotAccessor from xarray.util.deprecation_helpers import _deprecate_positional_args diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 2f9b18aba54..4cedfce8a6a 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -34,9 +34,10 @@ from numpy.lib.stride_tricks import sliding_window_view # noqa from xarray.core import dask_array_ops, dtypes, nputils -from xarray.core.pycompat import array_type, is_duck_dask_array -from xarray.core.utils import is_duck_array, module_available +from xarray.core.utils import module_available from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array +from xarray.namedarray.pycompat import array_type, is_duck_dask_array +from xarray.namedarray.utils import is_duck_array dask_available = module_available("dask") diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 942bf5891ca..bc7f97976bd 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -19,8 +19,8 @@ from xarray.core.duck_array_ops import array_equiv from xarray.core.indexing import ExplicitlyIndexed, MemoryCachedArray from xarray.core.options import OPTIONS, _get_boolean_with_default -from xarray.core.pycompat import array_type -from xarray.core.utils import is_duck_array +from xarray.namedarray.pycompat import array_type +from xarray.namedarray.utils import is_duck_array if TYPE_CHECKING: from xarray.core.coordinates import AbstractCoordinates diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 8ed7148e2a1..390ffba97d1 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -33,16 +33,16 @@ safe_cast_to_index, ) from xarray.core.options import _get_keep_attrs -from xarray.core.pycompat import integer_types from xarray.core.types import Dims, QuantileMethods, T_DataArray, T_Xarray from xarray.core.utils import ( - either_dict_or_kwargs, hashable, is_scalar, maybe_wrap_array, peek_at, ) from xarray.core.variable import IndexVariable, Variable +from xarray.namedarray.pycompat import integer_types +from xarray.namedarray.utils import either_dict_or_kwargs from xarray.util.deprecation_helpers import _deprecate_positional_args if TYPE_CHECKING: diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 1697762f7ae..6e20a313ab4 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -19,9 +19,9 @@ Frozen, emit_user_level_warning, get_valid_numpy_dtype, - is_dict_like, is_scalar, ) +from xarray.namedarray.utils import is_dict_like if TYPE_CHECKING: from xarray.core.types import ErrorOptions, JoinOptions, Self diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py 
index 349d105e9b9..55a4eddb8ff 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -17,21 +17,21 @@ from xarray.core import duck_array_ops from xarray.core.nputils import NumpyVIndexAdapter from xarray.core.options import OPTIONS -from xarray.core.pycompat import ( - array_type, - integer_types, - is_duck_array, - is_duck_dask_array, -) from xarray.core.types import T_Xarray from xarray.core.utils import ( NDArrayMixin, - either_dict_or_kwargs, get_valid_numpy_dtype, is_scalar, to_0d_array, ) from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array +from xarray.namedarray.pycompat import ( + array_type, + integer_types, + is_duck_array, + is_duck_dask_array, +) +from xarray.namedarray.utils import either_dict_or_kwargs if TYPE_CHECKING: from numpy.typing import DTypeLike diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index c49a06dfc9c..ab275eef72a 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -13,7 +13,7 @@ from numpy import RankWarning from xarray.core.options import OPTIONS -from xarray.core.pycompat import is_duck_array +from xarray.namedarray.pycompat import is_duck_array try: import bottleneck as bn diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 949576b4ee8..aa635ec1cfd 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -11,7 +11,7 @@ from xarray.core.alignment import align from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset -from xarray.core.pycompat import is_dask_collection +from xarray.namedarray.pycompat import is_dask_collection if TYPE_CHECKING: from xarray.core.types import T_Xarray diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index b85092982e3..f236c83c530 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -12,9 +12,9 @@ from xarray.core import dtypes, duck_array_ops, utils from xarray.core.arithmetic import CoarsenArithmetic from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.pycompat import is_duck_dask_array from xarray.core.types import CoarsenBoundaryOptions, SideOptions, T_Xarray -from xarray.core.utils import either_dict_or_kwargs +from xarray.namedarray.pycompat import is_duck_dask_array +from xarray.namedarray.utils import either_dict_or_kwargs try: import bottleneck diff --git a/xarray/core/utils.py b/xarray/core/utils.py index b9111099233..8a15412766f 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -1104,7 +1104,7 @@ def contains_only_chunked_or_numpy(obj) -> bool: Expects obj to be Dataset or DataArray""" from xarray.core.dataarray import DataArray - from xarray.core.pycompat import is_chunked_array + from xarray.namedarray.pycompat import is_chunked_array if isinstance(obj, DataArray): obj = obj._to_temp_dataset() diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 0ac45a9e937..86f676e5a15 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -26,23 +26,22 @@ as_indexable, ) from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.pycompat import ( - integer_types, - is_0d_dask_array, - is_duck_dask_array, -) from xarray.core.utils import ( OrderedSet, _default, decode_numpy_dict_values, drop_dims_from_indexers, - either_dict_or_kwargs, ensure_us_time_resolution, infix_dims, - is_duck_array, maybe_coerce_to_str, ) from xarray.namedarray.core import NamedArray +from xarray.namedarray.pycompat import ( + integer_types, + is_0d_dask_array, + is_duck_dask_array, +) +from 
xarray.namedarray.utils import either_dict_or_kwargs, is_duck_array NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 28740a99020..49c8938e3f6 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -9,8 +9,8 @@ from xarray.core import duck_array_ops, utils from xarray.core.alignment import align, broadcast from xarray.core.computation import apply_ufunc, dot -from xarray.core.pycompat import is_duck_dask_array from xarray.core.types import Dims, T_Xarray +from xarray.namedarray.pycompat import is_duck_dask_array from xarray.util.deprecation_helpers import _deprecate_positional_args # Weighted quantile methods are a subset of the numpy supported quantile methods. diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index e8a01a4f4ca..0844bf5f78b 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -8,8 +8,8 @@ from xarray.core.duck_array_ops import dask_available from xarray.core.indexing import ImplicitToExplicitIndexingAdapter -from xarray.core.pycompat import is_duck_dask_array from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray +from xarray.namedarray.pycompat import is_duck_dask_array if TYPE_CHECKING: from xarray.core.types import DaskArray, T_Chunks, T_NormalizedChunks diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 9af5d693170..c1a6e6bcbf0 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -7,7 +7,8 @@ import numpy as np from packaging.version import Version -from xarray.core.utils import is_duck_array, is_scalar, module_available +from xarray.core.utils import is_scalar, module_available +from xarray.namedarray.utils import is_duck_array integer_types = (int, np.integer) diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 5694acc06e8..0258b0769e1 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -13,8 +13,8 @@ from xarray.core.indexes import PandasMultiIndex from xarray.core.options import OPTIONS -from xarray.core.pycompat import DuckArrayModule from xarray.core.utils import is_scalar, module_available +from xarray.namedarray.pycompat import DuckArrayModule nc_time_axis_available = module_available("nc_time_axis") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 4c04841db8d..4a8fa2f1e2e 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -53,7 +53,7 @@ from xarray.conventions import encode_dataset_coordinates from xarray.core import indexing from xarray.core.options import set_options -from xarray.core.pycompat import array_type +from xarray.namedarray.pycompat import array_type from xarray.tests import ( assert_allclose, assert_array_equal, diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 687aae8f1dc..528efba494b 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -39,8 +39,8 @@ from xarray.core.common import duck_array_ops, full_like from xarray.core.coordinates import Coordinates, DatasetCoordinates from xarray.core.indexes import Index, PandasIndex -from xarray.core.pycompat import array_type, integer_types from xarray.core.utils import is_scalar +from xarray.namedarray.pycompat import array_type, integer_types from xarray.testing import _assert_internal_invariants from xarray.tests import ( DuckArrayWrapper, diff --git a/xarray/tests/test_duck_array_ops.py 
b/xarray/tests/test_duck_array_ops.py index 0d6efa2a8d3..d049bd8a9c1 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -27,7 +27,7 @@ timedelta_to_numeric, where, ) -from xarray.core.pycompat import array_type +from xarray.namedarray.pycompat import array_type from xarray.testing import assert_allclose, assert_equal, assert_identical from xarray.tests import ( arm_xfail, diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index c57d84c927d..88c134a34c9 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -14,7 +14,7 @@ _get_nan_block_lengths, get_clean_interp_index, ) -from xarray.core.pycompat import array_type +from xarray.namedarray.pycompat import array_type from xarray.tests import ( _CFTIME_CALENDARS, assert_allclose, diff --git a/xarray/tests/test_parallelcompat.py b/xarray/tests/test_parallelcompat.py index ea324cafb76..055ade31e21 100644 --- a/xarray/tests/test_parallelcompat.py +++ b/xarray/tests/test_parallelcompat.py @@ -5,14 +5,14 @@ import numpy as np import pytest -from xarray.core.daskmanager import DaskManager -from xarray.core.parallelcompat import ( +from xarray.core.types import T_Chunks, T_DuckArray, T_NormalizedChunks +from xarray.namedarray.daskmanager import DaskManager +from xarray.namedarray.parallelcompat import ( ChunkManagerEntrypoint, get_chunked_array_type, guess_chunkmanager, list_chunkmanagers, ) -from xarray.core.types import T_Chunks, T_DuckArray, T_NormalizedChunks from xarray.tests import has_dask, requires_dask diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 489836b70fd..fedac6e0498 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -10,7 +10,7 @@ import xarray as xr from xarray import DataArray, Variable -from xarray.core.pycompat import array_type +from xarray.namedarray.pycompat import array_type from xarray.tests import assert_equal, assert_identical, requires_dask filterwarnings = pytest.mark.filterwarnings diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 36f62fad71f..86bf0869b9f 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -7,7 +7,8 @@ import pytest from xarray.core import duck_array_ops, utils -from xarray.core.utils import either_dict_or_kwargs, iterate_nested +from xarray.core.utils import iterate_nested +from xarray.namedarray.utils import either_dict_or_kwargs from xarray.tests import assert_array_equal, requires_dask diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 73238b6ae3a..c47bf55f6d7 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -26,10 +26,10 @@ PandasIndexingAdapter, VectorizedIndexer, ) -from xarray.core.pycompat import array_type from xarray.core.types import T_DuckArray from xarray.core.utils import NDArrayMixin from xarray.core.variable import as_compatible_data, as_variable +from xarray.namedarray.pycompat import array_type from xarray.tests import ( assert_allclose, assert_array_equal, From cb8a346935365a2a0e387af9d4bdee01ea247574 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 16 Oct 2023 18:19:21 -0400 Subject: [PATCH 06/54] entrypoint should point to namedarray --- doc/internals/chunked-arrays.rst | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/internals/chunked-arrays.rst b/doc/internals/chunked-arrays.rst index 7192c3f0bc5..d34d971a1ec 100644 --- a/doc/internals/chunked-arrays.rst +++ 
b/doc/internals/chunked-arrays.rst @@ -67,7 +67,7 @@ To register a new entrypoint you need to add an entry to the ``setup.cfg`` like [options.entry_points] xarray.chunkmanagers = - dask = xarray.core.daskmanager:DaskManager + dask = xarray.namedarray.daskmanager:DaskManager See also `cubed-xarray `_ for another example. diff --git a/pyproject.toml b/pyproject.toml index bdae33e4d0d..3cb251057c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ issue-tracker = "https://github.com/pydata/xarray/issues" source-code = "https://github.com/pydata/xarray" [project.entry-points."xarray.chunkmanagers"] -dask = "xarray.core.daskmanager:DaskManager" +dask = "xarray.namedarray.daskmanager:DaskManager" [project.optional-dependencies] accel = ["scipy", "bottleneck", "numbagg", "flox"] From a3b79aab619c9e929d744fb1b32a222a734a1cff Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 17 Oct 2023 10:25:07 -0400 Subject: [PATCH 07/54] is_dict_like import --- xarray/core/dataarray.py | 24 ++++++++++-------------- xarray/core/dataset.py | 14 +++++++------- xarray/core/rolling.py | 6 +++--- xarray/core/variable.py | 10 +++++----- 4 files changed, 25 insertions(+), 29 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index e076b238797..7082871bcea 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -63,7 +63,7 @@ as_compatible_data, as_variable, ) -from xarray.namedarray.utils import either_dict_or_kwargs +from xarray.namedarray.utils import either_dict_or_kwargs, is_dict_like from xarray.plot.accessor import DataArrayPlotAccessor from xarray.plot.utils import _get_units_from_attrs from xarray.util.deprecation_helpers import _deprecate_positional_args @@ -144,11 +144,7 @@ def _infer_coords_and_dims( ) -> tuple[Mapping[Hashable, Any], tuple[Hashable, ...]]: """All the logic for creating a new DataArray""" - if ( - coords is not None - and not utils.is_dict_like(coords) - and len(coords) != len(shape) - ): + if coords is not None and not is_dict_like(coords) and len(coords) != len(shape): raise ValueError( f"coords is not dict-like, but it has {len(coords)} items, " f"which does not match the {len(shape)} dimensions of the " @@ -162,7 +158,7 @@ def _infer_coords_and_dims( dims = [f"dim_{n}" for n in range(len(shape))] if coords is not None and len(coords) == len(shape): # try to infer dimensions from coords - if utils.is_dict_like(coords): + if is_dict_like(coords): dims = list(coords.keys()) else: for n, (dim, coord) in enumerate(zip(dims, coords)): @@ -185,7 +181,7 @@ def _infer_coords_and_dims( new_coords = coords else: new_coords = {} - if utils.is_dict_like(coords): + if is_dict_like(coords): for k, v in coords.items(): new_coords[k] = as_variable(v, name=k) elif coords is not None: @@ -203,7 +199,7 @@ def _check_data_shape(data, coords, dims): if data is dtypes.NA: data = np.nan if coords is not None and utils.is_scalar(data, include_0d=False): - if utils.is_dict_like(coords): + if is_dict_like(coords): if dims is None: return data else: @@ -224,14 +220,14 @@ def __init__(self, data_array: T_DataArray): self.data_array = data_array def __getitem__(self, key) -> T_DataArray: - if not utils.is_dict_like(key): + if not is_dict_like(key): # expand the indexer so we can handle Ellipsis labels = indexing.expanded_indexer(key, self.data_array.ndim) key = dict(zip(self.data_array.dims, labels)) return self.data_array.sel(key) def __setitem__(self, key, value) -> None: - if not utils.is_dict_like(key): + if not is_dict_like(key): # expand the 
indexer so we can handle Ellipsis labels = indexing.expanded_indexer(key, self.data_array.ndim) key = dict(zip(self.data_array.dims, labels)) @@ -827,7 +823,7 @@ def dims(self, value: Any) -> NoReturn: ) def _item_key_to_dict(self, key: Any) -> Mapping[Hashable, Any]: - if utils.is_dict_like(key): + if is_dict_like(key): return key key = indexing.expanded_indexer(key, self.ndim) return dict(zip(self.dims, key)) @@ -2411,7 +2407,7 @@ def rename( if new_name_or_name_dict is None and not names: # change name to None? return self._replace(name=None) - if utils.is_dict_like(new_name_or_name_dict) or new_name_or_name_dict is None: + if is_dict_like(new_name_or_name_dict) or new_name_or_name_dict is None: # change dims/coords name_dict = either_dict_or_kwargs(new_name_or_name_dict, names, "rename") dataset = self._to_temp_dataset()._rename(name_dict) @@ -3350,7 +3346,7 @@ def fillna(self, value: Any) -> Self: * Z (Z) int64 0 1 2 3 4 5 height (Z) int64 0 10 20 30 40 50 """ - if utils.is_dict_like(value): + if is_dict_like(value): raise TypeError( "cannot provide fill value as a dictionary with " "fillna on a DataArray" diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a2110a35230..b9e4b7a8be5 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -494,12 +494,12 @@ def __init__(self, dataset: T_Dataset): self.dataset = dataset def __getitem__(self, key: Mapping[Any, Any]) -> T_Dataset: - if not utils.is_dict_like(key): + if not is_dict_like(key): raise TypeError("can only lookup dictionaries from Dataset.loc") return self.dataset.sel(key) def __setitem__(self, key, value) -> None: - if not utils.is_dict_like(key): + if not is_dict_like(key): raise TypeError( "can only set locations defined by dictionaries from Dataset.loc." f" Got: {key}" @@ -1341,7 +1341,7 @@ def _copy( ) -> Self: if data is None: data = {} - elif not utils.is_dict_like(data): + elif not is_dict_like(data): raise ValueError("Data must be dict-like") if data: @@ -1537,7 +1537,7 @@ def __getitem__( Indexing with a list of names will return a new ``Dataset`` object. """ - if utils.is_dict_like(key): + if is_dict_like(key): return self.isel(**key) if utils.hashable(key): return self._construct_dataarray(key) @@ -1568,7 +1568,7 @@ def __setitem__( """ from xarray.core.dataarray import DataArray - if utils.is_dict_like(key): + if is_dict_like(key): # check for consistency and convert value to dataset value = self._setitem_check(key, value) # loop over dataset variables and set new values @@ -6365,7 +6365,7 @@ def fillna(self, value: Any) -> Self: C (x) float64 2.0 2.0 2.0 5.0 D (x) float64 3.0 3.0 3.0 4.0 """ - if utils.is_dict_like(value): + if is_dict_like(value): value_keys = getattr(value, "data_vars", value).keys() if not set(value_keys) <= set(self.data_vars.keys()): raise ValueError( @@ -7567,7 +7567,7 @@ def apply_over_both(lhs_data_vars, rhs_data_vars, lhs_vars, rhs_vars): dest_vars[k] = f(rhs_vars[k], np.nan) return dest_vars - if utils.is_dict_like(other) and not isinstance(other, Dataset): + if is_dict_like(other) and not isinstance(other, Dataset): # can't use our shortcut of doing the binary operation with # Variable objects, so apply over our data vars instead. 
new_data_vars = apply_over_both( diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index f236c83c530..5e6a10dfe2b 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -14,7 +14,7 @@ from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.core.types import CoarsenBoundaryOptions, SideOptions, T_Xarray from xarray.namedarray.pycompat import is_duck_dask_array -from xarray.namedarray.utils import either_dict_or_kwargs +from xarray.namedarray.utils import either_dict_or_kwargs, is_dict_like try: import bottleneck @@ -209,7 +209,7 @@ def _mapping_to_list( allow_default: bool = True, allow_allsame: bool = True, ) -> list[_T]: - if utils.is_dict_like(arg): + if is_dict_like(arg): if allow_default: return [arg.get(d, default) for d in self.dim] for d in self.dim: @@ -865,7 +865,7 @@ def __init__( f"dimensions {tuple(self.obj.dims)}" ) - if utils.is_dict_like(coord_func): + if is_dict_like(coord_func): coord_func_map = coord_func else: coord_func_map = {d: coord_func for d in self.obj.dims} diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 86f676e5a15..aeaa0201b93 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -41,7 +41,7 @@ is_0d_dask_array, is_duck_dask_array, ) -from xarray.namedarray.utils import either_dict_or_kwargs, is_duck_array +from xarray.namedarray.utils import either_dict_or_kwargs, is_dict_like, is_duck_array NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, @@ -535,7 +535,7 @@ def to_dict( return item def _item_key_to_tuple(self, key): - if utils.is_dict_like(key): + if is_dict_like(key): return tuple(key.get(dim, slice(None)) for dim in self.dims) else: return key @@ -1250,7 +1250,7 @@ def set_dims(self, dims, shape=None): if isinstance(dims, str): dims = [dims] - if shape is None and utils.is_dict_like(dims): + if shape is None and is_dict_like(dims): shape = dims.values() missing_dims = set(self.dims) - set(dims) @@ -2068,10 +2068,10 @@ def coarsen_reshape(self, windows, boundary, side): """ Construct a reshaped-array for coarsen """ - if not utils.is_dict_like(boundary): + if not is_dict_like(boundary): boundary = {d: boundary for d in windows.keys()} - if not utils.is_dict_like(side): + if not is_dict_like(side): side = {d: side for d in windows.keys()} # remove unrelated dimensions From c5546950cd9b7a6e94697c0a54e1464e568438b9 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 17 Oct 2023 10:52:45 -0400 Subject: [PATCH 08/54] fix import for consolidate_dask_from_array_kwargs --- xarray/core/dataset.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b9e4b7a8be5..a7e3e0685c0 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -120,7 +120,11 @@ is_duck_array, is_duck_dask_array, ) -from xarray.namedarray.utils import either_dict_or_kwargs, is_dict_like +from xarray.namedarray.utils import ( + consolidate_dask_from_array_kwargs, + either_dict_or_kwargs, + is_dict_like, +) from xarray.plot.accessor import DatasetPlotAccessor from xarray.util.deprecation_helpers import _deprecate_positional_args @@ -311,7 +315,7 @@ def _maybe_chunk( token2 = tokenize(name, token if token else var._data, chunks) name2 = f"{name_prefix}{name}-{token2}" - from_array_kwargs = utils.consolidate_dask_from_array_kwargs( + from_array_kwargs = consolidate_dask_from_array_kwargs( from_array_kwargs, name=name2, lock=lock, From 412cbc1d4dceacb610d962f70f4b2198cf2d42fb Mon Sep 17 00:00:00 2001 From: Thomas Nicholas 
Date: Tue, 17 Oct 2023 15:05:47 -0400 Subject: [PATCH 09/54] fix test --- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 4 ++-- xarray/tests/test_parallelcompat.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 7082871bcea..46a6e78b233 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2100,7 +2100,7 @@ def reindex( DataArray.reindex_like align """ - indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") return alignment.reindex( self, indexers=indexers, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a7e3e0685c0..48dc68980a2 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3689,7 +3689,7 @@ def reindex( original dataset, use the :py:meth:`~Dataset.fillna()` method. """ - indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") return alignment.reindex( self, indexers=indexers, @@ -3712,7 +3712,7 @@ def _reindex( """ Same as reindex but supports sparse option. """ - indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") return alignment.reindex( self, indexers=indexers, diff --git a/xarray/tests/test_parallelcompat.py b/xarray/tests/test_parallelcompat.py index 055ade31e21..13cddce872d 100644 --- a/xarray/tests/test_parallelcompat.py +++ b/xarray/tests/test_parallelcompat.py @@ -131,14 +131,14 @@ def register_dummy_chunkmanager(monkeypatch): This preserves the presence of the existing DaskManager, so a test that relies on this and DaskManager both being returned from list_chunkmanagers() at once would still work. - The monkeypatching changes the behavior of list_chunkmanagers when called inside xarray.core.parallelcompat, + The monkeypatching changes the behavior of list_chunkmanagers when called inside xarray.namedarray.parallelcompat, but not when called from this tests file. 
""" # Should include DaskManager iff dask is available to be imported preregistered_chunkmanagers = list_chunkmanagers() monkeypatch.setattr( - "xarray.core.parallelcompat.list_chunkmanagers", + "xarray.namedarray.parallelcompat.list_chunkmanagers", lambda: {"dummy": DummyChunkManager()} | preregistered_chunkmanagers, ) yield From 74ee3f008eaebeed813ea210d45eff2718e57e85 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Oct 2023 19:54:33 +0000 Subject: [PATCH 10/54] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/namedarray/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 5a8e4015e0e..707f91e59aa 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -15,9 +15,9 @@ ImplicitToExplicitIndexingAdapter, OuterIndexer, ) +from xarray.namedarray._aggregations import NamedArrayAggregations from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager from xarray.namedarray.pycompat import array_type, is_chunked_array -from xarray.namedarray._aggregations import NamedArrayAggregations from xarray.namedarray.utils import ( Default, T_DuckArray, @@ -33,8 +33,8 @@ ) if TYPE_CHECKING: - from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint from xarray.core.types import Dims + from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint from xarray.namedarray.utils import Self # type: ignore[attr-defined] try: @@ -678,7 +678,7 @@ def to_numpy(self) -> np.ndarray: def as_numpy(self) -> Self: """Coerces wrapped data into a numpy array, returning a Variable.""" return self._replace(data=self.to_numpy()) - + def reduce( self, func: Callable[..., Any], From fe69acafd874fdc65e8351ea06fe2d17c5fec7af Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 18 Oct 2023 13:42:36 -0400 Subject: [PATCH 11/54] move is_duck_array --- xarray/core/utils.py | 16 +--------------- xarray/namedarray/utils.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 8a15412766f..4db010ecf33 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -72,7 +72,7 @@ import pandas as pd if TYPE_CHECKING: - from xarray.core.types import Dims, ErrorOptionsWithWarn, OrderedDims, T_DuckArray + from xarray.core.types import Dims, ErrorOptionsWithWarn, OrderedDims K = TypeVar("K") V = TypeVar("V") @@ -247,20 +247,6 @@ def is_list_like(value: Any) -> TypeGuard[list | tuple]: return isinstance(value, (list, tuple)) -def is_duck_array(value: Any) -> TypeGuard[T_DuckArray]: - if isinstance(value, np.ndarray): - return True - return ( - hasattr(value, "ndim") - and hasattr(value, "shape") - and hasattr(value, "dtype") - and ( - (hasattr(value, "__array_function__") and hasattr(value, "__array_ufunc__")) - or hasattr(value, "__array_namespace__") - ) - ) - - def _is_scalar(value, include_0d): from xarray.core.variable import NON_NUMPY_SUPPORTED_ARRAY_TYPES diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 423fe6b3ea2..ba111368c9f 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -101,6 +101,20 @@ def is_duck_dask_array(x: duckarray[Any, Any]) -> TypeGuard[DaskArray]: return is_dask_collection(x) +def is_duck_array(value: Any) -> TypeGuard[T_DuckArray]: + if isinstance(value, np.ndarray): + return True + return ( + 
hasattr(value, "ndim") + and hasattr(value, "shape") + and hasattr(value, "dtype") + and ( + (hasattr(value, "__array_function__") and hasattr(value, "__array_ufunc__")) + or hasattr(value, "__array_namespace__") + ) + ) + + def to_0d_object_array( value: object, ) -> NDArray[np.object_]: From ba0df3f338a16e9c878450f52eba5a3ee3a8e499 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 18 Oct 2023 14:28:41 -0400 Subject: [PATCH 12/54] passing but with load/compute on variable --- xarray/core/variable.py | 51 +++++++++++++++++++++++++++++++++++ xarray/namedarray/core.py | 51 +---------------------------------- xarray/namedarray/pycompat.py | 2 +- xarray/testing.py | 9 ++++--- 4 files changed, 58 insertions(+), 55 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 853a71c66ba..d525e7a1033 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -20,6 +20,7 @@ from xarray.core.common import AbstractArray from xarray.core.indexing import ( BasicIndexer, + ExplicitlyIndexed, OuterIndexer, PandasIndexingAdapter, VectorizedIndexer, @@ -36,9 +37,11 @@ maybe_coerce_to_str, ) from xarray.namedarray.core import NamedArray +from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import ( integer_types, is_0d_dask_array, + is_chunked_array, is_duck_dask_array, ) from xarray.namedarray.utils import either_dict_or_kwargs, is_dict_like, is_duck_array @@ -910,6 +913,54 @@ def _replace( encoding = copy.copy(self._encoding) return type(self)(dims, data, attrs, encoding, fastpath=True) + def load(self, **kwargs): + """Manually trigger loading of this variable's data from disk or a + remote source into memory and return this variable. + + Normally, it should not be necessary to call this method in user code, + because all xarray functions should either work on deferred data or + load data automatically. + + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.array.compute``. + + See Also + -------- + dask.array.compute + """ + if is_chunked_array(self._data): + chunkmanager = get_chunked_array_type(self._data) + loaded_data, *_ = chunkmanager.compute(self._data, **kwargs) + self._data = as_compatible_data(loaded_data) + elif isinstance(self._data, ExplicitlyIndexed): + self._data = self._data.get_duck_array() + elif not is_duck_array(self._data): + self._data = np.asarray(self._data) + return self + + def compute(self, **kwargs): + """Manually trigger loading of this variable's data from disk or a + remote source into memory and return a new variable. The original is + left unaltered. + + Normally, it should not be necessary to call this method in user code, + because all xarray functions should either work on deferred data or + load data automatically. + + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.array.compute``. 
+ + See Also + -------- + dask.array.compute + """ + new = self.copy(deep=False) + return new.load(**kwargs) + def isel( self, indexers: Mapping[Any, Any] | None = None, diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 360e64f0dcc..ff75b385eab 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -35,13 +35,12 @@ _ShapeType_co, ) from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager -from xarray.namedarray.pycompat import array_type, is_chunked_array +from xarray.namedarray.pycompat import array_type from xarray.namedarray.utils import ( _default, consolidate_dask_from_array_kwargs, either_dict_or_kwargs, is_dict_like, - is_duck_array, is_duck_dask_array, to_0d_object_array, ) @@ -612,54 +611,6 @@ def _dask_finalize( data = array_func(results, *args, **kwargs) return type(self)(self._dims, data, attrs=self._attrs) - def load(self, **kwargs): - """Manually trigger loading of this variable's data from disk or a - remote source into memory and return this variable. - - Normally, it should not be necessary to call this method in user code, - because all xarray functions should either work on deferred data or - load data automatically. - - Parameters - ---------- - **kwargs : dict - Additional keyword arguments passed on to ``dask.array.compute``. - - See Also - -------- - dask.array.compute - """ - if is_chunked_array(self._data): - chunkmanager = get_chunked_array_type(self._data) - loaded_data, *_ = chunkmanager.compute(self._data, **kwargs) - self._data = as_compatible_data(loaded_data) - elif isinstance(self._data, ExplicitlyIndexed): - self._data = self._data.get_duck_array() - elif not is_duck_array(self._data): - self._data = np.asarray(self._data) - return self - - def compute(self, **kwargs): - """Manually trigger loading of this variable's data from disk or a - remote source into memory and return a new variable. The original is - left unaltered. - - Normally, it should not be necessary to call this method in user code, - because all xarray functions should either work on deferred data or - load data automatically. - - Parameters - ---------- - **kwargs : dict - Additional keyword arguments passed on to ``dask.array.compute``. - - See Also - -------- - dask.array.compute - """ - new = self.copy(deep=False) - return new.load(**kwargs) - def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, ...]: """Return axis number(s) corresponding to dimension(s) in this array. 
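(Illustrative sketch, not part of the patch: the behaviour the ``load``/``compute`` docstrings above describe, now kept on ``Variable`` rather than ``NamedArray``; assumes dask is installed.)

import dask.array as da
import numpy as np

from xarray import Variable

v = Variable("x", da.arange(4, chunks=2))

loaded = v.compute()  # returns a new Variable backed by numpy
assert isinstance(loaded.data, np.ndarray)
assert not isinstance(v.data, np.ndarray)  # the original is still chunked

v.load()  # loads in place
assert isinstance(v.data, np.ndarray)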
diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 208e0d7d122..9e41b4e4b29 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -106,7 +106,7 @@ def is_0d_dask_array(x): def to_numpy(data) -> np.ndarray: from xarray.core.indexing import ExplicitlyIndexed - from xarray.core.parallelcompat import get_chunked_array_type + from xarray.namedarray.parallelcompat import get_chunked_array_type if isinstance(data, ExplicitlyIndexed): data = data.get_duck_array() diff --git a/xarray/testing.py b/xarray/testing.py index 0837b562668..25b96e9d26d 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -13,6 +13,7 @@ from xarray.core.dataset import Dataset from xarray.core.indexes import Index, PandasIndex, PandasMultiIndex, default_indexes from xarray.core.variable import IndexVariable, Variable +from xarray.namedarray.utils import is_duck_array __all__ = ( "assert_allclose", @@ -228,14 +229,14 @@ def assert_duckarray_equal(x, y, err_msg="", verbose=True): """Like `np.testing.assert_array_equal`, but for duckarrays""" __tracebackhide__ = True - if not utils.is_duck_array(x) and not utils.is_scalar(x): + if not is_duck_array(x) and not utils.is_scalar(x): x = np.asarray(x) - if not utils.is_duck_array(y) and not utils.is_scalar(y): + if not is_duck_array(y) and not utils.is_scalar(y): y = np.asarray(y) - if (utils.is_duck_array(x) and utils.is_scalar(y)) or ( - utils.is_scalar(x) and utils.is_duck_array(y) + if (is_duck_array(x) and utils.is_scalar(y)) or ( + utils.is_scalar(x) and is_duck_array(y) ): equiv = (x == y).all() else: From 4723f839117953f2e9bee9026ea17ec0f68c24b6 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 24 Oct 2023 10:40:02 -0700 Subject: [PATCH 13/54] fix import --- xarray/core/variable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 9e7235b045c..5c524157e47 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -27,7 +27,6 @@ as_indexable, ) from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.parallelcompat import get_chunked_array_type from xarray.core.utils import ( OrderedSet, _default, @@ -38,6 +37,7 @@ maybe_coerce_to_str, ) from xarray.namedarray.core import NamedArray +from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import ( integer_types, is_0d_dask_array, From 0667b1e9eec1078913892513e1b8677f0d492202 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 24 Oct 2023 22:59:56 -0700 Subject: [PATCH 14/54] more typing --- xarray/namedarray/_typing.py | 1 + xarray/namedarray/core.py | 9 +-------- xarray/namedarray/parallelcompat.py | 28 +++++++++++----------------- 3 files changed, 13 insertions(+), 25 deletions(-) diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index 820371a7463..e2647723fca 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -55,6 +55,7 @@ def dtype(self) -> _DType_co: _ShapeType_co = TypeVar("_ShapeType_co", bound=Any, covariant=True) _Chunks = tuple[_Shape, ...] +_NormalizedChunks = tuple[tuple[int, ...], ...] _Dim = Hashable _Dims = tuple[_Dim, ...] 
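(Illustrative sketch, not part of the patch: what the new ``_NormalizedChunks`` alias describes — the fully explicit per-axis tuple-of-tuples form that e.g. dask's ``normalize_chunks`` produces from a looser ``_Chunks``-style request.)

from dask.array.core import normalize_chunks

# chunk sizes (2, 3) requested for a (4, 6)-shaped array, expanded per axis
assert normalize_chunks((2, 3), shape=(4, 6)) == ((2, 2), (3, 3))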
diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 3ff5ca9bf13..0d10ad37dce 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -831,7 +831,7 @@ def chunk( def to_numpy(self) -> np.ndarray: """Coerces wrapped data to numpy and returns a numpy.ndarray""" # TODO an entrypoint so array libraries can choose coercion method? - data = self.data + data = self._data # TODO first attempt to call .to_numpy() once some libraries implement it if hasattr(data, "chunks"): @@ -996,12 +996,5 @@ def _to_dense(self) -> Self: else: raise TypeError("self.data is not a sparse array") - def _nonzero(self) -> tuple[Self, ...]: - """Equivalent to numpy's nonzero but returns a tuple of NamedArrays.""" - # TODO we should replace dask's native nonzero - # after https://github.com/dask/dask/issues/1076 is implemented. - nonzeros = np.nonzero(self.data) - return tuple(type(self)((dim,), nz) for nz, dim in zip(nonzeros, self.dims)) - _NamedArray = NamedArray[Any, np.dtype[_ScalarType_co]] diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index 78b966df10d..8dbc713bbe2 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -10,22 +10,16 @@ from abc import ABC, abstractmethod from collections.abc import Iterable, Sequence from importlib.metadata import EntryPoint, entry_points -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Generic, - TypeVar, -) +from typing import TYPE_CHECKING, Any, Callable, Generic, TypeVar import numpy as np from xarray.namedarray.pycompat import is_chunked_array -T_ChunkedArray = TypeVar("T_ChunkedArray") - if TYPE_CHECKING: - from xarray.core.types import T_Chunks, T_DuckArray, T_NormalizedChunks + from xarray.namedarray._typing import T_DuckArray, _Chunks, _NormalizedChunks + + T_ChunkedArray = TypeVar("T_ChunkedArray") @functools.lru_cache(maxsize=1) @@ -195,7 +189,7 @@ def is_chunked_array(self, data: Any) -> bool: return isinstance(data, self.array_cls) @abstractmethod - def chunks(self, data: T_ChunkedArray) -> T_NormalizedChunks: + def chunks(self, data: T_ChunkedArray) -> _NormalizedChunks: """ Return the current chunks of the given array. @@ -221,12 +215,12 @@ def chunks(self, data: T_ChunkedArray) -> T_NormalizedChunks: @abstractmethod def normalize_chunks( self, - chunks: T_Chunks | T_NormalizedChunks, + chunks: _Chunks | _NormalizedChunks, shape: tuple[int, ...] | None = None, limit: int | None = None, dtype: np.dtype | None = None, - previous_chunks: T_NormalizedChunks | None = None, - ) -> T_NormalizedChunks: + previous_chunks: _NormalizedChunks | None = None, + ) -> _NormalizedChunks: """ Normalize given chunking pattern into an explicit tuple of tuples representation. @@ -257,7 +251,7 @@ def normalize_chunks( @abstractmethod def from_array( - self, data: T_DuckArray | np.typing.ArrayLike, chunks: T_Chunks, **kwargs + self, data: T_DuckArray | np.typing.ArrayLike, chunks: _Chunks, **kwargs ) -> T_ChunkedArray: """ Create a chunked array from a non-chunked numpy-like array. @@ -284,7 +278,7 @@ def from_array( def rechunk( self, data: T_ChunkedArray, - chunks: T_NormalizedChunks | tuple[int, ...] | T_Chunks, + chunks: _NormalizedChunks | tuple[int, ...] 
| _Chunks, **kwargs, ) -> T_ChunkedArray: """ @@ -593,7 +587,7 @@ def unify_chunks( self, *args: Any, # can't type this as mypy assumes args are all same type, but dask unify_chunks args alternate types **kwargs, - ) -> tuple[dict[str, T_NormalizedChunks], list[T_ChunkedArray]]: + ) -> tuple[dict[str, _NormalizedChunks], list[T_ChunkedArray]]: """ Unify chunks across a sequence of arrays. From d04fe49164e96bb37bc4a0e7e8095fb2c35cb946 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 25 Oct 2023 12:49:17 -0700 Subject: [PATCH 15/54] update utils --- xarray/namedarray/parallelcompat.py | 2 +- xarray/namedarray/pycompat.py | 4 +- xarray/namedarray/utils.py | 61 +---------------------------- 3 files changed, 5 insertions(+), 62 deletions(-) diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index 8dbc713bbe2..65da905e438 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -19,7 +19,7 @@ if TYPE_CHECKING: from xarray.namedarray._typing import T_DuckArray, _Chunks, _NormalizedChunks - T_ChunkedArray = TypeVar("T_ChunkedArray") +T_ChunkedArray = TypeVar("T_ChunkedArray") @functools.lru_cache(maxsize=1) diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 9e41b4e4b29..6eb9078a2ae 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -7,8 +7,8 @@ import numpy as np from packaging.version import Version -from xarray.core.utils import is_scalar, module_available -from xarray.namedarray.utils import is_duck_array +from xarray.core.utils import is_scalar +from xarray.namedarray.utils import is_duck_array, module_available integer_types = (int, np.integer) diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index ba111368c9f..ed04a7a461a 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -3,8 +3,7 @@ import sys from collections.abc import Hashable, Mapping from enum import Enum -from types import ModuleType -from typing import TYPE_CHECKING, Any, Final, Protocol, TypeVar, cast +from typing import TYPE_CHECKING, Any, Final, TypeVar, cast import numpy as np @@ -16,9 +15,7 @@ from numpy.typing import NDArray - from xarray.namedarray._typing import ( - duckarray, - ) + from xarray.namedarray._typing import T_DuckArray, duckarray try: from dask.array.core import Array as DaskArray @@ -34,33 +31,6 @@ T_DType = TypeVar("T_DType", bound=np.dtype[np.generic]) -class _Array(Protocol[T_DType_co]): - @property - def dtype(self) -> T_DType_co: - ... - - @property - def shape(self) -> tuple[int, ...]: - ... - - # TODO: numpy doesn't use any inputs: - # https://github.com/numpy/numpy/blob/v1.24.3/numpy/_typing/_array_like.py#L38 - def __array__(self) -> np.ndarray[Any, T_DType_co]: - ... - - -class _ChunkedArray(_Array[T_DType_co], Protocol[T_DType_co]): - @property - def chunks(self) -> tuple[tuple[int, ...], ...]: - ... - - -# temporary placeholder for indicating an array api compliant type. 
-# hopefully in the future we can narrow this down more -T_DuckArray = TypeVar("T_DuckArray", bound=_Array[np.dtype[np.generic]]) -T_ChunkedArray = TypeVar("T_ChunkedArray", bound=_ChunkedArray[np.dtype[np.generic]]) - - # Singleton type, as per https://github.com/python/typing/pull/240 class Default(Enum): token: Final = 0 @@ -150,33 +120,6 @@ def __dask_tokenize__(self) -> Hashable: return normalize_token((type(self), self._value)) # type: ignore[no-any-return] -# %% Array API functions -def get_array_namespace(x: _Array[Any]) -> ModuleType: - if hasattr(x, "__array_namespace__"): - return x.__array_namespace__() # type: ignore[no-any-return] - else: - return np - - -def astype(x: _Array[Any], dtype: T_DType, /, *, copy: bool = True) -> _Array[T_DType]: - if hasattr(x, "__array_namespace__"): - xp = x.__array_namespace__() - return xp.astype(x, dtype, copy=copy) # type: ignore[no-any-return] - - # np.astype doesn't exist yet: - return x.astype(dtype, copy=copy) # type: ignore[no-any-return, attr-defined] - - -def imag(x: _Array[Any], /) -> _Array[Any]: - xp = get_array_namespace(x) - return xp.imag(x) # type: ignore[no-any-return] - - -def real(x: _Array[Any], /) -> _Array[Any]: - xp = get_array_namespace(x) - return xp.real(x) # type: ignore[no-any-return] - - # It's probably OK to give this as a TypeGuard; though it's not perfectly robust. def is_dict_like(value: Any) -> TypeGuard[Mapping]: return hasattr(value, "keys") and hasattr(value, "__getitem__") From 466bb22c868da89f9e5ce0a09c6dd639fdc07169 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 25 Oct 2023 16:03:30 -0700 Subject: [PATCH 16/54] fix imports --- doc/internals/chunked-arrays.rst | 16 ++++++++-------- xarray/namedarray/core.py | 5 +---- xarray/namedarray/daskmanager.py | 6 ++++-- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/doc/internals/chunked-arrays.rst b/doc/internals/chunked-arrays.rst index d34d971a1ec..ba7ce72c834 100644 --- a/doc/internals/chunked-arrays.rst +++ b/doc/internals/chunked-arrays.rst @@ -35,24 +35,24 @@ The implementation of these functions is specific to the type of arrays passed t whereas :py:class:`cubed.Array` objects must be processed by :py:func:`cubed.map_blocks`. In order to use the correct implementation of a core operation for the array type encountered, xarray dispatches to the -corresponding subclass of :py:class:`~xarray.core.parallelcompat.ChunkManagerEntrypoint`, +corresponding subclass of :py:class:`~xarray.namedarray.parallelcompat.ChunkManagerEntrypoint`, also known as a "Chunk Manager". Therefore **a full list of the operations that need to be defined is set by the -API of the** :py:class:`~xarray.core.parallelcompat.ChunkManagerEntrypoint` **abstract base class**. Note that chunked array +API of the** :py:class:`~xarray.namedarray.parallelcompat.ChunkManagerEntrypoint` **abstract base class**. Note that chunked array methods are also currently dispatched using this class. Chunked array creation is also handled by this class. As chunked array objects have a one-to-one correspondence with in-memory numpy arrays, it should be possible to create a chunked array from a numpy array by passing the desired -chunking pattern to an implementation of :py:class:`~xarray.core.parallelcompat.ChunkManagerEntrypoint.from_array``. +chunking pattern to an implementation of :py:class:`~xarray.namedarray.parallelcompat.ChunkManagerEntrypoint.from_array``. .. 
note:: - The :py:class:`~xarray.core.parallelcompat.ChunkManagerEntrypoint` abstract base class is mostly just acting as a + The :py:class:`~xarray.namedarray.parallelcompat.ChunkManagerEntrypoint` abstract base class is mostly just acting as a namespace for containing the chunked-aware function primitives. Ideally in the future we would have an API standard for chunked array types which codified this structure, making the entrypoint system unnecessary. -.. currentmodule:: xarray.core.parallelcompat +.. currentmodule:: xarray.namedarray.parallelcompat -.. autoclass:: xarray.core.parallelcompat.ChunkManagerEntrypoint +.. autoclass:: xarray.namedarray.parallelcompat.ChunkManagerEntrypoint :members: Registering a new ChunkManagerEntrypoint subclass @@ -60,7 +60,7 @@ Registering a new ChunkManagerEntrypoint subclass Rather than hard-coding various chunk managers to deal with specific chunked array implementations, xarray uses an entrypoint system to allow developers of new chunked array implementations to register their corresponding subclass of -:py:class:`~xarray.core.parallelcompat.ChunkManagerEntrypoint`. +:py:class:`~xarray.namedarray.parallelcompat.ChunkManagerEntrypoint`. To register a new entrypoint you need to add an entry to the ``setup.cfg`` like this:: @@ -72,7 +72,7 @@ To register a new entrypoint you need to add an entry to the ``setup.cfg`` like See also `cubed-xarray `_ for another example. To check that the entrypoint has worked correctly, you may find it useful to display the available chunkmanagers using -the internal function :py:func:`~xarray.core.parallelcompat.list_chunkmanagers`. +the internal function :py:func:`~xarray.namedarray.parallelcompat.list_chunkmanagers`. .. autofunction:: list_chunkmanagers diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 5908584dc8a..eb0edcbba18 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -67,10 +67,7 @@ duckarray, ) from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint - from xarray.namedarray.utils import ( - Default, - Self, # type: ignore[attr-defined] - ) + from xarray.namedarray.utils import Default try: from dask.typing import ( diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index 0844bf5f78b..2097ea1f420 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -6,15 +6,17 @@ import numpy as np from packaging.version import Version -from xarray.core.duck_array_ops import dask_available from xarray.core.indexing import ImplicitToExplicitIndexingAdapter from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray -from xarray.namedarray.pycompat import is_duck_dask_array +from xarray.namedarray.pycompat import is_duck_dask_array, module_available if TYPE_CHECKING: from xarray.core.types import DaskArray, T_Chunks, T_NormalizedChunks +dask_available = module_available("dask") + + class DaskManager(ChunkManagerEntrypoint["DaskArray"]): array_cls: type[DaskArray] available: bool = dask_available From f26e259b7f6487c8d274ba493e9c25a8fdb305d2 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 25 Oct 2023 16:25:39 -0700 Subject: [PATCH 17/54] more imports fixes --- xarray/coding/times.py | 2 +- xarray/conventions.py | 2 +- xarray/core/accessor_dt.py | 2 +- xarray/core/computation.py | 4 ++-- xarray/core/dataset.py | 4 ++-- xarray/core/duck_array_ops.py | 4 ++-- xarray/core/indexing.py | 7 +++---- xarray/core/parallel.py | 2 +- xarray/core/rolling.py | 7 +++++-- 
xarray/core/variable.py | 6 +++++- xarray/core/weighted.py | 2 +- xarray/namedarray/daskmanager.py | 2 +- xarray/namedarray/pycompat.py | 14 +------------- 13 files changed, 26 insertions(+), 32 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index b312e797133..3b2f346cf3d 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -26,7 +26,7 @@ from xarray.core.pdcompat import nanosecond_precision_timestamp from xarray.core.utils import emit_user_level_warning from xarray.core.variable import Variable -from xarray.namedarray.pycompat import is_duck_dask_array +from xarray.namedarray.utils import is_duck_dask_array try: import cftime diff --git a/xarray/conventions.py b/xarray/conventions.py index 1b6b0107953..ac5c74d0050 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -16,7 +16,7 @@ ) from xarray.core.utils import emit_user_level_warning from xarray.core.variable import IndexVariable, Variable -from xarray.namedarray.pycompat import is_duck_dask_array +from xarray.namedarray.utils import is_duck_dask_array CF_RELATED_DATA = ( "bounds", diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 9de6f371ef4..efebf12cbbb 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -15,7 +15,7 @@ ) from xarray.core.types import T_DataArray from xarray.core.variable import IndexVariable -from xarray.namedarray.pycompat import is_duck_dask_array +from xarray.namedarray.utils import is_duck_dask_array if TYPE_CHECKING: from numpy.typing import DTypeLike diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 99b0f951919..414562a0be7 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -25,8 +25,8 @@ from xarray.core.utils import is_scalar from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array, is_duck_dask_array -from xarray.namedarray.utils import is_dict_like +from xarray.namedarray.pycompat import is_chunked_array +from xarray.namedarray.utils import is_dict_like, is_duck_dask_array if TYPE_CHECKING: from xarray.core.coordinates import Coordinates diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 7e495dbeda3..71653039714 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -117,13 +117,13 @@ from xarray.namedarray.pycompat import ( array_type, is_chunked_array, - is_duck_array, - is_duck_dask_array, ) from xarray.namedarray.utils import ( consolidate_dask_from_array_kwargs, either_dict_or_kwargs, is_dict_like, + is_duck_array, + is_duck_dask_array, ) from xarray.plot.accessor import DatasetPlotAccessor from xarray.util.deprecation_helpers import _deprecate_positional_args diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 4cedfce8a6a..8da3bd23e07 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -36,8 +36,8 @@ from xarray.core import dask_array_ops, dtypes, nputils from xarray.core.utils import module_available from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array -from xarray.namedarray.pycompat import array_type, is_duck_dask_array -from xarray.namedarray.utils import is_duck_array +from xarray.namedarray.pycompat import array_type +from xarray.namedarray.utils import is_duck_array, is_duck_dask_array dask_available = module_available("dask") diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 
55a4eddb8ff..a77a8734040 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -25,13 +25,12 @@ to_0d_array, ) from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array -from xarray.namedarray.pycompat import ( - array_type, - integer_types, +from xarray.namedarray.pycompat import array_type, integer_types +from xarray.namedarray.utils import ( + either_dict_or_kwargs, is_duck_array, is_duck_dask_array, ) -from xarray.namedarray.utils import either_dict_or_kwargs if TYPE_CHECKING: from numpy.typing import DTypeLike diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index aa635ec1cfd..f3c5aa5352e 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -11,7 +11,7 @@ from xarray.core.alignment import align from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset -from xarray.namedarray.pycompat import is_dask_collection +from xarray.namedarray.utils import is_dask_collection if TYPE_CHECKING: from xarray.core.types import T_Xarray diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 5e6a10dfe2b..5bab5c422e6 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -13,8 +13,11 @@ from xarray.core.arithmetic import CoarsenArithmetic from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.core.types import CoarsenBoundaryOptions, SideOptions, T_Xarray -from xarray.namedarray.pycompat import is_duck_dask_array -from xarray.namedarray.utils import either_dict_or_kwargs, is_dict_like +from xarray.namedarray.utils import ( + either_dict_or_kwargs, + is_dict_like, + is_duck_dask_array, +) try: import bottleneck diff --git a/xarray/core/variable.py b/xarray/core/variable.py index e3007e96199..9979aaa26a5 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -42,9 +42,13 @@ integer_types, is_0d_dask_array, is_chunked_array, +) +from xarray.namedarray.utils import ( + either_dict_or_kwargs, + is_dict_like, + is_duck_array, is_duck_dask_array, ) -from xarray.namedarray.utils import either_dict_or_kwargs, is_dict_like, is_duck_array NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 49c8938e3f6..dcb81a5e3e3 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -10,7 +10,7 @@ from xarray.core.alignment import align, broadcast from xarray.core.computation import apply_ufunc, dot from xarray.core.types import Dims, T_Xarray -from xarray.namedarray.pycompat import is_duck_dask_array +from xarray.namedarray.utils import is_duck_dask_array from xarray.util.deprecation_helpers import _deprecate_positional_args # Weighted quantile methods are a subset of the numpy supported quantile methods. 
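(Illustrative sketch, not part of the patch: the import location this series settles on for the duck-typed dask check; assumes dask is installed.)

import dask.array as da
import numpy as np

from xarray.namedarray.utils import is_duck_dask_array

assert is_duck_dask_array(da.zeros(3))
assert not is_duck_dask_array(np.zeros(3))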
diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index 2097ea1f420..020e67afc2d 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -8,7 +8,7 @@ from xarray.core.indexing import ImplicitToExplicitIndexingAdapter from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray -from xarray.namedarray.pycompat import is_duck_dask_array, module_available +from xarray.namedarray.utils import is_duck_dask_array, module_available if TYPE_CHECKING: from xarray.core.types import DaskArray, T_Chunks, T_NormalizedChunks diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 6eb9078a2ae..5ab7c2dde79 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -8,7 +8,7 @@ from packaging.version import Version from xarray.core.utils import is_scalar -from xarray.namedarray.utils import is_duck_array, module_available +from xarray.namedarray.utils import is_duck_array, is_duck_dask_array integer_types = (int, np.integer) @@ -84,18 +84,6 @@ def mod_version(mod: ModType) -> Version: return _get_cached_duck_array_module(mod).version -def is_dask_collection(x): - if module_available("dask"): - from dask.base import is_dask_collection - - return is_dask_collection(x) - return False - - -def is_duck_dask_array(x): - return is_duck_array(x) and is_dask_collection(x) - - def is_chunked_array(x) -> bool: return is_duck_dask_array(x) or (is_duck_array(x) and hasattr(x, "chunks")) From 8c6e8961adf2ec7c1dcf995981ee7f4af6f8433a Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 25 Oct 2023 17:14:54 -0700 Subject: [PATCH 18/54] replace is_duck_array with _arrayfunction_or_api instead --- xarray/core/arithmetic.py | 9 +++------ xarray/core/dataset.py | 9 +++------ xarray/core/duck_array_ops.py | 15 +++++---------- xarray/core/formatting.py | 8 +++++--- xarray/core/indexing.py | 11 ++++------- xarray/core/nputils.py | 7 +++++-- xarray/core/variable.py | 16 ++++++---------- xarray/namedarray/pycompat.py | 9 ++++++--- xarray/namedarray/utils.py | 24 ++++-------------------- xarray/testing.py | 10 +++++----- 10 files changed, 46 insertions(+), 72 deletions(-) diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 331941a04b1..fe8098a8ae9 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -14,12 +14,9 @@ VariableOpsMixin, ) from xarray.core.common import ImplementsArrayReduce, ImplementsDatasetReduce -from xarray.core.ops import ( - IncludeNumpySameMethods, - IncludeReduceMethods, -) +from xarray.core.ops import IncludeNumpySameMethods, IncludeReduceMethods from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.namedarray.pycompat import is_duck_array +from xarray.namedarray._typing import _arrayfunction_or_api class SupportsArithmetic: @@ -48,7 +45,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): # See the docstring example for numpy.lib.mixins.NDArrayOperatorsMixin. 
out = kwargs.get("out", ()) for x in inputs + out: - if not is_duck_array(x) and not isinstance( + if not isinstance(x, _arrayfunction_or_api) and not isinstance( x, self._HANDLED_TYPES + (SupportsArithmetic,) ): return NotImplemented diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 71653039714..6721545e24e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -112,17 +112,14 @@ broadcast_variables, calculate_dimensions, ) +from xarray.namedarray._typing import _arrayfunction_or_api from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager -from xarray.namedarray.pycompat import ( - array_type, - is_chunked_array, -) +from xarray.namedarray.pycompat import array_type, is_chunked_array from xarray.namedarray.utils import ( consolidate_dask_from_array_kwargs, either_dict_or_kwargs, is_dict_like, - is_duck_array, is_duck_dask_array, ) from xarray.plot.accessor import DatasetPlotAccessor @@ -2719,7 +2716,7 @@ def _validate_indexers( elif isinstance(v, Sequence) and len(v) == 0: yield k, np.empty((0,), dtype="int64") else: - if not is_duck_array(v): + if not isinstance(v, _arrayfunction_or_api): v = np.asarray(v) if v.dtype.kind in "US": diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 8da3bd23e07..395a4f8ecf2 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -16,17 +16,11 @@ import pandas as pd from numpy import all as array_all # noqa from numpy import any as array_any # noqa -from numpy import ( # noqa +from numpy import ( around, # noqa - einsum, - gradient, isclose, - isin, isnat, - take, - tensordot, - transpose, - unravel_index, + take, # noqa zeros_like, # noqa ) from numpy import concatenate as _concatenate @@ -35,9 +29,10 @@ from xarray.core import dask_array_ops, dtypes, nputils from xarray.core.utils import module_available +from xarray.namedarray._typing import _arrayfunction_or_api from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array from xarray.namedarray.pycompat import array_type -from xarray.namedarray.utils import is_duck_array, is_duck_dask_array +from xarray.namedarray.utils import is_duck_dask_array dask_available = module_available("dask") @@ -193,7 +188,7 @@ def astype(data, dtype, **kwargs): def asarray(data, xp=np): - return data if is_duck_array(data) else xp.asarray(data) + return data if isinstance(data, _arrayfunction_or_api) else xp.asarray(data) def as_shared_dtype(scalars_or_arrays, xp=np): diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 32ccd05ae01..fbfe4541ee0 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -19,8 +19,8 @@ from xarray.core.duck_array_ops import array_equiv, astype from xarray.core.indexing import MemoryCachedArray from xarray.core.options import OPTIONS, _get_boolean_with_default +from xarray.namedarray._typing import _arrayfunction_or_api from xarray.namedarray.pycompat import array_type, to_duck_array, to_numpy -from xarray.namedarray.utils import is_duck_array if TYPE_CHECKING: from xarray.core.coordinates import AbstractCoordinates @@ -630,7 +630,7 @@ def short_data_repr(array): internal_data = getattr(array, "variable", array)._data if isinstance(array, np.ndarray): return short_array_repr(array) - elif is_duck_array(internal_data): + elif isinstance(internal_data, _arrayfunction_or_api): return limit_lines(repr(array.data), limit=40) elif getattr(array, "_in_memory", None): 
return short_array_repr(array) @@ -789,7 +789,9 @@ def extra_items_repr(extra_keys, mapping, ab_side, kwargs): is_variable = True except AttributeError: # compare attribute value - if is_duck_array(a_mapping[k]) or is_duck_array(b_mapping[k]): + if isinstance(a_mapping[k], _arrayfunction_or_api) or isinstance( + b_mapping[k], _arrayfunction_or_api + ): compatible = array_equiv(a_mapping[k], b_mapping[k]) else: compatible = a_mapping[k] == b_mapping[k] diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index a77a8734040..89b8ac75941 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -24,13 +24,10 @@ is_scalar, to_0d_array, ) +from xarray.namedarray._typing import _arrayfunction_or_api from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array from xarray.namedarray.pycompat import array_type, integer_types -from xarray.namedarray.utils import ( - either_dict_or_kwargs, - is_duck_array, - is_duck_dask_array, -) +from xarray.namedarray.utils import either_dict_or_kwargs, is_duck_dask_array if TYPE_CHECKING: from numpy.typing import DTypeLike @@ -377,7 +374,7 @@ def __init__(self, key): k = int(k) elif isinstance(k, slice): k = as_integer_slice(k) - elif is_duck_array(k): + elif isinstance(k, _arrayfunction_or_api): if not np.issubdtype(k.dtype, np.integer): raise TypeError( f"invalid indexer array, does not have integer dtype: {k!r}" @@ -424,7 +421,7 @@ def __init__(self, key): "Please pass a numpy array by calling ``.compute``. " "See https://github.com/dask/dask/issues/8958." ) - elif is_duck_array(k): + elif isinstance(k, _arrayfunction_or_api): if not np.issubdtype(k.dtype, np.integer): raise TypeError( f"invalid indexer array, does not have integer dtype: {k!r}" diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 9d9f3f6b792..229895fa741 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -14,7 +14,7 @@ from numpy import RankWarning from xarray.core.options import OPTIONS -from xarray.namedarray.pycompat import is_duck_array +from xarray.namedarray._typing import _arrayfunction_or_api try: import bottleneck as bn @@ -143,7 +143,10 @@ def _advanced_indexer_subspaces(key): non_slices = [k for k in key if not isinstance(k, slice)] broadcasted_shape = np.broadcast_shapes( - *[item.shape if is_duck_array(item) else (0,) for item in non_slices] + *[ + item.shape if isinstance(item, _arrayfunction_or_api) else (0,) + for item in non_slices + ] ) ndim = len(broadcasted_shape) mixed_positions = advanced_index_positions[0] + np.arange(ndim) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 9979aaa26a5..2b3b4916c56 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -36,17 +36,13 @@ infix_dims, maybe_coerce_to_str, ) +from xarray.namedarray._typing import _arrayfunction_or_api from xarray.namedarray.core import NamedArray from xarray.namedarray.parallelcompat import get_chunked_array_type -from xarray.namedarray.pycompat import ( - integer_types, - is_0d_dask_array, - is_chunked_array, -) +from xarray.namedarray.pycompat import integer_types, is_0d_dask_array, is_chunked_array from xarray.namedarray.utils import ( either_dict_or_kwargs, is_dict_like, - is_duck_array, is_duck_dask_array, ) @@ -411,7 +407,7 @@ def data(self): Variable.as_numpy Variable.values """ - if is_duck_array(self._data): + if isinstance(self._data, _arrayfunction_or_api): return self._data elif isinstance(self._data, indexing.ExplicitlyIndexed): return self._data.get_duck_array() @@ -636,7 +632,7 @@ def 
_validate_indexers(self, key): for dim, k in zip(self.dims, key): if not isinstance(k, BASIC_INDEXING_TYPES): if not isinstance(k, Variable): - if not is_duck_array(k): + if not isinstance(k, _arrayfunction_or_api): k = np.asarray(k) if k.ndim > 1: raise IndexError( @@ -681,7 +677,7 @@ def _broadcast_indexes_outer(self, key): if isinstance(k, Variable): k = k.data if not isinstance(k, BASIC_INDEXING_TYPES): - if not is_duck_array(k): + if not isinstance(k, _arrayfunction_or_api): k = np.asarray(k) if k.size == 0: # Slice by empty list; numpy could not infer the dtype @@ -940,7 +936,7 @@ def load(self, **kwargs): self._data = as_compatible_data(loaded_data) elif isinstance(self._data, ExplicitlyIndexed): self._data = self._data.get_duck_array() - elif not is_duck_array(self._data): + elif not isinstance(self._data, _arrayfunction_or_api): self._data = np.asarray(self._data) return self diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 5ab7c2dde79..42a4fa6a6b5 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -8,7 +8,8 @@ from packaging.version import Version from xarray.core.utils import is_scalar -from xarray.namedarray.utils import is_duck_array, is_duck_dask_array +from xarray.namedarray._typing import _arrayfunction_or_api +from xarray.namedarray.utils import is_duck_dask_array integer_types = (int, np.integer) @@ -85,7 +86,9 @@ def mod_version(mod: ModType) -> Version: def is_chunked_array(x) -> bool: - return is_duck_dask_array(x) or (is_duck_array(x) and hasattr(x, "chunks")) + return is_duck_dask_array(x) or ( + isinstance(x, _arrayfunction_or_api) and hasattr(x, "chunks") + ) def is_0d_dask_array(x): @@ -120,7 +123,7 @@ def to_duck_array(data): if isinstance(data, ExplicitlyIndexed): return data.get_duck_array() - elif is_duck_array(data): + elif isinstance(data, _arrayfunction_or_api): return data else: return np.asarray(data) diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index ed04a7a461a..5432b883d4c 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -7,6 +7,8 @@ import numpy as np +from xarray.namedarray._typing import _arrayfunction_or_api + if TYPE_CHECKING: if sys.version_info >= (3, 10): from typing import TypeGuard @@ -15,7 +17,7 @@ from numpy.typing import NDArray - from xarray.namedarray._typing import T_DuckArray, duckarray + from xarray.namedarray._typing import duckarray try: from dask.array.core import Array as DaskArray @@ -26,10 +28,6 @@ T = TypeVar("T") -# https://stackoverflow.com/questions/74633074/how-to-type-hint-a-generic-numpy-array -T_DType_co = TypeVar("T_DType_co", bound=np.dtype[np.generic], covariant=True) -T_DType = TypeVar("T_DType", bound=np.dtype[np.generic]) - # Singleton type, as per https://github.com/python/typing/pull/240 class Default(Enum): @@ -68,21 +66,7 @@ def is_dask_collection(x: object) -> TypeGuard[DaskCollection]: def is_duck_dask_array(x: duckarray[Any, Any]) -> TypeGuard[DaskArray]: - return is_dask_collection(x) - - -def is_duck_array(value: Any) -> TypeGuard[T_DuckArray]: - if isinstance(value, np.ndarray): - return True - return ( - hasattr(value, "ndim") - and hasattr(value, "shape") - and hasattr(value, "dtype") - and ( - (hasattr(value, "__array_function__") and hasattr(value, "__array_ufunc__")) - or hasattr(value, "__array_namespace__") - ) - ) + return isinstance(x, _arrayfunction_or_api) and is_dask_collection(x) def to_0d_object_array( diff --git a/xarray/testing.py b/xarray/testing.py index 
25b96e9d26d..72c0cf091cd 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -13,7 +13,7 @@ from xarray.core.dataset import Dataset from xarray.core.indexes import Index, PandasIndex, PandasMultiIndex, default_indexes from xarray.core.variable import IndexVariable, Variable -from xarray.namedarray.utils import is_duck_array +from xarray.namedarray._typing import _arrayfunction_or_api __all__ = ( "assert_allclose", @@ -229,14 +229,14 @@ def assert_duckarray_equal(x, y, err_msg="", verbose=True): """Like `np.testing.assert_array_equal`, but for duckarrays""" __tracebackhide__ = True - if not is_duck_array(x) and not utils.is_scalar(x): + if not isinstance(x, _arrayfunction_or_api) and not utils.is_scalar(x): x = np.asarray(x) - if not is_duck_array(y) and not utils.is_scalar(y): + if not isinstance(y, _arrayfunction_or_api) and not utils.is_scalar(y): y = np.asarray(y) - if (is_duck_array(x) and utils.is_scalar(y)) or ( - utils.is_scalar(x) and is_duck_array(y) + if (isinstance(x, _arrayfunction_or_api) and utils.is_scalar(y)) or ( + utils.is_scalar(x) and isinstance(y, _arrayfunction_or_api) ): equiv = (x == y).all() else: From ed4698cb4b3420abe26106ed7c753041769433d7 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 25 Oct 2023 18:01:33 -0700 Subject: [PATCH 19/54] more typing updates --- xarray/namedarray/daskmanager.py | 3 ++- xarray/namedarray/parallelcompat.py | 10 +++++++--- xarray/namedarray/pycompat.py | 7 ++++--- xarray/namedarray/utils.py | 8 ++++---- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index 020e67afc2d..12471e8bb97 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -12,6 +12,7 @@ if TYPE_CHECKING: from xarray.core.types import DaskArray, T_Chunks, T_NormalizedChunks + from xarray.namedarray._typing import duckarray dask_available = module_available("dask") @@ -28,7 +29,7 @@ def __init__(self) -> None: self.array_cls = Array - def is_chunked_array(self, data: Any) -> bool: + def is_chunked_array(self, data: duckarray[Any, Any]) -> bool: return is_duck_dask_array(data) def chunks(self, data: DaskArray) -> T_NormalizedChunks: diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index 65da905e438..d8685308c63 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -17,7 +17,11 @@ from xarray.namedarray.pycompat import is_chunked_array if TYPE_CHECKING: - from xarray.namedarray._typing import T_DuckArray, _Chunks, _NormalizedChunks + from xarray.namedarray._typing import ( + _Chunks, + _NormalizedChunks, + duckarray, + ) T_ChunkedArray = TypeVar("T_ChunkedArray") @@ -168,7 +172,7 @@ def __init__(self) -> None: """Used to set the array_cls attribute at import time.""" raise NotImplementedError() - def is_chunked_array(self, data: Any) -> bool: + def is_chunked_array(self, data: duckarray[Any, Any]) -> bool: """ Check if the given object is an instance of this type of chunked array. @@ -251,7 +255,7 @@ def normalize_chunks( @abstractmethod def from_array( - self, data: T_DuckArray | np.typing.ArrayLike, chunks: _Chunks, **kwargs + self, data: duckarray[Any, Any], chunks: _Chunks, **kwargs ) -> T_ChunkedArray: """ Create a chunked array from a non-chunked numpy-like array. 
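The duckarray[Any, Any] annotations above pin down the surface that any chunk manager must expose. As a rough sketch of what a third-party implementation looks like against this interface, assuming only ChunkManagerEntrypoint as shown in this series (ToyArray, ToyManager, and the method bodies are hypothetical, and the remaining abstract methods such as from_array and compute are omitted):

    from typing import Any

    from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint


    class ToyArray:
        """Hypothetical chunked array; also satisfies the SupportsRechunk
        bound that a later patch in this series places on T_ChunkedArray."""

        chunks: tuple[tuple[int, ...], ...] = ((1,),)

        def rechunk(self, chunks: Any, **kwargs: Any) -> "ToyArray":
            return self


    class ToyManager(ChunkManagerEntrypoint[ToyArray]):
        available = True

        def __init__(self) -> None:
            # the base __init__ exists only to set array_cls at import time
            self.array_cls = ToyArray

        def is_chunked_array(self, data: Any) -> bool:
            # DaskManager answers this via is_duck_dask_array; for a single
            # concrete array type a plain isinstance check is enough
            return isinstance(data, self.array_cls)

Real managers are additionally advertised through the entry-point group that list_chunkmanagers reads; that registration step is not shown in this hunk.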
diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 42a4fa6a6b5..24cfcdaf782 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -16,6 +16,7 @@ if TYPE_CHECKING: ModType = Literal["dask", "pint", "cupy", "sparse", "cubed"] DuckArrayTypes = tuple[type[Any], ...] # TODO: improve this? maybe Generic + from xarray.namedarray._typing import _DType, _ShapeType, duckarray class DuckArrayModule: @@ -91,11 +92,11 @@ def is_chunked_array(x) -> bool: ) -def is_0d_dask_array(x): +def is_0d_dask_array(x) -> bool: return is_duck_dask_array(x) and is_scalar(x) -def to_numpy(data) -> np.ndarray: +def to_numpy(data) -> np.ndarray[_ShapeType, _DType]: from xarray.core.indexing import ExplicitlyIndexed from xarray.namedarray.parallelcompat import get_chunked_array_type @@ -118,7 +119,7 @@ def to_numpy(data) -> np.ndarray: return data -def to_duck_array(data): +def to_duck_array(data) -> duckarray[_ShapeType, _DType]: from xarray.core.indexing import ExplicitlyIndexed if isinstance(data, ExplicitlyIndexed): diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 5432b883d4c..c1018125ba3 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -129,11 +129,11 @@ def either_dict_or_kwargs( def consolidate_dask_from_array_kwargs( - from_array_kwargs: dict, + from_array_kwargs: dict[Any, Any], name: str | None = None, lock: bool | None = None, inline_array: bool | None = None, -) -> dict: +) -> dict[Any, Any]: """ Merge dask-specific kwargs with arbitrary from_array_kwargs dict. @@ -166,12 +166,12 @@ def consolidate_dask_from_array_kwargs( def _resolve_doubly_passed_kwarg( - kwargs_dict: dict, + kwargs_dict: dict[Any, Any], kwarg_name: str, passed_kwarg_value: str | bool | None, default: bool | None, err_msg_dict_name: str, -) -> dict: +) -> dict[Any, Any]: # if in kwargs_dict but not passed explicitly then just pass kwargs_dict through unaltered if kwarg_name in kwargs_dict and passed_kwarg_value is None: pass From 309cd4d964400c702219f80a11c3e74eee3cecc6 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 26 Oct 2023 12:36:45 -0700 Subject: [PATCH 20/54] more typing --- xarray/backends/common.py | 4 ++-- xarray/namedarray/parallelcompat.py | 13 ++++++++----- xarray/namedarray/pycompat.py | 6 +++--- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index ca393937886..0766e94748a 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -13,8 +13,8 @@ from xarray.conventions import cf_encoder from xarray.core import indexing from xarray.core.utils import FrozenDict, NdimSizeLenMixin, is_remote_uri +from xarray.namedarray._typing import _chunkedarrayfunction_or_api from xarray.namedarray.parallelcompat import get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array if TYPE_CHECKING: from io import BufferedIOBase @@ -232,7 +232,7 @@ def __init__(self, lock=None): self.lock = lock def add(self, source, target, region=None): - if is_chunked_array(source): + if isinstance(source, _chunkedarrayfunction_or_api): self.sources.append(source) self.targets.append(target) self.regions.append(region) diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index d8685308c63..cef1da4afb6 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -19,7 +19,10 @@ if TYPE_CHECKING: from xarray.namedarray._typing import ( _Chunks, + _DType, + _DTypeLike, 
_NormalizedChunks, + _ShapeType, duckarray, ) @@ -220,9 +223,9 @@ def chunks(self, data: T_ChunkedArray) -> _NormalizedChunks: def normalize_chunks( self, chunks: _Chunks | _NormalizedChunks, - shape: tuple[int, ...] | None = None, + shape: _ShapeType | None = None, limit: int | None = None, - dtype: np.dtype | None = None, + dtype: _DType | None = None, previous_chunks: _NormalizedChunks | None = None, ) -> _NormalizedChunks: """ @@ -359,7 +362,7 @@ def reduction( combine_func: Callable | None = None, aggregate_func: Callable | None = None, axis: int | Sequence[int] | None = None, - dtype: np.dtype | None = None, + dtype: _DTypeLike | None = None, keepdims: bool = False, ) -> T_ChunkedArray: """ @@ -409,7 +412,7 @@ def apply_gufunc( *args: Any, axes: Sequence[tuple[int, ...]] | None = None, keepdims: bool = False, - output_dtypes: Sequence[np.typing.DTypeLike] | None = None, + output_dtypes: Sequence[_DTypeLike] | None = None, vectorize: bool | None = None, **kwargs, ): @@ -492,7 +495,7 @@ def map_blocks( self, func: Callable, *args: Any, - dtype: np.typing.DTypeLike | None = None, + dtype: _DTypeLike | None = None, chunks: tuple[int, ...] | None = None, drop_axis: int | Sequence[int] | None = None, new_axis: int | Sequence[int] | None = None, diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 24cfcdaf782..022d33d05d8 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -86,17 +86,17 @@ def mod_version(mod: ModType) -> Version: return _get_cached_duck_array_module(mod).version -def is_chunked_array(x) -> bool: +def is_chunked_array(x: duckarray[Any, Any]) -> bool: return is_duck_dask_array(x) or ( isinstance(x, _arrayfunction_or_api) and hasattr(x, "chunks") ) -def is_0d_dask_array(x) -> bool: +def is_0d_dask_array(x: duckarray[Any, Any]) -> bool: return is_duck_dask_array(x) and is_scalar(x) -def to_numpy(data) -> np.ndarray[_ShapeType, _DType]: +def to_numpy(data: duckarray[Any, Any]) -> np.ndarray[_ShapeType, _DType]: from xarray.core.indexing import ExplicitlyIndexed from xarray.namedarray.parallelcompat import get_chunked_array_type From 6aa6b8066141d8c61f4ae90b9645cbae83eba0c1 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 26 Oct 2023 12:50:08 -0700 Subject: [PATCH 21/54] fix imports --- xarray/core/duck_array_ops.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 395a4f8ecf2..d23d627db8f 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -18,10 +18,16 @@ from numpy import any as array_any # noqa from numpy import ( around, # noqa + einsum, # noqa + gradient, # noqa isclose, + isin, # noqa isnat, take, # noqa - zeros_like, # noqa + tensordot, # noqa + transpose, # noqa + unravel_index, # noqa + zeros_like, ) from numpy import concatenate as _concatenate from numpy.core.multiarray import normalize_axis_index # type: ignore[attr-defined] @@ -29,6 +35,7 @@ from xarray.core import dask_array_ops, dtypes, nputils from xarray.core.utils import module_available +from xarray.namedarray._array_api import _get_data_namespace from xarray.namedarray._typing import _arrayfunction_or_api from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array from xarray.namedarray.pycompat import array_type @@ -37,13 +44,6 @@ dask_available = module_available("dask") -def get_array_namespace(x): - if hasattr(x, "__array_namespace__"): - return 
x.__array_namespace__() - else: - return np - - def _dask_or_eager_func( name, eager_module=np, @@ -121,7 +121,7 @@ def isnull(data): return isnat(data) elif issubclass(scalar_type, np.inexact): # float types use NaN for null - xp = get_array_namespace(data) + xp = _get_data_namespace(data) return xp.isnan(data) elif issubclass(scalar_type, (np.bool_, np.integer, np.character, np.void)): # these types cannot represent missing values @@ -179,7 +179,7 @@ def cumulative_trapezoid(y, x, axis): def astype(data, dtype, **kwargs): if hasattr(data, "__array_namespace__"): - xp = get_array_namespace(data) + xp = _get_data_namespace(data) if xp == np: # numpy currently doesn't have a astype: return data.astype(dtype, **kwargs) @@ -211,7 +211,7 @@ def as_shared_dtype(scalars_or_arrays, xp=np): def broadcast_to(array, shape): - xp = get_array_namespace(array) + xp = _get_data_namespace(array) return xp.broadcast_to(array, shape) @@ -289,7 +289,7 @@ def count(data, axis=None): def sum_where(data, axis=None, dtype=None, where=None): - xp = get_array_namespace(data) + xp = _get_data_namespace(data) if where is not None: a = where_method(xp.zeros_like(data), where, data) else: @@ -300,7 +300,7 @@ def sum_where(data, axis=None, dtype=None, where=None): def where(condition, x, y): """Three argument where() with better dtype promotion rules.""" - xp = get_array_namespace(condition) + xp = _get_data_namespace(condition) return xp.where(condition, *as_shared_dtype([x, y], xp=xp)) @@ -320,19 +320,19 @@ def fillna(data, other): def concatenate(arrays, axis=0): """concatenate() with better dtype promotion rules.""" if hasattr(arrays[0], "__array_namespace__"): - xp = get_array_namespace(arrays[0]) + xp = _get_data_namespace(arrays[0]) return xp.concat(as_shared_dtype(arrays, xp=xp), axis=axis) return _concatenate(as_shared_dtype(arrays), axis=axis) def stack(arrays, axis=0): """stack() with better dtype promotion rules.""" - xp = get_array_namespace(arrays[0]) + xp = _get_data_namespace(arrays[0]) return xp.stack(as_shared_dtype(arrays, xp=xp), axis=axis) def reshape(array, shape): - xp = get_array_namespace(array) + xp = _get_data_namespace(array) return xp.reshape(array, shape) @@ -376,7 +376,7 @@ def f(values, axis=None, skipna=None, **kwargs): if name in ["sum", "prod"]: kwargs.pop("min_count", None) - xp = get_array_namespace(values) + xp = _get_data_namespace(values) func = getattr(xp, name) try: From 8a87810e44f966937e26c206797ddf44c2738c71 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 26 Oct 2023 12:53:40 -0700 Subject: [PATCH 22/54] revert get_array_namespace --- xarray/core/duck_array_ops.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index d23d627db8f..472996eccd4 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -34,16 +34,21 @@ from numpy.lib.stride_tricks import sliding_window_view # noqa from xarray.core import dask_array_ops, dtypes, nputils -from xarray.core.utils import module_available -from xarray.namedarray._array_api import _get_data_namespace from xarray.namedarray._typing import _arrayfunction_or_api from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array from xarray.namedarray.pycompat import array_type -from xarray.namedarray.utils import is_duck_dask_array +from xarray.namedarray.utils import is_duck_dask_array, module_available dask_available = module_available("dask") +def 
get_array_namespace(x): + if hasattr(x, "__array_namespace__"): + return x.__array_namespace__() + else: + return np + + def _dask_or_eager_func( name, eager_module=np, @@ -121,7 +126,7 @@ def isnull(data): return isnat(data) elif issubclass(scalar_type, np.inexact): # float types use NaN for null - xp = _get_data_namespace(data) + xp = get_array_namespace(data) return xp.isnan(data) elif issubclass(scalar_type, (np.bool_, np.integer, np.character, np.void)): # these types cannot represent missing values @@ -179,7 +184,7 @@ def cumulative_trapezoid(y, x, axis): def astype(data, dtype, **kwargs): if hasattr(data, "__array_namespace__"): - xp = _get_data_namespace(data) + xp = get_array_namespace(data) if xp == np: # numpy currently doesn't have a astype: return data.astype(dtype, **kwargs) @@ -211,7 +216,7 @@ def as_shared_dtype(scalars_or_arrays, xp=np): def broadcast_to(array, shape): - xp = _get_data_namespace(array) + xp = get_array_namespace(array) return xp.broadcast_to(array, shape) @@ -289,7 +294,7 @@ def count(data, axis=None): def sum_where(data, axis=None, dtype=None, where=None): - xp = _get_data_namespace(data) + xp = get_array_namespace(data) if where is not None: a = where_method(xp.zeros_like(data), where, data) else: @@ -300,7 +305,7 @@ def sum_where(data, axis=None, dtype=None, where=None): def where(condition, x, y): """Three argument where() with better dtype promotion rules.""" - xp = _get_data_namespace(condition) + xp = get_array_namespace(condition) return xp.where(condition, *as_shared_dtype([x, y], xp=xp)) @@ -320,19 +325,19 @@ def fillna(data, other): def concatenate(arrays, axis=0): """concatenate() with better dtype promotion rules.""" if hasattr(arrays[0], "__array_namespace__"): - xp = _get_data_namespace(arrays[0]) + xp = get_array_namespace(arrays[0]) return xp.concat(as_shared_dtype(arrays, xp=xp), axis=axis) return _concatenate(as_shared_dtype(arrays), axis=axis) def stack(arrays, axis=0): """stack() with better dtype promotion rules.""" - xp = _get_data_namespace(arrays[0]) + xp = get_array_namespace(arrays[0]) return xp.stack(as_shared_dtype(arrays, xp=xp), axis=axis) def reshape(array, shape): - xp = _get_data_namespace(array) + xp = get_array_namespace(array) return xp.reshape(array, shape) @@ -376,7 +381,7 @@ def f(values, axis=None, skipna=None, **kwargs): if name in ["sum", "prod"]: kwargs.pop("min_count", None) - xp = _get_data_namespace(values) + xp = get_array_namespace(values) func = getattr(xp, name) try: From 78dec61fdf8c64487f70d561489d638aa5d841e2 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 30 Jan 2024 11:42:35 -0800 Subject: [PATCH 23/54] Update module imports --- xarray/backends/plugins.py | 2 +- xarray/core/_aggregations.py | 3 ++- xarray/core/nputils.py | 4 ++-- xarray/core/utils.py | 30 ---------------------------- xarray/core/variable.py | 2 +- xarray/namedarray/utils.py | 16 ++++++++++++--- xarray/plot/utils.py | 3 ++- xarray/util/generate_aggregations.py | 3 ++- 8 files changed, 23 insertions(+), 40 deletions(-) diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index a62ca6c9862..6ce81a8c7f7 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Any, Callable from xarray.backends.common import BACKEND_ENTRYPOINTS, BackendEntrypoint -from xarray.core.utils import module_available +from xarray.namedarray.utils import module_available if TYPE_CHECKING: import os diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py 
index e214c2c7c5a..c59d50d010b 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -9,7 +9,8 @@ from xarray.core import duck_array_ops from xarray.core.options import OPTIONS from xarray.core.types import Dims, Self -from xarray.core.utils import contains_only_chunked_or_numpy, module_available +from xarray.core.utils import contains_only_chunked_or_numpy +from xarray.namedarray.utils import module_available if TYPE_CHECKING: from xarray.core.dataarray import DataArray diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index ce8db577ffc..82006b4eb5a 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -7,8 +7,8 @@ import pandas as pd from packaging.version import Version -from xarray.core import pycompat -from xarray.core.utils import module_available +from xarray.namedarray import pycompat +from xarray.namedarray.utils import module_available # remove once numpy 2.0 is the oldest supported version if module_available("numpy", minversion="2.0.0.dev0"): diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 77213291dd7..51d2b32e032 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -37,7 +37,6 @@ import contextlib import functools -import importlib import inspect import io import itertools @@ -73,7 +72,6 @@ import numpy as np import pandas as pd -from packaging.version import Version if TYPE_CHECKING: from xarray.core.types import Dims, ErrorOptionsWithWarn @@ -1127,34 +1125,6 @@ def contains_only_chunked_or_numpy(obj) -> bool: ) -def module_available(module: str, minversion: str | None = None) -> bool: - """Checks whether a module is installed without importing it. - - Use this for a lightweight check and lazy imports. - - Parameters - ---------- - module : str - Name of the module. - minversion : str, optional - Minimum version of the module - - Returns - ------- - available : bool - Whether the module is installed. - """ - if importlib.util.find_spec(module) is None: - return False - - if minversion is not None: - version = importlib.metadata.version(module) - - return Version(version) >= Version(minversion) - - return True - - def find_stack_level(test_mode=False) -> int: """Find the first place in the stack that is not inside xarray or the Python standard library. 
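The relocated helper keeps its lazy-import contract: it consults importlib metadata without actually importing the module. A minimal usage sketch against its new xarray.namedarray.utils home (the dask version pin is illustrative, not taken from this patch):

    from xarray.namedarray.utils import module_available

    if module_available("dask", minversion="2023.1.0"):
        # safe to import: the module spec exists and the installed
        # version satisfies the minimum
        import dask.array as da

        backed = da.ones((4, 4), chunks=2)
    else:
        import numpy as np

        backed = np.ones((4, 4))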
diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 2481e7aba7c..6fa49672834 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -33,7 +33,6 @@ decode_numpy_dict_values, drop_dims_from_indexers, ensure_us_time_resolution, - infix_dims, maybe_coerce_to_str, ) from xarray.namedarray._typing import _arrayfunction_or_api @@ -42,6 +41,7 @@ from xarray.namedarray.pycompat import integer_types, is_0d_dask_array, is_chunked_array from xarray.namedarray.utils import ( either_dict_or_kwargs, + infix_dims, is_dict_like, is_duck_dask_array, ) diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 3d324d1dc5f..46d74744d68 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -1,11 +1,13 @@ from __future__ import annotations +import importlib import sys import warnings from collections.abc import Hashable, Iterable, Iterator, Mapping from typing import TYPE_CHECKING, Any, TypeVar, cast import numpy as np +from packaging.version import Version from xarray.namedarray._typing import ( ErrorOptionsWithWarn, @@ -36,7 +38,7 @@ T = TypeVar("T") -def module_available(module: str) -> bool: +def module_available(module: str, minversion: str | None = None) -> bool: """Checks whether a module is installed without importing it. Use this for a lightweight check and lazy imports. @@ -45,15 +47,23 @@ def module_available(module: str) -> bool: ---------- module : str Name of the module. + minversion : str, optional + Minimum version of the module Returns ------- available : bool Whether the module is installed. """ - from importlib.util import find_spec + if importlib.util.find_spec(module) is None: + return False - return find_spec(module) is not None + if minversion is not None: + version = importlib.metadata.version(module) + + return Version(version) >= Version(minversion) + + return True def is_dask_collection(x: object) -> TypeGuard[DaskCollection]: diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 916ac64aff1..3604bd14cb1 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -13,8 +13,9 @@ from xarray.core.indexes import PandasMultiIndex from xarray.core.options import OPTIONS -from xarray.core.utils import is_scalar, module_available +from xarray.core.utils import is_scalar from xarray.namedarray.pycompat import DuckArrayModule +from xarray.namedarray.utils import module_available nc_time_axis_available = module_available("nc_time_axis") diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index e436cd42335..82aae51ecb6 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -28,7 +28,8 @@ from xarray.core import duck_array_ops from xarray.core.options import OPTIONS from xarray.core.types import Dims, Self -from xarray.core.utils import contains_only_chunked_or_numpy, module_available +from xarray.core.utils import contains_only_chunked_or_numpy +from xarray.namedarray.utils import module_available if TYPE_CHECKING: from xarray.core.dataarray import DataArray From 5ce08f06530ff5cb74f87e8cb26067c3af7a759e Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 30 Jan 2024 14:18:47 -0800 Subject: [PATCH 24/54] Fix import statements --- xarray/core/dataset.py | 2 +- xarray/core/rolling.py | 3 ++- xarray/core/rolling_exp.py | 4 ++-- xarray/tests/test_coding_times.py | 2 +- xarray/tests/test_plot.py | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 
158f92c2908..d4105f1fc44 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1539,7 +1539,7 @@ def __getitem__( """ from xarray.core.formatting import shorten_list_repr - if utils.is_dict_like(key): + if is_dict_like(key): return self.isel(**key) if utils.hashable(key): try: diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 7fe4533612b..9ca9e7c890a 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -10,10 +10,11 @@ import numpy as np from packaging.version import Version -from xarray.core import dtypes, duck_array_ops, pycompat, utils +from xarray.core import dtypes, duck_array_ops, utils from xarray.core.arithmetic import CoarsenArithmetic from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.core.types import CoarsenBoundaryOptions, SideOptions, T_Xarray +from xarray.namedarray import pycompat from xarray.namedarray.utils import ( either_dict_or_kwargs, is_dict_like, diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index 144e26a86b2..72bfd14aceb 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -6,12 +6,12 @@ import numpy as np from packaging.version import Version -from xarray.core import pycompat from xarray.core.computation import apply_ufunc from xarray.core.options import _get_keep_attrs from xarray.core.pdcompat import count_not_none from xarray.core.types import T_DataWithCoords -from xarray.core.utils import module_available +from xarray.namedarray import pycompat +from xarray.namedarray.utils import module_available def _get_alpha( diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 9ece96d03b7..a24f885c0ed 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -33,7 +33,7 @@ from xarray.coding.variables import SerializationWarning from xarray.conventions import _update_bounds_attributes, cf_encoder from xarray.core.common import contains_cftime_datetimes -from xarray.core.pycompat import is_duck_dask_array +from xarray.namedarray.utils import is_duck_dask_array from xarray.testing import assert_equal, assert_identical from xarray.tests import ( FirstElementAccessibleArray, diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 697db9c5e80..1a2b9ab100c 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -15,7 +15,7 @@ import xarray as xr import xarray.plot as xplt from xarray import DataArray, Dataset -from xarray.core.utils import module_available +from xarray.namedarray.utils import module_available from xarray.plot.dataarray_plot import _infer_interval_breaks from xarray.plot.dataset_plot import _infer_meta_data from xarray.plot.utils import ( From 541049f45edeb518a767cb3b23fa53f6045aa508 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 2 Feb 2024 08:35:39 -0800 Subject: [PATCH 25/54] Use is_dask_collection function instead of dask.typing.DaskCollection --- xarray/namedarray/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 46d74744d68..b2e90a07ba9 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -68,9 +68,11 @@ def module_available(module: str, minversion: str | None = None) -> bool: def is_dask_collection(x: object) -> TypeGuard[DaskCollection]: if module_available("dask"): - from dask.typing import DaskCollection + from dask.base import is_dask_collection - return isinstance(x, DaskCollection) + # use is_dask_collection 
function instead of dask.typing.DaskCollection + # see https://github.com/pydata/xarray/pull/8241#discussion_r1476276023 + return is_dask_collection(x) return False From 01c3d24543f374c39542a42201b7904f99517c06 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 2 Feb 2024 12:34:48 -0800 Subject: [PATCH 26/54] revert to using is_duck_array: https://github.com/pydata/xarray/issues/8696 --- xarray/core/arithmetic.py | 4 ++-- xarray/core/dataset.py | 4 ++-- xarray/core/duck_array_ops.py | 5 ++--- xarray/core/formatting.py | 8 +++----- xarray/core/indexing.py | 11 +++++++---- xarray/core/nputils.py | 8 ++------ xarray/core/variable.py | 10 +++++----- xarray/namedarray/_typing.py | 8 ++++++++ xarray/namedarray/daskmanager.py | 14 +++++++------- xarray/namedarray/pycompat.py | 11 ++++------- xarray/namedarray/utils.py | 33 ++++++++++++++++++-------------- 11 files changed, 61 insertions(+), 55 deletions(-) diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index fe8098a8ae9..900f16845b0 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -16,7 +16,7 @@ from xarray.core.common import ImplementsArrayReduce, ImplementsDatasetReduce from xarray.core.ops import IncludeNumpySameMethods, IncludeReduceMethods from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.namedarray._typing import _arrayfunction_or_api +from xarray.namedarray.utils import is_duck_array class SupportsArithmetic: @@ -45,7 +45,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): # See the docstring example for numpy.lib.mixins.NDArrayOperatorsMixin. out = kwargs.get("out", ()) for x in inputs + out: - if not isinstance(x, _arrayfunction_or_api) and not isinstance( + if not is_duck_array(x) and not isinstance( x, self._HANDLED_TYPES + (SupportsArithmetic,) ): return NotImplemented diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d4105f1fc44..346c928e77d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -115,7 +115,6 @@ broadcast_variables, calculate_dimensions, ) -from xarray.namedarray._typing import _arrayfunction_or_api from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager from xarray.namedarray.pycompat import array_type, is_chunked_array @@ -124,6 +123,7 @@ either_dict_or_kwargs, infix_dims, is_dict_like, + is_duck_array, is_duck_dask_array, ) from xarray.plot.accessor import DatasetPlotAccessor @@ -2746,7 +2746,7 @@ def _validate_indexers( elif isinstance(v, Sequence) and len(v) == 0: yield k, np.empty((0,), dtype="int64") else: - if not isinstance(v, _arrayfunction_or_api): + if not is_duck_array(v): v = np.asarray(v) if v.dtype.kind in "US": diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 29782483876..d30b599a432 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -35,10 +35,9 @@ from xarray.core import dask_array_ops, dtypes, nputils from xarray.core.options import OPTIONS from xarray.namedarray import pycompat -from xarray.namedarray._typing import _arrayfunction_or_api from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array from xarray.namedarray.pycompat import array_type -from xarray.namedarray.utils import is_duck_dask_array, module_available +from xarray.namedarray.utils import is_duck_array, is_duck_dask_array, module_available # remove once numpy 2.0 is the oldest supported version if module_available("numpy", 
minversion="2.0.0.dev0"): @@ -216,7 +215,7 @@ def astype(data, dtype, **kwargs): def asarray(data, xp=np): - return data if isinstance(data, _arrayfunction_or_api) else xp.asarray(data) + return data if is_duck_array(data) else xp.asarray(data) def as_shared_dtype(scalars_or_arrays, xp=np): diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index ebf18b0281b..10f74850b90 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -19,8 +19,8 @@ from xarray.core.duck_array_ops import array_equiv, astype from xarray.core.indexing import MemoryCachedArray from xarray.core.options import OPTIONS, _get_boolean_with_default -from xarray.namedarray._typing import _arrayfunction_or_api from xarray.namedarray.pycompat import array_type, to_duck_array, to_numpy +from xarray.namedarray.utils import is_duck_array if TYPE_CHECKING: from xarray.core.coordinates import AbstractCoordinates @@ -630,7 +630,7 @@ def short_data_repr(array): internal_data = getattr(array, "variable", array)._data if isinstance(array, np.ndarray): return short_array_repr(array) - elif isinstance(internal_data, _arrayfunction_or_api): + elif is_duck_array(internal_data): return limit_lines(repr(array.data), limit=40) elif getattr(array, "_in_memory", None): return short_array_repr(array) @@ -791,9 +791,7 @@ def extra_items_repr(extra_keys, mapping, ab_side, kwargs): is_variable = True except AttributeError: # compare attribute value - if isinstance(a_mapping[k], _arrayfunction_or_api) or isinstance( - b_mapping[k], _arrayfunction_or_api - ): + if is_duck_array(a_mapping[k]) or is_duck_array(b_mapping[k]): compatible = array_equiv(a_mapping[k], b_mapping[k]) else: compatible = a_mapping[k] == b_mapping[k] diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 89b8ac75941..a77a8734040 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -24,10 +24,13 @@ is_scalar, to_0d_array, ) -from xarray.namedarray._typing import _arrayfunction_or_api from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array from xarray.namedarray.pycompat import array_type, integer_types -from xarray.namedarray.utils import either_dict_or_kwargs, is_duck_dask_array +from xarray.namedarray.utils import ( + either_dict_or_kwargs, + is_duck_array, + is_duck_dask_array, +) if TYPE_CHECKING: from numpy.typing import DTypeLike @@ -374,7 +377,7 @@ def __init__(self, key): k = int(k) elif isinstance(k, slice): k = as_integer_slice(k) - elif isinstance(k, _arrayfunction_or_api): + elif is_duck_array(k): if not np.issubdtype(k.dtype, np.integer): raise TypeError( f"invalid indexer array, does not have integer dtype: {k!r}" @@ -421,7 +424,7 @@ def __init__(self, key): "Please pass a numpy array by calling ``.compute``. " "See https://github.com/dask/dask/issues/8958." 
) - elif isinstance(k, _arrayfunction_or_api): + elif is_duck_array(k): if not np.issubdtype(k.dtype, np.integer): raise TypeError( f"invalid indexer array, does not have integer dtype: {k!r}" diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 82006b4eb5a..dd482365de1 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -8,7 +8,7 @@ from packaging.version import Version from xarray.namedarray import pycompat -from xarray.namedarray.utils import module_available +from xarray.namedarray.utils import is_duck_array, module_available # remove once numpy 2.0 is the oldest supported version if module_available("numpy", minversion="2.0.0.dev0"): @@ -27,7 +27,6 @@ from numpy import RankWarning # type: ignore[attr-defined,no-redef,unused-ignore] from xarray.core.options import OPTIONS -from xarray.namedarray._typing import _arrayfunction_or_api try: import bottleneck as bn @@ -147,10 +146,7 @@ def _advanced_indexer_subspaces(key): non_slices = [k for k in key if not isinstance(k, slice)] broadcasted_shape = np.broadcast_shapes( - *[ - item.shape if isinstance(item, _arrayfunction_or_api) else (0,) - for item in non_slices - ] + *[item.shape if is_duck_array(item) else (0,) for item in non_slices] ) ndim = len(broadcasted_shape) mixed_positions = advanced_index_positions[0] + np.arange(ndim) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 6fa49672834..766c67b855a 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -35,7 +35,6 @@ ensure_us_time_resolution, maybe_coerce_to_str, ) -from xarray.namedarray._typing import _arrayfunction_or_api from xarray.namedarray.core import NamedArray, _raise_if_any_duplicate_dimensions from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import integer_types, is_0d_dask_array, is_chunked_array @@ -43,6 +42,7 @@ either_dict_or_kwargs, infix_dims, is_dict_like, + is_duck_array, is_duck_dask_array, ) @@ -407,7 +407,7 @@ def data(self): Variable.as_numpy Variable.values """ - if isinstance(self._data, _arrayfunction_or_api): + if is_duck_array(self._data): return self._data elif isinstance(self._data, indexing.ExplicitlyIndexed): return self._data.get_duck_array() @@ -632,7 +632,7 @@ def _validate_indexers(self, key): for dim, k in zip(self.dims, key): if not isinstance(k, BASIC_INDEXING_TYPES): if not isinstance(k, Variable): - if not isinstance(k, _arrayfunction_or_api): + if not is_duck_array(k): k = np.asarray(k) if k.ndim > 1: raise IndexError( @@ -677,7 +677,7 @@ def _broadcast_indexes_outer(self, key): if isinstance(k, Variable): k = k.data if not isinstance(k, BASIC_INDEXING_TYPES): - if not isinstance(k, _arrayfunction_or_api): + if not is_duck_array(k): k = np.asarray(k) if k.size == 0: # Slice by empty list; numpy could not infer the dtype @@ -936,7 +936,7 @@ def load(self, **kwargs): self._data = as_compatible_data(loaded_data) elif isinstance(self._data, ExplicitlyIndexed): self._data = self._data.get_duck_array() - elif not isinstance(self._data, _arrayfunction_or_api): + elif not is_duck_array(self._data): self._data = np.asarray(self._data) return self diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index bb8e40a9b10..2168f50828e 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -17,6 +17,14 @@ ) import numpy as np +from numpy.typing import NDArray + +try: + from dask.array.core import Array as DaskArray + from dask.typing import DaskCollection +except ImportError: + DaskArray = NDArray 
# type: ignore + DaskCollection: Any = NDArray # type: ignore # Singleton type, as per https://github.com/python/typing/pull/240 diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index 3ad34a21860..8e163e68143 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -11,8 +11,8 @@ from xarray.namedarray.utils import is_duck_dask_array, module_available if TYPE_CHECKING: - from xarray.core.types import DaskArray, T_Chunks, T_NormalizedChunks - from xarray.namedarray._typing import duckarray + from xarray.core.types import T_Chunks + from xarray.namedarray._typing import DaskArray, _NormalizedChunks, duckarray dask_available = module_available("dask") @@ -32,17 +32,17 @@ def __init__(self) -> None: def is_chunked_array(self, data: duckarray[Any, Any]) -> bool: return is_duck_dask_array(data) - def chunks(self, data: DaskArray) -> T_NormalizedChunks: + def chunks(self, data: DaskArray) -> _NormalizedChunks: return data.chunks def normalize_chunks( self, - chunks: T_Chunks | T_NormalizedChunks, + chunks: T_Chunks | _NormalizedChunks, shape: tuple[int, ...] | None = None, limit: int | None = None, dtype: np.dtype | None = None, - previous_chunks: T_NormalizedChunks | None = None, - ) -> T_NormalizedChunks: + previous_chunks: _NormalizedChunks | None = None, + ) -> _NormalizedChunks: """Called by open_dataset""" from dask.array.core import normalize_chunks @@ -220,7 +220,7 @@ def unify_chunks( self, *args: Any, # can't type this as mypy assumes args are all same type, but dask unify_chunks args alternate types **kwargs, - ) -> tuple[dict[str, T_NormalizedChunks], list[DaskArray]]: + ) -> tuple[dict[str, _NormalizedChunks], list[DaskArray]]: from dask.array.core import unify_chunks return unify_chunks(*args, **kwargs) diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 0f87e6685ee..e4b1548817c 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -8,8 +8,7 @@ from packaging.version import Version from xarray.core.utils import is_scalar -from xarray.namedarray._typing import _arrayfunction_or_api -from xarray.namedarray.utils import is_duck_dask_array +from xarray.namedarray.utils import is_duck_array, is_duck_dask_array integer_types = (int, np.integer) @@ -90,9 +89,7 @@ def mod_version(mod: ModType) -> Version: def is_chunked_array(x: duckarray[Any, Any]) -> bool: - return is_duck_dask_array(x) or ( - isinstance(x, _arrayfunction_or_api) and hasattr(x, "chunks") - ) + return is_duck_dask_array(x) or (is_duck_array(x) and hasattr(x, "chunks")) def is_0d_dask_array(x: duckarray[Any, Any]) -> bool: @@ -122,12 +119,12 @@ def to_numpy(data: duckarray[Any, Any]) -> np.ndarray[_ShapeType, _DType]: return data -def to_duck_array(data) -> duckarray[_ShapeType, _DType]: +def to_duck_array(data: Any) -> duckarray[_ShapeType, _DType]: from xarray.core.indexing import ExplicitlyIndexed if isinstance(data, ExplicitlyIndexed): return data.get_duck_array() - elif isinstance(data, _arrayfunction_or_api): + elif is_duck_array(data): return data else: return np.asarray(data) diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index b2e90a07ba9..b7a7d061c27 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -9,11 +9,7 @@ import numpy as np from packaging.version import Version -from xarray.namedarray._typing import ( - ErrorOptionsWithWarn, - _arrayfunction_or_api, - _DimsLike, -) +from xarray.namedarray._typing import ErrorOptionsWithWarn, _DimsLike if 
TYPE_CHECKING: if sys.version_info >= (3, 10): @@ -23,14 +19,7 @@ from numpy.typing import NDArray - from xarray.namedarray._typing import _Dim, duckarray - - try: - from dask.array.core import Array as DaskArray - from dask.typing import DaskCollection - except ImportError: - DaskArray = NDArray # type: ignore - DaskCollection: Any = NDArray # type: ignore + from xarray.namedarray._typing import DaskArray, DaskCollection, _Dim, duckarray K = TypeVar("K") @@ -76,8 +65,24 @@ def is_dask_collection(x: object) -> TypeGuard[DaskCollection]: return False +def is_duck_array(value: Any) -> TypeGuard[duckarray[Any, Any]]: + # TODO: replace is_duck_array with runtime checks via _arrayfunction_or_api protocol on + # python 3.12 and higher (see https://github.com/pydata/xarray/issues/8696#issuecomment-1924588981) + if isinstance(value, np.ndarray): + return True + return ( + hasattr(value, "ndim") + and hasattr(value, "shape") + and hasattr(value, "dtype") + and ( + (hasattr(value, "__array_function__") and hasattr(value, "__array_ufunc__")) + or hasattr(value, "__array_namespace__") + ) + ) + + def is_duck_dask_array(x: duckarray[Any, Any]) -> TypeGuard[DaskArray]: - return isinstance(x, _arrayfunction_or_api) and is_dask_collection(x) + return is_duck_array(x) and is_dask_collection(x) def to_0d_object_array( From cda1b261c7ddd779d86b8c76f0b929734f6aa432 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 2 Feb 2024 12:59:38 -0800 Subject: [PATCH 27/54] Fix import error in _typing.py --- xarray/namedarray/_typing.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index 2168f50828e..b9a0554d588 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -1,5 +1,6 @@ from __future__ import annotations +import typing from collections.abc import Hashable, Iterable, Mapping, Sequence from enum import Enum from types import ModuleType @@ -19,12 +20,13 @@ import numpy as np from numpy.typing import NDArray -try: - from dask.array.core import Array as DaskArray - from dask.typing import DaskCollection -except ImportError: - DaskArray = NDArray # type: ignore - DaskCollection: Any = NDArray # type: ignore +if typing.TYPE_CHECKING: + try: + from dask.array.core import Array as DaskArray + from dask.typing import DaskCollection + except ImportError: + DaskArray = NDArray # type: ignore + DaskCollection: Any = NDArray # type: ignore # Singleton type, as per https://github.com/python/typing/pull/240 From 57092ecf4a8f552ce88a7015fa3c7b6aca3e7492 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 2 Feb 2024 13:26:18 -0800 Subject: [PATCH 28/54] Update typing imports and add compatibility for Python 3.11 --- xarray/namedarray/_typing.py | 18 +++++++++--------- xarray/namedarray/daskmanager.py | 30 +++++++++++++++++++----------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index b9a0554d588..d83b0f2e19e 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -1,6 +1,6 @@ from __future__ import annotations -import typing +import sys from collections.abc import Hashable, Iterable, Mapping, Sequence from enum import Enum from types import ModuleType @@ -18,15 +18,11 @@ ) import numpy as np -from numpy.typing import NDArray -if typing.TYPE_CHECKING: - try: - from dask.array.core import Array as DaskArray - from dask.typing import DaskCollection - except ImportError: - DaskArray = NDArray 
# type: ignore - DaskCollection: Any = NDArray # type: ignore +if sys.version_info >= (3, 11): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias # Singleton type, as per https://github.com/python/typing/pull/240 @@ -76,6 +72,10 @@ def dtype(self) -> _DType_co: _Chunks = tuple[_Shape, ...] _NormalizedChunks = tuple[tuple[int, ...], ...] +# FYI in some cases we don't allow `None`, which this doesn't take account of. +T_ChunkDim: TypeAlias = Union[int, Literal["auto"], None, tuple[int, ...]] +# We allow the tuple form of this (though arguably we could transition to named dims only) +T_Chunks: TypeAlias = Union[T_ChunkDim, Mapping[Any, T_ChunkDim]] _Dim = Hashable _Dims = tuple[_Dim, ...] diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index 8e163e68143..7f078bd3dbd 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -11,8 +11,16 @@ from xarray.namedarray.utils import is_duck_dask_array, module_available if TYPE_CHECKING: - from xarray.core.types import T_Chunks - from xarray.namedarray._typing import DaskArray, _NormalizedChunks, duckarray + from numpy.typing import NDArray + + from xarray.namedarray._typing import T_Chunks, _NormalizedChunks, duckarray + + try: + from dask.array.core import Array as DaskArray + from dask.typing import DaskCollection + except ImportError: + DaskArray = NDArray # type: ignore + DaskCollection: Any = NDArray # type: ignore dask_available = module_available("dask") @@ -81,9 +89,9 @@ def array_api(self) -> Any: def reduction( self, arr: T_ChunkedArray, - func: Callable, - combine_func: Callable | None = None, - aggregate_func: Callable | None = None, + func: Callable[..., Any], + combine_func: Callable[..., Any] | None = None, + aggregate_func: Callable[..., Any] | None = None, axis: int | Sequence[int] | None = None, dtype: np.dtype | None = None, keepdims: bool = False, @@ -102,8 +110,8 @@ def reduction( def scan( self, - func: Callable, - binop: Callable, + func: Callable[..., Any], + binop: Callable[..., Any], ident: float, arr: T_ChunkedArray, axis: int | None = None, @@ -124,7 +132,7 @@ def scan( def apply_gufunc( self, - func: Callable, + func: Callable[..., Any], signature: str, *args: Any, axes: Sequence[tuple[int, ...]] | None = None, @@ -156,7 +164,7 @@ def apply_gufunc( def map_blocks( self, - func: Callable, + func: Callable[..., Any], *args: Any, dtype: np.typing.DTypeLike | None = None, chunks: tuple[int, ...] 
| None = None, @@ -185,14 +193,14 @@ def map_blocks( def blockwise( self, - func: Callable, + func: Callable[..., Any], out_ind: Iterable, *args: Any, # can't type this as mypy assumes args are all same type, but dask blockwise args alternate types name: str | None = None, token=None, dtype: np.dtype | None = None, - adjust_chunks: dict[Any, Callable] | None = None, + adjust_chunks: dict[Any, Callable[..., Any]] | None = None, new_axes: dict[Any, int] | None = None, align_arrays: bool = True, concatenate: bool | None = None, From f84b3ec10570a1c8402d8bf8c7309ed9ab7f6d8f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 2 Feb 2024 13:37:27 -0800 Subject: [PATCH 29/54] Add support for TypeAlias in Python 3.11 and fallback to typing_extensions in earlier versions --- xarray/namedarray/_typing.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index d83b0f2e19e..13f4a71919c 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -5,6 +5,7 @@ from enum import Enum from types import ModuleType from typing import ( + TYPE_CHECKING, Any, Callable, Final, @@ -19,10 +20,16 @@ import numpy as np -if sys.version_info >= (3, 11): - from typing import TypeAlias -else: - from typing_extensions import TypeAlias +try: + if sys.version_info >= (3, 11): + from typing import TypeAlias + else: + from typing_extensions import TypeAlias +except ImportError: + if TYPE_CHECKING: + raise + else: + Self: Any = None # Singleton type, as per https://github.com/python/typing/pull/240 From f710504234b1eb59309ce77043f0d164c7d4c31d Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Sat, 3 Feb 2024 18:32:29 -0800 Subject: [PATCH 30/54] fix typing issues --- xarray/coding/strings.py | 3 +- xarray/core/duck_array_ops.py | 4 +- xarray/core/indexing.py | 4 +- xarray/core/missing.py | 3 +- xarray/namedarray/core.py | 12 ++-- xarray/namedarray/daskmanager.py | 96 ++++++++++++++--------------- xarray/namedarray/parallelcompat.py | 78 ++++++++++++----------- xarray/namedarray/pycompat.py | 8 +-- xarray/namedarray/utils.py | 9 ++- 9 files changed, 115 insertions(+), 102 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 325270a9bd4..857eb9c233b 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -15,7 +15,8 @@ ) from xarray.core import indexing from xarray.core.variable import Variable -from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array +from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import is_chunked_array def create_vlen_dtype(element_type): diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index d30b599a432..9762134fb55 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -35,8 +35,8 @@ from xarray.core import dask_array_ops, dtypes, nputils from xarray.core.options import OPTIONS from xarray.namedarray import pycompat -from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array -from xarray.namedarray.pycompat import array_type +from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import array_type, is_chunked_array from xarray.namedarray.utils import is_duck_array, is_duck_dask_array, module_available # remove once numpy 2.0 is the oldest supported version diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 
a77a8734040..636ddee7025 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -24,8 +24,8 @@ is_scalar, to_0d_array, ) -from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array -from xarray.namedarray.pycompat import array_type, integer_types +from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import array_type, integer_types, is_chunked_array from xarray.namedarray.utils import ( either_dict_or_kwargs, is_duck_array, diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 67368f86421..cd26d4bae0e 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -23,7 +23,8 @@ from xarray.core.types import Interp1dOptions, InterpOptions from xarray.core.utils import OrderedSet, is_scalar from xarray.core.variable import Variable, broadcast_variables -from xarray.namedarray.parallelcompat import get_chunked_array_type, is_chunked_array +from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import is_chunked_array if TYPE_CHECKING: from xarray.core.dataarray import DataArray diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index b280b025aaf..eae2fc644ab 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -625,7 +625,7 @@ def __dask_postcompute__( self, ) -> tuple[PostComputeCallable, tuple[Any, ...]]: if is_duck_dask_array(self._data): - array_func, array_args = self._data.__dask_postcompute__() # type: ignore[no-untyped-call] + array_func, array_args = self._data.__dask_postcompute__() # type: ignore return self._dask_finalize, (array_func,) + array_args else: raise AttributeError("Method requires self.data to be a dask array.") @@ -641,7 +641,7 @@ def __dask_postpersist__( ]: if is_duck_dask_array(self._data): a: tuple[PostPersistCallable[Any], tuple[Any, ...]] - a = self._data.__dask_postpersist__() # type: ignore[no-untyped-call] + a = self._data.__dask_postpersist__() # type: ignore array_func, array_args = a return self._dask_finalize, (array_func,) + array_args @@ -750,8 +750,8 @@ def chunk( name: str | None = None, lock: bool | None = None, inline_array: bool | None = None, - chunked_array_type: str | ChunkManagerEntrypoint | None = None, - from_array_kwargs=None, + chunked_array_type: str | ChunkManagerEntrypoint[Any] | None = None, + from_array_kwargs: Any = None, **chunks_kwargs: Any, ) -> Self: """Coerce this array's data into a dask array with the given chunks. @@ -849,7 +849,7 @@ def chunk( # Using OuterIndexer is a pragmatic choice: dask does not yet handle # different indexing types in an explicit way: # https://github.com/dask/dask/issues/2883 - ndata = ImplicitToExplicitIndexingAdapter(data_old, OuterIndexer) + ndata = ImplicitToExplicitIndexingAdapter(data_old, OuterIndexer) # type: ignore if is_dict_like(chunks): chunks = tuple(chunks.get(n, s) for n, s in enumerate(ndata.shape)) @@ -862,7 +862,7 @@ def chunk( return self._replace(data=data_chunked) - def to_numpy(self) -> np.ndarray: + def to_numpy(self) -> np.ndarray[Any, Any]: """Coerces wrapped data to numpy and returns a numpy.ndarray""" # TODO an entrypoint so array libraries can choose coercion method? 
data = self._data diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index 7f078bd3dbd..c2ccfe00a32 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -11,23 +11,19 @@ from xarray.namedarray.utils import is_duck_dask_array, module_available if TYPE_CHECKING: - from numpy.typing import NDArray - - from xarray.namedarray._typing import T_Chunks, _NormalizedChunks, duckarray - - try: - from dask.array.core import Array as DaskArray - from dask.typing import DaskCollection - except ImportError: - DaskArray = NDArray # type: ignore - DaskCollection: Any = NDArray # type: ignore + from xarray.namedarray._typing import ( + T_Chunks, + _DType_co, + _NormalizedChunks, + duckarray, + ) dask_available = module_available("dask") -class DaskManager(ChunkManagerEntrypoint["DaskArray"]): - array_cls: type[DaskArray] +class DaskManager(ChunkManagerEntrypoint[Any]): + array_cls: type[Any] available: bool = dask_available def __init__(self) -> None: @@ -40,17 +36,17 @@ def __init__(self) -> None: def is_chunked_array(self, data: duckarray[Any, Any]) -> bool: return is_duck_dask_array(data) - def chunks(self, data: DaskArray) -> _NormalizedChunks: - return data.chunks + def chunks(self, data: Any) -> _NormalizedChunks: + return data.chunks # type: ignore def normalize_chunks( self, chunks: T_Chunks | _NormalizedChunks, shape: tuple[int, ...] | None = None, limit: int | None = None, - dtype: np.dtype | None = None, + dtype: _DType_co | None = None, previous_chunks: _NormalizedChunks | None = None, - ) -> _NormalizedChunks: + ) -> Any: """Called by open_dataset""" from dask.array.core import normalize_chunks @@ -60,9 +56,9 @@ def normalize_chunks( limit=limit, dtype=dtype, previous_chunks=previous_chunks, - ) + ) # type: ignore - def from_array(self, data: Any, chunks, **kwargs) -> DaskArray: + def from_array(self, data: Any, chunks: Any, **kwargs: Any) -> Any: import dask.array as da if isinstance(data, ImplicitToExplicitIndexingAdapter): @@ -73,12 +69,12 @@ def from_array(self, data: Any, chunks, **kwargs) -> DaskArray: data, chunks, **kwargs, - ) + ) # type: ignore - def compute(self, *data: DaskArray, **kwargs) -> tuple[np.ndarray, ...]: + def compute(self, *data: Any, **kwargs: Any) -> Any: from dask.array import compute - return compute(*data, **kwargs) + return compute(*data, **kwargs) # type: ignore @property def array_api(self) -> Any: @@ -93,9 +89,9 @@ def reduction( combine_func: Callable[..., Any] | None = None, aggregate_func: Callable[..., Any] | None = None, axis: int | Sequence[int] | None = None, - dtype: np.dtype | None = None, + dtype: _DType_co | None = None, keepdims: bool = False, - ) -> T_ChunkedArray: + ) -> Any: from dask.array import reduction return reduction( @@ -106,7 +102,7 @@ def reduction( axis=axis, dtype=dtype, keepdims=keepdims, - ) + ) # type: ignore def scan( self, @@ -115,9 +111,9 @@ def scan( ident: float, arr: T_ChunkedArray, axis: int | None = None, - dtype: np.dtype | None = None, - **kwargs, - ) -> DaskArray: + dtype: _DType_co | None = None, + **kwargs: Any, + ) -> Any: from dask.array.reductions import cumreduction return cumreduction( @@ -128,7 +124,7 @@ def scan( axis=axis, dtype=dtype, **kwargs, - ) + ) # type: ignore def apply_gufunc( self, @@ -138,13 +134,13 @@ def apply_gufunc( axes: Sequence[tuple[int, ...]] | None = None, axis: int | None = None, keepdims: bool = False, - output_dtypes: Sequence[np.typing.DTypeLike] | None = None, + output_dtypes: Sequence[_DType_co] | None = None, 
output_sizes: dict[str, int] | None = None, vectorize: bool | None = None, allow_rechunk: bool = False, - meta: tuple[np.ndarray, ...] | None = None, - **kwargs, - ): + meta: tuple[np.ndarray[Any, _DType_co], ...] | None = None, + **kwargs: Any, + ) -> Any: from dask.array.gufunc import apply_gufunc return apply_gufunc( @@ -160,18 +156,18 @@ def apply_gufunc( allow_rechunk=allow_rechunk, meta=meta, **kwargs, - ) + ) # type: ignore def map_blocks( self, func: Callable[..., Any], *args: Any, - dtype: np.typing.DTypeLike | None = None, + dtype: _DType_co | None = None, chunks: tuple[int, ...] | None = None, drop_axis: int | Sequence[int] | None = None, new_axis: int | Sequence[int] | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> Any: import dask from dask.array import map_blocks @@ -189,24 +185,24 @@ def map_blocks( drop_axis=drop_axis, new_axis=new_axis, **kwargs, - ) + ) # type: ignore def blockwise( self, func: Callable[..., Any], - out_ind: Iterable, + out_ind: Iterable[Any], *args: Any, # can't type this as mypy assumes args are all same type, but dask blockwise args alternate types name: str | None = None, - token=None, - dtype: np.dtype | None = None, + token: Any | None = None, + dtype: _DType_co | None = None, adjust_chunks: dict[Any, Callable[..., Any]] | None = None, new_axes: dict[Any, int] | None = None, align_arrays: bool = True, concatenate: bool | None = None, - meta=None, - **kwargs, - ): + meta: tuple[np.ndarray[Any, _DType_co], ...] | None = None, + **kwargs: Any, + ) -> Any: from dask.array import blockwise return blockwise( @@ -222,23 +218,23 @@ def blockwise( concatenate=concatenate, meta=meta, **kwargs, - ) + ) # type: ignore def unify_chunks( self, *args: Any, # can't type this as mypy assumes args are all same type, but dask unify_chunks args alternate types - **kwargs, - ) -> tuple[dict[str, _NormalizedChunks], list[DaskArray]]: + **kwargs: Any, + ) -> Any: from dask.array.core import unify_chunks - return unify_chunks(*args, **kwargs) + return unify_chunks(*args, **kwargs) # type: ignore def store( self, - sources: DaskArray | Sequence[DaskArray], + sources: Any | Sequence[Any], targets: Any, - **kwargs, - ): + **kwargs: Any, + ) -> Any: from dask.array import store return store( diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index 3c37139d305..9a37cdd41f1 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -10,7 +10,7 @@ from abc import ABC, abstractmethod from collections.abc import Iterable, Sequence from importlib.metadata import EntryPoint, entry_points -from typing import TYPE_CHECKING, Any, Callable, Generic, TypeVar +from typing import TYPE_CHECKING, Any, Callable, Generic, Protocol, TypeVar import numpy as np @@ -20,17 +20,23 @@ from xarray.namedarray._typing import ( _Chunks, _DType, - _DTypeLike, + _DType_co, _NormalizedChunks, _ShapeType, duckarray, ) -T_ChunkedArray = TypeVar("T_ChunkedArray") + +class SupportsRechunk(Protocol): + def rechunk(self, chunks: Any, **kwargs: Any) -> Any: + ... + + +T_ChunkedArray = TypeVar("T_ChunkedArray", bound=SupportsRechunk) @functools.lru_cache(maxsize=1) -def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint]: +def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint[Any]]: """ Return a dictionary of available chunk managers and their ChunkManagerEntrypoint subclass objects. 
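For readers unfamiliar with the ``bound=`` form above: binding ``T_ChunkedArray`` to a Protocol means the type variable accepts any object that structurally provides ``rechunk``, with no shared base class required. A self-contained example of the same idea (names are stand-ins, not xarray API):

    from typing import Any, Protocol, TypeVar

    class Rechunkable(Protocol):  # stand-in for SupportsRechunk above
        def rechunk(self, chunks: Any, **kwargs: Any) -> Any: ...

    T = TypeVar("T", bound=Rechunkable)

    def rechunked(arr: T, chunks: Any) -> Any:
        # mypy accepts any argument whose .rechunk matches the protocol
        return arr.rechunk(chunks)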
@@ -54,7 +60,7 @@ def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint]: def load_chunkmanagers( entrypoints: Sequence[EntryPoint], -) -> dict[str, ChunkManagerEntrypoint]: +) -> dict[str, ChunkManagerEntrypoint[Any]]: """Load entrypoints and instantiate chunkmanagers only once.""" loaded_entrypoints = { @@ -70,8 +76,8 @@ def load_chunkmanagers( def guess_chunkmanager( - manager: str | ChunkManagerEntrypoint | None, -) -> ChunkManagerEntrypoint: + manager: str | ChunkManagerEntrypoint[Any] | None, +) -> ChunkManagerEntrypoint[Any]: """ Get namespace of chunk-handling methods, guessing from what's available. @@ -105,7 +111,7 @@ def guess_chunkmanager( ) -def get_chunked_array_type(*args) -> ChunkManagerEntrypoint: +def get_chunked_array_type(*args: Any) -> ChunkManagerEntrypoint[Any]: """ Detects which parallel backend should be used for given set of arrays. @@ -258,7 +264,7 @@ def normalize_chunks( @abstractmethod def from_array( - self, data: duckarray[Any, Any], chunks: _Chunks, **kwargs + self, data: duckarray[Any, Any], chunks: _Chunks, **kwargs: Any ) -> T_ChunkedArray: """ Create a chunked array from a non-chunked numpy-like array. @@ -286,8 +292,8 @@ def rechunk( self, data: T_ChunkedArray, chunks: _NormalizedChunks | tuple[int, ...] | _Chunks, - **kwargs, - ) -> T_ChunkedArray: + **kwargs: Any, + ) -> Any: """ Changes the chunking pattern of the given array. @@ -314,7 +320,9 @@ def rechunk( return data.rechunk(chunks, **kwargs) @abstractmethod - def compute(self, *data: T_ChunkedArray | Any, **kwargs) -> tuple[np.ndarray, ...]: + def compute( + self, *data: T_ChunkedArray | Any, **kwargs: Any + ) -> tuple[np.ndarray[Any, _DType_co], ...]: """ Computes one or more chunked arrays, returning them as eager numpy arrays. @@ -358,11 +366,11 @@ def array_api(self) -> Any: def reduction( self, arr: T_ChunkedArray, - func: Callable, - combine_func: Callable | None = None, - aggregate_func: Callable | None = None, + func: Callable[..., Any], + combine_func: Callable[..., Any] | None = None, + aggregate_func: Callable[..., Any] | None = None, axis: int | Sequence[int] | None = None, - dtype: _DTypeLike | None = None, + dtype: _DType_co | None = None, keepdims: bool = False, ) -> T_ChunkedArray: """ @@ -406,13 +414,13 @@ def reduction( def scan( self, - func: Callable, - binop: Callable, + func: Callable[..., Any], + binop: Callable[..., Any], ident: float, arr: T_ChunkedArray, axis: int | None = None, - dtype: np.dtype | None = None, - **kwargs, + dtype: _DType_co | None = None, + **kwargs: Any, ) -> T_ChunkedArray: """ General version of a 1D scan, also known as a cumulative array reduction. @@ -444,15 +452,15 @@ def scan( @abstractmethod def apply_gufunc( self, - func: Callable, + func: Callable[..., Any], signature: str, *args: Any, axes: Sequence[tuple[int, ...]] | None = None, keepdims: bool = False, - output_dtypes: Sequence[_DTypeLike] | None = None, + output_dtypes: Sequence[_DType_co] | None = None, vectorize: bool | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> Any: """ Apply a generalized ufunc or similar python function to arrays. @@ -530,14 +538,14 @@ def apply_gufunc( def map_blocks( self, - func: Callable, + func: Callable[..., Any], *args: Any, - dtype: _DTypeLike | None = None, + dtype: _DType_co | None = None, chunks: tuple[int, ...] | None = None, drop_axis: int | Sequence[int] | None = None, new_axis: int | Sequence[int] | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> Any: """ Map a function across all blocks of a chunked array. 
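For context on the ``EntryPoint``/``entry_points`` imports kept above: chunk managers are discovered through importlib metadata, so third-party packages can register one without xarray importing them eagerly. A rough sketch, assuming the ``xarray.chunkmanagers`` entry-point group that xarray documents for this purpose (verify the group name before relying on it):

    from importlib.metadata import entry_points
    from typing import Any

    def discover_chunkmanagers() -> dict[str, Any]:
        # hypothetical helper; entry_points(group=...) needs Python >= 3.10.
        # Each entry point loads to a ChunkManagerEntrypoint subclass, which
        # is instantiated once here.
        return {ep.name: ep.load()() for ep in entry_points(group="xarray.chunkmanagers")}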
@@ -578,14 +586,14 @@ def map_blocks( def blockwise( self, - func: Callable, - out_ind: Iterable, + func: Callable[..., Any], + out_ind: Iterable[Any], *args: Any, # can't type this as mypy assumes args are all same type, but dask blockwise args alternate types - adjust_chunks: dict[Any, Callable] | None = None, + adjust_chunks: dict[Any, Callable[..., Any]] | None = None, new_axes: dict[Any, int] | None = None, align_arrays: bool = True, - **kwargs, - ): + **kwargs: Any, + ) -> Any: """ Tensor operation: Generalized inner and outer products. @@ -630,7 +638,7 @@ def blockwise( def unify_chunks( self, *args: Any, # can't type this as mypy assumes args are all same type, but dask unify_chunks args alternate types - **kwargs, + **kwargs: Any, ) -> tuple[dict[str, _NormalizedChunks], list[T_ChunkedArray]]: """ Unify chunks across a sequence of arrays. @@ -654,7 +662,7 @@ def store( sources: T_ChunkedArray | Sequence[T_ChunkedArray], targets: Any, **kwargs: dict[str, Any], - ): + ) -> Any: """ Store chunked arrays in array-like objects, overwriting data in target. diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index e4b1548817c..9f895ba9577 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -96,12 +96,12 @@ def is_0d_dask_array(x: duckarray[Any, Any]) -> bool: return is_duck_dask_array(x) and is_scalar(x) -def to_numpy(data: duckarray[Any, Any]) -> np.ndarray[_ShapeType, _DType]: +def to_numpy(data: duckarray[Any, Any]) -> np.ndarray[Any, np.dtype[Any]]: from xarray.core.indexing import ExplicitlyIndexed from xarray.namedarray.parallelcompat import get_chunked_array_type if isinstance(data, ExplicitlyIndexed): - data = data.get_duck_array() + data = data.get_duck_array() # type: ignore # TODO first attempt to call .to_numpy() once some libraries implement it if hasattr(data, "chunks"): @@ -123,8 +123,8 @@ def to_duck_array(data: Any) -> duckarray[_ShapeType, _DType]: from xarray.core.indexing import ExplicitlyIndexed if isinstance(data, ExplicitlyIndexed): - return data.get_duck_array() + return data.get_duck_array() # type: ignore elif is_duck_array(data): return data else: - return np.asarray(data) + return np.asarray(data) # type: ignore diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index b7a7d061c27..07b8ec812be 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -19,7 +19,14 @@ from numpy.typing import NDArray - from xarray.namedarray._typing import DaskArray, DaskCollection, _Dim, duckarray + try: + from dask.array.core import Array as DaskArray + from dask.typing import DaskCollection + except ImportError: + DaskArray = NDArray # type: ignore + DaskCollection: Any = NDArray # type: ignore + + from xarray.namedarray._typing import _Dim, duckarray K = TypeVar("K") From 4b229fe607beb308db41a80e9069bc41163cecf4 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Sat, 3 Feb 2024 19:12:57 -0800 Subject: [PATCH 31/54] fix typing --- xarray/core/dataset.py | 4 +++- xarray/namedarray/parallelcompat.py | 4 ++++ xarray/tests/test_parallelcompat.py | 3 ++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 346c928e77d..e780e63c4d0 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -843,7 +843,9 @@ def load(self, **kwargs) -> Self: chunkmanager = get_chunked_array_type(*lazy_data.values()) # evaluate all the chunked arrays simultaneously - evaluated_data = chunkmanager.compute(*lazy_data.values(), **kwargs) + 
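The ``TYPE_CHECKING`` block added to utils.py above is a standard fallback pattern for annotating optional dependencies; isolated, it looks like this (illustrative sketch):

    from __future__ import annotations

    from typing import TYPE_CHECKING, Any

    if TYPE_CHECKING:
        from numpy.typing import NDArray

        try:
            from dask.array.core import Array as DaskArray
        except ImportError:
            # aliasing keeps the annotation resolvable when mypy runs in an
            # environment without dask installed
            DaskArray = NDArray  # type: ignore[assignment, misc]

    def graph_name(arr: DaskArray) -> Any:
        # hypothetical helper; the annotation is never evaluated at runtime
        return getattr(arr, "name", None)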
evaluated_data: tuple[np.ndarray[Any, Any], ...] = chunkmanager.compute( + *lazy_data.values(), **kwargs + ) for k, data in zip(lazy_data, evaluated_data): self.variables[k].data = data diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index 9a37cdd41f1..d039acc915c 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -31,6 +31,10 @@ class SupportsRechunk(Protocol): def rechunk(self, chunks: Any, **kwargs: Any) -> Any: ... + @property + def dtype(self) -> np.dtype[Any]: + ... + T_ChunkedArray = TypeVar("T_ChunkedArray", bound=SupportsRechunk) diff --git a/xarray/tests/test_parallelcompat.py b/xarray/tests/test_parallelcompat.py index 13cddce872d..d4a4e273bc0 100644 --- a/xarray/tests/test_parallelcompat.py +++ b/xarray/tests/test_parallelcompat.py @@ -6,6 +6,7 @@ import pytest from xarray.core.types import T_Chunks, T_DuckArray, T_NormalizedChunks +from xarray.namedarray._typing import _Chunks from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import ( ChunkManagerEntrypoint, @@ -76,7 +77,7 @@ def normalize_chunks( return normalize_chunks(chunks, shape, limit, dtype, previous_chunks) def from_array( - self, data: T_DuckArray | np.typing.ArrayLike, chunks: T_Chunks, **kwargs + self, data: T_DuckArray | np.typing.ArrayLike, chunks: _Chunks, **kwargs ) -> DummyChunkedArray: from dask import array as da From a34d7a7bc5de7dfd4302279ccf313b40e28831a7 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 6 Feb 2024 18:29:51 -0800 Subject: [PATCH 32/54] Fix type annotations and ignore type errors --- xarray/coding/variables.py | 2 +- xarray/namedarray/core.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index b0425e3d086..adbf32bcbb7 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -163,7 +163,7 @@ def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike): if is_chunked_array(array): chunkmanager = get_chunked_array_type(array) - return chunkmanager.map_blocks(func, array, dtype=dtype) + return chunkmanager.map_blocks(func, array, dtype=dtype) # type: ignore else: return _ElementwiseFunctionArray(array, func, dtype) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index d168b135db2..ee4cc28bdc2 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -12,6 +12,7 @@ Generic, Literal, TypeVar, + Union, cast, overload, ) @@ -795,6 +796,8 @@ def chunk( dask.array.from_array """ + chunks = cast(Union[tuple[tuple[int, ...], ...], tuple[int, ...]], chunks) + if chunks is None: warnings.warn( "None value for 'chunks' is deprecated. 
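The ``cast(...)`` call introduced above changes nothing at runtime; it only tells the checker which of the accepted chunk spellings to assume from here on. A two-line demonstration:

    from typing import Union, cast

    chunks_in: object = ((5, 5), (5, 5))
    chunks = cast(Union[tuple[tuple[int, ...], ...], tuple[int, ...]], chunks_in)
    assert chunks is chunks_in  # cast neither converts nor validates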
" @@ -828,7 +831,7 @@ def chunk( data_old = self._data if chunkmanager.is_chunked_array(data_old): - data_chunked = chunkmanager.rechunk(data_old, chunks) + data_chunked = chunkmanager.rechunk(data_old, chunks) # type: ignore else: if not isinstance(data_old, ExplicitlyIndexed): ndata = data_old @@ -846,11 +849,7 @@ def chunk( if is_dict_like(chunks): chunks = tuple(chunks.get(n, s) for n, s in enumerate(ndata.shape)) - data_chunked = chunkmanager.from_array( - ndata, - chunks, - **_from_array_kwargs, - ) + data_chunked = chunkmanager.from_array(ndata, chunks, **_from_array_kwargs) # type: ignore return self._replace(data=data_chunked) @@ -870,7 +869,8 @@ def to_numpy(self) -> np.ndarray[Any, Any]: data = data.magnitude if isinstance(data, array_type("sparse")): data = data.todense() - data = np.asarray(data) + data = np.asarray(data) # type: ignore + data = cast(np.ndarray[Any, Any], data) return data From 4bb66e1b3da769c16d73c5bf0fa1558e4289bbbd Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 8 Feb 2024 19:20:03 -0800 Subject: [PATCH 33/54] use to_duck_array function to xarray.namedarray.pycompat --- xarray/core/variable.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 1c6e82a58e2..c9f1f3aa1ed 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -20,7 +20,6 @@ from xarray.core.common import AbstractArray from xarray.core.indexing import ( BasicIndexer, - ExplicitlyIndexed, OuterIndexer, PandasIndexingAdapter, VectorizedIndexer, @@ -37,7 +36,12 @@ ) from xarray.namedarray.core import NamedArray, _raise_if_any_duplicate_dimensions from xarray.namedarray.parallelcompat import get_chunked_array_type -from xarray.namedarray.pycompat import integer_types, is_0d_dask_array, is_chunked_array +from xarray.namedarray.pycompat import ( + integer_types, + is_0d_dask_array, + is_chunked_array, + to_duck_array, +) from xarray.namedarray.utils import ( either_dict_or_kwargs, infix_dims, @@ -934,10 +938,8 @@ def load(self, **kwargs): chunkmanager = get_chunked_array_type(self._data) loaded_data, *_ = chunkmanager.compute(self._data, **kwargs) self._data = as_compatible_data(loaded_data) - elif isinstance(self._data, ExplicitlyIndexed): - self._data = self._data.get_duck_array() - elif not is_duck_array(self._data): - self._data = np.asarray(self._data) + else: + self._data = to_duck_array(self._data) return self def compute(self, **kwargs): From ec7821f9b257c647ca535812a4b721e170334e12 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 8 Feb 2024 19:41:55 -0800 Subject: [PATCH 34/54] Fix type annotations --- xarray/namedarray/core.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index ee4cc28bdc2..16009dac872 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -620,7 +620,7 @@ def __dask_postcompute__( self, ) -> tuple[PostComputeCallable, tuple[Any, ...]]: if is_duck_dask_array(self._data): - array_func, array_args = self._data.__dask_postcompute__() # type: ignore + array_func, array_args = self._data.__dask_postcompute__() # type: ignore[no-untyped-call] return self._dask_finalize, (array_func,) + array_args else: raise AttributeError("Method requires self.data to be a dask array.") @@ -636,7 +636,7 @@ def __dask_postpersist__( ]: if is_duck_dask_array(self._data): a: tuple[PostPersistCallable[Any], tuple[Any, ...]] - a = self._data.__dask_postpersist__() # 
type: ignore + a = self._data.__dask_postpersist__() # type: ignore[no-untyped-call] array_func, array_args = a return self._dask_finalize, (array_func,) + array_args @@ -831,7 +831,7 @@ def chunk( data_old = self._data if chunkmanager.is_chunked_array(data_old): - data_chunked = chunkmanager.rechunk(data_old, chunks) # type: ignore + data_chunked = chunkmanager.rechunk(data_old, chunks) # type: ignore[arg-type] else: if not isinstance(data_old, ExplicitlyIndexed): ndata = data_old @@ -844,12 +844,12 @@ def chunk( # Using OuterIndexer is a pragmatic choice: dask does not yet handle # different indexing types in an explicit way: # https://github.com/dask/dask/issues/2883 - ndata = ImplicitToExplicitIndexingAdapter(data_old, OuterIndexer) # type: ignore + ndata = ImplicitToExplicitIndexingAdapter(data_old, OuterIndexer) # type: ignore[assignment] if is_dict_like(chunks): chunks = tuple(chunks.get(n, s) for n, s in enumerate(ndata.shape)) - data_chunked = chunkmanager.from_array(ndata, chunks, **_from_array_kwargs) # type: ignore + data_chunked = chunkmanager.from_array(ndata, chunks, **_from_array_kwargs) # type: ignore[arg-type] return self._replace(data=data_chunked) @@ -869,7 +869,7 @@ def to_numpy(self) -> np.ndarray[Any, Any]: data = data.magnitude if isinstance(data, array_type("sparse")): data = data.todense() - data = np.asarray(data) # type: ignore + data = np.asarray(data) # type: ignore[assignment] data = cast(np.ndarray[Any, Any], data) return data @@ -1055,7 +1055,7 @@ def permute_dims( if not dim: dims = self.dims[::-1] else: - dims = tuple(infix_dims(dim, self.dims, missing_dims)) # type: ignore + dims = tuple(infix_dims(dim, self.dims, missing_dims)) # type: ignore[arg-type] if len(dims) < 2 or dims == self.dims: # no need to transpose if only one dimension @@ -1134,7 +1134,7 @@ def broadcast_to( # Ensure the dimensions are in the correct order ordered_dims = list(broadcast_shape.keys()) ordered_shape = tuple(broadcast_shape[d] for d in ordered_dims) - data = duck_array_ops.broadcast_to(self._data, ordered_shape) # type: ignore # TODO: use array-api-compat function + data = duck_array_ops.broadcast_to(self._data, ordered_shape) # type: ignore[no-untyped-call] # TODO: use array-api-compat function return self._new(data=data, dims=ordered_dims) def expand_dims( From 3459f6067314e891bf2cabf8b5dc9be578d8acc0 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 8 Feb 2024 20:00:32 -0800 Subject: [PATCH 35/54] Refactor code to simplify data loading in Variable.compute() and NamedArray.to_duck_array() --- xarray/core/variable.py | 9 +-------- xarray/namedarray/core.py | 2 +- xarray/namedarray/pycompat.py | 14 ++++++++++---- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index c9f1f3aa1ed..91d307dee54 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -35,11 +35,9 @@ maybe_coerce_to_str, ) from xarray.namedarray.core import NamedArray, _raise_if_any_duplicate_dimensions -from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import ( integer_types, is_0d_dask_array, - is_chunked_array, to_duck_array, ) from xarray.namedarray.utils import ( @@ -934,12 +932,7 @@ def load(self, **kwargs): -------- dask.array.compute """ - if is_chunked_array(self._data): - chunkmanager = get_chunked_array_type(self._data) - loaded_data, *_ = chunkmanager.compute(self._data, **kwargs) - self._data = as_compatible_data(loaded_data) - else: - self._data = 
to_duck_array(self._data) + self._data = to_duck_array(self._data) return self def compute(self, **kwargs): diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 16009dac872..620e2862f8a 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -844,7 +844,7 @@ def chunk( # Using OuterIndexer is a pragmatic choice: dask does not yet handle # different indexing types in an explicit way: # https://github.com/dask/dask/issues/2883 - ndata = ImplicitToExplicitIndexingAdapter(data_old, OuterIndexer) # type: ignore[assignment] + ndata = ImplicitToExplicitIndexingAdapter(data_old, OuterIndexer) # type: ignore[no-untyped-call, assignment] if is_dict_like(chunks): chunks = tuple(chunks.get(n, s) for n, s in enumerate(ndata.shape)) diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 9f895ba9577..fcb24392878 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -101,10 +101,10 @@ def to_numpy(data: duckarray[Any, Any]) -> np.ndarray[Any, np.dtype[Any]]: from xarray.namedarray.parallelcompat import get_chunked_array_type if isinstance(data, ExplicitlyIndexed): - data = data.get_duck_array() # type: ignore + data = data.get_duck_array() # type: ignore[no-untyped-call] # TODO first attempt to call .to_numpy() once some libraries implement it - if hasattr(data, "chunks"): + if is_chunked_array(data): chunkmanager = get_chunked_array_type(data) data, *_ = chunkmanager.compute(data) if isinstance(data, array_type("cupy")): @@ -121,10 +121,16 @@ def to_numpy(data: duckarray[Any, Any]) -> np.ndarray[Any, np.dtype[Any]]: def to_duck_array(data: Any) -> duckarray[_ShapeType, _DType]: from xarray.core.indexing import ExplicitlyIndexed + from xarray.namedarray.parallelcompat import get_chunked_array_type + + if is_chunked_array(data): + chunkmanager = get_chunked_array_type(data) + loaded_data, *_ = chunkmanager.compute(data) # type: ignore[var-annotated] + return loaded_data if isinstance(data, ExplicitlyIndexed): - return data.get_duck_array() # type: ignore + return data.get_duck_array() # type: ignore[no-untyped-call, no-any-return] elif is_duck_array(data): return data else: - return np.asarray(data) # type: ignore + return np.asarray(data) # type: ignore[return-value] From 02382bd86058aa4c8d2902e674aded7a7ad46879 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 8 Feb 2024 20:03:46 -0800 Subject: [PATCH 36/54] Add to_numpy() function to convert data to numpy array --- xarray/namedarray/core.py | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 620e2862f8a..06242b68f3b 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -41,8 +41,8 @@ _SupportsImag, _SupportsReal, ) -from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager -from xarray.namedarray.pycompat import array_type +from xarray.namedarray.parallelcompat import guess_chunkmanager +from xarray.namedarray.pycompat import to_numpy from xarray.namedarray.utils import ( consolidate_dask_from_array_kwargs, either_dict_or_kwargs, @@ -856,23 +856,7 @@ def chunk( def to_numpy(self) -> np.ndarray[Any, Any]: """Coerces wrapped data to numpy and returns a numpy.ndarray""" # TODO an entrypoint so array libraries can choose coercion method? 
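Patches 33, 35, and 39 converge on ``to_duck_array`` as the single materialization path. Restated as a standalone sketch (predicates reduced to ``hasattr`` checks; the chunked branch assumes dask-backed data, where the real code dispatches through the detected chunk manager):

    from typing import Any

    import numpy as np

    def to_duck_array_sketch(data: Any) -> Any:
        # simplified sketch of to_duck_array; order matters: chunked data
        # first, then lazy backend wrappers, then anything already
        # duck-typed, then the numpy fallback
        if hasattr(data, "chunks"):
            import dask

            (loaded,) = dask.compute(data)
            return loaded
        if hasattr(data, "get_duck_array"):
            return data.get_duck_array()
        if hasattr(data, "__array_function__"):
            return data
        return np.asarray(data)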
- data = self._data - - # TODO first attempt to call .to_numpy() once some libraries implement it - if hasattr(data, "chunks"): - chunkmanager = get_chunked_array_type(data) - data, *_ = chunkmanager.compute(data) - if isinstance(data, array_type("cupy")): - data = data.get() - # pint has to be imported dynamically as pint imports xarray - if isinstance(data, array_type("pint")): - data = data.magnitude - if isinstance(data, array_type("sparse")): - data = data.todense() - data = np.asarray(data) # type: ignore[assignment] - data = cast(np.ndarray[Any, Any], data) - - return data + return to_numpy(self._data) def as_numpy(self) -> Self: """Coerces wrapped data into a numpy array, returning a Variable.""" From d3e5e8395572c385e8ef4606312597c3cd769113 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 8 Feb 2024 20:09:44 -0800 Subject: [PATCH 37/54] move DaskManager import --- xarray/core/dataset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 09906d1e4be..1c29718712a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -115,7 +115,6 @@ broadcast_variables, calculate_dimensions, ) -from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager from xarray.namedarray.pycompat import array_type, is_chunked_array from xarray.namedarray.utils import ( @@ -293,6 +292,9 @@ def _maybe_chunk( chunked_array_type: str | ChunkManagerEntrypoint | None = None, from_array_kwargs=None, ): + + from xarray.namedarray.daskmanager import DaskManager + if chunks is not None: chunks = {dim: chunks[dim] for dim in var.dims if dim in chunks} From 0829d78cf0e5f535e2c15634d725ac36d6ae72df Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 8 Feb 2024 20:25:35 -0800 Subject: [PATCH 38/54] Fix type annotations in DaskManager class --- xarray/namedarray/daskmanager.py | 20 ++++++++++---------- xarray/namedarray/parallelcompat.py | 9 +++++++-- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index c2ccfe00a32..e5ce7dbd630 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -37,7 +37,7 @@ def is_chunked_array(self, data: duckarray[Any, Any]) -> bool: return is_duck_dask_array(data) def chunks(self, data: Any) -> _NormalizedChunks: - return data.chunks # type: ignore + return data.chunks # type: ignore[no-any-return] def normalize_chunks( self, @@ -56,7 +56,7 @@ def normalize_chunks( limit=limit, dtype=dtype, previous_chunks=previous_chunks, - ) # type: ignore + ) # type: ignore[no-untyped-call] def from_array(self, data: Any, chunks: Any, **kwargs: Any) -> Any: import dask.array as da @@ -69,12 +69,12 @@ def from_array(self, data: Any, chunks: Any, **kwargs: Any) -> Any: data, chunks, **kwargs, - ) # type: ignore + ) # type: ignore[no-untyped-call] def compute(self, *data: Any, **kwargs: Any) -> Any: from dask.array import compute - return compute(*data, **kwargs) # type: ignore + return compute(*data, **kwargs) # type: ignore[no-untyped-call] @property def array_api(self) -> Any: @@ -102,7 +102,7 @@ def reduction( axis=axis, dtype=dtype, keepdims=keepdims, - ) # type: ignore + ) # type: ignore[no-untyped-call] def scan( self, @@ -124,7 +124,7 @@ def scan( axis=axis, dtype=dtype, **kwargs, - ) # type: ignore + ) # type: ignore[no-untyped-call] def apply_gufunc( self, @@ -156,7 +156,7 @@ def apply_gufunc( 
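Patch 37 above moves the ``DaskManager`` import from module level into ``_maybe_chunk``. The commit message does not say why, but the usual motivations for a call-time import apply: it breaks an import cycle between ``xarray.core.dataset`` and ``xarray.namedarray.daskmanager`` and defers the cost until chunking is actually requested. In isolation:

    def default_manager_if_available():
        # hypothetical helper; the import is resolved at call time, not while
        # xarray.core.dataset is itself still importing
        from xarray.namedarray.daskmanager import DaskManager

        return DaskManager() if DaskManager.available else None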
allow_rechunk=allow_rechunk, meta=meta, **kwargs, - ) # type: ignore + ) # type: ignore[no-untyped-call] def map_blocks( self, @@ -185,7 +185,7 @@ def map_blocks( drop_axis=drop_axis, new_axis=new_axis, **kwargs, - ) # type: ignore + ) # type: ignore[no-untyped-call] def blockwise( self, @@ -218,7 +218,7 @@ def blockwise( concatenate=concatenate, meta=meta, **kwargs, - ) # type: ignore + ) # type: ignore[no-untyped-call] def unify_chunks( self, @@ -227,7 +227,7 @@ def unify_chunks( ) -> Any: from dask.array.core import unify_chunks - return unify_chunks(*args, **kwargs) # type: ignore + return unify_chunks(*args, **kwargs) # type: ignore[no-untyped-call] def store( self, diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index 35f3da33020..95eae305480 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -28,14 +28,19 @@ ) -class SupportsRechunk(Protocol): +class ChunkedArrayMixinProtocol(Protocol): def rechunk(self, chunks: Any, **kwargs: Any) -> Any: ... @property def dtype(self) -> np.dtype[Any]: ... + @property + def chunks(self) -> _NormalizedChunks: ... + + def compute(self, *data: Any, **kwargs: Any) -> np.ndarray[Any, Any]: ... + -T_ChunkedArray = TypeVar("T_ChunkedArray", bound=SupportsRechunk) +T_ChunkedArray = TypeVar("T_ChunkedArray", bound=ChunkedArrayMixinProtocol) @functools.lru_cache(maxsize=1) From 521b3193e87c65d7d7a774a4e407682c21ff02d4 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 8 Feb 2024 20:31:39 -0800 Subject: [PATCH 39/54] Update to_numpy and to_duck_array functions --- xarray/core/variable.py | 2 +- xarray/namedarray/pycompat.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 91d307dee54..577c72541e2 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -932,7 +932,7 @@ def load(self, **kwargs): -------- dask.array.compute """ - self._data = to_duck_array(self._data) + self._data = to_duck_array(self._data, **kwargs) return self def compute(self, **kwargs): diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index fcb24392878..3ce33d4d8ea 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -96,7 +96,9 @@ def is_0d_dask_array(x: duckarray[Any, Any]) -> bool: return is_duck_dask_array(x) and is_scalar(x) -def to_numpy(data: duckarray[Any, Any]) -> np.ndarray[Any, np.dtype[Any]]: +def to_numpy( + data: duckarray[Any, Any], **kwargs: dict[str, Any] +) -> np.ndarray[Any, np.dtype[Any]]: from xarray.core.indexing import ExplicitlyIndexed from xarray.namedarray.parallelcompat import get_chunked_array_type @@ -106,7 +108,7 @@ def to_numpy(data: duckarray[Any, Any]) -> np.ndarray[Any, np.dtype[Any]]: # TODO first attempt to call .to_numpy() once some libraries implement it if is_chunked_array(data): chunkmanager = get_chunked_array_type(data) - data, *_ = chunkmanager.compute(data) + data, *_ = chunkmanager.compute(data, **kwargs) if isinstance(data, array_type("cupy")): data = data.get() # pint has to be imported dynamically as pint imports xarray @@ -119,13 +121,13 @@ def to_numpy(data: duckarray[Any, Any]) -> np.ndarray[Any, np.dtype[Any]]: return data -def to_duck_array(data: Any) -> duckarray[_ShapeType, _DType]: +def to_duck_array(data: Any, **kwargs: dict[str, Any]) -> duckarray[_ShapeType, _DType]: from xarray.core.indexing import ExplicitlyIndexed from xarray.namedarray.parallelcompat import get_chunked_array_type if 
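``ChunkedArrayMixinProtocol`` above is a static-only protocol, but decorating a reduced copy with ``runtime_checkable`` shows what structural matching means in practice (illustrative; the real class is not runtime-checkable):

    from typing import Any, Protocol, runtime_checkable

    @runtime_checkable
    class ChunkedLike(Protocol):  # reduced stand-in, not xarray API
        @property
        def chunks(self) -> tuple[tuple[int, ...], ...]: ...

        def rechunk(self, chunks: Any, **kwargs: Any) -> Any: ...

    class Toy:
        chunks = ((2, 2),)

        def rechunk(self, chunks: Any, **kwargs: Any) -> "Toy":
            return self

    # isinstance() checks attribute presence only, never signatures or types
    assert isinstance(Toy(), ChunkedLike)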
is_chunked_array(data): chunkmanager = get_chunked_array_type(data) - loaded_data, *_ = chunkmanager.compute(data) # type: ignore[var-annotated] + loaded_data, *_ = chunkmanager.compute(data, **kwargs) # type: ignore[var-annotated] return loaded_data if isinstance(data, ExplicitlyIndexed): From 42a63db46b3c80e1dd25163f446ba650ff2be079 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 8 Feb 2024 21:18:43 -0800 Subject: [PATCH 40/54] Refactor dask-specific kwargs handling in .chunk() method --- xarray/core/dataset.py | 2 +- xarray/core/utils.py | 63 +++++++++++++++++++++++++++++ xarray/core/variable.py | 82 +++++++++++++++++++++++++++++++++++++- xarray/namedarray/core.py | 40 +++---------------- xarray/namedarray/utils.py | 63 ----------------------------- 5 files changed, 150 insertions(+), 100 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1c29718712a..95ac374251d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -102,6 +102,7 @@ HybridMappingProxy, OrderedSet, _default, + consolidate_dask_from_array_kwargs, decode_numpy_dict_values, drop_dims_from_indexers, emit_user_level_warning, @@ -118,7 +119,6 @@ from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager from xarray.namedarray.pycompat import array_type, is_chunked_array from xarray.namedarray.utils import ( - consolidate_dask_from_array_kwargs, either_dict_or_kwargs, infix_dims, is_dict_like, diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 8d250080c70..ea579f50ae3 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -1172,3 +1172,66 @@ def emit_user_level_warning(message, category=None): """Emit a warning at the user level by inspecting the stack trace.""" stacklevel = find_stack_level() warnings.warn(message, category=category, stacklevel=stacklevel) + + +def consolidate_dask_from_array_kwargs( + from_array_kwargs: dict[Any, Any], + name: str | None = None, + lock: bool | None = None, + inline_array: bool | None = None, +) -> dict[Any, Any]: + """ + Merge dask-specific kwargs with arbitrary from_array_kwargs dict. + + Temporary function, to be deleted once explicitly passing dask-specific kwargs to .chunk() is deprecated. 
+ """ + + from_array_kwargs = _resolve_doubly_passed_kwarg( + from_array_kwargs, + kwarg_name="name", + passed_kwarg_value=name, + default=None, + err_msg_dict_name="from_array_kwargs", + ) + from_array_kwargs = _resolve_doubly_passed_kwarg( + from_array_kwargs, + kwarg_name="lock", + passed_kwarg_value=lock, + default=False, + err_msg_dict_name="from_array_kwargs", + ) + from_array_kwargs = _resolve_doubly_passed_kwarg( + from_array_kwargs, + kwarg_name="inline_array", + passed_kwarg_value=inline_array, + default=False, + err_msg_dict_name="from_array_kwargs", + ) + + return from_array_kwargs + + +def _resolve_doubly_passed_kwarg( + kwargs_dict: dict[Any, Any], + kwarg_name: str, + passed_kwarg_value: str | bool | None, + default: bool | None, + err_msg_dict_name: str, +) -> dict[Any, Any]: + # if in kwargs_dict but not passed explicitly then just pass kwargs_dict through unaltered + if kwarg_name in kwargs_dict and passed_kwarg_value is None: + pass + # if passed explicitly but not in kwargs_dict then use that + elif kwarg_name not in kwargs_dict and passed_kwarg_value is not None: + kwargs_dict[kwarg_name] = passed_kwarg_value + # if in neither then use default + elif kwarg_name not in kwargs_dict and passed_kwarg_value is None: + kwargs_dict[kwarg_name] = default + # if in both then raise + else: + raise ValueError( + f"argument {kwarg_name} cannot be passed both as a keyword argument and within " + f"the {err_msg_dict_name} dictionary" + ) + + return kwargs_dict diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 577c72541e2..108f5a9ad9d 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -8,7 +8,7 @@ from collections.abc import Hashable, Mapping, Sequence from datetime import timedelta from functools import partial -from typing import TYPE_CHECKING, Any, Callable, NoReturn, cast +from typing import TYPE_CHECKING, Any, Callable, Literal, NoReturn, cast import numpy as np import pandas as pd @@ -29,6 +29,7 @@ from xarray.core.utils import ( OrderedSet, _default, + consolidate_dask_from_array_kwargs, decode_numpy_dict_values, drop_dims_from_indexers, ensure_us_time_resolution, @@ -65,6 +66,8 @@ Self, T_DuckArray, ) + from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint + NON_NANOSECOND_WARNING = ( "Converting non-nanosecond precision {case} values to nanosecond precision. " @@ -2475,6 +2478,83 @@ def _to_dense(self) -> Variable: out = super()._to_dense() return cast("Variable", out) + def chunk( + self, + chunks: int | Literal["auto"] | Mapping[Any, None | int | tuple[int, ...]] = {}, + name: str | None = None, + lock: bool | None = None, + inline_array: bool | None = None, + chunked_array_type: str | ChunkManagerEntrypoint[Any] | None = None, + from_array_kwargs: Any = None, + **chunks_kwargs: Any, + ) -> Self: # type: ignore[override] + """Coerce this array's data into a dask array with the given chunks. + + If this variable is a non-dask array, it will be converted to dask + array. If it's a dask array, it will be rechunked to the given chunk + sizes. + + If neither chunks is not provided for one or more dimensions, chunk + sizes along that dimension will not be updated; non-dask arrays will be + converted into dask arrays with a single block. + + Parameters + ---------- + chunks : int, tuple or dict, optional + Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or + ``{'x': 5, 'y': 5}``. + name : str, optional + Used to generate the name for this array in the internal dask + graph. Does not need not be unique. 
+ lock : bool, default: False + Passed on to :py:func:`dask.array.from_array`, if the array is not + already as dask array. + inline_array : bool, default: False + Passed on to :py:func:`dask.array.from_array`, if the array is not + already as dask array. + chunked_array_type: str, optional + Which chunked array type to coerce this datasets' arrays to. + Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system. + Experimental API that should not be relied upon. + from_array_kwargs: dict, optional + Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create + chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg. + For example, with dask as the default chunked array type, this method would pass additional kwargs + to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon. + **chunks_kwargs : {dim: chunks, ...}, optional + The keyword arguments form of ``chunks``. + One of chunks or chunks_kwargs must be provided. + + Returns + ------- + chunked : xarray.Variable + + See Also + -------- + Variable.chunks + Variable.chunksizes + xarray.unify_chunks + dask.array.from_array + """ + + if from_array_kwargs is None: + from_array_kwargs = {} + + # TODO deprecate passing these dask-specific arguments explicitly. In future just pass everything via from_array_kwargs + _from_array_kwargs = consolidate_dask_from_array_kwargs( + from_array_kwargs, + name=name, + lock=lock, + inline_array=inline_array, + ) + + return super().chunk( + chunks=chunks, + chunked_array_type=chunked_array_type, + from_array_kwargs=_from_array_kwargs, + **chunks_kwargs, + ) + class IndexVariable(Variable): """Wrapper for accommodating a pandas.Index in an xarray.Variable. diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 06242b68f3b..29722690437 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -12,7 +12,6 @@ Generic, Literal, TypeVar, - Union, cast, overload, ) @@ -44,7 +43,6 @@ from xarray.namedarray.parallelcompat import guess_chunkmanager from xarray.namedarray.pycompat import to_numpy from xarray.namedarray.utils import ( - consolidate_dask_from_array_kwargs, either_dict_or_kwargs, infix_dims, is_dict_like, @@ -733,16 +731,7 @@ def sizes(self) -> dict[_Dim, _IntOrUnknown]: def chunk( self, - chunks: ( - int - | Literal["auto"] - | tuple[int, ...] - | tuple[tuple[int, ...], ...] - | Mapping[Any, None | int | tuple[int, ...]] - ) = {}, - name: str | None = None, - lock: bool | None = None, - inline_array: bool | None = None, + chunks: int | Literal["auto"] | Mapping[Any, None | int | tuple[int, ...]] = {}, chunked_array_type: str | ChunkManagerEntrypoint[Any] | None = None, from_array_kwargs: Any = None, **chunks_kwargs: Any, @@ -762,15 +751,6 @@ def chunk( chunks : int, tuple or dict, optional Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or ``{'x': 5, 'y': 5}``. - name : str, optional - Used to generate the name for this array in the internal dask - graph. Does not need not be unique. - lock : bool, default: False - Passed on to :py:func:`dask.array.from_array`, if the array is not - already as dask array. - inline_array : bool, default: False - Passed on to :py:func:`dask.array.from_array`, if the array is not - already as dask array. chunked_array_type: str, optional Which chunked array type to coerce this datasets' arrays to. 
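The split above — a ``Variable.chunk`` override that keeps the deprecated dask-only keywords, and a ``NamedArray.chunk`` that no longer knows about them — reduces to a small shim pattern (hypothetical class names, minimal sketch):

    from __future__ import annotations

    from typing import Any

    class BaseArray:
        def chunk(self, chunks: Any = {}, from_array_kwargs: Any = None) -> BaseArray:
            return self  # stands in for the generic NamedArray.chunk

    class CompatArray(BaseArray):
        # folds the legacy keywords into from_array_kwargs, then delegates
        def chunk(
            self,
            chunks: Any = {},
            name: str | None = None,
            lock: bool | None = None,
            from_array_kwargs: Any = None,
        ) -> BaseArray:
            kwargs = dict(from_array_kwargs or {})
            for key, value in {"name": name, "lock": lock}.items():
                if value is not None:
                    if key in kwargs:
                        raise ValueError(f"{key} passed both ways")
                    kwargs[key] = value
            return super().chunk(chunks=chunks, from_array_kwargs=kwargs)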
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system. @@ -796,7 +776,8 @@ def chunk( dask.array.from_array """ - chunks = cast(Union[tuple[tuple[int, ...], ...], tuple[int, ...]], chunks) + if from_array_kwargs is None: + from_array_kwargs = {} if chunks is None: warnings.warn( @@ -818,17 +799,6 @@ def chunk( chunkmanager = guess_chunkmanager(chunked_array_type) - if from_array_kwargs is None: - from_array_kwargs = {} - - # TODO deprecate passing these dask-specific arguments explicitly. In future just pass everything via from_array_kwargs - _from_array_kwargs = consolidate_dask_from_array_kwargs( - from_array_kwargs, - name=name, - lock=lock, - inline_array=inline_array, - ) - data_old = self._data if chunkmanager.is_chunked_array(data_old): data_chunked = chunkmanager.rechunk(data_old, chunks) # type: ignore[arg-type] @@ -847,9 +817,9 @@ def chunk( ndata = ImplicitToExplicitIndexingAdapter(data_old, OuterIndexer) # type: ignore[no-untyped-call, assignment] if is_dict_like(chunks): - chunks = tuple(chunks.get(n, s) for n, s in enumerate(ndata.shape)) + chunks = tuple(chunks.get(n, s) for n, s in enumerate(ndata.shape)) # type: ignore[assignment] - data_chunked = chunkmanager.from_array(ndata, chunks, **_from_array_kwargs) # type: ignore[arg-type] + data_chunked = chunkmanager.from_array(ndata, chunks, **from_array_kwargs) # type: ignore[arg-type] return self._replace(data=data_chunked) diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index db1be996f73..0326a6173cd 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -222,66 +222,3 @@ def __dask_tokenize__(self) -> Hashable: from dask.base import normalize_token return normalize_token((type(self), self._value)) # type: ignore[no-any-return] - - -def consolidate_dask_from_array_kwargs( - from_array_kwargs: dict[Any, Any], - name: str | None = None, - lock: bool | None = None, - inline_array: bool | None = None, -) -> dict[Any, Any]: - """ - Merge dask-specific kwargs with arbitrary from_array_kwargs dict. - - Temporary function, to be deleted once explicitly passing dask-specific kwargs to .chunk() is deprecated. 
- """ - - from_array_kwargs = _resolve_doubly_passed_kwarg( - from_array_kwargs, - kwarg_name="name", - passed_kwarg_value=name, - default=None, - err_msg_dict_name="from_array_kwargs", - ) - from_array_kwargs = _resolve_doubly_passed_kwarg( - from_array_kwargs, - kwarg_name="lock", - passed_kwarg_value=lock, - default=False, - err_msg_dict_name="from_array_kwargs", - ) - from_array_kwargs = _resolve_doubly_passed_kwarg( - from_array_kwargs, - kwarg_name="inline_array", - passed_kwarg_value=inline_array, - default=False, - err_msg_dict_name="from_array_kwargs", - ) - - return from_array_kwargs - - -def _resolve_doubly_passed_kwarg( - kwargs_dict: dict[Any, Any], - kwarg_name: str, - passed_kwarg_value: str | bool | None, - default: bool | None, - err_msg_dict_name: str, -) -> dict[Any, Any]: - # if in kwargs_dict but not passed explicitly then just pass kwargs_dict through unaltered - if kwarg_name in kwargs_dict and passed_kwarg_value is None: - pass - # if passed explicitly but not in kwargs_dict then use that - elif kwarg_name not in kwargs_dict and passed_kwarg_value is not None: - kwargs_dict[kwarg_name] = passed_kwarg_value - # if in neither then use default - elif kwarg_name not in kwargs_dict and passed_kwarg_value is None: - kwargs_dict[kwarg_name] = default - # if in both then raise - else: - raise ValueError( - f"argument {kwarg_name} cannot be passed both as a keyword argument and within " - f"the {err_msg_dict_name} dictionary" - ) - - return kwargs_dict From 564ddcf388ad8409c7a9139740d989c8ebedb92d Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 8 Feb 2024 22:00:54 -0800 Subject: [PATCH 41/54] fix type annotations --- xarray/core/variable.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 108f5a9ad9d..24ac0e042ad 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2478,7 +2478,7 @@ def _to_dense(self) -> Variable: out = super()._to_dense() return cast("Variable", out) - def chunk( + def chunk( # type: ignore[override] self, chunks: int | Literal["auto"] | Mapping[Any, None | int | tuple[int, ...]] = {}, name: str | None = None, @@ -2487,7 +2487,7 @@ def chunk( chunked_array_type: str | ChunkManagerEntrypoint[Any] | None = None, from_array_kwargs: Any = None, **chunks_kwargs: Any, - ) -> Self: # type: ignore[override] + ) -> Self: """Coerce this array's data into a dask array with the given chunks. 
If this variable is a non-dask array, it will be converted to dask From 5f9bcfc3f1f5f87fe316e7c3056f1ce9b102a068 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 9 Feb 2024 15:17:57 -0800 Subject: [PATCH 42/54] Fix type annotations in DaskManager --- xarray/namedarray/daskmanager.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index e5ce7dbd630..bb41b0ea341 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections.abc import Iterable, Sequence -from typing import TYPE_CHECKING, Any, Callable +from typing import TYPE_CHECKING, Any, Callable, cast import numpy as np from packaging.version import Version @@ -18,12 +18,17 @@ duckarray, ) + try: + from dask.array import Array as DaskArray + except ImportError: + DaskArray = np.ndarray[Any, Any] # type: ignore[assignment, misc] + dask_available = module_available("dask") -class DaskManager(ChunkManagerEntrypoint[Any]): - array_cls: type[Any] +class DaskManager(ChunkManagerEntrypoint["DaskArray"]): # type: ignore[type-var] + array_cls: type[DaskArray] available: bool = dask_available def __init__(self) -> None: @@ -82,7 +87,7 @@ def array_api(self) -> Any: return da - def reduction( + def reduction( # type: ignore[override] self, arr: T_ChunkedArray, func: Callable[..., Any], @@ -91,7 +96,7 @@ def reduction( axis: int | Sequence[int] | None = None, dtype: _DType_co | None = None, keepdims: bool = False, - ) -> Any: + ) -> DaskArray | Any: from dask.array import reduction return reduction( @@ -104,7 +109,7 @@ def reduction( keepdims=keepdims, ) # type: ignore[no-untyped-call] - def scan( + def scan( # type: ignore[override] self, func: Callable[..., Any], binop: Callable[..., Any], @@ -113,7 +118,7 @@ def scan( axis: int | None = None, dtype: _DType_co | None = None, **kwargs: Any, - ) -> Any: + ) -> DaskArray | Any: from dask.array.reductions import cumreduction return cumreduction( @@ -202,7 +207,7 @@ def blockwise( concatenate: bool | None = None, meta: tuple[np.ndarray[Any, _DType_co], ...] 
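``reduction`` and ``scan`` above carry ``# type: ignore[override]`` because their return types widen to ``DaskArray | Any`` relative to the abstract base. A two-class reproduction of the error being silenced:

    from __future__ import annotations

    class Base:
        def value(self) -> int:
            return 0

    class Widened(Base):
        # mypy: return type "int | str" is incompatible with "int" in the
        # supertype — overrides may narrow a return type but never widen it,
        # hence the targeted ignore
        def value(self) -> int | str:  # type: ignore[override]
            return "0"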
| None = None, **kwargs: Any, - ) -> Any: + ) -> DaskArray | Any: from dask.array import blockwise return blockwise( @@ -224,10 +229,13 @@ def unify_chunks( self, *args: Any, # can't type this as mypy assumes args are all same type, but dask unify_chunks args alternate types **kwargs: Any, - ) -> Any: + ) -> tuple[dict[str, _NormalizedChunks], list[DaskArray]]: from dask.array.core import unify_chunks - return unify_chunks(*args, **kwargs) # type: ignore[no-untyped-call] + return cast( + tuple[dict[str, _NormalizedChunks], list[DaskArray]], + unify_chunks(*args, **kwargs), # type: ignore[no-untyped-call] + ) def store( self, From 9f0ad6eae41ad08d6eb29ad38dbb558c984cd02f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 9 Feb 2024 15:20:39 -0800 Subject: [PATCH 43/54] Update DaskManager's from_array method signature --- xarray/namedarray/daskmanager.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index bb41b0ea341..d8b4990d980 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -63,7 +63,9 @@ def normalize_chunks( previous_chunks=previous_chunks, ) # type: ignore[no-untyped-call] - def from_array(self, data: Any, chunks: Any, **kwargs: Any) -> Any: + def from_array( + self, data: Any, chunks: T_Chunks | _NormalizedChunks, **kwargs: Any + ) -> DaskArray | Any: import dask.array as da if isinstance(data, ImplicitToExplicitIndexingAdapter): From a7016eec8642680b6e82224f99fa2109db914b89 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 9 Feb 2024 15:29:10 -0800 Subject: [PATCH 44/54] Update compute method return type --- xarray/namedarray/daskmanager.py | 6 ++++-- xarray/namedarray/parallelcompat.py | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index d8b4990d980..a2594c1da9b 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -78,10 +78,12 @@ def from_array( **kwargs, ) # type: ignore[no-untyped-call] - def compute(self, *data: Any, **kwargs: Any) -> Any: + def compute( + self, *data: Any, **kwargs: Any + ) -> tuple[np.ndarray[Any, _DType_co], ...]: from dask.array import compute - return compute(*data, **kwargs) # type: ignore[no-untyped-call] + return compute(*data, **kwargs) # type: ignore[no-untyped-call, no-any-return] @property def array_api(self) -> Any: diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index 95eae305480..c6263bff4ff 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -37,7 +37,9 @@ def dtype(self) -> np.dtype[Any]: ... @property def chunks(self) -> _NormalizedChunks: ... - def compute(self, *data: Any, **kwargs: Any) -> np.ndarray[Any, Any]: ... + def compute( + self, *data: Any, **kwargs: Any + ) -> tuple[np.ndarray[Any, _DType_co], ...]: ... 
T_ChunkedArray = TypeVar("T_ChunkedArray", bound=ChunkedArrayMixinProtocol) From be7109c485a78f81452fe5768e5014c0cc9e2cfa Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 9 Feb 2024 15:32:03 -0800 Subject: [PATCH 45/54] Refactor DaskManager's unify_chunks method*** --- xarray/namedarray/daskmanager.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index a2594c1da9b..e20c042f195 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections.abc import Iterable, Sequence -from typing import TYPE_CHECKING, Any, Callable, cast +from typing import TYPE_CHECKING, Any, Callable import numpy as np from packaging.version import Version @@ -236,10 +236,7 @@ def unify_chunks( ) -> tuple[dict[str, _NormalizedChunks], list[DaskArray]]: from dask.array.core import unify_chunks - return cast( - tuple[dict[str, _NormalizedChunks], list[DaskArray]], - unify_chunks(*args, **kwargs), # type: ignore[no-untyped-call] - ) + return (unify_chunks(*args, **kwargs),) # type: ignore[return-value, no-untyped-call] def store( self, From 6ac37b34a61ef2db4b9f4e114b9c8f7047de851d Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 9 Feb 2024 15:42:28 -0800 Subject: [PATCH 46/54] Fix return value in DaskManager --- xarray/namedarray/daskmanager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index e20c042f195..14744d2de6b 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -236,7 +236,7 @@ def unify_chunks( ) -> tuple[dict[str, _NormalizedChunks], list[DaskArray]]: from dask.array.core import unify_chunks - return (unify_chunks(*args, **kwargs),) # type: ignore[return-value, no-untyped-call] + return unify_chunks(*args, **kwargs) # type: ignore[no-any-return, no-untyped-call] def store( self, From 42210a89ef55c366ce59fa813a306385e8642c1e Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 9 Feb 2024 15:49:40 -0800 Subject: [PATCH 47/54] Fix imports and use is_chunked_array instead of _chunkedarrayfunction_or_api --- xarray/backends/common.py | 4 ++-- xarray/coding/variables.py | 2 +- xarray/core/duck_array_ops.py | 14 +++++++------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index edf457f908c..5b7cdc4cf50 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -13,8 +13,8 @@ from xarray.conventions import cf_encoder from xarray.core import indexing from xarray.core.utils import FrozenDict, NdimSizeLenMixin, is_remote_uri -from xarray.namedarray._typing import _chunkedarrayfunction_or_api from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import is_chunked_array if TYPE_CHECKING: from io import BufferedIOBase @@ -232,7 +232,7 @@ def __init__(self, lock=None): self.lock = lock def add(self, source, target, region=None): - if isinstance(source, _chunkedarrayfunction_or_api): + if is_chunked_array(source): self.sources.append(source) self.targets.append(target) self.regions.append(region) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index adbf32bcbb7..b5e4167f2b2 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -163,7 +163,7 @@ def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike): 
if is_chunked_array(array): chunkmanager = get_chunked_array_type(array) - return chunkmanager.map_blocks(func, array, dtype=dtype) # type: ignore + return chunkmanager.map_blocks(func, array, dtype=dtype) # type: ignore[arg-type] else: return _ElementwiseFunctionArray(array, func, dtype) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 550e7511d9a..908c36aed72 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -17,17 +17,17 @@ import pandas as pd from numpy import all as array_all # noqa from numpy import any as array_any # noqa -from numpy import ( +from numpy import ( # noqa around, # noqa full_like, - gradient, # noqa + gradient, isclose, - isin, # noqa + isin, isnat, - take, # noqa - tensordot, # noqa - transpose, # noqa - unravel_index, # noqa + take, + tensordot, + transpose, + unravel_index, ) from numpy import concatenate as _concatenate from numpy.lib.stride_tricks import sliding_window_view # noqa From 5c78d49bc39f1be2c9f29c50198e8f531415cd03 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 9 Feb 2024 15:55:03 -0800 Subject: [PATCH 48/54] use is_duck_array --- xarray/testing/assertions.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/xarray/testing/assertions.py b/xarray/testing/assertions.py index 0a675dc66e3..2986dd4e3d2 100644 --- a/xarray/testing/assertions.py +++ b/xarray/testing/assertions.py @@ -14,7 +14,7 @@ from xarray.core.dataset import Dataset from xarray.core.indexes import Index, PandasIndex, PandasMultiIndex, default_indexes from xarray.core.variable import IndexVariable, Variable -from xarray.namedarray._typing import _arrayfunction_or_api +from xarray.namedarray.utils import is_duck_array def ensure_warnings(func): @@ -221,14 +221,14 @@ def assert_duckarray_equal(x, y, err_msg="", verbose=True): """Like `np.testing.assert_array_equal`, but for duckarrays""" __tracebackhide__ = True - if not isinstance(x, _arrayfunction_or_api) and not utils.is_scalar(x): + if not is_duck_array(x) and not utils.is_scalar(x): x = np.asarray(x) - if not isinstance(y, _arrayfunction_or_api) and not utils.is_scalar(y): + if not is_duck_array(x) and not utils.is_scalar(y): y = np.asarray(y) - if (isinstance(x, _arrayfunction_or_api) and utils.is_scalar(y)) or ( - utils.is_scalar(x) and isinstance(y, _arrayfunction_or_api) + if (is_duck_array(x) and utils.is_scalar(y)) or ( + utils.is_scalar(x) and is_duck_array(y) ): equiv = (x == y).all() else: From 30487ff13b21ddbe457936dbe549e262f3728692 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 9 Feb 2024 15:56:51 -0800 Subject: [PATCH 49/54] Fix incorrect variable name in assert_duckarray_equal function --- xarray/testing/assertions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/testing/assertions.py b/xarray/testing/assertions.py index 2986dd4e3d2..541ff66788d 100644 --- a/xarray/testing/assertions.py +++ b/xarray/testing/assertions.py @@ -224,7 +224,7 @@ def assert_duckarray_equal(x, y, err_msg="", verbose=True): if not is_duck_array(x) and not utils.is_scalar(x): x = np.asarray(x) - if not is_duck_array(x) and not utils.is_scalar(y): + if not is_duck_array(y) and not utils.is_scalar(y): y = np.asarray(y) if (is_duck_array(x) and utils.is_scalar(y)) or ( From 76b7e84662eea15903091d64f22e1a5f8dc686f2 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 12 Feb 2024 13:01:28 -0800 Subject: [PATCH 50/54] try preserving import structure --- xarray/backends/plugins.py | 2 +- 
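Patch 49's one-character fix above is easy to miss: patch 48 tested ``x`` in the guard that should test ``y``, so once ``x`` had been converted the second coercion could never run. The failure mode in isolation (numpy's ``isinstance`` check stands in for ``is_duck_array``):

    import numpy as np

    def coerce_pair_buggy(x, y):
        # toy reproduction of the assert_duckarray_equal bug
        if not isinstance(x, np.ndarray):
            x = np.asarray(x)
        if not isinstance(x, np.ndarray):  # bug: should test y
            y = np.asarray(y)
        return x, y

    x, y = coerce_pair_buggy([1, 2], [3, 4])
    print(type(y))  # <class 'list'> -- y was silently left uncoerced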
xarray/backends/pydap_.py | 2 +- xarray/core/_aggregations.py | 1189 +------------------------- xarray/core/alignment.py | 3 +- xarray/core/common.py | 2 +- xarray/core/computation.py | 3 +- xarray/core/coordinates.py | 2 +- xarray/core/dataarray.py | 45 +- xarray/core/dataset.py | 108 ++- xarray/core/duck_array_ops.py | 2 +- xarray/core/formatting.py | 2 +- xarray/core/groupby.py | 2 +- xarray/core/indexes.py | 2 +- xarray/core/indexing.py | 8 +- xarray/core/nputils.py | 2 +- xarray/core/parallel.py | 2 +- xarray/core/rolling.py | 28 +- xarray/core/rolling_exp.py | 2 +- xarray/core/utils.py | 88 +- xarray/core/variable.py | 12 +- xarray/plot/utils.py | 3 +- xarray/testing/assertions.py | 9 +- xarray/tests/test_coding_times.py | 2 +- xarray/tests/test_utils.py | 3 +- xarray/util/generate_aggregations.py | 3 +- 25 files changed, 131 insertions(+), 1395 deletions(-) diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 6ce81a8c7f7..a62ca6c9862 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Any, Callable from xarray.backends.common import BACKEND_ENTRYPOINTS, BackendEntrypoint -from xarray.namedarray.utils import module_available +from xarray.core.utils import module_available if TYPE_CHECKING: import os diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index f14a32a5dde..5a475a7c3be 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -18,11 +18,11 @@ Frozen, FrozenDict, close_on_error, + is_dict_like, is_remote_uri, ) from xarray.core.variable import Variable from xarray.namedarray.pycompat import integer_types -from xarray.namedarray.utils import is_dict_like if TYPE_CHECKING: import os diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index c8510886d13..cea544873b7 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -10,8 +10,7 @@ from xarray.core import duck_array_ops from xarray.core.options import OPTIONS from xarray.core.types import Dims, Self -from xarray.core.utils import contains_only_chunked_or_numpy -from xarray.namedarray.utils import module_available +from xarray.core.utils import contains_only_chunked_or_numpy, module_available if TYPE_CHECKING: from xarray.core.dataarray import DataArray @@ -85,19 +84,8 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.count() - Size: 8B - Dimensions: () - Data variables: - da int64 8B 5 """ return self.reduce( duck_array_ops.count, @@ -157,19 +145,8 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 78B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.all() - Size: 1B - Dimensions: () - Data variables: - da bool 1B False """ return self.reduce( duck_array_ops.array_all, @@ -229,19 +206,8 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 78B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.any() - Size: 1B - Dimensions: () - Data variables: - da bool 1B True """ return self.reduce( duck_array_ops.array_any, @@ -307,27 +273,12 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
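# For the _aggregations.py hunks below: a sketch of the module_available
# helper that this patch imports from xarray.core.utils again (the generated
# file uses it to set flox_available). Implementation assumed from its usage,
# an importability probe that avoids actually importing the module.
from importlib.util import find_spec

def module_available(module: str) -> bool:
    """Return True if the named module can be imported."""
    return find_spec(module) is not None

flox_available = module_available("flox")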
2001-06-30 - labels (time) >> ds.max() - Size: 8B - Dimensions: () - Data variables: - da float64 8B 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.max(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan """ return self.reduce( duck_array_ops.max, @@ -394,27 +345,12 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.min() - Size: 8B - Dimensions: () - Data variables: - da float64 8B 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.min(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan """ return self.reduce( duck_array_ops.min, @@ -485,27 +421,12 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.mean() - Size: 8B - Dimensions: () - Data variables: - da float64 8B 1.6 Use ``skipna`` to control whether NaNs are ignored. >>> ds.mean(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan """ return self.reduce( duck_array_ops.mean, @@ -583,35 +504,16 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.prod() - Size: 8B - Dimensions: () - Data variables: - da float64 8B 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.prod(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.prod(skipna=True, min_count=2) - Size: 8B - Dimensions: () - Data variables: - da float64 8B 0.0 """ return self.reduce( duck_array_ops.prod, @@ -690,35 +592,16 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.sum() - Size: 8B - Dimensions: () - Data variables: - da float64 8B 8.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.sum(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.sum(skipna=True, min_count=2) - Size: 8B - Dimensions: () - Data variables: - da float64 8B 8.0 """ return self.reduce( duck_array_ops.sum, @@ -794,35 +677,16 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.std() - Size: 8B - Dimensions: () - Data variables: - da float64 8B 1.02 Use ``skipna`` to control whether NaNs are ignored. >>> ds.std(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.std(skipna=True, ddof=1) - Size: 8B - Dimensions: () - Data variables: - da float64 8B 1.14 """ return self.reduce( duck_array_ops.std, @@ -898,35 +762,16 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.var() - Size: 8B - Dimensions: () - Data variables: - da float64 8B 1.04 Use ``skipna`` to control whether NaNs are ignored. 
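# The docstring examples trimmed throughout this file all share one example
# object; a runnable reconstruction, with values taken from the surrounding
# hunks (freq="ME" assumes pandas >= 2.2 month-end aliases):
import numpy as np
import pandas as pd
import xarray as xr

da = xr.DataArray(
    np.array([1, 2, 3, 0, 2, np.nan]),
    dims="time",
    coords=dict(
        time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
        labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
    ),
)
ds = xr.Dataset(dict(da=da))
ds.sum()              # da = 8.0
ds.sum(skipna=False)  # da = nan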
>>> ds.var(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.var(skipna=True, ddof=1) - Size: 8B - Dimensions: () - Data variables: - da float64 8B 1.3 """ return self.reduce( duck_array_ops.var, @@ -998,27 +843,12 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.median() - Size: 8B - Dimensions: () - Data variables: - da float64 8B 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.median(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan """ return self.reduce( duck_array_ops.median, @@ -1089,29 +919,12 @@ def cumsum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.cumsum() - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 3.0 6.0 6.0 8.0 8.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.cumsum(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 3.0 6.0 6.0 8.0 nan """ return self.reduce( duck_array_ops.cumsum, @@ -1182,29 +995,12 @@ def cumprod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.cumprod() - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 6.0 0.0 0.0 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.cumprod(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 6.0 0.0 0.0 nan """ return self.reduce( duck_array_ops.cumprod, @@ -1280,15 +1076,8 @@ def count( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.count() - Size: 8B - array(5) """ return self.reduce( duck_array_ops.count, @@ -1346,15 +1135,8 @@ def all( ... ), ... ) >>> da - Size: 6B - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.all() - Size: 1B - array(False) """ return self.reduce( duck_array_ops.array_all, @@ -1412,15 +1194,8 @@ def any( ... ), ... ) >>> da - Size: 6B - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.any() - Size: 1B - array(True) """ return self.reduce( duck_array_ops.array_any, @@ -1484,21 +1259,12 @@ def max( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.max() - Size: 8B - array(3.) Use ``skipna`` to control whether NaNs are ignored. >>> da.max(skipna=False) - Size: 8B - array(nan) """ return self.reduce( duck_array_ops.max, @@ -1563,21 +1329,12 @@ def min( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> da.min() - Size: 8B - array(0.) Use ``skipna`` to control whether NaNs are ignored. >>> da.min(skipna=False) - Size: 8B - array(nan) """ return self.reduce( duck_array_ops.min, @@ -1646,21 +1403,12 @@ def mean( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.mean() - Size: 8B - array(1.6) Use ``skipna`` to control whether NaNs are ignored. >>> da.mean(skipna=False) - Size: 8B - array(nan) """ return self.reduce( duck_array_ops.mean, @@ -1736,27 +1484,16 @@ def prod( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.prod() - Size: 8B - array(0.) Use ``skipna`` to control whether NaNs are ignored. >>> da.prod(skipna=False) - Size: 8B - array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.prod(skipna=True, min_count=2) - Size: 8B - array(0.) """ return self.reduce( duck_array_ops.prod, @@ -1833,27 +1570,16 @@ def sum( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.sum() - Size: 8B - array(8.) Use ``skipna`` to control whether NaNs are ignored. >>> da.sum(skipna=False) - Size: 8B - array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.sum(skipna=True, min_count=2) - Size: 8B - array(8.) """ return self.reduce( duck_array_ops.sum, @@ -1927,27 +1653,16 @@ def std( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.std() - Size: 8B - array(1.0198039) Use ``skipna`` to control whether NaNs are ignored. >>> da.std(skipna=False) - Size: 8B - array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.std(skipna=True, ddof=1) - Size: 8B - array(1.14017543) """ return self.reduce( duck_array_ops.std, @@ -2021,27 +1736,16 @@ def var( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.var() - Size: 8B - array(1.04) Use ``skipna`` to control whether NaNs are ignored. >>> da.var(skipna=False) - Size: 8B - array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.var(skipna=True, ddof=1) - Size: 8B - array(1.3) """ return self.reduce( duck_array_ops.var, @@ -2111,21 +1815,12 @@ def median( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.median() - Size: 8B - array(2.) Use ``skipna`` to control whether NaNs are ignored. >>> da.median(skipna=False) - Size: 8B - array(nan) """ return self.reduce( duck_array_ops.median, @@ -2194,27 +1889,12 @@ def cumsum( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.cumsum() - Size: 48B - array([1., 3., 6., 6., 8., 8.]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.cumsum(skipna=False) - Size: 48B - array([ 1., 3., 6., 6., 8., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.cumprod() - Size: 48B - array([1., 2., 6., 0., 0., 0.]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.cumprod(skipna=False) - Size: 48B - array([ 1., 2., 6., 0., 0., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").count() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) int64 24B 1 2 2 """ if ( flox_available @@ -2507,21 +2159,8 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 78B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").all() - Size: 27B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) bool 3B False True True """ if ( flox_available @@ -2605,21 +2244,8 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 78B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").any() - Size: 27B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) bool 3B True True True """ if ( flox_available @@ -2709,31 +2335,12 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").max() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B 1.0 2.0 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").max(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 2.0 3.0 """ if ( flox_available @@ -2825,31 +2432,12 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").min() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B 1.0 2.0 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").min(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 2.0 0.0 """ if ( flox_available @@ -2943,31 +2531,12 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").mean() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B 1.0 2.0 1.5 Use ``skipna`` to control whether NaNs are ignored. 
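# The grouped reductions around here, evaluated on the example data
# reconstructed above: labels run a b c c b a, so the groups pair the
# values 1/nan, 2/2, and 3/0.
ds.groupby("labels").mean()              # a: 1.0, b: 2.0, c: 1.5
ds.groupby("labels").mean(skipna=False)  # a: nan, b: 2.0, c: 1.5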
>>> ds.groupby("labels").mean(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 2.0 1.5 """ if ( flox_available @@ -3068,41 +2637,16 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").prod() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B 1.0 4.0 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").prod(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 4.0 0.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").prod(skipna=True, min_count=2) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 4.0 0.0 """ if ( flox_available @@ -3205,41 +2749,16 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").sum() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B 1.0 4.0 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").sum(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 4.0 3.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").sum(skipna=True, min_count=2) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 4.0 3.0 """ if ( flox_available @@ -3339,41 +2858,16 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").std() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B 0.0 0.0 1.5 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").std(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 0.0 1.5 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").std(skipna=True, ddof=1) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 0.0 2.121 """ if ( flox_available @@ -3473,41 +2967,16 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").var() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B 0.0 0.0 2.25 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.groupby("labels").var(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 0.0 2.25 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").var(skipna=True, ddof=1) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 0.0 4.5 """ if ( flox_available @@ -3603,31 +3072,12 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").median() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B 1.0 2.0 1.5 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").median(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 2.0 1.5 """ return self._reduce_without_squeeze_warn( duck_array_ops.median, @@ -3706,29 +3156,12 @@ def cumsum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").cumsum() - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 3.0 3.0 4.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").cumsum(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 3.0 3.0 4.0 nan """ return self._reduce_without_squeeze_warn( duck_array_ops.cumsum, @@ -3807,29 +3240,12 @@ def cumprod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").cumprod() - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 3.0 0.0 4.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").cumprod(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 3.0 0.0 4.0 nan """ return self._reduce_without_squeeze_warn( duck_array_ops.cumprod, @@ -3936,21 +3352,8 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").count() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) int64 24B 1 3 1 """ if ( flox_available @@ -4034,21 +3437,8 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 78B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").all() - Size: 27B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) bool 3B True True False """ if ( flox_available @@ -4132,21 +3522,8 @@ def any( ... 
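# Every generated reduction in this file dispatches the same way; a
# simplified sketch of that shape (the real method bodies are produced by
# xarray/util/generate_aggregations.py and carry more keyword plumbing):
from xarray.core import duck_array_ops
from xarray.core.options import OPTIONS
from xarray.core.utils import contains_only_chunked_or_numpy, module_available

flox_available = module_available("flox")

def grouped_sum(grouped, dim=None, skipna=None, **kwargs):  # hypothetical
    # use flox's fast grouped path only when it is installed, enabled, and
    # the underlying data is numpy- or dask-backed
    if (
        flox_available
        and OPTIONS["use_flox"]
        and contains_only_chunked_or_numpy(grouped._obj)
    ):
        return grouped._flox_reduce(func="sum", dim=dim, skipna=skipna, **kwargs)
    return grouped._reduce_without_squeeze_warn(
        duck_array_ops.sum, dim=dim, skipna=skipna, **kwargs
    )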
) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 78B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").any() - Size: 27B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) bool 3B True True True """ if ( flox_available @@ -4236,31 +3613,12 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").max() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 3.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").max(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 3.0 nan """ if ( flox_available @@ -4352,31 +3710,12 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").min() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 0.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").min(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 0.0 nan """ if ( flox_available @@ -4470,31 +3809,12 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").mean() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 1.667 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").mean(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 1.667 nan """ if ( flox_available @@ -4595,41 +3915,16 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").prod() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 0.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").prod(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 0.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. 
>>> ds.resample(time="3ME").prod(skipna=True, min_count=2) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B nan 0.0 nan """ if ( flox_available @@ -4732,41 +4027,16 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").sum() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 5.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").sum(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 5.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.resample(time="3ME").sum(skipna=True, min_count=2) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B nan 5.0 nan """ if ( flox_available @@ -4866,41 +4136,16 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").std() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 0.0 1.247 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").std(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 0.0 1.247 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3ME").std(skipna=True, ddof=1) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B nan 1.528 nan """ if ( flox_available @@ -5000,41 +4245,16 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").var() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 0.0 1.556 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").var(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 0.0 1.556 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3ME").var(skipna=True, ddof=1) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B nan 2.333 nan """ if ( flox_available @@ -5130,31 +4350,12 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
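# The "3ME" bins behind these resample outputs, continuing the example data
# reconstructed above (bins are labelled at their end dates 2001-01-31,
# 2001-04-30, 2001-07-31):
# Jan -> [1.]; Feb-Apr -> [2., 3., 0.]; May-Jun -> [2., nan]
ds.resample(time="3ME").sum()              # da = [1., 5., 2.]
ds.resample(time="3ME").sum(skipna=False)  # da = [1., 5., nan]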
2001-06-30 - labels (time) >> ds.resample(time="3ME").median() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").median(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 2.0 nan """ return self._reduce_without_squeeze_warn( duck_array_ops.median, @@ -5233,29 +4434,12 @@ def cumsum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").cumsum() - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 5.0 5.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").cumsum(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 5.0 5.0 2.0 nan """ return self._reduce_without_squeeze_warn( duck_array_ops.cumsum, @@ -5334,29 +4518,12 @@ def cumprod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").cumprod() - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 6.0 0.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").cumprod(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 6.0 0.0 2.0 nan """ return self._reduce_without_squeeze_warn( duck_array_ops.cumprod, @@ -5462,17 +4629,8 @@ def count( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").count() - Size: 24B - array([1, 2, 2]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -5553,17 +4711,8 @@ def all( ... ), ... ) >>> da - Size: 6B - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").all() - Size: 3B - array([False, True, True]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -5644,17 +4793,8 @@ def any( ... ), ... ) >>> da - Size: 6B - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").any() - Size: 3B - array([ True, True, True]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -5741,25 +4881,12 @@ def max( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").max() - Size: 24B - array([1., 2., 3.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. 
>>> da.groupby("labels").max(skipna=False) - Size: 24B - array([nan, 2., 3.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -5848,25 +4975,12 @@ def min( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").min() - Size: 24B - array([1., 2., 0.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").min(skipna=False) - Size: 24B - array([nan, 2., 0.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -5957,25 +5071,12 @@ def mean( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").mean() - Size: 24B - array([1. , 2. , 1.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").mean(skipna=False) - Size: 24B - array([nan, 2. , 1.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6073,33 +5174,16 @@ def prod( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").prod() - Size: 24B - array([1., 4., 0.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").prod(skipna=False) - Size: 24B - array([nan, 4., 0.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").prod(skipna=True, min_count=2) - Size: 24B - array([nan, 4., 0.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6199,33 +5283,16 @@ def sum( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").sum() - Size: 24B - array([1., 4., 3.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").sum(skipna=False) - Size: 24B - array([nan, 4., 3.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").sum(skipna=True, min_count=2) - Size: 24B - array([nan, 4., 3.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6322,33 +5389,16 @@ def std( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").std() - Size: 24B - array([0. , 0. , 1.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").std(skipna=False) - Size: 24B - array([nan, 0. , 1.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").std(skipna=True, ddof=1) - Size: 24B - array([ nan, 0. , 2.12132034]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6445,33 +5495,16 @@ def var( ... ), ... 
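# min_count, as exercised in these examples (continuing the reconstructed
# data): a group that reduces over fewer than min_count valid values yields
# NaN instead of the operation's identity.
da.groupby("labels").sum(skipna=True, min_count=2)
# a -> nan (only one non-NaN value), b -> 4.0, c -> 3.0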
) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").var() - Size: 24B - array([0. , 0. , 2.25]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").var(skipna=False) - Size: 24B - array([ nan, 0. , 2.25]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").var(skipna=True, ddof=1) - Size: 24B - array([nan, 0. , 4.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6564,25 +5597,12 @@ def median( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").median() - Size: 24B - array([1. , 2. , 1.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").median(skipna=False) - Size: 24B - array([nan, 2. , 1.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ return self._reduce_without_squeeze_warn( duck_array_ops.median, @@ -6659,27 +5679,12 @@ def cumsum( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").cumsum() - Size: 48B - array([1., 2., 3., 3., 4., 1.]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").cumsum(skipna=False) - Size: 48B - array([ 1., 2., 3., 3., 4., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").cumprod() - Size: 48B - array([1., 2., 3., 0., 4., 1.]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").cumprod(skipna=False) - Size: 48B - array([ 1., 2., 3., 0., 4., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").count() - Size: 24B - array([1, 3, 1]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -6972,17 +5953,8 @@ def all( ... ), ... ) >>> da - Size: 6B - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").all() - Size: 3B - array([ True, True, False]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -7063,17 +6035,8 @@ def any( ... ), ... ) >>> da - Size: 6B - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> da.resample(time="3ME").any() - Size: 3B - array([ True, True, True]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -7160,25 +6123,12 @@ def max( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").max() - Size: 24B - array([1., 3., 2.]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").max(skipna=False) - Size: 24B - array([ 1., 3., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -7267,25 +6217,12 @@ def min( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").min() - Size: 24B - array([1., 0., 2.]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").min(skipna=False) - Size: 24B - array([ 1., 0., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -7376,25 +6313,12 @@ def mean( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").mean() - Size: 24B - array([1. , 1.66666667, 2. ]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").mean(skipna=False) - Size: 24B - array([1. , 1.66666667, nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -7492,33 +6416,16 @@ def prod( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").prod() - Size: 24B - array([1., 0., 2.]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").prod(skipna=False) - Size: 24B - array([ 1., 0., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.resample(time="3ME").prod(skipna=True, min_count=2) - Size: 24B - array([nan, 0., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -7618,33 +6525,16 @@ def sum( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").sum() - Size: 24B - array([1., 5., 2.]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").sum(skipna=False) - Size: 24B - array([ 1., 5., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. 
>>> da.resample(time="3ME").sum(skipna=True, min_count=2) - Size: 24B - array([nan, 5., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -7741,33 +6631,16 @@ def std( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").std() - Size: 24B - array([0. , 1.24721913, 0. ]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").std(skipna=False) - Size: 24B - array([0. , 1.24721913, nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3ME").std(skipna=True, ddof=1) - Size: 24B - array([ nan, 1.52752523, nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -7864,33 +6737,16 @@ def var( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").var() - Size: 24B - array([0. , 1.55555556, 0. ]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").var(skipna=False) - Size: 24B - array([0. , 1.55555556, nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3ME").var(skipna=True, ddof=1) - Size: 24B - array([ nan, 2.33333333, nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -7983,25 +6839,12 @@ def median( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").median() - Size: 24B - array([1., 2., 2.]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").median(skipna=False) - Size: 24B - array([ 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ return self._reduce_without_squeeze_warn( duck_array_ops.median, @@ -8078,27 +6921,12 @@ def cumsum( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").cumsum() - Size: 48B - array([1., 2., 5., 5., 2., 2.]) - Coordinates: - labels (time) >> da.resample(time="3ME").cumsum(skipna=False) - Size: 48B - array([ 1., 2., 5., 5., 2., nan]) - Coordinates: - labels (time) >> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> da.resample(time="3ME").cumprod() - Size: 48B - array([1., 2., 6., 0., 2., 2.]) - Coordinates: - labels (time) >> da.resample(time="3ME").cumprod(skipna=False) - Size: 48B - array([ 1., 2., 6., 0., 2., nan]) - Coordinates: - labels (time) tuple[Mapping[Hashable, Any], tuple[Hashable, ...]]: """All the logic for creating a new DataArray""" - if coords is not None and not is_dict_like(coords) and len(coords) != len(shape): + if ( + coords is not None + and not utils.is_dict_like(coords) + and len(coords) != len(shape) + ): raise ValueError( f"coords is not dict-like, but it has {len(coords)} items, " f"which does not match the {len(shape)} dimensions of the " @@ -150,7 +153,7 @@ def _infer_coords_and_dims( dims = [f"dim_{n}" for n in range(len(shape))] if coords is not None and len(coords) == len(shape): # try to infer dimensions from coords - if is_dict_like(coords): + if utils.is_dict_like(coords): dims = list(coords.keys()) else: for n, (dim, coord) in enumerate(zip(dims, coords)): @@ -172,7 +175,7 @@ def _infer_coords_and_dims( new_coords = coords else: new_coords = {} - if is_dict_like(coords): + if utils.is_dict_like(coords): for k, v in coords.items(): new_coords[k] = as_variable(v, name=k) elif coords is not None: @@ -194,7 +197,7 @@ def _check_data_shape( if data is dtypes.NA: data = np.nan if coords is not None and utils.is_scalar(data, include_0d=False): - if is_dict_like(coords): + if utils.is_dict_like(coords): if dims is None: return data else: @@ -215,14 +218,14 @@ def __init__(self, data_array: T_DataArray): self.data_array = data_array def __getitem__(self, key) -> T_DataArray: - if not is_dict_like(key): + if not utils.is_dict_like(key): # expand the indexer so we can handle Ellipsis labels = indexing.expanded_indexer(key, self.data_array.ndim) key = dict(zip(self.data_array.dims, labels)) return self.data_array.sel(key) def __setitem__(self, key, value) -> None: - if not is_dict_like(key): + if not utils.is_dict_like(key): # expand the indexer so we can handle Ellipsis labels = indexing.expanded_indexer(key, self.data_array.ndim) key = dict(zip(self.data_array.dims, labels)) @@ -831,7 +834,7 @@ def dims(self, value: Any) -> NoReturn: ) def _item_key_to_dict(self, key: Any) -> Mapping[Hashable, Any]: - if is_dict_like(key): + if utils.is_dict_like(key): return key key = indexing.expanded_indexer(key, self.ndim) return dict(zip(self.dims, key)) @@ -1380,7 +1383,7 @@ def chunk( ) chunks = dict(zip(self.dims, chunks)) else: - chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") + chunks = utils.either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") ds = self._to_temp_dataset().chunk( chunks, @@ -1460,7 +1463,7 @@ def isel( Dimensions without coordinates: points """ - indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") + indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "isel") if any(is_fancy_indexer(idx) for idx in indexers.values()): ds = self._to_temp_dataset()._isel_fancy( @@ -2138,7 +2141,7 @@ def reindex( DataArray.reindex_like align """ - indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") + indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") return alignment.reindex( self, indexers=indexers, @@ -2448,9 +2451,11 @@ def rename( if new_name_or_name_dict is None and not names: # change name to None? 
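# The .loc changes above keep the same expansion rule, now spelled
# utils.is_dict_like: non-dict keys are expanded positionally
# (Ellipsis-aware) and zipped with the dims into a mapping before
# delegating to .sel(). Illustrative sketch:
import numpy as np
import xarray as xr

arr = xr.DataArray(
    np.arange(6).reshape(2, 3),
    dims=("x", "y"),
    coords={"x": ["a", "b"], "y": [10, 20, 30]},
)
arr.loc["a"]         # same as arr.sel({"x": "a"})
arr.loc["a", 20]     # same as arr.sel({"x": "a", "y": 20})
arr.loc[{"x": "a"}]  # dict-like keys are used as-is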
return self._replace(name=None) - if is_dict_like(new_name_or_name_dict) or new_name_or_name_dict is None: + if utils.is_dict_like(new_name_or_name_dict) or new_name_or_name_dict is None: # change dims/coords - name_dict = either_dict_or_kwargs(new_name_or_name_dict, names, "rename") + name_dict = utils.either_dict_or_kwargs( + new_name_or_name_dict, names, "rename" + ) dataset = self._to_temp_dataset()._rename(name_dict) return self._from_temp_dataset(dataset) if utils.hashable(new_name_or_name_dict) and names: @@ -2516,7 +2521,7 @@ def swap_dims( DataArray.rename Dataset.swap_dims """ - dims_dict = either_dict_or_kwargs(dims_dict, dims_kwargs, "swap_dims") + dims_dict = utils.either_dict_or_kwargs(dims_dict, dims_kwargs, "swap_dims") ds = self._to_temp_dataset().swap_dims(dims_dict) return self._from_temp_dataset(ds) @@ -2610,7 +2615,7 @@ def expand_dims( elif dim is not None and not isinstance(dim, Mapping): dim = {dim: 1} - dim = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims") + dim = utils.either_dict_or_kwargs(dim, dim_kwargs, "expand_dims") ds = self._to_temp_dataset().expand_dims(dim, axis) return self._from_temp_dataset(ds) @@ -3011,7 +3016,7 @@ def transpose( Dataset.transpose """ if dims: - dims = tuple(infix_dims(dims, self.dims, missing_dims)) + dims = tuple(utils.infix_dims(dims, self.dims, missing_dims)) variable = self.variable.transpose(*dims) if transpose_coords: coords: dict[Hashable, Variable] = {} @@ -3210,7 +3215,7 @@ def drop_sel( * y (y) int64 24B 6 9 12 """ if labels_kwargs or isinstance(labels, dict): - labels = either_dict_or_kwargs(labels, labels_kwargs, "drop") + labels = utils.either_dict_or_kwargs(labels, labels_kwargs, "drop") ds = self._to_temp_dataset().drop_sel(labels, errors=errors) return self._from_temp_dataset(ds) @@ -3398,7 +3403,7 @@ def fillna(self, value: Any) -> Self: * Z (Z) int64 48B 0 1 2 3 4 5 height (Z) int64 48B 0 10 20 30 40 50 """ - if is_dict_like(value): + if utils.is_dict_like(value): raise TypeError( "cannot provide fill value as a dictionary with " "fillna on a DataArray" @@ -6913,7 +6918,7 @@ def rolling( """ from xarray.core.rolling import DataArrayRolling - dim = either_dict_or_kwargs(dim, window_kwargs, "rolling") + dim = utils.either_dict_or_kwargs(dim, window_kwargs, "rolling") return DataArrayRolling(self, dim, min_periods=min_periods, center=center) def cumulative( @@ -7127,7 +7132,7 @@ def coarsen( """ from xarray.core.rolling import DataArrayCoarsen - dim = either_dict_or_kwargs(dim, window_kwargs, "coarsen") + dim = utils.either_dict_or_kwargs(dim, window_kwargs, "coarsen") return DataArrayCoarsen( self, dim, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 95ac374251d..5fd1bd3c983 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -102,7 +102,6 @@ HybridMappingProxy, OrderedSet, _default, - consolidate_dask_from_array_kwargs, decode_numpy_dict_values, drop_dims_from_indexers, emit_user_level_warning, @@ -118,13 +117,6 @@ ) from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager from xarray.namedarray.pycompat import array_type, is_chunked_array -from xarray.namedarray.utils import ( - either_dict_or_kwargs, - infix_dims, - is_dict_like, - is_duck_array, - is_duck_dask_array, -) from xarray.plot.accessor import DatasetPlotAccessor from xarray.util.deprecation_helpers import _deprecate_positional_args @@ -312,7 +304,7 @@ def _maybe_chunk( token2 = tokenize(token if token else var._data, str(chunks)) name2 = f"{name_prefix}{name}-{token2}" - 
from_array_kwargs = consolidate_dask_from_array_kwargs( + from_array_kwargs = utils.consolidate_dask_from_array_kwargs( from_array_kwargs, name=name2, lock=lock, @@ -495,12 +487,12 @@ def __init__(self, dataset: T_Dataset): self.dataset = dataset def __getitem__(self, key: Mapping[Any, Any]) -> T_Dataset: - if not is_dict_like(key): + if not utils.is_dict_like(key): raise TypeError("can only lookup dictionaries from Dataset.loc") return self.dataset.sel(key) def __setitem__(self, key, value) -> None: - if not is_dict_like(key): + if not utils.is_dict_like(key): raise TypeError( "can only set locations defined by dictionaries from Dataset.loc." f" Got: {key}" @@ -1019,7 +1011,9 @@ def _persist_inplace(self, **kwargs) -> Self: """Persist all Dask arrays in memory""" # access .data to coerce everything to numpy or dask arrays lazy_data = { - k: v._data for k, v in self.variables.items() if is_duck_dask_array(v._data) + k: v._data + for k, v in self.variables.items() + if utils.is_duck_dask_array(v._data) } if lazy_data: import dask @@ -1345,7 +1339,7 @@ def _copy( ) -> Self: if data is None: data = {} - elif not is_dict_like(data): + elif not utils.is_dict_like(data): raise ValueError("Data must be dict-like") if data: @@ -1541,7 +1535,7 @@ def __getitem__( """ from xarray.core.formatting import shorten_list_repr - if is_dict_like(key): + if utils.is_dict_like(key): return self.isel(**key) if utils.hashable(key): try: @@ -1578,7 +1572,7 @@ def __setitem__( """ from xarray.core.dataarray import DataArray - if is_dict_like(key): + if utils.is_dict_like(key): # check for consistency and convert value to dataset value = self._setitem_check(key, value) # loop over dataset variables and set new values @@ -2687,7 +2681,7 @@ def chunk( ) chunks_mapping = dict.fromkeys(self.dims, chunks) else: - chunks_mapping = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") + chunks_mapping = utils.either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") bad_dims = chunks_mapping.keys() - self.sizes.keys() if bad_dims: @@ -2742,7 +2736,7 @@ def _validate_indexers( elif isinstance(v, Sequence) and len(v) == 0: yield k, np.empty((0,), dtype="int64") else: - if not is_duck_array(v): + if not utils.is_duck_array(v): v = np.asarray(v) if v.dtype.kind in "US": @@ -2936,7 +2930,7 @@ def isel( Tutorial material on basics of indexing """ - indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") + indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "isel") if any(is_fancy_indexer(idx) for idx in indexers.values()): return self._isel_fancy(indexers, drop=drop, missing_dims=missing_dims) @@ -3090,7 +3084,7 @@ def sel( Tutorial material on basics of indexing """ - indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "sel") + indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "sel") query_results = map_index_queries( self, indexers=indexers, method=method, tolerance=tolerance ) @@ -3179,11 +3173,11 @@ def head( if not indexers_kwargs: if indexers is None: indexers = 5 - if not isinstance(indexers, int) and not is_dict_like(indexers): + if not isinstance(indexers, int) and not utils.is_dict_like(indexers): raise TypeError("indexers must be either dict-like or a single integer") if isinstance(indexers, int): indexers = {dim: indexers for dim in self.dims} - indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head") + indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "head") for k, v in indexers.items(): if not isinstance(v, int): raise TypeError( @@ 
-3267,11 +3261,11 @@ def tail( if not indexers_kwargs: if indexers is None: indexers = 5 - if not isinstance(indexers, int) and not is_dict_like(indexers): + if not isinstance(indexers, int) and not utils.is_dict_like(indexers): raise TypeError("indexers must be either dict-like or a single integer") if isinstance(indexers, int): indexers = {dim: indexers for dim in self.dims} - indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail") + indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "tail") for k, v in indexers.items(): if not isinstance(v, int): raise TypeError( @@ -3352,12 +3346,12 @@ def thin( if ( not indexers_kwargs and not isinstance(indexers, int) - and not is_dict_like(indexers) + and not utils.is_dict_like(indexers) ): raise TypeError("indexers must be either dict-like or a single integer") if isinstance(indexers, int): indexers = {dim: indexers for dim in self.dims} - indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin") + indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "thin") for k, v in indexers.items(): if not isinstance(v, int): raise TypeError( @@ -3737,7 +3731,7 @@ def reindex( original dataset, use the :py:meth:`~Dataset.fillna()` method. """ - indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") + indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") return alignment.reindex( self, indexers=indexers, @@ -3760,7 +3754,7 @@ def _reindex( """ Same as reindex but supports sparse option. """ - indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") + indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") return alignment.reindex( self, indexers=indexers, @@ -3923,7 +3917,7 @@ def interp( if kwargs is None: kwargs = {} - coords = either_dict_or_kwargs(coords, coords_kwargs, "interp") + coords = utils.either_dict_or_kwargs(coords, coords_kwargs, "interp") indexers = dict(self._validate_interp_indexers(coords)) if coords: @@ -3988,7 +3982,7 @@ def _validate_interp_indexer(x, new_x): if name in indexers: continue - if is_duck_dask_array(var.data): + if utils.is_duck_dask_array(var.data): use_indexers = dask_indexers else: use_indexers = validated_indexers @@ -4217,7 +4211,7 @@ def _rename( """Also used internally by DataArray so that the warning (if any) is raised at the right stack level. """ - name_dict = either_dict_or_kwargs(name_dict, names, "rename") + name_dict = utils.either_dict_or_kwargs(name_dict, names, "rename") for k in name_dict.keys(): if k not in self and k not in self.dims: raise ValueError( @@ -4313,7 +4307,7 @@ def rename_dims( Dataset.rename_vars DataArray.rename """ - dims_dict = either_dict_or_kwargs(dims_dict, dims, "rename_dims") + dims_dict = utils.either_dict_or_kwargs(dims_dict, dims, "rename_dims") for k, v in dims_dict.items(): if k not in self.dims: raise ValueError( @@ -4359,7 +4353,7 @@ def rename_vars( Dataset.rename_dims DataArray.rename """ - name_dict = either_dict_or_kwargs(name_dict, names, "rename_vars") + name_dict = utils.either_dict_or_kwargs(name_dict, names, "rename_vars") for k in name_dict: if k not in self: raise ValueError( @@ -4435,7 +4429,7 @@ def swap_dims( # TODO: deprecate this method in favor of a (less confusing) # rename_dims() method that only renames dimensions. 
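An aside for readers following this mechanical rename: every `either_dict_or_kwargs` call in these hunks (including the `swap_dims` call just below) now goes through the `utils` namespace instead of a bare import. The helper's contract, as a minimal sketch rather than xarray's exact implementation: accept either a positional mapping or keyword arguments, never both.

    from collections.abc import Mapping

    def either_dict_or_kwargs(pos_kwargs, kw_kwargs, func_name):
        # Nothing positional: fall back to the **kwargs form.
        if not pos_kwargs:
            return kw_kwargs
        if not isinstance(pos_kwargs, Mapping):
            raise ValueError(f"the first argument to .{func_name} must be a dictionary")
        if kw_kwargs:
            raise ValueError(
                f"cannot specify both keyword and positional arguments to .{func_name}"
            )
        return pos_kwargs

This is why `ds.swap_dims({"x": "y"})` and `ds.swap_dims(x="y")` resolve to the same code path.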
- dims_dict = either_dict_or_kwargs(dims_dict, dims_kwargs, "swap_dims") + dims_dict = utils.either_dict_or_kwargs(dims_dict, dims_kwargs, "swap_dims") for current_name, new_name in dims_dict.items(): if current_name not in self.dims: raise ValueError( @@ -4596,7 +4590,7 @@ def expand_dims( raise ValueError("dims should not contain duplicate values.") dim = {d: 1 for d in dim} - dim = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims") + dim = utils.either_dict_or_kwargs(dim, dim_kwargs, "expand_dims") assert isinstance(dim, MutableMapping) if axis is None: @@ -4737,7 +4731,7 @@ def set_index( Dataset.set_xindex Dataset.swap_dims """ - dim_coords = either_dict_or_kwargs(indexes, indexes_kwargs, "set_index") + dim_coords = utils.either_dict_or_kwargs(indexes, indexes_kwargs, "set_index") new_indexes: dict[Hashable, Index] = {} new_variables: dict[Hashable, Variable] = {} @@ -5086,7 +5080,9 @@ def reorder_levels( Another dataset, with this dataset's data but replaced coordinates. """ - dim_order = either_dict_or_kwargs(dim_order, dim_order_kwargs, "reorder_levels") + dim_order = utils.either_dict_or_kwargs( + dim_order, dim_order_kwargs, "reorder_levels" + ) variables = self._variables.copy() indexes = dict(self._indexes) new_indexes: dict[Hashable, Index] = {} @@ -5177,7 +5173,7 @@ def _stack_once( if dims == ...: raise ValueError("Please use [...] for dims, rather than just ...") if ... in dims: - dims = list(infix_dims(dims, self.dims)) + dims = list(utils.infix_dims(dims, self.dims)) new_variables: dict[Hashable, Variable] = {} stacked_var_names: list[Hashable] = [] @@ -5270,7 +5266,7 @@ def stack( -------- Dataset.unstack """ - dimensions = either_dict_or_kwargs(dimensions, dimensions_kwargs, "stack") + dimensions = utils.either_dict_or_kwargs(dimensions, dimensions_kwargs, "stack") result = self for new_dim, dims in dimensions.items(): result = result._stack_once(dims, new_dim, index_cls, create_index) @@ -5566,7 +5562,7 @@ def unstack( # currently compatible. 
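Before the `unstack` hunks continue, one more relocated helper deserves a note: `transpose` and `_stack_once` above both call `utils.infix_dims` to expand a bare `...` into "all remaining dimensions". A simplified sketch of that expansion (the real helper also validates names against `missing_dims`):

    def infix_dims(dims_supplied, dims_all):
        # Replace a single Ellipsis with the dims that were not named
        # explicitly, preserving the order they have in dims_all.
        supplied = list(dims_supplied)
        if supplied.count(...) > 1:
            raise ValueError("more than one ellipsis supplied")
        existing = [d for d in dims_all if d not in supplied]
        for d in supplied:
            if d is ...:
                yield from existing
            else:
                yield d

    # infix_dims(["time", ...], ["x", "y", "time"]) yields: time, x, y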
sparse_array_type = array_type("sparse") needs_full_reindex = any( - is_duck_dask_array(v.data) + utils.is_duck_dask_array(v.data) or isinstance(v.data, sparse_array_type) or not isinstance(v.data, np.ndarray) for v in nonindexes @@ -5967,7 +5963,7 @@ def drop( if errors not in ["raise", "ignore"]: raise ValueError('errors must be either "raise" or "ignore"') - if is_dict_like(labels) and not isinstance(labels, dict): + if utils.is_dict_like(labels) and not isinstance(labels, dict): emit_user_level_warning( "dropping coordinates using `drop` is deprecated; use drop_vars.", DeprecationWarning, @@ -5977,7 +5973,7 @@ def drop( if labels_kwargs or isinstance(labels, dict): if dim is not None: raise ValueError("cannot specify dim and dict-like arguments.") - labels = either_dict_or_kwargs(labels, labels_kwargs, "drop") + labels = utils.either_dict_or_kwargs(labels, labels_kwargs, "drop") if dim is None and (is_scalar(labels) or isinstance(labels, Iterable)): emit_user_level_warning( @@ -6057,7 +6053,7 @@ def drop_sel( if errors not in ["raise", "ignore"]: raise ValueError('errors must be either "raise" or "ignore"') - labels = either_dict_or_kwargs(labels, labels_kwargs, "drop_sel") + labels = utils.either_dict_or_kwargs(labels, labels_kwargs, "drop_sel") ds = self for dim, labels_for_dim in labels.items(): @@ -6123,7 +6119,7 @@ def drop_isel(self, indexers=None, **indexers_kwargs) -> Self: A (x, y) int64 32B 0 2 3 5 """ - indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "drop_isel") + indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "drop_isel") ds = self dimension_index = {} @@ -6226,9 +6222,9 @@ def transpose( f'transpose requires dims to be passed as multiple arguments. Expected `{", ".join(list_fix)}`. Received `{dims[0]}` instead' ) - # Use infix_dims to check once for missing dimensions + # Use utils.infix_dims to check once for missing dimensions if len(dims) != 0: - _ = list(infix_dims(dims, self.dims, missing_dims)) + _ = list(utils.infix_dims(dims, self.dims, missing_dims)) ds = self.copy() for name, var in self._variables.items(): @@ -6436,7 +6432,7 @@ def fillna(self, value: Any) -> Self: C (x) float64 32B 2.0 2.0 2.0 5.0 D (x) float64 32B 3.0 3.0 3.0 4.0 """ - if is_dict_like(value): + if utils.is_dict_like(value): value_keys = getattr(value, "data_vars", value).keys() if not set(value_keys) <= set(self.data_vars.keys()): raise ValueError( @@ -7038,7 +7034,7 @@ def assign( temperature_f (lat, lon) float64 32B 51.76 57.75 53.7 51.62 """ - variables = either_dict_or_kwargs(variables, variables_kwargs, "assign") + variables = utils.either_dict_or_kwargs(variables, variables_kwargs, "assign") data = self.copy() # do all calculations first... @@ -7398,7 +7394,7 @@ def to_dask_dataframe( # Make sure var is a dask array, otherwise the array can become too large # when it is broadcasted to several dimensions: - if not is_duck_dask_array(var._data): + if not utils.is_duck_dask_array(var._data): var = var.chunk() # Broadcast then flatten the array: @@ -7642,7 +7638,7 @@ def apply_over_both(lhs_data_vars, rhs_data_vars, lhs_vars, rhs_vars): dest_vars[k] = f(rhs_vars[k], np.nan) return dest_vars - if is_dict_like(other) and not isinstance(other, Dataset): + if utils.is_dict_like(other) and not isinstance(other, Dataset): # can't use our shortcut of doing the binary operation with # Variable objects, so apply over our data vars instead. 
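The `is_duck_dask_array` checks threaded through `unstack` and `to_dask_dataframe` above are duck-typed on purpose: xarray must never import dask eagerly. Roughly what the relocated helpers test, simplified from the real checks in `xarray.namedarray.utils`:

    from importlib.util import find_spec

    import numpy as np

    def is_duck_array(value):
        # numpy arrays always qualify; anything else must carry the core
        # array attributes plus one of the numpy protocol hooks.
        if isinstance(value, np.ndarray):
            return True
        return (
            hasattr(value, "ndim")
            and hasattr(value, "shape")
            and hasattr(value, "dtype")
            and (
                hasattr(value, "__array_function__")
                or hasattr(value, "__array_namespace__")
            )
        )

    def is_duck_dask_array(value):
        # A chunked duck array: array-like *and* a dask collection; the
        # check simply degrades to False when dask is not installed.
        if find_spec("dask") is None:
            return False
        from dask.base import is_dask_collection

        return is_duck_array(value) and is_dask_collection(value)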
new_data_vars = apply_over_both( @@ -7807,7 +7803,7 @@ def shift( Data variables: foo (x) object 40B nan nan 'a' 'b' 'c' """ - shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "shift") + shifts = utils.either_dict_or_kwargs(shifts, shifts_kwargs, "shift") invalid = tuple(k for k in shifts if k not in self.dims) if invalid: raise ValueError( @@ -7886,7 +7882,7 @@ def roll( foo (x) list[_T]: - if is_dict_like(arg): + if utils.is_dict_like(arg): if allow_default: return [arg.get(d, default) for d in self.dim] for d in self.dim: @@ -537,7 +531,7 @@ def _numbagg_reduce(self, func, keep_attrs, **kwargs): padded = self.obj.variable if self.center[0]: - if is_duck_dask_array(padded.data): + if utils.is_duck_dask_array(padded.data): # workaround to make the padded chunk size larger than # self.window - 1 shift = -(self.window[0] + 1) // 2 @@ -550,7 +544,7 @@ def _numbagg_reduce(self, func, keep_attrs, **kwargs): valid = (slice(None),) * axis + (slice(-shift, None),) padded = padded.pad({self.dim[0]: (0, -shift)}, mode="constant") - if is_duck_dask_array(padded.data) and False: + if utils.is_duck_dask_array(padded.data) and False: raise AssertionError("should not be reachable") else: values = func( @@ -582,7 +576,7 @@ def _bottleneck_reduce(self, func, keep_attrs, **kwargs): padded = self.obj.variable if self.center[0]: - if is_duck_dask_array(padded.data): + if utils.is_duck_dask_array(padded.data): # workaround to make the padded chunk size larger than # self.window - 1 shift = -(self.window[0] + 1) // 2 @@ -595,7 +589,7 @@ def _bottleneck_reduce(self, func, keep_attrs, **kwargs): valid = (slice(None),) * axis + (slice(-shift, None),) padded = padded.pad({self.dim[0]: (0, -shift)}, mode="constant") - if is_duck_dask_array(padded.data): + if utils.is_duck_dask_array(padded.data): raise AssertionError("should not be reachable") else: values = func( @@ -638,13 +632,13 @@ def _array_reduce( if ( OPTIONS["use_numbagg"] - and module_available("numbagg") + and utils.module_available("numbagg") and pycompat.mod_version("numbagg") >= Version("0.6.3") and numbagg_move_func is not None # TODO: we could at least allow this for the equivalent of `apply_ufunc`'s # "parallelized". `rolling_exp` does this, as an example (but rolling_exp is # much simpler) - and not is_duck_dask_array(self.obj.data) + and not utils.is_duck_dask_array(self.obj.data) # Numbagg doesn't handle object arrays and generally has dtype consistency, # so doesn't deal well with bool arrays which are expected to change type. 
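# Aside: the condition being assembled here is one arm of a three-way
# dispatch. _array_reduce effectively tries numbagg first, then
# bottleneck, then the generic reduce. Condensed sketch (hypothetical
# helper signatures; the real method also threads keep_attrs, the
# rolling window arguments and fill values):
#
#     def _array_reduce_sketch(self, numbagg_func, bottleneck_func, generic):
#         data = self.obj.data
#         if (numbagg_func is not None
#                 and not utils.is_duck_dask_array(data)
#                 and data.dtype.kind not in "ObMm"):
#             return self._numbagg_reduce(numbagg_func, keep_attrs=True)
#         if (bottleneck_func is not None
#                 and not utils.is_duck_dask_array(data)
#                 and self.ndim == 1):
#             return self._bottleneck_reduce(bottleneck_func, keep_attrs=True)
#         return generic()  # dask arrays, object dtypes, n-dim windows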
and self.obj.data.dtype.kind not in "ObMm" @@ -668,7 +662,7 @@ def _array_reduce( if ( OPTIONS["use_bottleneck"] and bottleneck_move_func is not None - and not is_duck_dask_array(self.obj.data) + and not utils.is_duck_dask_array(self.obj.data) and self.ndim == 1 ): # TODO: re-enable bottleneck with dask after the issues @@ -962,7 +956,7 @@ def __init__( f"dimensions {tuple(self.obj.dims)}" ) - if is_dict_like(coord_func): + if utils.is_dict_like(coord_func): coord_func_map = coord_func else: coord_func_map = {d: coord_func for d in self.obj.dims} @@ -1033,7 +1027,7 @@ def construct( from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset - window_dim = either_dict_or_kwargs( + window_dim = utils.either_dict_or_kwargs( window_dim, window_dim_kwargs, "Coarsen.construct" ) if not window_dim: diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index 16c121dced3..4e085a0a7eb 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -10,8 +10,8 @@ from xarray.core.options import _get_keep_attrs from xarray.core.pdcompat import count_not_none from xarray.core.types import T_DataWithCoords +from xarray.core.utils import module_available from xarray.namedarray import pycompat -from xarray.namedarray.utils import module_available def _get_alpha( diff --git a/xarray/core/utils.py b/xarray/core/utils.py index ea579f50ae3..9b527622e40 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -74,6 +74,19 @@ import numpy as np import pandas as pd +from xarray.namedarray.utils import ( # noqa: F401 + ReprObject, + drop_missing_dims, + either_dict_or_kwargs, + infix_dims, + is_dask_collection, + is_dict_like, + is_duck_array, + is_duck_dask_array, + module_available, + to_0d_object_array, +) + if TYPE_CHECKING: from xarray.core.types import Dims, ErrorOptionsWithWarn @@ -307,13 +320,6 @@ def is_valid_numpy_dtype(dtype: Any) -> bool: return True -def to_0d_object_array(value: Any) -> np.ndarray: - """Given a value, wrap it in a 0-D numpy.ndarray with dtype=object.""" - result = np.empty((), dtype=object) - result[()] = value - return result - - def to_0d_array(value: Any) -> np.ndarray: """Given a value, wrap it in a 0-D numpy.ndarray.""" if np.isscalar(value) or (isinstance(value, np.ndarray) and value.ndim == 0): @@ -620,31 +626,6 @@ def __repr__(self: Any) -> str: return f"{type(self).__name__}(array={self.array!r})" -class ReprObject: - """Object that prints as the given value, for use with sentinel values.""" - - __slots__ = ("_value",) - - def __init__(self, value: str): - self._value = value - - def __repr__(self) -> str: - return self._value - - def __eq__(self, other) -> bool: - if isinstance(other, ReprObject): - return self._value == other._value - return False - - def __hash__(self) -> int: - return hash((type(self), self._value)) - - def __dask_tokenize__(self): - from dask.base import normalize_token - - return normalize_token((type(self), self._value)) - - @contextlib.contextmanager def close_on_error(f): """Context manager to ensure that a file opened by xarray is closed if an @@ -869,49 +850,6 @@ def drop_dims_from_indexers( ) -def drop_missing_dims( - supplied_dims: Iterable[Hashable], - dims: Iterable[Hashable], - missing_dims: ErrorOptionsWithWarn, -) -> Iterable[Hashable]: - """Depending on the setting of missing_dims, drop any dimensions from supplied_dims that - are not present in dims. 
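The definitions deleted from this file below are not disappearing: they moved to `xarray.namedarray.utils`, and the `# noqa: F401` re-import added at the top of `xarray/core/utils.py` keeps the old import path alive. The same pattern in miniature, with hypothetical module names for illustration only:

    # mypkg/namedarray/utils.py -- the implementation's single new home
    from importlib.util import find_spec

    def module_available(module: str) -> bool:
        # True when `module` is importable, without actually importing it.
        return find_spec(module) is not None

    # mypkg/core/utils.py -- compatibility shim for the old import path
    from mypkg.namedarray.utils import module_available  # noqa: F401

Callers doing `from mypkg.core.utils import module_available` keep working while the body lives in one place; the later hunks in this series (rolling_exp.py, plot/utils.py, the tests) simply repoint their imports at the side that is now canonical.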
- - Parameters - ---------- - supplied_dims : Iterable of Hashable - dims : Iterable of Hashable - missing_dims : {"raise", "warn", "ignore"} - """ - - if missing_dims == "raise": - supplied_dims_set = {val for val in supplied_dims if val is not ...} - invalid = supplied_dims_set - set(dims) - if invalid: - raise ValueError( - f"Dimensions {invalid} do not exist. Expected one or more of {dims}" - ) - - return supplied_dims - - elif missing_dims == "warn": - invalid = set(supplied_dims) - set(dims) - if invalid: - warnings.warn( - f"Dimensions {invalid} do not exist. Expected one or more of {dims}" - ) - - return [val for val in supplied_dims if val in dims or val is ...] - - elif missing_dims == "ignore": - return [val for val in supplied_dims if val in dims or val is ...] - - else: - raise ValueError( - f"Unrecognised option {missing_dims} for missing_dims argument" - ) - - @overload def parse_dims( dim: Dims, diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 24ac0e042ad..8d76cfbe004 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -32,7 +32,12 @@ consolidate_dask_from_array_kwargs, decode_numpy_dict_values, drop_dims_from_indexers, + either_dict_or_kwargs, ensure_us_time_resolution, + infix_dims, + is_dict_like, + is_duck_array, + is_duck_dask_array, maybe_coerce_to_str, ) from xarray.namedarray.core import NamedArray, _raise_if_any_duplicate_dimensions @@ -41,13 +46,6 @@ is_0d_dask_array, to_duck_array, ) -from xarray.namedarray.utils import ( - either_dict_or_kwargs, - infix_dims, - is_dict_like, - is_duck_array, - is_duck_dask_array, -) NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index ab812340596..804e1cfd795 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -13,9 +13,8 @@ from xarray.core.indexes import PandasMultiIndex from xarray.core.options import OPTIONS -from xarray.core.utils import is_scalar +from xarray.core.utils import is_scalar, module_available from xarray.namedarray.pycompat import DuckArrayModule -from xarray.namedarray.utils import module_available nc_time_axis_available = module_available("nc_time_axis") diff --git a/xarray/testing/assertions.py b/xarray/testing/assertions.py index 541ff66788d..6418eb79b8b 100644 --- a/xarray/testing/assertions.py +++ b/xarray/testing/assertions.py @@ -14,7 +14,6 @@ from xarray.core.dataset import Dataset from xarray.core.indexes import Index, PandasIndex, PandasMultiIndex, default_indexes from xarray.core.variable import IndexVariable, Variable -from xarray.namedarray.utils import is_duck_array def ensure_warnings(func): @@ -221,14 +220,14 @@ def assert_duckarray_equal(x, y, err_msg="", verbose=True): """Like `np.testing.assert_array_equal`, but for duckarrays""" __tracebackhide__ = True - if not is_duck_array(x) and not utils.is_scalar(x): + if not utils.is_duck_array(x) and not utils.is_scalar(x): x = np.asarray(x) - if not is_duck_array(y) and not utils.is_scalar(y): + if not utils.is_duck_array(y) and not utils.is_scalar(y): y = np.asarray(y) - if (is_duck_array(x) and utils.is_scalar(y)) or ( - utils.is_scalar(x) and is_duck_array(y) + if (utils.is_duck_array(x) and utils.is_scalar(y)) or ( + utils.is_scalar(x) and utils.is_duck_array(y) ): equiv = (x == y).all() else: diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index a24f885c0ed..9a5589ff872 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -33,7 +33,7 @@ from 
xarray.coding.variables import SerializationWarning from xarray.conventions import _update_bounds_attributes, cf_encoder from xarray.core.common import contains_cftime_datetimes -from xarray.namedarray.utils import is_duck_dask_array +from xarray.core.utils import is_duck_dask_array from xarray.testing import assert_equal, assert_identical from xarray.tests import ( FirstElementAccessibleArray, diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index f7d00444d91..50061c774a8 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -7,8 +7,7 @@ import pytest from xarray.core import duck_array_ops, utils -from xarray.core.utils import iterate_nested -from xarray.namedarray.utils import either_dict_or_kwargs, infix_dims +from xarray.core.utils import either_dict_or_kwargs, infix_dims, iterate_nested from xarray.tests import assert_array_equal, requires_dask diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index 0bd797a13b9..3462af28663 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -29,8 +29,7 @@ from xarray.core import duck_array_ops from xarray.core.options import OPTIONS from xarray.core.types import Dims, Self -from xarray.core.utils import contains_only_chunked_or_numpy -from xarray.namedarray.utils import module_available +from xarray.core.utils import contains_only_chunked_or_numpy, module_available if TYPE_CHECKING: from xarray.core.dataarray import DataArray From f38659dacf7e38d96035ce90d10ad99d10ec5e8b Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 12 Feb 2024 13:12:27 -0800 Subject: [PATCH 51/54] formatting only --- xarray/core/_aggregations.py | 1186 ++++++++++++++++++++++++++++++++++ 1 file changed, 1186 insertions(+) diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index cea544873b7..bee6afd5a19 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -84,8 +84,19 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.count() + Size: 8B + Dimensions: () + Data variables: + da int64 8B 5 """ return self.reduce( duck_array_ops.count, @@ -145,8 +156,19 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 78B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.all() + Size: 1B + Dimensions: () + Data variables: + da bool 1B False """ return self.reduce( duck_array_ops.array_all, @@ -206,8 +228,19 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 78B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.any() + Size: 1B + Dimensions: () + Data variables: + da bool 1B True """ return self.reduce( duck_array_ops.array_any, @@ -273,12 +306,27 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.max() + Size: 8B + Dimensions: () + Data variables: + da float64 8B 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.max(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan """ return self.reduce( duck_array_ops.max, @@ -345,12 +393,27 @@ def min( ... 
) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.min() + Size: 8B + Dimensions: () + Data variables: + da float64 8B 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.min(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan """ return self.reduce( duck_array_ops.min, @@ -421,12 +484,27 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.mean() + Size: 8B + Dimensions: () + Data variables: + da float64 8B 1.6 Use ``skipna`` to control whether NaNs are ignored. >>> ds.mean(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan """ return self.reduce( duck_array_ops.mean, @@ -504,16 +582,35 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.prod() + Size: 8B + Dimensions: () + Data variables: + da float64 8B 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.prod(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.prod(skipna=True, min_count=2) + Size: 8B + Dimensions: () + Data variables: + da float64 8B 0.0 """ return self.reduce( duck_array_ops.prod, @@ -592,16 +689,35 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.sum() + Size: 8B + Dimensions: () + Data variables: + da float64 8B 8.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.sum(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.sum(skipna=True, min_count=2) + Size: 8B + Dimensions: () + Data variables: + da float64 8B 8.0 """ return self.reduce( duck_array_ops.sum, @@ -677,16 +793,35 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.std() + Size: 8B + Dimensions: () + Data variables: + da float64 8B 1.02 Use ``skipna`` to control whether NaNs are ignored. >>> ds.std(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.std(skipna=True, ddof=1) + Size: 8B + Dimensions: () + Data variables: + da float64 8B 1.14 """ return self.reduce( duck_array_ops.std, @@ -762,16 +897,35 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.var() + Size: 8B + Dimensions: () + Data variables: + da float64 8B 1.04 Use ``skipna`` to control whether NaNs are ignored. >>> ds.var(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.var(skipna=True, ddof=1) + Size: 8B + Dimensions: () + Data variables: + da float64 8B 1.3 """ return self.reduce( duck_array_ops.var, @@ -843,12 +997,27 @@ def median( ... 
) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.median() + Size: 8B + Dimensions: () + Data variables: + da float64 8B 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.median(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan """ return self.reduce( duck_array_ops.median, @@ -919,12 +1088,29 @@ def cumsum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.cumsum() + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 3.0 6.0 6.0 8.0 8.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.cumsum(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 3.0 6.0 6.0 8.0 nan """ return self.reduce( duck_array_ops.cumsum, @@ -995,12 +1181,29 @@ def cumprod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.cumprod() + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 6.0 0.0 0.0 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.cumprod(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 6.0 0.0 0.0 nan """ return self.reduce( duck_array_ops.cumprod, @@ -1076,8 +1279,15 @@ def count( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.count() + Size: 8B + array(5) """ return self.reduce( duck_array_ops.count, @@ -1135,8 +1345,15 @@ def all( ... ), ... ) >>> da + Size: 6B + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.all() + Size: 1B + array(False) """ return self.reduce( duck_array_ops.array_all, @@ -1194,8 +1411,15 @@ def any( ... ), ... ) >>> da + Size: 6B + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.any() + Size: 1B + array(True) """ return self.reduce( duck_array_ops.array_any, @@ -1259,12 +1483,21 @@ def max( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.max() + Size: 8B + array(3.) Use ``skipna`` to control whether NaNs are ignored. >>> da.max(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.max, @@ -1329,12 +1562,21 @@ def min( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.min() + Size: 8B + array(0.) Use ``skipna`` to control whether NaNs are ignored. >>> da.min(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.min, @@ -1403,12 +1645,21 @@ def mean( ... ), ... 
) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.mean() + Size: 8B + array(1.6) Use ``skipna`` to control whether NaNs are ignored. >>> da.mean(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.mean, @@ -1484,16 +1735,27 @@ def prod( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.prod() + Size: 8B + array(0.) Use ``skipna`` to control whether NaNs are ignored. >>> da.prod(skipna=False) + Size: 8B + array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.prod(skipna=True, min_count=2) + Size: 8B + array(0.) """ return self.reduce( duck_array_ops.prod, @@ -1570,16 +1832,27 @@ def sum( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.sum() + Size: 8B + array(8.) Use ``skipna`` to control whether NaNs are ignored. >>> da.sum(skipna=False) + Size: 8B + array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.sum(skipna=True, min_count=2) + Size: 8B + array(8.) """ return self.reduce( duck_array_ops.sum, @@ -1653,16 +1926,27 @@ def std( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.std() + Size: 8B + array(1.0198039) Use ``skipna`` to control whether NaNs are ignored. >>> da.std(skipna=False) + Size: 8B + array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.std(skipna=True, ddof=1) + Size: 8B + array(1.14017543) """ return self.reduce( duck_array_ops.std, @@ -1736,16 +2020,27 @@ def var( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.var() + Size: 8B + array(1.04) Use ``skipna`` to control whether NaNs are ignored. >>> da.var(skipna=False) + Size: 8B + array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.var(skipna=True, ddof=1) + Size: 8B + array(1.3) """ return self.reduce( duck_array_ops.var, @@ -1815,12 +2110,21 @@ def median( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.median() + Size: 8B + array(2.) Use ``skipna`` to control whether NaNs are ignored. >>> da.median(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.median, @@ -1889,12 +2193,27 @@ def cumsum( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.cumsum() + Size: 48B + array([1., 3., 6., 6., 8., 8.]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.cumsum(skipna=False) + Size: 48B + array([ 1., 3., 6., 6., 8., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> da.cumprod() + Size: 48B + array([1., 2., 6., 0., 0., 0.]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.cumprod(skipna=False) + Size: 48B + array([ 1., 2., 6., 0., 0., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").count() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) int64 24B 1 2 2 """ if ( flox_available @@ -2159,8 +2506,21 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 78B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").all() + Size: 27B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) bool 3B False True True """ if ( flox_available @@ -2244,8 +2604,21 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 78B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").any() + Size: 27B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) bool 3B True True True """ if ( flox_available @@ -2335,12 +2708,31 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").max() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B 1.0 2.0 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").max(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 2.0 3.0 """ if ( flox_available @@ -2432,12 +2824,31 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").min() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B 1.0 2.0 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").min(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 2.0 0.0 """ if ( flox_available @@ -2531,12 +2942,31 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").mean() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B 1.0 2.0 1.5 Use ``skipna`` to control whether NaNs are ignored. 
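In plain numpy terms, the toggle is a choice between the nan-aware and the plain reduction. For the example data used throughout this file (illustrative, not the generated code):

    >>> import numpy as np
    >>> x = np.array([1.0, 2.0, 3.0, 0.0, 2.0, np.nan])
    >>> float(np.nanmean(x))  # skipna=True: NaN dropped before reducing
    1.6
    >>> float(np.mean(x))  # skipna=False: NaN propagates into the result
    nan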
>>> ds.groupby("labels").mean(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 2.0 1.5 """ if ( flox_available @@ -2637,16 +3067,41 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").prod() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B 1.0 4.0 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").prod(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 4.0 0.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").prod(skipna=True, min_count=2) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 4.0 0.0 """ if ( flox_available @@ -2749,16 +3204,41 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").sum() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B 1.0 4.0 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").sum(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 4.0 3.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").sum(skipna=True, min_count=2) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 4.0 3.0 """ if ( flox_available @@ -2858,16 +3338,41 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").std() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B 0.0 0.0 1.5 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").std(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 0.0 1.5 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").std(skipna=True, ddof=1) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 0.0 2.121 """ if ( flox_available @@ -2967,16 +3472,41 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").var() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B 0.0 0.0 2.25 Use ``skipna`` to control whether NaNs are ignored. 
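(``ddof``, shown after the ``skipna`` example below, changes the divisor of the estimate to ``N - ddof``. In numpy terms, for the two valid values 3 and 0 in group "c":

    >>> import numpy as np
    >>> float(np.var([3.0, 0.0]))  # ddof=0: population variance
    2.25
    >>> float(np.var([3.0, 0.0], ddof=1))  # ddof=1: unbiased sample variance
    4.5

These match the last entries of the two ``var`` outputs below.)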
>>> ds.groupby("labels").var(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 0.0 2.25 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").var(skipna=True, ddof=1) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 0.0 4.5 """ if ( flox_available @@ -3072,12 +3602,31 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").median() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B 1.0 2.0 1.5 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").median(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 2.0 1.5 """ return self._reduce_without_squeeze_warn( duck_array_ops.median, @@ -3156,12 +3705,29 @@ def cumsum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").cumsum() + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 3.0 3.0 4.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").cumsum(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 3.0 3.0 4.0 nan """ return self._reduce_without_squeeze_warn( duck_array_ops.cumsum, @@ -3240,12 +3806,29 @@ def cumprod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").cumprod() + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 3.0 0.0 4.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").cumprod(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 3.0 0.0 4.0 nan """ return self._reduce_without_squeeze_warn( duck_array_ops.cumprod, @@ -3352,8 +3935,21 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").count() + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) int64 24B 1 3 1 """ if ( flox_available @@ -3437,8 +4033,21 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 78B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").all() + Size: 27B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool 3B True True False """ if ( flox_available @@ -3522,8 +4131,21 @@ def any( ... 
) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 78B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").any() + Size: 27B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool 3B True True True """ if ( flox_available @@ -3613,12 +4235,31 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").max() + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 1.0 3.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").max(skipna=False) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 1.0 3.0 nan """ if ( flox_available @@ -3710,12 +4351,31 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").min() + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 1.0 0.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").min(skipna=False) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 1.0 0.0 nan """ if ( flox_available @@ -3809,12 +4469,31 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").mean() + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 1.0 1.667 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").mean(skipna=False) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 1.0 1.667 nan """ if ( flox_available @@ -3915,16 +4594,41 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").prod() + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 1.0 0.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").prod(skipna=False) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 1.0 0.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. 
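The rule ``min_count`` applies, sketched in plain Python (illustrative only): after dropping NaNs, any bin left with fewer than ``min_count`` valid values yields NaN instead of a reduced value.

    >>> import numpy as np
    >>> def nanprod_min_count(x, min_count):
    ...     valid = x[~np.isnan(x)]
    ...     return float(np.prod(valid)) if valid.size >= min_count else np.nan
    ...
    >>> nanprod_min_count(np.array([1.0]), 2)  # first 3-month bin: one valid value
    nan
    >>> nanprod_min_count(np.array([2.0, 3.0, 0.0]), 2)  # second bin: three
    0.0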
>>> ds.resample(time="3ME").prod(skipna=True, min_count=2) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B nan 0.0 nan """ if ( flox_available @@ -4027,16 +4731,41 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").sum() + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 1.0 5.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").sum(skipna=False) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 1.0 5.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.resample(time="3ME").sum(skipna=True, min_count=2) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B nan 5.0 nan """ if ( flox_available @@ -4136,16 +4865,41 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").std() + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 0.0 1.247 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").std(skipna=False) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 0.0 1.247 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3ME").std(skipna=True, ddof=1) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B nan 1.528 nan """ if ( flox_available @@ -4245,16 +4999,41 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").var() + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 0.0 1.556 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").var(skipna=False) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 0.0 1.556 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3ME").var(skipna=True, ddof=1) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B nan 2.333 nan """ if ( flox_available @@ -4350,12 +5129,31 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.resample(time="3ME").median() + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").median(skipna=False) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 1.0 2.0 nan """ return self._reduce_without_squeeze_warn( duck_array_ops.median, @@ -4434,12 +5232,29 @@ def cumsum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").cumsum() + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 5.0 5.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").cumsum(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 5.0 5.0 2.0 nan """ return self._reduce_without_squeeze_warn( duck_array_ops.cumsum, @@ -4518,12 +5333,29 @@ def cumprod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").cumprod() + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 6.0 0.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").cumprod(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 6.0 0.0 2.0 nan """ return self._reduce_without_squeeze_warn( duck_array_ops.cumprod, @@ -4629,8 +5461,17 @@ def count( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").count() + Size: 24B + array([1, 2, 2]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -4711,8 +5552,17 @@ def all( ... ), ... ) >>> da + Size: 6B + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").all() + Size: 3B + array([False, True, True]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -4793,8 +5643,17 @@ def any( ... ), ... ) >>> da + Size: 6B + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").any() + Size: 3B + array([ True, True, True]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -4881,12 +5740,25 @@ def max( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").max() + Size: 24B + array([1., 2., 3.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. 
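Stripped of the xarray machinery, a grouped reduction is just a label-wise selection. With the labels used in these examples (illustrative only):

    >>> import numpy as np
    >>> data = np.array([1.0, 2.0, 3.0, 0.0, 2.0, np.nan])
    >>> labels = np.array(["a", "b", "c", "c", "b", "a"])
    >>> {lab: float(np.max(data[labels == lab])) for lab in ("a", "b", "c")}
    {'a': nan, 'b': 2.0, 'c': 3.0}

``np.max`` lets the NaN in group "a" propagate, which is exactly the ``skipna=False`` behaviour shown next.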
>>> da.groupby("labels").max(skipna=False) + Size: 24B + array([nan, 2., 3.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -4975,12 +5847,25 @@ def min( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").min() + Size: 24B + array([1., 2., 0.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").min(skipna=False) + Size: 24B + array([nan, 2., 0.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -5071,12 +5956,25 @@ def mean( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").mean() + Size: 24B + array([1. , 2. , 1.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").mean(skipna=False) + Size: 24B + array([nan, 2. , 1.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -5174,16 +6072,33 @@ def prod( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").prod() + Size: 24B + array([1., 4., 0.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").prod(skipna=False) + Size: 24B + array([nan, 4., 0.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").prod(skipna=True, min_count=2) + Size: 24B + array([nan, 4., 0.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -5283,16 +6198,33 @@ def sum( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").sum() + Size: 24B + array([1., 4., 3.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").sum(skipna=False) + Size: 24B + array([nan, 4., 3.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").sum(skipna=True, min_count=2) + Size: 24B + array([nan, 4., 3.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -5389,16 +6321,33 @@ def std( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").std() + Size: 24B + array([0. , 0. , 1.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").std(skipna=False) + Size: 24B + array([nan, 0. , 1.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").std(skipna=True, ddof=1) + Size: 24B + array([ nan, 0. , 2.12132034]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -5495,16 +6444,33 @@ def var( ... ), ... 
) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").var() + Size: 24B + array([0. , 0. , 2.25]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").var(skipna=False) + Size: 24B + array([ nan, 0. , 2.25]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").var(skipna=True, ddof=1) + Size: 24B + array([nan, 0. , 4.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -5597,12 +6563,25 @@ def median( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").median() + Size: 24B + array([1. , 2. , 1.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").median(skipna=False) + Size: 24B + array([nan, 2. , 1.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ return self._reduce_without_squeeze_warn( duck_array_ops.median, @@ -5679,12 +6658,27 @@ def cumsum( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").cumsum() + Size: 48B + array([1., 2., 3., 3., 4., 1.]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").cumsum(skipna=False) + Size: 48B + array([ 1., 2., 3., 3., 4., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").cumprod() + Size: 48B + array([1., 2., 3., 0., 4., 1.]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").cumprod(skipna=False) + Size: 48B + array([ 1., 2., 3., 0., 4., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").count() + Size: 24B + array([1, 3, 1]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -5953,8 +6971,17 @@ def all( ... ), ... ) >>> da + Size: 6B + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").all() + Size: 3B + array([ True, True, False]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -6035,8 +7062,17 @@ def any( ... ), ... ) >>> da + Size: 6B + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> da.resample(time="3ME").any() + Size: 3B + array([ True, True, True]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -6123,12 +7159,25 @@ def max( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").max() + Size: 24B + array([1., 3., 2.]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").max(skipna=False) + Size: 24B + array([ 1., 3., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -6217,12 +7266,25 @@ def min( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").min() + Size: 24B + array([1., 0., 2.]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").min(skipna=False) + Size: 24B + array([ 1., 0., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -6313,12 +7375,25 @@ def mean( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").mean() + Size: 24B + array([1. , 1.66666667, 2. ]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").mean(skipna=False) + Size: 24B + array([1. , 1.66666667, nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -6416,16 +7491,33 @@ def prod( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").prod() + Size: 24B + array([1., 0., 2.]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").prod(skipna=False) + Size: 24B + array([ 1., 0., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.resample(time="3ME").prod(skipna=True, min_count=2) + Size: 24B + array([nan, 0., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -6525,16 +7617,33 @@ def sum( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").sum() + Size: 24B + array([1., 5., 2.]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").sum(skipna=False) + Size: 24B + array([ 1., 5., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. 
>>> da.resample(time="3ME").sum(skipna=True, min_count=2) + Size: 24B + array([nan, 5., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -6631,16 +7740,33 @@ def std( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").std() + Size: 24B + array([0. , 1.24721913, 0. ]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").std(skipna=False) + Size: 24B + array([0. , 1.24721913, nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3ME").std(skipna=True, ddof=1) + Size: 24B + array([ nan, 1.52752523, nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -6737,16 +7863,33 @@ def var( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").var() + Size: 24B + array([0. , 1.55555556, 0. ]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").var(skipna=False) + Size: 24B + array([0. , 1.55555556, nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3ME").var(skipna=True, ddof=1) + Size: 24B + array([ nan, 2.33333333, nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -6839,12 +7982,25 @@ def median( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").median() + Size: 24B + array([1., 2., 2.]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").median(skipna=False) + Size: 24B + array([ 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ return self._reduce_without_squeeze_warn( duck_array_ops.median, @@ -6921,12 +8077,27 @@ def cumsum( ... ), ... ) >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").cumsum() + Size: 48B + array([1., 2., 5., 5., 2., 2.]) + Coordinates: + labels (time) >> da.resample(time="3ME").cumsum(skipna=False) + Size: 48B + array([ 1., 2., 5., 5., 2., nan]) + Coordinates: + labels (time) >> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> da.resample(time="3ME").cumprod() + Size: 48B + array([1., 2., 6., 0., 2., 2.]) + Coordinates: + labels (time) >> da.resample(time="3ME").cumprod(skipna=False) + Size: 48B + array([ 1., 2., 6., 0., 2., nan]) + Coordinates: + labels (time) Date: Mon, 12 Feb 2024 13:18:58 -0800 Subject: [PATCH 52/54] more imports restructure --- xarray/core/dataarray.py | 24 +++++------ xarray/core/dataset.py | 93 ++++++++++++++++++++-------------------- 2 files changed, 58 insertions(+), 59 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b09934d72aa..364f53ebcd1 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -62,7 +62,9 @@ HybridMappingProxy, ReprObject, _default, + either_dict_or_kwargs, hashable, + infix_dims, ) from xarray.core.variable import ( IndexVariable, @@ -1383,7 +1385,7 @@ def chunk( ) chunks = dict(zip(self.dims, chunks)) else: - chunks = utils.either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") + chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") ds = self._to_temp_dataset().chunk( chunks, @@ -1463,7 +1465,7 @@ def isel( Dimensions without coordinates: points """ - indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "isel") + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") if any(is_fancy_indexer(idx) for idx in indexers.values()): ds = self._to_temp_dataset()._isel_fancy( @@ -2141,7 +2143,7 @@ def reindex( DataArray.reindex_like align """ - indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") return alignment.reindex( self, indexers=indexers, @@ -2453,9 +2455,7 @@ def rename( return self._replace(name=None) if utils.is_dict_like(new_name_or_name_dict) or new_name_or_name_dict is None: # change dims/coords - name_dict = utils.either_dict_or_kwargs( - new_name_or_name_dict, names, "rename" - ) + name_dict = either_dict_or_kwargs(new_name_or_name_dict, names, "rename") dataset = self._to_temp_dataset()._rename(name_dict) return self._from_temp_dataset(dataset) if utils.hashable(new_name_or_name_dict) and names: @@ -2521,7 +2521,7 @@ def swap_dims( DataArray.rename Dataset.swap_dims """ - dims_dict = utils.either_dict_or_kwargs(dims_dict, dims_kwargs, "swap_dims") + dims_dict = either_dict_or_kwargs(dims_dict, dims_kwargs, "swap_dims") ds = self._to_temp_dataset().swap_dims(dims_dict) return self._from_temp_dataset(ds) @@ -2615,7 +2615,7 @@ def expand_dims( elif dim is not None and not isinstance(dim, Mapping): dim = {dim: 1} - dim = utils.either_dict_or_kwargs(dim, dim_kwargs, "expand_dims") + dim = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims") ds = self._to_temp_dataset().expand_dims(dim, axis) return self._from_temp_dataset(ds) @@ -3016,7 +3016,7 @@ def transpose( Dataset.transpose """ if dims: - dims = tuple(utils.infix_dims(dims, self.dims, missing_dims)) + dims = tuple(infix_dims(dims, self.dims, missing_dims)) variable = self.variable.transpose(*dims) if transpose_coords: coords: dict[Hashable, Variable] = {} @@ -3215,7 +3215,7 @@ def drop_sel( * y (y) int64 24B 6 9 12 """ if labels_kwargs or isinstance(labels, dict): - labels = utils.either_dict_or_kwargs(labels, labels_kwargs, "drop") + labels = either_dict_or_kwargs(labels, labels_kwargs, "drop") ds = self._to_temp_dataset().drop_sel(labels, errors=errors) return self._from_temp_dataset(ds) @@ -6918,7 +6918,7 @@ def rolling( """ from xarray.core.rolling import DataArrayRolling - dim = 
utils.either_dict_or_kwargs(dim, window_kwargs, "rolling") + dim = either_dict_or_kwargs(dim, window_kwargs, "rolling") return DataArrayRolling(self, dim, min_periods=min_periods, center=center) def cumulative( @@ -7132,7 +7132,7 @@ def coarsen( """ from xarray.core.rolling import DataArrayCoarsen - dim = utils.either_dict_or_kwargs(dim, window_kwargs, "coarsen") + dim = either_dict_or_kwargs(dim, window_kwargs, "coarsen") return DataArrayCoarsen( self, dim, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 5fd1bd3c983..a5131bfb9db 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -104,7 +104,10 @@ _default, decode_numpy_dict_values, drop_dims_from_indexers, + either_dict_or_kwargs, emit_user_level_warning, + is_dict_like, + is_duck_dask_array, is_scalar, maybe_wrap_array, ) @@ -487,12 +490,12 @@ def __init__(self, dataset: T_Dataset): self.dataset = dataset def __getitem__(self, key: Mapping[Any, Any]) -> T_Dataset: - if not utils.is_dict_like(key): + if not is_dict_like(key): raise TypeError("can only lookup dictionaries from Dataset.loc") return self.dataset.sel(key) def __setitem__(self, key, value) -> None: - if not utils.is_dict_like(key): + if not is_dict_like(key): raise TypeError( "can only set locations defined by dictionaries from Dataset.loc." f" Got: {key}" @@ -1011,9 +1014,7 @@ def _persist_inplace(self, **kwargs) -> Self: """Persist all Dask arrays in memory""" # access .data to coerce everything to numpy or dask arrays lazy_data = { - k: v._data - for k, v in self.variables.items() - if utils.is_duck_dask_array(v._data) + k: v._data for k, v in self.variables.items() if is_duck_dask_array(v._data) } if lazy_data: import dask @@ -1339,7 +1340,7 @@ def _copy( ) -> Self: if data is None: data = {} - elif not utils.is_dict_like(data): + elif not is_dict_like(data): raise ValueError("Data must be dict-like") if data: @@ -1535,7 +1536,7 @@ def __getitem__( """ from xarray.core.formatting import shorten_list_repr - if utils.is_dict_like(key): + if is_dict_like(key): return self.isel(**key) if utils.hashable(key): try: @@ -1572,7 +1573,7 @@ def __setitem__( """ from xarray.core.dataarray import DataArray - if utils.is_dict_like(key): + if is_dict_like(key): # check for consistency and convert value to dataset value = self._setitem_check(key, value) # loop over dataset variables and set new values @@ -2681,7 +2682,7 @@ def chunk( ) chunks_mapping = dict.fromkeys(self.dims, chunks) else: - chunks_mapping = utils.either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") + chunks_mapping = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") bad_dims = chunks_mapping.keys() - self.sizes.keys() if bad_dims: @@ -2930,7 +2931,7 @@ def isel( Tutorial material on basics of indexing """ - indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "isel") + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") if any(is_fancy_indexer(idx) for idx in indexers.values()): return self._isel_fancy(indexers, drop=drop, missing_dims=missing_dims) @@ -3084,7 +3085,7 @@ def sel( Tutorial material on basics of indexing """ - indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "sel") + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "sel") query_results = map_index_queries( self, indexers=indexers, method=method, tolerance=tolerance ) @@ -3173,11 +3174,11 @@ def head( if not indexers_kwargs: if indexers is None: indexers = 5 - if not isinstance(indexers, int) and not utils.is_dict_like(indexers): + if not isinstance(indexers, 
int) and not is_dict_like(indexers): raise TypeError("indexers must be either dict-like or a single integer") if isinstance(indexers, int): indexers = {dim: indexers for dim in self.dims} - indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "head") + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head") for k, v in indexers.items(): if not isinstance(v, int): raise TypeError( @@ -3261,11 +3262,11 @@ def tail( if not indexers_kwargs: if indexers is None: indexers = 5 - if not isinstance(indexers, int) and not utils.is_dict_like(indexers): + if not isinstance(indexers, int) and not is_dict_like(indexers): raise TypeError("indexers must be either dict-like or a single integer") if isinstance(indexers, int): indexers = {dim: indexers for dim in self.dims} - indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "tail") + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail") for k, v in indexers.items(): if not isinstance(v, int): raise TypeError( @@ -3346,12 +3347,12 @@ def thin( if ( not indexers_kwargs and not isinstance(indexers, int) - and not utils.is_dict_like(indexers) + and not is_dict_like(indexers) ): raise TypeError("indexers must be either dict-like or a single integer") if isinstance(indexers, int): indexers = {dim: indexers for dim in self.dims} - indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "thin") + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin") for k, v in indexers.items(): if not isinstance(v, int): raise TypeError( @@ -3731,7 +3732,7 @@ def reindex( original dataset, use the :py:meth:`~Dataset.fillna()` method. """ - indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") return alignment.reindex( self, indexers=indexers, @@ -3754,7 +3755,7 @@ def _reindex( """ Same as reindex but supports sparse option. """ - indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") return alignment.reindex( self, indexers=indexers, @@ -3917,7 +3918,7 @@ def interp( if kwargs is None: kwargs = {} - coords = utils.either_dict_or_kwargs(coords, coords_kwargs, "interp") + coords = either_dict_or_kwargs(coords, coords_kwargs, "interp") indexers = dict(self._validate_interp_indexers(coords)) if coords: @@ -3982,7 +3983,7 @@ def _validate_interp_indexer(x, new_x): if name in indexers: continue - if utils.is_duck_dask_array(var.data): + if is_duck_dask_array(var.data): use_indexers = dask_indexers else: use_indexers = validated_indexers @@ -4211,7 +4212,7 @@ def _rename( """Also used internally by DataArray so that the warning (if any) is raised at the right stack level. 
""" - name_dict = utils.either_dict_or_kwargs(name_dict, names, "rename") + name_dict = either_dict_or_kwargs(name_dict, names, "rename") for k in name_dict.keys(): if k not in self and k not in self.dims: raise ValueError( @@ -4307,7 +4308,7 @@ def rename_dims( Dataset.rename_vars DataArray.rename """ - dims_dict = utils.either_dict_or_kwargs(dims_dict, dims, "rename_dims") + dims_dict = either_dict_or_kwargs(dims_dict, dims, "rename_dims") for k, v in dims_dict.items(): if k not in self.dims: raise ValueError( @@ -4353,7 +4354,7 @@ def rename_vars( Dataset.rename_dims DataArray.rename """ - name_dict = utils.either_dict_or_kwargs(name_dict, names, "rename_vars") + name_dict = either_dict_or_kwargs(name_dict, names, "rename_vars") for k in name_dict: if k not in self: raise ValueError( @@ -4429,7 +4430,7 @@ def swap_dims( # TODO: deprecate this method in favor of a (less confusing) # rename_dims() method that only renames dimensions. - dims_dict = utils.either_dict_or_kwargs(dims_dict, dims_kwargs, "swap_dims") + dims_dict = either_dict_or_kwargs(dims_dict, dims_kwargs, "swap_dims") for current_name, new_name in dims_dict.items(): if current_name not in self.dims: raise ValueError( @@ -4590,7 +4591,7 @@ def expand_dims( raise ValueError("dims should not contain duplicate values.") dim = {d: 1 for d in dim} - dim = utils.either_dict_or_kwargs(dim, dim_kwargs, "expand_dims") + dim = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims") assert isinstance(dim, MutableMapping) if axis is None: @@ -4731,7 +4732,7 @@ def set_index( Dataset.set_xindex Dataset.swap_dims """ - dim_coords = utils.either_dict_or_kwargs(indexes, indexes_kwargs, "set_index") + dim_coords = either_dict_or_kwargs(indexes, indexes_kwargs, "set_index") new_indexes: dict[Hashable, Index] = {} new_variables: dict[Hashable, Variable] = {} @@ -5080,9 +5081,7 @@ def reorder_levels( Another dataset, with this dataset's data but replaced coordinates. """ - dim_order = utils.either_dict_or_kwargs( - dim_order, dim_order_kwargs, "reorder_levels" - ) + dim_order = either_dict_or_kwargs(dim_order, dim_order_kwargs, "reorder_levels") variables = self._variables.copy() indexes = dict(self._indexes) new_indexes: dict[Hashable, Index] = {} @@ -5266,7 +5265,7 @@ def stack( -------- Dataset.unstack """ - dimensions = utils.either_dict_or_kwargs(dimensions, dimensions_kwargs, "stack") + dimensions = either_dict_or_kwargs(dimensions, dimensions_kwargs, "stack") result = self for new_dim, dims in dimensions.items(): result = result._stack_once(dims, new_dim, index_cls, create_index) @@ -5562,7 +5561,7 @@ def unstack( # currently compatible. 
sparse_array_type = array_type("sparse") needs_full_reindex = any( - utils.is_duck_dask_array(v.data) + is_duck_dask_array(v.data) or isinstance(v.data, sparse_array_type) or not isinstance(v.data, np.ndarray) for v in nonindexes @@ -5963,7 +5962,7 @@ def drop( if errors not in ["raise", "ignore"]: raise ValueError('errors must be either "raise" or "ignore"') - if utils.is_dict_like(labels) and not isinstance(labels, dict): + if is_dict_like(labels) and not isinstance(labels, dict): emit_user_level_warning( "dropping coordinates using `drop` is deprecated; use drop_vars.", DeprecationWarning, @@ -5973,7 +5972,7 @@ def drop( if labels_kwargs or isinstance(labels, dict): if dim is not None: raise ValueError("cannot specify dim and dict-like arguments.") - labels = utils.either_dict_or_kwargs(labels, labels_kwargs, "drop") + labels = either_dict_or_kwargs(labels, labels_kwargs, "drop") if dim is None and (is_scalar(labels) or isinstance(labels, Iterable)): emit_user_level_warning( @@ -6053,7 +6052,7 @@ def drop_sel( if errors not in ["raise", "ignore"]: raise ValueError('errors must be either "raise" or "ignore"') - labels = utils.either_dict_or_kwargs(labels, labels_kwargs, "drop_sel") + labels = either_dict_or_kwargs(labels, labels_kwargs, "drop_sel") ds = self for dim, labels_for_dim in labels.items(): @@ -6119,7 +6118,7 @@ def drop_isel(self, indexers=None, **indexers_kwargs) -> Self: A (x, y) int64 32B 0 2 3 5 """ - indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "drop_isel") + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "drop_isel") ds = self dimension_index = {} @@ -6432,7 +6431,7 @@ def fillna(self, value: Any) -> Self: C (x) float64 32B 2.0 2.0 2.0 5.0 D (x) float64 32B 3.0 3.0 3.0 4.0 """ - if utils.is_dict_like(value): + if is_dict_like(value): value_keys = getattr(value, "data_vars", value).keys() if not set(value_keys) <= set(self.data_vars.keys()): raise ValueError( @@ -7034,7 +7033,7 @@ def assign( temperature_f (lat, lon) float64 32B 51.76 57.75 53.7 51.62 """ - variables = utils.either_dict_or_kwargs(variables, variables_kwargs, "assign") + variables = either_dict_or_kwargs(variables, variables_kwargs, "assign") data = self.copy() # do all calculations first... @@ -7394,7 +7393,7 @@ def to_dask_dataframe( # Make sure var is a dask array, otherwise the array can become too large # when it is broadcasted to several dimensions: - if not utils.is_duck_dask_array(var._data): + if not is_duck_dask_array(var._data): var = var.chunk() # Broadcast then flatten the array: @@ -7638,7 +7637,7 @@ def apply_over_both(lhs_data_vars, rhs_data_vars, lhs_vars, rhs_vars): dest_vars[k] = f(rhs_vars[k], np.nan) return dest_vars - if utils.is_dict_like(other) and not isinstance(other, Dataset): + if is_dict_like(other) and not isinstance(other, Dataset): # can't use our shortcut of doing the binary operation with # Variable objects, so apply over our data vars instead. 
new_data_vars = apply_over_both( @@ -7803,7 +7802,7 @@ def shift( Data variables: foo (x) object 40B nan nan 'a' 'b' 'c' """ - shifts = utils.either_dict_or_kwargs(shifts, shifts_kwargs, "shift") + shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "shift") invalid = tuple(k for k in shifts if k not in self.dims) if invalid: raise ValueError( @@ -7882,7 +7881,7 @@ def roll( foo (x) Date: Mon, 12 Feb 2024 13:41:15 -0800 Subject: [PATCH 53/54] more formatting --- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 28 +++++++++++++++------------- xarray/core/rolling.py | 23 ++++++++++++++--------- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 364f53ebcd1..c00fe1a9e67 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2143,7 +2143,7 @@ def reindex( DataArray.reindex_like align """ - indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") + indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") return alignment.reindex( self, indexers=indexers, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a5131bfb9db..884e302b8be 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -106,7 +106,9 @@ drop_dims_from_indexers, either_dict_or_kwargs, emit_user_level_warning, + infix_dims, is_dict_like, + is_duck_array, is_duck_dask_array, is_scalar, maybe_wrap_array, @@ -490,12 +492,12 @@ def __init__(self, dataset: T_Dataset): self.dataset = dataset def __getitem__(self, key: Mapping[Any, Any]) -> T_Dataset: - if not is_dict_like(key): + if not utils.is_dict_like(key): raise TypeError("can only lookup dictionaries from Dataset.loc") return self.dataset.sel(key) def __setitem__(self, key, value) -> None: - if not is_dict_like(key): + if not utils.is_dict_like(key): raise TypeError( "can only set locations defined by dictionaries from Dataset.loc." f" Got: {key}" @@ -1340,7 +1342,7 @@ def _copy( ) -> Self: if data is None: data = {} - elif not is_dict_like(data): + elif not utils.is_dict_like(data): raise ValueError("Data must be dict-like") if data: @@ -1536,7 +1538,7 @@ def __getitem__( """ from xarray.core.formatting import shorten_list_repr - if is_dict_like(key): + if utils.is_dict_like(key): return self.isel(**key) if utils.hashable(key): try: @@ -1573,7 +1575,7 @@ def __setitem__( """ from xarray.core.dataarray import DataArray - if is_dict_like(key): + if utils.is_dict_like(key): # check for consistency and convert value to dataset value = self._setitem_check(key, value) # loop over dataset variables and set new values @@ -2737,7 +2739,7 @@ def _validate_indexers( elif isinstance(v, Sequence) and len(v) == 0: yield k, np.empty((0,), dtype="int64") else: - if not utils.is_duck_array(v): + if not is_duck_array(v): v = np.asarray(v) if v.dtype.kind in "US": @@ -3732,7 +3734,7 @@ def reindex( original dataset, use the :py:meth:`~Dataset.fillna()` method. """ - indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") + indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") return alignment.reindex( self, indexers=indexers, @@ -3755,7 +3757,7 @@ def _reindex( """ Same as reindex but supports sparse option. """ - indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") + indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") return alignment.reindex( self, indexers=indexers, @@ -5172,7 +5174,7 @@ def _stack_once( if dims == ...: raise ValueError("Please use [...] 
for dims, rather than just ...") if ... in dims: - dims = list(utils.infix_dims(dims, self.dims)) + dims = list(infix_dims(dims, self.dims)) new_variables: dict[Hashable, Variable] = {} stacked_var_names: list[Hashable] = [] @@ -6221,9 +6223,9 @@ def transpose( f'transpose requires dims to be passed as multiple arguments. Expected `{", ".join(list_fix)}`. Received `{dims[0]}` instead' ) - # Use utils.infix_dims to check once for missing dimensions + # Use infix_dims to check once for missing dimensions if len(dims) != 0: - _ = list(utils.infix_dims(dims, self.dims, missing_dims)) + _ = list(infix_dims(dims, self.dims, missing_dims)) ds = self.copy() for name, var in self._variables.items(): @@ -6431,7 +6433,7 @@ def fillna(self, value: Any) -> Self: C (x) float64 32B 2.0 2.0 2.0 5.0 D (x) float64 32B 3.0 3.0 3.0 4.0 """ - if is_dict_like(value): + if utils.is_dict_like(value): value_keys = getattr(value, "data_vars", value).keys() if not set(value_keys) <= set(self.data_vars.keys()): raise ValueError( @@ -7637,7 +7639,7 @@ def apply_over_both(lhs_data_vars, rhs_data_vars, lhs_vars, rhs_vars): dest_vars[k] = f(rhs_vars[k], np.nan) return dest_vars - if is_dict_like(other) and not isinstance(other, Dataset): + if utils.is_dict_like(other) and not isinstance(other, Dataset): # can't use our shortcut of doing the binary operation with # Variable objects, so apply over our data vars instead. new_data_vars = apply_over_both( diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 486b3ca4b5d..6cf49fc995b 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -14,6 +14,11 @@ from xarray.core.arithmetic import CoarsenArithmetic from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.core.types import CoarsenBoundaryOptions, SideOptions, T_Xarray +from xarray.core.utils import ( + either_dict_or_kwargs, + is_duck_dask_array, + module_available, +) from xarray.namedarray import pycompat try: @@ -159,7 +164,7 @@ def _reduce_method( # type: ignore[misc] array_agg_func = getattr(duck_array_ops, name) bottleneck_move_func = getattr(bottleneck, "move_" + name, None) - if utils.module_available("numbagg"): + if module_available("numbagg"): import numbagg numbagg_move_func = getattr(numbagg, "move_" + name, None) @@ -531,7 +536,7 @@ def _numbagg_reduce(self, func, keep_attrs, **kwargs): padded = self.obj.variable if self.center[0]: - if utils.is_duck_dask_array(padded.data): + if is_duck_dask_array(padded.data): # workaround to make the padded chunk size larger than # self.window - 1 shift = -(self.window[0] + 1) // 2 @@ -544,7 +549,7 @@ def _numbagg_reduce(self, func, keep_attrs, **kwargs): valid = (slice(None),) * axis + (slice(-shift, None),) padded = padded.pad({self.dim[0]: (0, -shift)}, mode="constant") - if utils.is_duck_dask_array(padded.data) and False: + if is_duck_dask_array(padded.data) and False: raise AssertionError("should not be reachable") else: values = func( @@ -576,7 +581,7 @@ def _bottleneck_reduce(self, func, keep_attrs, **kwargs): padded = self.obj.variable if self.center[0]: - if utils.is_duck_dask_array(padded.data): + if is_duck_dask_array(padded.data): # workaround to make the padded chunk size larger than # self.window - 1 shift = -(self.window[0] + 1) // 2 @@ -589,7 +594,7 @@ def _bottleneck_reduce(self, func, keep_attrs, **kwargs): valid = (slice(None),) * axis + (slice(-shift, None),) padded = padded.pad({self.dim[0]: (0, -shift)}, mode="constant") - if utils.is_duck_dask_array(padded.data): + if is_duck_dask_array(padded.data): raise 
AssertionError("should not be reachable") else: values = func( @@ -632,13 +637,13 @@ def _array_reduce( if ( OPTIONS["use_numbagg"] - and utils.module_available("numbagg") + and module_available("numbagg") and pycompat.mod_version("numbagg") >= Version("0.6.3") and numbagg_move_func is not None # TODO: we could at least allow this for the equivalent of `apply_ufunc`'s # "parallelized". `rolling_exp` does this, as an example (but rolling_exp is # much simpler) - and not utils.is_duck_dask_array(self.obj.data) + and not is_duck_dask_array(self.obj.data) # Numbagg doesn't handle object arrays and generally has dtype consistency, # so doesn't deal well with bool arrays which are expected to change type. and self.obj.data.dtype.kind not in "ObMm" @@ -662,7 +667,7 @@ def _array_reduce( if ( OPTIONS["use_bottleneck"] and bottleneck_move_func is not None - and not utils.is_duck_dask_array(self.obj.data) + and not is_duck_dask_array(self.obj.data) and self.ndim == 1 ): # TODO: re-enable bottleneck with dask after the issues @@ -1027,7 +1032,7 @@ def construct( from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset - window_dim = utils.either_dict_or_kwargs( + window_dim = either_dict_or_kwargs( window_dim, window_dim_kwargs, "Coarsen.construct" ) if not window_dim: From ea7feef45437b2f0b8a97aae61d4685cd7ee6eb9 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 12 Feb 2024 13:48:09 -0800 Subject: [PATCH 54/54] update what's new --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d51b5da2a88..ed0b1c30987 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -88,6 +88,9 @@ Internal Changes when the data isn't datetime-like. (:issue:`8718`, :pull:`8724`) By `Maximilian Roos `_. +- Move `parallelcompat` and `chunk managers` modules from `xarray/core` to `xarray/namedarray`. (:pull:`8319`) + By `Tom Nicholas `_ and `Anderson Banihirwe `_. + .. _whats-new.2024.01.1: v2024.01.1 (23 Jan, 2024)