From d64460795e406bc4a998e2ddae0054a1029d52a9 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Mon, 12 Feb 2024 15:09:23 -0700 Subject: [PATCH] Move parallelcompat and chunkmanagers to NamedArray (#8319) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Co-authored-by: Anderson Banihirwe --- doc/internals/chunked-arrays.rst | 18 +- doc/whats-new.rst | 3 + pyproject.toml | 2 +- xarray/backends/api.py | 4 +- xarray/backends/common.py | 4 +- xarray/backends/pydap_.py | 2 +- xarray/backends/zarr.py | 4 +- xarray/coding/strings.py | 3 +- xarray/coding/times.py | 5 +- xarray/coding/variables.py | 6 +- xarray/conventions.py | 2 +- xarray/convert.py | 2 +- xarray/core/accessor_dt.py | 2 +- xarray/core/arithmetic.py | 7 +- xarray/core/common.py | 4 +- xarray/core/computation.py | 6 +- xarray/core/dataarray.py | 4 +- xarray/core/dataset.py | 24 +- xarray/core/duck_array_ops.py | 9 +- xarray/core/formatting.py | 2 +- xarray/core/indexing.py | 11 +- xarray/core/missing.py | 3 +- xarray/core/nputils.py | 5 +- xarray/core/parallel.py | 2 +- xarray/core/rolling.py | 10 +- xarray/core/rolling_exp.py | 2 +- xarray/core/utils.py | 170 ++-------- xarray/core/variable.py | 302 +++++++----------- xarray/core/weighted.py | 2 +- xarray/namedarray/_typing.py | 18 ++ xarray/namedarray/core.py | 116 ++++++- xarray/{core => namedarray}/daskmanager.py | 130 ++++---- xarray/{core => namedarray}/parallelcompat.py | 122 ++++--- xarray/{core => namedarray}/pycompat.py | 44 ++- xarray/namedarray/utils.py | 44 ++- xarray/plot/utils.py | 2 +- xarray/tests/test_backends.py | 2 +- xarray/tests/test_coding_times.py | 2 +- xarray/tests/test_dataset.py | 2 +- xarray/tests/test_duck_array_ops.py | 2 +- xarray/tests/test_missing.py | 2 +- xarray/tests/test_parallelcompat.py | 13 +- xarray/tests/test_plot.py | 2 +- xarray/tests/test_sparse.py | 2 +- xarray/tests/test_utils.py | 3 +- xarray/tests/test_variable.py | 2 +- 46 files changed, 566 insertions(+), 562 deletions(-) rename xarray/{core => namedarray}/daskmanager.py (62%) rename xarray/{core => namedarray}/parallelcompat.py (90%) rename xarray/{core => namedarray}/pycompat.py (76%) diff --git a/doc/internals/chunked-arrays.rst b/doc/internals/chunked-arrays.rst index 7192c3f0bc5..ba7ce72c834 100644 --- a/doc/internals/chunked-arrays.rst +++ b/doc/internals/chunked-arrays.rst @@ -35,24 +35,24 @@ The implementation of these functions is specific to the type of arrays passed t whereas :py:class:`cubed.Array` objects must be processed by :py:func:`cubed.map_blocks`. In order to use the correct implementation of a core operation for the array type encountered, xarray dispatches to the -corresponding subclass of :py:class:`~xarray.core.parallelcompat.ChunkManagerEntrypoint`, +corresponding subclass of :py:class:`~xarray.namedarray.parallelcompat.ChunkManagerEntrypoint`, also known as a "Chunk Manager". Therefore **a full list of the operations that need to be defined is set by the -API of the** :py:class:`~xarray.core.parallelcompat.ChunkManagerEntrypoint` **abstract base class**. Note that chunked array +API of the** :py:class:`~xarray.namedarray.parallelcompat.ChunkManagerEntrypoint` **abstract base class**. Note that chunked array methods are also currently dispatched using this class. Chunked array creation is also handled by this class. As chunked array objects have a one-to-one correspondence with in-memory numpy arrays, it should be possible to create a chunked array from a numpy array by passing the desired -chunking pattern to an implementation of :py:class:`~xarray.core.parallelcompat.ChunkManagerEntrypoint.from_array``. +chunking pattern to an implementation of :py:class:`~xarray.namedarray.parallelcompat.ChunkManagerEntrypoint.from_array``. .. note:: - The :py:class:`~xarray.core.parallelcompat.ChunkManagerEntrypoint` abstract base class is mostly just acting as a + The :py:class:`~xarray.namedarray.parallelcompat.ChunkManagerEntrypoint` abstract base class is mostly just acting as a namespace for containing the chunked-aware function primitives. Ideally in the future we would have an API standard for chunked array types which codified this structure, making the entrypoint system unnecessary. -.. currentmodule:: xarray.core.parallelcompat +.. currentmodule:: xarray.namedarray.parallelcompat -.. autoclass:: xarray.core.parallelcompat.ChunkManagerEntrypoint +.. autoclass:: xarray.namedarray.parallelcompat.ChunkManagerEntrypoint :members: Registering a new ChunkManagerEntrypoint subclass @@ -60,19 +60,19 @@ Registering a new ChunkManagerEntrypoint subclass Rather than hard-coding various chunk managers to deal with specific chunked array implementations, xarray uses an entrypoint system to allow developers of new chunked array implementations to register their corresponding subclass of -:py:class:`~xarray.core.parallelcompat.ChunkManagerEntrypoint`. +:py:class:`~xarray.namedarray.parallelcompat.ChunkManagerEntrypoint`. To register a new entrypoint you need to add an entry to the ``setup.cfg`` like this:: [options.entry_points] xarray.chunkmanagers = - dask = xarray.core.daskmanager:DaskManager + dask = xarray.namedarray.daskmanager:DaskManager See also `cubed-xarray `_ for another example. To check that the entrypoint has worked correctly, you may find it useful to display the available chunkmanagers using -the internal function :py:func:`~xarray.core.parallelcompat.list_chunkmanagers`. +the internal function :py:func:`~xarray.namedarray.parallelcompat.list_chunkmanagers`. .. autofunction:: list_chunkmanagers diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d51b5da2a88..ed0b1c30987 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -88,6 +88,9 @@ Internal Changes when the data isn't datetime-like. (:issue:`8718`, :pull:`8724`) By `Maximilian Roos `_. +- Move `parallelcompat` and `chunk managers` modules from `xarray/core` to `xarray/namedarray`. (:pull:`8319`) + By `Tom Nicholas `_ and `Anderson Banihirwe `_. + .. _whats-new.2024.01.1: v2024.01.1 (23 Jan, 2024) diff --git a/pyproject.toml b/pyproject.toml index 90f92110458..62c2eed3295 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ issue-tracker = "https://github.com/pydata/xarray/issues" source-code = "https://github.com/pydata/xarray" [project.entry-points."xarray.chunkmanagers"] -dask = "xarray.core.daskmanager:DaskManager" +dask = "xarray.namedarray.daskmanager:DaskManager" [build-system] build-backend = "setuptools.build_meta" diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 6af98714670..e69faa4b100 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -34,13 +34,13 @@ _nested_combine, combine_by_coords, ) -from xarray.core.daskmanager import DaskManager from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset, _get_chunk, _maybe_chunk from xarray.core.indexes import Index -from xarray.core.parallelcompat import guess_chunkmanager from xarray.core.types import ZarrWriteModes from xarray.core.utils import is_remote_uri +from xarray.namedarray.daskmanager import DaskManager +from xarray.namedarray.parallelcompat import guess_chunkmanager if TYPE_CHECKING: try: diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 5b8f9a6840f..5b7cdc4cf50 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -12,9 +12,9 @@ from xarray.conventions import cf_encoder from xarray.core import indexing -from xarray.core.parallelcompat import get_chunked_array_type -from xarray.core.pycompat import is_chunked_array from xarray.core.utils import FrozenDict, NdimSizeLenMixin, is_remote_uri +from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import is_chunked_array if TYPE_CHECKING: from io import BufferedIOBase diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 9b5bcc82e6f..5a475a7c3be 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -14,7 +14,6 @@ ) from xarray.backends.store import StoreBackendEntrypoint from xarray.core import indexing -from xarray.core.pycompat import integer_types from xarray.core.utils import ( Frozen, FrozenDict, @@ -23,6 +22,7 @@ is_remote_uri, ) from xarray.core.variable import Variable +from xarray.namedarray.pycompat import integer_types if TYPE_CHECKING: import os diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 8f9040477d9..73e2145468b 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -19,8 +19,6 @@ ) from xarray.backends.store import StoreBackendEntrypoint from xarray.core import indexing -from xarray.core.parallelcompat import guess_chunkmanager -from xarray.core.pycompat import integer_types from xarray.core.types import ZarrWriteModes from xarray.core.utils import ( FrozenDict, @@ -28,6 +26,8 @@ close_on_error, ) from xarray.core.variable import Variable +from xarray.namedarray.parallelcompat import guess_chunkmanager +from xarray.namedarray.pycompat import integer_types if TYPE_CHECKING: from io import BufferedIOBase diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 07665230b26..2c4501dfb0f 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -15,8 +15,9 @@ unpack_for_encoding, ) from xarray.core import indexing -from xarray.core.parallelcompat import get_chunked_array_type, is_chunked_array from xarray.core.variable import Variable +from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import is_chunked_array def create_vlen_dtype(element_type): diff --git a/xarray/coding/times.py b/xarray/coding/times.py index f54966dc39a..92bce0abeaa 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -24,11 +24,12 @@ from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like from xarray.core.duck_array_ops import asarray from xarray.core.formatting import first_n_items, format_timestamp, last_item -from xarray.core.parallelcompat import T_ChunkedArray, get_chunked_array_type from xarray.core.pdcompat import nanosecond_precision_timestamp -from xarray.core.pycompat import is_chunked_array, is_duck_dask_array from xarray.core.utils import emit_user_level_warning from xarray.core.variable import Variable +from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type +from xarray.namedarray.pycompat import is_chunked_array +from xarray.namedarray.utils import is_duck_dask_array try: import cftime diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 3d70b39d03f..b5e4167f2b2 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -11,9 +11,9 @@ import pandas as pd from xarray.core import dtypes, duck_array_ops, indexing -from xarray.core.parallelcompat import get_chunked_array_type -from xarray.core.pycompat import is_chunked_array from xarray.core.variable import Variable +from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import is_chunked_array if TYPE_CHECKING: T_VarTuple = tuple[tuple[Hashable, ...], Any, dict, dict] @@ -163,7 +163,7 @@ def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike): if is_chunked_array(array): chunkmanager = get_chunked_array_type(array) - return chunkmanager.map_blocks(func, array, dtype=dtype) + return chunkmanager.map_blocks(func, array, dtype=dtype) # type: ignore[arg-type] else: return _ElementwiseFunctionArray(array, func, dtype) diff --git a/xarray/conventions.py b/xarray/conventions.py index 61d0cc5e385..6eff45c5b2d 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -14,9 +14,9 @@ _contains_datetime_like_objects, contains_cftime_datetimes, ) -from xarray.core.pycompat import is_duck_dask_array from xarray.core.utils import emit_user_level_warning from xarray.core.variable import IndexVariable, Variable +from xarray.namedarray.utils import is_duck_dask_array CF_RELATED_DATA = ( "bounds", diff --git a/xarray/convert.py b/xarray/convert.py index 404a279f1f8..b8d81ccf9f0 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -10,7 +10,7 @@ from xarray.core import duck_array_ops from xarray.core.dataarray import DataArray from xarray.core.dtypes import get_fill_value -from xarray.core.pycompat import array_type +from xarray.namedarray.pycompat import array_type iris_forbidden_keys = { "standard_name", diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 219bca90048..41b982d268b 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -13,9 +13,9 @@ is_np_datetime_like, is_np_timedelta_like, ) -from xarray.core.pycompat import is_duck_dask_array from xarray.core.types import T_DataArray from xarray.core.variable import IndexVariable +from xarray.namedarray.utils import is_duck_dask_array if TYPE_CHECKING: from numpy.typing import DTypeLike diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index de2fbe23d35..452c7115b75 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -15,12 +15,9 @@ VariableOpsMixin, ) from xarray.core.common import ImplementsArrayReduce, ImplementsDatasetReduce -from xarray.core.ops import ( - IncludeNumpySameMethods, - IncludeReduceMethods, -) +from xarray.core.ops import IncludeNumpySameMethods, IncludeReduceMethods from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.pycompat import is_duck_array +from xarray.namedarray.utils import is_duck_array class SupportsArithmetic: diff --git a/xarray/core/common.py b/xarray/core/common.py index 001806c66ec..cf2b4063202 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -13,8 +13,6 @@ from xarray.core import dtypes, duck_array_ops, formatting, formatting_html, ops from xarray.core.indexing import BasicIndexer, ExplicitlyIndexed from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.parallelcompat import get_chunked_array_type, guess_chunkmanager -from xarray.core.pycompat import is_chunked_array from xarray.core.utils import ( Frozen, either_dict_or_kwargs, @@ -22,6 +20,8 @@ is_scalar, ) from xarray.namedarray.core import _raise_if_any_duplicate_dimensions +from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager +from xarray.namedarray.pycompat import is_chunked_array try: import cftime diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 68eae1566c1..f29f6c4dd35 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -22,11 +22,11 @@ from xarray.core.indexes import Index, filter_indexes_from_coords from xarray.core.merge import merge_attrs, merge_coordinates_without_align from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.parallelcompat import get_chunked_array_type -from xarray.core.pycompat import is_chunked_array, is_duck_dask_array from xarray.core.types import Dims, T_DataArray -from xarray.core.utils import is_dict_like, is_scalar, parse_dims +from xarray.core.utils import is_dict_like, is_duck_dask_array, is_scalar, parse_dims from xarray.core.variable import Variable +from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import is_chunked_array from xarray.util.deprecation_helpers import deprecate_dims if TYPE_CHECKING: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 46d97b36560..c00fe1a9e67 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -64,6 +64,7 @@ _default, either_dict_or_kwargs, hashable, + infix_dims, ) from xarray.core.variable import ( IndexVariable, @@ -71,7 +72,6 @@ as_compatible_data, as_variable, ) -from xarray.namedarray.utils import infix_dims from xarray.plot.accessor import DataArrayPlotAccessor from xarray.plot.utils import _get_units_from_attrs from xarray.util.deprecation_helpers import _deprecate_positional_args, deprecate_dims @@ -85,7 +85,6 @@ from xarray.backends import ZarrStore from xarray.backends.api import T_NetcdfEngine, T_NetcdfTypes from xarray.core.groupby import DataArrayGroupBy - from xarray.core.parallelcompat import ChunkManagerEntrypoint from xarray.core.resample import DataArrayResample from xarray.core.rolling import DataArrayCoarsen, DataArrayRolling from xarray.core.types import ( @@ -108,6 +107,7 @@ T_Xarray, ) from xarray.core.weighted import DataArrayWeighted + from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint T_XarrayOther = TypeVar("T_XarrayOther", bound=Union["DataArray", Dataset]) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3caa418e00e..884e302b8be 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -63,7 +63,6 @@ assert_coordinate_consistent, create_coords_with_default_indexes, ) -from xarray.core.daskmanager import DaskManager from xarray.core.duck_array_ops import datetime_to_numeric from xarray.core.indexes import ( Index, @@ -86,13 +85,6 @@ ) from xarray.core.missing import get_clean_interp_index from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.parallelcompat import get_chunked_array_type, guess_chunkmanager -from xarray.core.pycompat import ( - array_type, - is_chunked_array, - is_duck_array, - is_duck_dask_array, -) from xarray.core.types import ( QuantileMethods, Self, @@ -114,6 +106,10 @@ drop_dims_from_indexers, either_dict_or_kwargs, emit_user_level_warning, + infix_dims, + is_dict_like, + is_duck_array, + is_duck_dask_array, is_scalar, maybe_wrap_array, ) @@ -124,7 +120,8 @@ broadcast_variables, calculate_dimensions, ) -from xarray.namedarray.utils import infix_dims, is_dict_like +from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager +from xarray.namedarray.pycompat import array_type, is_chunked_array from xarray.plot.accessor import DatasetPlotAccessor from xarray.util.deprecation_helpers import _deprecate_positional_args @@ -138,7 +135,6 @@ from xarray.core.dataarray import DataArray from xarray.core.groupby import DatasetGroupBy from xarray.core.merge import CoercibleMapping, CoercibleValue, _MergeResult - from xarray.core.parallelcompat import ChunkManagerEntrypoint from xarray.core.resample import DatasetResample from xarray.core.rolling import DatasetCoarsen, DatasetRolling from xarray.core.types import ( @@ -164,6 +160,7 @@ T_Xarray, ) from xarray.core.weighted import DatasetWeighted + from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint # list of attributes of pd.DatetimeIndex that are ndarrays of time info @@ -292,6 +289,9 @@ def _maybe_chunk( chunked_array_type: str | ChunkManagerEntrypoint | None = None, from_array_kwargs=None, ): + + from xarray.namedarray.daskmanager import DaskManager + if chunks is not None: chunks = {dim: chunks[dim] for dim in var.dims if dim in chunks} @@ -842,7 +842,9 @@ def load(self, **kwargs) -> Self: chunkmanager = get_chunked_array_type(*lazy_data.values()) # evaluate all the chunked arrays simultaneously - evaluated_data = chunkmanager.compute(*lazy_data.values(), **kwargs) + evaluated_data: tuple[np.ndarray[Any, Any], ...] = chunkmanager.compute( + *lazy_data.values(), **kwargs + ) for k, data in zip(lazy_data, evaluated_data): self.variables[k].data = data diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index a740c54380b..ef497e78ebf 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -33,11 +33,12 @@ from numpy.lib.stride_tricks import sliding_window_view # noqa from packaging.version import Version -from xarray.core import dask_array_ops, dtypes, nputils, pycompat +from xarray.core import dask_array_ops, dtypes, nputils from xarray.core.options import OPTIONS -from xarray.core.parallelcompat import get_chunked_array_type, is_chunked_array -from xarray.core.pycompat import array_type, is_duck_dask_array -from xarray.core.utils import is_duck_array, module_available +from xarray.core.utils import is_duck_array, is_duck_dask_array, module_available +from xarray.namedarray import pycompat +from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import array_type, is_chunked_array # remove once numpy 2.0 is the oldest supported version if module_available("numpy", minversion="2.0.0.dev0"): diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 2ec5513a554..260dabd9d31 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -20,8 +20,8 @@ from xarray.core.duck_array_ops import array_equiv, astype from xarray.core.indexing import MemoryCachedArray from xarray.core.options import OPTIONS, _get_boolean_with_default -from xarray.core.pycompat import array_type, to_duck_array, to_numpy from xarray.core.utils import is_duck_array +from xarray.namedarray.pycompat import array_type, to_duck_array, to_numpy if TYPE_CHECKING: from xarray.core.coordinates import AbstractCoordinates diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index cf8c2a3d259..061f41c22b2 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -17,21 +17,18 @@ from xarray.core import duck_array_ops from xarray.core.nputils import NumpyVIndexAdapter from xarray.core.options import OPTIONS -from xarray.core.parallelcompat import get_chunked_array_type, is_chunked_array -from xarray.core.pycompat import ( - array_type, - integer_types, - is_duck_array, - is_duck_dask_array, -) from xarray.core.types import T_Xarray from xarray.core.utils import ( NDArrayMixin, either_dict_or_kwargs, get_valid_numpy_dtype, + is_duck_array, + is_duck_dask_array, is_scalar, to_0d_array, ) +from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import array_type, integer_types, is_chunked_array if TYPE_CHECKING: from numpy.typing import DTypeLike diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 186ec121ca9..8aa2ff2f042 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -20,10 +20,11 @@ timedelta_to_numeric, ) from xarray.core.options import _get_keep_attrs -from xarray.core.parallelcompat import get_chunked_array_type, is_chunked_array from xarray.core.types import Interp1dOptions, InterpOptions from xarray.core.utils import OrderedSet, is_scalar from xarray.core.variable import Variable, broadcast_variables +from xarray.namedarray.parallelcompat import get_chunked_array_type +from xarray.namedarray.pycompat import is_chunked_array if TYPE_CHECKING: from xarray.core.dataarray import DataArray diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 84642d09f18..6970d37402f 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -7,8 +7,8 @@ import pandas as pd from packaging.version import Version -from xarray.core import pycompat -from xarray.core.utils import module_available +from xarray.core.utils import is_duck_array, module_available +from xarray.namedarray import pycompat # remove once numpy 2.0 is the oldest supported version if module_available("numpy", minversion="2.0.0.dev0"): @@ -27,7 +27,6 @@ from numpy import RankWarning # type: ignore[attr-defined,no-redef,unused-ignore] from xarray.core.options import OPTIONS -from xarray.core.pycompat import is_duck_array try: import bottleneck as bn diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index dbe5d789abb..41311497f8b 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -14,7 +14,7 @@ from xarray.core.dataset import Dataset from xarray.core.indexes import Index from xarray.core.merge import merge -from xarray.core.pycompat import is_dask_collection +from xarray.core.utils import is_dask_collection from xarray.core.variable import Variable if TYPE_CHECKING: diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 3723f42ac55..6cf49fc995b 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -10,12 +10,16 @@ import numpy as np from packaging.version import Version -from xarray.core import dtypes, duck_array_ops, pycompat, utils +from xarray.core import dtypes, duck_array_ops, utils from xarray.core.arithmetic import CoarsenArithmetic from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.pycompat import is_duck_dask_array from xarray.core.types import CoarsenBoundaryOptions, SideOptions, T_Xarray -from xarray.core.utils import either_dict_or_kwargs, module_available +from xarray.core.utils import ( + either_dict_or_kwargs, + is_duck_dask_array, + module_available, +) +from xarray.namedarray import pycompat try: import bottleneck diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index 233bb2e37d9..4e085a0a7eb 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -6,12 +6,12 @@ import numpy as np from packaging.version import Version -from xarray.core import pycompat from xarray.core.computation import apply_ufunc from xarray.core.options import _get_keep_attrs from xarray.core.pdcompat import count_not_none from xarray.core.types import T_DataWithCoords from xarray.core.utils import module_available +from xarray.namedarray import pycompat def _get_alpha( diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 103b3ad5ca3..9b527622e40 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -38,7 +38,6 @@ import contextlib import functools -import importlib import inspect import io import itertools @@ -69,16 +68,27 @@ Generic, Literal, TypeVar, - cast, overload, ) import numpy as np import pandas as pd -from packaging.version import Version + +from xarray.namedarray.utils import ( # noqa: F401 + ReprObject, + drop_missing_dims, + either_dict_or_kwargs, + infix_dims, + is_dask_collection, + is_dict_like, + is_duck_array, + is_duck_dask_array, + module_available, + to_0d_object_array, +) if TYPE_CHECKING: - from xarray.core.types import Dims, ErrorOptionsWithWarn, T_DuckArray + from xarray.core.types import Dims, ErrorOptionsWithWarn K = TypeVar("K") V = TypeVar("V") @@ -247,11 +257,6 @@ def remove_incompatible_items( del first_dict[k] -# It's probably OK to give this as a TypeGuard; though it's not perfectly robust. -def is_dict_like(value: Any) -> TypeGuard[Mapping]: - return hasattr(value, "keys") and hasattr(value, "__getitem__") - - def is_full_slice(value: Any) -> bool: return isinstance(value, slice) and value == slice(None) @@ -260,39 +265,6 @@ def is_list_like(value: Any) -> TypeGuard[list | tuple]: return isinstance(value, (list, tuple)) -def is_duck_array(value: Any) -> TypeGuard[T_DuckArray]: - if isinstance(value, np.ndarray): - return True - return ( - hasattr(value, "ndim") - and hasattr(value, "shape") - and hasattr(value, "dtype") - and ( - (hasattr(value, "__array_function__") and hasattr(value, "__array_ufunc__")) - or hasattr(value, "__array_namespace__") - ) - ) - - -def either_dict_or_kwargs( - pos_kwargs: Mapping[Any, T] | None, - kw_kwargs: Mapping[str, T], - func_name: str, -) -> Mapping[Hashable, T]: - if pos_kwargs is None or pos_kwargs == {}: - # Need an explicit cast to appease mypy due to invariance; see - # https://github.com/python/mypy/issues/6228 - return cast(Mapping[Hashable, T], kw_kwargs) - - if not is_dict_like(pos_kwargs): - raise ValueError(f"the first argument to .{func_name} must be a dictionary") - if kw_kwargs: - raise ValueError( - f"cannot specify both keyword and positional arguments to .{func_name}" - ) - return pos_kwargs - - def _is_scalar(value, include_0d): from xarray.core.variable import NON_NUMPY_SUPPORTED_ARRAY_TYPES @@ -348,13 +320,6 @@ def is_valid_numpy_dtype(dtype: Any) -> bool: return True -def to_0d_object_array(value: Any) -> np.ndarray: - """Given a value, wrap it in a 0-D numpy.ndarray with dtype=object.""" - result = np.empty((), dtype=object) - result[()] = value - return result - - def to_0d_array(value: Any) -> np.ndarray: """Given a value, wrap it in a 0-D numpy.ndarray.""" if np.isscalar(value) or (isinstance(value, np.ndarray) and value.ndim == 0): @@ -661,31 +626,6 @@ def __repr__(self: Any) -> str: return f"{type(self).__name__}(array={self.array!r})" -class ReprObject: - """Object that prints as the given value, for use with sentinel values.""" - - __slots__ = ("_value",) - - def __init__(self, value: str): - self._value = value - - def __repr__(self) -> str: - return self._value - - def __eq__(self, other) -> bool: - if isinstance(other, ReprObject): - return self._value == other._value - return False - - def __hash__(self) -> int: - return hash((type(self), self._value)) - - def __dask_tokenize__(self): - from dask.base import normalize_token - - return normalize_token((type(self), self._value)) - - @contextlib.contextmanager def close_on_error(f): """Context manager to ensure that a file opened by xarray is closed if an @@ -910,49 +850,6 @@ def drop_dims_from_indexers( ) -def drop_missing_dims( - supplied_dims: Iterable[Hashable], - dims: Iterable[Hashable], - missing_dims: ErrorOptionsWithWarn, -) -> Iterable[Hashable]: - """Depending on the setting of missing_dims, drop any dimensions from supplied_dims that - are not present in dims. - - Parameters - ---------- - supplied_dims : Iterable of Hashable - dims : Iterable of Hashable - missing_dims : {"raise", "warn", "ignore"} - """ - - if missing_dims == "raise": - supplied_dims_set = {val for val in supplied_dims if val is not ...} - invalid = supplied_dims_set - set(dims) - if invalid: - raise ValueError( - f"Dimensions {invalid} do not exist. Expected one or more of {dims}" - ) - - return supplied_dims - - elif missing_dims == "warn": - invalid = set(supplied_dims) - set(dims) - if invalid: - warnings.warn( - f"Dimensions {invalid} do not exist. Expected one or more of {dims}" - ) - - return [val for val in supplied_dims if val in dims or val is ...] - - elif missing_dims == "ignore": - return [val for val in supplied_dims if val in dims or val is ...] - - else: - raise ValueError( - f"Unrecognised option {missing_dims} for missing_dims argument" - ) - - @overload def parse_dims( dim: Dims, @@ -1146,7 +1043,7 @@ def contains_only_chunked_or_numpy(obj) -> bool: Expects obj to be Dataset or DataArray""" from xarray.core.dataarray import DataArray - from xarray.core.pycompat import is_chunked_array + from xarray.namedarray.pycompat import is_chunked_array if isinstance(obj, DataArray): obj = obj._to_temp_dataset() @@ -1159,35 +1056,6 @@ def contains_only_chunked_or_numpy(obj) -> bool: ) -@functools.lru_cache -def module_available(module: str, minversion: str | None = None) -> bool: - """Checks whether a module is installed without importing it. - - Use this for a lightweight check and lazy imports. - - Parameters - ---------- - module : str - Name of the module. - minversion : str, optional - Minimum version of the module - - Returns - ------- - available : bool - Whether the module is installed. - """ - if importlib.util.find_spec(module) is None: - return False - - if minversion is not None: - version = importlib.metadata.version(module) - - return Version(version) >= Version(minversion) - - return True - - def find_stack_level(test_mode=False) -> int: """Find the first place in the stack that is not inside xarray or the Python standard library. @@ -1245,11 +1113,11 @@ def emit_user_level_warning(message, category=None): def consolidate_dask_from_array_kwargs( - from_array_kwargs: dict, + from_array_kwargs: dict[Any, Any], name: str | None = None, lock: bool | None = None, inline_array: bool | None = None, -) -> dict: +) -> dict[Any, Any]: """ Merge dask-specific kwargs with arbitrary from_array_kwargs dict. @@ -1282,12 +1150,12 @@ def consolidate_dask_from_array_kwargs( def _resolve_doubly_passed_kwarg( - kwargs_dict: dict, + kwargs_dict: dict[Any, Any], kwarg_name: str, passed_kwarg_value: str | bool | None, default: bool | None, err_msg_dict_name: str, -) -> dict: +) -> dict[Any, Any]: # if in kwargs_dict but not passed explicitly then just pass kwargs_dict through unaltered if kwarg_name in kwargs_dict and passed_kwarg_value is None: pass diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 8070135e52d..8d76cfbe004 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -8,7 +8,7 @@ from collections.abc import Hashable, Mapping, Sequence from datetime import timedelta from functools import partial -from typing import TYPE_CHECKING, Any, Callable, NoReturn, cast +from typing import TYPE_CHECKING, Any, Callable, Literal, NoReturn, cast import numpy as np import pandas as pd @@ -26,27 +26,26 @@ as_indexable, ) from xarray.core.options import OPTIONS, _get_keep_attrs -from xarray.core.parallelcompat import get_chunked_array_type, guess_chunkmanager -from xarray.core.pycompat import ( - integer_types, - is_0d_dask_array, - is_chunked_array, - is_duck_dask_array, - to_numpy, -) -from xarray.core.types import T_Chunks from xarray.core.utils import ( OrderedSet, _default, + consolidate_dask_from_array_kwargs, decode_numpy_dict_values, drop_dims_from_indexers, either_dict_or_kwargs, ensure_us_time_resolution, + infix_dims, + is_dict_like, is_duck_array, + is_duck_dask_array, maybe_coerce_to_str, ) from xarray.namedarray.core import NamedArray, _raise_if_any_duplicate_dimensions -from xarray.namedarray.utils import infix_dims +from xarray.namedarray.pycompat import ( + integer_types, + is_0d_dask_array, + to_duck_array, +) NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, @@ -56,7 +55,6 @@ BASIC_INDEXING_TYPES = integer_types + (slice,) if TYPE_CHECKING: - from xarray.core.parallelcompat import ChunkManagerEntrypoint from xarray.core.types import ( Dims, ErrorOptionsWithWarn, @@ -66,6 +64,8 @@ Self, T_DuckArray, ) + from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint + NON_NANOSECOND_WARNING = ( "Converting non-nanosecond precision {case} values to nanosecond precision. " @@ -498,54 +498,6 @@ def astype( dask="allowed", ) - def load(self, **kwargs): - """Manually trigger loading of this variable's data from disk or a - remote source into memory and return this variable. - - Normally, it should not be necessary to call this method in user code, - because all xarray functions should either work on deferred data or - load data automatically. - - Parameters - ---------- - **kwargs : dict - Additional keyword arguments passed on to ``dask.array.compute``. - - See Also - -------- - dask.array.compute - """ - if is_chunked_array(self._data): - chunkmanager = get_chunked_array_type(self._data) - loaded_data, *_ = chunkmanager.compute(self._data, **kwargs) - self._data = as_compatible_data(loaded_data) - elif isinstance(self._data, indexing.ExplicitlyIndexed): - self._data = self._data.get_duck_array() - elif not is_duck_array(self._data): - self._data = np.asarray(self._data) - return self - - def compute(self, **kwargs): - """Manually trigger loading of this variable's data from disk or a - remote source into memory and return a new variable. The original is - left unaltered. - - Normally, it should not be necessary to call this method in user code, - because all xarray functions should either work on deferred data or - load data automatically. - - Parameters - ---------- - **kwargs : dict - Additional keyword arguments passed on to ``dask.array.compute``. - - See Also - -------- - dask.array.compute - """ - new = self.copy(deep=False) - return new.load(**kwargs) - def _dask_finalize(self, results, array_func, *args, **kwargs): data = array_func(results, *args, **kwargs) return Variable(self._dims, data, attrs=self._attrs, encoding=self._encoding) @@ -608,7 +560,7 @@ def to_dict( return item def _item_key_to_tuple(self, key): - if utils.is_dict_like(key): + if is_dict_like(key): return tuple(key.get(dim, slice(None)) for dim in self.dims) else: return key @@ -964,135 +916,46 @@ def _replace( encoding = copy.copy(self._encoding) return type(self)(dims, data, attrs, encoding, fastpath=True) - def chunk( - self, - chunks: T_Chunks = {}, - name: str | None = None, - lock: bool | None = None, - inline_array: bool | None = None, - chunked_array_type: str | ChunkManagerEntrypoint | None = None, - from_array_kwargs=None, - **chunks_kwargs: Any, - ) -> Self: - """Coerce this array's data into a dask array with the given chunks. - - If this variable is a non-dask array, it will be converted to dask - array. If it's a dask array, it will be rechunked to the given chunk - sizes. + def load(self, **kwargs): + """Manually trigger loading of this variable's data from disk or a + remote source into memory and return this variable. - If neither chunks is not provided for one or more dimensions, chunk - sizes along that dimension will not be updated; non-dask arrays will be - converted into dask arrays with a single block. + Normally, it should not be necessary to call this method in user code, + because all xarray functions should either work on deferred data or + load data automatically. Parameters ---------- - chunks : int, tuple or dict, optional - Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or - ``{'x': 5, 'y': 5}``. - name : str, optional - Used to generate the name for this array in the internal dask - graph. Does not need not be unique. - lock : bool, default: False - Passed on to :py:func:`dask.array.from_array`, if the array is not - already as dask array. - inline_array : bool, default: False - Passed on to :py:func:`dask.array.from_array`, if the array is not - already as dask array. - chunked_array_type: str, optional - Which chunked array type to coerce this datasets' arrays to. - Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system. - Experimental API that should not be relied upon. - from_array_kwargs: dict, optional - Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create - chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg. - For example, with dask as the default chunked array type, this method would pass additional kwargs - to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon. - **chunks_kwargs : {dim: chunks, ...}, optional - The keyword arguments form of ``chunks``. - One of chunks or chunks_kwargs must be provided. - - Returns - ------- - chunked : xarray.Variable + **kwargs : dict + Additional keyword arguments passed on to ``dask.array.compute``. See Also -------- - Variable.chunks - Variable.chunksizes - xarray.unify_chunks - dask.array.from_array + dask.array.compute """ + self._data = to_duck_array(self._data, **kwargs) + return self - if chunks is None: - warnings.warn( - "None value for 'chunks' is deprecated. " - "It will raise an error in the future. Use instead '{}'", - category=FutureWarning, - ) - chunks = {} - - if isinstance(chunks, (float, str, int, tuple, list)): - # TODO we shouldn't assume here that other chunkmanagers can handle these types - # TODO should we call normalize_chunks here? - pass # dask.array.from_array can handle these directly - else: - chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") - - if utils.is_dict_like(chunks): - chunks = {self.get_axis_num(dim): chunk for dim, chunk in chunks.items()} - - chunkmanager = guess_chunkmanager(chunked_array_type) - - if from_array_kwargs is None: - from_array_kwargs = {} - - # TODO deprecate passing these dask-specific arguments explicitly. In future just pass everything via from_array_kwargs - _from_array_kwargs = utils.consolidate_dask_from_array_kwargs( - from_array_kwargs, - name=name, - lock=lock, - inline_array=inline_array, - ) - - data_old = self._data - if chunkmanager.is_chunked_array(data_old): - data_chunked = chunkmanager.rechunk(data_old, chunks) - else: - if not isinstance(data_old, indexing.ExplicitlyIndexed): - ndata = data_old - else: - # Unambiguously handle array storage backends (like NetCDF4 and h5py) - # that can't handle general array indexing. For example, in netCDF4 you - # can do "outer" indexing along two dimensions independent, which works - # differently from how NumPy handles it. - # da.from_array works by using lazy indexing with a tuple of slices. - # Using OuterIndexer is a pragmatic choice: dask does not yet handle - # different indexing types in an explicit way: - # https://github.com/dask/dask/issues/2883 - # TODO: ImplicitToExplicitIndexingAdapter doesn't match the array api: - ndata = indexing.ImplicitToExplicitIndexingAdapter( # type: ignore[assignment] - data_old, indexing.OuterIndexer - ) - - if utils.is_dict_like(chunks): - chunks = tuple(chunks.get(n, s) for n, s in enumerate(ndata.shape)) - - data_chunked = chunkmanager.from_array( - ndata, - chunks, - **_from_array_kwargs, - ) + def compute(self, **kwargs): + """Manually trigger loading of this variable's data from disk or a + remote source into memory and return a new variable. The original is + left unaltered. - return self._replace(data=data_chunked) + Normally, it should not be necessary to call this method in user code, + because all xarray functions should either work on deferred data or + load data automatically. - def to_numpy(self) -> np.ndarray: - """Coerces wrapped data to numpy and returns a numpy.ndarray""" - # TODO an entrypoint so array libraries can choose coercion method? - return to_numpy(self._data) + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.array.compute``. - def as_numpy(self) -> Self: - """Coerces wrapped data into a numpy array, returning a Variable.""" - return self._replace(data=self.to_numpy()) + See Also + -------- + dask.array.compute + """ + new = self.copy(deep=False) + return new.load(**kwargs) def isel( self, @@ -1452,7 +1315,7 @@ def set_dims(self, dims, shape=None): if isinstance(dims, str): dims = [dims] - if shape is None and utils.is_dict_like(dims): + if shape is None and is_dict_like(dims): shape = dims.values() missing_dims = set(self.dims) - set(dims) @@ -2230,10 +2093,10 @@ def coarsen_reshape(self, windows, boundary, side): """ Construct a reshaped-array for coarsen """ - if not utils.is_dict_like(boundary): + if not is_dict_like(boundary): boundary = {d: boundary for d in windows.keys()} - if not utils.is_dict_like(side): + if not is_dict_like(side): side = {d: side for d in windows.keys()} # remove unrelated dimensions @@ -2613,6 +2476,83 @@ def _to_dense(self) -> Variable: out = super()._to_dense() return cast("Variable", out) + def chunk( # type: ignore[override] + self, + chunks: int | Literal["auto"] | Mapping[Any, None | int | tuple[int, ...]] = {}, + name: str | None = None, + lock: bool | None = None, + inline_array: bool | None = None, + chunked_array_type: str | ChunkManagerEntrypoint[Any] | None = None, + from_array_kwargs: Any = None, + **chunks_kwargs: Any, + ) -> Self: + """Coerce this array's data into a dask array with the given chunks. + + If this variable is a non-dask array, it will be converted to dask + array. If it's a dask array, it will be rechunked to the given chunk + sizes. + + If neither chunks is not provided for one or more dimensions, chunk + sizes along that dimension will not be updated; non-dask arrays will be + converted into dask arrays with a single block. + + Parameters + ---------- + chunks : int, tuple or dict, optional + Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or + ``{'x': 5, 'y': 5}``. + name : str, optional + Used to generate the name for this array in the internal dask + graph. Does not need not be unique. + lock : bool, default: False + Passed on to :py:func:`dask.array.from_array`, if the array is not + already as dask array. + inline_array : bool, default: False + Passed on to :py:func:`dask.array.from_array`, if the array is not + already as dask array. + chunked_array_type: str, optional + Which chunked array type to coerce this datasets' arrays to. + Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system. + Experimental API that should not be relied upon. + from_array_kwargs: dict, optional + Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create + chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg. + For example, with dask as the default chunked array type, this method would pass additional kwargs + to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon. + **chunks_kwargs : {dim: chunks, ...}, optional + The keyword arguments form of ``chunks``. + One of chunks or chunks_kwargs must be provided. + + Returns + ------- + chunked : xarray.Variable + + See Also + -------- + Variable.chunks + Variable.chunksizes + xarray.unify_chunks + dask.array.from_array + """ + + if from_array_kwargs is None: + from_array_kwargs = {} + + # TODO deprecate passing these dask-specific arguments explicitly. In future just pass everything via from_array_kwargs + _from_array_kwargs = consolidate_dask_from_array_kwargs( + from_array_kwargs, + name=name, + lock=lock, + inline_array=inline_array, + ) + + return super().chunk( + chunks=chunks, + chunked_array_type=chunked_array_type, + from_array_kwargs=_from_array_kwargs, + **chunks_kwargs, + ) + class IndexVariable(Variable): """Wrapper for accommodating a pandas.Index in an xarray.Variable. diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index a86121eb9c8..ae9521309e0 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -9,8 +9,8 @@ from xarray.core import duck_array_ops, utils from xarray.core.alignment import align, broadcast from xarray.core.computation import apply_ufunc, dot -from xarray.core.pycompat import is_duck_dask_array from xarray.core.types import Dims, T_DataArray, T_Xarray +from xarray.namedarray.utils import is_duck_dask_array from xarray.util.deprecation_helpers import _deprecate_positional_args # Weighted quantile methods are a subset of the numpy supported quantile methods. diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index 2a07389a349..b715973814f 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -1,9 +1,11 @@ from __future__ import annotations +import sys from collections.abc import Hashable, Iterable, Mapping, Sequence from enum import Enum from types import ModuleType from typing import ( + TYPE_CHECKING, Any, Callable, Final, @@ -18,6 +20,17 @@ import numpy as np +try: + if sys.version_info >= (3, 11): + from typing import TypeAlias + else: + from typing_extensions import TypeAlias +except ImportError: + if TYPE_CHECKING: + raise + else: + Self: Any = None + # Singleton type, as per https://github.com/python/typing/pull/240 class Default(Enum): @@ -64,6 +77,11 @@ def dtype(self) -> _DType_co: ... _AxisLike = Union[_Axis, _Axes] _Chunks = tuple[_Shape, ...] +_NormalizedChunks = tuple[tuple[int, ...], ...] +# FYI in some cases we don't allow `None`, which this doesn't take account of. +T_ChunkDim: TypeAlias = Union[int, Literal["auto"], None, tuple[int, ...]] +# We allow the tuple form of this (though arguably we could transition to named dims only) +T_Chunks: TypeAlias = Union[T_ChunkDim, Mapping[Any, T_ChunkDim]] _Dim = Hashable _Dims = tuple[_Dim, ...] diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index e2830002e11..29722690437 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -20,6 +20,11 @@ # TODO: get rid of this after migrating this class to array API from xarray.core import dtypes, formatting, formatting_html +from xarray.core.indexing import ( + ExplicitlyIndexed, + ImplicitToExplicitIndexingAdapter, + OuterIndexer, +) from xarray.namedarray._aggregations import NamedArrayAggregations from xarray.namedarray._typing import ( ErrorOptionsWithWarn, @@ -35,9 +40,12 @@ _SupportsImag, _SupportsReal, ) +from xarray.namedarray.parallelcompat import guess_chunkmanager +from xarray.namedarray.pycompat import to_numpy from xarray.namedarray.utils import ( either_dict_or_kwargs, infix_dims, + is_dict_like, is_duck_dask_array, to_0d_object_array, ) @@ -60,6 +68,7 @@ _ShapeType, duckarray, ) + from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint try: from dask.typing import ( @@ -720,6 +729,109 @@ def sizes(self) -> dict[_Dim, _IntOrUnknown]: """Ordered mapping from dimension names to lengths.""" return dict(zip(self.dims, self.shape)) + def chunk( + self, + chunks: int | Literal["auto"] | Mapping[Any, None | int | tuple[int, ...]] = {}, + chunked_array_type: str | ChunkManagerEntrypoint[Any] | None = None, + from_array_kwargs: Any = None, + **chunks_kwargs: Any, + ) -> Self: + """Coerce this array's data into a dask array with the given chunks. + + If this variable is a non-dask array, it will be converted to dask + array. If it's a dask array, it will be rechunked to the given chunk + sizes. + + If neither chunks is not provided for one or more dimensions, chunk + sizes along that dimension will not be updated; non-dask arrays will be + converted into dask arrays with a single block. + + Parameters + ---------- + chunks : int, tuple or dict, optional + Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or + ``{'x': 5, 'y': 5}``. + chunked_array_type: str, optional + Which chunked array type to coerce this datasets' arrays to. + Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system. + Experimental API that should not be relied upon. + from_array_kwargs: dict, optional + Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create + chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg. + For example, with dask as the default chunked array type, this method would pass additional kwargs + to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon. + **chunks_kwargs : {dim: chunks, ...}, optional + The keyword arguments form of ``chunks``. + One of chunks or chunks_kwargs must be provided. + + Returns + ------- + chunked : xarray.Variable + + See Also + -------- + Variable.chunks + Variable.chunksizes + xarray.unify_chunks + dask.array.from_array + """ + + if from_array_kwargs is None: + from_array_kwargs = {} + + if chunks is None: + warnings.warn( + "None value for 'chunks' is deprecated. " + "It will raise an error in the future. Use instead '{}'", + category=FutureWarning, + ) + chunks = {} + + if isinstance(chunks, (float, str, int, tuple, list)): + # TODO we shouldn't assume here that other chunkmanagers can handle these types + # TODO should we call normalize_chunks here? + pass # dask.array.from_array can handle these directly + else: + chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") + + if is_dict_like(chunks): + chunks = {self.get_axis_num(dim): chunk for dim, chunk in chunks.items()} + + chunkmanager = guess_chunkmanager(chunked_array_type) + + data_old = self._data + if chunkmanager.is_chunked_array(data_old): + data_chunked = chunkmanager.rechunk(data_old, chunks) # type: ignore[arg-type] + else: + if not isinstance(data_old, ExplicitlyIndexed): + ndata = data_old + else: + # Unambiguously handle array storage backends (like NetCDF4 and h5py) + # that can't handle general array indexing. For example, in netCDF4 you + # can do "outer" indexing along two dimensions independent, which works + # differently from how NumPy handles it. + # da.from_array works by using lazy indexing with a tuple of slices. + # Using OuterIndexer is a pragmatic choice: dask does not yet handle + # different indexing types in an explicit way: + # https://github.com/dask/dask/issues/2883 + ndata = ImplicitToExplicitIndexingAdapter(data_old, OuterIndexer) # type: ignore[no-untyped-call, assignment] + + if is_dict_like(chunks): + chunks = tuple(chunks.get(n, s) for n, s in enumerate(ndata.shape)) # type: ignore[assignment] + + data_chunked = chunkmanager.from_array(ndata, chunks, **from_array_kwargs) # type: ignore[arg-type] + + return self._replace(data=data_chunked) + + def to_numpy(self) -> np.ndarray[Any, Any]: + """Coerces wrapped data to numpy and returns a numpy.ndarray""" + # TODO an entrypoint so array libraries can choose coercion method? + return to_numpy(self._data) + + def as_numpy(self) -> Self: + """Coerces wrapped data into a numpy array, returning a Variable.""" + return self._replace(data=self.to_numpy()) + def reduce( self, func: Callable[..., Any], @@ -897,7 +1009,7 @@ def permute_dims( if not dim: dims = self.dims[::-1] else: - dims = tuple(infix_dims(dim, self.dims, missing_dims)) # type: ignore + dims = tuple(infix_dims(dim, self.dims, missing_dims)) # type: ignore[arg-type] if len(dims) < 2 or dims == self.dims: # no need to transpose if only one dimension @@ -976,7 +1088,7 @@ def broadcast_to( # Ensure the dimensions are in the correct order ordered_dims = list(broadcast_shape.keys()) ordered_shape = tuple(broadcast_shape[d] for d in ordered_dims) - data = duck_array_ops.broadcast_to(self._data, ordered_shape) # type: ignore # TODO: use array-api-compat function + data = duck_array_ops.broadcast_to(self._data, ordered_shape) # type: ignore[no-untyped-call] # TODO: use array-api-compat function return self._new(data=data, dims=ordered_dims) def expand_dims( diff --git a/xarray/core/daskmanager.py b/xarray/namedarray/daskmanager.py similarity index 62% rename from xarray/core/daskmanager.py rename to xarray/namedarray/daskmanager.py index efa04bc3df2..14744d2de6b 100644 --- a/xarray/core/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -6,16 +6,28 @@ import numpy as np from packaging.version import Version -from xarray.core.duck_array_ops import dask_available from xarray.core.indexing import ImplicitToExplicitIndexingAdapter -from xarray.core.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray -from xarray.core.pycompat import is_duck_dask_array +from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray +from xarray.namedarray.utils import is_duck_dask_array, module_available if TYPE_CHECKING: - from xarray.core.types import DaskArray, T_Chunks, T_NormalizedChunks + from xarray.namedarray._typing import ( + T_Chunks, + _DType_co, + _NormalizedChunks, + duckarray, + ) + try: + from dask.array import Array as DaskArray + except ImportError: + DaskArray = np.ndarray[Any, Any] # type: ignore[assignment, misc] -class DaskManager(ChunkManagerEntrypoint["DaskArray"]): + +dask_available = module_available("dask") + + +class DaskManager(ChunkManagerEntrypoint["DaskArray"]): # type: ignore[type-var] array_cls: type[DaskArray] available: bool = dask_available @@ -26,20 +38,20 @@ def __init__(self) -> None: self.array_cls = Array - def is_chunked_array(self, data: Any) -> bool: + def is_chunked_array(self, data: duckarray[Any, Any]) -> bool: return is_duck_dask_array(data) - def chunks(self, data: DaskArray) -> T_NormalizedChunks: - return data.chunks + def chunks(self, data: Any) -> _NormalizedChunks: + return data.chunks # type: ignore[no-any-return] def normalize_chunks( self, - chunks: T_Chunks | T_NormalizedChunks, + chunks: T_Chunks | _NormalizedChunks, shape: tuple[int, ...] | None = None, limit: int | None = None, - dtype: np.dtype | None = None, - previous_chunks: T_NormalizedChunks | None = None, - ) -> T_NormalizedChunks: + dtype: _DType_co | None = None, + previous_chunks: _NormalizedChunks | None = None, + ) -> Any: """Called by open_dataset""" from dask.array.core import normalize_chunks @@ -49,9 +61,11 @@ def normalize_chunks( limit=limit, dtype=dtype, previous_chunks=previous_chunks, - ) + ) # type: ignore[no-untyped-call] - def from_array(self, data: Any, chunks, **kwargs) -> DaskArray: + def from_array( + self, data: Any, chunks: T_Chunks | _NormalizedChunks, **kwargs: Any + ) -> DaskArray | Any: import dask.array as da if isinstance(data, ImplicitToExplicitIndexingAdapter): @@ -62,12 +76,14 @@ def from_array(self, data: Any, chunks, **kwargs) -> DaskArray: data, chunks, **kwargs, - ) + ) # type: ignore[no-untyped-call] - def compute(self, *data: DaskArray, **kwargs) -> tuple[np.ndarray, ...]: + def compute( + self, *data: Any, **kwargs: Any + ) -> tuple[np.ndarray[Any, _DType_co], ...]: from dask.array import compute - return compute(*data, **kwargs) + return compute(*data, **kwargs) # type: ignore[no-untyped-call, no-any-return] @property def array_api(self) -> Any: @@ -75,16 +91,16 @@ def array_api(self) -> Any: return da - def reduction( + def reduction( # type: ignore[override] self, arr: T_ChunkedArray, - func: Callable, - combine_func: Callable | None = None, - aggregate_func: Callable | None = None, + func: Callable[..., Any], + combine_func: Callable[..., Any] | None = None, + aggregate_func: Callable[..., Any] | None = None, axis: int | Sequence[int] | None = None, - dtype: np.dtype | None = None, + dtype: _DType_co | None = None, keepdims: bool = False, - ) -> T_ChunkedArray: + ) -> DaskArray | Any: from dask.array import reduction return reduction( @@ -95,18 +111,18 @@ def reduction( axis=axis, dtype=dtype, keepdims=keepdims, - ) + ) # type: ignore[no-untyped-call] - def scan( + def scan( # type: ignore[override] self, - func: Callable, - binop: Callable, + func: Callable[..., Any], + binop: Callable[..., Any], ident: float, arr: T_ChunkedArray, axis: int | None = None, - dtype: np.dtype | None = None, - **kwargs, - ) -> DaskArray: + dtype: _DType_co | None = None, + **kwargs: Any, + ) -> DaskArray | Any: from dask.array.reductions import cumreduction return cumreduction( @@ -117,23 +133,23 @@ def scan( axis=axis, dtype=dtype, **kwargs, - ) + ) # type: ignore[no-untyped-call] def apply_gufunc( self, - func: Callable, + func: Callable[..., Any], signature: str, *args: Any, axes: Sequence[tuple[int, ...]] | None = None, axis: int | None = None, keepdims: bool = False, - output_dtypes: Sequence[np.typing.DTypeLike] | None = None, + output_dtypes: Sequence[_DType_co] | None = None, output_sizes: dict[str, int] | None = None, vectorize: bool | None = None, allow_rechunk: bool = False, - meta: tuple[np.ndarray, ...] | None = None, - **kwargs, - ): + meta: tuple[np.ndarray[Any, _DType_co], ...] | None = None, + **kwargs: Any, + ) -> Any: from dask.array.gufunc import apply_gufunc return apply_gufunc( @@ -149,18 +165,18 @@ def apply_gufunc( allow_rechunk=allow_rechunk, meta=meta, **kwargs, - ) + ) # type: ignore[no-untyped-call] def map_blocks( self, - func: Callable, + func: Callable[..., Any], *args: Any, - dtype: np.typing.DTypeLike | None = None, + dtype: _DType_co | None = None, chunks: tuple[int, ...] | None = None, drop_axis: int | Sequence[int] | None = None, new_axis: int | Sequence[int] | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> Any: import dask from dask.array import map_blocks @@ -178,24 +194,24 @@ def map_blocks( drop_axis=drop_axis, new_axis=new_axis, **kwargs, - ) + ) # type: ignore[no-untyped-call] def blockwise( self, - func: Callable, - out_ind: Iterable, + func: Callable[..., Any], + out_ind: Iterable[Any], *args: Any, # can't type this as mypy assumes args are all same type, but dask blockwise args alternate types name: str | None = None, - token=None, - dtype: np.dtype | None = None, - adjust_chunks: dict[Any, Callable] | None = None, + token: Any | None = None, + dtype: _DType_co | None = None, + adjust_chunks: dict[Any, Callable[..., Any]] | None = None, new_axes: dict[Any, int] | None = None, align_arrays: bool = True, concatenate: bool | None = None, - meta=None, - **kwargs, - ): + meta: tuple[np.ndarray[Any, _DType_co], ...] | None = None, + **kwargs: Any, + ) -> DaskArray | Any: from dask.array import blockwise return blockwise( @@ -211,23 +227,23 @@ def blockwise( concatenate=concatenate, meta=meta, **kwargs, - ) + ) # type: ignore[no-untyped-call] def unify_chunks( self, *args: Any, # can't type this as mypy assumes args are all same type, but dask unify_chunks args alternate types - **kwargs, - ) -> tuple[dict[str, T_NormalizedChunks], list[DaskArray]]: + **kwargs: Any, + ) -> tuple[dict[str, _NormalizedChunks], list[DaskArray]]: from dask.array.core import unify_chunks - return unify_chunks(*args, **kwargs) + return unify_chunks(*args, **kwargs) # type: ignore[no-any-return, no-untyped-call] def store( self, - sources: DaskArray | Sequence[DaskArray], + sources: Any | Sequence[Any], targets: Any, - **kwargs, - ): + **kwargs: Any, + ) -> Any: from dask.array import store return store( diff --git a/xarray/core/parallelcompat.py b/xarray/namedarray/parallelcompat.py similarity index 90% rename from xarray/core/parallelcompat.py rename to xarray/namedarray/parallelcompat.py index c009ef48419..c6263bff4ff 100644 --- a/xarray/core/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -11,26 +11,42 @@ from abc import ABC, abstractmethod from collections.abc import Iterable, Sequence from importlib.metadata import EntryPoint, entry_points -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Generic, - TypeVar, -) +from typing import TYPE_CHECKING, Any, Callable, Generic, Protocol, TypeVar import numpy as np -from xarray.core.pycompat import is_chunked_array - -T_ChunkedArray = TypeVar("T_ChunkedArray", bound=Any) +from xarray.namedarray.pycompat import is_chunked_array if TYPE_CHECKING: - from xarray.core.types import T_Chunks, T_DuckArray, T_NormalizedChunks + from xarray.namedarray._typing import ( + _Chunks, + _DType, + _DType_co, + _NormalizedChunks, + _ShapeType, + duckarray, + ) + + +class ChunkedArrayMixinProtocol(Protocol): + def rechunk(self, chunks: Any, **kwargs: Any) -> Any: ... + + @property + def dtype(self) -> np.dtype[Any]: ... + + @property + def chunks(self) -> _NormalizedChunks: ... + + def compute( + self, *data: Any, **kwargs: Any + ) -> tuple[np.ndarray[Any, _DType_co], ...]: ... + + +T_ChunkedArray = TypeVar("T_ChunkedArray", bound=ChunkedArrayMixinProtocol) @functools.lru_cache(maxsize=1) -def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint]: +def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint[Any]]: """ Return a dictionary of available chunk managers and their ChunkManagerEntrypoint subclass objects. @@ -54,7 +70,7 @@ def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint]: def load_chunkmanagers( entrypoints: Sequence[EntryPoint], -) -> dict[str, ChunkManagerEntrypoint]: +) -> dict[str, ChunkManagerEntrypoint[Any]]: """Load entrypoints and instantiate chunkmanagers only once.""" loaded_entrypoints = { @@ -70,8 +86,8 @@ def load_chunkmanagers( def guess_chunkmanager( - manager: str | ChunkManagerEntrypoint | None, -) -> ChunkManagerEntrypoint: + manager: str | ChunkManagerEntrypoint[Any] | None, +) -> ChunkManagerEntrypoint[Any]: """ Get namespace of chunk-handling methods, guessing from what's available. @@ -105,7 +121,7 @@ def guess_chunkmanager( ) -def get_chunked_array_type(*args) -> ChunkManagerEntrypoint: +def get_chunked_array_type(*args: Any) -> ChunkManagerEntrypoint[Any]: """ Detects which parallel backend should be used for given set of arrays. @@ -175,7 +191,7 @@ def __init__(self) -> None: """Used to set the array_cls attribute at import time.""" raise NotImplementedError() - def is_chunked_array(self, data: Any) -> bool: + def is_chunked_array(self, data: duckarray[Any, Any]) -> bool: """ Check if the given object is an instance of this type of chunked array. @@ -196,7 +212,7 @@ def is_chunked_array(self, data: Any) -> bool: return isinstance(data, self.array_cls) @abstractmethod - def chunks(self, data: T_ChunkedArray) -> T_NormalizedChunks: + def chunks(self, data: T_ChunkedArray) -> _NormalizedChunks: """ Return the current chunks of the given array. @@ -222,12 +238,12 @@ def chunks(self, data: T_ChunkedArray) -> T_NormalizedChunks: @abstractmethod def normalize_chunks( self, - chunks: T_Chunks | T_NormalizedChunks, - shape: tuple[int, ...] | None = None, + chunks: _Chunks | _NormalizedChunks, + shape: _ShapeType | None = None, limit: int | None = None, - dtype: np.dtype | None = None, - previous_chunks: T_NormalizedChunks | None = None, - ) -> T_NormalizedChunks: + dtype: _DType | None = None, + previous_chunks: _NormalizedChunks | None = None, + ) -> _NormalizedChunks: """ Normalize given chunking pattern into an explicit tuple of tuples representation. @@ -258,7 +274,7 @@ def normalize_chunks( @abstractmethod def from_array( - self, data: T_DuckArray | np.typing.ArrayLike, chunks: T_Chunks, **kwargs + self, data: duckarray[Any, Any], chunks: _Chunks, **kwargs: Any ) -> T_ChunkedArray: """ Create a chunked array from a non-chunked numpy-like array. @@ -285,9 +301,9 @@ def from_array( def rechunk( self, data: T_ChunkedArray, - chunks: T_NormalizedChunks | tuple[int, ...] | T_Chunks, - **kwargs, - ) -> T_ChunkedArray: + chunks: _NormalizedChunks | tuple[int, ...] | _Chunks, + **kwargs: Any, + ) -> Any: """ Changes the chunking pattern of the given array. @@ -314,7 +330,9 @@ def rechunk( return data.rechunk(chunks, **kwargs) @abstractmethod - def compute(self, *data: T_ChunkedArray | Any, **kwargs) -> tuple[np.ndarray, ...]: + def compute( + self, *data: T_ChunkedArray | Any, **kwargs: Any + ) -> tuple[np.ndarray[Any, _DType_co], ...]: """ Computes one or more chunked arrays, returning them as eager numpy arrays. @@ -358,11 +376,11 @@ def array_api(self) -> Any: def reduction( self, arr: T_ChunkedArray, - func: Callable, - combine_func: Callable | None = None, - aggregate_func: Callable | None = None, + func: Callable[..., Any], + combine_func: Callable[..., Any] | None = None, + aggregate_func: Callable[..., Any] | None = None, axis: int | Sequence[int] | None = None, - dtype: np.dtype | None = None, + dtype: _DType_co | None = None, keepdims: bool = False, ) -> T_ChunkedArray: """ @@ -406,13 +424,13 @@ def reduction( def scan( self, - func: Callable, - binop: Callable, + func: Callable[..., Any], + binop: Callable[..., Any], ident: float, arr: T_ChunkedArray, axis: int | None = None, - dtype: np.dtype | None = None, - **kwargs, + dtype: _DType_co | None = None, + **kwargs: Any, ) -> T_ChunkedArray: """ General version of a 1D scan, also known as a cumulative array reduction. @@ -444,15 +462,15 @@ def scan( @abstractmethod def apply_gufunc( self, - func: Callable, + func: Callable[..., Any], signature: str, *args: Any, axes: Sequence[tuple[int, ...]] | None = None, keepdims: bool = False, - output_dtypes: Sequence[np.typing.DTypeLike] | None = None, + output_dtypes: Sequence[_DType_co] | None = None, vectorize: bool | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> Any: """ Apply a generalized ufunc or similar python function to arrays. @@ -530,14 +548,14 @@ def apply_gufunc( def map_blocks( self, - func: Callable, + func: Callable[..., Any], *args: Any, - dtype: np.typing.DTypeLike | None = None, + dtype: _DType_co | None = None, chunks: tuple[int, ...] | None = None, drop_axis: int | Sequence[int] | None = None, new_axis: int | Sequence[int] | None = None, - **kwargs, - ): + **kwargs: Any, + ) -> Any: """ Map a function across all blocks of a chunked array. @@ -578,14 +596,14 @@ def map_blocks( def blockwise( self, - func: Callable, - out_ind: Iterable, + func: Callable[..., Any], + out_ind: Iterable[Any], *args: Any, # can't type this as mypy assumes args are all same type, but dask blockwise args alternate types - adjust_chunks: dict[Any, Callable] | None = None, + adjust_chunks: dict[Any, Callable[..., Any]] | None = None, new_axes: dict[Any, int] | None = None, align_arrays: bool = True, - **kwargs, - ): + **kwargs: Any, + ) -> Any: """ Tensor operation: Generalized inner and outer products. @@ -630,8 +648,8 @@ def blockwise( def unify_chunks( self, *args: Any, # can't type this as mypy assumes args are all same type, but dask unify_chunks args alternate types - **kwargs, - ) -> tuple[dict[str, T_NormalizedChunks], list[T_ChunkedArray]]: + **kwargs: Any, + ) -> tuple[dict[str, _NormalizedChunks], list[T_ChunkedArray]]: """ Unify chunks across a sequence of arrays. @@ -654,7 +672,7 @@ def store( sources: T_ChunkedArray | Sequence[T_ChunkedArray], targets: Any, **kwargs: dict[str, Any], - ): + ) -> Any: """ Store chunked arrays in array-like objects, overwriting data in target. diff --git a/xarray/core/pycompat.py b/xarray/namedarray/pycompat.py similarity index 76% rename from xarray/core/pycompat.py rename to xarray/namedarray/pycompat.py index 32ef408f7cc..3ce33d4d8ea 100644 --- a/xarray/core/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -7,13 +7,15 @@ import numpy as np from packaging.version import Version -from xarray.core.utils import is_duck_array, is_scalar, module_available +from xarray.core.utils import is_scalar +from xarray.namedarray.utils import is_duck_array, is_duck_dask_array integer_types = (int, np.integer) if TYPE_CHECKING: ModType = Literal["dask", "pint", "cupy", "sparse", "cubed", "numbagg"] DuckArrayTypes = tuple[type[Any], ...] # TODO: improve this? maybe Generic + from xarray.namedarray._typing import _DType, _ShapeType, duckarray class DuckArrayModule: @@ -86,37 +88,27 @@ def mod_version(mod: ModType) -> Version: return _get_cached_duck_array_module(mod).version -def is_dask_collection(x): - if module_available("dask"): - from dask.base import is_dask_collection - - return is_dask_collection(x) - return False - - -def is_duck_dask_array(x): - return is_duck_array(x) and is_dask_collection(x) - - -def is_chunked_array(x) -> bool: +def is_chunked_array(x: duckarray[Any, Any]) -> bool: return is_duck_dask_array(x) or (is_duck_array(x) and hasattr(x, "chunks")) -def is_0d_dask_array(x): +def is_0d_dask_array(x: duckarray[Any, Any]) -> bool: return is_duck_dask_array(x) and is_scalar(x) -def to_numpy(data) -> np.ndarray: +def to_numpy( + data: duckarray[Any, Any], **kwargs: dict[str, Any] +) -> np.ndarray[Any, np.dtype[Any]]: from xarray.core.indexing import ExplicitlyIndexed - from xarray.core.parallelcompat import get_chunked_array_type + from xarray.namedarray.parallelcompat import get_chunked_array_type if isinstance(data, ExplicitlyIndexed): - data = data.get_duck_array() + data = data.get_duck_array() # type: ignore[no-untyped-call] # TODO first attempt to call .to_numpy() once some libraries implement it - if hasattr(data, "chunks"): + if is_chunked_array(data): chunkmanager = get_chunked_array_type(data) - data, *_ = chunkmanager.compute(data) + data, *_ = chunkmanager.compute(data, **kwargs) if isinstance(data, array_type("cupy")): data = data.get() # pint has to be imported dynamically as pint imports xarray @@ -129,12 +121,18 @@ def to_numpy(data) -> np.ndarray: return data -def to_duck_array(data): +def to_duck_array(data: Any, **kwargs: dict[str, Any]) -> duckarray[_ShapeType, _DType]: from xarray.core.indexing import ExplicitlyIndexed + from xarray.namedarray.parallelcompat import get_chunked_array_type + + if is_chunked_array(data): + chunkmanager = get_chunked_array_type(data) + loaded_data, *_ = chunkmanager.compute(data, **kwargs) # type: ignore[var-annotated] + return loaded_data if isinstance(data, ExplicitlyIndexed): - return data.get_duck_array() + return data.get_duck_array() # type: ignore[no-untyped-call, no-any-return] elif is_duck_array(data): return data else: - return np.asarray(data) + return np.asarray(data) # type: ignore[return-value] diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 339a5037832..0326a6173cd 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -1,5 +1,6 @@ from __future__ import annotations +import importlib import sys import warnings from collections.abc import Hashable, Iterable, Iterator, Mapping @@ -7,6 +8,7 @@ from typing import TYPE_CHECKING, Any, TypeVar, cast import numpy as np +from packaging.version import Version from xarray.namedarray._typing import ErrorOptionsWithWarn, _DimsLike @@ -18,8 +20,6 @@ from numpy.typing import NDArray - from xarray.namedarray._typing import _Dim, duckarray - try: from dask.array.core import Array as DaskArray from dask.typing import DaskCollection @@ -27,6 +27,8 @@ DaskArray = NDArray # type: ignore DaskCollection: Any = NDArray # type: ignore + from xarray.namedarray._typing import _Dim, duckarray + K = TypeVar("K") V = TypeVar("V") @@ -34,7 +36,7 @@ @lru_cache -def module_available(module: str) -> bool: +def module_available(module: str, minversion: str | None = None) -> bool: """Checks whether a module is installed without importing it. Use this for a lightweight check and lazy imports. @@ -43,27 +45,53 @@ def module_available(module: str) -> bool: ---------- module : str Name of the module. + minversion : str, optional + Minimum version of the module Returns ------- available : bool Whether the module is installed. """ - from importlib.util import find_spec + if importlib.util.find_spec(module) is None: + return False + + if minversion is not None: + version = importlib.metadata.version(module) - return find_spec(module) is not None + return Version(version) >= Version(minversion) + + return True def is_dask_collection(x: object) -> TypeGuard[DaskCollection]: if module_available("dask"): - from dask.typing import DaskCollection + from dask.base import is_dask_collection - return isinstance(x, DaskCollection) + # use is_dask_collection function instead of dask.typing.DaskCollection + # see https://github.com/pydata/xarray/pull/8241#discussion_r1476276023 + return is_dask_collection(x) return False +def is_duck_array(value: Any) -> TypeGuard[duckarray[Any, Any]]: + # TODO: replace is_duck_array with runtime checks via _arrayfunction_or_api protocol on + # python 3.12 and higher (see https://github.com/pydata/xarray/issues/8696#issuecomment-1924588981) + if isinstance(value, np.ndarray): + return True + return ( + hasattr(value, "ndim") + and hasattr(value, "shape") + and hasattr(value, "dtype") + and ( + (hasattr(value, "__array_function__") and hasattr(value, "__array_ufunc__")) + or hasattr(value, "__array_namespace__") + ) + ) + + def is_duck_dask_array(x: duckarray[Any, Any]) -> TypeGuard[DaskArray]: - return is_dask_collection(x) + return is_duck_array(x) and is_dask_collection(x) def to_0d_object_array( diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index eac2f6e87bf..804e1cfd795 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -13,8 +13,8 @@ from xarray.core.indexes import PandasMultiIndex from xarray.core.options import OPTIONS -from xarray.core.pycompat import DuckArrayModule from xarray.core.utils import is_scalar, module_available +from xarray.namedarray.pycompat import DuckArrayModule nc_time_axis_available = module_available("nc_time_axis") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 4115edc0278..668d14b86c9 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -54,7 +54,7 @@ from xarray.conventions import encode_dataset_coordinates from xarray.core import indexing from xarray.core.options import set_options -from xarray.core.pycompat import array_type +from xarray.namedarray.pycompat import array_type from xarray.tests import ( assert_allclose, assert_array_equal, diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 9ece96d03b7..9a5589ff872 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -33,7 +33,7 @@ from xarray.coding.variables import SerializationWarning from xarray.conventions import _update_bounds_attributes, cf_encoder from xarray.core.common import contains_cftime_datetimes -from xarray.core.pycompat import is_duck_dask_array +from xarray.core.utils import is_duck_dask_array from xarray.testing import assert_equal, assert_identical from xarray.tests import ( FirstElementAccessibleArray, diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index ddf3aa299b0..267a5ca603a 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -39,8 +39,8 @@ from xarray.core.common import duck_array_ops, full_like from xarray.core.coordinates import Coordinates, DatasetCoordinates from xarray.core.indexes import Index, PandasIndex -from xarray.core.pycompat import array_type, integer_types from xarray.core.utils import is_scalar +from xarray.namedarray.pycompat import array_type, integer_types from xarray.testing import _assert_internal_invariants from xarray.tests import ( DuckArrayWrapper, diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 7757ec58edc..df1ab1f40f9 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -27,7 +27,7 @@ timedelta_to_numeric, where, ) -from xarray.core.pycompat import array_type +from xarray.namedarray.pycompat import array_type from xarray.testing import assert_allclose, assert_equal, assert_identical from xarray.tests import ( arm_xfail, diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 45a649605f3..08558f3ced8 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -14,7 +14,7 @@ _get_nan_block_lengths, get_clean_interp_index, ) -from xarray.core.pycompat import array_type +from xarray.namedarray.pycompat import array_type from xarray.tests import ( _CFTIME_CALENDARS, assert_allclose, diff --git a/xarray/tests/test_parallelcompat.py b/xarray/tests/test_parallelcompat.py index ea324cafb76..d4a4e273bc0 100644 --- a/xarray/tests/test_parallelcompat.py +++ b/xarray/tests/test_parallelcompat.py @@ -5,14 +5,15 @@ import numpy as np import pytest -from xarray.core.daskmanager import DaskManager -from xarray.core.parallelcompat import ( +from xarray.core.types import T_Chunks, T_DuckArray, T_NormalizedChunks +from xarray.namedarray._typing import _Chunks +from xarray.namedarray.daskmanager import DaskManager +from xarray.namedarray.parallelcompat import ( ChunkManagerEntrypoint, get_chunked_array_type, guess_chunkmanager, list_chunkmanagers, ) -from xarray.core.types import T_Chunks, T_DuckArray, T_NormalizedChunks from xarray.tests import has_dask, requires_dask @@ -76,7 +77,7 @@ def normalize_chunks( return normalize_chunks(chunks, shape, limit, dtype, previous_chunks) def from_array( - self, data: T_DuckArray | np.typing.ArrayLike, chunks: T_Chunks, **kwargs + self, data: T_DuckArray | np.typing.ArrayLike, chunks: _Chunks, **kwargs ) -> DummyChunkedArray: from dask import array as da @@ -131,14 +132,14 @@ def register_dummy_chunkmanager(monkeypatch): This preserves the presence of the existing DaskManager, so a test that relies on this and DaskManager both being returned from list_chunkmanagers() at once would still work. - The monkeypatching changes the behavior of list_chunkmanagers when called inside xarray.core.parallelcompat, + The monkeypatching changes the behavior of list_chunkmanagers when called inside xarray.namedarray.parallelcompat, but not when called from this tests file. """ # Should include DaskManager iff dask is available to be imported preregistered_chunkmanagers = list_chunkmanagers() monkeypatch.setattr( - "xarray.core.parallelcompat.list_chunkmanagers", + "xarray.namedarray.parallelcompat.list_chunkmanagers", lambda: {"dummy": DummyChunkManager()} | preregistered_chunkmanagers, ) yield diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 697db9c5e80..1a2b9ab100c 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -15,7 +15,7 @@ import xarray as xr import xarray.plot as xplt from xarray import DataArray, Dataset -from xarray.core.utils import module_available +from xarray.namedarray.utils import module_available from xarray.plot.dataarray_plot import _infer_interval_breaks from xarray.plot.dataset_plot import _infer_meta_data from xarray.plot.utils import ( diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index a06d5472ac6..289149bdd6d 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -10,7 +10,7 @@ import xarray as xr from xarray import DataArray, Variable -from xarray.core.pycompat import array_type +from xarray.namedarray.pycompat import array_type from xarray.tests import assert_equal, assert_identical, requires_dask filterwarnings = pytest.mark.filterwarnings diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 7bdc2c92e44..50061c774a8 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -7,8 +7,7 @@ import pytest from xarray.core import duck_array_ops, utils -from xarray.core.utils import either_dict_or_kwargs, iterate_nested -from xarray.namedarray.utils import infix_dims +from xarray.core.utils import either_dict_or_kwargs, infix_dims, iterate_nested from xarray.tests import assert_array_equal, requires_dask diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index eb71df0dae6..6575f2e1740 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -26,10 +26,10 @@ PandasIndexingAdapter, VectorizedIndexer, ) -from xarray.core.pycompat import array_type from xarray.core.types import T_DuckArray from xarray.core.utils import NDArrayMixin from xarray.core.variable import as_compatible_data, as_variable +from xarray.namedarray.pycompat import array_type from xarray.tests import ( assert_allclose, assert_array_equal,