From 20f92c6c8866b3801dc32d49f8c912a74fa36bb8 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 6 Apr 2023 13:10:08 -0400 Subject: [PATCH] move lazy indexing adapter up out of chunkmanager code --- xarray/core/daskmanager.py | 53 +++++++---------------------------- xarray/core/parallelcompat.py | 1 + xarray/core/variable.py | 26 ++++++++++++++++- 3 files changed, 36 insertions(+), 44 deletions(-) diff --git a/xarray/core/daskmanager.py b/xarray/core/daskmanager.py index 655569a6c82..757f169c00f 100644 --- a/xarray/core/daskmanager.py +++ b/xarray/core/daskmanager.py @@ -3,8 +3,8 @@ import numpy as np -from xarray.core import utils from xarray.core.duck_array_ops import dask_available +from xarray.core.indexing import ImplicitToExplicitIndexingAdapter from xarray.core.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray, T_Chunks from xarray.core.pycompat import is_duck_dask_array @@ -32,48 +32,15 @@ def chunks(self, data: "DaskArray") -> T_Chunks: def from_array(self, data, chunks, **kwargs) -> "DaskArray": import dask.array as da - from xarray.core import indexing - - # dask-specific kwargs - name = kwargs.pop("name", None) - lock = kwargs.pop("lock", False) - inline_array = kwargs.pop("inline_array", False) - - if is_duck_dask_array(data): - data = self.rechunk(data, chunks) - else: - # TODO move this up to variable.chunk - if isinstance(data, indexing.ExplicitlyIndexed): - # Unambiguously handle array storage backends (like NetCDF4 and h5py) - # that can't handle general array indexing. For example, in netCDF4 you - # can do "outer" indexing along two dimensions independent, which works - # differently from how NumPy handles it. - # da.from_array works by using lazy indexing with a tuple of slices. - # Using OuterIndexer is a pragmatic choice: dask does not yet handle - # different indexing types in an explicit way: - # https://github.com/dask/dask/issues/2883 - data = indexing.ImplicitToExplicitIndexingAdapter( - data, indexing.OuterIndexer - ) - - # All of our lazily loaded backend array classes should use NumPy - # array operations. - dask_kwargs = {"meta": np.ndarray} - else: - dask_kwargs = {} - - if utils.is_dict_like(chunks): - chunks = tuple(chunks.get(n, s) for n, s in enumerate(data.shape)) - - data = da.from_array( - data, - chunks, - name=name, - lock=lock, - inline_array=inline_array, - **dask_kwargs, - ) - return data + if isinstance(data, ImplicitToExplicitIndexingAdapter): + # lazily loaded backend array classes should use NumPy array operations. + kwargs["meta"] = np.ndarray + + return da.from_array( + data, + chunks, + **kwargs, + ) def compute(self, *data: "DaskArray", **kwargs) -> np.ndarray: from dask.array import compute diff --git a/xarray/core/parallelcompat.py b/xarray/core/parallelcompat.py index 68fa0d91538..a0f9eb5b5c5 100644 --- a/xarray/core/parallelcompat.py +++ b/xarray/core/parallelcompat.py @@ -167,6 +167,7 @@ def chunks(self, data: T_ChunkedArray) -> T_Chunks: def from_array( self, data: np.ndarray, chunks: T_Chunks, **kwargs ) -> T_ChunkedArray: + """Called when .chunk is called on an xarray object that is not already chunked.""" ... def rechunk( diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 8c84a2914d5..86dafa0a3fa 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1241,7 +1241,31 @@ def chunk( inline_array=inline_array, ) - data = chunkmanager.from_array(self._data, chunks, **_from_array_kwargs) + data = self._data + if chunkmanager.is_chunked_array(data): + data = chunkmanager.rechunk(data, chunks) + else: + if isinstance(data, indexing.ExplicitlyIndexed): + # Unambiguously handle array storage backends (like NetCDF4 and h5py) + # that can't handle general array indexing. For example, in netCDF4 you + # can do "outer" indexing along two dimensions independent, which works + # differently from how NumPy handles it. + # da.from_array works by using lazy indexing with a tuple of slices. + # Using OuterIndexer is a pragmatic choice: dask does not yet handle + # different indexing types in an explicit way: + # https://github.com/dask/dask/issues/2883 + data = indexing.ImplicitToExplicitIndexingAdapter( + data, indexing.OuterIndexer + ) + + if utils.is_dict_like(chunks): + chunks = tuple(chunks.get(n, s) for n, s in enumerate(data.shape)) + + data = chunkmanager.from_array( + data, + chunks, + **_from_array_kwargs, + ) return self._replace(data=data)