Skip to content

Commit

Permalink
move lazy indexing adapter up out of chunkmanager code
Browse files Browse the repository at this point in the history
  • Loading branch information
TomNicholas committed Apr 6, 2023
1 parent 6a7a043 commit 20f92c6
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 44 deletions.
53 changes: 10 additions & 43 deletions xarray/core/daskmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

import numpy as np

from xarray.core import utils
from xarray.core.duck_array_ops import dask_available
from xarray.core.indexing import ImplicitToExplicitIndexingAdapter
from xarray.core.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray, T_Chunks
from xarray.core.pycompat import is_duck_dask_array

Expand Down Expand Up @@ -32,48 +32,15 @@ def chunks(self, data: "DaskArray") -> T_Chunks:
def from_array(self, data, chunks, **kwargs) -> "DaskArray":
import dask.array as da

from xarray.core import indexing

# dask-specific kwargs
name = kwargs.pop("name", None)
lock = kwargs.pop("lock", False)
inline_array = kwargs.pop("inline_array", False)

if is_duck_dask_array(data):
data = self.rechunk(data, chunks)
else:
# TODO move this up to variable.chunk
if isinstance(data, indexing.ExplicitlyIndexed):
# Unambiguously handle array storage backends (like NetCDF4 and h5py)
# that can't handle general array indexing. For example, in netCDF4 you
# can do "outer" indexing along two dimensions independent, which works
# differently from how NumPy handles it.
# da.from_array works by using lazy indexing with a tuple of slices.
# Using OuterIndexer is a pragmatic choice: dask does not yet handle
# different indexing types in an explicit way:
# https://github.com/dask/dask/issues/2883
data = indexing.ImplicitToExplicitIndexingAdapter(
data, indexing.OuterIndexer
)

# All of our lazily loaded backend array classes should use NumPy
# array operations.
dask_kwargs = {"meta": np.ndarray}
else:
dask_kwargs = {}

if utils.is_dict_like(chunks):
chunks = tuple(chunks.get(n, s) for n, s in enumerate(data.shape))

data = da.from_array(
data,
chunks,
name=name,
lock=lock,
inline_array=inline_array,
**dask_kwargs,
)
return data
if isinstance(data, ImplicitToExplicitIndexingAdapter):
# lazily loaded backend array classes should use NumPy array operations.
kwargs["meta"] = np.ndarray

return da.from_array(
data,
chunks,
**kwargs,
)

def compute(self, *data: "DaskArray", **kwargs) -> np.ndarray:
from dask.array import compute
Expand Down
1 change: 1 addition & 0 deletions xarray/core/parallelcompat.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def chunks(self, data: T_ChunkedArray) -> T_Chunks:
def from_array(
self, data: np.ndarray, chunks: T_Chunks, **kwargs
) -> T_ChunkedArray:
"""Called when .chunk is called on an xarray object that is not already chunked."""
...

def rechunk(
Expand Down
26 changes: 25 additions & 1 deletion xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1241,7 +1241,31 @@ def chunk(
inline_array=inline_array,
)

data = chunkmanager.from_array(self._data, chunks, **_from_array_kwargs)
data = self._data
if chunkmanager.is_chunked_array(data):
data = chunkmanager.rechunk(data, chunks)
else:
if isinstance(data, indexing.ExplicitlyIndexed):
# Unambiguously handle array storage backends (like NetCDF4 and h5py)
# that can't handle general array indexing. For example, in netCDF4 you
# can do "outer" indexing along two dimensions independent, which works
# differently from how NumPy handles it.
# da.from_array works by using lazy indexing with a tuple of slices.
# Using OuterIndexer is a pragmatic choice: dask does not yet handle
# different indexing types in an explicit way:
# https://github.com/dask/dask/issues/2883
data = indexing.ImplicitToExplicitIndexingAdapter(
data, indexing.OuterIndexer
)

if utils.is_dict_like(chunks):
chunks = tuple(chunks.get(n, s) for n, s in enumerate(data.shape))

data = chunkmanager.from_array(
data,
chunks,
**_from_array_kwargs,
)

return self._replace(data=data)

Expand Down

0 comments on commit 20f92c6

Please sign in to comment.