From 20f92c6c8866b3801dc32d49f8c912a74fa36bb8 Mon Sep 17 00:00:00 2001
From: Thomas Nicholas <thomas.nicholas@columbia.edu>
Date: Thu, 6 Apr 2023 13:10:08 -0400
Subject: [PATCH] move lazy indexing adapter up out of chunkmanager code

---
 xarray/core/daskmanager.py    | 53 +++++++----------------------------
 xarray/core/parallelcompat.py |  1 +
 xarray/core/variable.py       | 26 ++++++++++++++++-
 3 files changed, 36 insertions(+), 44 deletions(-)

diff --git a/xarray/core/daskmanager.py b/xarray/core/daskmanager.py
index 655569a6c82..757f169c00f 100644
--- a/xarray/core/daskmanager.py
+++ b/xarray/core/daskmanager.py
@@ -3,8 +3,8 @@
 
 import numpy as np
 
-from xarray.core import utils
 from xarray.core.duck_array_ops import dask_available
+from xarray.core.indexing import ImplicitToExplicitIndexingAdapter
 from xarray.core.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray, T_Chunks
 from xarray.core.pycompat import is_duck_dask_array
 
@@ -32,48 +32,15 @@ def chunks(self, data: "DaskArray") -> T_Chunks:
     def from_array(self, data, chunks, **kwargs) -> "DaskArray":
         import dask.array as da
 
-        from xarray.core import indexing
-
-        # dask-specific kwargs
-        name = kwargs.pop("name", None)
-        lock = kwargs.pop("lock", False)
-        inline_array = kwargs.pop("inline_array", False)
-
-        if is_duck_dask_array(data):
-            data = self.rechunk(data, chunks)
-        else:
-            # TODO move this up to variable.chunk
-            if isinstance(data, indexing.ExplicitlyIndexed):
-                # Unambiguously handle array storage backends (like NetCDF4 and h5py)
-                # that can't handle general array indexing. For example, in netCDF4 you
-                # can do "outer" indexing along two dimensions independent, which works
-                # differently from how NumPy handles it.
-                # da.from_array works by using lazy indexing with a tuple of slices.
-                # Using OuterIndexer is a pragmatic choice: dask does not yet handle
-                # different indexing types in an explicit way:
-                # https://github.com/dask/dask/issues/2883
-                data = indexing.ImplicitToExplicitIndexingAdapter(
-                    data, indexing.OuterIndexer
-                )
-
-                # All of our lazily loaded backend array classes should use NumPy
-                # array operations.
-                dask_kwargs = {"meta": np.ndarray}
-            else:
-                dask_kwargs = {}
-
-            if utils.is_dict_like(chunks):
-                chunks = tuple(chunks.get(n, s) for n, s in enumerate(data.shape))
-
-            data = da.from_array(
-                data,
-                chunks,
-                name=name,
-                lock=lock,
-                inline_array=inline_array,
-                **dask_kwargs,
-            )
-        return data
+        if isinstance(data, ImplicitToExplicitIndexingAdapter):
+            # lazily loaded backend array classes should use NumPy array operations.
+            kwargs["meta"] = np.ndarray
+
+        return da.from_array(
+            data,
+            chunks,
+            **kwargs,
+        )
 
     def compute(self, *data: "DaskArray", **kwargs) -> np.ndarray:
         from dask.array import compute
diff --git a/xarray/core/parallelcompat.py b/xarray/core/parallelcompat.py
index 68fa0d91538..a0f9eb5b5c5 100644
--- a/xarray/core/parallelcompat.py
+++ b/xarray/core/parallelcompat.py
@@ -167,6 +167,7 @@ def chunks(self, data: T_ChunkedArray) -> T_Chunks:
     def from_array(
         self, data: np.ndarray, chunks: T_Chunks, **kwargs
     ) -> T_ChunkedArray:
+        """Called when .chunk is called on an xarray object that is not already chunked."""
         ...
 
     def rechunk(
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 8c84a2914d5..86dafa0a3fa 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -1241,7 +1241,31 @@ def chunk(
             inline_array=inline_array,
         )
 
-        data = chunkmanager.from_array(self._data, chunks, **_from_array_kwargs)
+        data = self._data
+        if chunkmanager.is_chunked_array(data):
+            data = chunkmanager.rechunk(data, chunks)
+        else:
+            if isinstance(data, indexing.ExplicitlyIndexed):
+                # Unambiguously handle array storage backends (like NetCDF4 and h5py)
+                # that can't handle general array indexing. For example, in netCDF4 you
+                # can do "outer" indexing along two dimensions independent, which works
+                # differently from how NumPy handles it.
+                # da.from_array works by using lazy indexing with a tuple of slices.
+                # Using OuterIndexer is a pragmatic choice: dask does not yet handle
+                # different indexing types in an explicit way:
+                # https://github.com/dask/dask/issues/2883
+                data = indexing.ImplicitToExplicitIndexingAdapter(
+                    data, indexing.OuterIndexer
+                )
+
+            if utils.is_dict_like(chunks):
+                chunks = tuple(chunks.get(n, s) for n, s in enumerate(data.shape))
+
+            data = chunkmanager.from_array(
+                data,
+                chunks,
+                **_from_array_kwargs,
+            )
 
         return self._replace(data=data)