Merge branch 'main' into opt-out-auto-create-index-variables

pydata · Mar 26, 2024 · 55c0b19 · 55c0b19
2 parents 66a6fd7 + 2f34895
commit 55c0b19
Show file tree

Hide file tree

Showing 3 changed files with 101 additions and 20 deletions.
diff --git a/doc/user-guide/indexing.rst b/doc/user-guide/indexing.rst
@@ -748,7 +748,7 @@ Whether array indexing returns a view or a copy of the underlying
 data depends on the nature of the labels.
 
 For positional (integer)
-indexing, xarray follows the same rules as NumPy:
+indexing, xarray follows the same `rules`_ as NumPy:
 
 * Positional indexing with only integers and slices returns a view.
 * Positional indexing with arrays or lists returns a copy.
@@ -765,8 +765,10 @@ Whether data is a copy or a view is more predictable in xarray than in pandas, s
 unlike pandas, xarray does not produce `SettingWithCopy warnings`_. However, you
 should still avoid assignment with chained indexing.
 
-.. _SettingWithCopy warnings: https://pandas.pydata.org/pandas-docs/stable/indexing.html#returning-a-view-versus-a-copy
+Note that other operations (such as :py:attr:`~xarray.DataArray.values`) may also return views rather than copies.
 
+.. _SettingWithCopy warnings: https://pandas.pydata.org/pandas-docs/stable/indexing.html#returning-a-view-versus-a-copy
+.. _rules: https://numpy.org/doc/stable/user/basics.copies.html
 
 .. _multi-level indexing:
 

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -771,11 +771,15 @@ def data(self, value: Any) -> None:
     @property
     def values(self) -> np.ndarray:
         """
-        The array's data as a numpy.ndarray.
+        The array's data converted to numpy.ndarray.
 
-        If the array's data is not a numpy.ndarray this will attempt to convert
-        it naively using np.array(), which will raise an error if the array
-        type does not support coercion like this (e.g. cupy).
+        This will attempt to convert the array naively using np.array(),
+        which will raise an error if the array type does not support
+        coercion like this (e.g. cupy).
+
+        Note that this array is not copied; operations on it follow
+        numpy's rules of what generates a view vs. a copy, and changes
+        to this array may be reflected in the DataArray as well.
         """
         return self.variable.values
 

diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
@@ -1680,11 +1680,65 @@ def _convert_scalar(self, item):
         # a NumPy array.
         return to_0d_array(item)
 
-    def _oindex_get(self, indexer: OuterIndexer):
-        return self.__getitem__(indexer)
+    def _prepare_key(self, key: tuple[Any, ...]) -> tuple[Any, ...]:
+        if isinstance(key, tuple) and len(key) == 1:
+            # unpack key so it can index a pandas.Index object (pandas.Index
+            # objects don't like tuples)
+            (key,) = key
 
-    def _vindex_get(self, indexer: VectorizedIndexer):
-        return self.__getitem__(indexer)
+        return key
+
+    def _handle_result(
+        self, result: Any
+    ) -> (
+        PandasIndexingAdapter
+        | NumpyIndexingAdapter
+        | np.ndarray
+        | np.datetime64
+        | np.timedelta64
+    ):
+        if isinstance(result, pd.Index):
+            return type(self)(result, dtype=self.dtype)
+        else:
+            return self._convert_scalar(result)
+
+    def _oindex_get(
+        self, indexer: OuterIndexer
+    ) -> (
+        PandasIndexingAdapter
+        | NumpyIndexingAdapter
+        | np.ndarray
+        | np.datetime64
+        | np.timedelta64
+    ):
+        key = self._prepare_key(indexer.tuple)
+
+        if getattr(key, "ndim", 0) > 1:  # Return np-array if multidimensional
+            indexable = NumpyIndexingAdapter(np.asarray(self))
+            return indexable.oindex[indexer]
+
+        result = self.array[key]
+
+        return self._handle_result(result)
+
+    def _vindex_get(
+        self, indexer: VectorizedIndexer
+    ) -> (
+        PandasIndexingAdapter
+        | NumpyIndexingAdapter
+        | np.ndarray
+        | np.datetime64
+        | np.timedelta64
+    ):
+        key = self._prepare_key(indexer.tuple)
+
+        if getattr(key, "ndim", 0) > 1:  # Return np-array if multidimensional
+            indexable = NumpyIndexingAdapter(np.asarray(self))
+            return indexable.vindex[indexer]
+
+        result = self.array[key]
+
+        return self._handle_result(result)
 
     def __getitem__(
         self, indexer: ExplicitIndexer
@@ -1695,22 +1749,15 @@ def __getitem__(
         | np.datetime64
         | np.timedelta64
     ):
-        key = indexer.tuple
-        if isinstance(key, tuple) and len(key) == 1:
-            # unpack key so it can index a pandas.Index object (pandas.Index
-            # objects don't like tuples)
-            (key,) = key
+        key = self._prepare_key(indexer.tuple)
 
         if getattr(key, "ndim", 0) > 1:  # Return np-array if multidimensional
             indexable = NumpyIndexingAdapter(np.asarray(self))
-            return apply_indexer(indexable, indexer)
+            return indexable[indexer]
 
         result = self.array[key]
 
-        if isinstance(result, pd.Index):
-            return type(self)(result, dtype=self.dtype)
-        else:
-            return self._convert_scalar(result)
+        return self._handle_result(result)
 
     def transpose(self, order) -> pd.Index:
         return self.array  # self.array should be always one-dimensional
@@ -1766,6 +1813,34 @@ def _convert_scalar(self, item):
             item = item[idx]
         return super()._convert_scalar(item)
 
+    def _oindex_get(
+        self, indexer: OuterIndexer
+    ) -> (
+        PandasIndexingAdapter
+        | NumpyIndexingAdapter
+        | np.ndarray
+        | np.datetime64
+        | np.timedelta64
+    ):
+        result = super()._oindex_get(indexer)
+        if isinstance(result, type(self)):
+            result.level = self.level
+        return result
+
+    def _vindex_get(
+        self, indexer: VectorizedIndexer
+    ) -> (
+        PandasIndexingAdapter
+        | NumpyIndexingAdapter
+        | np.ndarray
+        | np.datetime64
+        | np.timedelta64
+    ):
+        result = super()._vindex_get(indexer)
+        if isinstance(result, type(self)):
+            result.level = self.level
+        return result
+
     def __getitem__(self, indexer: ExplicitIndexer):
         result = super().__getitem__(indexer)
         if isinstance(result, type(self)):