Skip to content

Clean up backend indexing some more #10376

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jun 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ jobs:
- env: "bare-minimum"
python-version: "3.10"
os: ubuntu-latest
- env: "bare-min-and-scipy"
python-version: "3.10"
os: ubuntu-latest
- env: "min-all-deps"
python-version: "3.10"
os: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements/all-but-dask.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ channels:
- nodefaults
dependencies:
- aiobotocore
- array-api-strict
- array-api-strict<2.4
- boto3
- bottleneck
- cartopy
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements/all-but-numba.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dependencies:
# Pin a "very new numpy" (updated Sept 24, 2024)
- numpy>=2.1.1
- aiobotocore
- array-api-strict
- array-api-strict<2.4
- boto3
- bottleneck
- cartopy
Expand Down
18 changes: 18 additions & 0 deletions ci/requirements/bare-min-and-scipy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
name: xarray-tests
channels:
- conda-forge
- nodefaults
dependencies:
- python=3.10
- coveralls
- pip
- pytest
- pytest-cov
- pytest-env
- pytest-mypy-plugins
- pytest-timeout
- pytest-xdist
- numpy=1.24
- packaging=23.1
- pandas=2.1
- scipy=1.11
2 changes: 1 addition & 1 deletion ci/requirements/environment-3.14.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ channels:
- nodefaults
dependencies:
- aiobotocore
- array-api-strict
- array-api-strict<2.4
- boto3
- bottleneck
- cartopy
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements/environment-windows-3.14.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: xarray-tests
channels:
- conda-forge
dependencies:
- array-api-strict
- array-api-strict<2.4
- boto3
- bottleneck
- cartopy
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements/environment-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: xarray-tests
channels:
- conda-forge
dependencies:
- array-api-strict
- array-api-strict<2.4
- boto3
- bottleneck
- cartopy
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ channels:
- nodefaults
dependencies:
- aiobotocore
- array-api-strict
- array-api-strict<2.4
- boto3
- bottleneck
- cartopy
Expand Down
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ Bug fixes
~~~~~~~~~
- Fix Pydap test_cmp_local_file for numpy 2.3.0 changes: (1) always return arrays for all versions, and (2) skip astype(str) on the expected data for numpy >= 2.3.0. (:pull:`10421`)
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
- Fix the SciPy backend for netCDF3 files. (:issue:`8909`, :pull:`10376`)
By `Deepak Cherian <https://github.com/dcherian>`_.


Documentation
Expand Down
8 changes: 7 additions & 1 deletion xarray/backends/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np

from xarray.backends.common import AbstractWritableDataStore
from xarray.core import indexing
from xarray.core.variable import Variable


Expand All @@ -24,7 +25,12 @@ def get_attrs(self):
return self._attributes

def get_variables(self):
return self._variables
res = {}
for k, v in self._variables.items():
v = v.copy(deep=True)
res[k] = v
v._data = indexing.LazilyIndexedArray(v._data)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Harmonizes this test backend with all the other backends

return res

def get_dimensions(self):
return {d: s for v in self._variables.values() for d, s in v.dims.items()}
Expand Down
2 changes: 1 addition & 1 deletion xarray/backends/scipy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def ds(self):
def open_store_variable(self, name, var):
return Variable(
var.dimensions,
ScipyArrayWrapper(name, self),
indexing.LazilyIndexedArray(ScipyArrayWrapper(name, self)),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

harmonized with the other backends

_decode_attrs(var._attributes),
)

Expand Down
4 changes: 4 additions & 0 deletions xarray/coding/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ def __init__(self, array, func: Callable, dtype: np.typing.DTypeLike):
def dtype(self) -> np.dtype:
return np.dtype(self._dtype)

def transpose(self, order):
# For elementwise functions, we can compose transpose and function application
return type(self)(self.array.transpose(order), self.func, self.dtype)

def _oindex_get(self, key):
return type(self)(self.array.oindex[key], self.func, self.dtype)

Expand Down
11 changes: 7 additions & 4 deletions xarray/coding/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ class StackedBytesArray(indexing.ExplicitlyIndexedNDArrayMixin):
values, when accessed, are automatically stacked along the last dimension.

>>> indexer = indexing.BasicIndexer((slice(None),))
>>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer]
>>> np.array(StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer])
array(b'abc', dtype='|S3')
"""

Expand Down Expand Up @@ -250,14 +250,17 @@ def __repr__(self):
return f"{type(self).__name__}({self.array!r})"

def _vindex_get(self, key):
return _numpy_char_to_bytes(self.array.vindex[key])
return type(self)(self.array.vindex[key])

def _oindex_get(self, key):
return _numpy_char_to_bytes(self.array.oindex[key])
return type(self)(self.array.oindex[key])

def __getitem__(self, key):
# require slicing the last dimension completely
key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim))
if key.tuple[-1] != slice(None):
raise IndexError("too many indices")
return _numpy_char_to_bytes(self.array[key])
return type(self)(self.array[key])

def get_duck_array(self):
return _numpy_char_to_bytes(self.array.get_duck_array())
23 changes: 15 additions & 8 deletions xarray/coding/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
)
from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder
from xarray.core import dtypes, duck_array_ops, indexing
from xarray.core.types import Self
from xarray.core.variable import Variable

if TYPE_CHECKING:
Expand Down Expand Up @@ -58,13 +59,16 @@
return np.dtype(self.array.dtype.kind + str(self.array.dtype.itemsize))

def _oindex_get(self, key):
return np.asarray(self.array.oindex[key], dtype=self.dtype)
return type(self)(self.array.oindex[key])

def _vindex_get(self, key):
return np.asarray(self.array.vindex[key], dtype=self.dtype)
return type(self)(self.array.vindex[key])

def __getitem__(self, key) -> np.ndarray:
return np.asarray(self.array[key], dtype=self.dtype)
def __getitem__(self, key) -> Self:
return type(self)(self.array[key])

def get_duck_array(self):
return duck_array_ops.astype(self.array.get_duck_array(), dtype=self.dtype)


class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin):
Expand Down Expand Up @@ -96,13 +100,16 @@
return np.dtype("bool")

def _oindex_get(self, key):
return np.asarray(self.array.oindex[key], dtype=self.dtype)
return type(self)(self.array.oindex[key])

def _vindex_get(self, key):
return np.asarray(self.array.vindex[key], dtype=self.dtype)
return type(self)(self.array.vindex[key])

def __getitem__(self, key) -> Self:
return type(self)(self.array[key])

def __getitem__(self, key) -> np.ndarray:
return np.asarray(self.array[key], dtype=self.dtype)
def get_duck_array(self):
return duck_array_ops.astype(self.array.get_duck_array(), dtype=self.dtype)


def _apply_mask(
Expand Down Expand Up @@ -234,7 +241,7 @@
# otherwise numpy unsigned ints will silently cast to the signed counterpart
fill_value = fill_value.item()
# passes if provided fill value fits in encoded on-disk type
new_fill = encoded_dtype.type(fill_value)

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 bare-min-and-scipy

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 bare-min-and-scipy

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 bare-min-and-scipy

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 bare-min-and-scipy

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 bare-min-and-scipy

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 bare-min-and-scipy

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 bare-min-and-scipy

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 bare-min-and-scipy

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 bare-min-and-scipy

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 bare-min-and-scipy

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).

Check warning on line 244 in xarray/coding/variables.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

NumPy will stop allowing conversion of out-of-bound Python integers to integer arrays. The conversion of 255 to int8 will fail in the future. For the old behavior, usually: np.array(value).astype(dtype)` will give the desired result (the cast overflows).
except OverflowError:
encoded_kind_str = "signed" if encoded_dtype.kind == "i" else "unsigned"
warnings.warn(
Expand Down
12 changes: 10 additions & 2 deletions xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
)
from xarray.core.utils import emit_user_level_warning
from xarray.core.variable import IndexVariable, Variable
from xarray.namedarray.utils import is_duck_dask_array
from xarray.namedarray.utils import is_duck_array

CF_RELATED_DATA = (
"bounds",
Expand Down Expand Up @@ -248,7 +248,15 @@ def decode_cf_variable(

encoding.setdefault("dtype", original_dtype)

if not is_duck_dask_array(data):
if (
# we don't need to lazily index duck arrays
not is_duck_array(data)
# These arrays already support lazy indexing
# OR for IndexingAdapters, it makes no sense to wrap them
and not isinstance(data, indexing.ExplicitlyIndexedNDArrayMixin)
):
# this path applies to bare BackendArray objects.
# It is not hit for any internal Xarray backend
data = indexing.LazilyIndexedArray(data)

return Variable(dimensions, data, attributes, encoding=encoding, fastpath=True)
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,7 @@ def from_variables(

# preserve wrapped pd.Index (if any)
# accessing `.data` can load data from disk, so we only access if needed
data = var._data.array if hasattr(var._data, "array") else var.data
data = var._data if isinstance(var._data, PandasIndexingAdapter) else var.data # type: ignore[redundant-expr]
# multi-index level variable: get level index
if isinstance(var._data, PandasMultiIndexingAdapter):
level = var._data.level
Expand Down
19 changes: 19 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -1427,6 +1427,25 @@ def test_string_object_warning(self) -> None:
with self.roundtrip(original) as actual:
assert_identical(original, actual)

@pytest.mark.parametrize(
"indexer",
(
{"y": [1]},
{"y": slice(2)},
{"y": 1},
{"x": [1], "y": [1]},
{"x": ("x0", [0, 1]), "y": ("x0", [0, 1])},
),
)
def test_indexing_roundtrip(self, indexer) -> None:
# regression test for GH8909
ds = xr.Dataset()
ds["A"] = xr.DataArray([[1, "a"], [2, "b"]], dims=["x", "y"])
with self.roundtrip(ds) as ds2:
expected = ds2.sel(indexer)
with self.roundtrip(expected) as actual:
assert_identical(actual, expected)


class NetCDFBase(CFEncodedBase):
"""Tests for all netCDF3 and netCDF4 backends."""
Expand Down
Loading