diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 29d25c0b734..edf8a1aa492 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -5,9 +5,9 @@ What's New ========== -.. _whats-new.2025.08.1: +.. _whats-new.2025.09.0: -v2025.08.1 (unreleased) +v2025.09.0 (unreleased) ----------------------- New Features @@ -36,6 +36,20 @@ Breaking changes if non default values are provided in this context (:issue:`10640`, :pull:`10650`). By `Spencer Clark `_. +- The default backend ``engine`` used by :py:meth:`Dataset.to_netcdf` + and :py:meth:`DataTree.to_netcdf` is now chosen consistently with + :py:func:`open_dataset` and :py:func:`open_datatree`, using whichever netCDF + libraries are available and valid, and preferring netCDF4 to h5netcdf to scipy + (:issue:`10654`). This will change the default backend in some edge cases + (e.g., from scipy to h5netcdf when writing to a file-like object or bytes). To + override these new defaults, set ``engine`` explicitly. + By `Stephan Hoyer `_. +- The return value of :py:meth:`Dataset.to_netcdf` without ``path`` is now a + ``memoryview`` object instead of ``bytes`` (:pull:`10656`). This removes an + unnecessary memory copy and ensures consistency when using either + ``engine="scipy"`` or ``engine="h5netcdf"``. If you need a bytes object, + simply wrap the return value of ``to_netcdf()`` with ``bytes()``. + By `Stephan Hoyer `_. Deprecations ~~~~~~~~~~~~ @@ -54,6 +68,10 @@ Bug fixes redundant computation of Dask arrays with cross-group dependencies (:issue:`10637`). By `Stephan Hoyer `_. +- :py:meth:`DataTree.to_netcdf` no longer hard-codes h5netcdf as the default + ``engine`` (:issue:`10654`). + By `Stephan Hoyer `_. + Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index afe840d946f..48d1a4c5135 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -17,7 +17,6 @@ from typing import ( TYPE_CHECKING, Any, - Final, Literal, TypeVar, Union, @@ -98,69 +97,44 @@ DATAARRAY_NAME = "__xarray_dataarray_name__" DATAARRAY_VARIABLE = "__xarray_dataarray_variable__" -ENGINES = { - "netcdf4": backends.NetCDF4DataStore.open, - "scipy": backends.ScipyDataStore, - "pydap": backends.PydapDataStore.open, - "h5netcdf": backends.H5NetCDFStore.open, - "zarr": backends.ZarrStore.open_group, -} - - -def _get_default_engine_remote_uri() -> Literal["netcdf4", "pydap"]: - engine: Literal["netcdf4", "pydap"] - try: - import netCDF4 # noqa: F401 - - engine = "netcdf4" - except ImportError: # pragma: no cover - try: - import pydap # noqa: F401 - engine = "pydap" - except ImportError as err: - raise ValueError( - "netCDF4 or pydap is required for accessing remote datasets via OPeNDAP" - ) from err - return engine - - -def _get_default_engine_gz() -> Literal["scipy"]: - try: - import scipy # noqa: F401 +def get_default_netcdf_write_engine( + format: T_NetcdfTypes | None, + to_fileobject_or_memoryview: bool, +) -> Literal["netcdf4", "h5netcdf", "scipy"]: + """Return the default netCDF library to use for writing a netCDF file.""" + module_names = { + "netcdf4": "netCDF4", + "scipy": "scipy", + "h5netcdf": "h5netcdf", + } - engine: Final = "scipy" - except ImportError as err: # pragma: no cover - raise ValueError("scipy is required for accessing .gz files") from err - return engine + candidates = list(plugins.NETCDF_BACKENDS_ORDER) + if format is not None: + if format.upper().startswith("NETCDF3"): + candidates.remove("h5netcdf") + elif format.upper().startswith("NETCDF4"): + candidates.remove("scipy") + else: + raise ValueError(f"unexpected {format=}")
-def _get_default_engine_netcdf() -> Literal["netcdf4", "h5netcdf", "scipy"]: - candidates: list[tuple[str, str]] = [ - ("netcdf4", "netCDF4"), - ("h5netcdf", "h5netcdf"), - ("scipy", "scipy.io.netcdf"), - ] + if to_fileobject_or_memoryview: + candidates.remove("netcdf4") - for engine, module_name in candidates: + for engine in candidates: + module_name = module_names[engine] if importlib.util.find_spec(module_name) is not None: return cast(Literal["netcdf4", "h5netcdf", "scipy"], engine) + format_str = f" with {format=}" if format is not None else "" + libraries = ", ".join(module_names[c] for c in candidates) raise ValueError( - "cannot read or write NetCDF files because none of " - "'netCDF4-python', 'h5netcdf', or 'scipy' are installed" + f"cannot write NetCDF files{format_str} because none of the suitable " + f"backend libraries ({libraries}) are installed" ) -def _get_default_engine(path: str, allow_remote: bool = False) -> T_NetcdfEngine: - if allow_remote and is_remote_uri(path): - return _get_default_engine_remote_uri() # type: ignore[return-value] - elif path.endswith(".gz"): - return _get_default_engine_gz() - else: - return _get_default_engine_netcdf() - - def _validate_dataset_names(dataset: Dataset) -> None: """DataArray.name and Dataset keys must be a string or None""" @@ -1958,7 +1932,7 @@ def to_netcdf( multifile: Literal[False] = False, invalid_netcdf: bool = False, auto_complex: bool | None = None, -) -> bytes | memoryview: ... +) -> memoryview: ... # compute=False returns dask.Delayed @@ -2051,7 +2025,7 @@ def to_netcdf( multifile: bool = False, invalid_netcdf: bool = False, auto_complex: bool | None = None, -) -> tuple[ArrayWriter, AbstractDataStore] | bytes | memoryview | Delayed | None: ... +) -> tuple[ArrayWriter, AbstractDataStore] | memoryview | Delayed | None: ... def to_netcdf( @@ -2067,7 +2041,7 @@ def to_netcdf( multifile: bool = False, invalid_netcdf: bool = False, auto_complex: bool | None = None, -) -> tuple[ArrayWriter, AbstractDataStore] | bytes | memoryview | Delayed | None: +) -> tuple[ArrayWriter, AbstractDataStore] | memoryview | Delayed | None: """This function creates an appropriate datastore for writing a dataset to disk as a netCDF file @@ -2075,33 +2049,14 @@ def to_netcdf( The ``multifile`` argument is only for the private use of save_mfdataset. """ - if isinstance(path_or_file, os.PathLike): - path_or_file = os.fspath(path_or_file) - if encoding is None: encoding = {} - if isinstance(path_or_file, str): - if engine is None: - engine = _get_default_engine(path_or_file) - path_or_file = _normalize_path(path_or_file) - else: - # writing to bytes/memoryview or a file-like object - if engine is None: - # TODO: only use 'scipy' if format is None or a netCDF3 format - engine = "scipy" - elif engine not in ("scipy", "h5netcdf"): - raise ValueError( - "invalid engine for creating bytes/memoryview or writing to a " - f"file-like object with to_netcdf: {engine!r}. Only " - "engine=None, engine='scipy' and engine='h5netcdf' is " - "supported." 
- ) - if not compute: - raise NotImplementedError( - "to_netcdf() with compute=False is not yet implemented when " - "returning bytes" - ) + path_or_file = _normalize_path(path_or_file) + + if engine is None: + to_fileobject_or_memoryview = not isinstance(path_or_file, str) + engine = get_default_netcdf_write_engine(format, to_fileobject_or_memoryview) # validate Dataset keys, DataArray names, and attr keys/values _validate_dataset_names(dataset) @@ -2121,6 +2076,11 @@ def to_netcdf( ) if path_or_file is None: + if not compute: + raise NotImplementedError( + "to_netcdf() with compute=False is not yet implemented when " + "returning a memoryview" + ) target = BytesIOProxy() else: target = path_or_file # type: ignore[assignment] @@ -2164,7 +2124,7 @@ def to_netcdf( if path_or_file is None: assert isinstance(target, BytesIOProxy) # created in this function - return target.getvalue_or_getbuffer() + return target.getbuffer() if not compute: return delayed_close_after_writes(writes, store) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index b09d73c477c..a725fa27b70 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -11,7 +11,6 @@ TYPE_CHECKING, Any, ClassVar, - Generic, Self, TypeVar, Union, @@ -198,18 +197,13 @@ def _normalize_path_list( return _normalize_path_list(paths) -BytesOrMemory = TypeVar("BytesOrMemory", bytes, memoryview) - - @dataclass -class BytesIOProxy(Generic[BytesOrMemory]): - """Proxy object for a write that returns either bytes or a memoryview.""" +class BytesIOProxy: + """Proxy object for a write that returns a memoryview.""" - # TODO: remove this in favor of BytesIO when Dataset.to_netcdf() stops - # returning bytes from the scipy engine - getvalue: Callable[[], BytesOrMemory] | None = None + getvalue: Callable[[], memoryview] | None = None - def getvalue_or_getbuffer(self) -> BytesOrMemory: + def getbuffer(self) -> memoryview: """Get the value of this write as bytes or memory.""" if self.getvalue is None: raise ValueError("must set getvalue before fetching value") diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 76df963621e..354fca692dd 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -18,7 +18,7 @@ from xarray.backends.common import AbstractDataStore from xarray.core.types import ReadBuffer -STANDARD_BACKENDS_ORDER = ["netcdf4", "h5netcdf", "scipy"] +NETCDF_BACKENDS_ORDER = ["netcdf4", "h5netcdf", "scipy"] def remove_duplicates(entrypoints: EntryPoints) -> list[EntryPoint]: @@ -92,7 +92,7 @@ def sort_backends( backend_entrypoints: dict[str, type[BackendEntrypoint]], ) -> dict[str, type[BackendEntrypoint]]: ordered_backends_entrypoints = {} - for be_name in STANDARD_BACKENDS_ORDER: + for be_name in NETCDF_BACKENDS_ORDER: if backend_entrypoints.get(be_name) is not None: ordered_backends_entrypoints[be_name] = backend_entrypoints.pop(be_name) ordered_backends_entrypoints.update( diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 051f75cd0fc..40c610bbad0 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -30,7 +30,6 @@ Frozen, FrozenDict, close_on_error, - emit_user_level_warning, module_available, try_read_magic_number_from_file_or_path, ) @@ -169,20 +168,9 @@ def __init__( self.lock = ensure_lock(lock) if isinstance(filename_or_obj, BytesIOProxy): - emit_user_level_warning( - "return value of to_netcdf() without a target for " - "engine='scipy' is currently bytes, but will switch to " - "memoryview in a future version of Xarray. 
To silence this " - "warning, use the following pattern or switch to " - "to_netcdf(engine='h5netcdf'):\n" - " target = io.BytesIO()\n" - " dataset.to_netcdf(target)\n" - " result = target.getbuffer()", - FutureWarning, - ) source = filename_or_obj filename_or_obj = io.BytesIO() - source.getvalue = filename_or_obj.getvalue + source.getvalue = filename_or_obj.getbuffer if isinstance(filename_or_obj, str): # path manager = CachingFileManager( diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b1833d3266f..a9326ccc76f 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4067,7 +4067,7 @@ def to_netcdf( compute: bool = True, invalid_netcdf: bool = False, auto_complex: bool | None = None, - ) -> bytes | memoryview: ... + ) -> memoryview: ... # compute=False returns dask.Delayed @overload @@ -4131,17 +4131,15 @@ def to_netcdf( compute: bool = True, invalid_netcdf: bool = False, auto_complex: bool | None = None, - ) -> bytes | memoryview | Delayed | None: + ) -> memoryview | Delayed | None: """Write DataArray contents to a netCDF file. Parameters ---------- - path : str, path-like or None, optional - Path to which to save this dataset. File-like objects are only - supported by the scipy engine. If no path is provided, this - function returns the resulting netCDF file as bytes; in this case, - we need to use scipy, which does not support netCDF version 4 (the - default format becomes NETCDF3_64BIT). + path : str, path-like, file-like or None, optional + Path to which to save this data array, or a file-like object to write + it to (which must support read and write and be seekable), or None + (default) to return the in-memory netCDF file as a memoryview. mode : {"w", "a"}, default: "w" Write ('w') or append ('a') mode. If mode='w', any existing file at this location will be overwritten. If mode='a', existing variables @@ -4201,7 +4199,7 @@ def to_netcdf( Returns ------- - * ``bytes`` or ``memoryview`` if path is None + * ``memoryview`` if path is None * ``dask.delayed.Delayed`` if compute is False * None otherwise diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 9591e7bccdd..3a0fb86a39a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1953,7 +1953,7 @@ def to_netcdf( compute: bool = True, invalid_netcdf: bool = False, auto_complex: bool | None = None, - ) -> bytes | memoryview: ... + ) -> memoryview: ... # compute=False returns dask.Delayed @overload @@ -2017,17 +2017,15 @@ def to_netcdf( compute: bool = True, invalid_netcdf: bool = False, auto_complex: bool | None = None, - ) -> bytes | memoryview | Delayed | None: + ) -> memoryview | Delayed | None: """Write dataset contents to a netCDF file. Parameters ---------- - path : str, path-like or file-like, optional - Path to which to save this dataset. File-like objects are only - supported by the scipy engine. If no path is provided, this - function returns the resulting netCDF file as bytes; in this case, - we need to use scipy, which does not support netCDF version 4 (the - default format becomes NETCDF3_64BIT). + path : str, path-like, file-like or None, optional + Path to which to save this dataset, or a file-like object to write + it to (which must support read and write and be seekable), or None + (default) to return the in-memory netCDF file as a memoryview. mode : {"w", "a"}, default: "w" Write ('w') or append ('a') mode. If mode='w', any existing file at this location will be overwritten. 
If mode='a', existing variables @@ -2089,7 +2087,7 @@ def to_netcdf( Returns ------- - * ``bytes`` or ``memoryview`` if path is None + * ``memoryview`` if path is None * ``dask.delayed.Delayed`` if compute is False * ``None`` otherwise diff --git a/xarray/core/datatree_io.py b/xarray/core/datatree_io.py index f4bd28af5eb..33cf9ab84cc 100644 --- a/xarray/core/datatree_io.py +++ b/xarray/core/datatree_io.py @@ -6,12 +6,14 @@ from typing import TYPE_CHECKING, Any, Literal, get_args from xarray.backends.api import ( + _normalize_path, delayed_close_after_writes, dump_to_store, + get_default_netcdf_write_engine, get_writable_netcdf_store, get_writable_zarr_store, ) -from xarray.backends.common import ArrayWriter +from xarray.backends.common import ArrayWriter, BytesIOProxy from xarray.backends.locks import get_dask_scheduler from xarray.core.datatree import DataTree from xarray.core.types import NetcdfWriteModes, ZarrWriteModes @@ -50,8 +52,14 @@ def _datatree_to_netcdf( "DataTree.to_netcdf only supports the netcdf4 and h5netcdf engines" ) + filepath = _normalize_path(filepath) + if engine is None: - engine = "h5netcdf" + to_fileobject_or_memoryview = not isinstance(filepath, str) + engine = get_default_netcdf_write_engine( + format="NETCDF4", # required for supporting groups + to_fileobject_or_memoryview=to_fileobject_or_memoryview, + ) # type: ignore[assignment] if group is not None: raise NotImplementedError( @@ -70,9 +78,12 @@ def _datatree_to_netcdf( ) if filepath is None: - # No need to use BytesIOProxy here because the legacy scipy backend - # cannot write netCDF files with groups - target = io.BytesIO() + if not compute: + raise NotImplementedError( + "to_netcdf() with compute=False is not yet implemented when " + "returning a memoryview" + ) + target = BytesIOProxy() else: target = filepath # type: ignore[assignment] @@ -126,7 +137,7 @@ def _datatree_to_netcdf( root_store.sync() if filepath is None: - assert isinstance(target, io.BytesIO) + assert isinstance(target, BytesIOProxy) # created in this function return target.getbuffer() if not compute: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c54e3c8d135..3ed49140a9b 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2555,7 +2555,7 @@ def test_read_non_consolidated_warning(self) -> None: def test_non_existent_store(self) -> None: with pytest.raises( FileNotFoundError, - match="(No such file or directory|Unable to find group|No group found in store)", + match="(No such file or directory|Unable to find group|No group found in store|does not exist)", ): xr.open_zarr(f"{uuid.uuid4()}") @@ -4407,29 +4407,25 @@ async def test_load_async(self) -> None: await super().test_load_async() def test_to_netcdf_explicit_engine(self) -> None: - with pytest.warns( - FutureWarning, - match=re.escape("return value of to_netcdf() without a target"), - ): - Dataset({"foo": 42}).to_netcdf(engine="scipy") + Dataset({"foo": 42}).to_netcdf(engine="scipy") def test_roundtrip_via_bytes(self) -> None: original = create_test_data() - with pytest.warns( - FutureWarning, - match=re.escape("return value of to_netcdf() without a target"), - ): - netcdf_bytes = original.to_netcdf(engine="scipy") + netcdf_bytes = original.to_netcdf(engine="scipy") roundtrip = open_dataset(netcdf_bytes, engine="scipy") assert_identical(roundtrip, original) + def test_to_bytes_compute_false(self) -> None: + original = create_test_data() + with pytest.raises( + NotImplementedError, + match=re.escape("to_netcdf() with 
compute=False is not yet implemented"), + ): + original.to_netcdf(engine="scipy", compute=False) + def test_bytes_pickle(self) -> None: data = Dataset({"foo": ("x", [1, 2, 3])}) - with pytest.warns( - FutureWarning, - match=re.escape("return value of to_netcdf() without a target"), - ): - fobj = data.to_netcdf() + fobj = data.to_netcdf(engine="scipy") with self.open(fobj) as ds: unpickled = pickle.loads(pickle.dumps(ds)) assert_identical(unpickled, data) @@ -4587,11 +4583,19 @@ def test_write_store(self) -> None: pass @requires_scipy + @requires_netCDF4 def test_engine(self) -> None: data = create_test_data() + with pytest.raises(ValueError, match=r"unrecognized engine"): data.to_netcdf("foo.nc", engine="foobar") # type: ignore[call-overload] - with pytest.raises(ValueError, match=r"invalid engine"): + + with pytest.raises( + ValueError, + match=re.escape( + "can only read bytes or file-like objects with engine='scipy' or 'h5netcdf'" + ), + ): data.to_netcdf(engine="netcdf4") with create_tmp_file() as tmp_file: @@ -4652,12 +4656,8 @@ def test_encoding_unlimited_dims(self) -> None: @requires_scipy def test_roundtrip_via_bytes(self) -> None: original = create_test_data() - with pytest.warns( - FutureWarning, - match=re.escape("return value of to_netcdf() without a target"), - ): - netcdf_bytes = original.to_netcdf() - roundtrip = open_dataset(netcdf_bytes) + netcdf_bytes = original.to_netcdf() + roundtrip = load_dataset(netcdf_bytes) assert_identical(roundtrip, original) @pytest.mark.xfail( @@ -6470,12 +6470,8 @@ def test_open_dataarray_options(self) -> None: def test_dataarray_to_netcdf_return_bytes(self) -> None: # regression test for GH1410 data = xr.DataArray([1, 2, 3]) - with pytest.warns( - FutureWarning, - match=re.escape("return value of to_netcdf() without a target"), - ): - output = data.to_netcdf(engine="scipy") - assert isinstance(output, bytes) + output = data.to_netcdf(engine="scipy") + assert isinstance(output, memoryview) def test_dataarray_to_netcdf_no_name_pathlib(self) -> None: original_da = DataArray(np.arange(12).reshape((3, 4))) @@ -7015,10 +7011,7 @@ def test_scipy_entrypoint(tmp_path: Path) -> None: with open(path, "rb") as f: _check_guess_can_open_and_open(entrypoint, f, engine="scipy", expected=ds) - with pytest.warns( - FutureWarning, match=re.escape("return value of to_netcdf() without a target") - ): - contents = ds.to_netcdf(engine="scipy") + contents = ds.to_netcdf(engine="scipy") _check_guess_can_open_and_open(entrypoint, contents, engine="scipy", expected=ds) _check_guess_can_open_and_open( entrypoint, BytesIO(contents), engine="scipy", expected=ds diff --git a/xarray/tests/test_backends_api.py b/xarray/tests/test_backends_api.py index ed487b07450..2d659dcb9c9 100644 --- a/xarray/tests/test_backends_api.py +++ b/xarray/tests/test_backends_api.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re import sys from numbers import Number @@ -7,7 +8,7 @@ import pytest import xarray as xr -from xarray.backends.api import _get_default_engine, _get_default_engine_netcdf +from xarray.backends.api import get_default_netcdf_write_engine from xarray.tests import ( assert_identical, assert_no_warnings, @@ -20,15 +21,27 @@ @requires_netCDF4 @requires_scipy -def test__get_default_engine() -> None: - engine_remote = _get_default_engine("http://example.org/test.nc", allow_remote=True) - assert engine_remote == "netcdf4" +@requires_h5netcdf +def test_get_default_netcdf_write_engine() -> None: + engine = get_default_netcdf_write_engine( + format=None, 
to_fileobject_or_memoryview=False + ) + assert engine == "netcdf4" + + engine = get_default_netcdf_write_engine( + format="NETCDF4", to_fileobject_or_memoryview=False + ) + assert engine == "netcdf4" - engine_gz = _get_default_engine("/example.gz") - assert engine_gz == "scipy" + engine = get_default_netcdf_write_engine( + format="NETCDF4", to_fileobject_or_memoryview=True + ) + assert engine == "h5netcdf" - engine_default = _get_default_engine("/example") - assert engine_default == "netcdf4" + engine = get_default_netcdf_write_engine( + format="NETCDF3_CLASSIC", to_fileobject_or_memoryview=True + ) + assert engine == "scipy" @requires_h5netcdf @@ -39,7 +52,20 @@ def test_default_engine_h5netcdf(monkeypatch): monkeypatch.delitem(sys.modules, "scipy", raising=False) monkeypatch.setattr(sys, "meta_path", []) - assert _get_default_engine_netcdf() == "h5netcdf" + engine = get_default_netcdf_write_engine( + format=None, to_fileobject_or_memoryview=False + ) + assert engine == "h5netcdf" + + with pytest.raises( + ValueError, + match=re.escape( + "cannot write NetCDF files with format='NETCDF3_CLASSIC' because none of the suitable backend libraries (netCDF4, scipy) are installed" + ), + ): + get_default_netcdf_write_engine( + format="NETCDF3_CLASSIC", to_fileobject_or_memoryview=False + ) def test_custom_engine() -> None: diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 1781e7c8f69..90e0a9740f4 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -2,6 +2,7 @@ import contextlib import re +import sys from collections.abc import Callable, Generator, Hashable from pathlib import Path from typing import TYPE_CHECKING, Literal, cast @@ -229,7 +230,7 @@ def test_decode_cf(self, tmpdir): ) as roundtrip_dt: assert original_dt["test"].dtype == roundtrip_dt["test"].dtype - def test_to_netcdf_inherited_coords(self, tmpdir): + def test_to_netcdf_inherited_coords(self, tmpdir) -> None: filepath = tmpdir / "test.nc" original_dt = DataTree.from_dict( { @@ -244,7 +245,7 @@ def test_to_netcdf_inherited_coords(self, tmpdir): subtree = cast(DataTree, roundtrip_dt["/sub"]) assert "x" not in subtree.to_dataset(inherit=False).coords - def test_netcdf_encoding(self, tmpdir, simple_datatree): + def test_netcdf_encoding(self, tmpdir, simple_datatree) -> None: filepath = tmpdir / "test.nc" original_dt = simple_datatree @@ -261,7 +262,7 @@ def test_netcdf_encoding(self, tmpdir, simple_datatree): with pytest.raises(ValueError, match="unexpected encoding group.*"): original_dt.to_netcdf(filepath, encoding=enc, engine=self.engine) - def test_write_subgroup(self, tmpdir): + def test_write_subgroup(self, tmpdir) -> None: original_dt = DataTree.from_dict( { "/": xr.Dataset(coords={"x": [1, 2, 3]}), @@ -280,7 +281,7 @@ def test_write_subgroup(self, tmpdir): assert_identical(expected_dt, roundtrip_dt) @requires_netCDF4 - def test_no_redundant_dimensions(self, tmpdir): + def test_no_redundant_dimensions(self, tmpdir) -> None: # regression test for https://github.com/pydata/xarray/issues/10241 original_dt = DataTree.from_dict( { @@ -310,6 +311,16 @@ def test_compute_false(self, tmpdir, simple_datatree): with open_datatree(filepath, engine=self.engine) as written_dt: assert_identical(written_dt, original_dt) + def test_default_write_engine(self, tmpdir, simple_datatree, monkeypatch): + # Ensure the other netCDF library is not installed + exclude = "netCDF4" if self.engine == "h5netcdf" else "h5netcdf" + monkeypatch.delitem(sys.modules, 
exclude, raising=False) + monkeypatch.setattr(sys, "meta_path", []) + + filepath = tmpdir + "/phony_dims.nc" + original_dt = simple_datatree + original_dt.to_netcdf(filepath) # should not raise + @requires_netCDF4 class TestNetCDF4DatatreeIO(DatatreeIOBase): @@ -584,17 +595,25 @@ def test_phony_dims_warning(self, tmpdir) -> None: "phony_dim_3": 25, } - def test_roundtrip_via_bytes(self, simple_datatree): + def test_roundtrip_via_bytes(self, simple_datatree) -> None: original_dt = simple_datatree roundtrip_dt = open_datatree(original_dt.to_netcdf()) assert_equal(original_dt, roundtrip_dt) - def test_roundtrip_via_bytes_engine_specified(self, simple_datatree): + def test_roundtrip_via_bytes_engine_specified(self, simple_datatree) -> None: original_dt = simple_datatree roundtrip_dt = open_datatree(original_dt.to_netcdf(engine=self.engine)) assert_equal(original_dt, roundtrip_dt) - def test_roundtrip_using_filelike_object(self, tmpdir, simple_datatree): + def test_to_bytes_compute_false(self, simple_datatree) -> None: + original_dt = simple_datatree + with pytest.raises( + NotImplementedError, + match=re.escape("to_netcdf() with compute=False is not yet implemented"), + ): + original_dt.to_netcdf(compute=False) + + def test_roundtrip_using_filelike_object(self, tmpdir, simple_datatree) -> None: original_dt = simple_datatree filepath = tmpdir + "/test.nc" # h5py requires both read and write access when writing, it will @@ -612,7 +631,7 @@ def test_roundtrip_using_filelike_object(self, tmpdir, simple_datatree): class TestZarrDatatreeIO: engine = "zarr" - def test_to_zarr(self, tmpdir, simple_datatree, zarr_format): + def test_to_zarr(self, tmpdir, simple_datatree, zarr_format) -> None: filepath = str(tmpdir / "test.zarr") original_dt = simple_datatree original_dt.to_zarr(filepath, zarr_format=zarr_format) @@ -623,7 +642,7 @@ def test_to_zarr(self, tmpdir, simple_datatree, zarr_format): @pytest.mark.filterwarnings( "ignore:Numcodecs codecs are not in the Zarr version 3 specification" ) - def test_zarr_encoding(self, tmpdir, simple_datatree, zarr_format): + def test_zarr_encoding(self, tmpdir, simple_datatree, zarr_format) -> None: filepath = str(tmpdir / "test.zarr") original_dt = simple_datatree @@ -653,7 +672,7 @@ def test_zarr_encoding(self, tmpdir, simple_datatree, zarr_format): @pytest.mark.xfail(reason="upstream zarr read-only changes have broken this test") @pytest.mark.filterwarnings("ignore:Duplicate name") - def test_to_zarr_zip_store(self, tmpdir, simple_datatree, zarr_format): + def test_to_zarr_zip_store(self, tmpdir, simple_datatree, zarr_format) -> None: from zarr.storage import ZipStore filepath = str(tmpdir / "test.zarr.zip") @@ -664,7 +683,9 @@ def test_to_zarr_zip_store(self, tmpdir, simple_datatree, zarr_format): with open_datatree(store, engine="zarr") as roundtrip_dt: # type: ignore[arg-type, unused-ignore] assert_equal(original_dt, roundtrip_dt) - def test_to_zarr_not_consolidated(self, tmpdir, simple_datatree, zarr_format): + def test_to_zarr_not_consolidated( + self, tmpdir, simple_datatree, zarr_format + ) -> None: filepath = tmpdir / "test.zarr" zmetadata = filepath / ".zmetadata" s1zmetadata = filepath / "set1" / ".zmetadata" @@ -678,7 +699,9 @@ def test_to_zarr_not_consolidated(self, tmpdir, simple_datatree, zarr_format): with open_datatree(filepath, engine="zarr") as roundtrip_dt: assert_equal(original_dt, roundtrip_dt) - def test_to_zarr_default_write_mode(self, tmpdir, simple_datatree, zarr_format): + def test_to_zarr_default_write_mode( + self, tmpdir, 
simple_datatree, zarr_format + ) -> None: simple_datatree.to_zarr(str(tmpdir), zarr_format=zarr_format) import zarr @@ -695,7 +718,7 @@ def test_to_zarr_default_write_mode(self, tmpdir, simple_datatree, zarr_format): @requires_dask def test_to_zarr_compute_false( self, tmp_path: Path, simple_datatree: DataTree, zarr_format: Literal[2, 3] - ): + ) -> None: import dask.array as da storepath = tmp_path / "test.zarr" @@ -782,7 +805,7 @@ def assert_expected_zarr_files_exist( assert_identical(written_dt, original_dt) @requires_dask - def test_to_zarr_no_redundant_computation(self, tmpdir, zarr_format): + def test_to_zarr_no_redundant_computation(self, tmpdir, zarr_format) -> None: import dask.array as da eval_count = 0 @@ -958,7 +981,7 @@ def test_open_groups_chunks(self, tmpdir, zarr_format) -> None: for ds in dict_of_datasets.values(): ds.close() - def test_write_subgroup(self, tmpdir, zarr_format): + def test_write_subgroup(self, tmpdir, zarr_format) -> None: original_dt = DataTree.from_dict( { "/": xr.Dataset(coords={"x": [1, 2, 3]}), @@ -979,7 +1002,7 @@ def test_write_subgroup(self, tmpdir, zarr_format): @pytest.mark.filterwarnings( "ignore:Failed to open Zarr store with consolidated metadata:RuntimeWarning" ) - def test_write_inherited_coords_false(self, tmpdir, zarr_format): + def test_write_inherited_coords_false(self, tmpdir, zarr_format) -> None: original_dt = DataTree.from_dict( { "/": xr.Dataset(coords={"x": [1, 2, 3]}), @@ -1003,7 +1026,7 @@ def test_write_inherited_coords_false(self, tmpdir, zarr_format): @pytest.mark.filterwarnings( "ignore:Failed to open Zarr store with consolidated metadata:RuntimeWarning" ) - def test_write_inherited_coords_true(self, tmpdir, zarr_format): + def test_write_inherited_coords_true(self, tmpdir, zarr_format) -> None: original_dt = DataTree.from_dict( { "/": xr.Dataset(coords={"x": [1, 2, 3]}), diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index e2129229c2c..f1342a1f82a 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -171,7 +171,7 @@ def test_build_engines_sorted() -> None: backend_entrypoints = list(plugins.build_engines(dummy_pkg_entrypoints)) indices = [] - for be in plugins.STANDARD_BACKENDS_ORDER: + for be in plugins.NETCDF_BACKENDS_ORDER: try: index = backend_entrypoints.index(be) backend_entrypoints.pop(index)
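
Usage sketch (illustrative, not part of the patch): a minimal example of the user-facing behaviour described in the whats-new entries above, assuming at least one of netCDF4, h5netcdf or scipy is installed; which engine gets picked depends on what is available in the local environment, and netCDF4 is never selected for in-memory or file-like targets.

import xarray as xr

# Assumes at least one of netCDF4 / h5netcdf / scipy is installed.
ds = xr.Dataset({"foo": ("x", [1, 2, 3])})

# With no target, to_netcdf() now returns a memoryview rather than bytes,
# using the default write engine (h5netcdf preferred over scipy here).
buf = ds.to_netcdf()
assert isinstance(buf, memoryview)

# Wrap in bytes() when an actual bytes object is needed; this makes the one
# copy that the old behaviour always made.
raw = bytes(buf)

# The new default can still be overridden by passing engine explicitly,
# e.g. to keep using the legacy scipy backend (netCDF3 format).
legacy_buf = ds.to_netcdf(engine="scipy")

# The in-memory file round-trips through open_dataset.
roundtrip = xr.open_dataset(raw)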