Commit

Merge branch 'main' into refactor-unsigned-masked-cf
kmuehlbauer authored Aug 20, 2024
2 parents 01a4742 + ca2e9d6 commit 5aae952
Showing 14 changed files with 51 additions and 20 deletions.
3 changes: 2 additions & 1 deletion doc/internals/chunked-arrays.rst
@@ -91,7 +91,8 @@ Once the chunkmanager subclass has been registered, xarray objects wrapping the
The latter two methods ultimately call the chunkmanager's implementation of ``.from_array``, to which they pass the ``from_array_kwargs`` dict.
The ``chunked_array_type`` kwarg selects which registered chunkmanager subclass to dispatch to. It defaults to ``'dask'``
if Dask is installed, otherwise it defaults to whichever chunkmanager is registered if only one is registered.
If multiple chunkmanagers are registered it will raise an error by default.
If multiple chunkmanagers are registered, the ``chunk_manager`` configuration option (which can be set using :py:func:`set_options`)
will be used to determine which chunkmanager to use, defaulting to ``'dask'``.
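
For illustration, a minimal sketch of the new option in use (assuming dask is installed; the option name follows the change in this commit):

    import xarray as xr

    # "chunk_manager" is consulted when more than one chunkmanager is registered;
    # it defaults to "dask".
    with xr.set_options(chunk_manager="dask"):
        ds = xr.Dataset({"a": ("x", [1, 2, 3])}).chunk(x=1)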

Parallel processing without chunks
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2 changes: 1 addition & 1 deletion doc/user-guide/duckarrays.rst
@@ -215,7 +215,7 @@ Whilst the features above allow many numpy-like array libraries to be used prett
makes sense to use an interfacing package to make certain tasks easier.

For example the `pint-xarray package <https://pint-xarray.readthedocs.io>`_ offers a custom ``.pint`` accessor (see :ref:`internals.accessors`) which provides
convenient access to information stored within the wrapped array (e.g. ``.units`` and ``.magnitude``), and makes makes
convenient access to information stored within the wrapped array (e.g. ``.units`` and ``.magnitude``), and makes
creating wrapped pint arrays (and especially xarray-wrapping-pint-wrapping-dask arrays) simpler for the user.
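
A rough sketch of that accessor in use (assuming pint and pint-xarray are installed; the unit below is illustrative):

    import pint_xarray  # noqa: F401  # importing registers the .pint accessor
    import xarray as xr

    da = xr.DataArray([1.0, 2.0, 3.0], dims="x", attrs={"units": "m"})
    quantified = da.pint.quantify()  # wrap the data in a pint Quantity
    quantified.pint.units            # the pint unit
    quantified.pint.magnitude        # the underlying numpy array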

We maintain a list of libraries extending ``xarray`` to make working with particular wrapped duck arrays
2 changes: 1 addition & 1 deletion doc/user-guide/reshaping.rst
@@ -274,7 +274,7 @@ Sort
----

One may sort a DataArray/Dataset via :py:meth:`~xarray.DataArray.sortby` and
:py:meth:`~xarray.Dataset.sortby`. The input can be an individual or list of
:py:meth:`~xarray.Dataset.sortby`. The input can be an individual or list of
1D ``DataArray`` objects:

.. ipython:: python
3 changes: 2 additions & 1 deletion doc/whats-new.rst
@@ -22,7 +22,8 @@ v2024.07.1 (unreleased)

New Features
~~~~~~~~~~~~

- Make chunk manager an option in ``set_options`` (:pull:`9362`).
By `Tom White <https://github.com/tomwhite>`_.

Breaking changes
~~~~~~~~~~~~~~~~
8 changes: 5 additions & 3 deletions xarray/core/combine.py
@@ -89,10 +89,12 @@ def _infer_concat_order_from_coords(datasets):
# Need to read coordinate values to do ordering
indexes = [ds._indexes.get(dim) for ds in datasets]
if any(index is None for index in indexes):
raise ValueError(
"Every dimension needs a coordinate for "
"inferring concatenation order"
error_msg = (
f"Every dimension requires a corresponding 1D coordinate "
f"and index for inferring concatenation order but the "
f"coordinate '{dim}' has no corresponding index"
)
raise ValueError(error_msg)

# TODO (benbovy, flexible indexes): support flexible indexes?
indexes = [index.to_pandas_index() for index in indexes]
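
A small sketch of the situation the reworded error targets, mirroring the test updated in this commit:

    import xarray as xr

    # The second dataset has no "y" coordinate/index, so the concatenation
    # order along "y" cannot be inferred.
    objs = [xr.Dataset({"x": [0], "y": [0]}), xr.Dataset({"x": [0]})]
    xr.combine_by_coords(objs)  # ValueError naming the coordinate without an index
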
8 changes: 5 additions & 3 deletions xarray/core/dataset.py
@@ -1576,9 +1576,11 @@ def __getitem__(
try:
return self._construct_dataarray(key)
except KeyError as e:
raise KeyError(
f"No variable named {key!r}. Variables on the dataset include {shorten_list_repr(list(self.variables.keys()), max_items=10)}"
) from e
message = f"No variable named {key!r}. Variables on the dataset include {shorten_list_repr(list(self.variables.keys()), max_items=10)}"
# If someone attempts `ds['foo' , 'bar']` instead of `ds[['foo', 'bar']]`
if isinstance(key, tuple):
message += f"\nHint: use a list to select multiple variables, for example `ds[{[d for d in key]}]`"
raise KeyError(message) from e

if utils.iterable_of_hashable(key):
return self._copy_listed(key)
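
A brief sketch of the behaviour the new hint addresses:

    import xarray as xr

    ds = xr.Dataset({"foo": ("x", [1, 2]), "bar": ("x", [3, 4])})
    ds[["foo", "bar"]]  # correct: a list selects multiple variables
    ds["foo", "bar"]    # KeyError, now with a hint to use ds[['foo', 'bar']]
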
7 changes: 6 additions & 1 deletion xarray/core/options.py
@@ -10,6 +10,7 @@

Options = Literal[
"arithmetic_join",
"chunk_manager",
"cmap_divergent",
"cmap_sequential",
"display_max_rows",
@@ -36,6 +37,7 @@
class T_Options(TypedDict):
arithmetic_broadcast: bool
arithmetic_join: Literal["inner", "outer", "left", "right", "exact"]
chunk_manager: str
cmap_divergent: str | Colormap
cmap_sequential: str | Colormap
display_max_rows: int
@@ -62,6 +64,7 @@ class T_Options(TypedDict):
OPTIONS: T_Options = {
"arithmetic_broadcast": True,
"arithmetic_join": "inner",
"chunk_manager": "dask",
"cmap_divergent": "RdBu_r",
"cmap_sequential": "viridis",
"display_max_rows": 12,
@@ -172,7 +175,9 @@ class set_options:
- "override": if indexes are of same size, rewrite indexes to be
those of the first object with that dimension. Indexes for the same
dimension must have the same size in all objects.
chunk_manager : str, default: "dask"
Chunk manager to use for chunked array computations when multiple
options are installed.
cmap_divergent : str or matplotlib.colors.Colormap, default: "RdBu_r"
Colormap to use for divergent data plots. If string, must be
matplotlib built-in colormap. Can also be a Colormap object
5 changes: 3 additions & 2 deletions xarray/namedarray/parallelcompat.py
@@ -14,6 +14,7 @@

import numpy as np

from xarray.core.options import OPTIONS
from xarray.core.utils import emit_user_level_warning
from xarray.namedarray.pycompat import is_chunked_array

@@ -101,8 +102,8 @@ def guess_chunkmanager(
# use the only option available
manager = next(iter(chunkmanagers.keys()))
else:
# default to trying to use dask
manager = "dask"
# use the one in options (default dask)
manager = OPTIONS["chunk_manager"]

if isinstance(manager, str):
if manager not in chunkmanagers:
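
Roughly, the resolution order after this change (a sketch using the internal guess_chunkmanager helper; assuming dask is installed):

    from xarray import set_options
    from xarray.namedarray.parallelcompat import guess_chunkmanager

    # With no explicit manager given, the "chunk_manager" option decides
    # which registered chunkmanager is used (default: "dask").
    with set_options(chunk_manager="dask"):
        manager = guess_chunkmanager(None)
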
11 changes: 8 additions & 3 deletions xarray/tests/test_backends.py
@@ -899,7 +899,7 @@ def test_roundtrip_empty_vlen_string_array(self) -> None:
if actual["a"].dtype.metadata is not None:
assert check_vlen_dtype(actual["a"].dtype) is str
else:
assert actual["a"].dtype == np.dtype("<U1")
assert actual["a"].dtype == np.dtype("=U1")

@pytest.mark.parametrize(
"decoded_fn, encoded_fn",
@@ -1516,8 +1516,8 @@ def test_encoding_kwarg_vlen_string(
expected = Dataset({"x": expected_string})
kwargs = dict(encoding={"x": {"dtype": str}})
with self.roundtrip(original, save_kwargs=kwargs) as actual:
assert actual["x"].encoding["dtype"] == "<U3"
assert actual["x"].dtype == "<U3"
assert actual["x"].encoding["dtype"] == "=U3"
assert actual["x"].dtype == "=U3"
assert_identical(actual, expected)

@pytest.mark.parametrize("fill_value", ["XXX", "", "bár"])
@@ -2182,6 +2182,11 @@ def test_write_inconsistent_chunks(self) -> None:
assert actual["x"].encoding["chunksizes"] == (50, 100)
assert actual["y"].encoding["chunksizes"] == (100, 50)

# Flaky test. Very open to contributions on fixing this
@pytest.mark.flaky
def test_roundtrip_coordinates(self) -> None:
super().test_roundtrip_coordinates()


@requires_zarr
class ZarrBase(CFEncodedBase):
4 changes: 2 additions & 2 deletions xarray/tests/test_coding_times.py
@@ -1253,7 +1253,7 @@ def test_roundtrip_datetime64_nanosecond_precision(
encoding = {}

var = Variable(["time"], times, encoding=encoding)
assert var.dtype == np.dtype("<M8[ns]")
assert var.dtype == np.dtype("=M8[ns]")

encoded_var = conventions.encode_cf_variable(var)
assert (
@@ -1264,7 +1264,7 @@
assert encoded_var.data.dtype == dtype

decoded_var = conventions.decode_cf_variable("foo", encoded_var)
assert decoded_var.dtype == np.dtype("<M8[ns]")
assert decoded_var.dtype == np.dtype("=M8[ns]")
assert (
decoded_var.encoding["units"]
== f"{_numpy_to_netcdf_timeunit(timeunit)} since 1970-01-01 00:00:00"
5 changes: 4 additions & 1 deletion xarray/tests/test_combine.py
@@ -728,7 +728,10 @@ def test_combine_by_coords(self):
combine_by_coords(objs)

objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
with pytest.raises(ValueError, match=r"Every dimension needs a coordinate"):
with pytest.raises(
ValueError,
match=r"Every dimension requires a corresponding 1D coordinate and index",
):
combine_by_coords(objs)

def test_empty_input(self):
2 changes: 1 addition & 1 deletion xarray/tests/test_dask.py
@@ -1381,7 +1381,7 @@ def test_map_blocks_roundtrip_string_index():
ds = xr.Dataset(
{"data": (["label"], [1, 2, 3])}, coords={"label": ["foo", "bar", "baz"]}
).chunk(label=1)
assert ds.label.dtype == np.dtype("<U3")
assert ds.label.dtype == np.dtype("=U3")

mapped = ds.map_blocks(lambda x: x, template=ds)
assert mapped.label.dtype == ds.label.dtype
5 changes: 5 additions & 0 deletions xarray/tests/test_dataset.py
@@ -4134,6 +4134,11 @@ def test_getitem(self) -> None:
data["notfound"]
with pytest.raises(KeyError):
data[["var1", "notfound"]]
with pytest.raises(
KeyError,
match=r"Hint: use a list to select multiple variables, for example `ds\[\['var1', 'var2'\]\]`",
):
data["var1", "var2"]

actual1 = data[["var1", "var2"]]
expected1 = Dataset({"var1": data["var1"], "var2": data["var2"]})
6 changes: 6 additions & 0 deletions xarray/tests/test_parallelcompat.py
@@ -6,6 +6,7 @@
import numpy as np
import pytest

from xarray import set_options
from xarray.core.types import T_Chunks, T_DuckArray, T_NormalizedChunks
from xarray.namedarray._typing import _Chunks
from xarray.namedarray.daskmanager import DaskManager
@@ -152,6 +153,11 @@ def test_get_chunkmanger(self, register_dummy_chunkmanager) -> None:
chunkmanager = guess_chunkmanager("dummy")
assert isinstance(chunkmanager, DummyChunkManager)

def test_get_chunkmanger_via_set_options(self, register_dummy_chunkmanager) -> None:
with set_options(chunk_manager="dummy"):
chunkmanager = guess_chunkmanager(None)
assert isinstance(chunkmanager, DummyChunkManager)

def test_fail_on_nonexistent_chunkmanager(self) -> None:
with pytest.raises(ValueError, match="unrecognized chunk manager foo"):
guess_chunkmanager("foo")
