diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 0b9b5046cb9..f1d58813958 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -26,7 +26,7 @@ combine_by_coords, ) from ..core.dataarray import DataArray -from ..core.dataset import Dataset, _maybe_chunk +from ..core.dataset import Dataset, _get_chunk, _maybe_chunk from ..core.utils import close_on_error, is_grib_path, is_remote_uri from .common import AbstractDataStore, ArrayWriter from .locks import _get_scheduler @@ -536,7 +536,7 @@ def maybe_decode_store(store, chunks): k: _maybe_chunk( k, v, - store.get_chunk(k, v, chunks), + _get_chunk(k, v, chunks), overwrite_encoded_chunks=overwrite_encoded_chunks, ) for k, v in ds.variables.items() diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 7e4605c42ce..e71437da8ab 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -1,7 +1,8 @@ import os +from ..core.dataset import _get_chunk, _maybe_chunk from ..core.utils import is_remote_uri -from . import plugins, zarr +from . import plugins from .api import ( _autodetect_engine, _get_backend_cls, @@ -54,10 +55,15 @@ def dataset_from_backend_dataset( if isinstance(chunks, int): chunks = dict.fromkeys(ds.dims, chunks) - variables = { - k: zarr.ZarrStore.maybe_chunk(k, v, chunks, overwrite_encoded_chunks) - for k, v in ds.variables.items() - } + variables = {} + for k, v in ds.variables.items(): + var_chunks = _get_chunk(k, v, chunks) + variables[k] = _maybe_chunk( + k, + v, + var_chunks, + overwrite_encoded_chunks=overwrite_encoded_chunks, + ) ds2 = ds._replace(variables) else: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 9827c345239..f3c92d52303 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1,5 +1,3 @@ -import warnings - import numpy as np from .. 
import coding, conventions @@ -368,53 +366,6 @@ def encode_variable(self, variable): def encode_attribute(self, a): return encode_zarr_attr_value(a) - @staticmethod - def get_chunk(name, var, chunks): - chunk_spec = dict(zip(var.dims, var.encoding.get("chunks"))) - - # Coordinate labels aren't chunked - if var.ndim == 1 and var.dims[0] == name: - return chunk_spec - - if chunks == "auto": - return chunk_spec - - for dim in var.dims: - if dim in chunks: - spec = chunks[dim] - if isinstance(spec, int): - spec = (spec,) - if isinstance(spec, (tuple, list)) and chunk_spec[dim]: - if any(s % chunk_spec[dim] for s in spec): - warnings.warn( - "Specified Dask chunks %r would " - "separate Zarr chunk shape %r for " - "dimension %r. This significantly " - "degrades performance. Consider " - "rechunking after loading instead." - % (chunks[dim], chunk_spec[dim], dim), - stacklevel=2, - ) - chunk_spec[dim] = chunks[dim] - return chunk_spec - - @classmethod - def maybe_chunk(cls, name, var, chunks, overwrite_encoded_chunks): - chunk_spec = cls.get_chunk(name, var, chunks) - - if (var.ndim > 0) and (chunk_spec is not None): - from dask.base import tokenize - - # does this cause any data to be read? 
def _get_chunk(name, var, chunks):
    """Return the per-dimension chunk specification to use for ``var``.

    The result starts from the chunk sizes stored in
    ``var.encoding["chunks"]`` (paired positionally with ``var.dims``) and is
    then overridden, dimension by dimension, with the sizes requested in
    ``chunks``.

    Parameters
    ----------
    name : hashable
        Name of the variable. A one-dimensional variable whose only
        dimension equals ``name`` is treated as a coordinate label and keeps
        its encoded chunks unchanged.
    var : Variable-like
        Object exposing ``dims``, ``ndim`` and an ``encoding`` mapping.
    chunks : mapping or "auto"
        Requested chunk sizes keyed by dimension name, or the string
        ``"auto"`` to keep the encoded chunks as they are.

    Returns
    -------
    dict
        Mapping from dimension name to chunk size. Dimensions that have
        neither an encoded nor a requested chunk size are absent, so the
        mapping is empty when nothing is known about the variable.

    Warns
    -----
    UserWarning
        If a requested chunk size is not a multiple of the encoded chunk
        size for the same dimension.
    """
    # Not every variable records chunks in its encoding; ``zip(dims, None)``
    # would raise TypeError, so a missing entry means "no encoded chunks".
    encoded_chunks = var.encoding.get("chunks")
    if encoded_chunks is None:
        encoded_chunks = ()
    chunk_spec = dict(zip(var.dims, encoded_chunks))

    # Coordinate labels aren't chunked
    if var.ndim == 1 and var.dims[0] == name:
        return chunk_spec

    if chunks == "auto":
        return chunk_spec

    for dim in var.dims:
        if dim not in chunks:
            continue

        requested = chunks[dim]
        # ``.get`` because the dimension may have no encoded chunk size.
        encoded = chunk_spec.get(dim)
        sizes = (requested,) if isinstance(requested, int) else requested

        # Only compare when an encoded (non-zero) chunk size is known.
        if encoded and isinstance(sizes, (tuple, list)):
            if any(size % encoded for size in sizes):
                warnings.warn(
                    f"Specified Dask chunks {requested} would separate "
                    f"on disks chunk shape {encoded} for dimension {dim}. "
                    "This could degrade performance. "
                    "Consider rechunking after loading instead.",
                    stacklevel=2,
                )
        chunk_spec[dim] = requested
    return chunk_spec