diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 74120f068e5..c84a0549774 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -62,8 +62,10 @@ Bug fixes
   coordinates. See the corresponding pull-request on GitHub for more details.
   (:pull:`5692`). By `Benoît Bovy `_.
 - Fixed "unhashable type" error trying to read NetCDF file with variable having its 'units'
-  attribute not ``str`` (e.g. ``numpy.ndarray``) (:issue:`6368`).
-  By `Oleh Khoma `_.
+  attribute not ``str`` (e.g. ``numpy.ndarray``) (:issue:`6368`). By `Oleh Khoma `_.
+- Omit warning about specified dask chunks separating chunks on disk when the
+  underlying array is empty (e.g., because of an empty dimension) (:issue:`6401`).
+  By `Joseph K Aicher `_.
 - Fixed the poor html repr performance on large multi-indexes (:pull:`6400`).
   By `Benoît Bovy `_.
 - Allow fancy indexing of duck dask arrays along multiple dimensions. (:pull:`6414`)
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 0c3a59383da..6ff7a1f76e9 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -199,30 +199,34 @@ def _get_chunk(var, chunks):
         chunk_shape, shape=shape, dtype=var.dtype, previous_chunks=preferred_chunk_shape
     )

-    # Warn where requested chunks break preferred chunks.
-    for dim, size, chunk_sizes in zip(dims, shape, chunk_shape):
-        try:
-            preferred_chunk_sizes = preferred_chunks[dim]
-        except KeyError:
-            continue
-        # Determine the stop indices of the preferred chunks, but omit the last stop
-        # (equal to the dim size). In particular, assume that when a sequence expresses
-        # the preferred chunks, the sequence sums to the size.
-        preferred_stops = (
-            range(preferred_chunk_sizes, size, preferred_chunk_sizes)
-            if isinstance(preferred_chunk_sizes, Number)
-            else itertools.accumulate(preferred_chunk_sizes[:-1])
-        )
-        # Gather any stop indices of the specified chunks that are not a stop index of a
-        # preferred chunk. Again, omit the last stop, assuming that it equals the dim
-        # size.
-        breaks = set(itertools.accumulate(chunk_sizes[:-1])).difference(preferred_stops)
-        if breaks:
-            warnings.warn(
-                "The specified Dask chunks separate the stored chunks along dimension "
-                f'"{dim}" starting at index {min(breaks)}. This could degrade '
-                "performance. Instead, consider rechunking after loading."
+    # Warn where requested chunks break preferred chunks, provided that the variable
+    # contains data.
+    if var.size:
+        for dim, size, chunk_sizes in zip(dims, shape, chunk_shape):
+            try:
+                preferred_chunk_sizes = preferred_chunks[dim]
+            except KeyError:
+                continue
+            # Determine the stop indices of the preferred chunks, but omit the last stop
+            # (equal to the dim size). In particular, assume that when a sequence
+            # expresses the preferred chunks, the sequence sums to the size.
+            preferred_stops = (
+                range(preferred_chunk_sizes, size, preferred_chunk_sizes)
+                if isinstance(preferred_chunk_sizes, Number)
+                else itertools.accumulate(preferred_chunk_sizes[:-1])
             )
+            # Gather any stop indices of the specified chunks that are not a stop index
+            # of a preferred chunk. Again, omit the last stop, assuming that it equals
+            # the dim size.
+            breaks = set(itertools.accumulate(chunk_sizes[:-1])).difference(
+                preferred_stops
+            )
+            if breaks:
+                warnings.warn(
+                    "The specified Dask chunks separate the stored chunks along "
+                    f'dimension "{dim}" starting at index {min(breaks)}. This could '
+                    "degrade performance. Instead, consider rechunking after loading."
+                )

     return dict(zip(dims, chunk_shape))

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 94ffe619bd8..6f6bb0410e2 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -2210,6 +2210,13 @@ def test_save_emptydim(self, chunk):
         with self.roundtrip(ds) as ds_reload:
             assert_identical(ds, ds_reload)

+    @requires_dask
+    def test_no_warning_from_open_emptydim_with_chunks(self):
+        ds = Dataset({"x": (("a", "b"), np.empty((5, 0)))}).chunk({"a": 1})
+        with assert_no_warnings():
+            with self.roundtrip(ds, open_kwargs=dict(chunks={"a": 1})) as ds_reload:
+                assert_identical(ds, ds_reload)
+
     @pytest.mark.parametrize("consolidated", [False, True])
     @pytest.mark.parametrize("compute", [False, True])
     @pytest.mark.parametrize("use_dask", [False, True])
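
Note (not part of the patch): a minimal end-to-end sketch of the behavior this change targets, assuming dask and zarr are installed; the store path "empty_dim.zarr" and the exact reproduction steps are illustrative only. Writing a variable with an empty dimension and reopening it with chunks that split the stored chunks should no longer emit the "specified Dask chunks separate the stored chunks" warning, because _get_chunk now skips the check when var.size is 0.

import warnings

import numpy as np
import xarray as xr

# Variable with an empty dimension: 5 rows along "a", 0 columns along "b".
ds = xr.Dataset({"x": (("a", "b"), np.empty((5, 0)))})
ds.to_zarr("empty_dim.zarr", mode="w")  # throwaway example store

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Requesting chunks={"a": 1} would split the stored chunks along "a", but
    # the array is empty, so no chunk-mismatch warning should be raised.
    reopened = xr.open_dataset("empty_dim.zarr", engine="zarr", chunks={"a": 1})

assert not any("separate the stored chunks" in str(w.message) for w in caught)
xr.testing.assert_identical(ds, reopened)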