doc/whats-new.rst (4 additions & 2 deletions)
@@ -62,8 +62,10 @@ Bug fixes
   coordinates. See the corresponding pull-request on GitHub for more details. (:pull:`5692`).
   By `Benoît Bovy <https://github.com/benbovy>`_.
 - Fixed "unhashable type" error trying to read NetCDF file with variable having its 'units'
-  attribute not ``str`` (e.g. ``numpy.ndarray``) (:issue:`6368`).
-  By `Oleh Khoma <https://github.com/okhoma>`_.
+  attribute not ``str`` (e.g. ``numpy.ndarray``) (:issue:`6368`). By `Oleh Khoma <https://github.com/okhoma>`_.
+- Omit warning about specified dask chunks separating chunks on disk when the
+  underlying array is empty (e.g., because of an empty dimension) (:issue:`6401`).
+  By `Joseph K Aicher <https://github.com/jaicher>`_.
 - Fixed the poor html repr performance on large multi-indexes (:pull:`6400`).
   By `Benoît Bovy <https://github.com/benbovy>`_.
 - Allow fancy indexing of duck dask arrays along multiple dimensions. (:pull:`6414`)
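The whats-new entry above describes the user-facing effect of the `_get_chunk` change below. A minimal sketch of the scenario, assuming dask and a netCDF backend are installed (the file name and chunk sizes here are illustrative, not from the PR):

```python
import numpy as np
import xarray as xr

# A variable with an empty dimension: it holds no data, so no stored chunk
# can actually be split by the requested dask chunks.
ds = xr.Dataset({"x": (("a", "b"), np.empty((5, 0)))})
ds.to_netcdf("empty_dim.nc")

# Before this change, opening with explicit chunks could emit a spurious
# "specified Dask chunks separate the stored chunks" warning; the check is
# now skipped whenever the variable is empty (var.size == 0).
reloaded = xr.open_dataset("empty_dim.nc", chunks={"a": 1})
print(reloaded.x.shape)  # (5, 0)
```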
xarray/core/dataset.py (27 additions & 23 deletions)
@@ -199,30 +199,34 @@ def _get_chunk(var, chunks):
         chunk_shape, shape=shape, dtype=var.dtype, previous_chunks=preferred_chunk_shape
     )
 
-    # Warn where requested chunks break preferred chunks.
-    for dim, size, chunk_sizes in zip(dims, shape, chunk_shape):
-        try:
-            preferred_chunk_sizes = preferred_chunks[dim]
-        except KeyError:
-            continue
-        # Determine the stop indices of the preferred chunks, but omit the last stop
-        # (equal to the dim size). In particular, assume that when a sequence expresses
-        # the preferred chunks, the sequence sums to the size.
-        preferred_stops = (
-            range(preferred_chunk_sizes, size, preferred_chunk_sizes)
-            if isinstance(preferred_chunk_sizes, Number)
-            else itertools.accumulate(preferred_chunk_sizes[:-1])
-        )
-        # Gather any stop indices of the specified chunks that are not a stop index of a
-        # preferred chunk. Again, omit the last stop, assuming that it equals the dim
-        # size.
-        breaks = set(itertools.accumulate(chunk_sizes[:-1])).difference(preferred_stops)
-        if breaks:
-            warnings.warn(
-                "The specified Dask chunks separate the stored chunks along dimension "
-                f'"{dim}" starting at index {min(breaks)}. This could degrade '
-                "performance. Instead, consider rechunking after loading."
-            )
+    # Warn where requested chunks break preferred chunks, provided that the variable
+    # contains data.
+    if var.size:
+        for dim, size, chunk_sizes in zip(dims, shape, chunk_shape):
+            try:
+                preferred_chunk_sizes = preferred_chunks[dim]
+            except KeyError:
+                continue
+            # Determine the stop indices of the preferred chunks, but omit the last stop
+            # (equal to the dim size). In particular, assume that when a sequence
+            # expresses the preferred chunks, the sequence sums to the size.
+            preferred_stops = (
+                range(preferred_chunk_sizes, size, preferred_chunk_sizes)
+                if isinstance(preferred_chunk_sizes, Number)
+                else itertools.accumulate(preferred_chunk_sizes[:-1])
+            )
+            # Gather any stop indices of the specified chunks that are not a stop index
+            # of a preferred chunk. Again, omit the last stop, assuming that it equals
+            # the dim size.
+            breaks = set(itertools.accumulate(chunk_sizes[:-1])).difference(
+                preferred_stops
+            )
+            if breaks:
+                warnings.warn(
+                    "The specified Dask chunks separate the stored chunks along "
+                    f'dimension "{dim}" starting at index {min(breaks)}. This could '
+                    "degrade performance. Instead, consider rechunking after loading."
+                )
 
     return dict(zip(dims, chunk_shape))
 
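To make the stop-index comparison in `_get_chunk` concrete, here is a standalone sketch of the same break-detection logic with made-up sizes (the numbers are illustrative only):

```python
import itertools

size = 12
preferred_chunk_sizes = (4, 4, 4)  # chunk sizes as stored on disk
chunk_sizes = (4, 2, 2, 4)         # chunk sizes requested via dask

# Stop indices of the stored chunks, omitting the final stop (== size):
# accumulate((4, 4)) -> 4, 8
preferred_stops = set(itertools.accumulate(preferred_chunk_sizes[:-1]))

# Stop indices of the requested chunks, again omitting the final stop:
# accumulate((4, 2, 2)) -> 4, 6, 8
requested_stops = set(itertools.accumulate(chunk_sizes[:-1]))

# Any requested stop that is not also a stored stop splits a chunk on disk.
breaks = requested_stops - preferred_stops
print(min(breaks))  # 6 -> the warning reports the dimension and index 6
```

An empty dimension makes `var.size` zero, so with the change above this whole comparison is skipped and no warning can fire.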
xarray/tests/test_backends.py (7 additions & 0 deletions)
@@ -2210,6 +2210,13 @@ def test_save_emptydim(self, chunk):
         with self.roundtrip(ds) as ds_reload:
             assert_identical(ds, ds_reload)
 
+    @requires_dask
+    def test_no_warning_from_open_emptydim_with_chunks(self):
+        ds = Dataset({"x": (("a", "b"), np.empty((5, 0)))}).chunk({"a": 1})
+        with assert_no_warnings():
+            with self.roundtrip(ds, open_kwargs=dict(chunks={"a": 1})) as ds_reload:
+                assert_identical(ds, ds_reload)
+
     @pytest.mark.parametrize("consolidated", [False, True])
     @pytest.mark.parametrize("compute", [False, True])
     @pytest.mark.parametrize("use_dask", [False, True])
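The new test leans on xarray's `assert_no_warnings` helper from `xarray.tests`. As a rough sketch of what such a context manager does (the real helper may differ in detail):

```python
import warnings
from contextlib import contextmanager

@contextmanager
def assert_no_warnings():
    # Record every warning raised inside the block, then fail if any occurred.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        yield caught
    assert not caught, f"unexpected warnings: {[str(w.message) for w in caught]}"
```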