Preserve dimension separator metadata when resizing arrays #1540

Merged: 6 commits, Oct 26, 2023
Changes from all commits
docs/release.rst: 5 changes (4 additions, 1 deletion)
@@ -21,11 +21,14 @@ Unreleased
 Maintenance
 ~~~~~~~~~~~
 
+* Preserve ``dimension_separator`` when resizing arrays.
+  By :user:`Ziwen Liu <ziw-liu>` :issue:`1533`.
+
 * Initialise some sets in tests with set literals instead of list literals.
   By :user:`Dimitri Papadopoulos Orfanos <DimitriPapadopoulos>` :issue:`1534`.
 
 * Allow ``black`` code formatter to be run with any Python version.
-  By :user:`David Stansby <dstansby>` :issue:`1549`
+  By :user:`David Stansby <dstansby>` :issue:`1549`.
 
 .. _release_2.16.1:
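For context, a minimal reproduction sketch of the bug the release note above describes (a hypothetical script, assuming the zarr-python 2.x API; the store path `example.zarr` is arbitrary). Before this change, `resize()` rewrote the `.zarray` document without the `dimension_separator` key, silently reverting a `/`-separated array to the default `.` separator:

```python
import json

import zarr

# Create a v2 array whose metadata records a non-default chunk key separator.
store = zarr.DirectoryStore("example.zarr")
z = zarr.zeros(
    (10, 10), chunks=(5, 5), store=store, dimension_separator="/", overwrite=True
)

# resize() rewrites the .zarray metadata document. Before this PR, the
# rewritten document dropped the dimension_separator key, so chunks written
# afterwards would be keyed with "." instead of "/".
z.resize(20, 20)

meta = json.loads(store[".zarray"])
assert meta["dimension_separator"] == "/"  # failed before this fix
```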
zarr/core.py: 25 changes (8 additions, 17 deletions)
@@ -253,7 +253,6 @@ def _load_metadata_nosync(self):
         except KeyError:
             raise ArrayNotFoundError(self._path)
         else:
-
             # decode and store metadata as instance members
             meta = self._store._metadata_class.decode_array_metadata(meta_bytes)
             self._meta = meta
@@ -341,7 +340,14 @@ def _flush_metadata_nosync(self):
             filters=filters_config,
         )
         if getattr(self._store, "_store_version", 2) == 2:
-            meta.update(dict(chunks=self._chunks, dtype=self._dtype, order=self._order))
+            meta.update(
+                dict(
+                    chunks=self._chunks,
+                    dtype=self._dtype,
+                    order=self._order,
+                    dimension_separator=self._dimension_separator,
+                )
+            )
         else:
             meta.update(
                 dict(

Member (review comment on the `meta.update` call): Thanks, @ziw-liu! Moving forward, I do wonder if we don't want to encapsulate all of these items to prevent this type of error.
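One hypothetical shape for the encapsulation floated in the review comment above: gather the v2-specific metadata fields in a single helper, so a future field cannot be forgotten at one call site. The helper name `_v2_meta_fields` is illustrative only and not part of this PR:

```python
def _v2_meta_fields(self) -> dict:
    """Collect every v2 array-metadata field in one place, so that callers
    such as _flush_metadata_nosync() cannot silently drop one of them."""
    return dict(
        chunks=self._chunks,
        dtype=self._dtype,
        order=self._order,
        dimension_separator=self._dimension_separator,
    )

# _flush_metadata_nosync() would then reduce to:
#     if getattr(self._store, "_store_version", 2) == 2:
#         meta.update(self._v2_meta_fields())
```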
@@ -1358,7 +1364,6 @@ def get_mask_selection(self, selection, out=None, fields=None):
         return self._get_selection(indexer=indexer, out=out, fields=fields)
 
     def _get_selection(self, indexer, out=None, fields=None):
-
         # We iterate over all chunks which overlap the selection and thus contain data
         # that needs to be extracted. Each chunk is processed in turn, extracting the
         # necessary data and storing into the correct location in the output array.
@@ -1983,7 +1988,6 @@ def _set_basic_selection_nd(self, selection, value, fields=None):
         self._set_selection(indexer, value, fields=fields)
 
     def _set_selection(self, indexer, value, fields=None):
-
         # We iterate over all chunks which overlap the selection and thus contain data
         # that needs to be replaced. Each chunk is processed in turn, extracting the
         # necessary data from the value array and storing into the chunk array.
@@ -2018,7 +2022,6 @@ def _set_selection(self, indexer, value, fields=None):
         ):
             # iterative approach
             for chunk_coords, chunk_selection, out_selection in indexer:
-
                 # extract data to store
                 if sel_shape == ():
                     chunk_value = value
@@ -2077,7 +2080,6 @@ def _process_chunk(
             and not self._filters
             and self._dtype != object
         ):
-
             dest = out[out_selection]
             # Assume that array-like objects that doesn't have a
             # `writeable` flag is writable.
@@ -2088,7 +2090,6 @@
                 )
 
             if write_direct:
-
                 # optimization: we want the whole chunk, and the destination is
                 # contiguous, so we can decompress directly from the chunk
                 # into the destination array
@@ -2321,28 +2322,24 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None):
             # to access the existing chunk data
 
             if is_scalar(value, self._dtype):
-
                 # setup array filled with value
                 chunk = np.empty_like(
                     self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order
                 )
                 chunk.fill(value)
 
             else:
-
                 # ensure array is contiguous
                 chunk = value.astype(self._dtype, order=self._order, copy=False)
 
         else:
             # partially replace the contents of this chunk
 
             try:
-
                 # obtain compressed data for chunk
                 cdata = self.chunk_store[ckey]
 
             except KeyError:
-
                 # chunk not initialized
                 if self._fill_value is not None:
                     chunk = np.empty_like(
@@ -2359,7 +2356,6 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None):
                     )
 
             else:
-
                 # decode chunk
                 chunk = self._decode_chunk(cdata)
                 if not chunk.flags.writeable:
@@ -2429,7 +2425,6 @@ def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None):
         return chunk
 
     def _encode_chunk(self, chunk):
-
         # apply filters
         if self._filters:
             for f in self._filters:
@@ -2619,7 +2614,6 @@ def __setstate__(self, state):
         self.__init__(**state)
 
     def _synchronized_op(self, f, *args, **kwargs):
-
         if self._synchronizer is None:
             # no synchronization
             lock = nolock
@@ -2636,7 +2630,6 @@ def _synchronized_op(self, f, *args, **kwargs):
         return result
 
     def _write_op(self, f, *args, **kwargs):
-
         # guard condition
         if self._read_only:
             raise ReadOnlyError()
@@ -2676,7 +2669,6 @@ def resize(self, *args):
         return self._write_op(self._resize_nosync, *args)
 
     def _resize_nosync(self, *args):
-
         # normalize new shape argument
         old_shape = self._shape
         new_shape = normalize_resize_args(old_shape, *args)
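(Context note: not shown in this truncated hunk, `_resize_nosync` also rewrites the array metadata via `self._flush_metadata_nosync()`, which is why the missing `dimension_separator` field surfaced specifically when resizing.)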
@@ -2755,7 +2747,6 @@ def append(self, data, axis=0):
         return self._write_op(self._append_nosync, data, axis=axis)
 
     def _append_nosync(self, data, axis=0):
-
         # ensure data is array-like
         if not hasattr(data, "shape"):
             data = np.asanyarray(data, like=self._meta_array)