Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restore optimizations for NDBuffer.all_equal #2730

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 25 additions & 10 deletions src/zarr/core/buffer/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,18 +460,33 @@
def __repr__(self) -> str:
    """Return a debug string with the buffer's shape, dtype and the repr of the wrapped data."""
    return f"<NDBuffer shape={self.shape} dtype={self.dtype} {self._data!r}>"

def all_equal(self, value: Any, equal_nan: bool = True) -> bool:
    """Return whether every element of the wrapped array equals `value`.

    This avoids broadcasting `value` into a full-size temporary array (as
    the earlier ``np.array_equal`` based implementation did) by comparing
    against the scalar directly.

    Parameters
    ----------
    value
        The scalar to compare every element against (typically a fill
        value). ``None`` never matches.
    equal_nan
        If True (the default), a NaN `value` is considered equal to NaN
        elements of a floating-point or complex array. If False, NaN never
        compares equal to anything.

    Returns
    -------
    bool
        True if all elements are equal to `value`, False otherwise.
    """
    if value is None:
        # Handle None fill_value for Zarr V2
        return False

    data = self._data

    if not value:
        # If `value` is falsey, then just 1 truthy value in the array is
        # sufficient to return False. We assume here that np.any is
        # optimized to return on the first truthy value it encounters.
        try:
            return not np.any(data)
        except (TypeError, ValueError):  # pragma: no cover
            # np.any is not supported for this dtype; fall through to the
            # generic comparisons below instead of failing.
            pass

    if np.issubdtype(data.dtype, np.object_):
        # We have to flatten the result of np.equal to handle outputs like
        # [np.array([True,True]), True, True]
        # NOTE: Codecov reported this branch as not covered by tests.
        return all(np.equal(value, data, dtype=data.dtype).flatten())

    # Numpy errors if you call np.isnan on custom dtypes, so ensure we are
    # working with floating-point or complex data before calling isnan.
    if equal_nan and np.issubdtype(data.dtype, np.inexact) and np.isnan(value):
        return bool(np.all(np.isnan(data)))

    # Using == raises warnings from numpy deprecated pattern, but
    # using np.equal() raises type errors for structured dtypes...
    # bool() converts the np.bool_ result to the annotated return type.
    return bool(np.all(value == data))

def fill(self, value: Any) -> None:
    """Fill the wrapped data with `value` by delegating to its own ``fill`` method."""
    self._data.fill(value)
Expand Down
Loading