Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(feat): fall back to pure python indexing in case of unhandled rust indexing for read #30

Merged
merged 27 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
afd0a37
(chore): file structure
ilan-gold Nov 7, 2024
4ceba6c
Merge branch 'ld/codec_pipeline' into ig/refactoring
ilan-gold Nov 7, 2024
2af41ec
(chore): parametrize tests to get full scope of possibilities
ilan-gold Nov 7, 2024
db97bd4
Merge branch 'ld/codec_pipeline' into ig/test_full_indexing
ilan-gold Nov 8, 2024
c945e88
(chore): xfail tests that fail on zarr-python default pipeline
ilan-gold Nov 8, 2024
0bb8dbc
(fix) singular
ilan-gold Nov 8, 2024
3f176bf
(fix): check for contiguous index arrays
ilan-gold Nov 8, 2024
ff44774
(fix): contiguous numpy arrays converted to slices
ilan-gold Nov 8, 2024
f81a1c8
(feat): add reading for non-contiguous buffers
ilan-gold Nov 8, 2024
42de375
(chore): remove unused imports
ilan-gold Nov 8, 2024
8b60bed
(fix): cleanup unwraps in `retrieve_chunks`
LDeakin Nov 8, 2024
c35766c
Merge branch 'ld/codec_pipeline' into ig/test_full_indexing
flying-sheep Nov 9, 2024
16262fe
Refactor full indexing (#34)
flying-sheep Nov 11, 2024
96d8a2e
(chore): `make_chunk_info_for_rust` cleanup
ilan-gold Nov 11, 2024
fbfa958
(fix): all tests working except "tests/test_pipeline.py::test_roundtr…
ilan-gold Nov 11, 2024
9f90b5e
(fix): skip read in `store_chunk_subset_bytes` for full chunks
LDeakin Nov 11, 2024
deba69e
(fix): improve dropped index detection + disallow integer write case
ilan-gold Nov 12, 2024
0de267f
(chore): message more specific
ilan-gold Nov 12, 2024
e53d4a7
(fix): use `Exception`
ilan-gold Nov 14, 2024
45decd5
(chore): erroneous comment
ilan-gold Nov 14, 2024
68b80e5
(chore): `drop_axes` default
ilan-gold Nov 14, 2024
147ac56
(chore): `drop_axes` param
ilan-gold Nov 14, 2024
f844224
(chore): apply review
ilan-gold Nov 14, 2024
7aeff55
(chore): `else` branch
ilan-gold Nov 14, 2024
639f77b
(chore): add basic nd tests (#35)
ilan-gold Nov 14, 2024
766cf41
(fix): clarify collapsed dimension behavior
ilan-gold Nov 14, 2024
61a4d4b
(chore): clean ups
ilan-gold Nov 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions python/zarrs_python/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

from ._internal import CodecPipelineImpl
from .utils import (
CollapsedDimensionError,
DiscontiguousArrayError,
get_max_threads,
make_chunk_info_for_rust,
Expand Down Expand Up @@ -100,9 +101,9 @@ async def read(
if not out.dtype.isnative:
raise RuntimeError("Non-native byte order not supported")
try:
chunks_desc = make_chunk_info_for_rust_with_indices(batch_info)
chunks_desc = make_chunk_info_for_rust_with_indices(batch_info, drop_axes)
index_in_rust = True
except DiscontiguousArrayError:
except (DiscontiguousArrayError, CollapsedDimensionError):
chunks_desc = make_chunk_info_for_rust(batch_info)
index_in_rust = False
if index_in_rust:
Expand Down Expand Up @@ -142,7 +143,7 @@ async def write(
value = np.ascontiguousarray(value, dtype=value.dtype.newbyteorder("="))
elif not value.flags.c_contiguous:
value = np.ascontiguousarray(value)
chunks_desc = make_chunk_info_for_rust_with_indices(batch_info)
chunks_desc = make_chunk_info_for_rust_with_indices(batch_info, drop_axes)
await asyncio.to_thread(
self.impl.store_chunks_with_indices,
chunks_desc,
Expand Down
93 changes: 92 additions & 1 deletion python/zarrs_python/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

if TYPE_CHECKING:
from collections.abc import Iterable
from types import EllipsisType

from zarr.abc.store import ByteGetter, ByteSetter
from zarr.core.array_spec import ArraySpec
Expand All @@ -23,7 +24,11 @@ class DiscontiguousArrayError(BaseException):
pass


# This is a copy of the function from zarr.core.indexing that fixes:
class CollapsedDimensionError(BaseException):
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved
pass


# This is a (mostly) copy of the function from zarr.core.indexing that fixes:
# DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated
# TODO: Upstream this fix
def make_slice_selection(selection: tuple[np.ndarray | float]) -> list[slice]:
Expand All @@ -44,6 +49,14 @@ def make_slice_selection(selection: tuple[np.ndarray | float]) -> list[slice]:
ls.append(slice(dim_selection[0], dim_selection[-1] + 1, 1))
else:
ls.append(dim_selection)
if (
sum(isinstance(dim_selection, np.ndarray) for dim_selection in selection)
== sum(isinstance(dim_selection, slice) for dim_selection in selection)
== len(selection)
):
raise DiscontiguousArrayError(
"Vindexing with multiple contiguous numpy arrays is not supported"
)
return ls


Expand All @@ -66,11 +79,89 @@ def convert_chunk_to_primitive(
)


def resulting_shape_from_index(
array_shape: tuple[int, ...],
index_tuple: tuple[int | slice | EllipsisType | np.ndarray],
drop_axes: tuple[int, ...],
*,
pad: bool,
) -> tuple[int, ...]:
result_shape = []
advanced_index_shapes = [
idx.shape for idx in index_tuple if isinstance(idx, np.ndarray)
]
basic_shape_index = 0

# Broadcast all advanced indices, if any
if advanced_index_shapes:
broadcasted_shape = np.broadcast_shapes(*advanced_index_shapes)
result_shape.extend(broadcasted_shape)
basic_shape_index += len(
advanced_index_shapes
) # Consume dimensions from array_shape
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved

# Process each remaining index in index_tuple
for idx in index_tuple:
if isinstance(idx, int):
# Integer index reduces dimension, so skip this dimension in array_shape
basic_shape_index += 1
elif isinstance(idx, slice):
if idx.step is not None and idx.step > 1:
raise DiscontiguousArrayError(
"Step size greater than 1 is not supported"
)
# Slice keeps dimension, adjust size accordingly
start, stop, _ = idx.indices(array_shape[basic_shape_index])
result_shape.append(stop - start)
basic_shape_index += 1
elif idx is Ellipsis:
# Calculate number of dimensions that Ellipsis should fill
num_to_fill = len(array_shape) - len(index_tuple) + 1
result_shape.extend(
array_shape[basic_shape_index : basic_shape_index + num_to_fill]
)
basic_shape_index += num_to_fill
flying-sheep marked this conversation as resolved.
Show resolved Hide resolved

# Step 4: Append remaining dimensions from array_shape if fewer indices were used
if basic_shape_index < len(array_shape) and pad:
result_shape.extend(array_shape[basic_shape_index:])
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved

return tuple(size for idx, size in enumerate(result_shape) if idx not in drop_axes)


def get_shape_for_selector(
selector_tuple: SelectorTuple,
shape: tuple[int, ...],
drop_axes: tuple[int, ...],
*,
pad: bool,
) -> tuple[int, ...]:
if isinstance(selector_tuple, slice | np.ndarray):
return resulting_shape_from_index(
shape,
(selector_tuple,),
drop_axes,
pad=pad,
)
return resulting_shape_from_index(shape, selector_tuple, drop_axes, pad=pad)


def make_chunk_info_for_rust_with_indices(
batch_info: Iterable[
tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
],
drop_axes: tuple,
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved
) -> list[tuple[tuple[str, ChunkCoords, str, Any], list[slice], list[slice]]]:
# all?
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved
for _, chunk_spec, chunk_selection, out_selection in batch_info:
shape_out_selection = get_shape_for_selector(
out_selection, chunk_spec.shape, (), pad=False
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved
)
shape_chunk_selection = get_shape_for_selector(
chunk_selection, chunk_spec.shape, drop_axes, pad=True
)
if len(shape_chunk_selection) != len(shape_out_selection):
raise CollapsedDimensionError()
return list(
(
convert_chunk_to_primitive(byte_getter, chunk_spec),
Expand Down
9 changes: 9 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,14 @@ def array_fixture(request: pytest.FixtureRequest) -> npt.NDArray[Any]:
"test_roundtrip[vindex-discontinuous_in_chunk_array-contiguous_in_chunk_array]",
]

# vindexing with two contiguous arrays would be converted to two slices but
# in numpy indexing actually requires dropping a dimension, which in turn boils
# down to integer indexing, which we can't do i.e., [np.array(1, 2), np.array(1, 2)] -> [slice(1, 2), slice(1, 2)]
flying-sheep marked this conversation as resolved.
Show resolved Hide resolved
# is not a correct conversion, and thus we don't support the write operation
zarrs_python_no_collapsed_dim = [
"test_roundtrip[vindex-contiguous_in_chunk_array-contiguous_in_chunk_array]"
]


def pytest_collection_modifyitems(
config: pytest.Config, items: Iterable[pytest.Item]
Expand All @@ -128,5 +136,6 @@ def pytest_collection_modifyitems(
item.name
in zarr_python_default_codec_pipeline_failures
+ zarrs_python_no_discontinuous_writes
+ zarrs_python_no_collapsed_dim
):
item.add_marker(xfail_marker)
Loading