Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
c2ac57b
chore(deps): bump zarrs to 0.18.0
LDeakin Nov 27, 2024
4074328
feat: add HTTP store support
LDeakin Nov 27, 2024
2e8599c
update stubs
LDeakin Nov 27, 2024
f61183e
fix: disallow storage_options for HTTP store
LDeakin Nov 28, 2024
3f2d077
add another todo for store info on codec init
LDeakin Nov 28, 2024
bf4e365
Merge remote-tracking branch 'origin/main' into ld/http_store
LDeakin Nov 29, 2024
390f23b
Merge branch 'main' into ld/http_store
LDeakin Dec 2, 2024
3244bee
refactor store initialisation
LDeakin Dec 6, 2024
95ba17b
try build with latest ring
LDeakin Dec 6, 2024
fa4fca5
try add nasm
LDeakin Dec 6, 2024
38636fd
Revert "try add nasm"
LDeakin Dec 6, 2024
3bf21da
Revert "try build with latest ring"
LDeakin Dec 6, 2024
1bae158
CD: bump maturin to 1.7.8
LDeakin Dec 7, 2024
11f4c87
set python architecture for windows aarch64
LDeakin Dec 7, 2024
4020a7c
Revert "set python architecture for windows aarch64"
LDeakin Dec 7, 2024
08476b3
Set MATURIN_USE_XWIN=1 on windows aarch64
LDeakin Dec 7, 2024
56d69c6
Exclude windows aarch64
LDeakin Dec 7, 2024
96c1a24
fix: store/str type hints
LDeakin Dec 9, 2024
d57e037
fix: get_store to get_store_from_config
LDeakin Dec 9, 2024
86883c5
fix: use match in StoreConfig::extract_bound
LDeakin Dec 9, 2024
5e80adf
fix: elide 'py lifetimes in HTTPStoreConfig
LDeakin Dec 9, 2024
31e9101
fix: remove unneeded return statement
LDeakin Dec 9, 2024
60e867b
fix: address clippy::upper_case_acronyms
LDeakin Dec 9, 2024
63fa081
fix: address clippy::single_match_else
LDeakin Dec 9, 2024
301f6d5
Revert "CD: bump maturin to 1.7.8"
LDeakin Dec 9, 2024
190becd
fix: match on remote store name in StoreConfig::extract_bound
LDeakin Dec 9, 2024
be244e1
fix: remove MATURIN_USE_XWIN
LDeakin Dec 9, 2024
d1e3405
fix: remove zarrs config setup in http tests
LDeakin Dec 9, 2024
dd3ac5b
Merge branch 'main' into ld/http_store
flying-sheep Dec 10, 2024
71f1698
fix: TryFrom<&StoreConfig> for store
LDeakin Dec 10, 2024
0643735
fix: add StoreConfig super class
LDeakin Dec 10, 2024
94806cf
fix: convert local store root to string
LDeakin Dec 10, 2024
9b70af1
fix: change unsupported store to NotImplementedError
LDeakin Dec 10, 2024
0a3bf0f
fix: add docs for HTTP store
LDeakin Dec 10, 2024
6e42799
refactor: move store logic into store module
LDeakin Dec 10, 2024
db7c622
refactor: remove CodecPipelineStore
LDeakin Dec 10, 2024
c288bda
fix: return store directly from opendal_builder_to_sync_store
LDeakin Dec 10, 2024
428710a
fix: use zarrs::ReadableWritableListableStorage
LDeakin Dec 10, 2024
e0704dc
Merge branch 'main' into ld/http_store
LDeakin Dec 10, 2024
fd0c6b4
fix(docs): clarify NotImplementedError
LDeakin Dec 11, 2024
78d9642
Revert store enum changes
flying-sheep Dec 12, 2024
028826e
fmt
flying-sheep Dec 12, 2024
97f9428
update stubs
flying-sheep Dec 12, 2024
41ee02e
wtf
flying-sheep Dec 12, 2024
5873e2a
fix: http store root to endpoint
LDeakin Dec 12, 2024
223c57d
Instantiate Rust containers from Python (#72)
flying-sheep Dec 14, 2024
014b04f
Abstract away store manager
flying-sheep Dec 14, 2024
2296ca1
fix(deps): constrain zarr<=3.0.0b3
LDeakin Dec 15, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ jobs:
- { os: linux, manylinux: musllinux_1_2, target: armv7 }
# windows
- { os: windows, target: i686, python-architecture: x86 }
exclude:
# https://github.com/rust-cross/cargo-xwin/issues/76
- os: windows
target: aarch64
runs-on: ${{ (matrix.os == 'linux' && 'ubuntu') || matrix.os }}-latest
steps:
- uses: actions/checkout@v4
Expand Down
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ numpy = "0.23.0"
unsafe_cell_slice = "0.2.0"
serde_json = "1.0.128"
pyo3-stub-gen = "0.6.1"
opendal = { version = "0.50.2", features = ["services-http"] }
tokio = { version = "1.41.1", features = ["rt-multi-thread"] }
zarrs_opendal = "0.4.0"

[profile.release]
lto = true
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,14 @@ You can then use your `zarr` as normal (with some caveats)!

We export a `ZarrsCodecPipeline` class so that `zarr-python` can use the class but it is not meant to be instantiated and we do not guarantee the stability of its API beyond what is required so that `zarr-python` can use it. Therefore, it is not documented here. We also export two errors, `DiscontiguousArrayError` and `CollapsedDimensionError` that can be thrown in the process of converting to indexers that `zarrs` can understand (see below for more details).

At the moment, we only support local filesystems but intend to support more in the future: https://github.com/ilan-gold/zarrs-python/issues/44
At the moment, we only support a subset of the `zarr-python` stores:

- [x] [LocalStore](https://zarr.readthedocs.io/en/main/_autoapi/zarr/storage/local/index.html) (FileSystem)
- [RemoteStore](https://zarr.readthedocs.io/en/main/_autoapi/zarr/storage/remote/index.html)
- [x] [HTTPFileSystem](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem)

A `NotImplementedError` will be raised if a store is not supported.
We intend to support more stores in the future: https://github.com/ilan-gold/zarrs-python/issues/44.

### Configuration

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ dependencies = [
'donfig',
'pytest',
'universal_pathlib>=0.2.0',
'zarr>=3.0.0b2',
'zarr>=3.0.0b2,<=3.0.0b3',
]

[project.optional-dependencies]
Expand Down
46 changes: 28 additions & 18 deletions python/zarrs/_internal.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@
# ruff: noqa: E501, F401

import typing
from enum import Enum, auto

import numpy
import numpy.typing

class Basic:
def __new__(cls, byte_interface: typing.Any, chunk_spec: typing.Any): ...
...

class CodecPipelineImpl:
def __new__(
cls,
Expand All @@ -19,29 +24,34 @@ class CodecPipelineImpl:
): ...
def retrieve_chunks_and_apply_index(
self,
chunk_descriptions: typing.Sequence[
tuple[
tuple[str, typing.Sequence[int], str, typing.Sequence[int]],
typing.Sequence[slice],
typing.Sequence[slice],
]
],
chunk_descriptions: typing.Sequence[WithSubset],
value: numpy.NDArray[typing.Any],
) -> None: ...
def retrieve_chunks(
self,
chunk_descriptions: typing.Sequence[
tuple[str, typing.Sequence[int], str, typing.Sequence[int]]
],
self, chunk_descriptions: typing.Sequence[Basic]
) -> list[numpy.typing.NDArray[numpy.uint8]]: ...
def store_chunks_with_indices(
self,
chunk_descriptions: typing.Sequence[
tuple[
tuple[str, typing.Sequence[int], str, typing.Sequence[int]],
typing.Sequence[slice],
typing.Sequence[slice],
]
],
chunk_descriptions: typing.Sequence[WithSubset],
value: numpy.NDArray[typing.Any],
) -> None: ...

class FilesystemStoreConfig:
root: str

class HttpStoreConfig:
endpoint: str

class WithSubset:
def __new__(
cls,
item: Basic,
chunk_subset: typing.Sequence[slice],
subset: typing.Sequence[slice],
shape: typing.Sequence[int],
): ...
...

class StoreConfig(Enum):
Filesystem = auto()
Http = auto()
38 changes: 16 additions & 22 deletions python/zarrs/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@
from typing import TYPE_CHECKING, TypedDict

import numpy as np
from zarr.abc.codec import (
Codec,
CodecPipeline,
)
from zarr.abc.codec import Codec, CodecPipeline
from zarr.core.config import config

if TYPE_CHECKING:
Expand All @@ -18,7 +15,7 @@

from zarr.abc.store import ByteGetter, ByteSetter
from zarr.core.array_spec import ArraySpec
from zarr.core.buffer import Buffer, NDBuffer
from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
from zarr.core.chunk_grids import ChunkGrid
from zarr.core.common import ChunkCoords
from zarr.core.indexing import SelectorTuple
Expand Down Expand Up @@ -120,30 +117,28 @@ async def read(
batch_info: Iterable[
tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]
],
out: NDBuffer,
out: NDBuffer, # type: ignore
drop_axes: tuple[int, ...] = (), # FIXME: unused
) -> None:
out = out.as_ndarray_like() # FIXME: Error if array is not in host memory
# FIXME: Error if array is not in host memory
out: NDArrayLike = out.as_ndarray_like()
if not out.dtype.isnative:
raise RuntimeError("Non-native byte order not supported")
try:
chunks_desc = make_chunk_info_for_rust_with_indices(batch_info, drop_axes)
index_in_rust = True
chunks_desc = make_chunk_info_for_rust_with_indices(
batch_info, drop_axes, out.shape
)
except (DiscontiguousArrayError, CollapsedDimensionError):
chunks_desc = make_chunk_info_for_rust(batch_info)
index_in_rust = False
if index_in_rust:
else:
await asyncio.to_thread(
self.impl.retrieve_chunks_and_apply_index,
chunks_desc,
out,
)
return None
chunks = await asyncio.to_thread(self.impl.retrieve_chunks, chunks_desc)
for chunk, chunk_info in zip(chunks, batch_info):
out_selection = chunk_info[3]
selection = chunk_info[2]
spec = chunk_info[1]
for chunk, (_, spec, selection, out_selection) in zip(chunks, batch_info):
chunk_reshaped = chunk.view(spec.dtype).reshape(spec.shape)
chunk_selected = chunk_reshaped[selection]
if drop_axes:
Expand All @@ -155,18 +150,17 @@ async def write(
batch_info: Iterable[
tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
],
value: NDBuffer,
value: NDBuffer, # type: ignore
drop_axes: tuple[int, ...] = (),
) -> None:
value = value.as_ndarray_like() # FIXME: Error if array is not in host memory
# FIXME: Error if array is not in host memory
value: NDArrayLike | np.ndarray = value.as_ndarray_like()
if not value.dtype.isnative:
value = np.ascontiguousarray(value, dtype=value.dtype.newbyteorder("="))
elif not value.flags.c_contiguous:
value = np.ascontiguousarray(value)
chunks_desc = make_chunk_info_for_rust_with_indices(batch_info, drop_axes)
await asyncio.to_thread(
self.impl.store_chunks_with_indices,
chunks_desc,
value,
chunks_desc = make_chunk_info_for_rust_with_indices(
batch_info, drop_axes, value.shape
)
await asyncio.to_thread(self.impl.store_chunks_with_indices, chunks_desc, value)
return None
41 changes: 19 additions & 22 deletions python/zarrs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,19 @@
import operator
import os
from functools import reduce
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING

import numpy as np
from zarr.core.indexing import SelectorTuple, is_integer

from zarrs._internal import Basic, WithSubset

if TYPE_CHECKING:
from collections.abc import Iterable
from types import EllipsisType

from zarr.abc.store import ByteGetter, ByteSetter
from zarr.core.array_spec import ArraySpec
from zarr.core.common import ChunkCoords


# adapted from https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
Expand Down Expand Up @@ -62,17 +63,6 @@ def selector_tuple_to_slice_selection(selector_tuple: SelectorTuple) -> list[sli
return make_slice_selection(selector_tuple)


def convert_chunk_to_primitive(
byte_getter: ByteGetter | ByteSetter, chunk_spec: ArraySpec
) -> tuple[str, ChunkCoords, str, Any]:
return (
str(byte_getter),
chunk_spec.shape,
str(chunk_spec.dtype),
chunk_spec.fill_value.tobytes(),
)


def resulting_shape_from_index(
array_shape: tuple[int, ...],
index_tuple: tuple[int | slice | EllipsisType | np.ndarray],
Expand Down Expand Up @@ -149,10 +139,12 @@ def make_chunk_info_for_rust_with_indices(
tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
],
drop_axes: tuple[int, ...],
) -> list[tuple[tuple[str, ChunkCoords, str, Any], list[slice], list[slice]]]:
chunk_info_with_indices = []
shape: tuple[int, ...],
) -> list[WithSubset]:
shape = shape if shape else (1,) # constant array
chunk_info_with_indices: list[WithSubset] = []
for byte_getter, chunk_spec, chunk_selection, out_selection in batch_info:
chunk_info = convert_chunk_to_primitive(byte_getter, chunk_spec)
chunk_info = Basic(byte_getter, chunk_spec)
out_selection_as_slices = selector_tuple_to_slice_selection(out_selection)
chunk_selection_as_slices = selector_tuple_to_slice_selection(chunk_selection)
shape_chunk_selection_slices = get_shape_for_selector(
Expand All @@ -169,7 +161,12 @@ def make_chunk_info_for_rust_with_indices(
f"{shape_chunk_selection} != {shape_chunk_selection_slices}"
)
chunk_info_with_indices.append(
(chunk_info, out_selection_as_slices, chunk_selection_as_slices)
WithSubset(
chunk_info,
chunk_subset=chunk_selection_as_slices,
subset=out_selection_as_slices,
shape=shape,
)
)
return chunk_info_with_indices

Expand All @@ -178,8 +175,8 @@ def make_chunk_info_for_rust(
batch_info: Iterable[
tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
],
) -> list[tuple[str, ChunkCoords, str, Any]]:
return list(
convert_chunk_to_primitive(byte_getter, chunk_spec)
for (byte_getter, chunk_spec, _, _) in batch_info
)
) -> list[Basic]:
return [
Basic(byte_interface, chunk_spec)
for (byte_interface, chunk_spec, _, _) in batch_info
]
Loading
Loading