diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3bd6226922..d32f6f793c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -43,3 +43,35 @@ jobs: - name: Run Tests run: | hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run + + test-upstream-and-min-deps: + name: py=${{ matrix.python-version }}-${{ matrix.dependency-set }} + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.11', "3.13"] + dependency-set: ["upstream", "min_deps"] + exclude: + - python-version: "3.13" + dependency-set: min_deps + - python-version: "3.11" + dependency-set: upstream + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install Hatch + run: | + python -m pip install --upgrade pip + pip install hatch + - name: Set Up Hatch Env + run: | + hatch env create ${{ matrix.dependency-set }} + hatch env run -e ${{ matrix.dependency-set }} list-env + - name: Run Tests + run: | + hatch env run --env ${{ matrix.dependency-set }} run diff --git a/.pep8speaks.yml b/.pep8speaks.yml deleted file mode 100644 index a000ded163..0000000000 --- a/.pep8speaks.yml +++ /dev/null @@ -1,4 +0,0 @@ -pycodestyle: - max-line-length: 100 - exclude: - - docs diff --git a/pyproject.toml b/pyproject.toml index 84ec8b9a5d..ef2ccd9469 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,12 +28,13 @@ requires-python = ">=3.11" dependencies = [ 'asciitree', 'numpy>=1.25', - 'numcodecs>=0.10.2', - 'fsspec>2024', - 'crc32c', - 'typing_extensions', - 'donfig', + 'numcodecs>=0.13', + 'fsspec>=2022.10.0', + 'crc32c>=2.3', + 'typing_extensions>=4.6', + 'donfig>=0.8', ] + dynamic = [ "version", ] @@ -98,7 +99,7 @@ extra = [ ] optional = [ 'lmdb', - 'universal-pathlib', + 'universal-pathlib>=0.0.22', ] [project.urls] @@ -183,6 +184,65 @@ features = ['docs'] build = "cd docs && 
make html" serve = "sphinx-autobuild docs docs/_build --host 0.0.0.0" +[tool.hatch.envs.upstream] +python = "3.13" +dependencies = [ + 'numpy', # from scientific-python-nightly-wheels + 'numcodecs @ git+https://github.com/zarr-developers/numcodecs', + 'fsspec @ git+https://github.com/fsspec/filesystem_spec', + 's3fs @ git+https://github.com/fsspec/s3fs', + 'universal_pathlib @ git+https://github.com/fsspec/universal_pathlib', + 'crc32c @ git+https://github.com/ICRAR/crc32c', + 'typing_extensions @ git+https://github.com/python/typing_extensions', + 'donfig @ git+https://github.com/pytroll/donfig', + # test deps + 'hypothesis', + 'pytest', + 'pytest-cov', + 'pytest-asyncio', + 'moto[s3]', +] + +[tool.hatch.envs.upstream.env-vars] +PIP_INDEX_URL = "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple/" +PIP_EXTRA_INDEX_URL = "https://pypi.org/simple/" +PIP_PRE = "1" + +[tool.hatch.envs.upstream.scripts] +run = "pytest --verbose" +run-mypy = "mypy src" +run-hypothesis = "pytest --hypothesis-profile ci tests/test_properties.py tests/test_store/test_stateful*" +list-env = "pip list" + +[tool.hatch.envs.min_deps] +description = """Test environment for minimum supported dependencies + +See Spec 0000 for details and drop schedule: https://scientific-python.org/specs/spec-0000/ +""" +python = "3.11" +dependencies = [ + 'numpy==1.25.*', + 'numcodecs==0.13.*', # 0.13 needed for the zstd codec's checksum attribute 
(should be 0.11) + 'fsspec==2022.10.0', + 's3fs==2022.10.0', + 'universal_pathlib==0.0.22', + 'crc32c==2.3.*', + 'typing_extensions==4.6.*', # 4.5 needed for @deprecated, 4.6 for Buffer + 'donfig==0.8.*', + # test deps + 'hypothesis', + 'pytest', + 'pytest-cov', + 'pytest-asyncio', + 'moto[s3]', +] + +[tool.hatch.envs.min_deps.scripts] +run = "pytest --verbose" +run-hypothesis = "pytest --hypothesis-profile ci tests/test_properties.py tests/test_store/test_stateful*" +list-env = "pip list" + + [tool.ruff] line-length = 100 force-exclude = true diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 2c423ff59b..680433565e 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -712,7 +712,7 @@ async def create( dtype: npt.DTypeLike | None = None, compressor: dict[str, JSON] | None = None, # TODO: default and type change fill_value: Any | None = 0, # TODO: need type - order: MemoryOrder | None = None, # TODO: default change + order: MemoryOrder | None = None, store: str | StoreLike | None = None, synchronizer: Any | None = None, overwrite: bool = False, @@ -761,6 +761,7 @@ async def create( Default value to use for uninitialized portions of the array. order : {'C', 'F'}, optional Memory layout to be used within each chunk. + Default is set in Zarr's config (`array.order`). store : Store or str Store or path to directory in file system or name of zip file. 
synchronizer : object, optional @@ -834,12 +835,6 @@ async def create( else: chunk_shape = shape - if order is not None: - warnings.warn( - "order is deprecated, use config `array.order` instead", - DeprecationWarning, - stacklevel=2, - ) if synchronizer is not None: warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2) if chunk_store is not None: @@ -889,6 +884,7 @@ async def create( codecs=codecs, dimension_names=dimension_names, attributes=attributes, + order=order, **kwargs, ) diff --git a/src/zarr/codecs/zstd.py b/src/zarr/codecs/zstd.py index 949f762b20..b4a4a13c29 100644 --- a/src/zarr/codecs/zstd.py +++ b/src/zarr/codecs/zstd.py @@ -3,10 +3,11 @@ import asyncio from dataclasses import dataclass from functools import cached_property -from importlib.metadata import version from typing import TYPE_CHECKING +import numcodecs from numcodecs.zstd import Zstd +from packaging.version import Version from zarr.abc.codec import BytesBytesCodec from zarr.core.buffer.cpu import as_numpy_array_wrapper @@ -43,8 +44,8 @@ class ZstdCodec(BytesBytesCodec): def __init__(self, *, level: int = 0, checksum: bool = False) -> None: # numcodecs 0.13.0 introduces the checksum attribute for the zstd codec - _numcodecs_version = tuple(map(int, version("numcodecs").split("."))) - if _numcodecs_version < (0, 13, 0): # pragma: no cover + _numcodecs_version = Version(numcodecs.__version__) + if _numcodecs_version < Version("0.13.0"): raise RuntimeError( "numcodecs version >= 0.13.0 is required to use the zstd codec. " f"Version {_numcodecs_version} is currently installed." 
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 6e3430c41a..bdafa33f67 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -33,6 +33,7 @@ ZARRAY_JSON, ZATTRS_JSON, ChunkCoords, + MemoryOrder, ShapeLike, ZarrFormat, concurrent_map, @@ -203,14 +204,14 @@ class AsyncArray(Generic[T_ArrayMetadata]): metadata: T_ArrayMetadata store_path: StorePath codec_pipeline: CodecPipeline = field(init=False) - order: Literal["C", "F"] + order: MemoryOrder @overload def __init__( self: AsyncArray[ArrayV2Metadata], metadata: ArrayV2Metadata | ArrayV2MetadataDict, store_path: StorePath, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, ) -> None: ... @overload @@ -218,14 +219,14 @@ def __init__( self: AsyncArray[ArrayV3Metadata], metadata: ArrayV3Metadata | ArrayV3MetadataDict, store_path: StorePath, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, ) -> None: ... def __init__( self, metadata: ArrayMetadata | ArrayMetadataDict, store_path: StorePath, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, ) -> None: if isinstance(metadata, dict): zarr_format = metadata["zarr_format"] @@ -261,7 +262,7 @@ async def create( attributes: dict[str, JSON] | None = None, chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, # runtime @@ -350,7 +351,7 @@ async def create( # v2 only chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, # runtime @@ -382,7 +383,7 @@ async def create( # v2 only chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, - order: 
Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, # runtime @@ -422,7 +423,6 @@ async def create( V2 only. V3 arrays cannot have a dimension separator. order : Literal["C", "F"], optional The order of the array (default is None). - V2 only. V3 arrays should not have 'order' parameter. filters : list[dict[str, JSON]], optional The filters used to compress the data (default is None). V2 only. V3 arrays should not have 'filters' parameter. @@ -471,10 +471,6 @@ async def create( raise ValueError( "dimension_separator cannot be used for arrays with version 3. Use chunk_key_encoding instead." ) - if order is not None: - raise ValueError( - "order cannot be used for arrays with version 3. Use a transpose codec instead." - ) if filters is not None: raise ValueError( "filters cannot be used for arrays with version 3. Use array-to-array codecs instead." @@ -494,6 +490,7 @@ async def create( dimension_names=dimension_names, attributes=attributes, exists_ok=exists_ok, + order=order, ) elif zarr_format == 2: if dtype is str or dtype == "str": @@ -545,6 +542,7 @@ async def _create_v3( dtype: npt.DTypeLike, chunk_shape: ChunkCoords, fill_value: Any | None = None, + order: MemoryOrder | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] @@ -588,7 +586,7 @@ async def _create_v3( attributes=attributes or {}, ) - array = cls(metadata=metadata, store_path=store_path) + array = cls(metadata=metadata, store_path=store_path, order=order) await array._save_metadata(metadata, ensure_parents=True) return array @@ -602,7 +600,7 @@ async def _create_v2( chunks: ChunkCoords, dimension_separator: Literal[".", "/"] | None = None, fill_value: None | float = None, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, attributes: 
dict[str, JSON] | None = None, @@ -610,8 +608,9 @@ async def _create_v2( ) -> AsyncArray[ArrayV2Metadata]: if not exists_ok: await ensure_no_existing_node(store_path, zarr_format=2) + if order is None: - order = "C" + order = parse_indexing_order(config.get("array.order")) if dimension_separator is None: dimension_separator = "." @@ -627,7 +626,7 @@ async def _create_v2( filters=filters, attributes=attributes, ) - array = cls(metadata=metadata, store_path=store_path) + array = cls(metadata=metadata, store_path=store_path, order=order) await array._save_metadata(metadata, ensure_parents=True) return array @@ -1179,7 +1178,7 @@ def create( # v2 only chunks: ChunkCoords | None = None, dimension_separator: Literal[".", "/"] | None = None, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, # runtime @@ -1370,7 +1369,7 @@ def store_path(self) -> StorePath: return self._async_array.store_path @property - def order(self) -> Literal["C", "F"]: + def order(self) -> MemoryOrder: return self._async_array.order @property diff --git a/src/zarr/core/array_spec.py b/src/zarr/core/array_spec.py index e84a81cb05..c4d9c363fa 100644 --- a/src/zarr/core/array_spec.py +++ b/src/zarr/core/array_spec.py @@ -1,11 +1,11 @@ from __future__ import annotations from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any import numpy as np -from zarr.core.common import parse_fill_value, parse_order, parse_shapelike +from zarr.core.common import MemoryOrder, parse_fill_value, parse_order, parse_shapelike if TYPE_CHECKING: from zarr.core.buffer import BufferPrototype @@ -17,7 +17,7 @@ class ArraySpec: shape: ChunkCoords dtype: np.dtype[Any] fill_value: Any - order: Literal["C", "F"] + order: MemoryOrder prototype: BufferPrototype def __init__( @@ -25,7 +25,7 @@ def __init__( shape: ChunkCoords, dtype: np.dtype[Any], 
fill_value: Any, - order: Literal["C", "F"], + order: MemoryOrder, prototype: BufferPrototype, ) -> None: shape_parsed = parse_shapelike(shape) diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 2e18336050..f18f2e4e8d 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -25,7 +25,7 @@ from zarr.core.array_spec import ArraySpec from zarr.core.chunk_grids import RegularChunkGrid from zarr.core.chunk_key_encodings import parse_separator -from zarr.core.common import ZARRAY_JSON, ZATTRS_JSON, parse_shapelike +from zarr.core.common import ZARRAY_JSON, ZATTRS_JSON, MemoryOrder, parse_shapelike from zarr.core.config import config, parse_indexing_order from zarr.core.metadata.common import parse_attributes @@ -45,7 +45,7 @@ class ArrayV2Metadata(Metadata): chunks: tuple[int, ...] dtype: np.dtype[Any] fill_value: None | int | float | str | bytes = 0 - order: Literal["C", "F"] = "C" + order: MemoryOrder = "C" filters: tuple[numcodecs.abc.Codec, ...] | None = None dimension_separator: Literal[".", "/"] = "." 
compressor: numcodecs.abc.Codec | None = None @@ -59,7 +59,7 @@ def __init__( dtype: npt.DTypeLike, chunks: ChunkCoords, fill_value: Any, - order: Literal["C", "F"], + order: MemoryOrder, dimension_separator: Literal[".", "/"] = ".", compressor: numcodecs.abc.Codec | dict[str, JSON] | None = None, filters: Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None = None, @@ -185,7 +185,7 @@ def to_dict(self) -> dict[str, JSON]: return zarray_dict def get_chunk_spec( - self, _chunk_coords: ChunkCoords, order: Literal["C", "F"], prototype: BufferPrototype + self, _chunk_coords: ChunkCoords, order: MemoryOrder, prototype: BufferPrototype ) -> ArraySpec: return ArraySpec( shape=self.chunks, diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index e9d2f92d8a..6b6f28dd96 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -31,6 +31,7 @@ JSON, ZARR_JSON, ChunkCoords, + MemoryOrder, parse_named_configuration, parse_shapelike, ) @@ -289,7 +290,7 @@ def ndim(self) -> int: return len(self.shape) def get_chunk_spec( - self, _chunk_coords: ChunkCoords, order: Literal["C", "F"], prototype: BufferPrototype + self, _chunk_coords: ChunkCoords, order: MemoryOrder, prototype: BufferPrototype ) -> ArraySpec: assert isinstance( self.chunk_grid, RegularChunkGrid diff --git a/src/zarr/storage/remote.py b/src/zarr/storage/remote.py index 55d6d7e240..f6285c0e62 100644 --- a/src/zarr/storage/remote.py +++ b/src/zarr/storage/remote.py @@ -3,8 +3,6 @@ import warnings from typing import TYPE_CHECKING, Any, Self -import fsspec - from zarr.abc.store import ByteRangeRequest, Store from zarr.storage.common import _dereference_path @@ -158,10 +156,16 @@ def from_url( ------- RemoteStore """ + try: + from fsspec import url_to_fs + except ImportError: + # before fsspec==2024.3.1 + from fsspec.core import url_to_fs + opts = storage_options or {} opts = {"asynchronous": True, **opts} - - fs, path = fsspec.url_to_fs(url, **opts) + + fs, path = 
url_to_fs(url, **opts) # fsspec is not consistent about removing the scheme from the path, so check and strip it here # https://github.com/fsspec/filesystem_spec/issues/1722 diff --git a/tests/test_api.py b/tests/test_api.py index 9b7b4f8b9a..5b62e3a2fa 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -21,7 +21,7 @@ save_array, save_group, ) -from zarr.core.common import ZarrFormat +from zarr.core.common import MemoryOrder, ZarrFormat from zarr.errors import MetadataValidationError from zarr.storage._utils import normalize_path from zarr.storage.memory import MemoryStore @@ -206,6 +206,22 @@ def test_open_with_mode_w_minus(tmp_path: pathlib.Path) -> None: zarr.open(store=tmp_path, mode="w-") +@pytest.mark.parametrize("order", ["C", "F", None]) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_array_order(order: MemoryOrder | None, zarr_format: ZarrFormat) -> None: + arr = zarr.ones(shape=(2, 2), order=order, zarr_format=zarr_format) + expected = order or zarr.config.get("array.order") + assert arr.order == expected + + vals = np.asarray(arr) + if expected == "C": + assert vals.flags.c_contiguous + elif expected == "F": + assert vals.flags.f_contiguous + else: + raise AssertionError + + # def test_lazy_loader(): # foo = np.arange(100) # bar = np.arange(100, 0, -1) diff --git a/tests/test_array.py b/tests/test_array.py index 829a04d304..f182cb1a14 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -10,7 +10,7 @@ from zarr.codecs import BytesCodec, VLenBytesCodec from zarr.core.array import chunks_initialized from zarr.core.buffer.cpu import NDBuffer -from zarr.core.common import JSON, ZarrFormat +from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.core.group import AsyncGroup from zarr.core.indexing import ceildiv from zarr.core.sync import sync @@ -417,3 +417,22 @@ def test_update_attrs(zarr_format: int) -> None: arr2 = zarr.open_array(store=store, zarr_format=zarr_format) assert arr2.attrs["foo"] == "bar" + + 
+@pytest.mark.parametrize("order", ["C", "F", None])  # None exercises the config fallback
+@pytest.mark.parametrize("zarr_format", [2, 3])
+@pytest.mark.parametrize("store", ["memory"], indirect=True)
+def test_array_create_order(  # Array.create must honor `order`, else the configured default
+    order: MemoryOrder | None, zarr_format: int, store: MemoryStore
+) -> None:
+    arr = Array.create(store=store, shape=(2, 2), order=order, zarr_format=zarr_format, dtype="i4")
+    expected = order or zarr.config.get("array.order")  # None -> value of config `array.order`
+    assert arr.order == expected
+
+    vals = np.asarray(arr)  # convert to a NumPy array so its layout flags can be checked
+    if expected == "C":
+        assert vals.flags.c_contiguous
+    elif expected == "F":
+        assert vals.flags.f_contiguous
+    else:
+        raise AssertionError  # expected order was neither "C" nor "F"