diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 68673f1295..18c232ae40 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -176,7 +176,7 @@ print some diagnostics, e.g.:: Read-only : False Compressor : Blosc(cname='zstd', clevel=3, shuffle=BITSHUFFLE, : blocksize=0) - Store type : builtins.dict + Store type : zarr.storage.KVStore No. bytes : 400000000 (381.5M) No. bytes stored : 3379344 (3.2M) Storage ratio : 118.4 @@ -268,7 +268,7 @@ Here is an example using a delta filter with the Blosc compressor:: Read-only : False Filter [0] : Delta(dtype='>> z[:] array([b'H', b'e', b'l', b'l', b'o', b' ', b'f', b'r', b'o', b'm', b' ', @@ -1274,7 +1276,7 @@ ratios, depending on the correlation structure within the data. E.g.:: Order : C Read-only : False Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - Store type : builtins.dict + Store type : zarr.storage.KVStore No. bytes : 400000000 (381.5M) No. bytes stored : 6696010 (6.4M) Storage ratio : 59.7 @@ -1288,7 +1290,7 @@ ratios, depending on the correlation structure within the data. E.g.:: Order : F Read-only : False Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - Store type : builtins.dict + Store type : zarr.storage.KVStore No. bytes : 400000000 (381.5M) No. 
# v2 store keys
array_meta_key = '.zarray'
group_meta_key = '.zgroup'
attrs_key = '.zattrs'


class BaseStore(MutableMapping):
    """Abstract base class for store implementations.

    This is a thin wrapper over MutableMapping that provides methods to check
    whether a store is readable, writeable, erasable and/or listable.

    Stores are deliberately not treated as plain mutable mappings: they have
    a couple of extra requirements (e.g. stores only allow strings as keys,
    mutable mappings are more generic) that would break the Liskov
    substitution principle.

    Having no-op base methods, like `close()`, also simplifies store usage:
    callers do not need to check for the presence of attributes and methods.

    Stores can be used as context managers to make sure they close on exit.

    .. added: 2.11.0

    """

    # Capability flags; subclasses override them to restrict what is allowed.
    _readable = True
    _writeable = True
    _erasable = True
    _listable = True

    def is_readable(self):
        """Return the value of the ``_readable`` capability flag."""
        return self._readable

    def is_writeable(self):
        """Return the value of the ``_writeable`` capability flag."""
        return self._writeable

    def is_listable(self):
        """Return the value of the ``_listable`` capability flag."""
        return self._listable

    def is_erasable(self):
        """Return the value of the ``_erasable`` capability flag."""
        return self._erasable

    def __enter__(self):
        # Count nested ``with`` blocks so that only the outermost exit closes
        # the store; the counter is created lazily on first use.
        if not hasattr(self, "_open_count"):
            self._open_count = 0
        self._open_count += 1
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._open_count -= 1
        if self._open_count == 0:
            self.close()

    def close(self) -> None:
        """Do nothing by default"""
        pass

    def rename(self, src_path: str, dst_path: str) -> None:
        """Move every key stored under `src_path` so it lives under `dst_path`.

        Both paths are handed to ``_rename_from_keys`` as given, which assumes
        they are already normalized.

        Raises
        ------
        NotImplementedError
            If the store reports that it is not erasable.
        """
        if not self.is_erasable():
            raise NotImplementedError(
                f'{type(self)} is not erasable, cannot call "rename"'
            )  # pragma: no cover
        _rename_from_keys(self, src_path, dst_path)

    @staticmethod
    def _ensure_store(store: Any):
        """
        We want to make sure internally that zarr stores are always a class
        with a specific interface derived from ``BaseStore``, which is slightly
        different than ``MutableMapping``.

        We'll do this conversion in a few places automatically:

        * ``None`` and ``BaseStore`` instances are returned unchanged;
        * ``MutableMapping`` instances, and objects exposing the mapping
          methods checked below, are wrapped in ``KVStore``;
        * anything else raises ``ValueError``.
        """
        if store is None or isinstance(store, BaseStore):
            return store

        required_attrs = (
            "keys",
            "values",
            "get",
            "__setitem__",
            "__getitem__",
            "__delitem__",
            "__contains__",
        )
        if isinstance(store, MutableMapping) or all(
            hasattr(store, attr) for attr in required_attrs
        ):
            # imported lazily to avoid a circular import
            from zarr.storage import KVStore

            return KVStore(store)

        raise ValueError(
            "Starting with Zarr 2.11.0, stores must be subclasses of "
            "BaseStore, if your store exposes the MutableMapping interface "
            f"wrap it in zarr.storage.KVStore. Got {store}"
        )


class Store(BaseStore):
    """Abstract store class used by implementations following the Zarr v2 spec.

    Adds public `listdir` and `rmdir` methods on top of BaseStore (which
    already provides `rename`).

    .. added: 2.11.0

    """
    def listdir(self, path: str = "") -> List[str]:
        """Return the sorted names of the direct children found under `path`."""
        path = normalize_storage_path(path)
        return _listdir_from_keys(self, path)

    def rmdir(self, path: str = "") -> None:
        """Delete every key stored under `path`.

        Raises
        ------
        NotImplementedError
            If the store reports that it is not erasable.
        """
        if not self.is_erasable():
            raise NotImplementedError(
                f'{type(self)} is not erasable, cannot call "rmdir"'
            )  # pragma: no cover
        path = normalize_storage_path(path)
        _rmdir_from_keys(self, path)


def _path_to_prefix(path: Optional[str]) -> str:
    """Turn a normalized path into a key prefix ('' for the root)."""
    # assume path already normalized
    return path + '/' if path else ''


def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None:
    """Re-key every entry below `src_path` so that it sits below `dst_path`."""
    # assume path already normalized
    src_prefix = _path_to_prefix(src_path)
    dst_prefix = _path_to_prefix(dst_path)
    # snapshot the keys: the store is mutated while we walk over them
    for key in list(store.keys()):
        if key.startswith(src_prefix):
            # Slice the prefix off. ``str.lstrip`` would be wrong here: it
            # strips any leading run of the prefix's *characters*, so
            # 'foo/oof/x' would lose 'oof/' as well when renaming 'foo'.
            new_key = dst_prefix + key[len(src_prefix):]
            store[new_key] = store.pop(key)


def _rmdir_from_keys(store: Union[BaseStore, MutableMapping], path: Optional[str] = None) -> None:
    """Delete, one key at a time, every entry whose key starts with the prefix of `path`."""
    # assume path already normalized
    prefix = _path_to_prefix(path)
    # snapshot the keys: the store is mutated while we walk over them
    for key in list(store.keys()):
        if key.startswith(prefix):
            del store[key]


def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str]:
    """List the sorted, de-duplicated first path segments found below `path`."""
    # assume path already normalized
    prefix = _path_to_prefix(path)
    children = {
        key[len(prefix):].split('/')[0]
        for key in store.keys()
        if key.startswith(prefix) and len(key) > len(prefix)
    }
    return sorted(children)
Parameters ---------- - store : MutableMapping or string, optional + store : Store or string, optional Store or path to directory in file system or name of zip file. mode : {'r', 'r+', 'a', 'w', 'w-'}, optional Persistence mode: 'r' means read only (must exist); 'r+' means @@ -76,27 +80,28 @@ def open(store=None, mode='a', **kwargs): clobber = mode == 'w' # we pass storage options explicitly, since normalize_store_arg might construct # a store if the input is a fsspec-compatible URL - store = normalize_store_arg(store, clobber=clobber, - storage_options=kwargs.pop("storage_options", {})) + _store: BaseStore = normalize_store_arg( + store, clobber=clobber, storage_options=kwargs.pop("storage_options", {}) + ) path = normalize_storage_path(path) if mode in {'w', 'w-', 'x'}: if 'shape' in kwargs: - return open_array(store, mode=mode, **kwargs) + return open_array(_store, mode=mode, **kwargs) else: - return open_group(store, mode=mode, **kwargs) + return open_group(_store, mode=mode, **kwargs) elif mode == "a": - if "shape" in kwargs or contains_array(store, path): - return open_array(store, mode=mode, **kwargs) + if "shape" in kwargs or contains_array(_store, path): + return open_array(_store, mode=mode, **kwargs) else: - return open_group(store, mode=mode, **kwargs) + return open_group(_store, mode=mode, **kwargs) else: - if contains_array(store, path): - return open_array(store, mode=mode, **kwargs) - elif contains_group(store, path): - return open_group(store, mode=mode, **kwargs) + if contains_array(_store, path): + return open_array(_store, mode=mode, **kwargs) + elif contains_group(_store, path): + return open_group(_store, mode=mode, **kwargs) else: raise PathNotFoundError(path) @@ -105,7 +110,7 @@ def _might_close(path): return isinstance(path, (str, os.PathLike)) -def save_array(store, arr, **kwargs): +def save_array(store: StoreLike, arr, **kwargs): """Convenience function to save a NumPy array to the local file system, following a similar API to the NumPy 
save() function. @@ -137,16 +142,16 @@ def save_array(store, arr, **kwargs): """ may_need_closing = _might_close(store) - store = normalize_store_arg(store, clobber=True) + _store: BaseStore = normalize_store_arg(store, clobber=True) try: - _create_array(arr, store=store, overwrite=True, **kwargs) + _create_array(arr, store=_store, overwrite=True, **kwargs) finally: - if may_need_closing and hasattr(store, 'close'): + if may_need_closing: # needed to ensure zip file records are written - store.close() + _store.close() -def save_group(store, *args, **kwargs): +def save_group(store: StoreLike, *args, **kwargs): """Convenience function to save several NumPy arrays to the local file system, following a similar API to the NumPy savez()/savez_compressed() functions. @@ -208,21 +213,21 @@ def save_group(store, *args, **kwargs): raise ValueError('at least one array must be provided') # handle polymorphic store arg may_need_closing = _might_close(store) - store = normalize_store_arg(store, clobber=True) + _store: BaseStore = normalize_store_arg(store, clobber=True) try: - grp = _create_group(store, overwrite=True) + grp = _create_group(_store, overwrite=True) for i, arr in enumerate(args): k = 'arr_{}'.format(i) grp.create_dataset(k, data=arr, overwrite=True) for k, arr in kwargs.items(): grp.create_dataset(k, data=arr, overwrite=True) finally: - if may_need_closing and hasattr(store, 'close'): + if may_need_closing: # needed to ensure zip file records are written - store.close() + _store.close() -def save(store, *args, **kwargs): +def save(store: StoreLike, *args, **kwargs): """Convenience function to save an array or group of arrays to the local file system. Parameters @@ -332,7 +337,7 @@ def __repr__(self): return r -def load(store): +def load(store: StoreLike): """Load data from an array or group into memory. 
Parameters @@ -358,11 +363,11 @@ def load(store): """ # handle polymorphic store arg - store = normalize_store_arg(store) - if contains_array(store, path=None): - return Array(store=store, path=None)[...] - elif contains_group(store, path=None): - grp = Group(store=store, path=None) + _store = normalize_store_arg(store) + if contains_array(_store, path=None): + return Array(store=_store, path=None)[...] + elif contains_group(_store, path=None): + grp = Group(store=_store, path=None) return LazyLoader(grp) @@ -1078,7 +1083,7 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None, return n_copied, n_skipped, n_bytes_copied -def consolidate_metadata(store, metadata_key='.zmetadata'): +def consolidate_metadata(store: StoreLike, metadata_key=".zmetadata"): """ Consolidate all metadata for groups and arrays within the given store into a single resource and put it under the given key. @@ -1129,7 +1134,7 @@ def is_zarr_key(key): return open_consolidated(store, metadata_key=metadata_key) -def open_consolidated(store, metadata_key='.zmetadata', mode='r+', **kwargs): +def open_consolidated(store: StoreLike, metadata_key=".zmetadata", mode="r+", **kwargs): """Open group using metadata previously consolidated into a single key. 
This is an optimised method for opening a Zarr group, where instead of diff --git a/zarr/core.py b/zarr/core.py index 6865a0694c..56b22ead8d 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -9,6 +9,8 @@ import numpy as np from numcodecs.compat import ensure_bytes, ensure_ndarray +from collections.abc import MutableMapping + from zarr.attrs import Attributes from zarr.codecs import AsType, get_codec from zarr.errors import ArrayNotFoundError, ReadOnlyError, ArrayIndexError @@ -30,7 +32,7 @@ pop_fields, ) from zarr.meta import decode_array_metadata, encode_array_metadata -from zarr.storage import array_meta_key, attrs_key, getsize, listdir +from zarr.storage import array_meta_key, attrs_key, getsize, listdir, BaseStore from zarr.util import ( all_equal, InfoReporter, @@ -145,7 +147,7 @@ class Array: def __init__( self, - store, + store: BaseStore, path=None, read_only=False, chunk_store=None, @@ -158,6 +160,9 @@ def __init__( # N.B., expect at this point store is fully initialized with all # configuration metadata fully specified and normalized + store = BaseStore._ensure_store(store) + chunk_store = BaseStore._ensure_store(chunk_store) + self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) @@ -2113,7 +2118,7 @@ def _encode_chunk(self, chunk): cdata = chunk # ensure in-memory data is immutable and easy to compare - if isinstance(self.chunk_store, dict): + if isinstance(self.chunk_store, MutableMapping): cdata = ensure_bytes(cdata) return cdata @@ -2146,10 +2151,10 @@ def info(self): Order : C Read-only : False Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - Store type : builtins.dict + Store type : zarr.storage.KVStore No. bytes : 4000000 (3.8M) - No. bytes stored : ... - Storage ratio : ... + No. 
bytes stored : 320 + Storage ratio : 12500.0 Chunks initialized : 0/10 """ diff --git a/zarr/creation.py b/zarr/creation.py index 75ff1d0212..244a9b080c 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -1,4 +1,3 @@ -import os from warnings import warn import numpy as np @@ -10,10 +9,9 @@ ContainsArrayError, ContainsGroupError, ) -from zarr.n5 import N5Store -from zarr.storage import (DirectoryStore, ZipStore, contains_array, - contains_group, default_compressor, init_array, - normalize_storage_path, FSStore) +from zarr.storage import (contains_array, contains_group, default_compressor, + init_array, normalize_storage_path, + normalize_store_arg) from zarr.util import normalize_dimension_separator @@ -157,27 +155,6 @@ def create(shape, chunks=True, dtype=None, compressor='default', return z -def normalize_store_arg(store, clobber=False, storage_options=None, mode='w'): - if store is None: - return dict() - if isinstance(store, os.PathLike): - store = os.fspath(store) - if isinstance(store, str): - mode = mode if clobber else "r" - if "://" in store or "::" in store: - return FSStore(store, mode=mode, **(storage_options or {})) - elif storage_options: - raise ValueError("storage_options passed with non-fsspec path") - if store.endswith('.zip'): - return ZipStore(store, mode=mode) - elif store.endswith('.n5'): - return N5Store(store) - else: - return DirectoryStore(store) - else: - return store - - def _kwargs_compat(compressor, fill_value, kwargs): # to be compatible with h5py, as well as backwards-compatible with Zarr diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 87c2178e61..402b8dd976 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -15,11 +15,26 @@ ReadOnlyError, ) from zarr.meta import decode_group_metadata -from zarr.storage import (MemoryStore, attrs_key, contains_array, - contains_group, group_meta_key, init_group, listdir, - rename, rmdir) -from zarr.util import (InfoReporter, TreeViewer, is_valid_python_name, nolock, - 
normalize_shape, normalize_storage_path) +from zarr.storage import ( + BaseStore, + MemoryStore, + attrs_key, + contains_array, + contains_group, + group_meta_key, + init_group, + listdir, + rename, + rmdir, +) +from zarr.util import ( + InfoReporter, + TreeViewer, + is_valid_python_name, + nolock, + normalize_shape, + normalize_storage_path, +) class Group(MutableMapping): @@ -96,6 +111,8 @@ class Group(MutableMapping): def __init__(self, store, path=None, read_only=False, chunk_store=None, cache_attrs=True, synchronizer=None): + store: BaseStore = BaseStore._ensure_store(store) + chunk_store: BaseStore = BaseStore._ensure_store(chunk_store) self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) @@ -237,11 +254,8 @@ def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): - """If the underlying Store has a ``close`` method, call it.""" - try: - self.store.close() - except AttributeError: - pass + """Call the close method of the underlying Store.""" + self.store.close() def info_items(self): @@ -804,11 +818,13 @@ def create_dataset(self, name, **kwargs): """ + assert "mode" not in kwargs return self._write_op(self._create_dataset_nosync, name, **kwargs) def _create_dataset_nosync(self, name, data=None, **kwargs): + assert "mode" not in kwargs path = self._item_path(name) # determine synchronizer diff --git a/zarr/storage.py b/zarr/storage.py index 92be9df0aa..901011c9d2 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -31,7 +31,7 @@ from os import scandir from pickle import PicklingError from threading import Lock, RLock -from typing import Optional, Union, List, Tuple, Dict +from typing import Optional, Union, List, Tuple, Dict, Any import uuid import time @@ -57,6 +57,15 @@ normalize_shape, normalize_storage_path, retry_call) from zarr._storage.absstore import ABSStore # noqa: F401 +from zarr._storage.store import (_listdir_from_keys, + _path_to_prefix, + _rename_from_keys, + _rmdir_from_keys, + 
def normalize_store_arg(store: Any, clobber=False, storage_options=None, mode="w") -> BaseStore:
    """Coerce a polymorphic `store` argument into a store object.

    Parameters
    ----------
    store : None, os.PathLike, str, or store-like object
        * ``None`` gives a new empty ``dict`` passed through
          ``BaseStore._ensure_store``.
        * An ``os.PathLike`` is first converted with ``os.fspath``.
        * A string containing ``"://"`` or ``"::"`` gives an ``FSStore``;
          one ending in ``.zip`` a ``ZipStore``; one ending in ``.n5`` an
          ``N5Store``; any other string a ``DirectoryStore``.
        * A ``MutableMapping`` that is not already a ``BaseStore`` is passed
          through ``BaseStore._ensure_store``; any other object is returned
          unchanged.
    clobber : bool, optional
        When false, string stores are opened with mode ``"r"`` instead of
        `mode`.
    storage_options : dict, optional
        Extra keyword arguments for ``FSStore``; only valid with fsspec-style
        URLs.
    mode : str, optional
        Mode handed to ``FSStore``/``ZipStore`` when `clobber` is true.

    Raises
    ------
    ValueError
        If `storage_options` is given together with a non-fsspec path.
    """
    if store is None:
        return BaseStore._ensure_store(dict())
    if isinstance(store, os.PathLike):
        store = os.fspath(store)

    if not isinstance(store, str):
        # already store-like: only plain mutable mappings need wrapping
        if not isinstance(store, BaseStore) and isinstance(store, MutableMapping):
            store = BaseStore._ensure_store(store)
        return store

    mode = mode if clobber else "r"
    if "://" in store or "::" in store:
        return FSStore(store, mode=mode, **(storage_options or {}))
    if storage_options:
        raise ValueError("storage_options passed with non-fsspec path")
    if store.endswith('.zip'):
        return ZipStore(store, mode=mode)
    if store.endswith('.n5'):
        from zarr.n5 import N5Store
        return N5Store(store)
    return DirectoryStore(store)


def rmdir(store: StoreLike, path: Path = None):
    """Remove all items under the given path. If `store` provides a `rmdir` method
    and does not report itself as non-erasable, this will be called, otherwise
    will fall back to deleting keys one at a time via the `Store` interface."""
    path = normalize_storage_path(path)
    # `StoreLike` still admits plain mutable mappings, which may offer `rmdir`
    # without `is_erasable`; a missing method is treated as "erasable" so such
    # mappings keep working instead of raising AttributeError.
    is_erasable = getattr(store, "is_erasable", None)
    if hasattr(store, "rmdir") and (is_erasable is None or is_erasable()):
        # pass through
        store.rmdir(path)  # type: ignore
    else:
        # slow version, delete one key at a time
        _rmdir_from_keys(store, path)
def listdir(store: BaseStore, path: Path = None):
    """Return a directory listing for `path`.

    The store's own `listdir` method is used when it has one. Otherwise a
    warning is emitted and the listing is computed by scanning all keys of
    the store."""
    path = normalize_storage_path(path)
    if not hasattr(store, 'listdir'):
        # slow version, iterate through all keys
        warnings.warn(
            f"Store {store} has no `listdir` method. From zarr 2.9 onwards "
            "may want to inherit from `Store`.",
            stacklevel=2,
        )
        return _listdir_from_keys(store, path)
    # fast path: delegate to the store
    return store.listdir(path)  # type: ignore
If `store` provides a `getsize` method, this will be called, otherwise will return -1.""" path = normalize_storage_path(path) if hasattr(store, 'getsize'): # pass through - return store.getsize(path) - elif isinstance(store, dict): + return store.getsize(path) # type: ignore + elif isinstance(store, MutableMapping): # compute from size of values if path in store: v = store[path] @@ -208,8 +206,8 @@ def getsize(store, path: Path = None) -> int: def _require_parent_group( path: Optional[str], - store: MutableMapping, - chunk_store: Optional[MutableMapping], + store: StoreLike, + chunk_store: Optional[StoreLike], overwrite: bool, ): # assume path is normalized @@ -225,7 +223,7 @@ def _require_parent_group( def init_array( - store: MutableMapping, + store: StoreLike, shape: Tuple[int, ...], chunks: Union[bool, int, Tuple[int, ...]] = True, dtype=None, @@ -234,7 +232,7 @@ def init_array( order: str = "C", overwrite: bool = False, path: Path = None, - chunk_store: MutableMapping = None, + chunk_store: StoreLike = None, filters=None, object_codec=None, dimension_separator=None, @@ -244,7 +242,7 @@ def init_array( Parameters ---------- - store : MutableMapping + store : Store A mapping that supports string keys and bytes-like values. shape : int or tuple of ints Array shape. @@ -263,7 +261,7 @@ def init_array( If True, erase all data in `store` prior to initialisation. path : string, bytes, optional Path under which array is stored. - chunk_store : MutableMapping, optional + chunk_store : Store, optional Separate storage for chunks. If not provided, `store` will be used for storage of both chunks and metadata. 
class KVStore(Store):
    """
    This provides a default implementation of a store interface around
    a mutable mapping, to avoid having to test stores for presence of methods.

    This, for most methods, should just be a pass-through to the underlying KV
    store, which is likely to expose a MutableMapping interface.
    """

    def __init__(self, mutablemapping):
        # the wrapped object; every operation below is delegated to it
        self._mutable_mapping = mutablemapping

    def __getitem__(self, key):
        return self._mutable_mapping[key]

    def __setitem__(self, key, value):
        self._mutable_mapping[key] = value

    def __delitem__(self, key):
        del self._mutable_mapping[key]

    def __contains__(self, key):
        # Delegate explicitly: the inherited Mapping mixin answers ``in`` by
        # calling ``__getitem__``, i.e. it reads the whole value just to test
        # for presence.
        return key in self._mutable_mapping

    def get(self, key, default=None):
        return self._mutable_mapping.get(key, default)

    def values(self):
        return self._mutable_mapping.values()

    def __iter__(self):
        return iter(self._mutable_mapping)

    def __len__(self):
        return len(self._mutable_mapping)

    def __repr__(self):
        return f"<{self.__class__.__name__}: \n{repr(self._mutable_mapping)}\n at {hex(id(self))}>"

    def __eq__(self, other):
        # two KVStores are equal when the mappings they wrap compare equal
        if isinstance(other, KVStore):
            return self._mutable_mapping == other._mutable_mapping
        else:
            return NotImplemented
Parameters @@ -1444,6 +1485,8 @@ class also supports the context manager protocol, which ensures the ``close()`` """ + _erasable = False + def __init__(self, path, compression=zipfile.ZIP_STORED, allowZip64=True, mode='a', dimension_separator=None): @@ -1606,7 +1649,7 @@ def migrate_1to2(store): Parameters ---------- - store : MutableMapping + store : Store Store to be migrated. Notes @@ -1650,7 +1693,7 @@ def migrate_1to2(store): # noinspection PyShadowingBuiltins -class DBMStore(MutableMapping): +class DBMStore(Store): """Storage class using a DBM-style database. Parameters @@ -1842,7 +1885,7 @@ def __contains__(self, key): return key in self.db -class LMDBStore(MutableMapping): +class LMDBStore(Store): """Storage class using LMDB. Requires the `lmdb `_ package to be installed. @@ -2019,7 +2062,7 @@ def __len__(self): return self.db.stat()['entries'] -class LRUStoreCache(MutableMapping): +class LRUStoreCache(Store): """Storage class that implements a least-recently-used (LRU) cache layer over some other store. Intended primarily for use with stores that can be slow to access, e.g., remote stores that require network communication to store and @@ -2027,7 +2070,7 @@ class LRUStoreCache(MutableMapping): Parameters ---------- - store : MutableMapping + store : Store The store containing the actual data to be cached. max_size : int The maximum size that the cache may grow to, in number of bytes. 
Provide `None` @@ -2056,14 +2099,14 @@ class LRUStoreCache(MutableMapping): """ - def __init__(self, store, max_size): - self._store = store + def __init__(self, store: Store, max_size: int): + self._store = Store._ensure_store(store) self._max_size = max_size self._current_size = 0 self._keys_cache = None self._contains_cache = None - self._listdir_cache = dict() - self._values_cache = OrderedDict() + self._listdir_cache: Dict[Path, Any] = dict() + self._values_cache: Dict[Path, Any] = OrderedDict() self._mutex = Lock() self.hits = self.misses = 0 @@ -2103,7 +2146,7 @@ def _keys(self): self._keys_cache = list(self._store.keys()) return self._keys_cache - def listdir(self, path=None): + def listdir(self, path: Path = None): with self._mutex: try: return self._listdir_cache[path] @@ -2112,7 +2155,7 @@ def listdir(self, path=None): self._listdir_cache[path] = listing return listing - def getsize(self, path=None): + def getsize(self, path=None) -> int: return getsize(self._store, path=path) def _pop_value(self): @@ -2129,7 +2172,7 @@ def _accommodate_value(self, value_size): v = self._pop_value() self._current_size -= buffer_size(v) - def _cache_value(self, key, value): + def _cache_value(self, key: Path, value): # cache a value value_size = buffer_size(value) # check size of the value against max size, as if the value itself exceeds max @@ -2201,7 +2244,7 @@ def __delitem__(self, key): self._invalidate_value(key) -class SQLiteStore(MutableMapping): +class SQLiteStore(Store): """Storage class using SQLite. Parameters @@ -2404,7 +2447,7 @@ def clear(self): ) -class MongoDBStore(MutableMapping): +class MongoDBStore(Store): """Storage class using MongoDB. .. note:: This is an experimental feature. @@ -2487,7 +2530,7 @@ def clear(self): self.collection.delete_many({}) -class RedisStore(MutableMapping): +class RedisStore(Store): """Storage class using Redis. .. note:: This is an experimental feature. 
@@ -2556,7 +2599,7 @@ def clear(self): del self[key] -class ConsolidatedMetadataStore(MutableMapping): +class ConsolidatedMetadataStore(Store): """A layer over other storage, where the metadata has been consolidated into a single key. @@ -2580,7 +2623,7 @@ class ConsolidatedMetadataStore(MutableMapping): Parameters ---------- - store: MutableMapping + store: Store Containing the zarr array. metadata_key: str The target in the store where all of the metadata are stored. We @@ -2592,8 +2635,8 @@ class ConsolidatedMetadataStore(MutableMapping): """ - def __init__(self, store, metadata_key='.zmetadata'): - self.store = store + def __init__(self, store: StoreLike, metadata_key=".zmetadata"): + self.store = Store._ensure_store(store) # retrieve consolidated metadata meta = json_loads(store[metadata_key]) @@ -2605,7 +2648,7 @@ def __init__(self, store, metadata_key='.zmetadata'): consolidated_format) # decode metadata - self.meta_store = meta['metadata'] + self.meta_store: Store = KVStore(meta["metadata"]) def __getitem__(self, key): return self.meta_store[key] diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py index 2aced3abaa..b2de736d4a 100644 --- a/zarr/tests/test_attrs.py +++ b/zarr/tests/test_attrs.py @@ -1,21 +1,26 @@ import json -import unittest import pytest from zarr.attrs import Attributes from zarr.tests.util import CountingDict +from zarr.storage import KVStore -class TestAttributes(unittest.TestCase): +class TestAttributes(): def init_attributes(self, store, read_only=False, cache=True): return Attributes(store, key='attrs', read_only=read_only, cache=cache) - def test_storage(self): + @pytest.mark.parametrize('store_from_dict', [False, True]) + def test_storage(self, store_from_dict): - store = dict() + if store_from_dict: + store = dict() + else: + store = KVStore(dict()) a = Attributes(store=store, key='attrs') + assert isinstance(a.store, KVStore) assert 'foo' not in a assert 'bar' not in a assert dict() == a.asdict() diff --git 
a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index d2bd91038b..e5ccbd494d 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -24,8 +24,12 @@ from zarr.core import Array from zarr.errors import CopyError from zarr.hierarchy import Group, group -from zarr.storage import (ConsolidatedMetadataStore, MemoryStore, - atexit_rmtree, getsize) +from zarr.storage import ( + ConsolidatedMetadataStore, + MemoryStore, + atexit_rmtree, + getsize, +) def test_open_array(path_type): diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 4544a6cae9..2b44ac0574 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -24,11 +24,12 @@ ABSStore, DBMStore, DirectoryStore, + FSStore, + KVStore, LMDBStore, LRUStoreCache, NestedDirectoryStore, SQLiteStore, - FSStore, atexit_rmglob, atexit_rmtree, init_array, @@ -45,8 +46,8 @@ class TestArray(unittest.TestCase): def test_array_init(self): # normal initialization - store = dict() - init_array(store, shape=100, chunks=10, dtype=' end assert [] == list(z.islice(6, 5)) - if hasattr(z.store, 'close'): - z.store.close() + z.store.close() def test_iter(self): params = ( @@ -1491,8 +1417,7 @@ def test_iter(self): z[:] = a for expect, actual in zip_longest(a, z): assert_array_equal(expect, actual) - if hasattr(z.store, 'close'): - z.store.close() + z.store.close() def test_islice(self): params = ( @@ -1530,8 +1455,7 @@ def test_compressors(self): assert np.all(a[0:100] == 1) a[:] = 1 assert np.all(a[:] == 1) - if hasattr(a.store, 'close'): - a.store.close() + a.store.close() def test_endian(self): dtype = np.dtype('float32') @@ -1542,10 +1466,8 @@ def test_endian(self): a2[:] = 1 x2 = a2[:] assert_array_equal(x1, x2) - if hasattr(a1.store, 'close'): - a1.store.close() - if hasattr(a2.store, 'close'): - a2.store.close() + a1.store.close() + a2.store.close() def test_attributes(self): a = self.create_array(shape=10, chunks=10, dtype='i8') @@ -1559,8 +1481,7 @@ def 
test_attributes(self): attrs = json_loads(a.store[a.attrs.key]) assert 'foo' in attrs and attrs['foo'] == 'bar' assert 'bar' in attrs and attrs['bar'] == 'foo' - if hasattr(a.store, 'close'): - a.store.close() + a.store.close() def test_structured_with_object(self): a = self.create_array(fill_value=(0.0, None), @@ -1575,7 +1496,7 @@ class TestArrayWithPath(TestArray): @staticmethod def create_array(read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) write_empty_chunks = kwargs.pop('write_empty_chunks', True) @@ -1584,6 +1505,9 @@ def create_array(read_only=False, **kwargs): cache_metadata=cache_metadata, cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + def test_nchunks_initialized(self): + pass + def test_hexdigest(self): # Check basic 1-D array z = self.create_array(shape=(1050,), chunks=100, dtype='