Skip to content

Commit

Permalink
Create a Base store class for Zarr Store (update) (#789)
Browse files Browse the repository at this point in the history
* fix conflicts

* cleanup naming

* zip move

* fix erasability test

* test for warning

* please flake

* remove uncovered lines

* remove uncovered lines in tests

* pragma no cover for exceptional case

* minor docstring fixes

add assert statements to test_capabilities

* pep8 fix

* avoid NumPy 1.21.0 due to numpy/numpy#19325

* move Store class and some helper functions to zarr._storage.store

update version in Store docstring

* BUG: ABSStore should inherit from Store

* pep8 fix

* TST: make CustomMapping a subclass of Store

TST: initialize stores with KVStore(dict()) instead of bare dict()

* update version mentioned in Store docstring

* update version mentioned in warning message

* use Store._ensure_store in Attributes class

ensures Attributes.store is a Store

* TST: add Attributes test case ensuring store gets coerced to a Store

* use Store._ensure_store in normalize_store_arg

ensures open_array, etc can work when the user supplies a dict

* TST: make sure high level creation functions also work when passed a dict for store

* TST: add test case with group initialized from dict

* TST: add test case with Array initialized from dict

* change CustomMapping back to type object, not Store

want to test the non-Store code path in _ensure_store

* pep8 fixes

* update/fix new hierarchy test case to complete code coverage

* create a BaseStore parent for Store

BaseStore does not have the listdir or rmdir methods

cleaned up some type declarations, making sure mypy passes

* flake8

* restore is_erasable check to rmdir function

Otherwise the save_array doc example fails to write to a ZipStore

Co-authored-by: Matthias Bussonnier <bussonniermatthias@gmail.com>
Co-authored-by: Josh Moore <j.a.moore@dundee.ac.uk>
Co-authored-by: jmoore <josh@glencoesoftware.com>
  • Loading branch information
4 people authored Oct 21, 2021
1 parent 523dbb8 commit 5c71212
Show file tree
Hide file tree
Showing 19 changed files with 794 additions and 583 deletions.
12 changes: 7 additions & 5 deletions docs/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ print some diagnostics, e.g.::
Read-only : False
Compressor : Blosc(cname='zstd', clevel=3, shuffle=BITSHUFFLE,
: blocksize=0)
Store type : builtins.dict
Store type : zarr.storage.KVStore
No. bytes : 400000000 (381.5M)
No. bytes stored : 3379344 (3.2M)
Storage ratio : 118.4
Expand Down Expand Up @@ -268,7 +268,7 @@ Here is an example using a delta filter with the Blosc compressor::
Read-only : False
Filter [0] : Delta(dtype='<i4')
Compressor : Blosc(cname='zstd', clevel=1, shuffle=SHUFFLE, blocksize=0)
Store type : builtins.dict
Store type : zarr.storage.KVStore
No. bytes : 400000000 (381.5M)
No. bytes stored : 1290562 (1.2M)
Storage ratio : 309.9
Expand Down Expand Up @@ -805,8 +805,10 @@ Here is an example using S3Map to read an array created previously::
Order : C
Read-only : False
Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store type : fsspec.mapping.FSMap
Store type : zarr.storage.KVStore
No. bytes : 21
No. bytes stored : 382
Storage ratio : 0.1
Chunks initialized : 3/3
>>> z[:]
array([b'H', b'e', b'l', b'l', b'o', b' ', b'f', b'r', b'o', b'm', b' ',
Expand Down Expand Up @@ -1274,7 +1276,7 @@ ratios, depending on the correlation structure within the data. E.g.::
Order : C
Read-only : False
Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store type : builtins.dict
Store type : zarr.storage.KVStore
No. bytes : 400000000 (381.5M)
No. bytes stored : 6696010 (6.4M)
Storage ratio : 59.7
Expand All @@ -1288,7 +1290,7 @@ ratios, depending on the correlation structure within the data. E.g.::
Order : F
Read-only : False
Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store type : builtins.dict
Store type : zarr.storage.KVStore
No. bytes : 400000000 (381.5M)
No. bytes stored : 4684636 (4.5M)
Storage ratio : 85.4
Expand Down
2 changes: 1 addition & 1 deletion mypy.ini
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[mypy]
python_version = 3.7
python_version = 3.8
ignore_missing_imports = True
follow_imports = silent
2 changes: 2 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS IGNORE_EXCEPTION_DETAIL
addopts = --durations=10
filterwarnings =
error::DeprecationWarning:zarr.*
error::UserWarning:zarr.*
ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning
ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning
4 changes: 2 additions & 2 deletions zarr/_storage/absstore.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
"""This module contains storage classes related to Azure Blob Storage (ABS)"""

import warnings
from collections.abc import MutableMapping
from numcodecs.compat import ensure_bytes
from zarr.util import normalize_storage_path
from zarr._storage.store import Store

__doctest_requires__ = {
('ABSStore', 'ABSStore.*'): ['azure.storage.blob'],
}


class ABSStore(MutableMapping):
class ABSStore(Store):
"""Storage class using Azure Blob Storage (ABS).
Parameters
Expand Down
166 changes: 166 additions & 0 deletions zarr/_storage/store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
from collections.abc import MutableMapping
from typing import Any, List, Optional, Union

from zarr.util import normalize_storage_path

# v2 store keys
array_meta_key = '.zarray'
group_meta_key = '.zgroup'
attrs_key = '.zattrs'


class BaseStore(MutableMapping):
    """Abstract base class for store implementations.

    A thin layer over ``MutableMapping`` that adds capability queries
    (readable, writeable, erasable, listable), a no-op ``close()`` and
    context-manager support so a store is closed when the outermost
    ``with`` block exits.

    Stores are not interchangeable with arbitrary mutable mappings: they
    carry extra requirements (for instance, only string keys are allowed)
    that would break the Liskov substitution principle. Providing no-op
    base methods such as ``close()`` spares callers from probing for the
    presence of attributes and methods.

    .. added: 2.11.0
    """

    # Capability flags; subclasses override these to restrict what they allow.
    _readable = True
    _writeable = True
    _erasable = True
    _listable = True

    def is_readable(self):
        """Return the store's ``_readable`` flag."""
        return self._readable

    def is_writeable(self):
        """Return the store's ``_writeable`` flag."""
        return self._writeable

    def is_listable(self):
        """Return the store's ``_listable`` flag."""
        return self._listable

    def is_erasable(self):
        """Return the store's ``_erasable`` flag."""
        return self._erasable

    def __enter__(self):
        # The nesting counter is created lazily on the first entry.
        self._open_count = getattr(self, "_open_count", 0) + 1
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        remaining = self._open_count - 1
        self._open_count = remaining
        # Only the outermost ``with`` block actually closes the store.
        if remaining == 0:
            self.close()

    def close(self) -> None:
        """Do nothing by default"""

    def rename(self, src_path: str, dst_path: str) -> None:
        """Move every key under ``src_path`` to ``dst_path``.

        Raises
        ------
        NotImplementedError
            If the store reports that it is not erasable.
        """
        if self.is_erasable():
            _rename_from_keys(self, src_path, dst_path)
            return
        raise NotImplementedError(
            f'{type(self)} is not erasable, cannot call "rename"'
        )  # pragma: no cover

    @staticmethod
    def _ensure_store(store: Any):
        """Coerce ``store`` into a ``BaseStore`` (or pass ``None`` through).

        ``BaseStore`` instances are returned unchanged. ``MutableMapping``
        instances, and any other object exposing the mapping-style methods
        probed below, are wrapped in ``KVStore``.

        Raises
        ------
        ValueError
            If ``store`` is none of the above.
        """
        from zarr.storage import KVStore  # avoid circular import

        if store is None:
            return None
        if isinstance(store, BaseStore):
            return store
        if isinstance(store, MutableMapping):
            return KVStore(store)

        # Duck-typed fallback: accept anything that looks like a mapping.
        required = (
            "keys",
            "values",
            "get",
            "__setitem__",
            "__getitem__",
            "__delitem__",
            "__contains__",
        )
        if all(hasattr(store, name) for name in required):
            return KVStore(store)

        raise ValueError(
            "Starting with Zarr 2.11.0, stores must be subclasses of "
            "BaseStore, if your store exposes the MutableMapping interface "
            f"wrap it in Zarr.storage.KVStore. Got {store}"
        )


class Store(BaseStore):
    """Abstract store class used by implementations following the Zarr v2 spec.

    Adds public `listdir` and `rmdir` methods on top of BaseStore; `rename`
    is inherited from BaseStore.

    .. added: 2.11.0
    """

    def listdir(self, path: str = "") -> List[str]:
        """Return the sorted names of the direct children of ``path``."""
        return _listdir_from_keys(self, normalize_storage_path(path))

    def rmdir(self, path: str = "") -> None:
        """Delete every key stored under ``path``.

        Raises
        ------
        NotImplementedError
            If the store reports that it is not erasable.
        """
        if self.is_erasable():
            _rmdir_from_keys(self, normalize_storage_path(path))
            return
        raise NotImplementedError(
            f'{type(self)} is not erasable, cannot call "rmdir"'
        )  # pragma: no cover


def _path_to_prefix(path: Optional[str]) -> str:
# assume path already normalized
if path:
prefix = path + '/'
else:
prefix = ''
return prefix


def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None:
    """Move every key under ``src_path`` so that it lives under ``dst_path``.

    Parameters
    ----------
    store
        Store whose keys are rewritten in place.
    src_path
        Path whose keys are moved.
    dst_path
        Path the keys are moved to.
    """
    # assume path already normalized
    src_prefix = _path_to_prefix(src_path)
    dst_prefix = _path_to_prefix(dst_path)
    # Snapshot the keys first: the store is mutated while we walk them.
    for key in list(store.keys()):
        if key.startswith(src_prefix):
            # Slice the prefix off. ``str.lstrip`` treats its argument as a
            # *set of characters*, so it would also eat leading characters of
            # the remainder (e.g. 'foo/foo/x' -> 'x' instead of 'foo/x').
            new_key = dst_prefix + key[len(src_prefix):]
            store[new_key] = store.pop(key)


def _rmdir_from_keys(store: Union[BaseStore, MutableMapping], path: Optional[str] = None) -> None:
    """Delete every key in ``store`` that starts with the prefix for ``path``.

    An empty or ``None`` path yields an empty prefix, which matches (and so
    removes) every key.
    """
    # assume path already normalized
    doomed_prefix = _path_to_prefix(path)
    # Iterate over a snapshot, since entries are removed during the walk.
    snapshot = list(store.keys())
    for name in snapshot:
        if not name.startswith(doomed_prefix):
            continue
        del store[name]


def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str]:
    """Return the sorted, de-duplicated names of the direct children of ``path``.

    A child is the first ``'/'``-separated component of whatever follows the
    path prefix in a key; keys equal to the prefix itself are ignored.
    """
    # assume path already normalized
    base = _path_to_prefix(path)
    skip = len(base)
    names = set()
    for name in list(store.keys()):
        if not name.startswith(base):
            continue
        if len(name) <= skip:
            continue
        # Keep only the first path component after the prefix.
        head, _, _ = name[skip:].partition('/')
        names.add(head)
    return sorted(names)
3 changes: 2 additions & 1 deletion zarr/attrs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from collections.abc import MutableMapping

from zarr.meta import parse_metadata
from zarr._storage.store import Store
from zarr.util import json_dumps


Expand All @@ -26,7 +27,7 @@ class Attributes(MutableMapping):

def __init__(self, store, key='.zattrs', read_only=False, cache=True,
synchronizer=None):
self.store = store
self.store = Store._ensure_store(store)
self.key = key
self.read_only = read_only
self.cache = cache
Expand Down
Loading

0 comments on commit 5c71212

Please sign in to comment.