Skip to content

Commit

Permalink
workaround s3 zmetadata; bug with s3 anndata
Browse files Browse the repository at this point in the history
  • Loading branch information
LucaMarconato committed Jul 13, 2023
1 parent fe6bebe commit 315c83b
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 20 deletions.
4 changes: 3 additions & 1 deletion src/spatialdata/_core/spatialdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -1055,7 +1055,9 @@ def write(

if consolidate_metadata:
# consolidate metadata to more easily support remote reading
zarr.consolidate_metadata(store)
# bug in zarr, 'zmetadata' is written instead of '.zmetadata'
# see discussion https://github.com/zarr-developers/zarr-python/issues/1121
zarr.consolidate_metadata(store, metadata_key=".zmetadata")

# old code to support overwriting the backing file
# if target_path is not None:
Expand Down
69 changes: 50 additions & 19 deletions src/spatialdata/_io/io_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,31 @@
from spatialdata.models import TableModel


def _open_zarr_store(store: Union[str, Path, zarr.Group]) -> tuple[zarr.Group, str]:
    """
    Open a zarr store (on-disk or remote) and return the zarr.Group object and the path to the store.

    Parameters
    ----------
    store
        Path to the zarr store (on-disk or remote) or a zarr.Group object.

    Returns
    -------
    A tuple of the zarr.Group object and the path to the store.

    Raises
    ------
    ValueError
        If the store backing the group does not expose a ``path`` attribute.
    """
    f = store if isinstance(store, zarr.Group) else zarr.open(store, mode="r")
    # workaround for a bug with .zmetadata being written as zmetadata
    # (https://github.com/zarr-developers/zarr-python/issues/1121): when a store addressed by an
    # "http..." string/path opens as an empty group, reopen it through the consolidated metadata
    # stored under the "zmetadata" key
    if isinstance(store, (str, Path)) and str(store).startswith("http") and len(f) == 0:
        f = zarr.open_consolidated(store, mode="r", metadata_key="zmetadata")
    # a ConsolidatedMetadataStore wraps the actual store, so the path is read from the wrapped store;
    # any other store type is expected to expose ``path`` directly (not only FSStore)
    backing_store = f.store.store if isinstance(f.store, zarr.storage.ConsolidatedMetadataStore) else f.store
    f_store_path = getattr(backing_store, "path", None)
    if f_store_path is None:
        # explicit error instead of an ``assert``, which would be stripped when running with -O
        raise ValueError(
            f"Cannot determine the path of the zarr store: unsupported store type {type(backing_store).__name__}."
        )
    return f, f_store_path


def read_zarr(store: Union[str, Path, zarr.Group], selection: Optional[tuple[str]] = None) -> SpatialData:
"""
Read a SpatialData dataset from a zarr store (on-disk or remote).
Expand All @@ -34,7 +59,7 @@ def read_zarr(store: Union[str, Path, zarr.Group], selection: Optional[tuple[str
-------
A SpatialData object.
"""
f = store if isinstance(store, zarr.Group) else zarr.open(store, mode="r")
f, f_store_path = _open_zarr_store(store)

images = {}
labels = {}
Expand All @@ -54,7 +79,7 @@ def read_zarr(store: Union[str, Path, zarr.Group], selection: Optional[tuple[str
# skip hidden files like .zgroup or .zmetadata
continue
f_elem = group[subgroup_name]
f_elem_store = os.path.join(group._store.path, f_elem.path)
f_elem_store = os.path.join(f_store_path, f_elem.path)
element = _read_multiscale(f_elem_store, raster_type="image")
images[subgroup_name] = element
count += 1
Expand All @@ -70,7 +95,7 @@ def read_zarr(store: Union[str, Path, zarr.Group], selection: Optional[tuple[str
# skip hidden files like .zgroup or .zmetadata
continue
f_elem = group[subgroup_name]
f_elem_store = os.path.join(group._store.path, f_elem.path)
f_elem_store = os.path.join(f_store_path, f_elem.path)
labels[subgroup_name] = _read_multiscale(f_elem_store, raster_type="labels")
count += 1
logger.debug(f"Found {count} elements in {group}")
Expand All @@ -84,7 +109,7 @@ def read_zarr(store: Union[str, Path, zarr.Group], selection: Optional[tuple[str
if Path(subgroup_name).name.startswith("."):
# skip hidden files like .zgroup or .zmetadata
continue
f_elem_store = os.path.join(group._store.path, f_elem.path)
f_elem_store = os.path.join(f_store_path, f_elem.path)
points[subgroup_name] = _read_points(f_elem_store)
count += 1
logger.debug(f"Found {count} elements in {group}")
Expand All @@ -97,7 +122,7 @@ def read_zarr(store: Union[str, Path, zarr.Group], selection: Optional[tuple[str
# skip hidden files like .zgroup or .zmetadata
continue
f_elem = group[subgroup_name]
f_elem_store = os.path.join(group._store.path, f_elem.path)
f_elem_store = os.path.join(f_store_path, f_elem.path)
shapes[subgroup_name] = _read_shapes(f_elem_store)
count += 1
logger.debug(f"Found {count} elements in {group}")
Expand All @@ -110,20 +135,26 @@ def read_zarr(store: Union[str, Path, zarr.Group], selection: Optional[tuple[str
# skip hidden files like .zgroup or .zmetadata
continue
f_elem = group[subgroup_name]
f_elem_store = os.path.join(group._store.path, f_elem.path)
table = read_anndata_zarr(f_elem_store)
if TableModel.ATTRS_KEY in table.uns:
# fill out eventual missing attributes that has been omitted because their value was None
attrs = table.uns[TableModel.ATTRS_KEY]
if "region" not in attrs:
attrs["region"] = None
if "region_key" not in attrs:
attrs["region_key"] = None
if "instance_key" not in attrs:
attrs["instance_key"] = None
# fix type for region
if "region" in attrs and isinstance(attrs["region"], np.ndarray):
attrs["region"] = attrs["region"].tolist()
f_elem_store = os.path.join(f_store_path, f_elem.path)
if isinstance(f, zarr.storage.ConsolidatedMetadataStore):
logger.warning("Reading table from remote store is not supported yet. Skipping.")
table = None
# this doesn't work, needs to be fixed in the anndata package
# table = read_anndata_zarr(f_elem)
else:
table = read_anndata_zarr(f_elem_store)
if TableModel.ATTRS_KEY in table.uns:
# fill out eventual missing attributes that have been omitted because their value was None
attrs = table.uns[TableModel.ATTRS_KEY]
if "region" not in attrs:
attrs["region"] = None
if "region_key" not in attrs:
attrs["region_key"] = None
if "instance_key" not in attrs:
attrs["instance_key"] = None
# fix type for region
if "region" in attrs and isinstance(attrs["region"], np.ndarray):
attrs["region"] = attrs["region"].tolist()
count += 1
logger.debug(f"Found {count} elements in {group}")

Expand Down

0 comments on commit 315c83b

Please sign in to comment.