Merge pull request #26 from bioio-devs/feature/public-s3-prefix-without-auth

Public s3 prefix without auth
bioio-devs · Jul 24, 2024 · b5f7f74 · b5f7f74
2 parents afbcc1f + d186d80
commit b5f7f74
Show file tree

Hide file tree

Showing 4 changed files with 31 additions and 20 deletions.
diff --git a/README.md b/README.md
@@ -42,13 +42,20 @@ img.data
 ```
 
 ### Reading from AWS S3
-To read from private S3 buckets or public buckets using `s3://` paths, [credentials](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) must be configured. Public buckets can be accessed without credentials by using the `https://` path.
+To read from private S3 buckets, [credentials](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) must be configured. Public buckets can be accessed without credentials.
 ```python
 from bioio import BioImage
 path = "https://allencell.s3.amazonaws.com/aics/nuc-morph-dataset/hipsc_fov_nuclei_timelapse_dataset/hipsc_fov_nuclei_timelapse_data_used_for_analysis/baseline_colonies_fov_timelapse_dataset/20200323_09_small/raw.ome.zarr"
 image = BioImage(path)
 print(image.get_image_dask_data())
 ```
+When using an `s3://` path to access a public S3 bucket, pass `fs_kwargs=dict(anon=True)` to the `BioImage` constructor so that no credentials are required.
+```python
+from bioio import BioImage
+path = "s3://allencell/aics/nuc-morph-dataset/hipsc_fov_nuclei_timelapse_dataset/hipsc_fov_nuclei_timelapse_data_used_for_analysis/baseline_colonies_fov_timelapse_dataset/20200323_09_small/raw.ome.zarr"
+image = BioImage(path, fs_kwargs=dict(anon=True))
+print(image.get_image_dask_data())
+```
 
 ## Issues
 [_Click here to view all open issues in bioio-devs organization at once_](https://github.com/search?q=user%3Abioio-devs+is%3Aissue+is%3Aopen&type=issues&ref=advsearch) or check this repository's issue tab.

diff --git a/bioio_ome_zarr/reader.py b/bioio_ome_zarr/reader.py
@@ -5,9 +5,10 @@
 from typing import Any, Dict, List, Optional, Tuple
 
 import xarray as xr
+import zarr.storage
 from bioio_base import constants, dimensions, exceptions, io, reader, types
 from fsspec.spec import AbstractFileSystem
-from ome_zarr.io import parse_url
+from ome_zarr.io import ZarrLocation
 from ome_zarr.reader import Reader as ZarrReader
 
 from . import utils as metadata_utils
@@ -25,7 +26,7 @@ class Reader(reader.Reader):
     image: types.PathLike
         String or Path to the ZARR root
     fs_kwargs: Dict[str, Any]
-        Ignored
+        Passed to fsspec. For public S3 buckets, use {"anon": True}.
     """
 
     _xarray_dask_data: Optional["xr.DataArray"] = None
@@ -74,7 +75,9 @@ def _is_supported_image(fs: AbstractFileSystem, path: str, **kwargs: Any) -> boo
             get_zarr_reader(fs, path)
             return True
 
-        except AttributeError:
+        except (AssertionError, AttributeError):
+            # AssertionError may be raised by ZarrReader.__init__, which asserts
+            # that the ZarrLocation it is given exists (`assert zarr.exists()`).
             return False
 
     @classmethod
@@ -263,6 +266,4 @@ def _get_coords(
 
 
 def get_zarr_reader(fs: AbstractFileSystem, path: str) -> ZarrReader:
-    if fs is not None:
-        path = fs.unstrip_protocol(path)
-    return ZarrReader(parse_url(path, mode="r"))
+    return ZarrReader(ZarrLocation(zarr.storage.FSStore(url=path, fs=fs)))
diff --git a/bioio_ome_zarr/tests/test_s3_read.py b/bioio_ome_zarr/tests/test_s3_read.py
@@ -1,14 +1,20 @@
 import numpy as np
+import pytest
 
 from bioio_ome_zarr import Reader
 
 
-def test_ome_zarr_reader() -> None:
+@pytest.mark.parametrize(
+    ["prefix", "fs_kwargs"],
+    [
+        ["s3://allencell/aics/", dict(anon=True)],
+        ["https://allencell.s3.amazonaws.com/aics/", dict()],
+    ],
+)
+def test_ome_zarr_reader(prefix: str, fs_kwargs: dict) -> None:
     # ARRANGE
     uri = (
-        # Cannot use s3:// URL due to ome-zarr issue #369
-        # "s3://allencell/aics/nuc_morph_data"
-        "https://allencell.s3.amazonaws.com/aics/nuc-morph-dataset"
+        prefix + "nuc-morph-dataset"
         "/hipsc_fov_nuclei_timelapse_dataset"
         "/hipsc_fov_nuclei_timelapse_data_used_for_analysis"
         "/baseline_colonies_fov_timelapse_dataset/20200323_09_small/raw.ome.zarr"
@@ -17,7 +23,7 @@ def test_ome_zarr_reader() -> None:
     resolution_level = 0
 
     # ACT
-    image_container = Reader(uri, fs_kwargs=dict(anon=True))
+    image_container = Reader(uri, fs_kwargs=fs_kwargs)
     image_container.set_scene(scene)
     image_container.set_resolution_level(resolution_level)
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,10 +4,6 @@
 requires = ["setuptools>=65", "wheel", "setuptools_scm[toml]>=6.2"]
 build-backend = "setuptools.build_meta"
 
-[tool.setuptools_scm]
-
-# package basics
-# https://peps.python.org/pep-0621/
 [project]
 name = "bioio-ome-zarr"
 description = "A BioIO reader plugin for reading Zarr files in the OME format."
@@ -27,10 +23,11 @@ classifiers = [
 ]
 dynamic = ["version"]
 dependencies = [
-  "bioio-base>=1.0.0",
-  "fsspec>=2022.8.0",
-  "ome-zarr>=0.8.0",
-  "xarray>=0.16.1",
+    "bioio-base>=1.0.0",
+    "fsspec>=2022.8.0",
+    "ome-zarr>=0.9.0",
+    "xarray>=0.16.1",
+    "zarr>=2.18.2",
 ]
 
 [project.urls]