Skip to content

Commit

Permalink
Merge pull request #26 from bioio-devs/feature/public-s3-prefix-witho…
Browse files Browse the repository at this point in the history
…ut-auth

Public s3 prefix without auth
  • Loading branch information
pgarrison authored Jul 24, 2024
2 parents afbcc1f + d186d80 commit b5f7f74
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 20 deletions.
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,20 @@ img.data
```

### Reading from AWS S3
To read from private S3 buckets or public buckets using `s3://` paths, [credentials](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) must be configured. Public buckets can be accessed without credentials by using the `https://` path.
To read from private S3 buckets, [credentials](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) must be configured. Public buckets can be accessed without credentials.
```python
from bioio import BioImage
path = "https://allencell.s3.amazonaws.com/aics/nuc-morph-dataset/hipsc_fov_nuclei_timelapse_dataset/hipsc_fov_nuclei_timelapse_data_used_for_analysis/baseline_colonies_fov_timelapse_dataset/20200323_09_small/raw.ome.zarr"
image = BioImage(path)
print(image.get_image_dask_data())
```
When using an `s3://` path to access a public S3 bucket, pass `fs_kwargs=dict(anon=True)` to the `BioImage` constructor so that the bucket is accessed anonymously, without credentials.
```python
from bioio import BioImage
path = "s3://allencell/aics/nuc-morph-dataset/hipsc_fov_nuclei_timelapse_dataset/hipsc_fov_nuclei_timelapse_data_used_for_analysis/baseline_colonies_fov_timelapse_dataset/20200323_09_small/raw.ome.zarr"
image = BioImage(path, fs_kwargs=dict(anon=True))
print(image.get_image_dask_data())
```

## Issues
[_Click here to view all open issues in bioio-devs organization at once_](https://github.com/search?q=user%3Abioio-devs+is%3Aissue+is%3Aopen&type=issues&ref=advsearch) or check this repository's issue tab.
Expand Down
13 changes: 7 additions & 6 deletions bioio_ome_zarr/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
from typing import Any, Dict, List, Optional, Tuple

import xarray as xr
import zarr.storage
from bioio_base import constants, dimensions, exceptions, io, reader, types
from fsspec.spec import AbstractFileSystem
from ome_zarr.io import parse_url
from ome_zarr.io import ZarrLocation
from ome_zarr.reader import Reader as ZarrReader

from . import utils as metadata_utils
Expand All @@ -25,7 +26,7 @@ class Reader(reader.Reader):
image: types.PathLike
String or Path to the ZARR root
fs_kwargs: Dict[str, Any]
Ignored
Passed to fsspec. For public S3 buckets, use {"anon": True}.
"""

_xarray_dask_data: Optional["xr.DataArray"] = None
Expand Down Expand Up @@ -74,7 +75,9 @@ def _is_supported_image(fs: AbstractFileSystem, path: str, **kwargs: Any) -> boo
get_zarr_reader(fs, path)
return True

except AttributeError:
except (AssertionError, AttributeError):
# ZarrReader.__init__ asserts zarr.exists() on the location it is given,
# so a path that is not a readable Zarr raises AssertionError.
return False

@classmethod
Expand Down Expand Up @@ -263,6 +266,4 @@ def _get_coords(


def get_zarr_reader(fs: AbstractFileSystem, path: str) -> ZarrReader:
if fs is not None:
path = fs.unstrip_protocol(path)
return ZarrReader(parse_url(path, mode="r"))
return ZarrReader(ZarrLocation(zarr.storage.FSStore(url=path, fs=fs)))
16 changes: 11 additions & 5 deletions bioio_ome_zarr/tests/test_s3_read.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import numpy as np
import pytest

from bioio_ome_zarr import Reader


def test_ome_zarr_reader() -> None:
@pytest.mark.parametrize(
["prefix", "fs_kwargs"],
[
["s3://allencell/aics/", dict(anon=True)],
["https://allencell.s3.amazonaws.com/aics/", dict()],
],
)
def test_ome_zarr_reader(prefix: str, fs_kwargs: dict) -> None:
# ARRANGE
uri = (
# Cannot use s3:// URL due to ome-zarr issue #369
# "s3://allencell/aics/nuc_morph_data"
"https://allencell.s3.amazonaws.com/aics/nuc-morph-dataset"
prefix + "nuc-morph-dataset"
"/hipsc_fov_nuclei_timelapse_dataset"
"/hipsc_fov_nuclei_timelapse_data_used_for_analysis"
"/baseline_colonies_fov_timelapse_dataset/20200323_09_small/raw.ome.zarr"
Expand All @@ -17,7 +23,7 @@ def test_ome_zarr_reader() -> None:
resolution_level = 0

# ACT
image_container = Reader(uri, fs_kwargs=dict(anon=True))
image_container = Reader(uri, fs_kwargs=fs_kwargs)
image_container.set_scene(scene)
image_container.set_resolution_level(resolution_level)

Expand Down
13 changes: 5 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@
requires = ["setuptools>=65", "wheel", "setuptools_scm[toml]>=6.2"]
build-backend = "setuptools.build_meta"

[tool.setuptools_scm]

# package basics
# https://peps.python.org/pep-0621/
[project]
name = "bioio-ome-zarr"
description = "A BioIO reader plugin for reading Zarr files in the OME format."
Expand All @@ -27,10 +23,11 @@ classifiers = [
]
dynamic = ["version"]
dependencies = [
"bioio-base>=1.0.0",
"fsspec>=2022.8.0",
"ome-zarr>=0.8.0",
"xarray>=0.16.1",
"bioio-base>=1.0.0",
"fsspec>=2022.8.0",
"ome-zarr>=0.9.0",
"xarray>=0.16.1",
"zarr>=2.18.2",
]

[project.urls]
Expand Down

0 comments on commit b5f7f74

Please sign in to comment.