From bc29b0c1a92a5a7575edcae9cdeb0a0e1f3c279e Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 31 Jan 2022 16:57:52 -0500 Subject: [PATCH] Make use of `/assets/{asset_id}/info/` endpoint --- dandi/dandiapi.py | 188 +++++++++++++++++++------------- dandi/download.py | 22 ++-- dandi/tests/test_download.py | 9 ++ docs/source/modref/dandiapi.rst | 6 + 4 files changed, 134 insertions(+), 91 deletions(-) diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py index 8407b1f93..13f112cea 100644 --- a/dandi/dandiapi.py +++ b/dandi/dandiapi.py @@ -38,7 +38,6 @@ DRAFT, MAX_CHUNK_SIZE, RETRY_STATUSES, - ZARR_MIME_TYPE, DandiInstance, EmbargoStatus, known_instances, @@ -500,7 +499,9 @@ def get_asset(self, asset_id: str) -> "BaseRemoteAsset": method must be used instead. """ try: - return BaseRemoteAsset.from_metadata(self, self.get(f"/assets/{asset_id}")) + return BaseRemoteAsset.from_base_data( + self, self.get(f"/assets/{asset_id}/info/") + ) except requests.HTTPError as e: if e.response.status_code == 404: raise NotFoundError(f"No such asset: {asset_id!r}") @@ -1084,17 +1085,21 @@ def iter_upload_raw_asset( ) -class BaseRemoteAsset(APIBase): +class BaseRemoteAsset(ABC, APIBase): """ Representation of an asset retrieved from the API without associated Dandiset information. + This is an abstract class; its concrete subclasses are + `BaseRemoteBlobAsset` (for assets backed by blobs) and + `BaseRemoteZarrAsset` (for assets backed by Zarrs). + Stringifying a `BaseRemoteAsset` returns a string of the form :samp:`"{server_id}:asset/{asset_id}"`. This class should not be instantiated by end-users directly. Instead, - instances should be retrieved from the appropriate attributes & methods of - `DandiAPIClient` and `RemoteDandiset`. + instances should be retrieved from the appropriate methods of + `DandiAPIClient`. """ #: The `DandiAPIClient` instance that returned this `BaseRemoteAsset` @@ -1106,6 +1111,10 @@ class BaseRemoteAsset(APIBase): path: str #: The size of the asset in bytes size: int + #: The date at which the asset was created + created: datetime + #: The date at which the asset was last modified + modified: datetime #: Metadata supplied at initialization; returned when metadata is requested #: instead of performing an API call _metadata: Optional[Dict[str, Any]] = PrivateAttr(default_factory=None) @@ -1120,23 +1129,32 @@ def __str__(self) -> str: return f"{self.client._instance_id}:assets/{self.identifier}" @classmethod - def from_metadata( - self, client: "DandiAPIClient", metadata: Dict[str, Any] + def from_base_data( + self, + client: "DandiAPIClient", + data: Dict[str, Any], + metadata: Optional[Dict[str, Any]] = None, ) -> "BaseRemoteAsset": """ - Construct a `BaseRemoteAsset` instance from a `DandiAPIClient` and a - `dict` of raw asset metadata. + Construct a `BaseRemoteAsset` instance from a `DandiAPIClient`, a + `dict` of raw data in the same format as returned by the API's + pagination endpoints, and optional raw asset metadata. This is a low-level method that non-developers would normally only use when acquiring data using means outside of this library. """ - return BaseRemoteAsset( # type: ignore[call-arg] - client=client, - identifier=metadata["identifier"], - path=metadata["path"], - size=metadata["contentSize"], - _metadata=metadata, - ) + klass: Type[BaseRemoteAsset] + if data.get("blob") is not None: + klass = BaseRemoteBlobAsset + if data.pop("zarr", None) is not None: + raise ValueError("Asset data contains both `blob` and `zarr`'") + elif data.get("zarr") is not None: + klass = BaseRemoteZarrAsset + if data.pop("blob", None) is not None: + raise ValueError("Asset data contains both `blob` and `zarr`'") + else: + raise ValueError("Asset data contains neither `blob` nor `zarr`") + return klass(client=client, **data, _metadata=metadata) # type: ignore[call-arg] @property def api_path(self) -> str: @@ -1314,16 +1332,14 @@ def download( fp.write(chunk) @property + @abstractmethod def asset_type(self) -> AssetType: """ .. versionadded:: 0.36.0 The type of the asset's underlying data """ - if self.get_raw_metadata().get("encodingFormat") == ZARR_MIME_TYPE: - return AssetType.ZARR - else: - return AssetType.BLOB + ... @property def digest_type(self) -> models.DigestType: @@ -1340,11 +1356,81 @@ def digest_type(self) -> models.DigestType: return models.DigestType.dandi_etag -class RemoteAsset(ABC, BaseRemoteAsset): +class BaseRemoteBlobAsset(BaseRemoteAsset): + """ + .. versionadded:: 0.36.0 + + A `BaseRemoteAsset` whose actual data is a blob resource + """ + + #: The ID of the underlying blob resource + blob: str + + @property + def asset_type(self) -> AssetType: + """ + .. versionadded:: 0.36.0 + + The type of the asset's underlying data + """ + return AssetType.BLOB + + +class BaseRemoteZarrAsset(BaseRemoteAsset): + """ + .. versionadded:: 0.36.0 + + A `BaseRemoteAsset` whose actual data is a Zarr resource + """ + + #: The ID of the underlying Zarr resource + zarr: str + + @property + def asset_type(self) -> AssetType: + """ + .. versionadded:: 0.36.0 + + The type of the asset's underlying data + """ + return AssetType.ZARR + + @property + def filetree(self) -> "RemoteZarrEntry": + """ + The `RemoteZarrEntry` for the root of the hierarchy of files within the + Zarr + """ + return RemoteZarrEntry( + client=self.client, zarr_id=self.zarr, parts=(), _known_dir=True + ) + + def iterfiles(self, include_dirs: bool = False) -> Iterator["RemoteZarrEntry"]: + """ + Returns a generator of all `RemoteZarrEntry`\\s within the Zarr. By + default, only instances for files are produced, unless ``include_dirs`` + is true. + """ + dirs = deque([self.filetree]) + while dirs: + for p in dirs.popleft().iterdir(): + if p.is_dir(): + dirs.append(p) + if include_dirs: + yield p + else: + yield p + + +class RemoteAsset(BaseRemoteAsset): """ Subclass of `BaseRemoteAsset` that includes information about the Dandiset to which the asset belongs. + This is an abstract class; its concrete subclasses are `RemoteBlobAsset` + (for assets backed by blobs) and `RemoteZarrAsset` (for assets backed by + Zarrs). + This class should not be instantiated by end-users directly. Instead, instances should be retrieved from the appropriate attributes & methods of `RemoteDandiset`. @@ -1357,10 +1443,6 @@ class RemoteAsset(ABC, BaseRemoteAsset): #: The identifier for the version of the Dandiset to which the asset #: belongs version_id: str - #: The date at which the asset was created - created: datetime - #: The date at which the asset was last modified - modified: datetime @classmethod def from_data( @@ -1440,25 +1522,13 @@ def delete(self) -> None: self.client.delete(self.api_path) -class RemoteBlobAsset(RemoteAsset): +class RemoteBlobAsset(RemoteAsset, BaseRemoteBlobAsset): """ .. versionadded:: 0.36.0 A `RemoteAsset` whose actual data is a blob resource """ - #: The ID of the underlying blob resource - blob: str - - @property - def asset_type(self) -> AssetType: - """ - .. versionadded:: 0.36.0 - - The type of the asset's underlying data - """ - return AssetType.BLOB - def set_raw_metadata(self, metadata: Dict[str, Any]) -> None: """ Set the metadata for the asset on the server to the given value and @@ -1470,29 +1540,18 @@ def set_raw_metadata(self, metadata: Dict[str, Any]) -> None: self.identifier = data["asset_id"] self.path = data["path"] self.size = int(data["size"]) + self.created = ensure_datetime(data["created"]) self.modified = ensure_datetime(data["modified"]) self._metadata = data["metadata"] -class RemoteZarrAsset(RemoteAsset): +class RemoteZarrAsset(RemoteAsset, BaseRemoteZarrAsset): """ .. versionadded:: 0.36.0 A `RemoteAsset` whose actual data is a Zarr resource """ - #: The ID of the underlying Zarr resource - zarr: str - - @property - def asset_type(self) -> AssetType: - """ - .. versionadded:: 0.36.0 - - The type of the asset's underlying data - """ - return AssetType.ZARR - def set_raw_metadata(self, metadata: Dict[str, Any]) -> None: """ Set the metadata for the asset on the server to the given value and @@ -1504,35 +1563,10 @@ def set_raw_metadata(self, metadata: Dict[str, Any]) -> None: self.identifier = data["asset_id"] self.path = data["path"] self.size = int(data["size"]) + self.created = ensure_datetime(data["created"]) self.modified = ensure_datetime(data["modified"]) self._metadata = data["metadata"] - @property - def filetree(self) -> "RemoteZarrEntry": - """ - The `RemoteZarrEntry` for the root of the hierarchy of files within the - Zarr - """ - return RemoteZarrEntry( - client=self.client, zarr_id=self.zarr, parts=(), _known_dir=True - ) - - def iterfiles(self, include_dirs: bool = False) -> Iterator["RemoteZarrEntry"]: - """ - Returns a generator of all `RemoteZarrEntry`\\s within the Zarr. By - default, only instances for files are produced, unless ``include_dirs`` - is true. - """ - dirs = deque([self.filetree]) - while dirs: - for p in dirs.popleft().iterdir(): - if p.is_dir(): - dirs.append(p) - if include_dirs: - yield p - else: - yield p - class ZarrListing(BaseModel): """ diff --git a/dandi/download.py b/dandi/download.py index 0c5753d23..64091ca49 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -39,7 +39,7 @@ from . import get_logger from .consts import RETRY_STATUSES, dandiset_metadata_file -from .dandiapi import AssetType, RemoteAsset, RemoteDandiset, RemoteZarrAsset +from .dandiapi import AssetType, BaseRemoteZarrAsset, RemoteDandiset from .dandiarchive import ( DandisetURL, MultiAssetURL, @@ -286,10 +286,7 @@ def download_generator( "Asset %s is missing blobDateModified metadata field", asset.path, ) - if isinstance(asset, RemoteAsset): - # TODO: Remove ^^ guard ^^ once dandi-archive#681 is - # supported - mtime = asset.modified + mtime = asset.modified _download_generator = _download_file( asset.get_download_file_iter(), download_path, @@ -304,14 +301,9 @@ def download_generator( ) else: - assert ( - asset.asset_type is AssetType.ZARR + assert isinstance( + asset, BaseRemoteZarrAsset ), f"Asset {asset.path} is neither blob nor Zarr" - if not isinstance(asset, RemoteZarrAsset): - raise NotImplementedError( - "Downloading a Zarr asset identified by a URL without" - " Dandiset details is not yet implemented" - ) _download_generator = _download_zarr( asset, download_path, @@ -805,12 +797,14 @@ def __exit__( def append(self, blob: bytes) -> None: if self.fp is None: - raise ValueError("DownloadDirectory.append() called outside of context manager") + raise ValueError( + "DownloadDirectory.append() called outside of context manager" + ) self.fp.write(blob) def _download_zarr( - asset: RemoteZarrAsset, + asset: BaseRemoteZarrAsset, download_path: str, toplevel_path: Union[str, Path], existing: str, diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py index 8b031a102..b140385f4 100644 --- a/dandi/tests/test_download.py +++ b/dandi/tests/test_download.py @@ -371,6 +371,15 @@ def test_download_nonzarr_to_zarr_path( assert (dd / "sample.zarr").read_text() == "This is not a Zarr.\n" +def test_download_zarr_asset_id_only( + zarr_dandiset: SampleDandiset, tmp_path: Path +) -> None: + asset = zarr_dandiset.dandiset.get_asset_by_path("sample.zarr") + download(asset.base_download_url, tmp_path) + assert list(tmp_path.iterdir()) == [tmp_path / "sample.zarr"] + assert_dirtrees_eq(zarr_dandiset.dspath / "sample.zarr", tmp_path / "sample.zarr") + + @pytest.mark.parametrize( "file_qty,inputs,expected", [ diff --git a/docs/source/modref/dandiapi.rst b/docs/source/modref/dandiapi.rst index bb80a20b6..cff441d54 100644 --- a/docs/source/modref/dandiapi.rst +++ b/docs/source/modref/dandiapi.rst @@ -50,6 +50,9 @@ Assets :inherited-members: BaseModel :exclude-members: Config, JSON_EXCLUDE +.. autoclass:: BaseRemoteBlobAsset() + :show-inheritance: + .. autoclass:: AssetType .. autoclass:: RemoteAsset() @@ -62,6 +65,9 @@ Assets Zarr Assets ^^^^^^^^^^^ +.. autoclass:: BaseRemoteZarrAsset() + :show-inheritance: + .. autoclass:: RemoteZarrAsset() :show-inheritance: