Skip to content

Commit

Permalink
Merge pull request #895 from dandi/asset-info
Browse files Browse the repository at this point in the history
Make use of `/assets/{asset_id}/info/` endpoint
  • Loading branch information
yarikoptic authored Feb 2, 2022
2 parents 1db2f69 + bc29b0c commit 11a95f1
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 91 deletions.
188 changes: 111 additions & 77 deletions dandi/dandiapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
DRAFT,
MAX_CHUNK_SIZE,
RETRY_STATUSES,
ZARR_MIME_TYPE,
DandiInstance,
EmbargoStatus,
known_instances,
Expand Down Expand Up @@ -500,7 +499,9 @@ def get_asset(self, asset_id: str) -> "BaseRemoteAsset":
method must be used instead.
"""
try:
return BaseRemoteAsset.from_metadata(self, self.get(f"/assets/{asset_id}"))
return BaseRemoteAsset.from_base_data(
self, self.get(f"/assets/{asset_id}/info/")
)
except requests.HTTPError as e:
if e.response.status_code == 404:
raise NotFoundError(f"No such asset: {asset_id!r}")
Expand Down Expand Up @@ -1084,17 +1085,21 @@ def iter_upload_raw_asset(
)


class BaseRemoteAsset(APIBase):
class BaseRemoteAsset(ABC, APIBase):
"""
Representation of an asset retrieved from the API without associated
Dandiset information.
This is an abstract class; its concrete subclasses are
`BaseRemoteBlobAsset` (for assets backed by blobs) and
`BaseRemoteZarrAsset` (for assets backed by Zarrs).
Stringifying a `BaseRemoteAsset` returns a string of the form
:samp:`"{server_id}:asset/{asset_id}"`.
This class should not be instantiated by end-users directly. Instead,
instances should be retrieved from the appropriate attributes & methods of
`DandiAPIClient` and `RemoteDandiset`.
instances should be retrieved from the appropriate methods of
`DandiAPIClient`.
"""

#: The `DandiAPIClient` instance that returned this `BaseRemoteAsset`
Expand All @@ -1106,6 +1111,10 @@ class BaseRemoteAsset(APIBase):
path: str
#: The size of the asset in bytes
size: int
#: The date at which the asset was created
created: datetime
#: The date at which the asset was last modified
modified: datetime
#: Metadata supplied at initialization; returned when metadata is requested
#: instead of performing an API call
_metadata: Optional[Dict[str, Any]] = PrivateAttr(default_factory=None)
Expand All @@ -1120,23 +1129,32 @@ def __str__(self) -> str:
return f"{self.client._instance_id}:assets/{self.identifier}"

@classmethod
def from_metadata(
self, client: "DandiAPIClient", metadata: Dict[str, Any]
def from_base_data(
self,
client: "DandiAPIClient",
data: Dict[str, Any],
metadata: Optional[Dict[str, Any]] = None,
) -> "BaseRemoteAsset":
"""
Construct a `BaseRemoteAsset` instance from a `DandiAPIClient` and a
`dict` of raw asset metadata.
Construct a `BaseRemoteAsset` instance from a `DandiAPIClient`, a
`dict` of raw data in the same format as returned by the API's
pagination endpoints, and optional raw asset metadata.
This is a low-level method that non-developers would normally only use
when acquiring data using means outside of this library.
"""
return BaseRemoteAsset( # type: ignore[call-arg]
client=client,
identifier=metadata["identifier"],
path=metadata["path"],
size=metadata["contentSize"],
_metadata=metadata,
)
klass: Type[BaseRemoteAsset]
if data.get("blob") is not None:
klass = BaseRemoteBlobAsset
if data.pop("zarr", None) is not None:
raise ValueError("Asset data contains both `blob` and `zarr`'")
elif data.get("zarr") is not None:
klass = BaseRemoteZarrAsset
if data.pop("blob", None) is not None:
raise ValueError("Asset data contains both `blob` and `zarr`'")
else:
raise ValueError("Asset data contains neither `blob` nor `zarr`")
return klass(client=client, **data, _metadata=metadata) # type: ignore[call-arg]

@property
def api_path(self) -> str:
Expand Down Expand Up @@ -1313,16 +1331,14 @@ def download(
fp.write(chunk)

@property
@abstractmethod
def asset_type(self) -> AssetType:
"""
.. versionadded:: 0.36.0
The type of the asset's underlying data
"""
if self.get_raw_metadata().get("encodingFormat") == ZARR_MIME_TYPE:
return AssetType.ZARR
else:
return AssetType.BLOB
...

@property
def digest_type(self) -> models.DigestType:
Expand All @@ -1339,11 +1355,81 @@ def digest_type(self) -> models.DigestType:
return models.DigestType.dandi_etag


class RemoteAsset(ABC, BaseRemoteAsset):
class BaseRemoteBlobAsset(BaseRemoteAsset):
"""
.. versionadded:: 0.36.0
A `BaseRemoteAsset` whose actual data is a blob resource
"""

#: The ID of the underlying blob resource
blob: str

@property
def asset_type(self) -> AssetType:
"""
.. versionadded:: 0.36.0
The type of the asset's underlying data
"""
return AssetType.BLOB


class BaseRemoteZarrAsset(BaseRemoteAsset):
"""
.. versionadded:: 0.36.0
A `BaseRemoteAsset` whose actual data is a Zarr resource
"""

#: The ID of the underlying Zarr resource
zarr: str

@property
def asset_type(self) -> AssetType:
"""
.. versionadded:: 0.36.0
The type of the asset's underlying data
"""
return AssetType.ZARR

@property
def filetree(self) -> "RemoteZarrEntry":
"""
The `RemoteZarrEntry` for the root of the hierarchy of files within the
Zarr
"""
return RemoteZarrEntry(
client=self.client, zarr_id=self.zarr, parts=(), _known_dir=True
)

def iterfiles(self, include_dirs: bool = False) -> Iterator["RemoteZarrEntry"]:
"""
Returns a generator of all `RemoteZarrEntry`\\s within the Zarr. By
default, only instances for files are produced, unless ``include_dirs``
is true.
"""
dirs = deque([self.filetree])
while dirs:
for p in dirs.popleft().iterdir():
if p.is_dir():
dirs.append(p)
if include_dirs:
yield p
else:
yield p


class RemoteAsset(BaseRemoteAsset):
"""
Subclass of `BaseRemoteAsset` that includes information about the Dandiset
to which the asset belongs.
This is an abstract class; its concrete subclasses are `RemoteBlobAsset`
(for assets backed by blobs) and `RemoteZarrAsset` (for assets backed by
Zarrs).
This class should not be instantiated by end-users directly. Instead,
instances should be retrieved from the appropriate attributes & methods of
`RemoteDandiset`.
Expand All @@ -1356,10 +1442,6 @@ class RemoteAsset(ABC, BaseRemoteAsset):
#: The identifier for the version of the Dandiset to which the asset
#: belongs
version_id: str
#: The date at which the asset was created
created: datetime
#: The date at which the asset was last modified
modified: datetime

@classmethod
def from_data(
Expand Down Expand Up @@ -1439,25 +1521,13 @@ def delete(self) -> None:
self.client.delete(self.api_path)


class RemoteBlobAsset(RemoteAsset):
class RemoteBlobAsset(RemoteAsset, BaseRemoteBlobAsset):
"""
.. versionadded:: 0.36.0
A `RemoteAsset` whose actual data is a blob resource
"""

#: The ID of the underlying blob resource
blob: str

@property
def asset_type(self) -> AssetType:
"""
.. versionadded:: 0.36.0
The type of the asset's underlying data
"""
return AssetType.BLOB

def set_raw_metadata(self, metadata: Dict[str, Any]) -> None:
"""
Set the metadata for the asset on the server to the given value and
Expand All @@ -1469,29 +1539,18 @@ def set_raw_metadata(self, metadata: Dict[str, Any]) -> None:
self.identifier = data["asset_id"]
self.path = data["path"]
self.size = int(data["size"])
self.created = ensure_datetime(data["created"])
self.modified = ensure_datetime(data["modified"])
self._metadata = data["metadata"]


class RemoteZarrAsset(RemoteAsset):
class RemoteZarrAsset(RemoteAsset, BaseRemoteZarrAsset):
"""
.. versionadded:: 0.36.0
A `RemoteAsset` whose actual data is a Zarr resource
"""

#: The ID of the underlying Zarr resource
zarr: str

@property
def asset_type(self) -> AssetType:
"""
.. versionadded:: 0.36.0
The type of the asset's underlying data
"""
return AssetType.ZARR

def set_raw_metadata(self, metadata: Dict[str, Any]) -> None:
"""
Set the metadata for the asset on the server to the given value and
Expand All @@ -1503,35 +1562,10 @@ def set_raw_metadata(self, metadata: Dict[str, Any]) -> None:
self.identifier = data["asset_id"]
self.path = data["path"]
self.size = int(data["size"])
self.created = ensure_datetime(data["created"])
self.modified = ensure_datetime(data["modified"])
self._metadata = data["metadata"]

@property
def filetree(self) -> "RemoteZarrEntry":
"""
The `RemoteZarrEntry` for the root of the hierarchy of files within the
Zarr
"""
return RemoteZarrEntry(
client=self.client, zarr_id=self.zarr, parts=(), _known_dir=True
)

def iterfiles(self, include_dirs: bool = False) -> Iterator["RemoteZarrEntry"]:
"""
Returns a generator of all `RemoteZarrEntry`\\s within the Zarr. By
default, only instances for files are produced, unless ``include_dirs``
is true.
"""
dirs = deque([self.filetree])
while dirs:
for p in dirs.popleft().iterdir():
if p.is_dir():
dirs.append(p)
if include_dirs:
yield p
else:
yield p


class ZarrListing(BaseModel):
"""
Expand Down
22 changes: 8 additions & 14 deletions dandi/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

from . import get_logger
from .consts import RETRY_STATUSES, dandiset_metadata_file
from .dandiapi import AssetType, RemoteAsset, RemoteDandiset, RemoteZarrAsset
from .dandiapi import AssetType, BaseRemoteZarrAsset, RemoteDandiset
from .dandiarchive import (
DandisetURL,
MultiAssetURL,
Expand Down Expand Up @@ -286,10 +286,7 @@ def download_generator(
"Asset %s is missing blobDateModified metadata field",
asset.path,
)
if isinstance(asset, RemoteAsset):
# TODO: Remove ^^ guard ^^ once dandi-archive#681 is
# supported
mtime = asset.modified
mtime = asset.modified
_download_generator = _download_file(
asset.get_download_file_iter(),
download_path,
Expand All @@ -304,14 +301,9 @@ def download_generator(
)

else:
assert (
asset.asset_type is AssetType.ZARR
assert isinstance(
asset, BaseRemoteZarrAsset
), f"Asset {asset.path} is neither blob nor Zarr"
if not isinstance(asset, RemoteZarrAsset):
raise NotImplementedError(
"Downloading a Zarr asset identified by a URL without"
" Dandiset details is not yet implemented"
)
_download_generator = _download_zarr(
asset,
download_path,
Expand Down Expand Up @@ -805,12 +797,14 @@ def __exit__(

def append(self, blob: bytes) -> None:
if self.fp is None:
raise ValueError("DownloadDirectory.append() called outside of context manager")
raise ValueError(
"DownloadDirectory.append() called outside of context manager"
)
self.fp.write(blob)


def _download_zarr(
asset: RemoteZarrAsset,
asset: BaseRemoteZarrAsset,
download_path: str,
toplevel_path: Union[str, Path],
existing: str,
Expand Down
9 changes: 9 additions & 0 deletions dandi/tests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,15 @@ def test_download_nonzarr_to_zarr_path(
assert (dd / "sample.zarr").read_text() == "This is not a Zarr.\n"


def test_download_zarr_asset_id_only(
zarr_dandiset: SampleDandiset, tmp_path: Path
) -> None:
asset = zarr_dandiset.dandiset.get_asset_by_path("sample.zarr")
download(asset.base_download_url, tmp_path)
assert list(tmp_path.iterdir()) == [tmp_path / "sample.zarr"]
assert_dirtrees_eq(zarr_dandiset.dspath / "sample.zarr", tmp_path / "sample.zarr")


@pytest.mark.parametrize(
"file_qty,inputs,expected",
[
Expand Down
6 changes: 6 additions & 0 deletions docs/source/modref/dandiapi.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ Assets
:inherited-members: BaseModel
:exclude-members: Config, JSON_EXCLUDE

.. autoclass:: BaseRemoteBlobAsset()
:show-inheritance:

.. autoclass:: AssetType

.. autoclass:: RemoteAsset()
Expand All @@ -62,6 +65,9 @@ Assets
Zarr Assets
^^^^^^^^^^^

.. autoclass:: BaseRemoteZarrAsset()
:show-inheritance:

.. autoclass:: RemoteZarrAsset()
:show-inheritance:

Expand Down

0 comments on commit 11a95f1

Please sign in to comment.