From a929f9a15d3995bab54a09fb5c3a9b21f62054e4 Mon Sep 17 00:00:00 2001 From: "zhen.chen" Date: Thu, 4 Nov 2021 16:39:41 +0800 Subject: [PATCH] feat(utility): add "Url" class for get and update url --- tensorbay/client/cloud_storage.py | 10 ++- tensorbay/client/segment.py | 47 ++++--------- tensorbay/dataset/data.py | 32 ++++----- tensorbay/dataset/frame.py | 8 +-- tensorbay/dataset/tests/test_data.py | 14 ++-- tensorbay/dataset/tests/test_frame.py | 4 +- tensorbay/label/label_mask.py | 10 ++- tensorbay/utility/__init__.py | 3 +- tensorbay/utility/file.py | 95 ++++++++++++++++----------- 9 files changed, 110 insertions(+), 113 deletions(-) diff --git a/tensorbay/client/cloud_storage.py b/tensorbay/client/cloud_storage.py index aaefe38bc..b6c850302 100644 --- a/tensorbay/client/cloud_storage.py +++ b/tensorbay/client/cloud_storage.py @@ -13,6 +13,7 @@ from tensorbay.client.requests import Client from tensorbay.dataset import AuthData +from tensorbay.utility import URL class CloudClient: @@ -61,5 +62,12 @@ def list_auth_data(self, path: str = "") -> List[AuthData]: """ return [ - AuthData(cloud_path, _url_getter=self._get_url) for cloud_path in self._list_files(path) + AuthData( + cloud_path, + url=URL.from_getter( + lambda c=cloud_path: self._get_url(c), + lambda c=cloud_path: self._get_url(c), # type: ignore[misc] + ), + ) + for cloud_path in self._list_files(path) ] diff --git a/tensorbay/client/segment.py b/tensorbay/client/segment.py index 0da00ab3d..626c92943 100644 --- a/tensorbay/client/segment.py +++ b/tensorbay/client/segment.py @@ -27,7 +27,7 @@ import time from copy import deepcopy from itertools import zip_longest -from typing import TYPE_CHECKING, Any, Callable, Dict, Generator, Iterable, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, Generator, Iterable, Optional, Tuple, Union import filetype from requests_toolbelt import MultipartEncoder @@ -40,7 +40,7 @@ from tensorbay.exception import FrameError, InvalidParamsError, 
ResponseError from tensorbay.label import Label from tensorbay.sensor.sensor import Sensor, Sensors -from tensorbay.utility import FileMixin, chunked, locked +from tensorbay.utility import URL, FileMixin, chunked, locked if TYPE_CHECKING: from tensorbay.client.dataset import DatasetClient, FusionDatasetClient @@ -49,18 +49,6 @@ _MASK_KEYS = ("semantic_mask", "instance_mask", "panoptic_mask") -class _UrlGetters: - def __init__(self, urls: LazyPage[str]) -> None: - self._urls = urls - - def __getitem__(self, index: int) -> Callable[[str], str]: - return lambda _: self._urls.items[index].get() - - def update(self) -> None: - """Update all urls.""" - self._urls.pull() - - class SegmentClientBase: """This class defines the basic concept of :class:`SegmentClient`. @@ -364,41 +352,34 @@ def _generate_data_paths(self, offset: int = 0, limit: int = 128) -> Generator[s def _generate_data(self, offset: int = 0, limit: int = 128) -> Generator[RemoteData, None, int]: response = self._list_data_details(offset, limit) - urls = _UrlGetters( - LazyPage.from_items( - offset, - limit, - self._generate_urls, - (item["url"] for item in response["dataDetails"]), - ), + urls = LazyPage.from_items( + offset, + limit, + self._generate_urls, + (item["url"] for item in response["dataDetails"]), ) mask_urls = {} for key in _MASK_KEYS: - mask_urls[key] = _UrlGetters( - LazyPage( - offset, - limit, - lambda offset, limit, k=key: self._generate_mask_urls( # type: ignore[misc] - k.upper(), offset, limit - ), + mask_urls[key] = LazyPage( + offset, + limit, + lambda offset, limit, k=key: self._generate_mask_urls( # type: ignore[misc] + k.upper(), offset, limit ), ) for i, item in enumerate(response["dataDetails"]): data = RemoteData.from_response_body( item, - _url_getter=urls[i], - _url_updater=urls.update, + url=URL.from_getter(urls.items[i].get, urls.pull), cache_path=self._cache_path, ) label = data.label for key in _MASK_KEYS: mask = getattr(label, key, None) if mask: - # pylint: 
disable=protected-access - mask._url_getter = mask_urls[key][i] - mask._url_updater = mask_urls[key].update + mask.url = URL.from_getter(mask_urls[key].items[i].get, mask_urls[key].pull) mask.cache_path = os.path.join(self._cache_path, key, mask.path) yield data diff --git a/tensorbay/dataset/data.py b/tensorbay/dataset/data.py index 775a87826..638adb978 100644 --- a/tensorbay/dataset/data.py +++ b/tensorbay/dataset/data.py @@ -11,10 +11,10 @@ """ import os -from typing import Any, Callable, Dict, Optional, Type, TypeVar, Union +from typing import Any, Dict, Optional, Type, TypeVar, Union from tensorbay.label import Label -from tensorbay.utility import FileMixin, RemoteFileMixin, ReprMixin +from tensorbay.utility import URL, FileMixin, RemoteFileMixin, ReprMixin class DataBase(ReprMixin): @@ -138,7 +138,7 @@ class RemoteData(DataBase, RemoteFileMixin): Arguments: remote_path: The file remote path. timestamp: The timestamp for the file. - _url_getter: The url getter of the remote file. + url: The Url instance used to get and update url. cache_path: The path to store the cache. Attributes: @@ -156,16 +156,14 @@ def __init__( remote_path: str, *, timestamp: Optional[float] = None, - _url_getter: Optional[Callable[[str], str]] = None, - _url_updater: Optional[Callable[[], None]] = None, + url: Optional[URL] = None, cache_path: str = "", ) -> None: DataBase.__init__(self, timestamp) RemoteFileMixin.__init__( self, remote_path, - _url_getter=_url_getter, - _url_updater=_url_updater, + url=url, cache_path=cache_path, ) @@ -174,10 +172,9 @@ def from_response_body( cls: Type[_T], body: Dict[str, Any], *, - _url_getter: Optional[Callable[[str], str]], - _url_updater: Optional[Callable[[], None]] = None, - cache_path: str = "", # noqa: DAR101 - ) -> _T: + url: Optional[URL] = None, + cache_path: str = "", + ) -> _T: # noqa: DAR101 """Loads a :class:`RemoteData` object from a response body. 
Arguments: @@ -198,9 +195,7 @@ def from_response_body( "SENTENCE": {...} } } - - _url_getter: The url getter of the remote file. - _url_updater: The url updater of the remote file. + url: The Url instance used to get and update url. cache_path: The path to store the cache. Returns: @@ -210,8 +205,7 @@ def from_response_body( data = cls( body["remotePath"], timestamp=body.get("timestamp"), - _url_getter=_url_getter, - _url_updater=_url_updater, + url=url, cache_path=cache_path, ) data.label._loads(body["label"]) # pylint: disable=protected-access @@ -230,7 +224,7 @@ class AuthData(DataBase, RemoteFileMixin): cloud_path: The cloud file path. target_remote_path: The file remote path after uploading to tensorbay. timestamp: The timestamp for the file. - _url_getter: The url getter of the remote file. + url: The Url instance used to get and update url. Attributes: path: The cloud file path. @@ -246,10 +240,10 @@ def __init__( *, target_remote_path: Optional[str] = None, timestamp: Optional[float] = None, - _url_getter: Optional[Callable[[str], str]] = None, + url: Optional[URL] = None, ) -> None: DataBase.__init__(self, timestamp) - RemoteFileMixin.__init__(self, cloud_path, _url_getter=_url_getter) + RemoteFileMixin.__init__(self, cloud_path, url=url) self._target_remote_path = target_remote_path @property diff --git a/tensorbay/dataset/frame.py b/tensorbay/dataset/frame.py index 0b2bee578..3a5b7a359 100644 --- a/tensorbay/dataset/frame.py +++ b/tensorbay/dataset/frame.py @@ -21,7 +21,7 @@ from tensorbay.client.lazy import LazyPage from tensorbay.dataset.data import DataBase, RemoteData -from tensorbay.utility import UserMutableMapping +from tensorbay.utility import URL, UserMutableMapping logger = logging.getLogger(__name__) @@ -114,12 +114,10 @@ def from_response_body( frame = cls(frame_id) for data_contents in body["frame"]: sensor_name = data_contents["sensorName"] + url = URL.from_getter(lambda s=sensor_name: urls.items[url_index].get()[s], urls.pull) 
frame[sensor_name] = RemoteData.from_response_body( data_contents, - _url_getter=lambda _, s=sensor_name: urls.items[ # type: ignore[misc] - url_index - ].get()[s], - _url_updater=urls.pull, + url=url, cache_path=cache_path, ) return frame diff --git a/tensorbay/dataset/tests/test_data.py b/tensorbay/dataset/tests/test_data.py index 0a4c8f398..0e8598616 100644 --- a/tensorbay/dataset/tests/test_data.py +++ b/tensorbay/dataset/tests/test_data.py @@ -9,6 +9,7 @@ import pytest from tensorbay.dataset.data import Data, RemoteData +from tensorbay.utility import URL _REMOTE_DATA = { "remotePath": "test.json", @@ -16,6 +17,7 @@ "label": {}, "url": "url", } +url = URL("url", lambda: "url") class TestData: @@ -62,21 +64,19 @@ class TestRemoteData: def test_init(self): remote_path = "A/test.json" timestamp = 1614667532 - remote_data = RemoteData(remote_path, timestamp=timestamp, _url_getter=lambda x: x) + remote_data = RemoteData(remote_path, timestamp=timestamp, url=url) assert remote_data.path == remote_path assert remote_data.timestamp == timestamp - assert remote_data.get_url() == remote_path + assert remote_data.url.get() == "url" def test_get_url(self): remote_data = RemoteData("A/test.josn") with pytest.raises(ValueError): - remote_data.get_url() + remote_data.open() def test_from_response_body(self): - data = RemoteData.from_response_body( - _REMOTE_DATA, _url_getter=lambda _: "url", cache_path="cache_path" - ) + data = RemoteData.from_response_body(_REMOTE_DATA, url=url, cache_path="cache_path") assert data.path == _REMOTE_DATA["remotePath"] assert data.timestamp == _REMOTE_DATA["timestamp"] - assert data.get_url() == "url" + assert data.url.get() == "url" assert data.cache_path == os.path.join("cache_path", _REMOTE_DATA["remotePath"]) diff --git a/tensorbay/dataset/tests/test_frame.py b/tensorbay/dataset/tests/test_frame.py index e5b5a82a5..32b38aedb 100644 --- a/tensorbay/dataset/tests/test_frame.py +++ b/tensorbay/dataset/tests/test_frame.py @@ -52,9 +52,9 @@ def 
test_from_response_body(self): assert frame.frame_id == _FRAME_ID assert frame["sensor1"].path == "test1.png" assert frame["sensor1"].timestamp == 1614945883 - assert frame["sensor1"].get_url() == "url1" + assert frame["sensor1"].url.get() == "url1" assert frame["sensor1"].cache_path == os.path.join("cache_path", "test1.png") assert frame["sensor2"].path == "test2.png" assert frame["sensor2"].timestamp == 1614945884 - assert frame["sensor2"].get_url() == "url2" + assert frame["sensor2"].url.get() == "url2" assert frame["sensor2"].cache_path == os.path.join("cache_path", "test2.png") diff --git a/tensorbay/label/label_mask.py b/tensorbay/label/label_mask.py index c6c418eae..9a267c959 100644 --- a/tensorbay/label/label_mask.py +++ b/tensorbay/label/label_mask.py @@ -5,11 +5,11 @@ """Mask related classes.""" -from typing import Any, Callable, Dict, Optional, Type, TypeVar +from typing import Any, Dict, Optional, Type, TypeVar from tensorbay.label.basic import AttributeType, SubcatalogBase from tensorbay.label.supports import AttributesMixin, IsTrackingMixin, MaskCategoriesMixin -from tensorbay.utility import FileMixin, RemoteFileMixin, ReprMixin +from tensorbay.utility import URL, FileMixin, RemoteFileMixin, ReprMixin class SemanticMaskSubcatalog(SubcatalogBase, MaskCategoriesMixin, AttributesMixin): @@ -462,11 +462,9 @@ class RemotePanopticMask(PanopticMaskBase, RemoteFileMixin): _T = TypeVar("_T", bound="RemotePanopticMask") - def __init__( - self, remote_path: str, *, _url_getter: Optional[Callable[[str], str]] = None - ) -> None: + def __init__(self, remote_path: str, *, url: Optional[URL] = None) -> None: PanopticMaskBase.__init__(self) - RemoteFileMixin.__init__(self, remote_path, _url_getter=_url_getter) + RemoteFileMixin.__init__(self, remote_path, url=url) @classmethod def from_response_body(cls: Type[_T], body: Dict[str, Any]) -> _T: diff --git a/tensorbay/utility/__init__.py b/tensorbay/utility/__init__.py index c973f0b5f..d3ed28300 100644 --- 
a/tensorbay/utility/__init__.py +++ b/tensorbay/utility/__init__.py @@ -13,7 +13,7 @@ Disable, KwargsDeprecated, ) -from tensorbay.utility.file import FileMixin, RemoteFileMixin +from tensorbay.utility.file import URL, FileMixin, RemoteFileMixin from tensorbay.utility.itertools import chunked from tensorbay.utility.name import NameList, NameMixin, SortedNameList from tensorbay.utility.repr import ReprMixin, ReprType, repr_config @@ -43,6 +43,7 @@ "TypeEnum", "TypeMixin", "TypeRegister", + "URL", "UserMapping", "UserMutableMapping", "UserMutableSequence", diff --git a/tensorbay/utility/file.py b/tensorbay/utility/file.py index 1d84d7d8e..2d4b5911e 100644 --- a/tensorbay/utility/file.py +++ b/tensorbay/utility/file.py @@ -19,6 +19,52 @@ from tensorbay.utility.repr import ReprMixin +class URL(ReprMixin): + """Url is a class used to get and update the url. + + Arguments: + url: The url. + updater: A function used to update the url. + + """ + + def __init__(self, url: str, updater: Callable[[], Optional[str]]) -> None: + self._updater = updater + self._getter: Callable[..., str] = lambda: url + + @classmethod + def from_getter(cls, getter: Callable[..., str], updater: Callable[[], Optional[str]]) -> "URL": + """Create a Url instance from the given getter and updater. + + Arguments: + getter: The url getter of the file. + updater: The updater of the url. + + Returns: + The Url instance which stores the url getter and the updater. + + """ + obj: "URL" = object.__new__(cls) + obj._getter = getter + obj._updater = updater + return obj + + def update(self) -> None: + """Update the url.""" + url = self._updater() + if url is not None: + self._getter = lambda: url # type: ignore[assignment, return-value] + + def get(self) -> str: + """Get url of the file. + + Returns: + The url. + + """ + return self._getter() + + class FileMixin(ReprMixin): """FileMixin is a mixin class to mixin file related methods for local file. 
@@ -90,7 +136,7 @@ class RemoteFileMixin(ReprMixin): Arguments: local_path: The file local path. - _url_getter: The url getter of the remote file. + url: The Url instance used to get and update url. cache_path: The path to store the cache. Attributes: @@ -104,60 +150,31 @@ def __init__( self, remote_path: str, *, - _url_getter: Optional[Callable[[str], str]] = None, - _url_updater: Optional[Callable[[], None]] = None, + url: Optional[URL] = None, cache_path: str = "", ) -> None: self.path = remote_path - self._url_getter = _url_getter - self._url_updater = _url_updater + self.url = url self.cache_path = os.path.join(cache_path, remote_path) if cache_path else "" def _repr_head(self) -> str: return f'{self.__class__.__name__}("{self.path}")' def _urlopen(self) -> HTTPResponse: + + if not self.url: + raise ValueError(f"The file cannot open because {self._repr_head()} has no url") + try: return urlopen( # type: ignore[no-any-return] - quote(self.get_url(), safe=printable), timeout=2 + quote(self.url.get(), safe=printable), timeout=2 ) except HTTPError as error: if error.code == 403: - self.update_url() - return urlopen(quote(self.get_url(), safe=printable)) # type: ignore[no-any-return] + self.url.update() + return urlopen(quote(self.url.get(), safe=printable)) # type: ignore[no-any-return] raise - def update_url(self) -> None: - """Update the url when the url is timed out. - - Raises: - ValueError: When the _url_updater is missing. - - """ - if not self._url_updater: - raise ValueError( - f"The file URL cannot be updated because {self._repr_head()} has no url updater" - ) - - self._url_updater() - - def get_url(self) -> str: - """Return the url of the data hosted by tensorbay. - - Returns: - The url of the data. - - Raises: - ValueError: When the _url_getter is missing. 
- - """ - if not self._url_getter: - raise ValueError( - f"The file URL cannot be got because {self._repr_head()} has no url getter" - ) - - return self._url_getter(self.path) - def open(self) -> Union[HTTPResponse, BufferedReader]: """Return the binary file pointer of this file.