diff --git a/src/huggingface_hub/file_download.py b/src/huggingface_hub/file_download.py index abc82e1fe7..64a83f379e 100644 --- a/src/huggingface_hub/file_download.py +++ b/src/huggingface_hub/file_download.py @@ -75,6 +75,7 @@ tqdm, validate_hf_hub_args, ) +from .utils._deprecation import _deprecate_method from .utils._headers import _http_user_agent from .utils._runtime import _PY_VERSION # noqa: F401 # for backward compatibility from .utils._typing import HTTP_METHOD_T @@ -345,6 +346,7 @@ def filename_to_url( return url, etag +@_deprecate_method(version="0.22.0", message="Use `huggingface_hub.utils.build_hf_headers` instead.") def http_user_agent( *, library_name: Optional[str] = None, @@ -1249,6 +1251,9 @@ def hf_hub_download( token=token, proxies=proxies, timeout=etag_timeout, + library_name=library_name, + library_version=library_version, + user_agent=user_agent, ) except EntryNotFoundError as http_error: # Cache the non-existence of the file and raise @@ -1595,6 +1600,9 @@ def get_hf_file_metadata( token: Union[bool, str, None] = None, proxies: Optional[Dict] = None, timeout: Optional[float] = DEFAULT_REQUEST_TIMEOUT, + library_name: Optional[str] = None, + library_version: Optional[str] = None, + user_agent: Union[Dict, str, None] = None, ) -> HfFileMetadata: """Fetch metadata of a file versioned on the Hub for a given url. @@ -1612,12 +1620,20 @@ def get_hf_file_metadata( `requests.request`. timeout (`float`, *optional*, defaults to 10): How many seconds to wait for the server to send metadata before giving up. + library_name (`str`, *optional*): + The name of the library to which the object corresponds. + library_version (`str`, *optional*): + The version of the library. + user_agent (`dict`, `str`, *optional*): + The user-agent info in the form of a dictionary or a string. Returns: A [`HfFileMetadata`] object containing metadata such as location, etag, size and commit_hash. 
""" - headers = build_hf_headers(token=token) + headers = build_hf_headers( + token=token, library_name=library_name, library_version=library_version, user_agent=user_agent + ) headers["Accept-Encoding"] = "identity" # prevent any compression => we want to know the real size of the file # Retrieve metadata diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index de1cd8f8fc..1d089450ec 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -92,6 +92,7 @@ ) from .constants import ( DEFAULT_ETAG_TIMEOUT, + DEFAULT_REQUEST_TIMEOUT, DEFAULT_REVISION, DISCUSSION_STATUS, DISCUSSION_TYPES, @@ -106,10 +107,7 @@ DiscussionStatusFilter, DiscussionTypeFilter, ) -from .file_download import ( - get_hf_file_metadata, - hf_hub_url, -) +from .file_download import HfFileMetadata, get_hf_file_metadata, hf_hub_url from .repocard_data import DatasetCardData, ModelCardData, SpaceCardData from .utils import ( # noqa: F401 # imported for backward compatibility BadRequestError, @@ -4614,6 +4612,48 @@ def delete_folder( parent_commit=parent_commit, ) + @validate_hf_hub_args + def get_hf_file_metadata( + self, + *, + url: str, + token: Union[bool, str, None] = None, + proxies: Optional[Dict] = None, + timeout: Optional[float] = DEFAULT_REQUEST_TIMEOUT, + ) -> HfFileMetadata: + """Fetch metadata of a file versioned on the Hub for a given url. + + Args: + url (`str`): + File url, for example returned by [`hf_hub_url`]. + token (`str` or `bool`, *optional*): + A token to be used for the download. + - If `True`, the token is read from the HuggingFace config + folder. + - If `False`, no token is provided; if `None`, the client's `self.token` is used. + - If a string, it's used as the authentication token. + proxies (`dict`, *optional*): + Dictionary mapping protocol to the URL of the proxy passed to `requests.request`. + timeout (`float`, *optional*, defaults to 10): + How many seconds to wait for the server to send metadata before giving up. 
+ + Returns: + A [`HfFileMetadata`] object containing metadata such as location, etag, size and commit_hash. + """ + if token is None: + # Cannot do `token = token or self.token` as token can be `False`. + token = self.token + + return get_hf_file_metadata( + url=url, + token=token, + proxies=proxies, + timeout=timeout, + library_name=self.library_name, + library_version=self.library_version, + user_agent=self.user_agent, + ) + @validate_hf_hub_args def hf_hub_download( self, diff --git a/tests/test_file_download.py b/tests/test_file_download.py index a52dc3802a..98ec2d3255 100644 --- a/tests/test_file_download.py +++ b/tests/test_file_download.py @@ -41,6 +41,7 @@ _create_symlink, _get_pointer_path, _normalize_etag, + _request_wrapper, _to_local_dir, cached_download, filename_to_url, @@ -388,6 +389,50 @@ def test_hf_hub_download_offline_no_refs(self): cache_dir=cache_dir, ) + def test_hf_hub_download_with_user_agent(self): + """ + Check that user agent is correctly sent to the HEAD call when downloading a file. + + Regression test for #1854. + See https://github.com/huggingface/huggingface_hub/pull/1854. 
+ """ + + def _check_user_agent(headers: dict): + assert "user-agent" in headers + assert "test/1.0.0" in headers["user-agent"] + assert "foo/bar" in headers["user-agent"] + + with SoftTemporaryDirectory() as cache_dir: + with patch("huggingface_hub.file_download._request_wrapper", wraps=_request_wrapper) as mock_request: + # First download + hf_hub_download( + DUMMY_MODEL_ID, + filename=CONFIG_NAME, + cache_dir=cache_dir, + library_name="test", + library_version="1.0.0", + user_agent="foo/bar", + ) + calls = mock_request.call_args_list + assert len(calls) == 3 # HEAD, HEAD, GET + for call in calls: + _check_user_agent(call.kwargs["headers"]) + + with patch("huggingface_hub.file_download._request_wrapper", wraps=_request_wrapper) as mock_request: + # Second download: no GET call + hf_hub_download( + DUMMY_MODEL_ID, + filename=CONFIG_NAME, + cache_dir=cache_dir, + library_name="test", + library_version="1.0.0", + user_agent="foo/bar", + ) + calls = mock_request.call_args_list + assert len(calls) == 2 # HEAD, HEAD + for call in calls: + _check_user_agent(call.kwargs["headers"]) + def test_hf_hub_url_with_empty_subfolder(self): """ Check subfolder arg is processed correctly when empty string is passed to