Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle backoff on HTTP 503 error when pushing repeatedly #1038

Merged
merged 6 commits into from
Sep 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 13 additions & 25 deletions src/huggingface_hub/file_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import re
import sys
import tempfile
import time
import warnings
from contextlib import contextmanager
from functools import partial
Expand All @@ -20,6 +19,7 @@
import requests
from filelock import FileLock
from huggingface_hub import constants
from requests.exceptions import ConnectTimeout, ProxyError

from . import __version__
from .constants import (
Expand All @@ -37,6 +37,7 @@
EntryNotFoundError,
LocalEntryNotFoundError,
hf_raise_for_status,
http_backoff,
logging,
tqdm,
validate_hf_hub_args,
Expand Down Expand Up @@ -478,30 +479,17 @@ def _request_wrapper(
return response

# 3. Exponential backoff
tries, success = 0, False
while not success:
tries += 1
try:
response = requests.request(
method=method.upper(), url=url, timeout=timeout, **params
)
success = True
except (
requests.exceptions.ConnectTimeout,
requests.exceptions.ProxyError,
) as err:
if tries > max_retries:
raise err
else:
logger.info(
f"{method} request to {url} timed out, retrying..."
f" [{tries/max_retries}]"
)
sleep_time = min(
max_wait_time, base_wait_time * 2 ** (tries - 1)
) # Exponential backoff
time.sleep(sleep_time)
return response
return http_backoff(
method=method,
url=url,
max_retries=max_retries,
base_wait_time=base_wait_time,
max_wait_time=max_wait_time,
retry_on_exceptions=(ConnectTimeout, ProxyError),
retry_on_status_codes=(),
timeout=timeout,
**params,
)


def _request_with_retry(*args, **kwargs) -> requests.Response:
Expand Down
6 changes: 3 additions & 3 deletions src/huggingface_hub/lfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from huggingface_hub.constants import ENDPOINT, REPO_TYPES_URL_PREFIXES
from requests.auth import HTTPBasicAuth

from .utils import hf_raise_for_status, validate_hf_hub_args
from .utils import hf_raise_for_status, http_backoff, validate_hf_hub_args
from .utils.sha import sha256, sha_fileobj


Expand Down Expand Up @@ -308,7 +308,7 @@ def _upload_single_part(upload_url: str, fileobj: BinaryIO):

Raises: `requests.HTTPError` if the upload resulted in an error
"""
upload_res = requests.put(upload_url, data=fileobj)
upload_res = http_backoff("PUT", upload_url, data=fileobj)
hf_raise_for_status(upload_res)
return upload_res

Expand Down Expand Up @@ -376,7 +376,7 @@ def _upload_multi_part(
seek_from=chunk_size * part_idx,
read_limit=chunk_size,
) as fileobj_slice:
part_upload_res = requests.put(part_upload_url, data=fileobj_slice)
part_upload_res = http_backoff("PUT", part_upload_url, data=fileobj_slice)
hf_raise_for_status(part_upload_res)
etag = part_upload_res.headers.get("etag")
if etag is None or etag == "":
Expand Down
1 change: 1 addition & 0 deletions src/huggingface_hub/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
RevisionNotFoundError,
hf_raise_for_status,
)
from ._http import http_backoff
from ._paths import filter_repo_objects
from ._subprocess import run_subprocess
from ._validators import HFValidationError, validate_hf_hub_args, validate_repo_id
Expand Down
131 changes: 131 additions & 0 deletions src/huggingface_hub/utils/_http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# coding=utf-8
# Copyright 2022-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains utilities to handle HTTP requests in Huggingface Hub."""
import time
from http import HTTPStatus
from typing import Tuple, Type, Union

import requests
from requests import Response
from requests.exceptions import ConnectTimeout, ProxyError

from . import logging
from ._typing import Literal


# Module-level logger (huggingface_hub's logging wrapper around stdlib logging).
logger = logging.get_logger(__name__)

# HTTP verbs accepted by `http_backoff`; type alias for readability in signatures.
HTTP_METHOD_T = Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]


def http_backoff(
    method: HTTP_METHOD_T,
    url: str,
    *,
    max_retries: int = 5,
    base_wait_time: float = 1,
    max_wait_time: float = 8,
    retry_on_exceptions: Union[Type[Exception], Tuple[Type[Exception], ...]] = (
        ConnectTimeout,
        ProxyError,
    ),
    retry_on_status_codes: Union[int, Tuple[int, ...]] = HTTPStatus.SERVICE_UNAVAILABLE,
    **kwargs,
) -> Response:
    """Wrapper around requests to retry calls on an endpoint, with exponential backoff.

    Endpoint call is retried on exceptions (ex: connection timeout, proxy error,...)
    and/or on specific status codes (ex: service unavailable). If the call failed more
    than `max_retries` times, the exception is thrown or `raise_for_status` is called
    on the response object.

    Re-implements mechanisms from the `backoff` library to avoid adding an external
    dependency to `huggingface_hub`. See https://github.com/litl/backoff.

    Args:
        method (`Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]`):
            HTTP method to perform.
        url (`str`):
            The URL of the resource to fetch.
        max_retries (`int`, *optional*, defaults to `5`):
            Maximum number of retries. `0` means the request is attempted exactly once.
        base_wait_time (`float`, *optional*, defaults to `1`):
            Duration (in seconds) to wait before retrying the first time.
            Wait time between retries then grows exponentially, capped by
            `max_wait_time`.
        max_wait_time (`float`, *optional*, defaults to `8`):
            Maximum duration (in seconds) to wait before retrying.
        retry_on_exceptions (`Type[Exception]` or `Tuple[Type[Exception]]`, *optional*, defaults to `(ConnectTimeout, ProxyError,)`):
            Define which exceptions must be caught to retry the request. Can be a single
            type or a tuple of types.
            By default, retry on `ConnectTimeout` and `ProxyError`.
        retry_on_status_codes (`int` or `Tuple[int]`, *optional*, defaults to `503`):
            Define on which status codes the request must be retried. By default, only
            HTTP 503 Service Unavailable is retried.
        **kwargs (`dict`, *optional*):
            kwargs to pass to `requests.request`.

    Example:
    ```
    >>> from huggingface_hub.utils import http_backoff

    # Same usage as "requests.request".
    >>> response = http_backoff("GET", "https://www.google.com")
    >>> response.raise_for_status()

    # If you expect a Gateway Timeout from time to time
    >>> http_backoff("PUT", upload_url, data=data, retry_on_status_codes=504)
    >>> response.raise_for_status()
    ```
    """
    # Normalize single values to tuples so membership tests below always work.
    if isinstance(retry_on_exceptions, type):  # Tuple from single exception type
        retry_on_exceptions = (retry_on_exceptions,)

    if isinstance(retry_on_status_codes, int):  # Tuple from single status code
        retry_on_status_codes = (retry_on_status_codes,)

    nb_tries = 0
    sleep_time = base_wait_time
    while True:
        nb_tries += 1
        try:
            # Perform request and return if status_code is not in the retry list.
            response = requests.request(method=method, url=url, **kwargs)
            if response.status_code not in retry_on_status_codes:
                return response

            # Wrong status code returned (HTTP 503 for instance)
            logger.warning(
                f"HTTP Error {response.status_code} thrown while requesting"
                f" {method} {url}"
            )
            if nb_tries > max_retries:
                response.raise_for_status()  # Will raise uncaught exception
                # We return response to avoid infinite loop in the corner case where the
                # user ask for retry on a status code that doesn't raise_for_status.
                return response

        except retry_on_exceptions as err:
            logger.warning(f"'{err}' thrown while requesting {method} {url}")

            if nb_tries > max_retries:
                # Bare `raise` keeps the original traceback intact.
                raise

        # Sleep for X seconds.
        # NOTE: fixed log message: `{nb_tries/max_retries}` performed a float
        # division (e.g. "[0.2]") instead of displaying a "tries/total" counter.
        logger.warning(f"Retrying in {sleep_time}s [Retry {nb_tries}/{max_retries}].")
        time.sleep(sleep_time)

        # Update sleep time for next retry
        sleep_time = min(max_wait_time, sleep_time * 2)  # Exponential backoff
Wauplin marked this conversation as resolved.
Show resolved Hide resolved
127 changes: 127 additions & 0 deletions tests/test_utils_http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import time
import unittest
from typing import Generator
from unittest.mock import Mock, call, patch

from huggingface_hub.utils._http import http_backoff
from requests import ConnectTimeout, HTTPError


URL = "https://www.google.com"


@patch("huggingface_hub.utils._http.requests.request")
class TestHttpBackoff(unittest.TestCase):
    """Unit tests for `http_backoff`. `requests.request` is mocked in every test."""

    def test_backoff_no_errors(self, mock_request: Mock) -> None:
        """Test normal usage of `http_backoff`."""
        data_mock = Mock()
        response = http_backoff("GET", URL, data=data_mock)
        # Extra kwargs (here `data`) must be forwarded untouched to `requests.request`.
        mock_request.assert_called_once_with(method="GET", url=URL, data=data_mock)
        self.assertIs(response, mock_request())

    def test_backoff_3_calls(self, mock_request: Mock) -> None:
        """Test `http_backoff` with 2 fails."""
        response_mock = Mock()
        # First two attempts raise, third one succeeds.
        mock_request.side_effect = (ValueError(), ValueError(), response_mock)
        response = http_backoff(  # retry on ValueError, instant retry
            "GET", URL, retry_on_exceptions=ValueError, base_wait_time=0.0
        )
        self.assertEqual(mock_request.call_count, 3)
        mock_request.assert_has_calls(
            calls=[
                call(method="GET", url=URL),
                call(method="GET", url=URL),
                call(method="GET", url=URL),
            ]
        )
        self.assertIs(response, response_mock)

    def test_backoff_on_exception_until_max(self, mock_request: Mock) -> None:
        """Test `http_backoff` until max limit is reached with exceptions."""
        mock_request.side_effect = ConnectTimeout()

        with self.assertRaises(ConnectTimeout):
            http_backoff("GET", URL, base_wait_time=0.0, max_retries=3)

        # 1 initial attempt + 3 retries.
        self.assertEqual(mock_request.call_count, 4)

    def test_backoff_on_status_code_until_max(self, mock_request: Mock) -> None:
        """Test `http_backoff` until max limit is reached with status codes."""
        mock_503 = Mock()
        mock_503.status_code = 503
        mock_504 = Mock()
        mock_504.status_code = 504
        # Only the last response's `raise_for_status` is expected to be called.
        mock_504.raise_for_status.side_effect = HTTPError()
        mock_request.side_effect = (mock_503, mock_504, mock_503, mock_504)

        with self.assertRaises(HTTPError):
            http_backoff(
                "GET",
                URL,
                base_wait_time=0.0,
                max_retries=3,
                retry_on_status_codes=(503, 504),
            )

        self.assertEqual(mock_request.call_count, 4)

    def test_backoff_on_exceptions_and_status_codes(self, mock_request: Mock) -> None:
        """Test `http_backoff` until max limit with status codes and exceptions."""
        # Retry budget is shared between status-code retries and exception retries.
        mock_503 = Mock()
        mock_503.status_code = 503
        mock_request.side_effect = (mock_503, ConnectTimeout())

        with self.assertRaises(ConnectTimeout):
            http_backoff("GET", URL, base_wait_time=0.0, max_retries=1)

        self.assertEqual(mock_request.call_count, 2)

    def test_backoff_on_valid_status_code(self, mock_request: Mock) -> None:
        """Test `http_backoff` until max limit with a valid status code.

        Quite a corner case: the user wants to retry if the status code is 200.
        Requests are retried but in the end, the HTTP 200 response is returned if the
        server returned only 200 responses.
        """
        mock_200 = Mock()
        mock_200.status_code = 200
        mock_request.side_effect = (mock_200, mock_200, mock_200, mock_200)

        response = http_backoff(
            "GET", URL, base_wait_time=0.0, max_retries=3, retry_on_status_codes=200
        )

        self.assertEqual(mock_request.call_count, 4)
        # `raise_for_status` on a 200 Mock does not raise, so the response is returned.
        self.assertIs(response, mock_200)

    def test_backoff_sleep_time(self, mock_request: Mock) -> None:
        """Test `http_backoff` sleep time goes exponential until max limit.

        Since timing between 2 requests is sleep duration + some other stuff, this test
        can be unstable. However, sleep durations between 10ms and 50ms should be enough
        to make the approximation that measured durations are the "sleep time" waited by
        `http_backoff`. If this is not the case, just increase `base_wait_time`,
        `max_wait_time` and `expected_sleep_times` with bigger values.
        """
        sleep_times = []

        def _side_effect_timer() -> Generator[ConnectTimeout, None, None]:
            # Measures elapsed wall-clock time between consecutive request attempts;
            # each attempt raises `ConnectTimeout` to force a retry.
            t0 = time.time()
            while True:
                yield ConnectTimeout()
                t1 = time.time()
                sleep_times.append(round(t1 - t0, 2))
                t0 = t1

        mock_request.side_effect = _side_effect_timer()

        with self.assertRaises(ConnectTimeout):
            http_backoff(
                "GET", URL, base_wait_time=0.01, max_wait_time=0.05, max_retries=5
            )

        self.assertEqual(mock_request.call_count, 6)

        # Assert sleep times are exponential until plateau
        expected_sleep_times = [0.01, 0.02, 0.04, 0.05, 0.05]
        self.assertListEqual(sleep_times, expected_sleep_times)
4 changes: 2 additions & 2 deletions tests/testing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,15 @@ def offline(mode=OfflineSimulationMode.CONNECTION_FAILS, timeout=1e-16):
"""
Simulate offline mode.

There are three offline simulatiom modes:
There are three offline simulation modes:

CONNECTION_FAILS (default mode): a ConnectionError is raised for each network call.
Connection errors are created by mocking socket.socket
CONNECTION_TIMES_OUT: the connection hangs until it times out.
The default timeout value is low (1e-16) to speed up the tests.
Timeout errors are created by mocking requests.request
HF_HUB_OFFLINE_SET_TO_1: the HF_HUB_OFFLINE_SET_TO_1 environment variable is set to 1.
This makes the http/ftp calls of the library instantly fail and raise an OfflineModeEmabled error.
This makes the http/ftp calls of the library instantly fail and raise an OfflineModeEnabled error.
"""
import socket

Expand Down