Respect HTTPError spec #1693

Merged: 3 commits, merged on Sep 25, 2023. The diff below shows the changes from all commits.
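Context for the diffs below: `requests.exceptions.HTTPError` inherits from `RequestException`, whose constructor accepts optional `request` and `response` keyword arguments and stores them as attributes. A wrapper that re-raises an `HTTPError` with a friendlier message but drops those kwargs leaves `err.response` empty for callers. A minimal sketch of that convention (illustrative, not code from this PR):

    from requests import HTTPError
    from requests.models import Response

    # HTTPError (via RequestException) stores the `request`/`response` kwargs
    # as attributes, so re-raised errors should forward both from the caught error.
    fake = Response()  # stand-in response object, for illustration only
    fake.status_code = 401
    err = HTTPError("Invalid user token.", response=fake)
    assert err.response is fake and err.response.status_code == 401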
4 changes: 3 additions & 1 deletion src/huggingface_hub/hf_api.py
@@ -1045,7 +1045,9 @@ def whoami(self, token: Optional[str] = None) -> Dict:
             raise HTTPError(
                 "Invalid user token. If you didn't pass a user token, make sure you "
                 "are properly logged in by executing `huggingface-cli login`, and "
-                "if you did pass a user token, double-check it's correct."
+                "if you did pass a user token, double-check it's correct.",
+                request=e.request,
+                response=e.response,
             ) from e
         return r.json()

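With the kwargs forwarded, a caller of `whoami` can inspect the failed exchange directly. A hypothetical usage sketch (the token value is made up):

    from requests import HTTPError
    from huggingface_hub import HfApi

    try:
        HfApi().whoami(token="hf_invalid_token_for_illustration")
    except HTTPError as e:
        # Both attributes are now populated from the original error.
        if e.response is not None:
            print(e.response.status_code)  # e.g. 401
        if e.request is not None:
            print(e.request.url)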
6 changes: 4 additions & 2 deletions src/huggingface_hub/inference/_client.py
@@ -232,7 +232,7 @@ def post(
             )
         except TimeoutError as error:
             # Convert any `TimeoutError` to a `InferenceTimeoutError`
-            raise InferenceTimeoutError(f"Inference call timed out: {url}") from error
+            raise InferenceTimeoutError(f"Inference call timed out: {url}") from error  # type: ignore

         try:
             hf_raise_for_status(response)
@@ -243,7 +243,9 @@
             if timeout is not None and time.time() - t0 > timeout:
                 raise InferenceTimeoutError(
                     f"Model not loaded on the server: {url}. Please retry with a higher timeout (current:"
-                    f" {self.timeout})."
+                    f" {self.timeout}).",
+                    request=error.request,
+                    response=error.response,
                 ) from error
             # ...or wait 1s and retry
             logger.info(f"Waiting for model to be loaded on the server: {error}")
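The same applies to an `InferenceTimeoutError` raised while waiting for a model to load: it now carries the last HTTP exchange. A hedged usage sketch (model name and timeout are illustrative):

    from huggingface_hub import InferenceClient, InferenceTimeoutError

    client = InferenceClient(timeout=5)  # deliberately short, for illustration
    try:
        client.post(json={"inputs": "Hello"}, model="gpt2")
    except InferenceTimeoutError as e:
        # When the timeout came from repeated 503 "model loading" replies, the
        # final response is attached; a pure socket timeout may leave it None.
        print(e.response.status_code if e.response is not None else "no response")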
6 changes: 4 additions & 2 deletions src/huggingface_hub/inference/_generated/_async_client.py
@@ -231,7 +231,7 @@ async def post(
         except asyncio.TimeoutError as error:
             await client.close()
             # Convert any `TimeoutError` to a `InferenceTimeoutError`
-            raise InferenceTimeoutError(f"Inference call timed out: {url}") from error
+            raise InferenceTimeoutError(f"Inference call timed out: {url}") from error  # type: ignore
         except aiohttp.ClientResponseError as error:
             error.response_error_payload = response_error_payload
             await client.close()
@@ -240,7 +240,9 @@
             if timeout is not None and time.time() - t0 > timeout:
                 raise InferenceTimeoutError(
                     f"Model not loaded on the server: {url}. Please retry with a higher timeout"
-                    f" (current: {self.timeout})."
+                    f" (current: {self.timeout}).",
+                    request=error.request,
+                    response=error.response,
                 ) from error
             # ...or wait 1s and retry
             logger.info(f"Waiting for model to be loaded on the server: {error}")
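The async client mirrors the sync behavior. A sketch assuming `AsyncInferenceClient` exposes the same `post` signature as the sync client:

    import asyncio

    from huggingface_hub import AsyncInferenceClient, InferenceTimeoutError

    async def main() -> None:
        client = AsyncInferenceClient(timeout=5)  # illustrative short timeout
        try:
            await client.post(json={"inputs": "Hello"}, model="gpt2")
        except InferenceTimeoutError as e:
            # Same contract as the sync client: the exchange travels with the error.
            print(e.request, e.response)

    asyncio.run(main())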
8 changes: 4 additions & 4 deletions src/huggingface_hub/inference/_text_generation.py
@@ -468,13 +468,13 @@ def raise_text_generation_error(http_error: HTTPError) -> NoReturn:
     # If error_type => more information than `hf_raise_for_status`
     if error_type is not None:
         if error_type == "generation":
-            raise GenerationError(message) from http_error
+            raise GenerationError(message) from http_error  # type: ignore
         if error_type == "incomplete_generation":
-            raise IncompleteGenerationError(message) from http_error
+            raise IncompleteGenerationError(message) from http_error  # type: ignore
         if error_type == "overloaded":
-            raise OverloadedError(message) from http_error
+            raise OverloadedError(message) from http_error  # type: ignore
         if error_type == "validation":
-            raise ValidationError(message) from http_error
+            raise ValidationError(message) from http_error  # type: ignore

     # Otherwise, fallback to default error
     raise http_error
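For context, `raise_text_generation_error` promotes a generic `HTTPError` to a more specific exception based on a server-supplied `error_type`; the added `# type: ignore` comments presumably silence the type checker on these raises. A minimal sketch of the dispatch pattern, with hypothetical names (`ValidationError` stand-in, `raise_from_payload`):

    from typing import NoReturn

    from requests import HTTPError

    class ValidationError(HTTPError):
        """Hypothetical stand-in: raised when the server rejects the input."""

    def raise_from_payload(payload: dict, http_error: HTTPError) -> NoReturn:
        # Promote the generic HTTPError to a more specific class when the
        # server names an error_type in its JSON payload.
        if payload.get("error_type") == "validation":
            raise ValidationError(payload.get("error", "")) from http_error
        raise http_error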
3 changes: 2 additions & 1 deletion src/huggingface_hub/utils/_errors.py
@@ -85,7 +85,8 @@ def __init__(self, message: str, response: Optional[Response] = None):
                 request_id=self.request_id,
                 server_message=self.server_message,
             ),
-            response=response,
+            response=response,  # type: ignore
+            request=response.request if response is not None else None,  # type: ignore
         )

     def append_to_message(self, additional_message: str) -> None:
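This is the core fix: `HfHubHTTPError.__init__` now forwards `response`, plus the request recovered from it, to the `HTTPError` base. A standalone sketch of the pattern (class name is hypothetical; note that `RequestException` can also recover `request` from `response.request` on its own, so the explicit kwarg is belt-and-braces):

    from typing import Optional

    from requests import HTTPError, Response

    class MyHubError(HTTPError):  # hypothetical subclass, for illustration
        def __init__(self, message: str, response: Optional[Response] = None):
            super().__init__(
                message,
                response=response,
                request=response.request if response is not None else None,
            )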
6 changes: 4 additions & 2 deletions utils/generate_async_inference_client.py
@@ -207,7 +207,7 @@ def _rename_to_AsyncInferenceClient(code: str) -> str:
         except asyncio.TimeoutError as error:
             await client.close()
             # Convert any `TimeoutError` to a `InferenceTimeoutError`
-            raise InferenceTimeoutError(f"Inference call timed out: {url}") from error
+            raise InferenceTimeoutError(f"Inference call timed out: {url}") from error  # type: ignore
         except aiohttp.ClientResponseError as error:
             error.response_error_payload = response_error_payload
             await client.close()
@@ -216,7 +216,9 @@ def _rename_to_AsyncInferenceClient(code: str) -> str:
             if timeout is not None and time.time() - t0 > timeout:
                 raise InferenceTimeoutError(
                     f"Model not loaded on the server: {url}. Please retry with a higher timeout"
-                    f" (current: {self.timeout})."
+                    f" (current: {self.timeout}).",
+                    request=error.request,
+                    response=error.response,
                 ) from error
             # ...or wait 1s and retry
             logger.info(f"Waiting for model to be loaded on the server: {error}")
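`utils/generate_async_inference_client.py` derives the async client from the sync client's source, so the snippets it patches must stay in sync with `_client.py`; that is why the same change appears a third time here. A hypothetical, much-reduced version of that sync-to-async rewriting:

    import re

    def make_async(code: str) -> str:
        # Textual rewriting in the spirit of the generator script: the
        # substitutions only work while the sync source matches the
        # expected snippets exactly.
        code = code.replace("def post(", "async def post(")
        return re.sub(r"\bInferenceClient\b", "AsyncInferenceClient", code)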