diff --git a/docs/api/models/huggingface.md b/docs/api/models/huggingface.md
new file mode 100644
index 0000000000..72e78c4a3e
--- /dev/null
+++ b/docs/api/models/huggingface.md
@@ -0,0 +1,7 @@
+# `pydantic_ai.models.huggingface`
+
+## Setup
+
+For details on how to set up authentication with this model, see [model configuration for Hugging Face](../../models/huggingface.md).
+
+::: pydantic_ai.models.huggingface
diff --git a/docs/api/providers.md b/docs/api/providers.md
index ec684520ce..7b2ddc1c12 100644
--- a/docs/api/providers.md
+++ b/docs/api/providers.md
@@ -31,3 +31,5 @@
::: pydantic_ai.providers.github.GitHubProvider
::: pydantic_ai.providers.openrouter.OpenRouterProvider
+
+::: pydantic_ai.providers.huggingface.HuggingFaceProvider
diff --git a/docs/models/huggingface.md b/docs/models/huggingface.md
new file mode 100644
index 0000000000..61f8eef35f
--- /dev/null
+++ b/docs/models/huggingface.md
@@ -0,0 +1,95 @@
+# Hugging Face
+
+[Hugging Face](https://huggingface.co/) is an AI platform hosting major open source models, datasets, MCP servers, and demos. You can use [Inference Providers](https://huggingface.co/docs/inference-providers) to run open source models like DeepSeek R1 on scalable serverless infrastructure.
+
+## Install
+
+To use `HuggingFaceModel`, you need to either install `pydantic-ai`, or install `pydantic-ai-slim` with the `huggingface` optional group:
+
+```bash
+pip/uv-add "pydantic-ai-slim[huggingface]"
+```
+
+## Configuration
+
+To use [Hugging Face](https://huggingface.co/) inference, you'll need to set up an account, which gives you a [free tier](https://huggingface.co/docs/inference-providers/pricing) allowance on [Inference Providers](https://huggingface.co/docs/inference-providers). To set up inference, follow these steps:
+
+1. Go to [Hugging Face](https://huggingface.co/join) and sign up for an account.
+2. Create a new access token in your [Hugging Face settings](https://huggingface.co/settings/tokens).
+3. Set the `HF_TOKEN` environment variable to the token you just created:
+
+```bash
+export HF_TOKEN='hf_token'
+```
+
+## Usage
+
+You can then use [`HuggingFaceModel`][pydantic_ai.models.huggingface.HuggingFaceModel] by name:
+
+```python
+from pydantic_ai import Agent
+
+agent = Agent('huggingface:Qwen/Qwen3-235B-A22B')
+...
+```
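+
+For example, here's a minimal end-to-end run (a sketch: it assumes `HF_TOKEN` is set in your environment, and the exact output may vary):
+
+```python
+from pydantic_ai import Agent
+
+agent = Agent('huggingface:Qwen/Qwen3-235B-A22B')
+result = agent.run_sync('What is the capital of France?')
+print(result.output)
+#> Paris
+```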
+
+Or initialise the model directly with just the model name:
+
+```python
+from pydantic_ai import Agent
+from pydantic_ai.models.huggingface import HuggingFaceModel
+
+model = HuggingFaceModel('Qwen/Qwen3-235B-A22B')
+agent = Agent(model)
+...
+```
+
+By default, the [`HuggingFaceModel`][pydantic_ai.models.huggingface.HuggingFaceModel] uses the
+[`HuggingFaceProvider`][pydantic_ai.providers.huggingface.HuggingFaceProvider], which automatically selects
+the first inference provider (Cerebras, Together AI, Cohere, etc.) available for the model, sorted by your
+preferred order in [your inference provider settings](https://hf.co/settings/inference-providers).
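+
+For illustration, the string form and the explicit default provider are equivalent (a sketch; assumes `HF_TOKEN` is set in your environment):
+
+```python
+from pydantic_ai import Agent
+from pydantic_ai.models.huggingface import HuggingFaceModel
+from pydantic_ai.providers.huggingface import HuggingFaceProvider
+
+# 'huggingface:...' infers a default HuggingFaceProvider, so this...
+agent = Agent('huggingface:Qwen/Qwen3-235B-A22B')
+# ...is equivalent to constructing the provider explicitly with no arguments:
+agent = Agent(HuggingFaceModel('Qwen/Qwen3-235B-A22B', provider=HuggingFaceProvider()))
+...
+```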
+
+## Configure the provider
+
+If you want to configure the provider in code, you can instantiate the
+[`HuggingFaceProvider`][pydantic_ai.providers.huggingface.HuggingFaceProvider] yourself and pass it to the model:
+
+```python
+from pydantic_ai import Agent
+from pydantic_ai.models.huggingface import HuggingFaceModel
+from pydantic_ai.providers.huggingface import HuggingFaceProvider
+
+model = HuggingFaceModel(
+    'Qwen/Qwen3-235B-A22B',
+    provider=HuggingFaceProvider(api_key='hf_token', provider_name='nebius'),
+)
+agent = Agent(model)
+...
+```
+
+## Custom Hugging Face client
+
+[`HuggingFaceProvider`][pydantic_ai.providers.huggingface.HuggingFaceProvider] also accepts a custom
+[`AsyncInferenceClient`][huggingface_hub.AsyncInferenceClient] via the `hf_client` parameter, so you can customise
+the `headers`, `bill_to` (billing to an HF organization you're a member of), `base_url`, etc. as defined in the
+[Hugging Face Hub Python library docs](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client).
+
+```python
+from huggingface_hub import AsyncInferenceClient
+
+from pydantic_ai import Agent
+from pydantic_ai.models.huggingface import HuggingFaceModel
+from pydantic_ai.providers.huggingface import HuggingFaceProvider
+
+client = AsyncInferenceClient(
+ bill_to='openai',
+ api_key='hf_token',
+ provider='fireworks-ai',
+)
+
+model = HuggingFaceModel(
+ 'Qwen/Qwen3-235B-A22B',
+ provider=HuggingFaceProvider(hf_client=client),
+)
+agent = Agent(model)
+...
+```
diff --git a/mkdocs.yml b/mkdocs.yml
index 44b1548f1a..a950d52c0c 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -83,6 +83,7 @@ nav:
- api/models/gemini.md
- api/models/google.md
- api/models/groq.md
+ - api/models/huggingface.md
- api/models/instrumented.md
- api/models/mistral.md
- api/models/test.md
diff --git a/pydantic_ai_slim/pydantic_ai/models/__init__.py b/pydantic_ai_slim/pydantic_ai/models/__init__.py
index 79f6031687..811c128379 100644
--- a/pydantic_ai_slim/pydantic_ai/models/__init__.py
+++ b/pydantic_ai_slim/pydantic_ai/models/__init__.py
@@ -227,6 +227,14 @@
'heroku:claude-3-7-sonnet',
'heroku:claude-4-sonnet',
'heroku:claude-3-haiku',
+ 'huggingface:Qwen/QwQ-32B',
+ 'huggingface:Qwen/Qwen2.5-72B-Instruct',
+ 'huggingface:Qwen/Qwen3-235B-A22B',
+ 'huggingface:Qwen/Qwen3-32B',
+ 'huggingface:deepseek-ai/DeepSeek-R1',
+ 'huggingface:meta-llama/Llama-3.3-70B-Instruct',
+ 'huggingface:meta-llama/Llama-4-Maverick-17B-128E-Instruct',
+ 'huggingface:meta-llama/Llama-4-Scout-17B-16E-Instruct',
'mistral:codestral-latest',
'mistral:mistral-large-latest',
'mistral:mistral-moderation-latest',
@@ -560,7 +568,7 @@ def override_allow_model_requests(allow_model_requests: bool) -> Iterator[None]:
ALLOW_MODEL_REQUESTS = old_value # pyright: ignore[reportConstantRedefinition]
-def infer_model(model: Model | KnownModelName | str) -> Model:
+def infer_model(model: Model | KnownModelName | str) -> Model: # noqa: C901
"""Infer the model from the name."""
if isinstance(model, Model):
return model
@@ -624,6 +632,10 @@ def infer_model(model: Model | KnownModelName | str) -> Model:
from .bedrock import BedrockConverseModel
return BedrockConverseModel(model_name, provider=provider)
+ elif provider == 'huggingface':
+ from .huggingface import HuggingFaceModel
+
+ return HuggingFaceModel(model_name, provider=provider)
else:
raise UserError(f'Unknown model: {model}') # pragma: no cover
diff --git a/pydantic_ai_slim/pydantic_ai/models/huggingface.py b/pydantic_ai_slim/pydantic_ai/models/huggingface.py
new file mode 100644
index 0000000000..41d53ca62a
--- /dev/null
+++ b/pydantic_ai_slim/pydantic_ai/models/huggingface.py
@@ -0,0 +1,463 @@
+from __future__ import annotations as _annotations
+
+import base64
+from collections.abc import AsyncIterable, AsyncIterator
+from contextlib import asynccontextmanager
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Literal, Union, cast, overload
+
+from typing_extensions import assert_never
+
+from pydantic_ai._thinking_part import split_content_into_text_and_thinking
+from pydantic_ai.providers import Provider, infer_provider
+
+from .. import ModelHTTPError, UnexpectedModelBehavior, _utils, usage
+from .._utils import guard_tool_call_id as _guard_tool_call_id, now_utc as _now_utc
+from ..messages import (
+ AudioUrl,
+ BinaryContent,
+ DocumentUrl,
+ ImageUrl,
+ ModelMessage,
+ ModelRequest,
+ ModelResponse,
+ ModelResponsePart,
+ ModelResponseStreamEvent,
+ RetryPromptPart,
+ SystemPromptPart,
+ TextPart,
+ ThinkingPart,
+ ToolCallPart,
+ ToolReturnPart,
+ UserPromptPart,
+ VideoUrl,
+)
+from ..settings import ModelSettings
+from ..tools import ToolDefinition
+from . import Model, ModelRequestParameters, StreamedResponse, check_allow_model_requests
+
+try:
+ import aiohttp
+ from huggingface_hub import (
+ AsyncInferenceClient,
+ ChatCompletionInputMessage,
+ ChatCompletionInputMessageChunk,
+ ChatCompletionInputTool,
+ ChatCompletionInputToolCall,
+ ChatCompletionInputURL,
+ ChatCompletionOutput,
+ ChatCompletionOutputMessage,
+ ChatCompletionStreamOutput,
+ )
+ from huggingface_hub.errors import HfHubHTTPError
+
+except ImportError as _import_error:
+ raise ImportError(
+ 'Please install `huggingface_hub` to use Hugging Face Inference Providers, '
+ 'you can use the `huggingface` optional group — `pip install "pydantic-ai-slim[huggingface]"`'
+ ) from _import_error
+
+__all__ = (
+ 'HuggingFaceModel',
+ 'HuggingFaceModelSettings',
+)
+
+
+HFSystemPromptRole = Literal['system', 'user']
+
+LatestHuggingFaceModelNames = Literal[
+ 'deepseek-ai/DeepSeek-R1',
+ 'meta-llama/Llama-3.3-70B-Instruct',
+ 'meta-llama/Llama-4-Maverick-17B-128E-Instruct',
+ 'meta-llama/Llama-4-Scout-17B-16E-Instruct',
+ 'Qwen/QwQ-32B',
+ 'Qwen/Qwen2.5-72B-Instruct',
+ 'Qwen/Qwen3-235B-A22B',
+ 'Qwen/Qwen3-32B',
+]
+"""Latest Hugging Face models."""
+
+
+HuggingFaceModelName = Union[str, LatestHuggingFaceModelNames]
+"""Possible Hugging Face model names.
+
+You can browse available models [here](https://huggingface.co/models?pipeline_tag=text-generation&inference_provider=all&sort=trending).
+"""
+
+
+class HuggingFaceModelSettings(ModelSettings, total=False):
+ """Settings used for a Hugging Face model request."""
+
+ # ALL FIELDS MUST BE `huggingface_` PREFIXED SO YOU CAN MERGE THEM WITH OTHER MODELS.
+ # This class is a placeholder for any future huggingface-specific settings
+
+
+@dataclass(init=False)
+class HuggingFaceModel(Model):
+ """A model that uses Hugging Face Inference Providers.
+
+ Internally, this uses the [HF Python client](https://github.com/huggingface/huggingface_hub) to interact with the API.
+
+ Apart from `__init__`, all methods are private or match those of the base class.
+ """
+
+ client: AsyncInferenceClient = field(repr=False)
+
+ _model_name: str = field(repr=False)
+ _system: str = field(default='huggingface', repr=False)
+
+ def __init__(
+ self,
+ model_name: str,
+ *,
+ provider: Literal['huggingface'] | Provider[AsyncInferenceClient] = 'huggingface',
+ ):
+ """Initialize a Hugging Face model.
+
+ Args:
+ model_name: The name of the Model to use. You can browse available models [here](https://huggingface.co/models?pipeline_tag=text-generation&inference_provider=all&sort=trending).
+            provider: The provider to use for Hugging Face Inference Providers. Can be either the string
+                'huggingface' or an instance of `Provider[AsyncInferenceClient]`. Defaults to 'huggingface'.
+ """
+ self._model_name = model_name
+ self._provider = provider
+ if isinstance(provider, str):
+ provider = infer_provider(provider)
+ self.client = provider.client
+
+ async def request(
+ self,
+ messages: list[ModelMessage],
+ model_settings: ModelSettings | None,
+ model_request_parameters: ModelRequestParameters,
+ ) -> ModelResponse:
+ check_allow_model_requests()
+ response = await self._completions_create(
+ messages, False, cast(HuggingFaceModelSettings, model_settings or {}), model_request_parameters
+ )
+ model_response = self._process_response(response)
+ model_response.usage.requests = 1
+ return model_response
+
+ @asynccontextmanager
+ async def request_stream(
+ self,
+ messages: list[ModelMessage],
+ model_settings: ModelSettings | None,
+ model_request_parameters: ModelRequestParameters,
+ ) -> AsyncIterator[StreamedResponse]:
+ check_allow_model_requests()
+ response = await self._completions_create(
+ messages, True, cast(HuggingFaceModelSettings, model_settings or {}), model_request_parameters
+ )
+ yield await self._process_streamed_response(response)
+
+ @property
+ def model_name(self) -> HuggingFaceModelName:
+ """The model name."""
+ return self._model_name
+
+ @property
+ def system(self) -> str:
+ """The system / model provider."""
+ return self._system
+
+ @overload
+ async def _completions_create(
+ self,
+ messages: list[ModelMessage],
+ stream: Literal[True],
+ model_settings: HuggingFaceModelSettings,
+ model_request_parameters: ModelRequestParameters,
+ ) -> AsyncIterable[ChatCompletionStreamOutput]: ...
+
+ @overload
+ async def _completions_create(
+ self,
+ messages: list[ModelMessage],
+ stream: Literal[False],
+ model_settings: HuggingFaceModelSettings,
+ model_request_parameters: ModelRequestParameters,
+ ) -> ChatCompletionOutput: ...
+
+ async def _completions_create(
+ self,
+ messages: list[ModelMessage],
+ stream: bool,
+ model_settings: HuggingFaceModelSettings,
+ model_request_parameters: ModelRequestParameters,
+ ) -> ChatCompletionOutput | AsyncIterable[ChatCompletionStreamOutput]:
+ tools = self._get_tools(model_request_parameters)
+
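+        # No tools means no tool_choice; if plain-text output isn't allowed, a tool call is forced,
+        # otherwise the model is left to decide.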
+ if not tools:
+ tool_choice: Literal['none', 'required', 'auto'] | None = None
+ elif not model_request_parameters.allow_text_output:
+ tool_choice = 'required'
+ else:
+ tool_choice = 'auto'
+
+ hf_messages = await self._map_messages(messages)
+
+ try:
+ return await self.client.chat.completions.create( # type: ignore
+ model=self._model_name,
+ messages=hf_messages, # type: ignore
+ tools=tools,
+ tool_choice=tool_choice or None,
+ stream=stream,
+ stop=model_settings.get('stop_sequences', None),
+ temperature=model_settings.get('temperature', None),
+ top_p=model_settings.get('top_p', None),
+ seed=model_settings.get('seed', None),
+ presence_penalty=model_settings.get('presence_penalty', None),
+ frequency_penalty=model_settings.get('frequency_penalty', None),
+ logit_bias=model_settings.get('logit_bias', None), # type: ignore
+ logprobs=model_settings.get('logprobs', None),
+ top_logprobs=model_settings.get('top_logprobs', None),
+ extra_body=model_settings.get('extra_body'), # type: ignore
+ )
+ except aiohttp.ClientResponseError as e:
+ raise ModelHTTPError(
+ status_code=e.status,
+ model_name=self.model_name,
+ body=e.response_error_payload, # type: ignore
+ ) from e
+ except HfHubHTTPError as e:
+ raise ModelHTTPError(
+ status_code=e.response.status_code,
+ model_name=self.model_name,
+ body=e.response.content,
+ ) from e
+
+ def _process_response(self, response: ChatCompletionOutput) -> ModelResponse:
+ """Process a non-streamed response, and prepare a message to return."""
+ if response.created:
+ timestamp = datetime.fromtimestamp(response.created, tz=timezone.utc)
+ else:
+ timestamp = _now_utc()
+
+ choice = response.choices[0]
+ content = choice.message.content
+ tool_calls = choice.message.tool_calls
+
+ items: list[ModelResponsePart] = []
+
+ if content is not None:
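+            # Split any <think>...</think> sections of the content into ThinkingPart items.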
+ items.extend(split_content_into_text_and_thinking(content))
+ if tool_calls is not None:
+ for c in tool_calls:
+ items.append(ToolCallPart(c.function.name, c.function.arguments, tool_call_id=c.id))
+ return ModelResponse(
+ items,
+ usage=_map_usage(response),
+ model_name=response.model,
+ timestamp=timestamp,
+ vendor_id=response.id,
+ )
+
+ async def _process_streamed_response(self, response: AsyncIterable[ChatCompletionStreamOutput]) -> StreamedResponse:
+ """Process a streamed response, and prepare a streaming response to return."""
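+        # Peek at the first chunk without consuming the stream so its `created` timestamp can be used below.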
+ peekable_response = _utils.PeekableAsyncStream(response)
+ first_chunk = await peekable_response.peek()
+ if isinstance(first_chunk, _utils.Unset):
+ raise UnexpectedModelBehavior( # pragma: no cover
+ 'Streamed response ended without content or tool calls'
+ )
+
+ return HuggingFaceStreamedResponse(
+ _model_name=self._model_name,
+ _response=peekable_response,
+ _timestamp=datetime.fromtimestamp(first_chunk.created, tz=timezone.utc),
+ )
+
+ def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[ChatCompletionInputTool]:
+ tools = [self._map_tool_definition(r) for r in model_request_parameters.function_tools]
+ if model_request_parameters.output_tools:
+ tools += [self._map_tool_definition(r) for r in model_request_parameters.output_tools]
+ return tools
+
+ async def _map_messages(
+ self, messages: list[ModelMessage]
+ ) -> list[ChatCompletionInputMessage | ChatCompletionOutputMessage]:
+        """Maps a list of `pydantic_ai` messages to `huggingface_hub.ChatCompletionInputMessage`s."""
+ hf_messages: list[ChatCompletionInputMessage | ChatCompletionOutputMessage] = []
+ for message in messages:
+ if isinstance(message, ModelRequest):
+ async for item in self._map_user_message(message):
+ hf_messages.append(item)
+ elif isinstance(message, ModelResponse):
+ texts: list[str] = []
+ tool_calls: list[ChatCompletionInputToolCall] = []
+ for item in message.parts:
+ if isinstance(item, TextPart):
+ texts.append(item.content)
+ elif isinstance(item, ToolCallPart):
+ tool_calls.append(self._map_tool_call(item))
+ elif isinstance(item, ThinkingPart):
+ # NOTE: We don't send ThinkingPart to the providers yet. If you are unsatisfied with this,
+ # please open an issue. The below code is the code to send thinking to the provider.
+ # texts.append(f'\n{item.content}\n')
+ pass
+ else:
+ assert_never(item)
+ message_param = ChatCompletionInputMessage(role='assistant') # type: ignore
+ if texts:
+ # Note: model responses from this model should only have one text item, so the following
+ # shouldn't merge multiple texts into one unless you switch models between runs:
+ message_param['content'] = '\n\n'.join(texts)
+ if tool_calls:
+ message_param['tool_calls'] = tool_calls
+ hf_messages.append(message_param)
+ else:
+ assert_never(message)
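+        # Agent instructions, if any, are sent as a system message at the start of the conversation.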
+ if instructions := self._get_instructions(messages):
+ hf_messages.insert(0, ChatCompletionInputMessage(content=instructions, role='system')) # type: ignore
+ return hf_messages
+
+ @staticmethod
+ def _map_tool_call(t: ToolCallPart) -> ChatCompletionInputToolCall:
+ return ChatCompletionInputToolCall.parse_obj_as_instance( # type: ignore
+ {
+ 'id': _guard_tool_call_id(t=t),
+ 'type': 'function',
+ 'function': {
+ 'name': t.tool_name,
+ 'arguments': t.args_as_json_str(),
+ },
+ }
+ )
+
+ @staticmethod
+ def _map_tool_definition(f: ToolDefinition) -> ChatCompletionInputTool:
+ tool_param: ChatCompletionInputTool = ChatCompletionInputTool.parse_obj_as_instance( # type: ignore
+ {
+ 'type': 'function',
+ 'function': {
+ 'name': f.name,
+ 'description': f.description,
+ 'parameters': f.parameters_json_schema,
+ },
+ }
+ )
+ if f.strict is not None:
+ tool_param['function']['strict'] = f.strict
+ return tool_param
+
+ async def _map_user_message(
+ self, message: ModelRequest
+ ) -> AsyncIterable[ChatCompletionInputMessage | ChatCompletionOutputMessage]:
+ for part in message.parts:
+ if isinstance(part, SystemPromptPart):
+ yield ChatCompletionInputMessage.parse_obj_as_instance({'role': 'system', 'content': part.content}) # type: ignore
+ elif isinstance(part, UserPromptPart):
+ yield await self._map_user_prompt(part)
+ elif isinstance(part, ToolReturnPart):
+ yield ChatCompletionOutputMessage.parse_obj_as_instance( # type: ignore
+ {
+ 'role': 'tool',
+ 'tool_call_id': _guard_tool_call_id(t=part),
+ 'content': part.model_response_str(),
+ }
+ )
+ elif isinstance(part, RetryPromptPart):
+ if part.tool_name is None:
+ yield ChatCompletionInputMessage.parse_obj_as_instance( # type: ignore
+ {'role': 'user', 'content': part.model_response()}
+ )
+ else:
+ yield ChatCompletionInputMessage.parse_obj_as_instance( # type: ignore
+ {
+ 'role': 'tool',
+ 'tool_call_id': _guard_tool_call_id(t=part),
+ 'content': part.model_response(),
+ }
+ )
+ else:
+ assert_never(part)
+
+ @staticmethod
+ async def _map_user_prompt(part: UserPromptPart) -> ChatCompletionInputMessage:
+ content: str | list[ChatCompletionInputMessage]
+ if isinstance(part.content, str):
+ content = part.content
+ else:
+ content = []
+ for item in part.content:
+ if isinstance(item, str):
+ content.append(ChatCompletionInputMessageChunk(type='text', text=item)) # type: ignore
+ elif isinstance(item, ImageUrl):
+ url = ChatCompletionInputURL(url=item.url) # type: ignore
+ content.append(ChatCompletionInputMessageChunk(type='image_url', image_url=url)) # type: ignore
+ elif isinstance(item, BinaryContent):
+ base64_encoded = base64.b64encode(item.data).decode('utf-8')
+ if item.is_image:
+ url = ChatCompletionInputURL(url=f'data:{item.media_type};base64,{base64_encoded}') # type: ignore
+ content.append(ChatCompletionInputMessageChunk(type='image_url', image_url=url)) # type: ignore
+ else: # pragma: no cover
+ raise RuntimeError(f'Unsupported binary content type: {item.media_type}')
+ elif isinstance(item, AudioUrl):
+ raise NotImplementedError('AudioUrl is not supported for Hugging Face')
+ elif isinstance(item, DocumentUrl):
+ raise NotImplementedError('DocumentUrl is not supported for Hugging Face')
+ elif isinstance(item, VideoUrl):
+ raise NotImplementedError('VideoUrl is not supported for Hugging Face')
+ else:
+ assert_never(item)
+ return ChatCompletionInputMessage(role='user', content=content) # type: ignore
+
+
+@dataclass
+class HuggingFaceStreamedResponse(StreamedResponse):
+ """Implementation of `StreamedResponse` for Hugging Face models."""
+
+ _model_name: str
+ _response: AsyncIterable[ChatCompletionStreamOutput]
+ _timestamp: datetime
+
+ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
+ async for chunk in self._response:
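+            # Accumulate usage from any chunk that reports it; chunks without usage contribute nothing.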
+ self._usage += _map_usage(chunk)
+
+ try:
+ choice = chunk.choices[0]
+ except IndexError:
+ continue
+
+ # Handle the text part of the response
+ content = choice.delta.content
+ if content is not None:
+ yield self._parts_manager.handle_text_delta(vendor_part_id='content', content=content)
+
+ for dtc in choice.delta.tool_calls or []:
+ maybe_event = self._parts_manager.handle_tool_call_delta(
+ vendor_part_id=dtc.index,
+ tool_name=dtc.function and dtc.function.name, # type: ignore
+ args=dtc.function and dtc.function.arguments,
+ tool_call_id=dtc.id,
+ )
+ if maybe_event is not None:
+ yield maybe_event
+
+ @property
+ def model_name(self) -> str:
+ """Get the model name of the response."""
+ return self._model_name
+
+ @property
+ def timestamp(self) -> datetime:
+ """Get the timestamp of the response."""
+ return self._timestamp
+
+
+def _map_usage(response: ChatCompletionOutput | ChatCompletionStreamOutput) -> usage.Usage:
+ response_usage = response.usage
+ if response_usage is None:
+ return usage.Usage()
+
+ return usage.Usage(
+ request_tokens=response_usage.prompt_tokens,
+ response_tokens=response_usage.completion_tokens,
+ total_tokens=response_usage.total_tokens,
+ details=None,
+ )
diff --git a/pydantic_ai_slim/pydantic_ai/providers/__init__.py b/pydantic_ai_slim/pydantic_ai/providers/__init__.py
index 5e2112ac66..f756120cf4 100644
--- a/pydantic_ai_slim/pydantic_ai/providers/__init__.py
+++ b/pydantic_ai_slim/pydantic_ai/providers/__init__.py
@@ -111,6 +111,10 @@ def infer_provider_class(provider: str) -> type[Provider[Any]]: # noqa: C901
from .heroku import HerokuProvider
return HerokuProvider
+ elif provider == 'huggingface':
+ from .huggingface import HuggingFaceProvider
+
+ return HuggingFaceProvider
elif provider == 'github':
from .github import GitHubProvider
diff --git a/pydantic_ai_slim/pydantic_ai/providers/huggingface.py b/pydantic_ai_slim/pydantic_ai/providers/huggingface.py
new file mode 100644
index 0000000000..8afb415914
--- /dev/null
+++ b/pydantic_ai_slim/pydantic_ai/providers/huggingface.py
@@ -0,0 +1,88 @@
+from __future__ import annotations as _annotations
+
+import os
+from typing import overload
+
+from httpx import AsyncClient
+
+from pydantic_ai.exceptions import UserError
+
+try:
+ from huggingface_hub import AsyncInferenceClient
+except ImportError as _import_error: # pragma: no cover
+ raise ImportError(
+ 'Please install the `huggingface_hub` package to use the HuggingFace provider, '
+ "you can use the `huggingface` optional group — `pip install 'pydantic-ai-slim[huggingface]'`"
+ ) from _import_error
+
+from . import Provider
+
+
+class HuggingFaceProvider(Provider[AsyncInferenceClient]):
+ """Provider for Hugging Face."""
+
+ @property
+ def name(self) -> str:
+ return 'huggingface'
+
+ @property
+ def base_url(self) -> str:
+ return self.client.model # type: ignore
+
+ @property
+ def client(self) -> AsyncInferenceClient:
+ return self._client
+
+ @overload
+ def __init__(self, *, base_url: str, api_key: str | None = None) -> None: ...
+ @overload
+ def __init__(self, *, provider_name: str, api_key: str | None = None) -> None: ...
+ @overload
+ def __init__(self, *, hf_client: AsyncInferenceClient, api_key: str | None = None) -> None: ...
+ @overload
+ def __init__(self, *, hf_client: AsyncInferenceClient, base_url: str, api_key: str | None = None) -> None: ...
+ @overload
+ def __init__(self, *, hf_client: AsyncInferenceClient, provider_name: str, api_key: str | None = None) -> None: ...
+ @overload
+ def __init__(self, *, api_key: str | None = None) -> None: ...
+
+ def __init__(
+ self,
+ base_url: str | None = None,
+ api_key: str | None = None,
+ hf_client: AsyncInferenceClient | None = None,
+ http_client: AsyncClient | None = None,
+ provider_name: str | None = None,
+ ) -> None:
+ """Create a new Hugging Face provider.
+
+ Args:
+ base_url: The base url for the Hugging Face requests.
+ api_key: The API key to use for authentication, if not provided, the `HF_TOKEN` environment variable
+ will be used if available.
+ hf_client: An existing
+ [`AsyncInferenceClient`](https://huggingface.co/docs/huggingface_hub/v0.29.3/en/package_reference/inference_client#huggingface_hub.AsyncInferenceClient)
+ client to use. If not provided, a new instance will be created.
+            http_client: An existing `httpx.AsyncClient` to use for making HTTP requests. This is currently
+                not supported: passing it raises an error, use `hf_client` instead.
+            provider_name: Name of the provider to use for inference. Available providers are listed in the
+                [HF Inference Providers documentation](https://huggingface.co/docs/inference-providers/index#partners).
+                Defaults to 'auto', which selects the first provider available for the model, sorted by your
+                preferred order in https://hf.co/settings/inference-providers.
+                If `base_url` is passed, then `provider_name` is not used.
+ """
+ api_key = api_key or os.environ.get('HF_TOKEN')
+
+ if api_key is None:
+ raise UserError(
+                'Set the `HF_TOKEN` environment variable or pass it via `HuggingFaceProvider(api_key=...)` '
+                'to use the HuggingFace provider.'
+ )
+
+ if http_client is not None:
+            raise ValueError('`http_client` is not supported for the HuggingFace provider, please use `hf_client` instead.')
+
+ if base_url is not None and provider_name is not None:
+ raise ValueError('Cannot provide both `base_url` and `provider_name`.')
+
+ if hf_client is None:
+ self._client = AsyncInferenceClient(api_key=api_key, provider=provider_name, base_url=base_url) # type: ignore
+ else:
+ self._client = hf_client
diff --git a/pydantic_ai_slim/pyproject.toml b/pydantic_ai_slim/pyproject.toml
index 6371e0d7c4..2705ca9144 100644
--- a/pydantic_ai_slim/pyproject.toml
+++ b/pydantic_ai_slim/pyproject.toml
@@ -69,6 +69,7 @@ anthropic = ["anthropic>=0.52.0"]
groq = ["groq>=0.19.0"]
mistral = ["mistralai>=1.2.5"]
bedrock = ["boto3>=1.37.24"]
+huggingface = ["huggingface-hub[inference]>=0.33.2"]
# Tools
duckduckgo = ["ddgs>=9.0.0"]
tavily = ["tavily-python>=0.5.0"]
diff --git a/pyproject.toml b/pyproject.toml
index 18c7853c70..534f156db7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,7 @@ requires-python = ">=3.9"
[tool.hatch.metadata.hooks.uv-dynamic-versioning]
dependencies = [
- "pydantic-ai-slim[openai,vertexai,google,groq,anthropic,mistral,cohere,bedrock,cli,mcp,evals]=={{ version }}",
+ "pydantic-ai-slim[openai,vertexai,google,groq,anthropic,mistral,cohere,bedrock,huggingface,cli,mcp,evals]=={{ version }}",
]
[tool.hatch.metadata.hooks.uv-dynamic-versioning.optional-dependencies]
diff --git a/tests/conftest.py b/tests/conftest.py
index ce95301d3f..f94f5f0477 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -316,6 +316,11 @@ def openrouter_api_key() -> str:
return os.getenv('OPENROUTER_API_KEY', 'mock-api-key')
+@pytest.fixture(scope='session')
+def huggingface_api_key() -> str:
+ return os.getenv('HF_TOKEN', 'hf_token')
+
+
@pytest.fixture(scope='session')
def heroku_inference_key() -> str:
return os.getenv('HEROKU_INFERENCE_KEY', 'mock-api-key')
@@ -398,6 +403,7 @@ def model(
groq_api_key: str,
co_api_key: str,
gemini_api_key: str,
+ huggingface_api_key: str,
bedrock_provider: BedrockProvider,
) -> Model: # pragma: lax no cover
try:
@@ -440,6 +446,14 @@ def model(
from pydantic_ai.models.bedrock import BedrockConverseModel
return BedrockConverseModel('us.amazon.nova-micro-v1:0', provider=bedrock_provider)
+ elif request.param == 'huggingface':
+ from pydantic_ai.models.huggingface import HuggingFaceModel
+ from pydantic_ai.providers.huggingface import HuggingFaceProvider
+
+ return HuggingFaceModel(
+ 'Qwen/Qwen2.5-72B-Instruct',
+ provider=HuggingFaceProvider(provider_name='nebius', api_key=huggingface_api_key),
+ )
else:
raise ValueError(f'Unknown model: {request.param}')
except ImportError:
diff --git a/tests/models/cassettes/test_huggingface/test_hf_model_instructions.yaml b/tests/models/cassettes/test_huggingface/test_hf_model_instructions.yaml
new file mode 100644
index 0000000000..d8a5ee07e3
--- /dev/null
+++ b/tests/models/cassettes/test_huggingface/test_hf_model_instructions.yaml
@@ -0,0 +1,121 @@
+interactions:
+- request:
+ body: null
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ method: GET
+ uri: https://huggingface.co/api/models/Qwen/Qwen2.5-72B-Instruct?expand=inferenceProviderMapping
+ response:
+ headers:
+ access-control-allow-origin:
+ - https://huggingface.co
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '701'
+ content-type:
+ - application/json; charset=utf-8
+ cross-origin-opener-policy:
+ - same-origin
+ etag:
+ - W/"2bd-diYmxjldwbIbFgWNRPBqJ3SEIak"
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ vary:
+ - Origin
+ parsed_body:
+ _id: 66e81cefd1b1391042d0e47e
+ id: Qwen/Qwen2.5-72B-Instruct
+ inferenceProviderMapping:
+ featherless-ai:
+ providerId: Qwen/Qwen2.5-72B-Instruct
+ status: live
+ task: conversational
+ fireworks-ai:
+ providerId: accounts/fireworks/models/qwen2p5-72b-instruct
+ status: live
+ task: conversational
+ hyperbolic:
+ providerId: Qwen/Qwen2.5-72B-Instruct
+ status: live
+ task: conversational
+ nebius:
+ providerId: Qwen/Qwen2.5-72B-Instruct-fast
+ status: live
+ task: conversational
+ novita:
+ providerId: qwen/qwen-2.5-72b-instruct
+ status: live
+ task: conversational
+ together:
+ providerId: Qwen/Qwen2.5-72B-Instruct-Turbo
+ status: live
+ task: conversational
+ status:
+ code: 200
+ message: OK
+- request:
+ body: null
+ headers: {}
+ method: POST
+ uri: https://router.huggingface.co/nebius/v1/chat/completions
+ response:
+ headers:
+ access-control-allow-credentials:
+ - 'true'
+ access-control-allow-origin:
+ - '*'
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '560'
+ content-type:
+ - application/json
+ cross-origin-opener-policy:
+ - same-origin
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains
+ vary:
+ - Origin
+ parsed_body:
+ choices:
+ - finish_reason: stop
+ index: 0
+ logprobs: null
+ message:
+ audio: null
+ content: Paris
+ function_call: null
+ reasoning_content: null
+ refusal: null
+ role: assistant
+ tool_calls: []
+ stop_reason: null
+ created: 1751470757
+ id: chatcmpl-b3936940372c481b8d886e596dc75524
+ model: Qwen/Qwen2.5-72B-Instruct-fast
+ object: chat.completion
+ prompt_logprobs: null
+ service_tier: null
+ system_fingerprint: null
+ usage:
+ completion_tokens: 2
+ completion_tokens_details: null
+ prompt_tokens: 26
+ prompt_tokens_details: null
+ total_tokens: 28
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/models/cassettes/test_huggingface/test_hf_model_thinking_part.yaml b/tests/models/cassettes/test_huggingface/test_hf_model_thinking_part.yaml
new file mode 100644
index 0000000000..10be947804
--- /dev/null
+++ b/tests/models/cassettes/test_huggingface/test_hf_model_thinking_part.yaml
@@ -0,0 +1,291 @@
+interactions:
+- request:
+ body: null
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ method: GET
+ uri: https://huggingface.co/api/models/Qwen/Qwen3-235B-A22B?expand=inferenceProviderMapping
+ response:
+ headers:
+ access-control-allow-origin:
+ - https://huggingface.co
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '470'
+ content-type:
+ - application/json; charset=utf-8
+ cross-origin-opener-policy:
+ - same-origin
+ etag:
+ - W/"1d6-5wPQfbCXoh8XtBVekhfceCwHN4Y"
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ vary:
+ - Origin
+ parsed_body:
+ _id: 680daa4ac41c05ba341b67d1
+ id: Qwen/Qwen3-235B-A22B
+ inferenceProviderMapping:
+ fireworks-ai:
+ providerId: accounts/fireworks/models/qwen3-235b-a22b
+ status: live
+ task: conversational
+ nebius:
+ providerId: Qwen/Qwen3-235B-A22B
+ status: live
+ task: conversational
+ novita:
+ providerId: qwen/qwen3-235b-a22b-fp8
+ status: live
+ task: conversational
+ nscale:
+ providerId: Qwen/Qwen3-235B-A22B
+ status: live
+ task: conversational
+ status:
+ code: 200
+ message: OK
+- request:
+ body: null
+ headers: {}
+ method: POST
+ uri: https://router.huggingface.co/nebius/v1/chat/completions
+ response:
+ headers:
+ access-control-allow-credentials:
+ - 'true'
+ access-control-allow-origin:
+ - '*'
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '5526'
+ content-type:
+ - application/json
+ cross-origin-opener-policy:
+ - same-origin
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains
+ vary:
+ - Origin
+ parsed_body:
+ choices:
+ - finish_reason: stop
+ index: 0
+ logprobs: null
+ message:
+ audio: null
+ content: "\nOkay, the user is asking how to cross the street safely. Let me break this down step by step.
+ First, they need to look both ways to check for cars. But wait, should they check left, right, then left again?
+ I remember that's a common safety tip. They might be in a country where people drive on the right side or the
+ left, so maybe should I mention that?\n\nAlso, traffic signals and signs are important. What about pedestrian
+ crossings or traffic lights? Explaining when to walk when the signal is green and the cars have stopped. Oh, right,
+ sometimes people might not realize to wait for the walk signal. And even when using a crosswalk, you still need
+ to look both ways because cars might not stop.\n\nDistractions like phones or headphones. Yeah, people often get
+ hurt because they're looking at their phone while crossing. Should advise them to put away distractions and stay
+ alert. Kids and elderly folks might need extra care, like holding an adult's hand.\n\nWhat about if there's no
+ traffic light or crosswalk? Then finding the safest spot with good visibility, maybe near a corner where cars
+ can see them better. And teaching kids the basics of street safety.\n\nAlso, the confidence aspect—don't rush,
+ take your time, make eye contact with drivers. And what to do if stuck in the middle? Wait for the next signal.
+ Oh, and bicycles! In some places, bike lanes cross sidewalks, so being watchful for cyclists too.\n\nWait, should
+ I structure these points in a numbered list? Start with stopping at the curb, then looking both ways, checking
+ traffic signals, obeying signs, avoiding distractions, using crosswalks if possible, teaching kids, staying visible,
+ making eye contact, and what to do if stuck. Maybe add something about not assuming drivers see them and being
+ cautious.\n\nLet me make sure not to miss any key points. Also, mention that it's safer to cross at intersections.
+ And maybe a final note about local laws or practices varying by country. Yeah, that covers the main points. I
+ should present it clearly so it's easy to follow step by step without getting overwhelmed.\n\n\nCrossing
+ the street safely requires attention, patience, and following key steps. Here's a clear guide:\n\n1. **Stop at
+ the Curb**: Find a safe spot to pause before stepping onto the road.\n\n2. **Look Both Ways (Left, Right, Then
+ Left Again!)** \n - **First check left**: Look for oncoming traffic from your left (if driving is on the right
+ side in your country). \n - **Then check right**: Check for vehicles coming from the right. \n - **Final
+ glance left**: Recheck the direction of traffic closest to you before stepping off the curb. \n *(Reverse this
+ order if driving is on the left, as in the UK or Japan.)*\n\n3. **Use Traffic Signals and Crosswalks**: \n -
+ Wait for the pedestrian \"walk\" signal (green hand or similar). \n - If there’s no signal, only cross once
+ all vehicles have come to a complete stop and you’ve made eye contact with drivers. \n - Follow any painted
+ crosswalk lines and stay within them.\n\n4. **Obey Traffic Signs/Lights**: \n - Red/yellow lights mean stop.
+ Green means it’s safe to start crossing, but still watch for turning vehicles. \n - If the \"don’t walk\" signal
+ flashes while you’re mid-crossing, finish crossing without rushing.\n\n5. **Avoid Distractions**: \n - Put
+ away phones, earbuds, or anything that blocks your senses. \n - Keep your head up and stay alert to your surroundings.\n\n6.
+ **Be Visible and Predictable**: \n - Wear bright/light-colored clothing, especially at night. \n - Walk
+ (don’t run) and follow the flow of traffic. Avoid sudden changes in direction.\n\n7. **Teach Children Safely**:
+ \ \n - Hold young children’s hands. \n - Practice the \"stop, look, listen\" rule together. \n - Teach
+ them to make eye contact with drivers before crossing.\n\n8. **Cross at Intersections When Possible**: \n -
+ Drivers expect pedestrians at crosswalks and intersections. \n - If no crosswalk exists, choose a spot with
+ clear visibility (e.g., where you can see around parked cars).\n\n9. **Don’t Assume Drivers See You**: \n -
+ Even if a car stops, check for other vehicles that might not yield. \n - At night, use a flashlight or phone
+ light to stay visible.\n\n10. **What to Do if Stuck Mid-Street**: \n - If the light changes before you reach
+ the other side, stay calm. \n - Stop at the median or safety island and wait for the next signal. \n\n**Bonus
+ Tip**: In areas with bike lanes, check for cyclists even once you’ve started crossing. In some places, bikes ride
+ against traffic flow, so look both ways even on one-way streets.\n\n**Local Laws Matter**: Check rules in your
+ area—e.g., some places require yielding to pedestrians, while others prioritize drivers. Always prioritize your
+ safety over assumptions.\n\nFollow these steps, and you’ll cross the street confidently and safely every time!
+ \U0001F6B6♀️ ✅"
+ function_call: null
+ reasoning_content: null
+ refusal: null
+ role: assistant
+ tool_calls: []
+ stop_reason: null
+ created: 1752067065
+ id: chatcmpl-957db61fe60d4440bcfe1f11f2c5b4b9
+ model: Qwen/Qwen3-235B-A22B
+ object: chat.completion
+ prompt_logprobs: null
+ service_tier: null
+ system_fingerprint: null
+ usage:
+ completion_tokens: 1090
+ completion_tokens_details: null
+ prompt_tokens: 15
+ prompt_tokens_details: null
+ total_tokens: 1105
+ status:
+ code: 200
+ message: OK
+- request:
+ body: null
+ headers: {}
+ method: POST
+ uri: https://router.huggingface.co/nebius/v1/chat/completions
+ response:
+ headers:
+ access-control-allow-credentials:
+ - 'true'
+ access-control-allow-origin:
+ - '*'
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '9391'
+ content-type:
+ - application/json
+ cross-origin-opener-policy:
+ - same-origin
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains
+ vary:
+ - Origin
+ parsed_body:
+ choices:
+ - finish_reason: stop
+ index: 0
+ logprobs: null
+ message:
+ audio: null
+ content: "\nOkay, the user previously asked how to cross the street, and I gave a detailed answer. Now they're
+ asking about crossing a river analogously. Let me start by understanding the connection. They want a similar structured
+ approach but for crossing a river.\n\nFirst, I need to figure out the equivalents between crossing a street and
+ a river. The original steps included looking both ways, using signals, avoiding distractions, etc. For a river,
+ physical steps might involve checking the current, choosing a safe spot, maybe using a bridge or boat.\n\nI should
+ map each street-crossing step to a river scenario. For example, \"stop at the curb\" becomes \"assess the riverbank.\"
+ Instead of traffic signals, check for ferry schedules or bridge access. Use safety equipment like life jackets
+ instead of wearing bright clothes.\n\nWait, the user mentioned \"analogously,\" so the structure should mirror
+ the previous answer but with river-specific actions. Maybe start by pausing to observe the river, checking water
+ flow instead of traffic. Use bridges as crosswalks and traffic signals. Boating has its own signals, like flags
+ or lights.\n\nAlso, think about hazards unique to rivers: strong currents, slippery rocks, wildlife. Safety considerations
+ here would involve knowing how to swim, having a floatation device, not relying on unreliable methods like jumping
+ on rocks.\n\nNeed to include steps like evaluating the crossing point, checking for bridges or fords, using boats
+ if necessary, avoiding hazards. Maybe mention time-sensitive factors like weather or flooding, similar to obeying
+ traffic lights.\n\nI should ensure each point from the previous answer has a parallel. For example, distractions
+ like phones would be like not paying attention to the river's flow. Visibility could mean wearing bright colors
+ to be seen on the water.\n\nAlso, consider group scenarios or children, making sure everyone knows the plan. Teaching
+ kids to cross safely here might involve knowing how to use floatation devices.\n\nI need to make sure the answer
+ is comprehensive but clear, following the same logical structure as the street crossing answer. Maybe list steps
+ in order, similar to the original list. Check for completeness: assessment, choosing the method, using proper
+ equipment, following safety protocols, dealing with emergencies mid-crossing, etc.\n\nWait, the original answer
+ had 10 points plus a bonus tip. Should mirror that structure? Maybe create a list with parallels. Also, ensure
+ that analogies are accurate—traffic becomes currents, crosswalks become bridges or fords, traffic signals become
+ navigational markers or ferry schedules.\n\nAvoiding distractions would involve focusing on the water's movement
+ and surroundings, not looking at devices. Visibility in water might involve reflective gear or signaling.\n\nAlso,
+ legal aspects: crossing at authorized spots only, checking if permits are needed. Local hazards or regulations
+ could be like local traffic laws.\n\nPotential mistakes: Maybe comparing the river's current to traffic is too
+ direct. Or missing steps like testing the stability of a path across the river, which isn't present in street
+ crossing. Need to add those unique elements naturally.\n\nAlso, emergencies: If mid-river and something goes wrong,
+ similar to being stuck on the street. Having a plan, like heading to a safer area or using safety equipment.\n\nAlright,
+ structure each point as an analogy, then expand with river-specific advice. Make sure the order parallels the
+ original. Use similar section titles but adapted. Maybe end with a note on local conditions and safety first.\n\n\nCrossing
+ a river safely shares many principles with crossing a street but involves adapting to the unpredictable nature
+ of water. Below is an analogy-based guide, mirroring the street-crossing steps while addressing river-specific
+ challenges:\n\n---\n\n### **1. Stop at the Riverbank: Assess the Situation**\n - **Pause and Observe** \n Like
+ stopping at the curb, stand back from the edge to evaluate the river’s width, speed, and hazards (swift currents,
+ rocks, debris, depth). \n - **Check for Bridges/Piers/Crossings** \n Just as intersections prioritize
+ pedestrian safety, bridges or marked fords exist for safer passage. Use them if accessible. \n\n---\n\n### **2.
+ Read the River: Look Both Upstream and Downstream** \n - **Scan Both Directions** \n Just as you look
+ left/right for cars, search **upstream (left if driving is right-hand)** for hazards like floating debris or sudden
+ surges. Check **downstream (right)** for exit points in case you’re swept away. \n - **Check the Flow** \n
+ \ Assess current strength: Is it a gentle trickle or a raging torrent? Avoid crossing if water is above knee-deep
+ or too fast. \n\n---\n\n### **3. Use Safe Routes: Bridges, Ferries, or Designated Fords** \n - **Follow Traffic
+ Signals → Follow Nautical Rules** \n Wait for ferry schedules, flashing lights (if present), or buoys marking
+ safe paths. Cross only when signals (like a ferry’s horn) indicate it’s safe. \n - **Choose a Footbridge or
+ Ferry** \n Bridges eliminate water risks entirely, much like crosswalks. Ferries or boats (with licensed
+ operators) are safest for wider rivers. \n\n---\n\n### **4. Prioritize Your Path: Know Where to Step or Swim**
+ \ \n - **Identify Stable Rocks or Shallows** \n If wading, pick a route with flat, secure footing (like
+ stepping stones) or the shallowest stretch, avoiding slippery algae-covered surfaces. \n - **Test the Current**
+ \ \n Before fully entering, use a stick or rock to gauge the force of the water. Swift currents can sweep
+ you off your feet faster than a car can strike. \n\n---\n\n### **5. Avoid Distractions: Focus on the Movement**
+ \ \n - **Put Away Devices** \n A phone distraction here risks losing balance in the river versus stepping
+ blindly into traffic. Keep both hands free for stability. \n - **Listen to the River** \n Gurgling or
+ roaring water warns of hidden holes or rapids—similar to hearing a car engine approaching. \n\n---\n\n### **6.
+ Be Predictable and Visible: Wear Bright Gear or Floats** \n - **Wear a Life Jacket** \n Like high-visibility
+ clothing, a life jacket keeps you buoyant and makes you easier for rescuers or boat operators to spot. \n -
+ **Stick to a Straight Route** \n Zigzagging in water wastes energy and increases the risk of losing balance,
+ just as darting across lanes on a street invites accidents. \n\n---\n\n### **7. Communicate: Make Eye Contact
+ with Boaters or Guides** \n - **Signal to Operators** \n In small boats or rafts, wave to catch the attention
+ of passing vessels (like making eye contact with drivers) to ensure they see you. \n - **Use Hand Signals or
+ Whistles** \n Agree on emergency signals with your group beforehand (e.g., pointing downstream to signal
+ danger). \n\n---\n\n### **8. Cross at the Safest Spot: Avoid Mid-River Surprises** \n - **Choose Wide, Slow
+ Sections** \n Like crossing at intersections, wide shallow areas have gentler currents. Avoid narrows where
+ water funnels into rapids. \n - **Watch for Hidden Dangers** \n Submerged logs, sudden drop-offs, or hypothermic
+ cold water can be as lethal as a speeding car. \n\n---\n\n### **9. Don’t Assume Safety: Verify Every Step or
+ Stroke** \n - **Test Each Footstep** \n Tap the riverbed before transferring weight to avoid stepping
+ into a hole or loose gravel (like checking for icy patches on a sidewalk). \n - **Swim Only If Trained** \n
+ \ If the river is too deep to wade, only swim if you know how. Use floatation devices if unsure—similar to
+ holding an adult’s hand as a child crossing a street. \n\n---\n\n### **10. Mid-River Emergencies: What to Do
+ if Stuck** \n - **If Struck by Current** \n Stay calm, float on your back with feet downstream (to avoid
+ head-first collisions), and steer toward eddies or the shore. \n - **If Trapped on a Rock** \n Hug a large
+ rock and wait for help, like pausing at a median. Don’t risk swimming diagonally across the river’s flow. \n\n---\n\n###
+ **Bonus Tip: Adapt to Local Conditions** \n - **Research Hazards** \n Some rivers have undertows, wildlife,
+ or pollution. Check local warnings (like road signs for blind corners or school zones). \n - **Weather Watch**
+ \ \n Sudden rainstorms can cause flash floods—delay crossing if clouds mass on the horizon. \n\n---\n\nBy
+ applying street-crossing principles to river navigation—patience, situational awareness, and prioritizing safe
+ infrastructure—you can minimize risks. Always assume the river is more dangerous than it appears, just as you’d
+ treat an unfamiliar road. **Safety first, crossing second!** \U0001F30A \U0001F6A4 ⚠️"
+ function_call: null
+ reasoning_content: null
+ refusal: null
+ role: assistant
+ tool_calls: []
+ stop_reason: null
+ created: 1752067094
+ id: chatcmpl-35fdec1307634f94a39f7e26f52e12a7
+ model: Qwen/Qwen3-235B-A22B
+ object: chat.completion
+ prompt_logprobs: null
+ service_tier: null
+ system_fingerprint: null
+ usage:
+ completion_tokens: 1860
+ completion_tokens_details: null
+ prompt_tokens: 691
+ prompt_tokens_details: null
+ total_tokens: 2551
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/models/cassettes/test_huggingface/test_image_as_binary_content_input.yaml b/tests/models/cassettes/test_huggingface/test_image_as_binary_content_input.yaml
new file mode 100644
index 0000000000..8b295d4404
--- /dev/null
+++ b/tests/models/cassettes/test_huggingface/test_image_as_binary_content_input.yaml
@@ -0,0 +1,106 @@
+interactions:
+- request:
+ body: null
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ method: GET
+ uri: https://huggingface.co/api/models/Qwen/Qwen2.5-VL-72B-Instruct?expand=inferenceProviderMapping
+ response:
+ headers:
+ access-control-allow-origin:
+ - https://huggingface.co
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '293'
+ content-type:
+ - application/json; charset=utf-8
+ cross-origin-opener-policy:
+ - same-origin
+ etag:
+ - W/"125-DEMuQsKZBCb9/68jW5UsI3Q7x7E"
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ vary:
+ - Origin
+ parsed_body:
+ _id: 6797079422990ae89b5aff86
+ id: Qwen/Qwen2.5-VL-72B-Instruct
+ inferenceProviderMapping:
+ hyperbolic:
+ providerId: Qwen/Qwen2.5-VL-72B-Instruct
+ status: live
+ task: conversational
+ nebius:
+ providerId: Qwen/Qwen2.5-VL-72B-Instruct
+ status: live
+ task: conversational
+ status:
+ code: 200
+ message: OK
+- request:
+ body: null
+ headers: {}
+ method: POST
+ uri: https://router.huggingface.co/nebius/v1/chat/completions
+ response:
+ headers:
+ access-control-allow-credentials:
+ - 'true'
+ access-control-allow-origin:
+ - '*'
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '776'
+ content-type:
+ - application/json
+ cross-origin-opener-policy:
+ - same-origin
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains
+ vary:
+ - Origin
+ parsed_body:
+ choices:
+ - finish_reason: stop
+ index: 0
+ logprobs: null
+ message:
+ audio: null
+ content: The fruit in the image is a kiwi. It has been sliced in half, revealing its bright green flesh with small
+ black seeds arranged in a circular pattern around a white center. The outer skin of the kiwi is fuzzy and brown.
+ function_call: null
+ reasoning_content: null
+ refusal: null
+ role: assistant
+ tool_calls: []
+ stop_reason: null
+ created: 1751986733
+ id: chatcmpl-bd957b950cce4d61839e2af25f56f684
+ model: Qwen/Qwen2.5-VL-72B-Instruct
+ object: chat.completion
+ prompt_logprobs: null
+ service_tier: null
+ system_fingerprint: null
+ usage:
+ completion_tokens: 49
+ completion_tokens_details: null
+ prompt_tokens: 7625
+ prompt_tokens_details: null
+ total_tokens: 7674
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/models/cassettes/test_huggingface/test_image_url_input.yaml b/tests/models/cassettes/test_huggingface/test_image_url_input.yaml
new file mode 100644
index 0000000000..791a0aede5
--- /dev/null
+++ b/tests/models/cassettes/test_huggingface/test_image_url_input.yaml
@@ -0,0 +1,105 @@
+interactions:
+- request:
+ body: null
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ method: GET
+ uri: https://huggingface.co/api/models/Qwen/Qwen2.5-VL-72B-Instruct?expand=inferenceProviderMapping
+ response:
+ headers:
+ access-control-allow-origin:
+ - https://huggingface.co
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '293'
+ content-type:
+ - application/json; charset=utf-8
+ cross-origin-opener-policy:
+ - same-origin
+ etag:
+ - W/"125-DEMuQsKZBCb9/68jW5UsI3Q7x7E"
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ vary:
+ - Origin
+ parsed_body:
+ _id: 6797079422990ae89b5aff86
+ id: Qwen/Qwen2.5-VL-72B-Instruct
+ inferenceProviderMapping:
+ hyperbolic:
+ providerId: Qwen/Qwen2.5-VL-72B-Instruct
+ status: live
+ task: conversational
+ nebius:
+ providerId: Qwen/Qwen2.5-VL-72B-Instruct
+ status: live
+ task: conversational
+ status:
+ code: 200
+ message: OK
+- request:
+ body: null
+ headers: {}
+ method: POST
+ uri: https://router.huggingface.co/nebius/v1/chat/completions
+ response:
+ headers:
+ access-control-allow-credentials:
+ - 'true'
+ access-control-allow-origin:
+ - '*'
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '612'
+ content-type:
+ - application/json
+ cross-origin-opener-policy:
+ - same-origin
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains
+ vary:
+ - Origin
+ parsed_body:
+ choices:
+ - finish_reason: stop
+ index: 0
+ logprobs: null
+ message:
+ audio: null
+ content: Hello! How can I assist you with this image of a potato?
+ function_call: null
+ reasoning_content: null
+ refusal: null
+ role: assistant
+ tool_calls: []
+ stop_reason: null
+ created: 1751983479
+ id: chatcmpl-49aa100effab4ca28514d5ccc00d7944
+ model: Qwen/Qwen2.5-VL-72B-Instruct
+ object: chat.completion
+ prompt_logprobs: null
+ service_tier: null
+ system_fingerprint: null
+ usage:
+ completion_tokens: 15
+ completion_tokens_details: null
+ prompt_tokens: 269
+ prompt_tokens_details: null
+ total_tokens: 284
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/models/cassettes/test_huggingface/test_max_completion_tokens[Qwen-Qwen2.5-72B-Instruct].yaml b/tests/models/cassettes/test_huggingface/test_max_completion_tokens[Qwen-Qwen2.5-72B-Instruct].yaml
new file mode 100644
index 0000000000..8395c16fc6
--- /dev/null
+++ b/tests/models/cassettes/test_huggingface/test_max_completion_tokens[Qwen-Qwen2.5-72B-Instruct].yaml
@@ -0,0 +1,122 @@
+interactions:
+- request:
+ body: null
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ method: GET
+ uri: https://huggingface.co/api/models/Qwen/Qwen2.5-72B-Instruct?expand=inferenceProviderMapping
+ response:
+ headers:
+ access-control-allow-origin:
+ - https://huggingface.co
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '704'
+ content-type:
+ - application/json; charset=utf-8
+ cross-origin-opener-policy:
+ - same-origin
+ etag:
+ - W/"2c0-CGiQuUurY/UiBTJC7RlRRjJtbZU"
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ vary:
+ - Origin
+ parsed_body:
+ _id: 66e81cefd1b1391042d0e47e
+ id: Qwen/Qwen2.5-72B-Instruct
+ inferenceProviderMapping:
+ featherless-ai:
+ providerId: Qwen/Qwen2.5-72B-Instruct
+ status: live
+ task: conversational
+ fireworks-ai:
+ providerId: accounts/fireworks/models/qwen2p5-72b-instruct
+ status: error
+ task: conversational
+ hyperbolic:
+ providerId: Qwen/Qwen2.5-72B-Instruct
+ status: error
+ task: conversational
+ nebius:
+ providerId: Qwen/Qwen2.5-72B-Instruct-fast
+ status: live
+ task: conversational
+ novita:
+ providerId: qwen/qwen-2.5-72b-instruct
+ status: error
+ task: conversational
+ together:
+ providerId: Qwen/Qwen2.5-72B-Instruct-Turbo
+ status: live
+ task: conversational
+ status:
+ code: 200
+ message: OK
+- request:
+ body: null
+ headers: {}
+ method: POST
+ uri: https://router.huggingface.co/nebius/v1/chat/completions
+ response:
+ headers:
+ access-control-allow-credentials:
+ - 'true'
+ access-control-allow-origin:
+ - '*'
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '693'
+ content-type:
+ - application/json
+ cross-origin-opener-policy:
+ - same-origin
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains
+ vary:
+ - Origin
+ parsed_body:
+ choices:
+ - finish_reason: stop
+ index: 0
+ logprobs: null
+ message:
+ audio: null
+ content: Hello! How can I assist you today? Whether you have questions, need help with something specific, or just
+ want to chat, I'm here to help!
+ function_call: null
+ reasoning_content: null
+ refusal: null
+ role: assistant
+ tool_calls: []
+ stop_reason: null
+ created: 1752050598
+ id: chatcmpl-5295b41092674918b860d41f723660cb
+ model: Qwen/Qwen2.5-72B-Instruct-fast
+ object: chat.completion
+ prompt_logprobs: null
+ service_tier: null
+ system_fingerprint: null
+ usage:
+ completion_tokens: 33
+ completion_tokens_details: null
+ prompt_tokens: 30
+ prompt_tokens_details: null
+ total_tokens: 63
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/models/cassettes/test_huggingface/test_max_completion_tokens[deepseek-ai-DeepSeek-R1-0528].yaml b/tests/models/cassettes/test_huggingface/test_max_completion_tokens[deepseek-ai-DeepSeek-R1-0528].yaml
new file mode 100644
index 0000000000..6f9868de9b
--- /dev/null
+++ b/tests/models/cassettes/test_huggingface/test_max_completion_tokens[deepseek-ai-DeepSeek-R1-0528].yaml
@@ -0,0 +1,128 @@
+interactions:
+- request:
+ body: null
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ method: GET
+ uri: https://huggingface.co/api/models/deepseek-ai/DeepSeek-R1-0528?expand=inferenceProviderMapping
+ response:
+ headers:
+ access-control-allow-origin:
+ - https://huggingface.co
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '678'
+ content-type:
+ - application/json; charset=utf-8
+ cross-origin-opener-policy:
+ - same-origin
+ etag:
+ - W/"2a6-gQg+B654Px2F2NUtLDU93uSoBDU"
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ vary:
+ - Origin
+ parsed_body:
+ _id: 6836db82a3626cb7b5343be8
+ id: deepseek-ai/DeepSeek-R1-0528
+ inferenceProviderMapping:
+ fireworks-ai:
+ providerId: accounts/fireworks/models/deepseek-r1-0528
+ status: live
+ task: conversational
+ hyperbolic:
+ providerId: deepseek-ai/DeepSeek-R1-0528
+ status: live
+ task: conversational
+ nebius:
+ providerId: deepseek-ai/DeepSeek-R1-0528
+ status: live
+ task: conversational
+ novita:
+ providerId: deepseek/deepseek-r1-0528
+ status: live
+ task: conversational
+ sambanova:
+ providerId: DeepSeek-R1-0528
+ status: live
+ task: conversational
+ together:
+ providerId: deepseek-ai/DeepSeek-R1
+ status: live
+ task: conversational
+ status:
+ code: 200
+ message: OK
+- request:
+ body: null
+ headers: {}
+ method: POST
+ uri: https://router.huggingface.co/nebius/v1/chat/completions
+ response:
+ headers:
+ access-control-allow-credentials:
+ - 'true'
+ access-control-allow-origin:
+ - '*'
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '1325'
+ content-type:
+ - application/json
+ cross-origin-opener-policy:
+ - same-origin
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains
+ vary:
+ - Origin
+ parsed_body:
+ choices:
+ - finish_reason: stop
+ index: 0
+ logprobs: null
+ message:
+ audio: null
+ content: "\nOkay, the user just said “hello”. A simple greeting. They might be testing if I'm online, starting
+ a casual chat, or preparing a deeper question. \n\nSince they didn't add context, I'll match their tone—friendly
+ and open-ended. Short response invites them to lead. Adding the emoji makes it warmer. No need to overthink yet.
+ \n\nHmm… if they're new, they might need reassurance that I'm responsive. If they're regular users, they're probably
+ just warming up. Either way, keeping it light feels safe. \n\nWatch for clues in their next message—if they dive
+ into a topic, they were just being polite before asking. If they reply with small talk, they might want companionship.\n\nHello!
+ \U0001F60A How can I assist you today?"
+ function_call: null
+ reasoning_content: null
+ refusal: null
+ role: assistant
+ tool_calls: []
+ stop_reason: null
+ created: 1752050599
+ id: chatcmpl-25472217e5b643e0a1f3f20dd44ed2c1
+ kv_transfer_params: null
+ model: deepseek-ai/DeepSeek-R1-0528
+ object: chat.completion
+ prompt_logprobs: null
+ service_tier: null
+ system_fingerprint: null
+ usage:
+ completion_tokens: 165
+ completion_tokens_details: null
+ prompt_tokens: 6
+ prompt_tokens_details: null
+ total_tokens: 171
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/models/cassettes/test_huggingface/test_max_completion_tokens[meta-llama-Llama-3.3-70B-Instruct].yaml b/tests/models/cassettes/test_huggingface/test_max_completion_tokens[meta-llama-Llama-3.3-70B-Instruct].yaml
new file mode 100644
index 0000000000..101f8f9e22
--- /dev/null
+++ b/tests/models/cassettes/test_huggingface/test_max_completion_tokens[meta-llama-Llama-3.3-70B-Instruct].yaml
@@ -0,0 +1,142 @@
+interactions:
+- request:
+ body: null
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ method: GET
+ uri: https://huggingface.co/api/models/meta-llama/Llama-3.3-70B-Instruct?expand=inferenceProviderMapping
+ response:
+ headers:
+ access-control-allow-origin:
+ - https://huggingface.co
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '1215'
+ content-type:
+ - application/json; charset=utf-8
+ cross-origin-opener-policy:
+ - same-origin
+ etag:
+ - W/"4bf-2c5rXKFDCLWF+O3TnkXoII8pC2U"
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ vary:
+ - Origin
+ parsed_body:
+ _id: 6745f28f9333dfcc06268b1e
+ id: meta-llama/Llama-3.3-70B-Instruct
+ inferenceProviderMapping:
+ cerebras:
+ providerId: llama-3.3-70b
+ status: live
+ task: conversational
+ featherless-ai:
+ providerId: meta-llama/Llama-3.3-70B-Instruct
+ status: live
+ task: conversational
+ fireworks-ai:
+ providerId: accounts/fireworks/models/llama-v3p3-70b-instruct
+ status: live
+ task: conversational
+ groq:
+ providerId: llama-3.3-70b-versatile
+ status: live
+ task: conversational
+ hyperbolic:
+ providerId: meta-llama/Llama-3.3-70B-Instruct
+ status: live
+ task: conversational
+ nebius:
+ providerId: meta-llama/Llama-3.3-70B-Instruct-fast
+ status: live
+ task: conversational
+ novita:
+ providerId: meta-llama/llama-3.3-70b-instruct
+ status: live
+ task: conversational
+ nscale:
+ providerId: meta-llama/Llama-3.3-70B-Instruct
+ status: live
+ task: conversational
+ ovhcloud:
+ providerId: Meta-Llama-3_3-70B-Instruct
+ status: error
+ task: conversational
+ sambanova:
+ providerId: Meta-Llama-3.3-70B-Instruct
+ status: live
+ task: conversational
+ together:
+ providerId: meta-llama/Llama-3.3-70B-Instruct-Turbo
+ status: live
+ task: conversational
+ status:
+ code: 200
+ message: OK
+- request:
+ body: null
+ headers: {}
+ method: POST
+ uri: https://router.huggingface.co/nebius/v1/chat/completions
+ response:
+ headers:
+ access-control-allow-credentials:
+ - 'true'
+ access-control-allow-origin:
+ - '*'
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '686'
+ content-type:
+ - application/json
+ cross-origin-opener-policy:
+ - same-origin
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains
+ vary:
+ - Origin
+ parsed_body:
+ choices:
+ - finish_reason: stop
+ index: 0
+ logprobs: null
+ message:
+ audio: null
+ content: '{"type": "function", "name": "print_output", "parameters": {"output": "hello"}}'
+ function_call: null
+ reasoning_content: null
+ refusal: null
+ role: assistant
+ tool_calls: []
+ stop_reason: 128008
+ created: 1752050609
+ id: chatcmpl-e4e88c8a58b34ea8bd5c47e6265a0de3
+ kv_transfer_params: null
+ model: meta-llama/Llama-3.3-70B-Instruct-fast
+ object: chat.completion
+ prompt_logprobs: null
+ service_tier: null
+ system_fingerprint: null
+ usage:
+ completion_tokens: 23
+ completion_tokens_details: null
+ prompt_tokens: 92
+ prompt_tokens_details: null
+ total_tokens: 115
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/models/cassettes/test_huggingface/test_request_simple_success_with_vcr.yaml b/tests/models/cassettes/test_huggingface/test_request_simple_success_with_vcr.yaml
new file mode 100644
index 0000000000..6996da0333
--- /dev/null
+++ b/tests/models/cassettes/test_huggingface/test_request_simple_success_with_vcr.yaml
@@ -0,0 +1,126 @@
+interactions:
+- request:
+ body: null
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ method: GET
+ uri: https://huggingface.co/api/models/Qwen/Qwen2.5-72B-Instruct?expand=inferenceProviderMapping
+ response:
+ headers:
+ access-control-allow-origin:
+ - https://huggingface.co
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '800'
+ content-type:
+ - application/json; charset=utf-8
+ cross-origin-opener-policy:
+ - same-origin
+ etag:
+ - W/"320-IoLwHc4XKGzRoHW0ok1gY7tY/NI"
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ vary:
+ - Origin
+ parsed_body:
+ _id: 66e81cefd1b1391042d0e47e
+ id: Qwen/Qwen2.5-72B-Instruct
+ inferenceProviderMapping:
+ featherless-ai:
+ providerId: Qwen/Qwen2.5-72B-Instruct
+ status: error
+ task: conversational
+ fireworks-ai:
+ providerId: accounts/fireworks/models/qwen2p5-72b-instruct
+ status: live
+ task: conversational
+ hf-inference:
+ providerId: Qwen/Qwen2.5-72B-Instruct
+ status: live
+ task: conversational
+ hyperbolic:
+ providerId: Qwen/Qwen2.5-72B-Instruct
+ status: live
+ task: conversational
+ nebius:
+ providerId: Qwen/Qwen2.5-72B-Instruct-fast
+ status: live
+ task: conversational
+ novita:
+ providerId: qwen/qwen-2.5-72b-instruct
+ status: live
+ task: conversational
+ together:
+ providerId: Qwen/Qwen2.5-72B-Instruct-Turbo
+ status: live
+ task: conversational
+ status:
+ code: 200
+ message: OK
+- request:
+ body: null
+ headers: {}
+ method: POST
+ uri: https://router.huggingface.co/nebius/v1/chat/completions
+ response:
+ headers:
+ access-control-allow-credentials:
+ - 'true'
+ access-control-allow-origin:
+ - '*'
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '680'
+ content-type:
+ - application/json
+ cross-origin-opener-policy:
+ - same-origin
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains
+ vary:
+ - Origin
+ parsed_body:
+ choices:
+ - finish_reason: stop
+ index: 0
+ logprobs: null
+ message:
+ audio: null
+ content: Hello! How can I assist you today? Feel free to ask me any questions or let me know if you need help with
+ anything specific.
+ function_call: null
+ reasoning_content: null
+ refusal: null
+ role: assistant
+ tool_calls: []
+ stop_reason: null
+ created: 1749475549
+ id: chatcmpl-6050852c70164258bb9bab4e93e2b69c
+ model: Qwen/Qwen2.5-72B-Instruct-fast
+ object: chat.completion
+ prompt_logprobs: null
+ service_tier: null
+ system_fingerprint: null
+ usage:
+ completion_tokens: 29
+ completion_tokens_details: null
+ prompt_tokens: 30
+ prompt_tokens_details: null
+ total_tokens: 59
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/models/cassettes/test_huggingface/test_request_simple_usage.yaml b/tests/models/cassettes/test_huggingface/test_request_simple_usage.yaml
new file mode 100644
index 0000000000..4025ce48a1
--- /dev/null
+++ b/tests/models/cassettes/test_huggingface/test_request_simple_usage.yaml
@@ -0,0 +1,122 @@
+interactions:
+- request:
+ body: null
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ method: GET
+ uri: https://huggingface.co/api/models/Qwen/Qwen2.5-72B-Instruct?expand=inferenceProviderMapping
+ response:
+ headers:
+ access-control-allow-origin:
+ - https://huggingface.co
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '703'
+ content-type:
+ - application/json; charset=utf-8
+ cross-origin-opener-policy:
+ - same-origin
+ etag:
+ - W/"2bf-bkSLwumMG89/DZCsDWwBvtIEsEs"
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ vary:
+ - Origin
+ parsed_body:
+ _id: 66e81cefd1b1391042d0e47e
+ id: Qwen/Qwen2.5-72B-Instruct
+ inferenceProviderMapping:
+ featherless-ai:
+ providerId: Qwen/Qwen2.5-72B-Instruct
+ status: live
+ task: conversational
+ fireworks-ai:
+ providerId: accounts/fireworks/models/qwen2p5-72b-instruct
+ status: live
+ task: conversational
+ hyperbolic:
+ providerId: Qwen/Qwen2.5-72B-Instruct
+ status: error
+ task: conversational
+ nebius:
+ providerId: Qwen/Qwen2.5-72B-Instruct-fast
+ status: live
+ task: conversational
+ novita:
+ providerId: qwen/qwen-2.5-72b-instruct
+ status: error
+ task: conversational
+ together:
+ providerId: Qwen/Qwen2.5-72B-Instruct-Turbo
+ status: live
+ task: conversational
+ status:
+ code: 200
+ message: OK
+- request:
+ body: null
+ headers: {}
+ method: POST
+ uri: https://router.huggingface.co/nebius/v1/chat/completions
+ response:
+ headers:
+ access-control-allow-credentials:
+ - 'true'
+ access-control-allow-origin:
+ - '*'
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '712'
+ content-type:
+ - application/json
+ cross-origin-opener-policy:
+ - same-origin
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains
+ vary:
+ - Origin
+ parsed_body:
+ choices:
+ - finish_reason: stop
+ index: 0
+ logprobs: null
+ message:
+ audio: null
+ content: Hello! It's great to meet you. How can I assist you today? Whether you have any questions, need some advice,
+ or just want to chat, feel free to let me know!
+ function_call: null
+ reasoning_content: null
+ refusal: null
+ role: assistant
+ tool_calls: []
+ stop_reason: null
+ created: 1751982062
+ id: chatcmpl-f366f315c05040fd9c4a505b516bce4b
+ model: Qwen/Qwen2.5-72B-Instruct-fast
+ object: chat.completion
+ prompt_logprobs: null
+ service_tier: null
+ system_fingerprint: null
+ usage:
+ completion_tokens: 40
+ completion_tokens_details: null
+ prompt_tokens: 30
+ prompt_tokens_details: null
+ total_tokens: 70
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/models/cassettes/test_huggingface/test_simple_completion.yaml b/tests/models/cassettes/test_huggingface/test_simple_completion.yaml
new file mode 100644
index 0000000000..a5f1d979ec
--- /dev/null
+++ b/tests/models/cassettes/test_huggingface/test_simple_completion.yaml
@@ -0,0 +1,122 @@
+interactions:
+- request:
+ body: null
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ method: GET
+ uri: https://huggingface.co/api/models/Qwen/Qwen2.5-72B-Instruct?expand=inferenceProviderMapping
+ response:
+ headers:
+ access-control-allow-origin:
+ - https://huggingface.co
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '703'
+ content-type:
+ - application/json; charset=utf-8
+ cross-origin-opener-policy:
+ - same-origin
+ etag:
+ - W/"2bf-bkSLwumMG89/DZCsDWwBvtIEsEs"
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ vary:
+ - Origin
+ parsed_body:
+ _id: 66e81cefd1b1391042d0e47e
+ id: Qwen/Qwen2.5-72B-Instruct
+ inferenceProviderMapping:
+ featherless-ai:
+ providerId: Qwen/Qwen2.5-72B-Instruct
+ status: live
+ task: conversational
+ fireworks-ai:
+ providerId: accounts/fireworks/models/qwen2p5-72b-instruct
+ status: live
+ task: conversational
+ hyperbolic:
+ providerId: Qwen/Qwen2.5-72B-Instruct
+ status: error
+ task: conversational
+ nebius:
+ providerId: Qwen/Qwen2.5-72B-Instruct-fast
+ status: live
+ task: conversational
+ novita:
+ providerId: qwen/qwen-2.5-72b-instruct
+ status: error
+ task: conversational
+ together:
+ providerId: Qwen/Qwen2.5-72B-Instruct-Turbo
+ status: live
+ task: conversational
+ status:
+ code: 200
+ message: OK
+- request:
+ body: null
+ headers: {}
+ method: POST
+ uri: https://router.huggingface.co/nebius/v1/chat/completions
+ response:
+ headers:
+ access-control-allow-credentials:
+ - 'true'
+ access-control-allow-origin:
+ - '*'
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '680'
+ content-type:
+ - application/json
+ cross-origin-opener-policy:
+ - same-origin
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains
+ vary:
+ - Origin
+ parsed_body:
+ choices:
+ - finish_reason: stop
+ index: 0
+ logprobs: null
+ message:
+ audio: null
+ content: Hello! How can I assist you today? Feel free to ask me any questions or let me know if you need help with
+ anything specific.
+ function_call: null
+ reasoning_content: null
+ refusal: null
+ role: assistant
+ tool_calls: []
+ stop_reason: null
+ created: 1751982153
+ id: chatcmpl-d445c0d473a84791af2acf356cc00df7
+ model: Qwen/Qwen2.5-72B-Instruct-fast
+ object: chat.completion
+ prompt_logprobs: null
+ service_tier: null
+ system_fingerprint: null
+ usage:
+ completion_tokens: 29
+ completion_tokens_details: null
+ prompt_tokens: 30
+ prompt_tokens_details: null
+ total_tokens: 59
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/models/cassettes/test_huggingface/test_stream_completion.yaml b/tests/models/cassettes/test_huggingface/test_stream_completion.yaml
new file mode 100644
index 0000000000..e592d3f271
--- /dev/null
+++ b/tests/models/cassettes/test_huggingface/test_stream_completion.yaml
@@ -0,0 +1,319 @@
+interactions:
+- request:
+ body: null
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ method: GET
+ uri: https://huggingface.co/api/models/Qwen/Qwen2.5-72B-Instruct?expand=inferenceProviderMapping
+ response:
+ headers:
+ access-control-allow-origin:
+ - https://huggingface.co
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '703'
+ content-type:
+ - application/json; charset=utf-8
+ cross-origin-opener-policy:
+ - same-origin
+ etag:
+ - W/"2bf-bkSLwumMG89/DZCsDWwBvtIEsEs"
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ vary:
+ - Origin
+ parsed_body:
+ _id: 66e81cefd1b1391042d0e47e
+ id: Qwen/Qwen2.5-72B-Instruct
+ inferenceProviderMapping:
+ featherless-ai:
+ providerId: Qwen/Qwen2.5-72B-Instruct
+ status: live
+ task: conversational
+ fireworks-ai:
+ providerId: accounts/fireworks/models/qwen2p5-72b-instruct
+ status: live
+ task: conversational
+ hyperbolic:
+ providerId: Qwen/Qwen2.5-72B-Instruct
+ status: error
+ task: conversational
+ nebius:
+ providerId: Qwen/Qwen2.5-72B-Instruct-fast
+ status: live
+ task: conversational
+ novita:
+ providerId: qwen/qwen-2.5-72b-instruct
+ status: error
+ task: conversational
+ together:
+ providerId: Qwen/Qwen2.5-72B-Instruct-Turbo
+ status: live
+ task: conversational
+ status:
+ code: 200
+ message: OK
+- request:
+ body: null
+ headers: {}
+ method: POST
+ uri: https://router.huggingface.co/nebius/v1/chat/completions
+ response:
+ body:
+ string: |+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":"","role":"assistant"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":"Hello"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":"!"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" It"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" seems"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" like"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" your"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" message"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" might"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" have"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" been"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" cut"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" off"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" or"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" not"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" fully"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" sent"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":"."},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" Could"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" you"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" please"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" provide"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" more"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" details"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" so"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" I"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" can"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" assist"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" you"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":" better"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":"?"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-da9066b0c0ff4cdbae89c40870e43764","choices":[{"delta":{"content":""},"finish_reason":"stop","index":0,"logprobs":null,"stop_reason":null}],"created":1751980879,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: [DONE]
+
+ headers:
+ access-control-allow-credentials:
+ - 'true'
+ access-control-allow-origin:
+ - '*'
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-type:
+ - text/event-stream; charset=utf-8
+ cross-origin-opener-policy:
+ - same-origin
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains
+ transfer-encoding:
+ - chunked
+ vary:
+ - Origin
+ status:
+ code: 200
+ message: OK
+- request:
+ body: null
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ method: GET
+ uri: https://huggingface.co/api/models/Qwen/Qwen2.5-72B-Instruct?expand=inferenceProviderMapping
+ response:
+ headers:
+ access-control-allow-origin:
+ - https://huggingface.co
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-length:
+ - '703'
+ content-type:
+ - application/json; charset=utf-8
+ cross-origin-opener-policy:
+ - same-origin
+ etag:
+ - W/"2bf-bkSLwumMG89/DZCsDWwBvtIEsEs"
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ vary:
+ - Origin
+ parsed_body:
+ _id: 66e81cefd1b1391042d0e47e
+ id: Qwen/Qwen2.5-72B-Instruct
+ inferenceProviderMapping:
+ featherless-ai:
+ providerId: Qwen/Qwen2.5-72B-Instruct
+ status: live
+ task: conversational
+ fireworks-ai:
+ providerId: accounts/fireworks/models/qwen2p5-72b-instruct
+ status: live
+ task: conversational
+ hyperbolic:
+ providerId: Qwen/Qwen2.5-72B-Instruct
+ status: error
+ task: conversational
+ nebius:
+ providerId: Qwen/Qwen2.5-72B-Instruct-fast
+ status: live
+ task: conversational
+ novita:
+ providerId: qwen/qwen-2.5-72b-instruct
+ status: error
+ task: conversational
+ together:
+ providerId: Qwen/Qwen2.5-72B-Instruct-Turbo
+ status: live
+ task: conversational
+ status:
+ code: 200
+ message: OK
+- request:
+ body: null
+ headers: {}
+ method: POST
+ uri: https://router.huggingface.co/nebius/v1/chat/completions
+ response:
+ body:
+ string: |+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":"","role":"assistant"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":"Hello"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":"!"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" How"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" can"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" I"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" assist"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" you"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" today"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":"?"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" Feel"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" free"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" to"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" ask"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" me"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" any"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" questions"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" or"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" let"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" me"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" know"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" if"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" you"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" need"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" help"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" with"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" anything"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":" specific"},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":"."},"finish_reason":null,"index":0,"logprobs":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: {"id":"chatcmpl-dad488ace0b540629381a97ed61f6426","choices":[{"delta":{"content":""},"finish_reason":"stop","index":0,"logprobs":null,"stop_reason":null}],"created":1751980905,"model":"Qwen/Qwen2.5-72B-Instruct-fast","object":"chat.completion.chunk"}
+
+ data: [DONE]
+
+ headers:
+ access-control-allow-credentials:
+ - 'true'
+ access-control-allow-origin:
+ - '*'
+ access-control-expose-headers:
+ - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+ connection:
+ - keep-alive
+ content-type:
+ - text/event-stream; charset=utf-8
+ cross-origin-opener-policy:
+ - same-origin
+ referrer-policy:
+ - strict-origin-when-cross-origin
+ strict-transport-security:
+ - max-age=31536000; includeSubDomains
+ transfer-encoding:
+ - chunked
+ vary:
+ - Origin
+ status:
+ code: 200
+ message: OK
+version: 1
+...
diff --git a/tests/models/test_huggingface.py b/tests/models/test_huggingface.py
new file mode 100644
index 0000000000..bc6a7a359d
--- /dev/null
+++ b/tests/models/test_huggingface.py
@@ -0,0 +1,999 @@
+from __future__ import annotations as _annotations
+
+import json
+from collections.abc import Sequence
+from dataclasses import asdict, dataclass, field
+from datetime import datetime, timezone
+from functools import cached_property
+from typing import Any, Literal, Union, cast
+from unittest.mock import Mock
+
+import pytest
+from inline_snapshot import snapshot
+from typing_extensions import TypedDict
+
+from pydantic_ai import Agent, ModelRetry, UnexpectedModelBehavior
+from pydantic_ai.exceptions import ModelHTTPError
+from pydantic_ai.messages import (
+ AudioUrl,
+ BinaryContent,
+ DocumentUrl,
+ ImageUrl,
+ ModelRequest,
+ ModelResponse,
+ RetryPromptPart,
+ SystemPromptPart,
+ TextPart,
+ ThinkingPart,
+ ToolCallPart,
+ ToolReturnPart,
+ UserPromptPart,
+ VideoUrl,
+)
+from pydantic_ai.result import Usage
+from pydantic_ai.settings import ModelSettings
+from pydantic_ai.tools import RunContext
+
+from ..conftest import IsDatetime, IsInstance, IsNow, IsStr, raise_if_exception, try_import
+from .mock_async_stream import MockAsyncStream
+
+with try_import() as imports_successful:
+ import aiohttp
+ from huggingface_hub import (
+ AsyncInferenceClient,
+ ChatCompletionInputMessage,
+ ChatCompletionOutput,
+ ChatCompletionOutputComplete,
+ ChatCompletionOutputFunctionDefinition,
+ ChatCompletionOutputMessage,
+ ChatCompletionOutputToolCall,
+ ChatCompletionOutputUsage,
+ ChatCompletionStreamOutput,
+ ChatCompletionStreamOutputChoice,
+ ChatCompletionStreamOutputDelta,
+ ChatCompletionStreamOutputDeltaToolCall,
+ ChatCompletionStreamOutputFunction,
+ ChatCompletionStreamOutputUsage,
+ )
+ from huggingface_hub.errors import HfHubHTTPError
+
+ from pydantic_ai.models.huggingface import HuggingFaceModel
+ from pydantic_ai.providers.huggingface import HuggingFaceProvider
+
+ MockChatCompletion = Union[ChatCompletionOutput, Exception]
+ MockStreamEvent = Union[ChatCompletionStreamOutput, Exception]
+
+pytestmark = [
+ pytest.mark.skipif(not imports_successful(), reason='huggingface_hub not installed'),
+ pytest.mark.anyio,
+ pytest.mark.filterwarnings('ignore::ResourceWarning'),
+]
+
+
+@dataclass
+class MockHuggingFace:
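+    """Stand-in for AsyncInferenceClient used by these tests.
+
+    Returns canned completions or stream events and records the kwargs
+    passed to ``chat.completions.create`` so tests can assert on them.
+    """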
+ completions: MockChatCompletion | Sequence[MockChatCompletion] | None = None
+ stream: Sequence[MockStreamEvent] | Sequence[Sequence[MockStreamEvent]] | None = None
+ index: int = 0
+ chat_completion_kwargs: list[dict[str, Any]] = field(default_factory=list)
+
+ @cached_property
+ def chat(self) -> Any:
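+        # Recreate the client's chat.completions.create attribute chain with ad-hoc types.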
+ completions = type('Completions', (), {'create': self.chat_completions_create})
+ return type('Chat', (), {'completions': completions})
+
+ @classmethod
+ def create_mock(cls, completions: MockChatCompletion | Sequence[MockChatCompletion]) -> AsyncInferenceClient:
+ return cast(AsyncInferenceClient, cls(completions=completions))
+
+ @classmethod
+ def create_stream_mock(
+ cls, stream: Sequence[MockStreamEvent] | Sequence[Sequence[MockStreamEvent]]
+ ) -> AsyncInferenceClient:
+ return cast(AsyncInferenceClient, cls(stream=stream))
+
+ async def chat_completions_create(
+ self, *_args: Any, stream: bool = False, **kwargs: Any
+ ) -> ChatCompletionOutput | MockAsyncStream[MockStreamEvent]:
+ self.chat_completion_kwargs.append(kwargs)
+ if stream or self.stream:
+ assert self.stream is not None, 'you can only use `stream=True` if `stream` is provided'
+ if isinstance(self.stream[0], Sequence):
+ response = MockAsyncStream(iter(cast(list[MockStreamEvent], self.stream[self.index])))
+ else:
+ response = MockAsyncStream(iter(cast(list[MockStreamEvent], self.stream)))
+ else:
+            assert self.completions is not None, 'you can only use `stream=False` if `completions` is provided'
+ if isinstance(self.completions, Sequence):
+ raise_if_exception(self.completions[self.index])
+ response = cast(ChatCompletionOutput, self.completions[self.index])
+ else:
+ raise_if_exception(self.completions)
+ response = cast(ChatCompletionOutput, self.completions)
+ self.index += 1
+ return response
+
+
+def get_mock_chat_completion_kwargs(hf_client: AsyncInferenceClient) -> list[dict[str, Any]]:
+ if isinstance(hf_client, MockHuggingFace):
+ return hf_client.chat_completion_kwargs
+ else: # pragma: no cover
+ raise RuntimeError('Not a MockHuggingFace instance')
+
+
+def completion_message(
+ message: ChatCompletionInputMessage | ChatCompletionOutputMessage, *, usage: ChatCompletionOutputUsage | None = None
+) -> ChatCompletionOutput:
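+    """Wrap a single message in a minimal one-choice ChatCompletionOutput."""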
+ choices = [ChatCompletionOutputComplete(finish_reason='stop', index=0, message=message)] # type:ignore
+ return ChatCompletionOutput.parse_obj_as_instance( # type: ignore
+ {
+ 'id': '123',
+ 'choices': choices,
+ 'created': 1704067200, # 2024-01-01
+ 'model': 'hf-model',
+ 'object': 'chat.completion',
+ 'usage': usage,
+ }
+ )
+
+
+@pytest.mark.vcr()
+async def test_simple_completion(allow_model_requests: None, huggingface_api_key: str):
+ model = HuggingFaceModel(
+ 'Qwen/Qwen2.5-72B-Instruct',
+ provider=HuggingFaceProvider(provider_name='nebius', api_key=huggingface_api_key),
+ )
+ agent = Agent(model)
+
+ result = await agent.run('hello')
+ assert (
+ result.output
+ == 'Hello! How can I assist you today? Feel free to ask me any questions or let me know if you need help with anything specific.'
+ )
+ messages = result.all_messages()
+ request = messages[0]
+ response = messages[1]
+ assert request.parts[0].content == 'hello' # type: ignore
+ assert response == ModelResponse(
+ parts=[
+ TextPart(
+ content='Hello! How can I assist you today? Feel free to ask me any questions or let me know if you need help with anything specific.'
+ )
+ ],
+ usage=Usage(requests=1, request_tokens=30, response_tokens=29, total_tokens=59),
+ model_name='Qwen/Qwen2.5-72B-Instruct-fast',
+ timestamp=datetime(2025, 7, 8, 13, 42, 33, tzinfo=timezone.utc),
+ vendor_id='chatcmpl-d445c0d473a84791af2acf356cc00df7',
+ )
+
+
+@pytest.mark.vcr()
+async def test_request_simple_usage(allow_model_requests: None, huggingface_api_key: str):
+ model = HuggingFaceModel(
+ 'Qwen/Qwen2.5-72B-Instruct',
+ provider=HuggingFaceProvider(provider_name='nebius', api_key=huggingface_api_key),
+ )
+ agent = Agent(model)
+
+ result = await agent.run('Hello')
+ assert (
+ result.output
+ == "Hello! It's great to meet you. How can I assist you today? Whether you have any questions, need some advice, or just want to chat, feel free to let me know!"
+ )
+ assert result.usage() == snapshot(Usage(requests=1, request_tokens=30, response_tokens=40, total_tokens=70))
+
+
+async def test_request_structured_response(
+ allow_model_requests: None,
+):
+ tool_call = ChatCompletionOutputToolCall.parse_obj_as_instance( # type:ignore
+ {
+ 'function': ChatCompletionOutputFunctionDefinition.parse_obj_as_instance( # type:ignore
+ {
+ 'name': 'final_result',
+ 'arguments': '{"response": [1, 2, 123]}',
+ }
+ ),
+ 'id': '123',
+ 'type': 'function',
+ }
+ )
+ message = ChatCompletionOutputMessage.parse_obj_as_instance( # type:ignore
+ {
+ 'content': None,
+ 'role': 'assistant',
+ 'tool_calls': [tool_call],
+ }
+ )
+ c = completion_message(message)
+
+ mock_client = MockHuggingFace.create_mock(c)
+ model = HuggingFaceModel(
+ 'Qwen/Qwen2.5-72B-Instruct',
+ provider=HuggingFaceProvider(provider_name='nebius', hf_client=mock_client, api_key='x'),
+ )
+ agent = Agent(model, output_type=list[int])
+
+ result = await agent.run('Hello')
+ assert result.output == [1, 2, 123]
+ messages = result.all_messages()
+ assert messages[0].parts[0].content == 'Hello' # type: ignore
+ assert messages[1] == ModelResponse(
+ parts=[
+ ToolCallPart(
+ tool_name='final_result',
+ args='{"response": [1, 2, 123]}',
+ tool_call_id='123',
+ )
+ ],
+ usage=Usage(requests=1),
+ model_name='hf-model',
+ timestamp=datetime(2024, 1, 1, tzinfo=timezone.utc),
+ vendor_id='123',
+ )
+
+
+async def test_stream_completion(allow_model_requests: None):
+ stream = [text_chunk('hello '), text_chunk('world', finish_reason='stop')]
+ mock_client = MockHuggingFace.create_stream_mock(stream)
+ model = HuggingFaceModel('hf-model', provider=HuggingFaceProvider(hf_client=mock_client, api_key='x'))
+ agent = Agent(model)
+
+ async with agent.run_stream('') as result:
+ assert [c async for c in result.stream_text(debounce_by=None)] == snapshot(['hello ', 'hello world'])
+
+
+async def test_multiple_stream_calls(allow_model_requests: None):
+ stream = [
+ [text_chunk('first '), text_chunk('call', finish_reason='stop')],
+ [text_chunk('second '), text_chunk('call', finish_reason='stop')],
+ ]
+ mock_client = MockHuggingFace.create_stream_mock(stream)
+ model = HuggingFaceModel('hf-model', provider=HuggingFaceProvider(hf_client=mock_client, api_key='x'))
+ agent = Agent(model)
+
+ async with agent.run_stream('first') as result:
+ assert [c async for c in result.stream_text(debounce_by=None)] == snapshot(['first ', 'first call'])
+
+ async with agent.run_stream('second') as result:
+ assert [c async for c in result.stream_text(debounce_by=None)] == snapshot(['second ', 'second call'])
+
+
+async def test_request_tool_call(allow_model_requests: None):
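+    """Exercise tool-call retries: a failing call, a successful retry, then a final text response."""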
+ tool_call_1 = ChatCompletionOutputToolCall.parse_obj_as_instance( # type:ignore
+ {
+ 'function': ChatCompletionOutputFunctionDefinition.parse_obj_as_instance( # type:ignore
+ {
+ 'name': 'get_location',
+                    'arguments': '{"loc_name": "San Francisco"}',
+ }
+ ),
+ 'id': '1',
+ 'type': 'function',
+ }
+ )
+ usage_1 = ChatCompletionOutputUsage.parse_obj_as_instance( # type:ignore
+ {
+ 'prompt_tokens': 1,
+ 'completion_tokens': 1,
+ 'total_tokens': 2,
+ }
+ )
+ tool_call_2 = ChatCompletionOutputToolCall.parse_obj_as_instance( # type:ignore
+ {
+ 'function': ChatCompletionOutputFunctionDefinition.parse_obj_as_instance( # type:ignore
+ {
+ 'name': 'get_location',
+ 'arguments': '{"loc_name": "London"}',
+ }
+ ),
+ 'id': '2',
+ 'type': 'function',
+ }
+ )
+ usage_2 = ChatCompletionOutputUsage.parse_obj_as_instance( # type:ignore
+ {
+ 'prompt_tokens': 2,
+ 'completion_tokens': 1,
+ 'total_tokens': 3,
+ }
+ )
+ responses = [
+ completion_message(
+ ChatCompletionOutputMessage.parse_obj_as_instance( # type:ignore
+ {
+ 'content': None,
+ 'role': 'assistant',
+ 'tool_calls': [tool_call_1],
+ }
+ ),
+ usage=usage_1,
+ ),
+ completion_message(
+ ChatCompletionOutputMessage.parse_obj_as_instance( # type:ignore
+ {
+ 'content': None,
+ 'role': 'assistant',
+ 'tool_calls': [tool_call_2],
+ }
+ ),
+ usage=usage_2,
+ ),
+ completion_message(
+ ChatCompletionOutputMessage.parse_obj_as_instance( # type:ignore
+ {
+ 'content': 'final response',
+ 'role': 'assistant',
+ }
+ ),
+ ),
+ ]
+ mock_client = MockHuggingFace.create_mock(responses)
+ model = HuggingFaceModel('hf-model', provider=HuggingFaceProvider(hf_client=mock_client, api_key='x'))
+ agent = Agent(model, system_prompt='this is the system prompt')
+
+ @agent.tool_plain
+ async def get_location(loc_name: str) -> str:
+ if loc_name == 'London':
+ return json.dumps({'lat': 51, 'lng': 0})
+ else:
+ raise ModelRetry('Wrong location, please try again')
+
+ result = await agent.run('Hello')
+ assert result.output == 'final response'
+ assert result.all_messages() == snapshot(
+ [
+ ModelRequest(
+ parts=[
+ SystemPromptPart(content='this is the system prompt', timestamp=IsNow(tz=timezone.utc)),
+ UserPromptPart(content='Hello', timestamp=IsNow(tz=timezone.utc)),
+ ]
+ ),
+ ModelResponse(
+ parts=[
+ ToolCallPart(
+ tool_name='get_location',
+                        args='{"loc_name": "San Francisco"}',
+ tool_call_id='1',
+ )
+ ],
+ usage=Usage(requests=1, request_tokens=1, response_tokens=1, total_tokens=2),
+ model_name='hf-model',
+ timestamp=datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc),
+ vendor_id='123',
+ ),
+ ModelRequest(
+ parts=[
+ RetryPromptPart(
+ content='Wrong location, please try again',
+ tool_name='get_location',
+ tool_call_id='1',
+ timestamp=IsNow(tz=timezone.utc),
+ )
+ ]
+ ),
+ ModelResponse(
+ parts=[
+ ToolCallPart(
+ tool_name='get_location',
+ args='{"loc_name": "London"}',
+ tool_call_id='2',
+ )
+ ],
+ usage=Usage(requests=1, request_tokens=2, response_tokens=1, total_tokens=3),
+ model_name='hf-model',
+ timestamp=datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc),
+ vendor_id='123',
+ ),
+ ModelRequest(
+ parts=[
+ ToolReturnPart(
+ tool_name='get_location',
+ content='{"lat": 51, "lng": 0}',
+ tool_call_id='2',
+ timestamp=IsNow(tz=timezone.utc),
+ )
+ ]
+ ),
+ ModelResponse(
+ parts=[TextPart(content='final response')],
+ usage=Usage(requests=1),
+ model_name='hf-model',
+ timestamp=datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc),
+ vendor_id='123',
+ ),
+ ]
+ )
+
+
+FinishReason = Literal['stop', 'length', 'tool_calls', 'content_filter', 'function_call']
+
+
+def chunk(
+ delta: list[ChatCompletionStreamOutputDelta], finish_reason: FinishReason | None = None
+) -> ChatCompletionStreamOutput:
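+    """Build a streaming chunk with one choice per delta and fixed token usage."""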
+ return ChatCompletionStreamOutput.parse_obj_as_instance( # type: ignore
+ {
+ 'id': 'x',
+ 'choices': [
+                ChatCompletionStreamOutputChoice(index=index, delta=d, finish_reason=finish_reason)  # type: ignore
+                for index, d in enumerate(delta)
+ ],
+ 'created': 1704067200, # 2024-01-01
+ 'model': 'hf-model',
+ 'object': 'chat.completion.chunk',
+ 'usage': ChatCompletionStreamOutputUsage(completion_tokens=1, prompt_tokens=2, total_tokens=3), # type: ignore
+ }
+ )
+
+
+def text_chunk(text: str, finish_reason: FinishReason | None = None) -> ChatCompletionStreamOutput:
+ return chunk([ChatCompletionStreamOutputDelta(content=text, role='assistant')], finish_reason=finish_reason) # type: ignore
+
+
+async def test_stream_text(allow_model_requests: None):
+ stream = [text_chunk('hello '), text_chunk('world'), chunk([])]
+ mock_client = MockHuggingFace.create_stream_mock(stream)
+ m = HuggingFaceModel('hf-model', provider=HuggingFaceProvider(hf_client=mock_client, api_key='x'))
+ agent = Agent(m)
+
+ async with agent.run_stream('') as result:
+ assert not result.is_complete
+ assert [c async for c in result.stream_text(debounce_by=None)] == snapshot(['hello ', 'hello world'])
+ assert result.is_complete
+ assert result.usage() == snapshot(Usage(requests=1, request_tokens=6, response_tokens=3, total_tokens=9))
+
+
+async def test_stream_text_finish_reason(allow_model_requests: None):
+ stream = [
+ text_chunk('hello '),
+ text_chunk('world'),
+ text_chunk('.', finish_reason='stop'),
+ ]
+ mock_client = MockHuggingFace.create_stream_mock(stream)
+ m = HuggingFaceModel('hf-model', provider=HuggingFaceProvider(hf_client=mock_client, api_key='x'))
+ agent = Agent(m)
+
+ async with agent.run_stream('') as result:
+ assert not result.is_complete
+ assert [c async for c in result.stream_text(debounce_by=None)] == snapshot(
+ ['hello ', 'hello world', 'hello world.']
+ )
+ assert result.is_complete
+
+
+def struc_chunk(
+ tool_name: str | None, tool_arguments: str | None, finish_reason: FinishReason | None = None
+) -> ChatCompletionStreamOutput:
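+    """Build a streaming chunk carrying a partial tool-call delta (name and/or argument fragment)."""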
+ return chunk(
+ [
+ ChatCompletionStreamOutputDelta.parse_obj_as_instance( # type: ignore
+ {
+ 'role': 'assistant',
+ 'tool_calls': [
+ ChatCompletionStreamOutputDeltaToolCall.parse_obj_as_instance( # type: ignore
+ {
+ 'index': 0,
+ 'function': ChatCompletionStreamOutputFunction.parse_obj_as_instance( # type: ignore
+ {
+ 'name': tool_name,
+ 'arguments': tool_arguments,
+ }
+ ),
+ }
+ )
+ ],
+ }
+ ),
+ ],
+ finish_reason=finish_reason,
+ )
+
+
+class MyTypedDict(TypedDict, total=False):
+ first: str
+ second: str
+
+
+async def test_stream_structured(allow_model_requests: None):
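+    """Stream a tool-call output: role-only deltas, then the tool name, then argument fragments."""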
+ stream = [
+ chunk([ChatCompletionStreamOutputDelta(role='assistant')]), # type: ignore
+ chunk([ChatCompletionStreamOutputDelta(role='assistant', tool_calls=[])]), # type: ignore
+ chunk(
+ [
+ ChatCompletionStreamOutputDelta(
+ role='assistant', # type: ignore
+ tool_calls=[ # type: ignore
+ ChatCompletionStreamOutputDeltaToolCall(id='0', type='function', index=0, function=None) # type: ignore
+ ],
+ )
+ ]
+ ),
+ chunk(
+ [
+ ChatCompletionStreamOutputDelta(
+ role='assistant', # type: ignore
+ tool_calls=[ # type: ignore
+ ChatCompletionStreamOutputDeltaToolCall(id='0', type='function', index=0, function=None) # type: ignore
+ ],
+ )
+ ]
+ ),
+ struc_chunk('final_result', None),
+ chunk(
+ [
+ ChatCompletionStreamOutputDelta(
+ role='assistant', # type: ignore
+ tool_calls=[ # type: ignore
+ ChatCompletionStreamOutputDeltaToolCall(id='0', type='function', index=0, function=None) # type: ignore
+ ],
+ )
+ ]
+ ),
+ struc_chunk(None, '{"first": "One'),
+ struc_chunk(None, '", "second": "Two"'),
+ struc_chunk(None, '}'),
+ chunk([]),
+ ]
+ mock_client = MockHuggingFace.create_stream_mock(stream)
+ m = HuggingFaceModel('hf-model', provider=HuggingFaceProvider(hf_client=mock_client, api_key='x'))
+ agent = Agent(m, output_type=MyTypedDict)
+
+ async with agent.run_stream('') as result:
+ assert not result.is_complete
+ assert [dict(c) async for c in result.stream(debounce_by=None)] == snapshot(
+ [
+ {},
+ {'first': 'One'},
+ {'first': 'One', 'second': 'Two'},
+ {'first': 'One', 'second': 'Two'},
+ {'first': 'One', 'second': 'Two'},
+ ]
+ )
+ assert result.is_complete
+ assert result.usage() == snapshot(Usage(requests=1, request_tokens=20, response_tokens=10, total_tokens=30))
+ # each chunk reports completion_tokens=1, so response_tokens should equal the number of chunks
+ assert result.usage().response_tokens == len(stream)
+
+
+async def test_stream_structured_finish_reason(allow_model_requests: None):
+ stream = [
+ struc_chunk('final_result', None),
+ struc_chunk(None, '{"first": "One'),
+ struc_chunk(None, '", "second": "Two"'),
+ struc_chunk(None, '}'),
+ struc_chunk(None, None, finish_reason='stop'),
+ ]
+ mock_client = MockHuggingFace.create_stream_mock(stream)
+ m = HuggingFaceModel('hf-model', provider=HuggingFaceProvider(hf_client=mock_client, api_key='x'))
+ agent = Agent(m, output_type=MyTypedDict)
+
+ async with agent.run_stream('') as result:
+ assert not result.is_complete
+ assert [dict(c) async for c in result.stream(debounce_by=None)] == snapshot(
+ [
+ {'first': 'One'},
+ {'first': 'One', 'second': 'Two'},
+ {'first': 'One', 'second': 'Two'},
+ {'first': 'One', 'second': 'Two'},
+ {'first': 'One', 'second': 'Two'},
+ ]
+ )
+ assert result.is_complete
+
+
+async def test_no_content(allow_model_requests: None):
+ stream = [
+ chunk([ChatCompletionStreamOutputDelta(role='assistant')]), # type: ignore
+ chunk([ChatCompletionStreamOutputDelta(role='assistant')]), # type: ignore
+ ]
+ mock_client = MockHuggingFace.create_stream_mock(stream)
+ m = HuggingFaceModel('hf-model', provider=HuggingFaceProvider(hf_client=mock_client, api_key='x'))
+ agent = Agent(m, output_type=MyTypedDict)
+
+ with pytest.raises(UnexpectedModelBehavior, match='Received empty model response'):
+ async with agent.run_stream(''):
+ pass
+
+
+async def test_no_delta(allow_model_requests: None):
+ stream = [
+ chunk([]),
+ text_chunk('hello '),
+ text_chunk('world'),
+ ]
+ mock_client = MockHuggingFace.create_stream_mock(stream)
+ m = HuggingFaceModel('hf-model', provider=HuggingFaceProvider(hf_client=mock_client, api_key='x'))
+ agent = Agent(m)
+
+ async with agent.run_stream('') as result:
+ assert not result.is_complete
+ assert [c async for c in result.stream_text(debounce_by=None)] == snapshot(['hello ', 'hello world'])
+ assert result.is_complete
+ assert result.usage() == snapshot(Usage(requests=1, request_tokens=6, response_tokens=3, total_tokens=9))
+
+
+@pytest.mark.vcr()
+async def test_image_url_input(allow_model_requests: None, huggingface_api_key: str):
+ m = HuggingFaceModel(
+ 'Qwen/Qwen2.5-VL-72B-Instruct',
+ provider=HuggingFaceProvider(provider_name='nebius', api_key=huggingface_api_key),
+ )
+ agent = Agent(m)
+
+ result = await agent.run(
+ [
+ 'hello',
+ ImageUrl(url='https://t3.ftcdn.net/jpg/00/85/79/92/360_F_85799278_0BBGV9OAdQDTLnKwAPBCcg1J7QtiieJY.jpg'),
+ ]
+ )
+ assert result.all_messages() == snapshot(
+ [
+ ModelRequest(
+ parts=[
+ UserPromptPart(
+ content=[
+ 'hello',
+ ImageUrl(
+ url='https://t3.ftcdn.net/jpg/00/85/79/92/360_F_85799278_0BBGV9OAdQDTLnKwAPBCcg1J7QtiieJY.jpg'
+ ),
+ ],
+ timestamp=IsNow(tz=timezone.utc),
+ )
+ ]
+ ),
+ ModelResponse(
+ parts=[TextPart(content='Hello! How can I assist you with this image of a potato?')],
+ usage=Usage(requests=1, request_tokens=269, response_tokens=15, total_tokens=284),
+ model_name='Qwen/Qwen2.5-VL-72B-Instruct',
+ timestamp=datetime(2025, 7, 8, 14, 4, 39, tzinfo=timezone.utc),
+ vendor_id='chatcmpl-49aa100effab4ca28514d5ccc00d7944',
+ ),
+ ]
+ )
+
+
+@pytest.mark.vcr()
+async def test_image_as_binary_content_input(
+ allow_model_requests: None, image_content: BinaryContent, huggingface_api_key: str
+):
+ m = HuggingFaceModel(
+ 'Qwen/Qwen2.5-VL-72B-Instruct',
+ provider=HuggingFaceProvider(provider_name='nebius', api_key=huggingface_api_key),
+ )
+ agent = Agent(m)
+ result = await agent.run(['What fruit is in the image?', image_content])
+ assert result.output == snapshot(
+ 'The fruit in the image is a kiwi. It has been sliced in half, revealing its bright green flesh with small black seeds arranged in a circular pattern around a white center. The outer skin of the kiwi is fuzzy and brown.'
+ )
+
+
+def test_model_status_error(allow_model_requests: None) -> None:
+ error = HfHubHTTPError(message='test_error', response=Mock(status_code=500, content={'error': 'test error'}))
+ mock_client = MockHuggingFace.create_mock(error)
+ m = HuggingFaceModel('not_a_model', provider=HuggingFaceProvider(hf_client=mock_client, api_key='x'))
+ agent = Agent(m)
+ with pytest.raises(ModelHTTPError) as exc_info:
+ agent.run_sync('hello')
+ assert str(exc_info.value) == snapshot("status_code: 500, model_name: not_a_model, body: {'error': 'test error'}")
+
+
+@pytest.mark.vcr()
+async def test_request_simple_success_with_vcr(allow_model_requests: None, huggingface_api_key: str):
+ m = HuggingFaceModel(
+ 'Qwen/Qwen2.5-72B-Instruct', provider=HuggingFaceProvider(provider_name='nebius', api_key=huggingface_api_key)
+ )
+ agent = Agent(m)
+ result = await agent.run('hello')
+ assert result.output == snapshot(
+ 'Hello! How can I assist you today? Feel free to ask me any questions or let me know if you need help with anything specific.'
+ )
+
+
+@pytest.mark.vcr()
+async def test_hf_model_instructions(allow_model_requests: None, huggingface_api_key: str):
+ m = HuggingFaceModel(
+ 'Qwen/Qwen2.5-72B-Instruct', provider=HuggingFaceProvider(provider_name='nebius', api_key=huggingface_api_key)
+ )
+
+ def simple_instructions(ctx: RunContext):
+ return 'You are a helpful assistant.'
+
+ agent = Agent(m, instructions=simple_instructions)
+
+ result = await agent.run('What is the capital of France?')
+ assert result.all_messages() == snapshot(
+ [
+ ModelRequest(
+ parts=[UserPromptPart(content='What is the capital of France?', timestamp=IsDatetime())],
+ instructions='You are a helpful assistant.',
+ ),
+ ModelResponse(
+ parts=[TextPart(content='Paris')],
+ usage=Usage(requests=1, request_tokens=26, response_tokens=2, total_tokens=28),
+ model_name='Qwen/Qwen2.5-72B-Instruct-fast',
+ timestamp=IsDatetime(),
+ vendor_id='chatcmpl-b3936940372c481b8d886e596dc75524',
+ ),
+ ]
+ )
+
+
+@pytest.mark.parametrize(
+ 'model_name', ['Qwen/Qwen2.5-72B-Instruct', 'deepseek-ai/DeepSeek-R1-0528', 'meta-llama/Llama-3.3-70B-Instruct']
+)
+@pytest.mark.vcr()
+async def test_max_completion_tokens(allow_model_requests: None, model_name: str, huggingface_api_key: str):
+ m = HuggingFaceModel(model_name, provider=HuggingFaceProvider(provider_name='nebius', api_key=huggingface_api_key))
+ agent = Agent(m, model_settings=ModelSettings(max_tokens=100))
+
+ result = await agent.run('hello')
+ assert result.output == IsStr()
+
+
+def test_system_property():
+ model = HuggingFaceModel('some-model', provider=HuggingFaceProvider(hf_client=Mock(), api_key='x'))
+ assert model.system == 'huggingface'
+
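+# A minimal companion check; this assumes HuggingFaceModel exposes `model_name`
+# like the other pydantic-ai models:
+def test_model_name_property():
+ model = HuggingFaceModel('some-model', provider=HuggingFaceProvider(hf_client=Mock(), api_key='x'))
+ assert model.model_name == 'some-model'
+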
+
+async def test_model_client_response_error(allow_model_requests: None) -> None:
+ request_info = Mock(spec=aiohttp.RequestInfo)
+ request_info.url = 'http://test.com'
+ request_info.method = 'POST'
+ request_info.headers = {}
+ request_info.real_url = 'http://test.com'
+ error = aiohttp.ClientResponseError(request_info, history=(), status=400, message='Bad Request')
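+ # not a standard aiohttp attribute: the model is expected to read this payload
+ # as the error body (see the asserted message below)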
+ error.response_error_payload = {'error': 'test error'} # type: ignore
+
+ mock_client = MockHuggingFace.create_mock(error)
+ m = HuggingFaceModel('not_a_model', provider=HuggingFaceProvider(hf_client=mock_client, api_key='x'))
+ agent = Agent(m)
+ with pytest.raises(ModelHTTPError) as exc_info:
+ await agent.run('hello')
+ assert str(exc_info.value) == snapshot("status_code: 400, model_name: not_a_model, body: {'error': 'test error'}")
+
+
+async def test_process_response_no_created_timestamp(allow_model_requests: None):
+ c = completion_message(
+ ChatCompletionOutputMessage.parse_obj_as_instance({'content': 'response', 'role': 'assistant'}), # type: ignore
+ )
+ c.created = None # type: ignore
+
+ mock_client = MockHuggingFace.create_mock(c)
+ model = HuggingFaceModel(
+ 'test-model',
+ provider=HuggingFaceProvider(hf_client=mock_client, api_key='x'),
+ )
+ agent = Agent(model)
+ result = await agent.run('Hello')
+ messages = result.all_messages()
+ response_message = messages[1]
+ assert isinstance(response_message, ModelResponse)
+ assert response_message.timestamp == IsNow(tz=timezone.utc)
+
+
+async def test_retry_prompt_without_tool_name(allow_model_requests: None):
+ responses = [
+ completion_message(
+ ChatCompletionOutputMessage.parse_obj_as_instance({'content': 'invalid-response', 'role': 'assistant'}) # type: ignore
+ ),
+ completion_message(
+ ChatCompletionOutputMessage.parse_obj_as_instance({'content': 'final-response', 'role': 'assistant'}) # type: ignore
+ ),
+ ]
+
+ mock_client = MockHuggingFace.create_mock(responses)
+ model = HuggingFaceModel(
+ 'test-model',
+ provider=HuggingFaceProvider(hf_client=mock_client, api_key='x'),
+ )
+ agent = Agent(model)
+
+ @agent.output_validator
+ def response_validator(value: str) -> str:
+ if value == 'invalid-response':
+ raise ModelRetry('Response is invalid')
+ return value
+
+ result = await agent.run('Hello')
+ assert result.output == 'final-response'
+ assert result.all_messages() == snapshot(
+ [
+ ModelRequest(parts=[UserPromptPart(content='Hello', timestamp=IsNow(tz=timezone.utc))]),
+ ModelResponse(
+ parts=[TextPart(content='invalid-response')],
+ usage=Usage(requests=1),
+ model_name='hf-model',
+ timestamp=datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc),
+ vendor_id='123',
+ ),
+ ModelRequest(
+ parts=[
+ RetryPromptPart(
+ content='Response is invalid',
+ tool_name=None,
+ tool_call_id=IsStr(),
+ timestamp=IsNow(tz=timezone.utc),
+ )
+ ]
+ ),
+ ModelResponse(
+ parts=[TextPart(content='final-response')],
+ usage=Usage(requests=1),
+ model_name='hf-model',
+ timestamp=datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc),
+ vendor_id='123',
+ ),
+ ]
+ )
+ kwargs = get_mock_chat_completion_kwargs(mock_client)[1]
+ messages = kwargs['messages']
+ assert {k: v for k, v in asdict(messages[-2]).items() if v is not None} == {
+ 'role': 'assistant',
+ 'content': 'invalid-response',
+ }
+ assert {k: v for k, v in asdict(messages[-1]).items() if v is not None} == {
+ 'role': 'user',
+ 'content': 'Validation feedback:\nResponse is invalid\n\nFix the errors and try again.',
+ }
+
+
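+# ThinkingPart content in message history is dropped when mapping messages;
+# only the text parts are joined and sent back to the provider.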
+async def test_thinking_part_in_history(allow_model_requests: None):
+ c = completion_message(ChatCompletionOutputMessage(content='response', role='assistant')) # type: ignore
+ mock_client = MockHuggingFace.create_mock(c)
+ model = HuggingFaceModel('hf-model', provider=HuggingFaceProvider(hf_client=mock_client, api_key='x'))
+ agent = Agent(model)
+ messages = [
+ ModelRequest(parts=[UserPromptPart(content='request')]),
+ ModelResponse(
+ parts=[
+ TextPart(content='thought 1'),
+ ThinkingPart(content='this should be ignored'),
+ TextPart(content='thought 2'),
+ ],
+ model_name='hf-model',
+ timestamp=datetime.now(timezone.utc),
+ ),
+ ]
+
+ await agent.run('another request', message_history=messages)
+
+ kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
+ sent_messages = kwargs['messages']
+ assert [{k: v for k, v in asdict(m).items() if v is not None} for m in sent_messages] == snapshot(
+ [
+ {'content': 'request', 'role': 'user'},
+ {'content': 'thought 1\n\nthought 2', 'role': 'assistant'},
+ {'content': 'another request', 'role': 'user'},
+ ]
+ )
+
+
+@pytest.mark.parametrize('strict', [True, False, None])
+async def test_tool_strict_mode(allow_model_requests: None, strict: bool | None):
+ tool_call = ChatCompletionOutputToolCall.parse_obj_as_instance( # type:ignore
+ {
+ 'function': ChatCompletionOutputFunctionDefinition.parse_obj_as_instance( # type:ignore
+ {
+ 'name': 'my_tool',
+ 'arguments': '{"x": 42}',
+ }
+ ),
+ 'id': '1',
+ 'type': 'function',
+ }
+ )
+ responses = [
+ completion_message(
+ ChatCompletionOutputMessage.parse_obj_as_instance( # type:ignore
+ {
+ 'content': None,
+ 'role': 'assistant',
+ 'tool_calls': [tool_call],
+ }
+ )
+ ),
+ completion_message(ChatCompletionOutputMessage(content='final response', role='assistant')), # type: ignore
+ ]
+ mock_client = MockHuggingFace.create_mock(responses)
+ model = HuggingFaceModel('hf-model', provider=HuggingFaceProvider(hf_client=mock_client, api_key='x'))
+ agent = Agent(model)
+
+ @agent.tool_plain(strict=strict)
+ def my_tool(x: int) -> int:
+ return x
+
+ result = await agent.run('hello')
+ assert result.output == 'final response'
+
+ kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
+ tools = kwargs['tools']
+ if strict is not None:
+ assert tools[0]['function']['strict'] is strict
+ else:
+ assert 'strict' not in tools[0]['function']
+
+
+@pytest.mark.parametrize(
+ 'content_item, error_message',
+ [
+ (AudioUrl(url='url'), 'AudioUrl is not supported for Hugging Face'),
+ (DocumentUrl(url='url'), 'DocumentUrl is not supported for Hugging Face'),
+ (VideoUrl(url='url'), 'VideoUrl is not supported for Hugging Face'),
+ ],
+)
+async def test_unsupported_media_types(allow_model_requests: None, content_item: Any, error_message: str):
+ model = HuggingFaceModel(
+ 'Qwen/Qwen2.5-VL-72B-Instruct',
+ provider=HuggingFaceProvider(api_key='x'),
+ )
+ agent = Agent(model)
+
+ with pytest.raises(NotImplementedError, match=error_message):
+ await agent.run(['hello', content_item])
+
+
+@pytest.mark.vcr()
+async def test_hf_model_thinking_part(allow_model_requests: None, huggingface_api_key: str):
+ m = HuggingFaceModel(
+ 'Qwen/Qwen3-235B-A22B', provider=HuggingFaceProvider(provider_name='nebius', api_key=huggingface_api_key)
+ )
+ agent = Agent(m)
+
+ result = await agent.run('How do I cross the street?')
+ assert result.all_messages() == snapshot(
+ [
+ ModelRequest(parts=[UserPromptPart(content='How do I cross the street?', timestamp=IsDatetime())]),
+ ModelResponse(
+ parts=[
+ IsInstance(ThinkingPart),
+ IsInstance(TextPart),
+ ],
+ usage=Usage(requests=1, request_tokens=15, response_tokens=1090, total_tokens=1105),
+ model_name='Qwen/Qwen3-235B-A22B',
+ timestamp=IsDatetime(),
+ vendor_id='chatcmpl-957db61fe60d4440bcfe1f11f2c5b4b9',
+ ),
+ ]
+ )
+
+ result = await agent.run(
+ 'Considering the way to cross the street, analogously, how do I cross the river?',
+ model=HuggingFaceModel(
+ 'Qwen/Qwen3-235B-A22B', provider=HuggingFaceProvider(provider_name='nebius', api_key=huggingface_api_key)
+ ),
+ message_history=result.all_messages(),
+ )
+ assert result.all_messages() == snapshot(
+ [
+ ModelRequest(parts=[UserPromptPart(content='How do I cross the street?', timestamp=IsDatetime())]),
+ ModelResponse(
+ parts=[
+ IsInstance(ThinkingPart),
+ IsInstance(TextPart),
+ ],
+ usage=Usage(requests=1, request_tokens=15, response_tokens=1090, total_tokens=1105),
+ model_name='Qwen/Qwen3-235B-A22B',
+ timestamp=IsDatetime(),
+ vendor_id='chatcmpl-957db61fe60d4440bcfe1f11f2c5b4b9',
+ ),
+ ModelRequest(
+ parts=[
+ UserPromptPart(
+ content='Considering the way to cross the street, analogously, how do I cross the river?',
+ timestamp=IsDatetime(),
+ )
+ ]
+ ),
+ ModelResponse(
+ parts=[
+ IsInstance(ThinkingPart),
+ TextPart(content=IsStr()),
+ ],
+ usage=Usage(requests=1, request_tokens=691, response_tokens=1860, total_tokens=2551),
+ model_name='Qwen/Qwen3-235B-A22B',
+ timestamp=IsDatetime(),
+ vendor_id='chatcmpl-35fdec1307634f94a39f7e26f52e12a7',
+ ),
+ ]
+ )
diff --git a/tests/models/test_model_names.py b/tests/models/test_model_names.py
index 52a3397a4f..db6f22cd8d 100644
--- a/tests/models/test_model_names.py
+++ b/tests/models/test_model_names.py
@@ -16,6 +16,7 @@
from pydantic_ai.models.cohere import CohereModelName
from pydantic_ai.models.gemini import GeminiModelName
from pydantic_ai.models.groq import GroqModelName
+ from pydantic_ai.models.huggingface import HuggingFaceModelName
from pydantic_ai.models.mistral import MistralModelName
from pydantic_ai.models.openai import OpenAIModelName
@@ -54,6 +55,7 @@ def get_model_names(model_name_type: Any) -> Iterator[str]:
]
bedrock_names = [f'bedrock:{n}' for n in get_model_names(BedrockModelName)]
deepseek_names = ['deepseek:deepseek-chat', 'deepseek:deepseek-reasoner']
+ huggingface_names = [f'huggingface:{n}' for n in get_model_names(HuggingFaceModelName)]
heroku_names = get_heroku_model_names()
extra_names = ['test']
@@ -66,6 +68,7 @@ def get_model_names(model_name_type: Any) -> Iterator[str]:
+ openai_names
+ bedrock_names
+ deepseek_names
+ + huggingface_names
+ heroku_names
+ extra_names
)
diff --git a/tests/providers/test_huggingface.py b/tests/providers/test_huggingface.py
new file mode 100644
index 0000000000..c9570a54dc
--- /dev/null
+++ b/tests/providers/test_huggingface.py
@@ -0,0 +1,142 @@
+from __future__ import annotations as _annotations
+
+import re
+from unittest.mock import MagicMock, Mock, patch
+
+import httpx
+import pytest
+
+from pydantic_ai.exceptions import UserError
+
+from ..conftest import TestEnv, try_import
+
+with try_import() as imports_successful:
+ from huggingface_hub import AsyncInferenceClient
+
+ from pydantic_ai.providers.huggingface import HuggingFaceProvider
+
+
+pytestmark = pytest.mark.skipif(not imports_successful(), reason='huggingface_hub not installed')
+
+
+def test_huggingface_provider():
+ hf_client = AsyncInferenceClient(api_key='api-key')
+ provider = HuggingFaceProvider(api_key='api-key', hf_client=hf_client)
+ assert provider.name == 'huggingface'
+ assert isinstance(provider.client, AsyncInferenceClient)
+ assert provider.client.token == 'api-key'
+
+
+def test_huggingface_provider_need_api_key(env: TestEnv) -> None:
+ env.remove('HF_TOKEN')
+ with pytest.raises(
+ UserError,
+ match=re.escape(
+ 'Set the `HF_TOKEN` environment variable or pass it via `HuggingFaceProvider(api_key=...)`'
+ 'to use the HuggingFace provider.'
+ ),
+ ):
+ HuggingFaceProvider()
+
+
+def test_huggingface_provider_pass_http_client() -> None:
+ http_client = httpx.AsyncClient()
+ with pytest.raises(
+ ValueError,
+ match=re.escape('`http_client` is ignored for HuggingFace provider, please use `hf_client` instead'),
+ ):
+ HuggingFaceProvider(http_client=http_client, api_key='api-key') # type: ignore
+
+
+def test_huggingface_provider_pass_hf_client() -> None:
+ hf_client = AsyncInferenceClient(api_key='api-key')
+ provider = HuggingFaceProvider(hf_client=hf_client, api_key='api-key')
+ assert provider.client == hf_client
+
+
+def test_hf_provider_with_base_url() -> None:
+ # base_url is supplied via a pre-configured AsyncInferenceClient rather than a provider argument
+ provider = HuggingFaceProvider(
+ hf_client=AsyncInferenceClient(base_url='https://router.huggingface.co/nebius/v1'), api_key='test-api-key'
+ )
+ assert provider.base_url == 'https://router.huggingface.co/nebius/v1'
+
+
+def test_huggingface_provider_properties():
+ mock_client = Mock(spec=AsyncInferenceClient)
+ mock_client.model = 'test-model'
+ provider = HuggingFaceProvider(hf_client=mock_client, api_key='test-api-key')
+ assert provider.name == 'huggingface'
+ assert provider.client is mock_client
+
+
+def test_huggingface_provider_init_api_key_error(monkeypatch: pytest.MonkeyPatch):
+ monkeypatch.delenv('HF_TOKEN', raising=False)
+ with pytest.raises(UserError, match='Set the `HF_TOKEN` environment variable'):
+ HuggingFaceProvider()
+
+
+@patch('pydantic_ai.providers.huggingface.AsyncInferenceClient')
+def test_huggingface_provider_init_api_key_from_env(
+ MockAsyncInferenceClient: MagicMock, monkeypatch: pytest.MonkeyPatch
+):
+ monkeypatch.setenv('HF_TOKEN', 'env-key')
+ HuggingFaceProvider()
+ MockAsyncInferenceClient.assert_called_with(api_key='env-key', provider=None, base_url=None)
+
+
+@patch('pydantic_ai.providers.huggingface.AsyncInferenceClient')
+def test_huggingface_provider_init_api_key_from_arg(
+ MockAsyncInferenceClient: MagicMock, monkeypatch: pytest.MonkeyPatch
+):
+ monkeypatch.setenv('HF_TOKEN', 'env-key')
+ HuggingFaceProvider(api_key='arg-key')
+ MockAsyncInferenceClient.assert_called_with(api_key='arg-key', provider=None, base_url=None)
+
+
+def test_huggingface_provider_init_http_client_error():
+ with pytest.raises(ValueError, match='`http_client` is ignored'):
+ HuggingFaceProvider(api_key='key', http_client=Mock()) # type: ignore[call-overload]
+
+
+def test_huggingface_provider_init_base_url_and_provider_name_error():
+ with pytest.raises(ValueError, match='Cannot provide both `base_url` and `provider_name`'):
+ HuggingFaceProvider(api_key='key', base_url='url', provider_name='provider') # type: ignore[call-overload]
+
+
+def test_huggingface_provider_init_with_hf_client():
+ mock_client = Mock(spec=AsyncInferenceClient)
+ provider = HuggingFaceProvider(hf_client=mock_client, api_key='key')
+ assert provider.client is mock_client
+
+
+@patch('pydantic_ai.providers.huggingface.AsyncInferenceClient')
+def test_huggingface_provider_init_without_hf_client(MockAsyncInferenceClient: MagicMock):
+ provider = HuggingFaceProvider(api_key='key')
+ assert provider.client is MockAsyncInferenceClient.return_value
+ MockAsyncInferenceClient.assert_called_with(api_key='key', provider=None, base_url=None)
+
+
+@patch('pydantic_ai.providers.huggingface.AsyncInferenceClient')
+def test_huggingface_provider_init_with_provider_name(MockAsyncInferenceClient: MagicMock):
+ HuggingFaceProvider(api_key='key', provider_name='test-provider')
+ MockAsyncInferenceClient.assert_called_once_with(api_key='key', provider='test-provider', base_url=None)
+
+
+@patch('pydantic_ai.providers.huggingface.AsyncInferenceClient')
+def test_huggingface_provider_init_with_base_url(MockAsyncInferenceClient: MagicMock):
+ HuggingFaceProvider(api_key='key', base_url='test-url')
+ MockAsyncInferenceClient.assert_called_once_with(api_key='key', provider=None, base_url='test-url')
+
+
+def test_huggingface_provider_init_api_key_is_none(monkeypatch: pytest.MonkeyPatch):
+ monkeypatch.delenv('HF_TOKEN', raising=False)
+ with pytest.raises(UserError):
+ HuggingFaceProvider(api_key=None)
+
+
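+# AsyncInferenceClient stores its endpoint on `.model` (base_url and model are
+# interchangeable there), so the provider's base_url mirrors the mock's model
+# attribute; compare test_hf_provider_with_base_url above.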
+def test_huggingface_provider_base_url():
+ mock_client = Mock(spec=AsyncInferenceClient)
+ mock_client.model = 'test-model'
+ provider = HuggingFaceProvider(hf_client=mock_client, api_key='test-api-key')
+ assert provider.base_url == 'test-model'
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 024116249c..8efc0da005 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -144,6 +144,7 @@ def test_list_models(capfd: CaptureFixture[str]):
'cohere',
'deepseek',
'heroku',
+ 'huggingface',
)
models = {line.strip().split(' ')[0] for line in output[3:]}
for provider in providers:
diff --git a/tests/test_examples.py b/tests/test_examples.py
index c1dda22a49..5735a11ffa 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -149,6 +149,7 @@ def print(self, *args: Any, **kwargs: Any) -> None:
env.set('CO_API_KEY', 'testing')
env.set('MISTRAL_API_KEY', 'testing')
env.set('ANTHROPIC_API_KEY', 'testing')
+ env.set('HF_TOKEN', 'hf_testing')
env.set('AWS_ACCESS_KEY_ID', 'testing')
env.set('AWS_SECRET_ACCESS_KEY', 'testing')
env.set('AWS_DEFAULT_REGION', 'us-east-1')
diff --git a/uv.lock b/uv.lock
index 2b2c5c2c3b..bea12cf7af 100644
--- a/uv.lock
+++ b/uv.lock
@@ -829,16 +829,16 @@ wheels = [
[[package]]
name = "ddgs"
-version = "9.0.0"
+version = "9.2.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "click" },
{ name = "lxml" },
{ name = "primp" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/1f/08/0e84549a1d7d5950573f73d7bc5d36f2a00f92ad8e644b59066afd430a6f/ddgs-9.0.0.tar.gz", hash = "sha256:53b47c74a8060457cb02cbb64acdf59655d799ce8e0934e945bcd878fcab3a7f", size = 21795, upload-time = "2025-07-06T15:43:50.862Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/57/e1/8154854084b24908ec782f1c2713a66b205bdcd2b20a9bc3ce274afccc24/ddgs-9.2.3.tar.gz", hash = "sha256:5ec4e0bf0a9055a991c958695b1c0194c2511d254449ab88eb874297879ed1a5", size = 26553, upload-time = "2025-07-14T17:17:24.232Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/e5/05/bd3ed9a28212b313f5678533152c4d79fbc386e44245ca5eed426d75f019/ddgs-9.0.0-py3-none-any.whl", hash = "sha256:5dd11d666d6caf1cfdbd341579637bb670c4b2f41df5413b76705519d8e7a22c", size = 17944, upload-time = "2025-07-06T15:43:49.564Z" },
+ { url = "https://files.pythonhosted.org/packages/1d/af/d42b3f4eff55cdcddf8b33631be602e40d63d7cf0cffcf15503166a46b22/ddgs-9.2.3-py3-none-any.whl", hash = "sha256:4b658edf52db3bfe80c12492077e7cc9d39312b0dbb03f8669753ac1313d3784", size = 30148, upload-time = "2025-07-14T17:17:22.969Z" },
]
[[package]]
@@ -942,16 +942,16 @@ wheels = [
[[package]]
name = "duckduckgo-search"
-version = "8.1.1"
+version = "7.5.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "click" },
{ name = "lxml" },
{ name = "primp" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/10/ef/07791a05751e6cc9de1dd49fb12730259ee109b18e6d097e25e6c32d5617/duckduckgo_search-8.1.1.tar.gz", hash = "sha256:9da91c9eb26a17e016ea1da26235d40404b46b0565ea86d75a9f78cc9441f935", size = 22868, upload-time = "2025-07-06T15:30:59.73Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/17/a8/18404f6525aefa80290afa920ed76fbab16472f19015fdb957b7113f3a9e/duckduckgo_search-7.5.0.tar.gz", hash = "sha256:3e28dc5ec9188efa3a7c8532aa05aaf03bb34b79370855760abd55e6051ff79b", size = 24657, upload-time = "2025-02-24T14:50:49.356Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/db/72/c027b3b488b1010cf71670032fcf7e681d44b81829d484bb04e31a949a8d/duckduckgo_search-8.1.1-py3-none-any.whl", hash = "sha256:f48adbb06626ee05918f7e0cef3a45639e9939805c4fc179e68c48a12f1b5062", size = 18932, upload-time = "2025-07-06T15:30:58.339Z" },
+ { url = "https://files.pythonhosted.org/packages/75/21/fc2c821a2c92c021f8f8adf9fb36235d1b49525b7cd953e85624296aab94/duckduckgo_search-7.5.0-py3-none-any.whl", hash = "sha256:6a2d3f12ae29b3e076cd43be61f5f73cd95261e0a0f318fe0ad3648d7a5dff03", size = 20238, upload-time = "2025-02-24T14:50:48.179Z" },
]
[[package]]
@@ -992,16 +992,17 @@ wheels = [
[[package]]
name = "fasta2a"
-version = "0.4.1"
+version = "0.5.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
+ { name = "eval-type-backport", marker = "python_full_version < '3.10'" },
{ name = "opentelemetry-api" },
{ name = "pydantic" },
{ name = "starlette" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/1e/65/3728453396e5efa6166cf58b32e5aef7dabeba438c8bb20e1c9461fceaed/fasta2a-0.4.1.tar.gz", hash = "sha256:2c664d572480662a73201485ce0f909d607d5d28ba33409646454cef5d0645ed", size = 13966, upload-time = "2025-07-10T08:18:55.859Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/5d/2a/f9d212026bdc74068ef9aef493a2b37ce0d4201694d158180759e07489b5/fasta2a-0.5.0.tar.gz", hash = "sha256:0bca45f675fb3354ae6cd0e6dd0be1d504ee135b8e802b4058fb3485521f61e9", size = 1436123, upload-time = "2025-07-10T16:31:01.502Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/8d/1e/e85fcd71af3a3f6c8262c89027a5c5e6c3faf348b1f9101b79d996c801df/fasta2a-0.4.1-py3-none-any.whl", hash = "sha256:f0b4a8162bd7fc9a363ef3724c395c5cb97e87d9d03769b9faaf675389c7fdfb", size = 16934, upload-time = "2025-07-10T08:18:45.014Z" },
+ { url = "https://files.pythonhosted.org/packages/c5/08/d25f303013a04e2bec68ed97c4f4f85ad9c178fc582e8e4345147fd141fb/fasta2a-0.5.0-py3-none-any.whl", hash = "sha256:806f4bbd6cd2858ca631d47e75f3bbf4746ff0752ccca38edbfe85930c4ffbe2", size = 25198, upload-time = "2025-07-10T16:30:59.938Z" },
]
[[package]]
@@ -1195,7 +1196,7 @@ wheels = [
[[package]]
name = "google-genai"
-version = "1.24.0"
+version = "1.25.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -1207,9 +1208,9 @@ dependencies = [
{ name = "typing-extensions" },
{ name = "websockets" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/8d/cf/37ac8cd4752e28e547b8a52765fe48a2ada2d0d286ea03f46e4d8c69ff4f/google_genai-1.24.0.tar.gz", hash = "sha256:bc896e30ad26d05a2af3d17c2ba10ea214a94f1c0cdb93d5c004dc038774e75a", size = 226740, upload-time = "2025-07-01T22:14:24.365Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/7f/59/c9b9148c8702b60253f5a251f16ae436534c5d4362da193c9db05ac9858c/google_genai-1.25.0.tar.gz", hash = "sha256:a08a79c819a5d949d9948cd372e36e512bf85cd28158994daaa36d0ec4cb2b02", size = 228141, upload-time = "2025-07-09T20:53:47.885Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/30/28/a35f64fc02e599808101617a21d447d241dadeba2aac1f4dc2d1179b8218/google_genai-1.24.0-py3-none-any.whl", hash = "sha256:98be8c51632576289ecc33cd84bcdaf4356ef0bef04ac7578660c49175af22b9", size = 226065, upload-time = "2025-07-01T22:14:23.177Z" },
+ { url = "https://files.pythonhosted.org/packages/f6/ec/149f3d49b56cf848142071772aabb1c290b535bd9b5327a5dfccf1d00332/google_genai-1.25.0-py3-none-any.whl", hash = "sha256:fb5cee79b9a0a1b2afd5cfdf279099ecebd186551eefcaa6ec0c6016244e6138", size = 226847, upload-time = "2025-07-09T20:53:46.532Z" },
]
[[package]]
@@ -1340,6 +1341,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d0/9e/984486f2d0a0bd2b024bf4bc1c62688fcafa9e61991f041fb0e2def4a982/h2-4.2.0-py3-none-any.whl", hash = "sha256:479a53ad425bb29af087f3458a61d30780bc818e4ebcf01f0b536ba916462ed0", size = 60957, upload-time = "2025-02-01T11:02:26.481Z" },
]
+[[package]]
+name = "hf-xet"
+version = "1.1.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/75/dc/dc091aeeb671e71cbec30e84963f9c0202c17337b24b0a800e7d205543e8/hf_xet-1.1.3.tar.gz", hash = "sha256:a5f09b1dd24e6ff6bcedb4b0ddab2d81824098bb002cf8b4ffa780545fa348c3", size = 488127, upload-time = "2025-06-04T00:47:27.456Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/9b/1f/bc01a4c0894973adebbcd4aa338a06815c76333ebb3921d94dcbd40dae6a/hf_xet-1.1.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c3b508b5f583a75641aebf732853deb058953370ce8184f5dabc49f803b0819b", size = 2256929, upload-time = "2025-06-04T00:47:21.206Z" },
+ { url = "https://files.pythonhosted.org/packages/78/07/6ef50851b5c6b45b77a6e018fa299c69a2db3b8bbd0d5af594c0238b1ceb/hf_xet-1.1.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:b788a61977fbe6b5186e66239e2a329a3f0b7e7ff50dad38984c0c74f44aeca1", size = 2153719, upload-time = "2025-06-04T00:47:19.302Z" },
+ { url = "https://files.pythonhosted.org/packages/52/48/e929e6e3db6e4758c2adf0f2ca2c59287f1b76229d8bdc1a4c9cfc05212e/hf_xet-1.1.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd2da210856444a34aad8ada2fc12f70dabed7cc20f37e90754d1d9b43bc0534", size = 4820519, upload-time = "2025-06-04T00:47:17.244Z" },
+ { url = "https://files.pythonhosted.org/packages/28/2e/03f89c5014a5aafaa9b150655f811798a317036646623bdaace25f485ae8/hf_xet-1.1.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8203f52827e3df65981984936654a5b390566336956f65765a8aa58c362bb841", size = 4964121, upload-time = "2025-06-04T00:47:15.17Z" },
+ { url = "https://files.pythonhosted.org/packages/47/8b/5cd399a92b47d98086f55fc72d69bc9ea5e5c6f27a9ed3e0cdd6be4e58a3/hf_xet-1.1.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:30c575a5306f8e6fda37edb866762140a435037365eba7a17ce7bd0bc0216a8b", size = 5283017, upload-time = "2025-06-04T00:47:23.239Z" },
+ { url = "https://files.pythonhosted.org/packages/53/e3/2fcec58d2fcfd25ff07feb876f466cfa11f8dcf9d3b742c07fe9dd51ee0a/hf_xet-1.1.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7c1a6aa6abed1f696f8099aa9796ca04c9ee778a58728a115607de9cc4638ff1", size = 4970349, upload-time = "2025-06-04T00:47:25.383Z" },
+ { url = "https://files.pythonhosted.org/packages/53/bf/10ca917e335861101017ff46044c90e517b574fbb37219347b83be1952f6/hf_xet-1.1.3-cp37-abi3-win_amd64.whl", hash = "sha256:b578ae5ac9c056296bb0df9d018e597c8dc6390c5266f35b5c44696003cde9f3", size = 2310934, upload-time = "2025-06-04T00:47:29.632Z" },
+]
+
[[package]]
name = "hpack"
version = "4.1.0"
@@ -1388,20 +1404,26 @@ wheels = [
[[package]]
name = "huggingface-hub"
-version = "0.29.1"
+version = "0.33.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "filelock" },
{ name = "fsspec" },
+ { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
{ name = "packaging" },
{ name = "pyyaml" },
{ name = "requests" },
{ name = "tqdm" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/22/37/797d6476f13e5ef6af5fc48a5d641d32b39c37e166ccf40c3714c5854a85/huggingface_hub-0.29.1.tar.gz", hash = "sha256:9524eae42077b8ff4fc459ceb7a514eca1c1232b775276b009709fe2a084f250", size = 389776, upload-time = "2025-02-20T09:24:59.839Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/fa/42/8a95c5632080ae312c0498744b2b852195e10b05a20b1be11c5141092f4c/huggingface_hub-0.33.2.tar.gz", hash = "sha256:84221defaec8fa09c090390cd68c78b88e3c4c2b7befba68d3dc5aacbc3c2c5f", size = 426637, upload-time = "2025-07-02T06:26:05.156Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/ae/05/75b90de9093de0aadafc868bb2fa7c57651fd8f45384adf39bd77f63980d/huggingface_hub-0.29.1-py3-none-any.whl", hash = "sha256:352f69caf16566c7b6de84b54a822f6238e17ddd8ae3da4f8f2272aea5b198d5", size = 468049, upload-time = "2025-02-20T09:24:57.962Z" },
+ { url = "https://files.pythonhosted.org/packages/44/f4/5f3f22e762ad1965f01122b42dae5bf0e009286e2dba601ce1d0dba72424/huggingface_hub-0.33.2-py3-none-any.whl", hash = "sha256:3749498bfa91e8cde2ddc2c1db92c79981f40e66434c20133b39e5928ac9bcc5", size = 515373, upload-time = "2025-07-02T06:26:03.072Z" },
+]
+
+[package.optional-dependencies]
+inference = [
+ { name = "aiohttp" },
]
[[package]]
@@ -2966,7 +2988,7 @@ wheels = [
name = "pydantic-ai"
source = { editable = "." }
dependencies = [
- { name = "pydantic-ai-slim", extra = ["anthropic", "bedrock", "cli", "cohere", "evals", "google", "groq", "mcp", "mistral", "openai", "vertexai"] },
+ { name = "pydantic-ai-slim", extra = ["anthropic", "bedrock", "cli", "cohere", "evals", "google", "groq", "huggingface", "mcp", "mistral", "openai", "vertexai"] },
]
[package.optional-dependencies]
@@ -3005,7 +3027,7 @@ requires-dist = [
{ name = "fasta2a", marker = "extra == 'a2a'", specifier = ">=0.4.1" },
{ name = "logfire", marker = "extra == 'logfire'", specifier = ">=3.11.0" },
{ name = "pydantic-ai-examples", marker = "extra == 'examples'", editable = "examples" },
- { name = "pydantic-ai-slim", extras = ["anthropic", "bedrock", "cli", "cohere", "evals", "google", "groq", "mcp", "mistral", "openai", "vertexai"], editable = "pydantic_ai_slim" },
+ { name = "pydantic-ai-slim", extras = ["anthropic", "bedrock", "cli", "cohere", "evals", "google", "groq", "huggingface", "mcp", "mistral", "openai", "vertexai"], editable = "pydantic_ai_slim" },
]
provides-extras = ["a2a", "examples", "logfire"]
@@ -3107,6 +3129,9 @@ google = [
groq = [
{ name = "groq" },
]
+huggingface = [
+ { name = "huggingface-hub", extra = ["inference"] },
+]
logfire = [
{ name = "logfire" },
]
@@ -3163,6 +3188,7 @@ requires-dist = [
{ name = "griffe", specifier = ">=1.3.2" },
{ name = "groq", marker = "extra == 'groq'", specifier = ">=0.19.0" },
{ name = "httpx", specifier = ">=0.27" },
+ { name = "huggingface-hub", extras = ["inference"], marker = "extra == 'huggingface'", specifier = ">=0.33.2" },
{ name = "logfire", marker = "extra == 'logfire'", specifier = ">=3.11.0" },
{ name = "mcp", marker = "python_full_version >= '3.10' and extra == 'mcp'", specifier = ">=1.9.4" },
{ name = "mistralai", marker = "extra == 'mistral'", specifier = ">=1.2.5" },
@@ -3177,7 +3203,7 @@ requires-dist = [
{ name = "tavily-python", marker = "extra == 'tavily'", specifier = ">=0.5.0" },
{ name = "typing-inspection", specifier = ">=0.4.0" },
]
-provides-extras = ["a2a", "anthropic", "bedrock", "cli", "cohere", "duckduckgo", "evals", "google", "groq", "logfire", "mcp", "mistral", "openai", "tavily", "vertexai"]
+provides-extras = ["a2a", "anthropic", "bedrock", "cli", "cohere", "duckduckgo", "evals", "google", "groq", "huggingface", "logfire", "mcp", "mistral", "openai", "tavily", "vertexai"]
[package.metadata.requires-dev]
dev = [