diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py index 9d7307acb8..4269439da7 100644 --- a/src/huggingface_hub/inference/_client.py +++ b/src/huggingface_hub/inference/_client.py @@ -518,16 +518,6 @@ def chat_completion( """ A method for completing conversations using a specified language model. - - - If the model is served by a server supporting chat-completion, the method will directly call the server's - `/v1/chat/completions` endpoint. If the server does not support chat-completion, the method will render the - chat template client-side based on the information fetched from the Hub API. In this case, you will need to - have `minijinja` template engine installed. Run `pip install "huggingface_hub[inference]"` or `pip install minijinja` - to install it. - - - Args: messages (List[Union[`SystemMessage`, `UserMessage`, `AssistantMessage`]]): Conversation history consisting of roles and content pairs. @@ -584,7 +574,7 @@ def chat_completion( send the request. Returns: - [`ChatCompletionOutput] or Iterable of [`ChatCompletionStreamOutput`]: + [`ChatCompletionOutput`] or Iterable of [`ChatCompletionStreamOutput`]: Generated text returned from the server: - if `stream=False`, the generated text is returned as a [`ChatCompletionOutput`] (default). - if `stream=True`, the generated text is returned token by token as a sequence of [`ChatCompletionStreamOutput`]. diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py index 506ff9e9f6..3f12382892 100644 --- a/src/huggingface_hub/inference/_generated/_async_client.py +++ b/src/huggingface_hub/inference/_generated/_async_client.py @@ -519,16 +519,6 @@ async def chat_completion( """ A method for completing conversations using a specified language model. - - - If the model is served by a server supporting chat-completion, the method will directly call the server's - `/v1/chat/completions` endpoint. If the server does not support chat-completion, the method will render the - chat template client-side based on the information fetched from the Hub API. In this case, you will need to - have `minijinja` template engine installed. Run `pip install "huggingface_hub[inference]"` or `pip install minijinja` - to install it. - - - Args: messages (List[Union[`SystemMessage`, `UserMessage`, `AssistantMessage`]]): Conversation history consisting of roles and content pairs. @@ -585,7 +575,7 @@ async def chat_completion( send the request. Returns: - [`ChatCompletionOutput] or Iterable of [`ChatCompletionStreamOutput`]: + [`ChatCompletionOutput`] or Iterable of [`ChatCompletionStreamOutput`]: Generated text returned from the server: - if `stream=False`, the generated text is returned as a [`ChatCompletionOutput`] (default). - if `stream=True`, the generated text is returned token by token as a sequence of [`ChatCompletionStreamOutput`].