@@ -17,7 +17,7 @@
 Support for OpenAI Realtime API, LLM, TTS, and STT APIs.
 
 Also includes support for a large number of OpenAI-compatible APIs including Azure OpenAI, Cerebras,
-Fireworks, Perplexity, Telnyx, xAI, Ollama, DeepSeek, OpenRouter, and OVHcloud AI Endpoints.
+Fireworks, Perplexity, Telnyx, xAI, Ollama, DeepSeek, OpenRouter, Cohere, and OVHcloud AI Endpoints.
 
 See https://docs.livekit.io/agents/integrations/openai/ and
 https://docs.livekit.io/agents/integrations/llm/ for more information.
@@ -27,6 +27,7 @@
 from .embeddings import EmbeddingData, create_embeddings
 from .llm import LLM, LLMStream
 from .models import (
+    CohereChatModels,
     OpenRouterProviderPreferences,
     OpenRouterWebPlugin,
     STTModels,
@@ -42,6 +43,7 @@
"TTS",
"LLM",
"LLMStream",
"CohereChatModels",
"OpenRouterProviderPreferences",
"OpenRouterWebPlugin",
"STTModels",
@@ -26,6 +26,7 @@
 from livekit.agents.inference.llm import LLMStream as _LLMStream
 from livekit.agents.llm import (
     ChatContext,
+    ChatMessage,
     FunctionTool,
     ProviderTool,
     RawFunctionTool,
@@ -45,6 +46,7 @@
 from .models import (
     CerebrasChatModels,
     ChatModels,
+    CohereChatModels,
     CometAPIChatModels,
     DeepSeekChatModels,
     NebiusChatModels,
@@ -447,6 +449,44 @@ def with_openrouter(
             timeout=timeout,
         )
 
+    @staticmethod
+    def with_cohere(
+        *,
+        model: str | CohereChatModels = "command-r-08-2024",
+        api_key: str | None = None,
+        base_url: str = "https://api.cohere.ai/compatibility/v1",
+        client: openai.AsyncClient | None = None,
+        temperature: NotGivenOr[float] = NOT_GIVEN,
+        tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
+        top_p: NotGivenOr[float] = NOT_GIVEN,
+        max_completion_tokens: NotGivenOr[int] = NOT_GIVEN,
+        timeout: httpx.Timeout | None = None,
+    ) -> LLM:
+        """
+        Create a new instance of Cohere LLM using the OpenAI-compatible API.
+
+        ``api_key`` must be set to your Cohere API key, either using the argument or by setting
+        the ``COHERE_API_KEY`` environment variable.
+        """
+        api_key = api_key or os.environ.get("COHERE_API_KEY")
+        if api_key is None:
+            raise ValueError(
+                "Cohere API key is required, either as an argument or via the COHERE_API_KEY environment variable"
+            )
+
+        return LLM(
+            model=model,
+            api_key=api_key,
+            client=client,
+            base_url=base_url,
+            temperature=temperature,
+            tool_choice=tool_choice,
+            top_p=top_p,
+            max_completion_tokens=max_completion_tokens,
+            timeout=timeout,
+            _strict_tool_schema=False,
+        )
+
     @staticmethod
     def with_deepseek(
         *,
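For context, a minimal usage sketch of the new factory, assuming the plugin is installed and COHERE_API_KEY is set in the environment; the import path follows the plugin's existing pattern:

    from livekit.plugins import openai

    # api_key is omitted, so the factory falls back to COHERE_API_KEY.
    llm = openai.LLM.with_cohere(
        model="command-a-03-2025",
        temperature=0.3,
    )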
@@ -961,6 +1001,16 @@ def chat(
         if is_given(response_format):
             extra["response_format"] = llm_utils.to_openai_response_format(response_format) # type: ignore
 
+        # Cohere requires at least one user message to generate a response
+        if "api.cohere.ai" in str(self._client.base_url):
+            has_user_message = any(
+                isinstance(item, ChatMessage) and item.role == "user" for item in chat_ctx.items
+            )
+
+            if not has_user_message:
+                placeholder_msg = ChatMessage(role="user", content=["."])
+                chat_ctx.items.append(placeholder_msg)
+
         return LLMStream(
             self,
             model=self._opts.model,
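To illustrate the guard above, a standalone sketch; it assumes ChatContext.empty() and add_message() behave as in current livekit-agents, and the system prompt is purely illustrative:

    from livekit.agents.llm import ChatContext, ChatMessage

    # A context with only a system message (no user turn yet).
    ctx = ChatContext.empty()
    ctx.add_message(role="system", content="You are a terse assistant.")

    # Mirrors the check above: Cohere's compatibility endpoint rejects
    # requests without a user message, so a "." placeholder is appended.
    if not any(isinstance(i, ChatMessage) and i.role == "user" for i in ctx.items):
        ctx.items.append(ChatMessage(role="user", content=["."]))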
@@ -290,6 +290,16 @@
"grok-2-1212",
]

+CohereChatModels = Literal[
+    "command-a-03-2025",
+    "command-r7b-12-2024",
+    "command-a-translate-08-2025",
+    "command-a-reasoning-08-2025",
+    "command-a-vision-07-2025",
+    "command-r-08-2024",
+    "command-r-plus-08-2024",
+]
+

 def _supports_reasoning_effort(model: Union[ChatModels, str]) -> bool:
     return model in [
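One design note on the Literal: with_cohere() types its model parameter as str | CohereChatModels, so the list above drives editor autocompletion without rejecting newer Cohere model names. A brief sketch, reusing the openai import from the earlier example; the second model name is hypothetical:

    llm = openai.LLM.with_cohere(model="command-r7b-12-2024")  # listed in CohereChatModels
    llm = openai.LLM.with_cohere(model="command-x-hypothetical")  # any plain str still type-checks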