Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions docs/en/concepts/llms.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -283,11 +283,54 @@ In this section, you'll find detailed examples that help you select, configure,
)
```

**Extended Thinking (Claude Sonnet 4 and Beyond):**

CrewAI supports Anthropic's Extended Thinking feature, which allows Claude to think through problems in a more human-like way before responding. This is particularly useful for complex reasoning, analysis, and problem-solving tasks.

```python Code
from crewai import LLM

# Enable extended thinking (the Anthropic API requires a budget_tokens
# value, minimum 1024, whenever thinking is enabled)
llm = LLM(
    model="anthropic/claude-sonnet-4",
    thinking={"type": "enabled", "budget_tokens": 1024},
    max_tokens=10000
)

# Configure thinking with budget control
llm = LLM(
model="anthropic/claude-sonnet-4",
thinking={
"type": "enabled",
"budget_tokens": 5000 # Limit thinking tokens
},
max_tokens=10000
)
```

**Thinking Configuration Options:**
- `type`: Set to `"enabled"` to activate extended thinking mode
- `budget_tokens`: Maximum tokens allotted to internal reasoning (helps control costs). The Anthropic API requires this whenever thinking is enabled and enforces a minimum of 1,024 tokens.

**Models Supporting Extended Thinking:**
- `claude-sonnet-4` and newer models
- `claude-3-7-sonnet` (with extended thinking capabilities)

**When to Use Extended Thinking:**
- Complex reasoning and multi-step problem solving
- Mathematical calculations and proofs
- Code analysis and debugging
- Strategic planning and decision making
- Research and analytical tasks

**Note:** Extended thinking consumes additional tokens but can significantly improve response quality for complex tasks.

**Supported Environment Variables:**
- `ANTHROPIC_API_KEY`: Your Anthropic API key (required)

**Features:**
- Native tool use support for Claude 3+ models
- Extended Thinking support for Claude Sonnet 4+
- Streaming support for real-time responses
- Automatic system message handling
- Stop sequences for controlled output
Expand All @@ -305,6 +348,7 @@ In this section, you'll find detailed examples that help you select, configure,

| Model | Context Window | Best For |
|------------------------------|----------------|-----------------------------------------------|
| claude-sonnet-4 | 200,000 tokens | Latest with extended thinking capabilities |
| claude-3-7-sonnet | 200,000 tokens | Advanced reasoning and agentic tasks |
| claude-3-5-sonnet-20241022 | 200,000 tokens | Latest Sonnet with best performance |
| claude-3-5-haiku | 200,000 tokens | Fast, compact model for quick responses |
Expand Down
2 changes: 2 additions & 0 deletions lib/crewai/src/crewai/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@

from crewai.agent.core import Agent
from crewai.llms.hooks.base import BaseInterceptor
from crewai.llms.providers.anthropic.completion import AnthropicThinkingConfig
from crewai.task import Task
from crewai.tools.base_tool import BaseTool
from crewai.utilities.types import LLMMessage
Expand Down Expand Up @@ -585,6 +586,7 @@ def __init__(
reasoning_effort: Literal["none", "low", "medium", "high"] | None = None,
stream: bool = False,
interceptor: BaseInterceptor[httpx.Request, httpx.Response] | None = None,
thinking: AnthropicThinkingConfig | dict[str, Any] | None = None,
**kwargs: Any,
) -> None:
"""Initialize LLM instance.
Expand Down
129 changes: 119 additions & 10 deletions lib/crewai/src/crewai/llms/providers/anthropic/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import json
import logging
import os
from typing import TYPE_CHECKING, Any, cast
from typing import TYPE_CHECKING, Any, Literal, cast

from anthropic.types import ThinkingBlock
from pydantic import BaseModel

from crewai.events.types.llm_events import LLMCallType
Expand All @@ -22,15 +23,19 @@

try:
from anthropic import Anthropic, AsyncAnthropic
from anthropic.types import Message
from anthropic.types.tool_use_block import ToolUseBlock
from anthropic.types import Message, TextBlock, ThinkingBlock, ToolUseBlock
import httpx
except ImportError:
raise ImportError(
'Anthropic native provider not available, to install: uv add "crewai[anthropic]"'
) from None


class AnthropicThinkingConfig(BaseModel):
    """Configuration for Anthropic's extended thinking feature.

    Serialized via ``model_dump()`` and passed straight through as the
    Messages API ``thinking`` parameter.
    """

    # "enabled" activates extended thinking; "disabled" turns it off.
    type: Literal["enabled", "disabled"]
    # Maximum tokens Claude may spend on internal reasoning.
    # NOTE(review): the Anthropic API requires budget_tokens (min 1024) when
    # type == "enabled" — confirm whether a None default is rejected upstream.
    budget_tokens: int | None = None


class AnthropicCompletion(BaseLLM):
"""Anthropic native completion implementation.

Expand All @@ -52,6 +57,7 @@ def __init__(
stream: bool = False,
client_params: dict[str, Any] | None = None,
interceptor: BaseInterceptor[httpx.Request, httpx.Response] | None = None,
thinking: AnthropicThinkingConfig | None = None,
**kwargs: Any,
):
"""Initialize Anthropic chat completion client.
Expand Down Expand Up @@ -97,6 +103,10 @@ def __init__(
self.top_p = top_p
self.stream = stream
self.stop_sequences = stop_sequences or []
self.thinking = thinking
self.previous_thinking_blocks: list[ThinkingBlock] = []
# Model-specific settings
self.is_claude_3 = "claude-3" in model.lower()
self.supports_tools = True

@property
Expand Down Expand Up @@ -326,6 +336,12 @@ def _prepare_completion_params(
if tools and self.supports_tools:
params["tools"] = self._convert_tools_for_interference(tools)

if self.thinking:
if isinstance(self.thinking, AnthropicThinkingConfig):
params["thinking"] = self.thinking.model_dump()
else:
params["thinking"] = self.thinking

return params

def _convert_tools_for_interference(
Expand Down Expand Up @@ -365,6 +381,34 @@ def _convert_tools_for_interference(

return anthropic_tools

def _extract_thinking_block(
self, content_block: Any
) -> ThinkingBlock | dict[str, Any] | None:
"""Extract and format thinking block from content block.

Args:
content_block: Content block from Anthropic response

Returns:
Dictionary with thinking block data including signature, or None if not a thinking block
"""
if content_block.type == "thinking":
thinking_block = {
"type": "thinking",
"thinking": content_block.thinking,
}
if hasattr(content_block, "signature"):
thinking_block["signature"] = content_block.signature
return thinking_block
if content_block.type == "redacted_thinking":
redacted_block = {"type": "redacted_thinking"}
if hasattr(content_block, "thinking"):
redacted_block["thinking"] = content_block.thinking
if hasattr(content_block, "signature"):
redacted_block["signature"] = content_block.signature
return redacted_block
return None

def _format_messages_for_anthropic(
self, messages: str | list[LLMMessage]
) -> tuple[list[LLMMessage], str | None]:
Expand All @@ -374,6 +418,7 @@ def _format_messages_for_anthropic(
- System messages are separate from conversation messages
- Messages must alternate between user and assistant
- First message must be from user
- When thinking is enabled, assistant messages must start with thinking blocks

Args:
messages: Input messages
Expand All @@ -398,8 +443,29 @@ def _format_messages_for_anthropic(
system_message = cast(str, content)
else:
role_str = role if role is not None else "user"
content_str = content if content is not None else ""
formatted_messages.append({"role": role_str, "content": content_str})

if isinstance(content, list):
formatted_messages.append({"role": role_str, "content": content})
elif (
role_str == "assistant"
and self.thinking
and self.previous_thinking_blocks
):
structured_content = cast(
list[dict[str, Any]],
[
*self.previous_thinking_blocks,
{"type": "text", "text": content if content else ""},
],
)
formatted_messages.append(
LLMMessage(role=role_str, content=structured_content)
)
else:
content_str = content if content is not None else ""
formatted_messages.append(
LLMMessage(role=role_str, content=content_str)
)

# Ensure first message is from user (Anthropic requirement)
if not formatted_messages:
Expand Down Expand Up @@ -449,7 +515,6 @@ def _handle_completion(
if tool_uses and tool_uses[0].name == "structured_output":
structured_data = tool_uses[0].input
structured_json = json.dumps(structured_data)

self._emit_call_completed_event(
response=structured_json,
call_type=LLMCallType.LLM_CALL,
Expand Down Expand Up @@ -477,15 +542,22 @@ def _handle_completion(
from_agent,
)

# Extract text content
content = ""
thinking_blocks: list[ThinkingBlock] = []

if response.content:
for content_block in response.content:
if hasattr(content_block, "text"):
content += content_block.text
else:
thinking_block = self._extract_thinking_block(content_block)
if thinking_block:
thinking_blocks.append(cast(ThinkingBlock, thinking_block))

content = self._apply_stop_words(content)
if thinking_blocks:
self.previous_thinking_blocks = thinking_blocks

content = self._apply_stop_words(content)
self._emit_call_completed_event(
response=content,
call_type=LLMCallType.LLM_CALL,
Expand Down Expand Up @@ -540,6 +612,16 @@ def _handle_streaming_completion(

final_message: Message = stream.get_final_message()

thinking_blocks: list[ThinkingBlock] = []
if final_message.content:
for content_block in final_message.content:
thinking_block = self._extract_thinking_block(content_block)
if thinking_block:
thinking_blocks.append(cast(ThinkingBlock, thinking_block))

if thinking_blocks:
self.previous_thinking_blocks = thinking_blocks

usage = self._extract_anthropic_token_usage(final_message)
self._track_token_usage_internal(usage)

Expand Down Expand Up @@ -644,7 +726,26 @@ def _handle_tool_use_conversation(
follow_up_params = params.copy()

# Add Claude's tool use response to conversation
assistant_message = {"role": "assistant", "content": initial_response.content}
assistant_content: list[
ThinkingBlock | ToolUseBlock | TextBlock | dict[str, Any]
] = []
for block in initial_response.content:
thinking_block = self._extract_thinking_block(block)
if thinking_block:
assistant_content.append(thinking_block)
elif block.type == "tool_use":
assistant_content.append(
{
"type": "tool_use",
"id": block.id,
"name": block.name,
"input": block.input,
}
)
elif hasattr(block, "text"):
assistant_content.append({"type": "text", "text": block.text})

assistant_message = {"role": "assistant", "content": assistant_content}

# Add user message with tool results
user_message = {"role": "user", "content": tool_results}
Expand All @@ -663,12 +764,20 @@ def _handle_tool_use_conversation(
follow_up_usage = self._extract_anthropic_token_usage(final_response)
self._track_token_usage_internal(follow_up_usage)

# Extract final text content
final_content = ""
thinking_blocks: list[ThinkingBlock] = []

if final_response.content:
for content_block in final_response.content:
if hasattr(content_block, "text"):
final_content += content_block.text
else:
thinking_block = self._extract_thinking_block(content_block)
if thinking_block:
thinking_blocks.append(cast(ThinkingBlock, thinking_block))

if thinking_blocks:
self.previous_thinking_blocks = thinking_blocks

final_content = self._apply_stop_words(final_content)

Expand Down
Loading
Loading