Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions docs/en/concepts/llms.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -283,11 +283,54 @@ In this section, you'll find detailed examples that help you select, configure,
)
```

**Extended Thinking (Claude Sonnet 4 and Beyond):**

CrewAI supports Anthropic's Extended Thinking feature, which allows Claude to think through problems in a more human-like way before responding. This is particularly useful for complex reasoning, analysis, and problem-solving tasks.

```python Code
from crewai import LLM

# Enable extended thinking (the Anthropic API requires a budget_tokens
# value, minimum 1024, whenever thinking is enabled)
llm = LLM(
    model="anthropic/claude-sonnet-4",
    thinking={"type": "enabled", "budget_tokens": 1024},
    max_tokens=10000
)

# Configure thinking with budget control
llm = LLM(
model="anthropic/claude-sonnet-4",
thinking={
"type": "enabled",
"budget_tokens": 5000 # Limit thinking tokens
},
max_tokens=10000
)
```

**Thinking Configuration Options:**
- `type`: Set to `"enabled"` to activate extended thinking mode
- `budget_tokens`: Maximum tokens allotted to internal reasoning (helps control costs). The Anthropic API requires this whenever thinking is enabled and enforces a minimum of 1,024 tokens.

**Models Supporting Extended Thinking:**
- `claude-sonnet-4` and newer models
- `claude-3-7-sonnet` (with extended thinking capabilities)

**When to Use Extended Thinking:**
- Complex reasoning and multi-step problem solving
- Mathematical calculations and proofs
- Code analysis and debugging
- Strategic planning and decision making
- Research and analytical tasks

**Note:** Extended thinking consumes additional tokens but can significantly improve response quality for complex tasks.

**Supported Environment Variables:**
- `ANTHROPIC_API_KEY`: Your Anthropic API key (required)

**Features:**
- Native tool use support for Claude 3+ models
- Extended Thinking support for Claude Sonnet 4+
- Streaming support for real-time responses
- Automatic system message handling
- Stop sequences for controlled output
Expand All @@ -305,6 +348,7 @@ In this section, you'll find detailed examples that help you select, configure,

| Model | Context Window | Best For |
|------------------------------|----------------|-----------------------------------------------|
| claude-sonnet-4 | 200,000 tokens | Latest with extended thinking capabilities |
| claude-3-7-sonnet | 200,000 tokens | Advanced reasoning and agentic tasks |
| claude-3-5-sonnet-20241022 | 200,000 tokens | Latest Sonnet with best performance |
| claude-3-5-haiku | 200,000 tokens | Fast, compact model for quick responses |
Expand Down
2 changes: 2 additions & 0 deletions lib/crewai/src/crewai/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@

from crewai.agent.core import Agent
from crewai.llms.hooks.base import BaseInterceptor
from crewai.llms.providers.anthropic.completion import AnthropicThinkingConfig
from crewai.task import Task
from crewai.tools.base_tool import BaseTool
from crewai.utilities.types import LLMMessage
Expand Down Expand Up @@ -585,6 +586,7 @@ def __init__(
reasoning_effort: Literal["none", "low", "medium", "high"] | None = None,
stream: bool = False,
interceptor: BaseInterceptor[httpx.Request, httpx.Response] | None = None,
thinking: AnthropicThinkingConfig | dict[str, Any] | None = None,
**kwargs: Any,
) -> None:
"""Initialize LLM instance.
Expand Down
129 changes: 119 additions & 10 deletions lib/crewai/src/crewai/llms/providers/anthropic/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import json
import logging
import os
from typing import TYPE_CHECKING, Any, cast
from typing import TYPE_CHECKING, Any, Literal, cast

from anthropic.types import ThinkingBlock
from pydantic import BaseModel

from crewai.events.types.llm_events import LLMCallType
Expand All @@ -22,15 +23,19 @@

try:
from anthropic import Anthropic, AsyncAnthropic
from anthropic.types import Message
from anthropic.types.tool_use_block import ToolUseBlock
from anthropic.types import Message, TextBlock, ThinkingBlock, ToolUseBlock
import httpx
except ImportError:
raise ImportError(
'Anthropic native provider not available, to install: uv add "crewai[anthropic]"'
) from None


class AnthropicThinkingConfig(BaseModel):
    """Configuration for Anthropic's extended thinking feature.

    Serialized via ``model_dump()`` and passed straight through as the
    Messages API ``thinking`` parameter.
    """

    # "enabled" activates extended thinking; "disabled" turns it off.
    type: Literal["enabled", "disabled"]
    # Maximum tokens Claude may spend on internal reasoning.
    # NOTE(review): the Anthropic API requires budget_tokens (min 1024) when
    # type == "enabled" — confirm whether a None default is rejected upstream.
    budget_tokens: int | None = None


class AnthropicCompletion(BaseLLM):
"""Anthropic native completion implementation.

Expand All @@ -52,6 +57,7 @@ def __init__(
stream: bool = False,
client_params: dict[str, Any] | None = None,
interceptor: BaseInterceptor[httpx.Request, httpx.Response] | None = None,
thinking: AnthropicThinkingConfig | None = None,
**kwargs: Any,
):
"""Initialize Anthropic chat completion client.
Expand Down Expand Up @@ -97,6 +103,10 @@ def __init__(
self.top_p = top_p
self.stream = stream
self.stop_sequences = stop_sequences or []
self.thinking = thinking
self.previous_thinking_blocks: list[ThinkingBlock] = []
# Model-specific settings
self.is_claude_3 = "claude-3" in model.lower()
self.supports_tools = True

@property
Expand Down Expand Up @@ -326,6 +336,12 @@ def _prepare_completion_params(
if tools and self.supports_tools:
params["tools"] = self._convert_tools_for_interference(tools)

if self.thinking:
if isinstance(self.thinking, AnthropicThinkingConfig):
params["thinking"] = self.thinking.model_dump()
else:
params["thinking"] = self.thinking

return params

def _convert_tools_for_interference(
Expand Down Expand Up @@ -365,6 +381,34 @@ def _convert_tools_for_interference(

return anthropic_tools

def _extract_thinking_block(
self, content_block: Any
) -> ThinkingBlock | dict[str, Any] | None:
"""Extract and format thinking block from content block.

Args:
content_block: Content block from Anthropic response

Returns:
Dictionary with thinking block data including signature, or None if not a thinking block
"""
if content_block.type == "thinking":
thinking_block = {
"type": "thinking",
"thinking": content_block.thinking,
}
if hasattr(content_block, "signature"):
thinking_block["signature"] = content_block.signature
return thinking_block
if content_block.type == "redacted_thinking":
redacted_block = {"type": "redacted_thinking"}
if hasattr(content_block, "thinking"):
redacted_block["thinking"] = content_block.thinking
if hasattr(content_block, "signature"):
redacted_block["signature"] = content_block.signature
return redacted_block
return None

def _format_messages_for_anthropic(
self, messages: str | list[LLMMessage]
) -> tuple[list[LLMMessage], str | None]:
Expand All @@ -374,6 +418,7 @@ def _format_messages_for_anthropic(
- System messages are separate from conversation messages
- Messages must alternate between user and assistant
- First message must be from user
- When thinking is enabled, assistant messages must start with thinking blocks

Args:
messages: Input messages
Expand All @@ -398,8 +443,29 @@ def _format_messages_for_anthropic(
system_message = cast(str, content)
else:
role_str = role if role is not None else "user"
content_str = content if content is not None else ""
formatted_messages.append({"role": role_str, "content": content_str})

if isinstance(content, list):
formatted_messages.append({"role": role_str, "content": content})
elif (
role_str == "assistant"
and self.thinking
and self.previous_thinking_blocks
):
structured_content = cast(
list[dict[str, Any]],
[
*self.previous_thinking_blocks,
{"type": "text", "text": content if content else ""},
],
)
formatted_messages.append(
LLMMessage(role=role_str, content=structured_content)
)
else:
content_str = content if content is not None else ""
formatted_messages.append(
LLMMessage(role=role_str, content=content_str)
)

# Ensure first message is from user (Anthropic requirement)
if not formatted_messages:
Expand Down Expand Up @@ -449,7 +515,6 @@ def _handle_completion(
if tool_uses and tool_uses[0].name == "structured_output":
structured_data = tool_uses[0].input
structured_json = json.dumps(structured_data)

self._emit_call_completed_event(
response=structured_json,
call_type=LLMCallType.LLM_CALL,
Expand Down Expand Up @@ -477,15 +542,22 @@ def _handle_completion(
from_agent,
)

# Extract text content
content = ""
thinking_blocks: list[ThinkingBlock] = []

if response.content:
for content_block in response.content:
if hasattr(content_block, "text"):
content += content_block.text
else:
thinking_block = self._extract_thinking_block(content_block)
if thinking_block:
thinking_blocks.append(cast(ThinkingBlock, thinking_block))

content = self._apply_stop_words(content)
if thinking_blocks:
self.previous_thinking_blocks = thinking_blocks

content = self._apply_stop_words(content)
self._emit_call_completed_event(
response=content,
call_type=LLMCallType.LLM_CALL,
Expand Down Expand Up @@ -540,6 +612,16 @@ def _handle_streaming_completion(

final_message: Message = stream.get_final_message()

thinking_blocks: list[ThinkingBlock] = []
if final_message.content:
for content_block in final_message.content:
thinking_block = self._extract_thinking_block(content_block)
if thinking_block:
thinking_blocks.append(cast(ThinkingBlock, thinking_block))

if thinking_blocks:
self.previous_thinking_blocks = thinking_blocks

usage = self._extract_anthropic_token_usage(final_message)
self._track_token_usage_internal(usage)

Expand Down Expand Up @@ -644,7 +726,26 @@ def _handle_tool_use_conversation(
follow_up_params = params.copy()

# Add Claude's tool use response to conversation
assistant_message = {"role": "assistant", "content": initial_response.content}
assistant_content: list[
ThinkingBlock | ToolUseBlock | TextBlock | dict[str, Any]
] = []
for block in initial_response.content:
thinking_block = self._extract_thinking_block(block)
if thinking_block:
assistant_content.append(thinking_block)
elif block.type == "tool_use":
assistant_content.append(
{
"type": "tool_use",
"id": block.id,
"name": block.name,
"input": block.input,
}
)
elif hasattr(block, "text"):
assistant_content.append({"type": "text", "text": block.text})

assistant_message = {"role": "assistant", "content": assistant_content}

# Add user message with tool results
user_message = {"role": "user", "content": tool_results}
Expand All @@ -663,12 +764,20 @@ def _handle_tool_use_conversation(
follow_up_usage = self._extract_anthropic_token_usage(final_response)
self._track_token_usage_internal(follow_up_usage)

# Extract final text content
final_content = ""
thinking_blocks: list[ThinkingBlock] = []

if final_response.content:
for content_block in final_response.content:
if hasattr(content_block, "text"):
final_content += content_block.text
else:
thinking_block = self._extract_thinking_block(content_block)
if thinking_block:
thinking_blocks.append(cast(ThinkingBlock, thinking_block))

if thinking_blocks:
self.previous_thinking_blocks = thinking_blocks

final_content = self._apply_stop_words(final_content)

Expand Down
Loading
Loading