diff --git a/.openhands/microagents/repo.md b/.openhands/microagents/repo.md index 90afabdb0f..166e7a1c6f 100644 --- a/.openhands/microagents/repo.md +++ b/.openhands/microagents/repo.md @@ -186,6 +186,7 @@ The simplified pattern eliminates the need for manual executor instantiation and - Avoid using # type: ignore. Treat it only as a last resort. In most cases, issues should be resolved by improving type annotations, adding assertions, or adjusting code/tests—rather than silencing the type checker. - Please AVOID using # type: ignore[attr-defined] unless absolutely necessary. If the issue can be addressed by adding a few extra assert statements to verify types, prefer that approach instead! - For issue like # type: ignore[call-arg]: if you discover that the argument doesn't actually exist, do not try to mock it again in tests. Instead, simply remove it. +- Avoid getattr/hasattr guards and instead enforce type correctness by relying on explicit type assertions and proper object usage, ensuring functions only receive the expected Pydantic models or typed inputs. diff --git a/examples/6_interactive_terminal.py b/examples/6_interactive_terminal_w_reasoning.py similarity index 93% rename from examples/6_interactive_terminal.py rename to examples/6_interactive_terminal_w_reasoning.py index d30928a560..0ecd165848 100644 --- a/examples/6_interactive_terminal.py +++ b/examples/6_interactive_terminal_w_reasoning.py @@ -24,7 +24,8 @@ api_key = os.getenv("LITELLM_API_KEY") assert api_key is not None, "LITELLM_API_KEY environment variable is not set." llm = LLM( - model="litellm_proxy/anthropic/claude-sonnet-4-20250514", + # model="litellm_proxy/gemini/gemini-2.5-pro", + model="litellm_proxy/deepseek/deepseek-reasoner", base_url="https://llm-proxy.eval.all-hands.dev", api_key=SecretStr(api_key), ) diff --git a/openhands/sdk/agent/agent/agent.py b/openhands/sdk/agent/agent/agent.py index d0756ec2bd..97d504d7f9 100644 --- a/openhands/sdk/agent/agent/agent.py +++ b/openhands/sdk/agent/agent/agent.py @@ -201,6 +201,8 @@ def step( if i == 0 else [], # Only first gets thought metrics=metrics if i == len(tool_calls) - 1 else None, + # Only first gets reasoning content + reasoning_content=message.reasoning_content if i == 0 else None, ) if action_event is None: continue @@ -254,6 +256,7 @@ def _get_action_events( on_event: ConversationCallbackType, thought: list[TextContent] = [], metrics: MetricsSnapshot | None = None, + reasoning_content: str | None = None, ) -> ActionEvent | None: """Handle tool calls from the LLM. @@ -267,7 +270,10 @@ def _get_action_events( if tool is None: err = f"Tool '{tool_name}' not found. Available: {list(self.tools.keys())}" logger.error(err) - event = AgentErrorEvent(error=err, metrics=metrics) + event = AgentErrorEvent( + error=err, + metrics=metrics, + ) on_event(event) state.agent_finished = True return @@ -282,7 +288,10 @@ def _get_action_events( f"Error validating args {tool_call.function.arguments} for tool " f"'{tool.name}': {e}" ) - event = AgentErrorEvent(error=err, metrics=metrics) + event = AgentErrorEvent( + error=err, + metrics=metrics, + ) on_event(event) return @@ -290,6 +299,7 @@ def _get_action_events( action_event = ActionEvent( action=action, thought=thought, + reasoning_content=reasoning_content, tool_name=tool.name, tool_call_id=tool_call.id, tool_call=tool_call, diff --git a/openhands/sdk/conversation/visualizer.py b/openhands/sdk/conversation/visualizer.py index d20ffb6b5f..f38c772b42 100644 --- a/openhands/sdk/conversation/visualizer.py +++ b/openhands/sdk/conversation/visualizer.py @@ -123,6 +123,7 @@ def abbr(n: int | float) -> str: prompt = usage.prompt_tokens or 0 cache_read = usage.cache_read_tokens or 0 cache_rate = f"{(cache_read / prompt * 100):.2f}%" if prompt > 0 else "N/A" + reasoning_tokens = usage.reasoning_tokens or 0 # Cost cost_str = f"{cost:.4f}" if cost > 0 else "$0.00" @@ -130,7 +131,9 @@ def abbr(n: int | float) -> str: # Build with fixed color scheme parts: list[str] = [] parts.append(f"[cyan]↑ input {input_tokens}[/cyan]") - parts.append(f"[magenta]⚡ cache hit {cache_rate}[/magenta]") + parts.append(f"[magenta]cache hit {cache_rate}[/magenta]") + if reasoning_tokens > 0: + parts.append(f"[yellow] reasoning {abbr(reasoning_tokens)}[/yellow]") parts.append(f"[blue]↓ output {output_tokens}[/blue]") parts.append(f"[green]$ {cost_str}[/green]") @@ -140,6 +143,12 @@ def _create_action_panel(self, event: ActionEvent) -> Panel: """Create a Rich Panel for ActionEvent with complete content.""" content = Text() + # Display reasoning content first if available (common to all three types) + if event.reasoning_content: + content.append("Reasoning:\n", style="bold magenta") + content.append(event.reasoning_content, style="white") + content.append("\n\n") + # Display complete thought content thought_text = " ".join([t.text for t in event.thought]) if thought_text: @@ -266,6 +275,7 @@ def _create_message_panel(self, event: MessageEvent) -> Panel: def _create_error_panel(self, event: AgentErrorEvent) -> Panel: """Create a Rich Panel for AgentErrorEvent with complete content.""" content = Text() + content.append("Error Details:\n", style="bold red") content.append(event.error, style="bright_red") diff --git a/openhands/sdk/event/llm_convertible.py b/openhands/sdk/event/llm_convertible.py index d8f2523945..0a00ea3176 100644 --- a/openhands/sdk/event/llm_convertible.py +++ b/openhands/sdk/event/llm_convertible.py @@ -2,7 +2,7 @@ from typing import cast from litellm import ChatCompletionMessageToolCall, ChatCompletionToolParam -from pydantic import Field +from pydantic import ConfigDict, Field, computed_field from openhands.sdk.event.base import N_CHAR_PREVIEW, LLMConvertibleEvent from openhands.sdk.event.types import SourceType @@ -42,6 +42,10 @@ class ActionEvent(LLMConvertibleEvent): thought: list[TextContent] = Field( ..., description="The thought process of the agent before taking this action" ) + reasoning_content: str | None = Field( + default=None, + description="Intermediate reasoning/thinking content from reasoning models", + ) action: Action = Field(..., description="Single action (tool call) returned by LLM") tool_name: str = Field(..., description="The name of the tool being called") tool_call_id: str = Field( @@ -75,7 +79,12 @@ def to_llm_message(self) -> Message: content: list[TextContent | ImageContent] = cast( list[TextContent | ImageContent], self.thought ) - return Message(role="assistant", content=content, tool_calls=[self.tool_call]) + return Message( + role="assistant", + content=content, + tool_calls=[self.tool_call], + reasoning_content=self.reasoning_content, + ) def __str__(self) -> str: """Plain text string representation for ActionEvent.""" @@ -131,10 +140,19 @@ class MessageEvent(LLMConvertibleEvent): This is originally the "MessageAction", but it suppose not to be tool call.""" + model_config = ConfigDict(extra="ignore") + source: SourceType llm_message: Message = Field( ..., description="The exact LLM message for this message event" ) + metrics: MetricsSnapshot | None = Field( + default=None, + description=( + "Snapshot of LLM metrics (token counts and costs) for this message. " + "Only attached to messages from agent." + ), + ) # context extensions stuff / microagent can go here activated_microagents: list[str] = Field( @@ -143,13 +161,10 @@ class MessageEvent(LLMConvertibleEvent): extended_content: list[TextContent] = Field( default_factory=list, description="List of content added by agent context" ) - metrics: MetricsSnapshot | None = Field( - default=None, - description=( - "Snapshot of LLM metrics (token counts and costs) for this message. " - "Only attached to messages from agent." - ), - ) + + @computed_field + def reasoning_content(self) -> str: + return self.llm_message.reasoning_content or "" def to_llm_message(self) -> Message: msg = copy.deepcopy(self.llm_message) @@ -220,7 +235,11 @@ def __str__(self) -> str: class AgentErrorEvent(LLMConvertibleEvent): - """Error triggered by the agent.""" + """Error triggered by the agent. + + Note: This event should not contain model "thought" or "reasoning_content". It + represents an error produced by the agent/scaffold, not model output. + """ source: SourceType = "agent" error: str = Field(..., description="The error message from the scaffold") diff --git a/openhands/sdk/llm/llm.py b/openhands/sdk/llm/llm.py index 4f99add9e1..8e49c1d184 100644 --- a/openhands/sdk/llm/llm.py +++ b/openhands/sdk/llm/llm.py @@ -370,7 +370,10 @@ def completion( # 3) normalize provider params kwargs["tools"] = tools # we might remove this field in _normalize_call_kwargs - call_kwargs = self._normalize_call_kwargs(kwargs, has_tools=bool(tools)) + has_tools_flag = ( + bool(tools) and use_native_fc + ) # only keep tools when native FC is active + call_kwargs = self._normalize_call_kwargs(kwargs, has_tools=has_tools_flag) # 4) optional request logging context (kept small) assert self._telemetry is not None @@ -495,11 +498,11 @@ def _normalize_call_kwargs(self, opts: dict, *, has_tools: bool) -> dict: # Anthropic/OpenAI reasoning models ignore temp/top_p out.pop("temperature", None) out.pop("top_p", None) - # Gemini 2.5 budget mapping + # Gemini 2.5-pro default to low if not set + # otherwise litellm doesn't send reasoning, even though it happens if "gemini-2.5-pro" in self.model: - if self.reasoning_effort in {None, "low", "none"}: - out["thinking"] = {"budget_tokens": 128} - out["allowed_openai_params"] = ["thinking"] + if self.reasoning_effort in {None, "none"}: + out["reasoning_effort"] = "low" # Anthropic Opus 4.1: prefer temperature when # both provided; disable extended thinking @@ -563,14 +566,21 @@ def _all_choices( "Expected non-streaming Choices when post-processing mocked tools" ) - non_fn_message: dict = resp.choices[0].message.model_dump() - fn_msgs = convert_non_fncall_messages_to_fncall_messages( + # Preserve provider-specific reasoning fields before conversion + orig_msg = resp.choices[0].message + non_fn_message: dict = orig_msg.model_dump() + fn_msgs: list[dict] = convert_non_fncall_messages_to_fncall_messages( nonfncall_msgs + [non_fn_message], tools ) - last = fn_msgs[-1] - if not isinstance(last, LiteLLMMessage): - last = LiteLLMMessage(**last) - resp.choices[0].message = last + last: dict = fn_msgs[-1] + + for name in ("reasoning_content", "provider_specific_fields"): + val = getattr(orig_msg, name, None) + if not val: + continue + last[name] = val + + resp.choices[0].message = LiteLLMMessage.model_validate(last) return resp # ========================================================================= diff --git a/openhands/sdk/llm/message.py b/openhands/sdk/llm/message.py index 71cf07103d..5b989423a2 100644 --- a/openhands/sdk/llm/message.py +++ b/openhands/sdk/llm/message.py @@ -81,6 +81,11 @@ class Message(BaseModel): name: str | None = None # name of the tool # force string serializer force_string_serializer: bool = False + # reasoning content (from reasoning models like o1, Claude thinking, DeepSeek R1) + reasoning_content: str | None = Field( + default=None, + description="Intermediate reasoning/thinking content from reasoning models", + ) @property def contains_image(self) -> bool: @@ -178,14 +183,22 @@ def _add_tool_call_keys(self, message_dict: dict[str, Any]) -> dict[str, Any]: @classmethod def from_litellm_message(cls, message: LiteLLMMessage) -> "Message": - """Convert a litellm LiteLLMMessage to our Message class.""" + """Convert a LiteLLMMessage to our Message class. + + Provider-agnostic mapping for reasoning: + - Prefer `message.reasoning_content` if present (LiteLLM normalized field) + """ assert message.role != "function", "Function role is not supported" + + rc = getattr(message, "reasoning_content", None) + return Message( role=message.role, content=[TextContent(text=message.content)] if isinstance(message.content, str) else [], tool_calls=message.tool_calls, + reasoning_content=rc, ) diff --git a/openhands/sdk/llm/utils/metrics.py b/openhands/sdk/llm/utils/metrics.py index b55db37090..9de9b0ed4c 100644 --- a/openhands/sdk/llm/utils/metrics.py +++ b/openhands/sdk/llm/utils/metrics.py @@ -47,6 +47,9 @@ class TokenUsage(BaseModel): cache_write_tokens: int = Field( default=0, ge=0, description="Cache write tokens must be non-negative" ) + reasoning_tokens: int = Field( + default=0, ge=0, description="Reasoning tokens must be non-negative" + ) context_window: int = Field( default=0, ge=0, description="Context window must be non-negative" ) @@ -63,6 +66,7 @@ def __add__(self, other: "TokenUsage") -> "TokenUsage": completion_tokens=self.completion_tokens + other.completion_tokens, cache_read_tokens=self.cache_read_tokens + other.cache_read_tokens, cache_write_tokens=self.cache_write_tokens + other.cache_write_tokens, + reasoning_tokens=self.reasoning_tokens + other.reasoning_tokens, context_window=max(self.context_window, other.context_window), per_turn_token=other.per_turn_token, response_id=self.response_id, @@ -122,6 +126,7 @@ def initialize_accumulated_token_usage(self) -> "Metrics": completion_tokens=0, cache_read_tokens=0, cache_write_tokens=0, + reasoning_tokens=0, context_window=0, response_id="", ) @@ -159,6 +164,7 @@ def add_token_usage( cache_write_tokens: int, context_window: int, response_id: str, + reasoning_tokens: int = 0, ) -> None: """Add a single usage record.""" # Token each turn for calculating context usage. @@ -170,6 +176,7 @@ def add_token_usage( completion_tokens=completion_tokens, cache_read_tokens=cache_read_tokens, cache_write_tokens=cache_write_tokens, + reasoning_tokens=reasoning_tokens, context_window=context_window, per_turn_token=per_turn_token, response_id=response_id, @@ -183,6 +190,7 @@ def add_token_usage( completion_tokens=completion_tokens, cache_read_tokens=cache_read_tokens, cache_write_tokens=cache_write_tokens, + reasoning_tokens=reasoning_tokens, context_window=context_window, per_turn_token=per_turn_token, response_id="", @@ -286,6 +294,8 @@ def diff(self, baseline: "Metrics") -> "Metrics": - base_usage.cache_read_tokens, cache_write_tokens=current_usage.cache_write_tokens - base_usage.cache_write_tokens, + reasoning_tokens=current_usage.reasoning_tokens + - base_usage.reasoning_tokens, context_window=current_usage.context_window, per_turn_token=0, response_id="", diff --git a/openhands/sdk/llm/utils/model_features.py b/openhands/sdk/llm/utils/model_features.py index b8a3adf37e..263bae552d 100644 --- a/openhands/sdk/llm/utils/model_features.py +++ b/openhands/sdk/llm/utils/model_features.py @@ -103,7 +103,7 @@ class ModelFeatures: ] REASONING_EFFORT_PATTERNS: list[str] = [ - # Mirror main behavior exactly (no unintended expansion), plus DeepSeek support + # Mirror main behavior exactly (no unintended expansion) "o1-2024-12-17", "o1", "o3", @@ -116,8 +116,6 @@ class ModelFeatures: "gemini-2.5-pro", "gpt-5", "gpt-5-2025-08-07", - # DeepSeek reasoning family - "deepseek-r1-0528*", ] PROMPT_CACHE_PATTERNS: list[str] = [ diff --git a/openhands/sdk/llm/utils/telemetry.py b/openhands/sdk/llm/utils/telemetry.py index 3ef7ed87de..ec14f68e3e 100644 --- a/openhands/sdk/llm/utils/telemetry.py +++ b/openhands/sdk/llm/utils/telemetry.py @@ -122,16 +122,23 @@ def _record_usage( prompt_tokens = usage.prompt_tokens or 0 completion_tokens = usage.completion_tokens or 0 cache_write = usage._cache_creation_input_tokens or 0 + cache_read = 0 - details = usage.prompt_tokens_details or None - if details and details.cached_tokens: - cache_read = details.cached_tokens + prompt_token_details = usage.prompt_tokens_details or None + if prompt_token_details and prompt_token_details.cached_tokens: + cache_read = prompt_token_details.cached_tokens + + reasoning_tokens = 0 + completion_tokens_details = usage.completion_tokens_details or None + if completion_tokens_details and completion_tokens_details.reasoning_tokens: + reasoning_tokens = completion_tokens_details.reasoning_tokens self.metrics.add_token_usage( prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, cache_read_tokens=cache_read, cache_write_tokens=cache_write, + reasoning_tokens=reasoning_tokens, context_window=context_window, response_id=response_id, ) @@ -200,6 +207,32 @@ def _log_completion( data["cost"] = float(cost or 0.0) data["timestamp"] = time.time() data["latency_sec"] = self._last_latency + + # Usage summary (prompt, completion, reasoning tokens) for quick inspection + try: + usage = getattr(resp, "usage", None) + if usage: + if isinstance(usage, dict): + usage = Usage.model_validate(usage) + prompt_tokens = int(usage.prompt_tokens or 0) + completion_tokens = int(usage.completion_tokens or 0) + reasoning_tokens = 0 + details = usage.completion_tokens_details or None + if details and details.reasoning_tokens: + reasoning_tokens = int(details.reasoning_tokens) + data["usage_summary"] = { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "reasoning_tokens": reasoning_tokens, + } + if usage.prompt_tokens_details: + data["usage_summary"]["cache_read_tokens"] = int( + usage.prompt_tokens_details.cached_tokens or 0 + ) + except Exception: + # Best-effort only; don't fail logging + pass + # Raw response *before* nonfncall -> call conversion if raw_resp: data["raw_response"] = raw_resp diff --git a/openhands/sdk/tests/llm/test_model_features.py b/openhands/sdk/tests/llm/test_model_features.py index 5656dc683a..1a881ac60b 100644 --- a/openhands/sdk/tests/llm/test_model_features.py +++ b/openhands/sdk/tests/llm/test_model_features.py @@ -15,7 +15,6 @@ ("litellm_proxy/gemini-2.5-pro", "gemini-2.5-pro"), ("qwen3-coder-480b-a35b-instruct", "qwen3-coder-480b-a35b-instruct"), ("gpt-5", "gpt-5"), - ("deepseek/DeepSeek-R1-0528:671b-Q4_K_XL", "deepseek-r1-0528"), ("openai/GLM-4.5-GGUF", "glm-4.5"), ("openrouter/gpt-4o-mini", "gpt-4o-mini"), ( @@ -73,7 +72,6 @@ def test_function_calling_support(model, expected_function_calling): ("o1", True), ("o3-mini", True), ("o3", True), - ("deepseek-r1-0528", True), ("gpt-4o", False), ("claude-3-5-sonnet", False), ("gemini-1.5-pro", False), diff --git a/tests/sdk/llm/test_llm_metrics.py b/tests/sdk/llm/test_llm_metrics.py index 29f77ef85a..f5e34e9dd0 100644 --- a/tests/sdk/llm/test_llm_metrics.py +++ b/tests/sdk/llm/test_llm_metrics.py @@ -253,6 +253,7 @@ def test_token_usage_pydantic_features(): "completion_tokens": 25, "cache_read_tokens": 5, "cache_write_tokens": 2, + "reasoning_tokens": 0, "context_window": 2048, "per_turn_token": 102, "response_id": "test-456", diff --git a/tests/sdk/llm/test_model_features.py b/tests/sdk/llm/test_model_features.py index 20b6b58444..8f2b200e0d 100644 --- a/tests/sdk/llm/test_model_features.py +++ b/tests/sdk/llm/test_model_features.py @@ -15,7 +15,6 @@ ("litellm_proxy/gemini-2.5-pro", "gemini-2.5-pro"), ("qwen3-coder-480b-a35b-instruct", "qwen3-coder-480b-a35b-instruct"), ("gpt-5", "gpt-5"), - ("deepseek/DeepSeek-R1-0528:671b-Q4_K_XL", "deepseek-r1-0528"), ("openai/GLM-4.5-GGUF", "glm-4.5"), ("openrouter/gpt-4o-mini", "gpt-4o-mini"), ( @@ -77,7 +76,6 @@ def test_function_calling_support(model, expected_function_calling): ("o1", True), ("o3-mini", True), ("o3", True), - ("deepseek-r1-0528", True), ("gpt-4o", False), ("claude-3-5-sonnet", False), ("gemini-1.5-pro", False), @@ -126,7 +124,6 @@ def test_prompt_cache_support(model, expected_cache): ("grok-code-fast-1", False), ("xai/grok-4-0709", False), ("xai/grok-code-fast-1", False), - ("deepseek-r1-0528", False), ], ) def test_stop_words_support(model, expected_stop_words): @@ -262,7 +259,6 @@ def test_stop_words_grok_provider_prefixed(): "o1-2024-12-17", "xai/grok-4-0709", "xai/grok-code-fast-1", - "deepseek-r1-0528", ], ) def test_supports_stop_words_false_models(model): diff --git a/tests/sdk/llm/test_reasoning_content.py b/tests/sdk/llm/test_reasoning_content.py new file mode 100644 index 0000000000..014bd87515 --- /dev/null +++ b/tests/sdk/llm/test_reasoning_content.py @@ -0,0 +1,151 @@ +"""Tests for reasoning content support in LLM and Message classes.""" + +from litellm.types.utils import Choices, Message as LiteLLMMessage, ModelResponse, Usage + + +def create_mock_response(content: str = "Test response", response_id: str = "test-id"): + """Helper function to create properly structured mock responses.""" + return ModelResponse( + id=response_id, + choices=[ + Choices( + finish_reason="stop", + index=0, + message=LiteLLMMessage( + content=content, + role="assistant", + ), + ) + ], + created=1234567890, + model="claude-sonnet-4-20250514", + object="chat.completion", + usage=Usage( + prompt_tokens=10, + completion_tokens=5, + total_tokens=15, + ), + ) + + +def test_message_with_reasoning_content(): + """Test Message with reasoning content fields.""" + from openhands.sdk.llm.message import Message, TextContent + + message = Message( + role="assistant", + content=[TextContent(text="The answer is 42.")], + reasoning_content="Let me think about this step by step...", + ) + + assert message.reasoning_content == "Let me think about this step by step..." + + +def test_message_without_reasoning_content(): + """Test Message without reasoning content (default behavior).""" + from openhands.sdk.llm.message import Message, TextContent + + message = Message(role="assistant", content=[TextContent(text="The answer is 42.")]) + + assert message.reasoning_content is None + + +def test_message_from_litellm_message_with_reasoning(): + """Test Message.from_litellm_message with reasoning content.""" + from openhands.sdk.llm.message import Message + + # Create a mock LiteLLM message with reasoning content + litellm_message = LiteLLMMessage(role="assistant", content="The answer is 42.") + # Add reasoning content as attributes + litellm_message.reasoning_content = "Let me think about this..." + + message = Message.from_litellm_message(litellm_message) + + assert message.role == "assistant" + assert len(message.content) == 1 + from openhands.sdk.llm.message import TextContent + + assert isinstance(message.content[0], TextContent) + assert message.content[0].text == "The answer is 42." + assert message.reasoning_content == "Let me think about this..." + + +def test_message_from_litellm_message_without_reasoning(): + """Test Message.from_litellm_message without reasoning content.""" + from openhands.sdk.llm.message import Message + + litellm_message = LiteLLMMessage(role="assistant", content="The answer is 42.") + + message = Message.from_litellm_message(litellm_message) + + assert message.role == "assistant" + assert len(message.content) == 1 + from openhands.sdk.llm.message import TextContent + + assert isinstance(message.content[0], TextContent) + assert message.content[0].text == "The answer is 42." + assert message.reasoning_content is None + + +def test_message_serialization_with_reasoning(): + """Test Message serialization includes reasoning content.""" + from openhands.sdk.llm.message import Message, TextContent + + message = Message( + role="assistant", + content=[TextContent(text="Answer")], + reasoning_content="Thinking process...", + ) + + serialized = message.model_dump() + + assert serialized["reasoning_content"] == "Thinking process..." + + +def test_message_serialization_without_reasoning(): + """Test Message serialization without reasoning content.""" + from openhands.sdk.llm.message import Message, TextContent + + message = Message(role="assistant", content=[TextContent(text="Answer")]) + + serialized = message.model_dump() + + assert serialized["reasoning_content"] is None + + +def test_action_event_with_reasoning_content(): + """Test ActionEvent with reasoning content fields.""" + from litellm import ChatCompletionMessageToolCall + from litellm.types.utils import Function + + from openhands.sdk.event.llm_convertible import ActionEvent + from openhands.sdk.llm.message import TextContent + from openhands.sdk.tool import ActionBase + + # Create a simple action for testing + class TestAction(ActionBase): + action: str = "test" + + # Create a tool call + tool_call = ChatCompletionMessageToolCall( + id="test-id", + function=Function(name="test_tool", arguments='{"arg": "value"}'), + type="function", + ) + + action_event = ActionEvent( + thought=[TextContent(text="I need to test this")], + action=TestAction(), + tool_name="test_tool", + tool_call_id="test-id", + tool_call=tool_call, + llm_response_id="response-123", + reasoning_content="Let me think about this step by step...", + ) + + # Test that reasoning content is preserved + assert action_event.reasoning_content == "Let me think about this step by step..." + + # Test that reasoning content is included in the LLM message + llm_message = action_event.to_llm_message() + assert llm_message.reasoning_content == "Let me think about this step by step..."