crmne · rhys117 · May 14, 2025
diff --git a/lib/ruby_llm/message.rb b/lib/ruby_llm/message.rb
@@ -7,11 +7,12 @@ module RubyLLM
   class Message
     ROLES = %i[system user assistant tool].freeze
 
-    attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id
+    attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :thinking_content
 
     def initialize(options = {})
       @role = options[:role].to_sym
       @content = normalize_content(options[:content])
+      @thinking_content = options[:thinking_content]
       @tool_calls = options[:tool_calls]
       @input_tokens = options[:input_tokens]
       @output_tokens = options[:output_tokens]

diff --git a/lib/ruby_llm/model_info.rb b/lib/ruby_llm/model_info.rb
@@ -12,7 +12,7 @@ module RubyLLM
   #   model.input_price_per_million   # => 30.0
   class ModelInfo
     attr_reader :id, :name, :provider, :family, :created_at, :context_window, :max_output_tokens, :knowledge_cutoff,
-                :modalities, :capabilities, :pricing, :metadata
+                :modalities, :capabilities, :pricing, :metadata, :thinking
 
     def initialize(data)
       @id = data[:id]
@@ -22,6 +22,7 @@ def initialize(data)
       @created_at = data[:created_at]
       @context_window = data[:context_window]
       @max_output_tokens = data[:max_output_tokens]
+      @thinking = data[:thinking]
       @knowledge_cutoff = data[:knowledge_cutoff]
       @modalities = Modalities.new(data[:modalities] || {})
       @capabilities = data[:capabilities] || []
@@ -57,6 +58,10 @@ def supports_functions?
       function_calling?
     end
 
+    # Whether the model data flags this model as supporting thinking.
+    #
+    # @return [Boolean] true when `thinking` is truthy, false otherwise (including
+    #   when the model data had no `thinking` entry and the reader returns nil)
+    def supports_thinking?
+      thinking ? true : false
+    end
+
     # Returns the `input` value of this model's text-token pricing.
     def input_price_per_million
       text_pricing = pricing.text_tokens
       text_pricing.input
     end

diff --git a/lib/ruby_llm/models.json b/lib/ruby_llm/models.json
@@ -297,6 +297,7 @@
     "created_at": null,
     "context_window": 200000,
     "max_output_tokens": 64000,
+    "thinking": true,
     "knowledge_cutoff": null,
     "modalities": {
       "input": [

diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb
@@ -39,9 +39,13 @@ def build_base_payload(chat_messages, temperature, model, stream)
           {
             model: model,
             messages: chat_messages.map { |msg| format_message(msg) },
-            temperature: temperature,
+            temperature: 1, # TODO: Ensure to maintain this as being configurable - but must be set to 1 to enable thinking
             stream: stream,
-            max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096
+            max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096,
+            thinking: {
+              type: RubyLLM.models.find(model)&.supports_thinking? ? 'enabled' : 'disabled', # TODO: Make this configurable
+              budget_tokens: 1024 # TODO: Make this configurable
+            }
           }
         end
 
@@ -52,23 +56,32 @@ def add_optional_fields(payload, system_content:, tools:)
 
+        # Turns a completed Anthropic response into a Message.
+        #
+        # Reads the response body, pulls the thinking text, the plain text and the
+        # result of Tools.find_tool_use out of its 'content' blocks, and hands them
+        # to build_message.
         def parse_completion_response(response)
           data = response.body
+          # Debug-level dump; the interpolated string contains the whole response body.
+          RubyLLM.logger.debug("Anthropic response: #{data}")
+
+          # A body without a 'content' key is treated as having no blocks.
           content_blocks = data['content'] || []
 
+          thinking_content = extract_thinking_content(content_blocks)
           text_content = extract_text_content(content_blocks)
           tool_use = Tools.find_tool_use(content_blocks)
 
-          build_message(data, text_content, tool_use)
+          build_message(data, text_content, tool_use, thinking_content)
+        end
+
+        # Joins the 'thinking' value of every content block whose 'type' is 'thinking'.
+        #
+        # @param blocks [Array<Hash>] string-keyed content blocks from the response body
+        # @return [String, nil] the concatenated thinking text, or nil when there is none,
+        #   so callers can use a plain truthiness check ("" would be truthy in Ruby)
+        def extract_thinking_content(blocks)
+          thinking_text = blocks
+                          .select { |block| block['type'] == 'thinking' }
+                          .map { |block| block['thinking'] }
+                          .join
+          thinking_text unless thinking_text.empty?
+        end
 
         # Concatenates the 'text' value of every content block whose 'type' is 'text'.
         def extract_text_content(blocks)
           blocks
             .select { |block| block['type'] == 'text' }
             .map { |block| block['text'] }
             .join
         end
 
-        def build_message(data, content, tool_use)
+        def build_message(data, content, tool_use, thinking_content)
           Message.new(
             role: :assistant,
             content: content,
+            thinking_content: thinking_content,
             tool_calls: Tools.parse_tool_calls(tool_use),
             input_tokens: data.dig('usage', 'input_tokens'),
             output_tokens: data.dig('usage', 'output_tokens'),