@@ -493,6 +493,9 @@ async def _achat_streaming(
493493 current_tool_calls = {} # Track tool calls by tool_call_id
494494 thought_buffer = [] # Buffer for accumulating complete thoughts
495495 current_thinking_chunk = [] # Buffer for accumulating Claude's token-by-token thinking
496+
497+ # Determine if this is a Gemini model (sends complete thoughts) vs Claude (streams tokens)
498+ is_gemini = self .config .is_gemini_model ()
496499
497500 # Stream events from the graph
498501 async for event in self .graph .astream_events (
@@ -514,32 +517,46 @@ async def _achat_streaming(
514517 for part in content :
515518 if isinstance (part , dict ):
516519 # Check for thinking content
517- # Gemini uses 'type': 'thinking' with 'thinking' key (sends complete blocks)
518- # Claude uses 'type': 'thinking' with 'thinking' key for extended thinking (streams token-by-token)
519520 if part .get ("type" ) == "thinking" and "thinking" in part :
520521 thought_text = part .get ("thinking" , "" )
521522 if thought_text :
522- # Claude streams thinking token-by-token, accumulate it
523- current_thinking_chunk .append (thought_text )
524-
525- if self .config .verbose :
526- logger .debug (f"Captured thinking token: { thought_text [:50 ]} ..." )
523+ if is_gemini :
524+ # Gemini sends complete thoughts each turn - stream immediately
525+ if self .config .verbose :
526+ logger .debug (f"Gemini complete thought: { thought_text [:50 ]} ..." )
527+
528+ thought_buffer .append (thought_text )
529+
530+ # Stream the complete thought immediately if callback provided
531+ if thinking_callback and self .config .show_thinking :
532+ await thinking_callback (
533+ thought_text ,
534+ "thinking" ,
535+ "" ,
536+ "" ,
537+ )
538+ else :
539+ # Claude streams thinking token-by-token, accumulate it
540+ current_thinking_chunk .append (thought_text )
541+
542+ if self .config .verbose :
543+ logger .debug (f"Claude thinking token: { thought_text [:50 ]} ..." )
527544
528545 # Also check for thinking content blocks (alternative format)
529546 elif isinstance (content , str ) and content :
530547 # Some models might send thinking as regular text chunks
531548 # We'll handle this in on_chat_model_end
532549 pass
533550
534- # When model completes a response, process accumulated thinking
551+ # When model completes a response, process accumulated thinking (Claude only)
535552 elif kind == "on_chat_model_end" :
536- # If we accumulated thinking chunks, combine them
553+ # If we accumulated thinking chunks (Claude) , combine and stream them
537554 if current_thinking_chunk :
538555 complete_thought = "" .join (current_thinking_chunk )
539556 thought_buffer .append (complete_thought )
540557
541558 if self .config .verbose :
542- logger .debug (f"Complete thought accumulated ({ len (complete_thought )} chars)" )
559+ logger .debug (f"Complete Claude thought accumulated ({ len (complete_thought )} chars)" )
543560
544561 # Stream the complete thought if callback provided
545562 if thinking_callback and self .config .show_thinking :
0 commit comments