@@ -129,6 +129,9 @@ def __init__(
129129 self .conversation : Optional [Conversation ] = None
130130 self ._user_conversation_handle : Optional [StreamHandle ] = None
131131 self ._agent_conversation_handle : Optional [StreamHandle ] = None
132+
133+ # Track pending transcripts for turn-based response triggering
134+ self ._pending_user_transcripts : Dict [str , str ] = {}
132135
133136 # Merge plugin events BEFORE subscribing to any events
134137 for plugin in [stt , tts , turn_detection , vad , llm ]:
@@ -673,16 +676,56 @@ async def _process_track(self, track_id: str, track_type: str, participant):
673676
674677 async def _on_turn_event (self , event : TurnStartedEvent | TurnEndedEvent ) -> None :
675678 """Handle turn detection events."""
679+ # In realtime mode, the LLM handles turn detection, interruption, and responses itself
680+ if self .realtime_mode :
681+ return
682+
676683 if isinstance (event , TurnStartedEvent ):
677- # TODO: Implement TTS pause/resume functionality
678- # For now, TTS will continue playing - this should be improved
679- self .logger .info (
680- f"👉 Turn started - participant speaking { event .speaker_id } : { event .confidence } "
681- )
684+ # Interrupt TTS when user starts speaking (barge-in)
685+ if event .speaker_id and event .speaker_id != self .agent_user .id :
686+ if self .tts :
687+ self .logger .info (
688+ f"👉 Turn started - interrupting TTS for participant { event .speaker_id } "
689+ )
690+ try :
691+ await self .tts .stop_audio ()
692+ except Exception as e :
693+ self .logger .error (f"Error stopping TTS: { e } " )
694+ else :
695+ self .logger .info (
696+ f"👉 Turn started - participant speaking { event .speaker_id } : { event .confidence } "
697+ )
698+ else :
699+ # Agent itself started speaking - this is normal
700+ self .logger .debug (
701+ f"👉 Turn started - agent speaking { event .speaker_id } "
702+ )
682703 elif isinstance (event , TurnEndedEvent ):
683704 self .logger .info (
684705 f"👉 Turn ended - participant { event .speaker_id } finished (duration: { event .duration } , confidence: { event .confidence } )"
685706 )
707+
708+ # When turn detection is enabled, trigger LLM response when user's turn ends
709+ # This is the signal that the user has finished speaking and expects a response
710+ if event .speaker_id and event .speaker_id != self .agent_user .id :
711+ # Get the accumulated transcript for this speaker
712+ transcript = self ._pending_user_transcripts .get (event .speaker_id , "" )
713+
714+ if transcript and transcript .strip ():
715+ self .logger .info (f"🤖 Triggering LLM response after turn ended for { event .speaker_id } " )
716+
717+ # Create participant object if we have metadata
718+ participant = None
719+ if hasattr (event , 'custom' ) and event .custom :
720+ # Try to extract participant info from custom metadata
721+ participant = event .custom .get ('participant' )
722+
723+ # Trigger LLM response with the complete transcript
724+ if self .llm :
725+ await self .simple_response (transcript , participant )
726+
727+ # Clear the pending transcript for this speaker
728+ self ._pending_user_transcripts [event .speaker_id ] = ""
686729
687730 async def _on_partial_transcript (
688731 self , event : STTPartialTranscriptEvent | RealtimePartialTranscriptEvent
@@ -737,6 +780,38 @@ async def _on_transcript(self, event: STTTranscriptEvent | RealtimeTranscriptEve
737780 )
738781 self .conversation .complete_message (self ._user_conversation_handle )
739782 self ._user_conversation_handle = None
783+
784+ # In realtime mode, the LLM handles everything itself (STT, turn detection, responses)
785+ # Skip our manual LLM triggering logic
786+ if self .realtime_mode :
787+ return
788+
789+ # Determine how to handle LLM triggering based on turn detection
790+ if self .turn_detection is not None :
791+ # With turn detection: accumulate transcripts and wait for TurnEndedEvent
792+ # Store/append the transcript for this user
793+ if user_id not in self ._pending_user_transcripts :
794+ self ._pending_user_transcripts [user_id ] = event .text
795+ else :
796+ # Append to existing transcript (user might be speaking in chunks)
797+ self ._pending_user_transcripts [user_id ] += " " + event .text
798+
799+ self .logger .debug (
800+ f"📝 Accumulated transcript for { user_id } (waiting for turn end): "
801+ f"{ self ._pending_user_transcripts [user_id ][:100 ]} ..."
802+ )
803+ else :
804+ # Without turn detection: trigger LLM immediately on transcript completion
805+ # This is the traditional STT -> LLM flow
806+ if self .llm :
807+ self .logger .info ("🤖 Triggering LLM response immediately (no turn detection)" )
808+
809+ # Get participant from event metadata
810+ participant = None
811+ if hasattr (event , "user_metadata" ):
812+ participant = event .user_metadata
813+
814+ await self .simple_response (event .text , participant )
740815
741816 async def _on_stt_error (self , error ):
742817 """Handle STT service errors."""
0 commit comments