diff --git a/agents-core/vision_agents/core/agents/agents.py b/agents-core/vision_agents/core/agents/agents.py
index 575f2156..1a3a9a02 100644
--- a/agents-core/vision_agents/core/agents/agents.py
+++ b/agents-core/vision_agents/core/agents/agents.py
@@ -241,17 +241,6 @@ async def join(self, call: Call) -> "AgentSessionContextManager":
             audio_track = self._audio_track if self.publish_audio else None
             video_track = self._video_track if self.publish_video else None
 
-            if audio_track or video_track:
-                with self.tracer.start_as_current_span("edge.publish_tracks"):
-                    await self.edge.publish_tracks(audio_track, video_track)
-                await self._listen_to_audio_and_video()
-
-            self.logger.info(f"🤖 Agent joined call: {call.id}")
-
-            # Set up audio and video tracks together to avoid SDP issues
-            audio_track = self._audio_track if self.publish_audio else None
-            video_track = self._video_track if self.publish_video else None
-
             if audio_track or video_track:
                 with self.tracer.start_as_current_span("edge.publish_tracks"):
                     await self.edge.publish_tracks(audio_track, video_track)
@@ -631,6 +620,10 @@ async def _process_track(self, track_id: str, track_type: str, participant):
             self.logger.info("No image processors, video processing handled by video processors only")
             return
         
+        # Initialize error tracking counters
+        timeout_errors = 0
+        consecutive_errors = 0
+        
         while True:
             try:
                 # Use the shared forwarder instead of competing for track.recv()
@@ -660,6 +653,7 @@ async def _process_track(self, track_id: str, track_type: str, participant):
 
             except asyncio.TimeoutError:
                 # Exponential backoff for timeout errors
+                timeout_errors += 1
                 backoff_delay = min(2.0 ** min(timeout_errors, 5), 30.0)
                 self.logger.debug(
                     f"🎥VDP: Applying backoff delay: {backoff_delay:.1f}s"