adding tracing at info level to try to capture recovery from preemption/eviction

ryanolson · ryanolson · commit e8235995a231 · 2025-08-07T15:46:32.000Z
diff --git a/lib/bindings/python/rust/llm/block_manager/vllm/connector/leader.rs b/lib/bindings/python/rust/llm/block_manager/vllm/connector/leader.rs
@@ -297,10 +297,39 @@ impl Leader for KvConnectorLeader {
 
         for cached_req in &scheduler_output.cached_requests {
             let request_id = &cached_req.request_id;
-            assert!(
-                inflight_requests.remove(request_id),
-                "request_id {request_id} not found in inflight_requests: "
-            );
+
+            if cached_req.resumed_from_preemption {
+                // We do not know what state to expect here.
+                // First try to get the slot; this may fail, possibly because preemption already
+                // took the request through a finished cycle.
+                let shared_slot = self.slot_manager.get_slot(request_id);
+                match &shared_slot {
+                    Ok(_) => {
+                        tracing::info!("after preemption, slot is still alive");
+                    }
+                    Err(_) => {
+                        tracing::info!("after preemption, slot is not alive");
+                    }
+                }
+
+                let shared_slot = shared_slot?;
+                let mut slot = shared_slot
+                    .lock()
+                    .map_err(|e| anyhow::anyhow!("Failed to lock slot: {}", e))?;
+
+                // todo: we probably need to reset the slot state and reload it from `cached_req`; however, we do not
+                // know if it will take another pass at `get_num_new_matched_tokens` or `update_state_after_alloc`.
+                slot.reset_after_preemption()?;
+
+                // note: we cannot trigger onboarding here -- perhaps we are supposed to, or perhaps we will get
+                // another pass at `get_num_new_matched_tokens` or `update_state_after_alloc`.
+            } else {
+                // note: eviction might trigger this assert
+                assert!(
+                    inflight_requests.remove(request_id),
+                    "request_id {request_id} not found in inflight_requests: "
+                );
+            }
 
             let shared_slot = self.slot_manager.get_slot(request_id)?;
             let mut slot = shared_slot
diff --git a/lib/bindings/python/rust/llm/block_manager/vllm/connector/leader/slot.rs b/lib/bindings/python/rust/llm/block_manager/vllm/connector/leader/slot.rs
@@ -72,6 +72,9 @@ pub enum SlotState {
 
     /// The slot is finished and all resources have been released.
     Finished,
+
+    /// The slot is preempted and is waiting for the next iteration to resume.
+    Preempted,
 }
 
 pub trait Slot: std::fmt::Debug {
@@ -122,6 +125,9 @@ pub trait Slot: std::fmt::Debug {
 
     /// Record the number of tokens that were cached on the disk.
     fn record_cached_disk_tokens(&mut self, num_tokens: usize);
+
+    /// Reset the slot after preemption.
+    fn reset_after_preemption(&mut self) -> Result<(), SlotError>;
 }
 
 pub trait ExternallyManagedDeviceSlot: Slot {
@@ -341,6 +347,22 @@ impl Slot for VllmConnectorSlot {
         self.state
     }
 
+    fn reset_after_preemption(&mut self) -> Result<(), SlotError> {
+        assert!(self.staging_from_disk.is_none()); // panics, rather than returning Err, if set
+        assert!(self.staging_from_host.is_none()); // same: must be None on entry
+        assert!(self.pending_operations.is_none()); // same: must be None on entry
+
+        self.state = SlotState::Preempted; // mark the slot as preempted
+        self.iteration_first_scheduled = None; // clear the first-scheduled iteration marker
+        self.current_position = 0; // rewind the position to 0
+        self.evaluated_blocks = 0; // no blocks counted as evaluated
+        self.device_blocks.clear(); // presumably device block ids; verify the element type
+        self.tokens_cached_from_device = 0; // zero the per-tier cached-token counters
+        self.tokens_cached_from_host = 0;
+        self.tokens_cached_from_disk = 0;
+        Ok(()) // the visible body has no Err path
+    }
+
     fn record_cached_device_tokens(&mut self, num_tokens: usize) {
         self.tokens_cached_from_device = num_tokens;
         tracing::debug!("recording {} cached device tokens", num_tokens,);
@@ -511,13 +533,17 @@ impl Slot for VllmConnectorSlot {
             return Ok(());
         }
 
-        if !matches!(self.state(), SlotState::Initialized) {
+        if !matches!(self.state(), SlotState::Initialized | SlotState::Preempted) {
             return Err(SlotError::InvalidOperation(format!(
                 "slot must be in the NotScheduled state to acquire local matches; got {:?}",
                 self.state()
             )));
         }
 
+        if matches!(self.state(), SlotState::Preempted) {
+            tracing::info!("slot is in the Preempted state; we get another chance to match");
+        }
+
         let block_size = self.block_manager.block_size();
         let num_computed_blocks = num_computed_tokens / block_size;
         debug_assert!(num_computed_tokens % block_size == 0);