Skip to content

Commit 002da46

Browse files
committed
Fully reset the slot state if the slot is in the Prefilling state when get_num_new_matched_tokens is called unexpectedly
1 parent 142e626 commit 002da46

File tree

3 files changed

+19
-4
lines changed

3 files changed

+19
-4
lines changed

lib/bindings/python/rust/llm/block_manager/vllm/connector/leader.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,11 @@ impl Leader for KvConnectorLeader {
139139
.lock()
140140
.map_err(|e| anyhow::anyhow!("Failed to lock slot: {}", e))?;
141141

142+
if slot.state() == SlotState::Prefilling {
143+
tracing::warn!("slot is in the Prefilling state; this seems like we need to reset the slot and start over");
144+
slot.reset();
145+
}
146+
142147
// early exit if we cannot match a full block
143148
if (slot.sequence().total_tokens() - num_computed_tokens) < self.block_size {
144149
return Ok((0, false));
@@ -319,7 +324,7 @@ impl Leader for KvConnectorLeader {
319324

320325
// todo: we probably need to reset the slot state and reload it from `cache_req`; however, we do not
321326
// know if it will take another pass at `get_num_new_matched_tokens` or `update_state_after_alloc`.
322-
slot.reset_after_preemption()?;
327+
slot.reset_after_preemption();
323328

324329
// note, we can not trigger onboarding here -- perhaps we are supposed to or perhaps will get another
325330
// pass at `get_num_new_matched_tokens` or `update_state_after_alloc`.

lib/bindings/python/rust/llm/block_manager/vllm/connector/leader/slot.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,10 @@ pub trait Slot: std::fmt::Debug {
127127
fn record_cached_disk_tokens(&mut self, num_tokens: usize);
128128

129129
/// Reset the slot after preemption.
130-
fn reset_after_preemption(&mut self) -> Result<(), SlotError>;
130+
fn reset_after_preemption(&mut self);
131+
132+
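/// Fully reset the slot: clear everything `reset_after_preemption` clears and return the state to `SlotState::Initialized`.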
133+
fn reset(&mut self);
131134
}
132135

133136
pub trait ExternallyManagedDeviceSlot: Slot {
@@ -347,7 +350,7 @@ impl Slot for VllmConnectorSlot {
347350
self.state
348351
}
349352

350-
fn reset_after_preemption(&mut self) -> Result<(), SlotError> {
353+
fn reset_after_preemption(&mut self) {
351354
assert!(self.staging_from_disk.is_none());
352355
assert!(self.staging_from_host.is_none());
353356
assert!(self.pending_operations.is_none());
@@ -360,7 +363,11 @@ impl Slot for VllmConnectorSlot {
360363
self.tokens_cached_from_device = 0;
361364
self.tokens_cached_from_host = 0;
362365
self.tokens_cached_from_disk = 0;
363-
Ok(())
366+
}
367+
368+
// Full reset: runs the preemption reset first, then puts the slot back into
// `SlotState::Initialized`.
// NOTE(review): `reset_after_preemption` asserts that `staging_from_disk`,
// `staging_from_host` and `pending_operations` are all `None`, so this call
// panics if any of them is still set -- confirm callers guarantee that.
fn reset(&mut self) {
369+
self.reset_after_preemption();
370+
self.state = SlotState::Initialized;
364371
}
365372

366373
fn record_cached_device_tokens(&mut self, num_tokens: usize) {

lib/llm/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ pub mod types;
3838
#[cfg(feature = "block-manager")]
3939
pub mod block_manager;
4040

41+
#[cfg(feature = "block-manager")]
42+
pub mod integrations;
43+
4144
/// Reads a JSON file, extracts a specific field, and deserializes it into type T.
4245
///
4346
/// # Arguments

0 commit comments

Comments
 (0)