File tree Expand file tree Collapse file tree 3 files changed +19
-4
lines changed
bindings/python/rust/llm/block_manager/vllm/connector Expand file tree Collapse file tree 3 files changed +19
-4
lines changed Original file line number Diff line number Diff line change @@ -139,6 +139,11 @@ impl Leader for KvConnectorLeader {
139139 . lock ( )
140140 . map_err ( |e| anyhow:: anyhow!( "Failed to lock slot: {}" , e) ) ?;
141141
142+ if slot. state ( ) == SlotState :: Prefilling {
143+ tracing:: warn!( "slot is in the Prefilling state; this seems like we need to reset the slot and start over" ) ;
144+ slot. reset ( ) ;
145+ }
146+
142147 // early exit if we cannot match full block
143148 if ( slot. sequence ( ) . total_tokens ( ) - num_computed_tokens) < self . block_size {
144149 return Ok ( ( 0 , false ) ) ;
@@ -319,7 +324,7 @@ impl Leader for KvConnectorLeader {
319324
320325 // todo: we probably need to reset the slot state and reload it from `cache_req`; however, we do not
321326 // know if it will take another pass at `get_num_new_matched_tokens` or `update_state_after_alloc`.
322- slot. reset_after_preemption ( ) ? ;
327+ slot. reset_after_preemption ( ) ;
323328
324329 // note, we cannot trigger onboarding here -- perhaps we are supposed to, or perhaps we will get another
325330 // pass at `get_num_new_matched_tokens` or `update_state_after_alloc`.
Original file line number Diff line number Diff line change @@ -127,7 +127,10 @@ pub trait Slot: std::fmt::Debug {
127127 fn record_cached_disk_tokens ( & mut self , num_tokens : usize ) ;
128128
129129 /// Reset the slot after preemption.
130- fn reset_after_preemption ( & mut self ) -> Result < ( ) , SlotError > ;
130+ fn reset_after_preemption ( & mut self ) ;
131+
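132+ /// Reset the slot so it can start over; the `VllmConnectorSlot` impl runs `reset_after_preemption` and then sets the state to `SlotState::Initialized`.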
133+ fn reset ( & mut self ) ;
131134}
132135
133136pub trait ExternallyManagedDeviceSlot : Slot {
@@ -347,7 +350,7 @@ impl Slot for VllmConnectorSlot {
347350 self . state
348351 }
349352
350- fn reset_after_preemption ( & mut self ) -> Result < ( ) , SlotError > {
353+ fn reset_after_preemption ( & mut self ) {
351354 assert ! ( self . staging_from_disk. is_none( ) ) ;
352355 assert ! ( self . staging_from_host. is_none( ) ) ;
353356 assert ! ( self . pending_operations. is_none( ) ) ;
@@ -360,7 +363,11 @@ impl Slot for VllmConnectorSlot {
360363 self . tokens_cached_from_device = 0 ;
361364 self . tokens_cached_from_host = 0 ;
362365 self . tokens_cached_from_disk = 0 ;
363- Ok ( ( ) )
366+ }
367+
368+ fn reset ( & mut self ) {
369+ self . reset_after_preemption ( ) ;
370+ self . state = SlotState :: Initialized ;
364371 }
365372
366373 fn record_cached_device_tokens ( & mut self , num_tokens : usize ) {
Original file line number Diff line number Diff line change @@ -38,6 +38,9 @@ pub mod types;
3838#[ cfg( feature = "block-manager" ) ]
3939pub mod block_manager;
4040
41+ #[ cfg( feature = "block-manager" ) ]
42+ pub mod integrations;
43+
4144/// Reads a JSON file, extracts a specific field, and deserializes it into type T.
4245///
4346/// # Arguments
You can’t perform that action at this time.
0 commit comments