 from vllm.v1.core.kv_cache_manager import KVCacheManager
 from vllm.v1.core.scheduler_output import (CachedRequestData, NewRequestData,
                                            SchedulerOutput)
-from vllm.v1.engine import EngineCoreOutput, EngineCoreOutputs
+from vllm.v1.engine import (EngineCoreEvent, EngineCoreEventType,
+                            EngineCoreOutput, EngineCoreOutputs)
 from vllm.v1.metrics.stats import SchedulerStats
 from vllm.v1.outputs import ModelRunnerOutput
 from vllm.v1.request import Request, RequestStatus
@@ -27,10 +28,12 @@ def __init__(
         model_config: ModelConfig,
         cache_config: CacheConfig,
         lora_config: Optional[LoRAConfig],
+        log_stats: bool,
     ) -> None:
         self.scheduler_config = scheduler_config
         self.cache_config = cache_config
         self.lora_config = lora_config
+        self.log_stats = log_stats

         # Scheduling constraints.
         self.max_num_running_reqs = self.scheduler_config.max_num_seqs
@@ -46,7 +49,8 @@ def __init__(
             num_gpu_blocks=num_gpu_blocks,
             max_model_len=self.max_model_len,
             sliding_window=self.cache_config.sliding_window,
-            enable_caching=self.cache_config.enable_prefix_caching)
+            enable_caching=self.cache_config.enable_prefix_caching,
+            log_stats=self.log_stats)
         self.block_size = self.cache_config.block_size

         # req_id -> Request
@@ -249,7 +253,7 @@ def schedule(self) -> "SchedulerOutput":
             self.running.append(request)
             if request.status == RequestStatus.WAITING:
                 scheduled_new_reqs.append(request)
-                request.scheduled(scheduled_timestamp)
+                self.request_scheduled(request, scheduled_timestamp)
             elif request.status == RequestStatus.PREEMPTED:
                 scheduled_resumed_reqs.append(request)
             else:
@@ -546,7 +550,7 @@ def _check_stop(self, request: Request) -> bool:
     def add_request(self, request: Request) -> None:
         self.waiting.append(request)
         self.requests[request.request_id] = request
-        request.queued()
+        self.request_queued(request)

     def finish_requests(
         self,
@@ -594,7 +598,22 @@ def has_unfinished_requests(self) -> bool:
     def reset_prefix_cache(self) -> bool:
         return self.kv_cache_manager.reset_prefix_cache()

-    def make_stats(self) -> SchedulerStats:
+    def request_queued(self, request: Request):
+        if not self.log_stats:
+            return
+        request.events.append(
+            EngineCoreEvent.new_event(EngineCoreEventType.QUEUED))
+
+    def request_scheduled(self, request: Request, timestamp: float):
+        if not self.log_stats:
+            return
+        request.events.append(
+            EngineCoreEvent.new_event(EngineCoreEventType.SCHEDULED,
+                                      timestamp))
+
+    def make_stats(self) -> Optional[SchedulerStats]:
+        if not self.log_stats:
+            return None
         return SchedulerStats(
             num_running_reqs=len(self.running),
             num_waiting_reqs=len(self.waiting),
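
Note: the pattern introduced in this diff gates all per-request event bookkeeping and stats construction behind a single log_stats flag, so the stats path costs nothing when disabled. A minimal standalone sketch of that gating is below; the Event, Req, and MiniScheduler names are simplified stand-ins for illustration, not the actual vLLM types.

    import time
    from dataclasses import dataclass, field
    from typing import List


    @dataclass
    class Event:
        # Stand-in for EngineCoreEvent: just a type tag and a timestamp.
        event_type: str
        timestamp: float


    @dataclass
    class Req:
        # Stand-in for Request: only the event list matters here.
        events: List[Event] = field(default_factory=list)


    class MiniScheduler:
        def __init__(self, log_stats: bool) -> None:
            self.log_stats = log_stats

        def request_queued(self, request: Req) -> None:
            # No-op when stats logging is disabled, mirroring the diff above.
            if not self.log_stats:
                return
            request.events.append(Event("QUEUED", time.monotonic()))

        def request_scheduled(self, request: Req, timestamp: float) -> None:
            if not self.log_stats:
                return
            request.events.append(Event("SCHEDULED", timestamp))


    req = Req()
    MiniScheduler(log_stats=True).request_scheduled(req, time.monotonic())
    assert len(req.events) == 1
    MiniScheduler(log_stats=False).request_queued(req)
    assert len(req.events) == 1  # nothing recorded when log_stats is False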