11# SPDX-License-Identifier: Apache-2.0 
22
3+ import  time 
34from  collections  import  deque 
45from  typing  import  Deque , Dict , Iterable , List , Optional , Set , Tuple , Union 
56
1011from  vllm .v1 .core .kv_cache_manager  import  KVCacheManager 
1112from  vllm .v1 .core .scheduler_output  import  (CachedRequestData , NewRequestData ,
1213                                           SchedulerOutput )
13- from  vllm .v1 .engine  import  EngineCoreOutput , EngineCoreOutputs 
14+ from  vllm .v1 .engine  import  (EngineCoreEvent , EngineCoreEventType ,
15+                             EngineCoreOutput , EngineCoreOutputs )
1416from  vllm .v1 .metrics .stats  import  SchedulerStats 
1517from  vllm .v1 .outputs  import  ModelRunnerOutput 
1618from  vllm .v1 .request  import  Request , RequestStatus 
@@ -26,10 +28,12 @@ def __init__(
2628        model_config : ModelConfig ,
2729        cache_config : CacheConfig ,
2830        lora_config : Optional [LoRAConfig ],
31+         log_stats : bool ,
2932    ) ->  None :
3033        self .scheduler_config  =  scheduler_config 
3134        self .cache_config  =  cache_config 
3235        self .lora_config  =  lora_config 
36+         self .log_stats  =  log_stats 
3337
3438        # Scheduling constraints. 
3539        self .max_num_running_reqs  =  self .scheduler_config .max_num_seqs 
@@ -45,7 +49,8 @@ def __init__(
4549            num_gpu_blocks = num_gpu_blocks ,
4650            max_model_len = self .max_model_len ,
4751            sliding_window = self .cache_config .sliding_window ,
48-             enable_caching = self .cache_config .enable_prefix_caching )
52+             enable_caching = self .cache_config .enable_prefix_caching ,
53+             log_stats = self .log_stats )
4954        self .block_size  =  self .cache_config .block_size 
5055
5156        # req_id -> Request 
@@ -107,6 +112,8 @@ def schedule(self) -> "SchedulerOutput":
107112        scheduled_encoder_inputs : Dict [str , List [int ]] =  {}
108113        encoder_budget  =  self .max_num_encoder_input_tokens 
109114
115+         scheduled_timestamp  =  time .monotonic ()
116+ 
110117        # First, schedule the RUNNING requests. 
111118        req_index  =  0 
112119        while  req_index  <  len (self .running ) and  token_budget  >  0 :
@@ -246,6 +253,7 @@ def schedule(self) -> "SchedulerOutput":
246253                self .running .append (request )
247254                if  request .status  ==  RequestStatus .WAITING :
248255                    scheduled_new_reqs .append (request )
256+                     self .request_scheduled (request , scheduled_timestamp )
249257                elif  request .status  ==  RequestStatus .PREEMPTED :
250258                    scheduled_resumed_reqs .append (request )
251259                else :
@@ -508,7 +516,8 @@ def update_from_output(
508516                        finish_reason = request .get_finished_reason (),
509517                        new_logprobs = new_logprobs ,
510518                        new_prompt_logprobs_tensors = prompt_logprobs_tensors ,
511-                         stop_reason = request .stop_reason ))
519+                         stop_reason = request .stop_reason ,
520+                         events = request .take_events ()))
512521
513522            if  not  stopped :
514523                new_running .append (request )
@@ -541,6 +550,7 @@ def _check_stop(self, request: Request) -> bool:
541550    def  add_request (self , request : Request ) ->  None :
542551        self .waiting .append (request )
543552        self .requests [request .request_id ] =  request 
553+         self .request_queued (request )
544554
545555    def  finish_requests (
546556        self ,
@@ -588,7 +598,22 @@ def has_unfinished_requests(self) -> bool:
588598    def  reset_prefix_cache (self ) ->  bool :
589599        return  self .kv_cache_manager .reset_prefix_cache ()
590600
591-     def  make_stats (self ) ->  SchedulerStats :
601+     def  request_queued (self , request : Request ):
602+         if  not  self .log_stats :
603+             return 
604+         request .events .append (
605+             EngineCoreEvent .new_event (EngineCoreEventType .QUEUED ))
606+ 
607+     def  request_scheduled (self , request : Request , timestamp : float ):
608+         if  not  self .log_stats :
609+             return 
610+         request .events .append (
611+             EngineCoreEvent .new_event (EngineCoreEventType .SCHEDULED ,
612+                                       timestamp ))
613+ 
614+     def  make_stats (self ) ->  Optional [SchedulerStats ]:
615+         if  not  self .log_stats :
616+             return  None 
592617        return  SchedulerStats (
593618            num_running_reqs = len (self .running ),
594619            num_waiting_reqs = len (self .waiting ),
0 commit comments