@@ -34,7 +34,11 @@ class Metrics:
3434    See https://prometheus.github.io/client_python/multiprocess/ for more 
3535    details on limitations. 
3636    """ 
37+ 
3738    labelname_finish_reason  =  "finished_reason" 
39+     labelname_waiting_lora_adapters  =  "waiting_lora_adapters" 
40+     labelname_running_lora_adapters  =  "running_lora_adapters" 
41+     labelname_max_lora  =  "max_lora" 
3842    _gauge_cls  =  prometheus_client .Gauge 
3943    _counter_cls  =  prometheus_client .Counter 
4044    _histogram_cls  =  prometheus_client .Histogram 
@@ -55,6 +59,16 @@ def __init__(self, labelnames: List[str], max_model_len: int):
5559            documentation = "Number of requests waiting to be processed." ,
5660            labelnames = labelnames ,
5761            multiprocess_mode = "sum" )
62+         self .gauge_lora_info  =  self ._gauge_cls (
63+             name = "vllm:lora_requests_info" ,
64+             documentation = "Running stats on lora requests." ,
65+             labelnames = [
66+                 self .labelname_running_lora_adapters ,
67+                 self .labelname_max_lora ,
68+                 self .labelname_waiting_lora_adapters ,
69+             ],
70+             multiprocess_mode = "livemostrecent" ,
71+         )
5872        self .gauge_scheduler_swapped  =  self ._gauge_cls (
5973            name = "vllm:num_requests_swapped" ,
6074            documentation = "Number of requests swapped to CPU." ,
@@ -426,6 +440,9 @@ def _log_histogram(self, histogram, data: Union[List[int],
426440        for  datum  in  data :
427441            histogram .labels (** self .labels ).observe (datum )
428442
443+     def  _log_gauge_string (self , gauge , data : Dict [str , str ]) ->  None :
444+         gauge .labels (** data ).set (1 )
445+ 
429446    def  _log_prometheus (self , stats : Stats ) ->  None :
430447        # System state data 
431448        self ._log_gauge (self .metrics .gauge_scheduler_running ,
@@ -442,7 +459,17 @@ def _log_prometheus(self, stats: Stats) -> None:
442459                        stats .cpu_prefix_cache_hit_rate )
443460        self ._log_gauge (self .metrics .gauge_gpu_prefix_cache_hit_rate ,
444461                        stats .gpu_prefix_cache_hit_rate )
445- 
462+         # Include max-lora in the metric; in the future this property of the
463+         # lora config may be extended to be dynamic.
464+         lora_info  =  {
465+             self .metrics .labelname_running_lora_adapters :
466+             "," .join (stats .running_lora_adapters ),
467+             self .metrics .labelname_waiting_lora_adapters :
468+             "," .join (stats .waiting_lora_adapters ),
469+             self .metrics .labelname_max_lora :
470+             stats .max_lora ,
471+         }
472+         self ._log_gauge_string (self .metrics .gauge_lora_info , lora_info )
446473        # Iteration level data 
447474        self ._log_counter (self .metrics .counter_num_preemption ,
448475                          stats .num_preemption_iter )
0 commit comments