@@ -34,7 +34,11 @@ class Metrics:
3434 See https://prometheus.github.io/client_python/multiprocess/ for more
3535 details on limitations.
3636 """
37+
3738 labelname_finish_reason = "finished_reason"
39+ labelname_waiting_lora_adapters = "waiting_lora_adapters"
40+ labelname_running_lora_adapters = "running_lora_adapters"
41+ labelname_max_lora = "max_lora"
3842 _gauge_cls = prometheus_client .Gauge
3943 _counter_cls = prometheus_client .Counter
4044 _histogram_cls = prometheus_client .Histogram
@@ -55,6 +59,16 @@ def __init__(self, labelnames: List[str], max_model_len: int):
5559 documentation = "Number of requests waiting to be processed." ,
5660 labelnames = labelnames ,
5761 multiprocess_mode = "sum" )
62+ self .gauge_lora_info = self ._gauge_cls (
63+ name = "vllm:lora_requests_info" ,
64+ documentation = "Running stats on lora requests." ,
65+ labelnames = [
66+ self .labelname_running_lora_adapters ,
67+ self .labelname_max_lora ,
68+ self .labelname_waiting_lora_adapters ,
69+ ],
70+ multiprocess_mode = "livemostrecent" ,
71+ )
5872 self .gauge_scheduler_swapped = self ._gauge_cls (
5973 name = "vllm:num_requests_swapped" ,
6074 documentation = "Number of requests swapped to CPU." ,
@@ -426,6 +440,9 @@ def _log_histogram(self, histogram, data: Union[List[int],
426440 for datum in data :
427441 histogram .labels (** self .labels ).observe (datum )
428442
443+ def _log_gauge_string (self , gauge , data : Dict [str , str ]) -> None :
444+ gauge .labels (** data ).set (1 )
445+
429446 def _log_prometheus (self , stats : Stats ) -> None :
430447 # System state data
431448 self ._log_gauge (self .metrics .gauge_scheduler_running ,
@@ -442,7 +459,17 @@ def _log_prometheus(self, stats: Stats) -> None:
442459 stats .cpu_prefix_cache_hit_rate )
443460 self ._log_gauge (self .metrics .gauge_gpu_prefix_cache_hit_rate ,
444461 stats .gpu_prefix_cache_hit_rate )
445-
462+ # Including max-lora in the metric; in the future this property of the lora
463+ # config may be extended to be dynamic.
464+ lora_info = {
465+ self .metrics .labelname_running_lora_adapters :
466+ "," .join (stats .running_lora_adapters ),
467+ self .metrics .labelname_waiting_lora_adapters :
468+ "," .join (stats .waiting_lora_adapters ),
469+ self .metrics .labelname_max_lora :
470+ stats .max_lora ,
471+ }
472+ self ._log_gauge_string (self .metrics .gauge_lora_info , lora_info )
446473 # Iteration level data
447474 self ._log_counter (self .metrics .counter_num_preemption ,
448475 stats .num_preemption_iter )
0 commit comments