@@ -74,31 +74,51 @@ def __init__(self, labelnames: List[str], vllm_config: VllmConfig):
7474            ],
7575            multiprocess_mode = "livemostrecent" ,
7676        )
77+ 
78+         # Deprecated in 0.8 - KV cache offloading is not used in V1 
79+         # TODO: in 0.9, only enable if show_hidden_metrics=True 
7780        self .gauge_scheduler_swapped  =  self ._gauge_cls (
7881            name = "vllm:num_requests_swapped" ,
79-             documentation = "Number of requests swapped to CPU." ,
82+             documentation = (
83+                 "Number of requests swapped to CPU. " 
84+                 "DEPRECATED: KV cache offloading is not used in V1" ),
8085            labelnames = labelnames ,
8186            multiprocess_mode = "sum" )
87+ 
8288        #   KV Cache Usage in % 
8389        self .gauge_gpu_cache_usage  =  self ._gauge_cls (
8490            name = "vllm:gpu_cache_usage_perc" ,
8591            documentation = "GPU KV-cache usage. 1 means 100 percent usage." ,
8692            labelnames = labelnames ,
8793            multiprocess_mode = "sum" )
94+ 
95+         # Deprecated in 0.8 - KV cache offloading is not used in V1 
96+         # TODO: in 0.9, only enable if show_hidden_metrics=True 
8897        self .gauge_cpu_cache_usage  =  self ._gauge_cls (
8998            name = "vllm:cpu_cache_usage_perc" ,
90-             documentation = "CPU KV-cache usage. 1 means 100 percent usage." ,
99+             documentation = (
100+                 "CPU KV-cache usage. 1 means 100 percent usage. " 
101+                 "DEPRECATED: KV cache offloading is not used in V1" ),
91102            labelnames = labelnames ,
92103            multiprocess_mode = "sum" )
93-         #   Prefix caching block hit rate 
104+ 
105+         # Deprecated in 0.8 - KV cache offloading is not used in V1 
106+         # TODO: in 0.9, only enable if show_hidden_metrics=True 
94107        self .gauge_cpu_prefix_cache_hit_rate  =  self ._gauge_cls (
95108            name = "vllm:cpu_prefix_cache_hit_rate" ,
96-             documentation = "CPU prefix cache block hit rate." ,
109+             documentation = (
110+                 "CPU prefix cache block hit rate. " 
111+                 "DEPRECATED: KV cache offloading is not used in V1" ),
97112            labelnames = labelnames ,
98113            multiprocess_mode = "sum" )
114+ 
115+         # Deprecated in 0.8 - replaced by queries+hits counters in V1 
116+         # TODO: in 0.9, only enable if show_hidden_metrics=True 
99117        self .gauge_gpu_prefix_cache_hit_rate  =  self ._gauge_cls (
100118            name = "vllm:gpu_prefix_cache_hit_rate" ,
101-             documentation = "GPU prefix cache block hit rate." ,
119+             documentation = ("GPU prefix cache block hit rate. " 
120+                            "DEPRECATED: use vllm:gpu_prefix_cache_queries and " 
121+                            "vllm:gpu_prefix_cache_hits in V1" ),
102122            labelnames = labelnames ,
103123            multiprocess_mode = "sum" )
104124
0 commit comments