@@ -197,24 +197,35 @@ def __init__(self, labelnames: List[str], vllm_config: VllmConfig):
197197 "Histogram of time spent in DECODE phase for request." ,
198198 labelnames = labelnames ,
199199 buckets = request_latency_buckets )
200+ # Deprecated in 0.8 - duplicates vllm:request_queue_time_seconds:
201+ # TODO: in 0.9, only enable if show_hidden_metrics=True
200202 self .histogram_time_in_queue_request = self ._histogram_cls (
201203 name = "vllm:time_in_queue_requests" ,
202- documentation =
203- "Histogram of time the request spent in the queue in seconds." ,
204+ documentation = (
205+ "Histogram of time the request spent in the queue in seconds. "
206+ "DEPRECATED: use vllm:request_queue_time_seconds instead." ),
204207 labelnames = labelnames ,
205208 buckets = request_latency_buckets )
209+
210+ # Deprecated in 0.8 - use prefill/decode/inference time metrics
211+ # TODO: in 0.9, only enable if show_hidden_metrics=True
206212 self .histogram_model_forward_time_request = self ._histogram_cls (
207213 name = "vllm:model_forward_time_milliseconds" ,
208- documentation =
209- "Histogram of time spent in the model forward pass in ms." ,
214+ documentation = (
215+ "Histogram of time spent in the model forward pass in ms. "
216+ "DEPRECATED: use prefill/decode/inference time metrics instead."
217+ ),
210218 labelnames = labelnames ,
211219 buckets = build_1_2_3_5_8_buckets (3000 ))
212220 self .histogram_model_execute_time_request = self ._histogram_cls (
213221 name = "vllm:model_execute_time_milliseconds" ,
214- documentation =
215- "Histogram of time spent in the model execute function in ms." ,
222+ documentation = (
223+ "Histogram of time spent in the model execute function in ms."
224+ "DEPRECATED: use prefill/decode/inference time metrics instead."
225+ ),
216226 labelnames = labelnames ,
217227 buckets = build_1_2_3_5_8_buckets (3000 ))
228+
218229 # Metadata
219230 self .histogram_num_prompt_tokens_request = self ._histogram_cls (
220231 name = "vllm:request_prompt_tokens" ,
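
A minimal sketch (not part of this diff) of the 0.9 follow-up described in the TODO comments: register a deprecated histogram only when hidden metrics are explicitly re-enabled. The bare `show_hidden_metrics` boolean used below is an assumption for illustration; the real flag name and how it reaches this constructor may differ.

# Sketch only: `show_hidden_metrics` is an assumed boolean flag, not
# something this diff introduces; the idea is to skip creating the
# deprecated histogram unless the operator opts back in.
if show_hidden_metrics:
    self.histogram_time_in_queue_request = self._histogram_cls(
        name="vllm:time_in_queue_requests",
        documentation=(
            "Histogram of time the request spent in the queue in seconds. "
            "DEPRECATED: use vllm:request_queue_time_seconds instead."),
        labelnames=labelnames,
        buckets=request_latency_buckets)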