66from ._utils import get_simple_chat_template
77
88
9- IGNORE_FIELDS = [
10- "metrics.vllm.latency.ttft" ,
11- "metrics.vllm.latency.queue" ,
12- "metrics.vllm.latency.prefill" ,
13- "metrics.vllm.latency.decode" ,
14- "metrics.vllm.latency.inference" ,
15- "metrics.vllm.latency.model_forward" ,
16- "metrics.vllm.latency.model_execute" ,
17- ]
9+ IGNORE_FIELDS = []
1810
1911
2012@pytest .mark .snapshot (ignores = IGNORE_FIELDS )
@@ -41,7 +33,16 @@ def test_llmobs_basic(llmobs_events, mock_tracer, opt_125m_llm):
4133 "finish_reason" : "length" ,
4234 "num_cached_tokens" : 0 ,
4335 },
44- token_metrics = {"input_tokens" : 6 , "output_tokens" : 8 , "total_tokens" : 14 },
36+ token_metrics = {
37+ "input_tokens" : 6 ,
38+ "output_tokens" : 8 ,
39+ "total_tokens" : 14 ,
40+ "time_to_first_token" : mock .ANY ,
41+ "time_in_queue" : mock .ANY ,
42+ "time_in_model_prefill" : mock .ANY ,
43+ "time_in_model_decode" : mock .ANY ,
44+ "time_in_model_inference" : mock .ANY ,
45+ },
4546 tags = {"ml_app" : "<ml-app-name>" , "service" : "tests.contrib.vllm" },
4647 )
4748 assert llmobs_events [0 ] == expected
@@ -95,7 +96,16 @@ def test_llmobs_chat(llmobs_events, mock_tracer, opt_125m_llm):
9596 "finish_reason" : "length" ,
9697 "num_cached_tokens" : mock .ANY ,
9798 },
98- token_metrics = {"input_tokens" : 37 , "output_tokens" : 16 , "total_tokens" : 53 },
99+ token_metrics = {
100+ "input_tokens" : 37 ,
101+ "output_tokens" : 16 ,
102+ "total_tokens" : 53 ,
103+ "time_to_first_token" : mock .ANY ,
104+ "time_in_queue" : mock .ANY ,
105+ "time_in_model_prefill" : mock .ANY ,
106+ "time_in_model_decode" : mock .ANY ,
107+ "time_in_model_inference" : mock .ANY ,
108+ },
99109 tags = {"ml_app" : "<ml-app-name>" , "service" : "tests.contrib.vllm" },
100110 )
101111 assert llmobs_events [0 ] == expected
@@ -128,7 +138,15 @@ def test_llmobs_classify(llmobs_events, mock_tracer, bge_reranker_llm):
128138 input_documents = [{"text" : prompt }],
129139 output_value = "[1 embedding(s) returned with size 1]" ,
130140 metadata = {"embedding_dim" : 1 , "num_cached_tokens" : 0 },
131- token_metrics = {"input_tokens" : 7 , "output_tokens" : 0 , "total_tokens" : 7 },
141+ token_metrics = {
142+ "input_tokens" : 7 ,
143+ "output_tokens" : 0 ,
144+ "total_tokens" : 7 ,
145+ "time_to_first_token" : mock .ANY ,
146+ "time_in_queue" : mock .ANY ,
147+ "time_in_model_prefill" : mock .ANY ,
148+ "time_in_model_inference" : mock .ANY ,
149+ },
132150 tags = {"ml_app" : "<ml-app-name>" , "service" : "tests.contrib.vllm" },
133151 )
134152 assert event == expected
@@ -161,7 +179,15 @@ def test_llmobs_embed(llmobs_events, mock_tracer, e5_small_llm):
161179 input_documents = [{"text" : prompt }],
162180 output_value = "[1 embedding(s) returned with size 384]" ,
163181 metadata = {"embedding_dim" : 384 , "num_cached_tokens" : 0 },
164- token_metrics = {"input_tokens" : 7 , "output_tokens" : 0 , "total_tokens" : 7 },
182+ token_metrics = {
183+ "input_tokens" : 7 ,
184+ "output_tokens" : 0 ,
185+ "total_tokens" : 7 ,
186+ "time_to_first_token" : mock .ANY ,
187+ "time_in_queue" : mock .ANY ,
188+ "time_in_model_prefill" : mock .ANY ,
189+ "time_in_model_inference" : mock .ANY ,
190+ },
165191 tags = {"ml_app" : "<ml-app-name>" , "service" : "tests.contrib.vllm" },
166192 )
167193 assert event == expected
@@ -194,7 +220,15 @@ def test_llmobs_reward(llmobs_events, mock_tracer, bge_reranker_llm):
194220 input_documents = [{"text" : prompt }],
195221 output_value = "[7 embedding(s) returned with size 1024]" ,
196222 metadata = {"embedding_dim" : 1024 , "num_cached_tokens" : 0 },
197- token_metrics = {"input_tokens" : 7 , "output_tokens" : 0 , "total_tokens" : 7 },
223+ token_metrics = {
224+ "input_tokens" : 7 ,
225+ "output_tokens" : 0 ,
226+ "total_tokens" : 7 ,
227+ "time_to_first_token" : mock .ANY ,
228+ "time_in_queue" : mock .ANY ,
229+ "time_in_model_prefill" : mock .ANY ,
230+ "time_in_model_inference" : mock .ANY ,
231+ },
198232 tags = {"ml_app" : "<ml-app-name>" , "service" : "tests.contrib.vllm" },
199233 )
200234 assert event == expected
@@ -223,11 +257,19 @@ def test_llmobs_score(llmobs_events, mock_tracer, bge_reranker_llm):
223257 "input_tokens" : 19 ,
224258 "output_tokens" : 0 ,
225259 "total_tokens" : 19 ,
260+ "time_to_first_token" : mock .ANY ,
261+ "time_in_queue" : mock .ANY ,
262+ "time_in_model_prefill" : mock .ANY ,
263+ "time_in_model_inference" : mock .ANY ,
226264 },
227265 "[0, 4865, 83, 70, 10323, 111, 9942, 32, 2, 2, 581, 10323, 111, 9942, 83, 7270, 5, 2]" : {
228266 "input_tokens" : 18 ,
229267 "output_tokens" : 0 ,
230268 "total_tokens" : 18 ,
269+ "time_to_first_token" : mock .ANY ,
270+ "time_in_queue" : mock .ANY ,
271+ "time_in_model_prefill" : mock .ANY ,
272+ "time_in_model_inference" : mock .ANY ,
231273 },
232274 }
233275
0 commit comments