77This module defines the default health check payload for vLLM backends.
88"""
99
10+ import logging
11+
1012from dynamo .health_check import HealthCheckPayload
1113
14+ logger = logging .getLogger (__name__ )
15+
16+
17+ def _get_bos_token_id_from_engine (engine_client ) -> int :
18+ """
19+ Extract BOS token ID from the vLLM engine client's tokenizer if available.
20+
21+ Args:
22+ engine_client: vLLM AsyncLLM engine client
23+
24+ Returns:
25+ BOS token ID from the model's tokenizer, or 1 as fallback
26+ """
27+ if engine_client is None :
28+ return 1
29+
30+ try :
31+ tokenizer_group = getattr (engine_client , "tokenizer" , None )
32+ if tokenizer_group :
33+ tokenizer = getattr (tokenizer_group , "tokenizer" , None )
34+ if tokenizer :
35+ bos_token_id = getattr (tokenizer , "bos_token_id" , None )
36+ if bos_token_id is not None :
37+ logger .info (
38+ f"Using model's BOS token ID for health check: { bos_token_id } "
39+ )
40+ return int (bos_token_id )
41+ except Exception as e :
42+ logger .debug (f"Failed to get BOS token from engine: { e } " )
43+
44+ logger .debug ("Using default BOS token ID (1) for health check" )
45+ return 1
46+
1247
1348class VllmHealthCheckPayload (HealthCheckPayload ):
1449 """
@@ -17,14 +52,20 @@ class VllmHealthCheckPayload(HealthCheckPayload):
1752 Provides vLLM defaults and inherits environment override support from base class.
1853 """
1954
20- def __init__ (self ):
55+ def __init__ (self , engine_client = None ):
2156 """
2257 Initialize vLLM health check payload with vLLM-specific defaults.
58+
59+ Args:
60+ engine_client: Optional vLLM AsyncLLM engine client to extract BOS token from.
61+ If provided, will attempt to use the model's actual BOS token.
2362 """
63+ bos_token_id = _get_bos_token_id_from_engine (engine_client )
64+
2465 # Set vLLM default payload - minimal request that completes quickly
2566 # The handler expects token_ids, sampling_options, and stop_conditions
2667 self .default_payload = {
27- "token_ids" : [1 ], # Single token for minimal processing
68+ "token_ids" : [bos_token_id ],
2869 "sampling_options" : {
2970 "max_tokens" : 1 ,
3071 "temperature" : 0.0 ,
@@ -38,3 +79,44 @@ def __init__(self):
3879 },
3980 }
4081 super ().__init__ ()
82+
83+
class VllmPrefillHealthCheckPayload(HealthCheckPayload):
    """
    Health check payload for vLLM prefill workers running in disaggregated mode.

    The prefill handler expects a request shaped differently from the regular
    vLLM payload: it is identified by 'request_id' and carries its sampling
    configuration under 'sampling_params'.
    """

    def __init__(self, engine_client=None):
        """
        Build the default prefill health check request.

        Args:
            engine_client: Optional vLLM AsyncLLM engine client. It is handed to
                _get_bos_token_id_from_engine to pick the single prompt token.
        """
        prompt_token_id = _get_bos_token_id_from_engine(engine_client)

        # The handler converts sampling_params via msgspec, so this stays a
        # plain dict of primitive values.
        sampling_params = dict(
            max_tokens=1,
            min_tokens=1,
            temperature=0.0,
            top_p=1.0,
            top_k=-1,
            detokenize=False,
            include_stop_str_in_output=False,
            ignore_eos=False,
            extra_args=dict(
                kv_transfer_params=dict(do_remote_decode=True),
            ),
        )

        # Prefill handler expects request_id, token_ids, and sampling_params.
        # The payload is assigned before the base initializer runs.
        # NOTE(review): presumably the base class reads default_payload — confirm.
        self.default_payload = dict(
            request_id="health_check",
            token_ids=[prompt_token_id],
            sampling_params=sampling_params,
        )
        super().__init__()
0 commit comments