22
33import  asyncio 
44from  dataclasses  import  dataclass 
5- from  typing  import  Dict , List , Optional 
5+ from  typing  import  Dict , List , Optional ,  Union 
66
77from  vllm .outputs  import  RequestOutput 
88from  vllm .sampling_params  import  RequestOutputKind 
@@ -164,6 +164,7 @@ def process_outputs(
164164
165165            new_token_ids  =  engine_core_output .new_token_ids 
166166            finish_reason  =  engine_core_output .finish_reason 
167+             stop_reason  =  engine_core_output .stop_reason 
167168
168169            # TODO(andy): prompt logprobs + chunked prefill can 
169170            # result in engine core returning an output for a 
@@ -181,9 +182,10 @@ def process_outputs(
181182
182183            # 2) Detokenize the token ids into text and check for stop 
183184            #    strings. 
184-             stop_reason  =  req_state .detokenizer .update (new_token_ids )
185-             if  stop_reason :
185+             stop_string  =  req_state .detokenizer .update (new_token_ids )
186+             if  stop_string   and   finish_reason   !=   FinishReason . STOP :
186187                finish_reason  =  FinishReason .STOP 
188+                 stop_reason  =  stop_string 
187189
188190            # 3) Compute sample and prompt logprobs for request, 
189191            #    if required. 
@@ -250,7 +252,7 @@ def _make_request_output(
250252        request_state : RequestState ,
251253        new_token_ids : List [int ],
252254        finish_reason : Optional [FinishReason ],
253-         stop_reason : Optional [ str ],
255+         stop_reason : Union [ int ,  str ,  None ],
254256    ) ->  Optional [RequestOutput ]:
255257
256258        finished  =  finish_reason  is  not   None 
0 commit comments