1 parent 7812bcf commit 23a6c52
vllm/entrypoints/context.py
@@ -238,11 +238,11 @@ def append_output(self, output) -> None:
             # (finished=True), then the next token processed will mark the
             # beginning of a new message
             self.first_tok_of_message = output.finished
-            tok = output.outputs[0].token_ids[0]
-            self.parser.process(tok)
+            for tok in output.outputs[0].token_ids:
+                self.parser.process(tok)
             self._update_num_output_tokens(output.outputs[0].token_ids)
             # Check if the current token is part of reasoning content
-            self._update_num_reasoning_tokens([tok])
+            self._update_num_reasoning_tokens(output.outputs[0].token_ids)
             self.last_tok = tok
         else:
             # Handle the case of tool output in direct message format
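The change above switches from parsing only the first generated token of each engine step to iterating over every token in `output.outputs[0].token_ids`, and it feeds that same full list to `_update_num_reasoning_tokens`. Below is a minimal sketch of the per-token pattern, using hypothetical stand-in classes (`FakeParser`, `FakeOutput`) rather than vLLM's real `RequestOutput` and parser types:

```python
# Hypothetical stand-ins for illustration only; not part of the vLLM API.

class FakeParser:
    """Collects every token it is asked to process."""
    def __init__(self):
        self.seen = []

    def process(self, tok: int) -> None:
        self.seen.append(tok)


class FakeCompletion:
    def __init__(self, token_ids):
        self.token_ids = token_ids


class FakeOutput:
    """Mimics the shape accessed in the diff: output.outputs[0].token_ids."""
    def __init__(self, token_ids, finished=False):
        self.outputs = [FakeCompletion(token_ids)]
        self.finished = finished


parser = FakeParser()
output = FakeOutput([101, 102, 103])

# Before the patch only token_ids[0] was parsed; afterwards every token in
# the step is fed to the parser, so multi-token steps are not silently dropped.
for tok in output.outputs[0].token_ids:
    parser.process(tok)

assert parser.seen == [101, 102, 103]
```

The loop leaves `tok` bound to the last token of the step, which is what the unchanged `self.last_tok = tok` line relies on.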