1 parent 7812bcf commit 23a6c52
vllm/entrypoints/context.py
@@ -238,11 +238,11 @@ def append_output(self, output) -> None:
             # (finished=True), then the next token processed will mark the
             # beginning of a new message
             self.first_tok_of_message = output.finished
-            tok = output.outputs[0].token_ids[0]
-            self.parser.process(tok)
+            for tok in output.outputs[0].token_ids:
+                self.parser.process(tok)
             self._update_num_output_tokens(output.outputs[0].token_ids)
             # Check if the current token is part of reasoning content
-            self._update_num_reasoning_tokens([tok])
+            self._update_num_reasoning_tokens(output.outputs[0].token_ids)
             self.last_tok = tok
         else:
             # Handle the case of tool output in direct message format
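The change above switches from parsing only the first generated token of each engine step to iterating over every token in `output.outputs[0].token_ids`, and it feeds that same full list to `_update_num_reasoning_tokens`. Below is a minimal sketch of the per-token pattern, using hypothetical stand-in classes (`FakeParser`, `FakeOutput`) rather than vLLM's real `RequestOutput` and parser types:

```python
# Hypothetical stand-ins for illustration only; not part of the vLLM API.

class FakeParser:
    """Collects every token it is asked to process."""
    def __init__(self):
        self.seen = []

    def process(self, tok: int) -> None:
        self.seen.append(tok)


class FakeCompletion:
    def __init__(self, token_ids):
        self.token_ids = token_ids


class FakeOutput:
    """Mimics the shape accessed in the diff: output.outputs[0].token_ids."""
    def __init__(self, token_ids, finished=False):
        self.outputs = [FakeCompletion(token_ids)]
        self.finished = finished


parser = FakeParser()
output = FakeOutput([101, 102, 103])

# Before the patch only token_ids[0] was parsed; afterwards every token in
# the step is fed to the parser, so multi-token steps are not silently dropped.
for tok in output.outputs[0].token_ids:
    parser.process(tok)

assert parser.seen == [101, 102, 103]
```

The loop leaves `tok` bound to the last token of the step, which is what the unchanged `self.last_tok = tok` line relies on.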