Fix vllm-project#31: convert the SamplerOutput output_token to int in TTModelRunner::_make_sampler_output, as expected by downstream vLLM code

tstescoTT · skhorasganiTT · commit d236ccf813be · 2025-04-16T19:09:09.000Z
diff --git a/vllm/worker/tt_model_runner.py b/vllm/worker/tt_model_runner.py
@@ -351,7 +351,7 @@ def _make_sampler_output(
         zero_logprob = Logprob(0.0)
         sampler_outputs = []
         for batch_idx, seq_id in enumerate(seq_groups):
-            next_token_id = next_token_ids[batch_idx]
+            next_token_id = int(next_token_ids[batch_idx])
             seq_outputs = [SequenceOutput(seq_id, next_token_id,
                                 {next_token_id: zero_logprob})]
             sampler_outputs.append(