diff --git a/vllm/engine/output_processor/multi_step.py b/vllm/engine/output_processor/multi_step.py index 5f126c7571dc..126e7da70216 100644 --- a/vllm/engine/output_processor/multi_step.py +++ b/vllm/engine/output_processor/multi_step.py @@ -178,7 +178,7 @@ def _process_seq_outputs(self, seq: Sequence, # generates a fixed number of tokens without evaluating stopping # conditions within the block. This can cause an eos token to be # unintentionally ignored. - if not sampling_params.ignore_eos: + if not sampling_params.ignore_eos and self.detokenizer: eos_token_id = self.get_tokenizer_for_seq(seq).eos_token_id # Avoiding .index calls as exception throwing in the happy path # is expensive.