tests/models/multimodal/processing/test_common.py (6 changes: 3 additions & 3 deletions)

@@ -83,8 +83,8 @@ def _test_processing_correctness(
     }

     tokenizer_encode_kwargs = {}
-    if model_config.hf_config.model_type in ("mllama", "whisper"):
-        # For some encoder-decoder models, the tokenizer will always add bos_token
+    if model_config.hf_config.model_type in ("mllama", "whisper", "ultravox"):
+        # For some multimodal models, the tokenizer will always add bos_token
         # at the beginning of the prompt by default, causing hf_processor to
         # output incorrect token ids, so we use `add_special_tokens=False`
         # here and leave the bos_token to be added by the processor.
@@ -172,7 +172,7 @@ def _test_processing_correctness(
     "Qwen/Qwen2-VL-2B-Instruct",
     "Qwen/Qwen2.5-VL-3B-Instruct",
     "Qwen/Qwen2-Audio-7B-Instruct",
-    "fixie-ai/ultravox-v0_5-llama-3_2-1b",
+    "fixie-ai/ultravox-v0_4",
     "openai/whisper-large-v3",
 ])
 @pytest.mark.parametrize("hit_rate", [0.3, 0.5, 1.0])
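
The comment in the first hunk compresses the underlying issue. A minimal sketch of the double-bos_token problem, using a public stand-in Llama tokenizer rather than any of the models in the test list:

    # Sketch only: demonstrates why the test encodes reference prompts with
    # add_special_tokens=False for these model types. The checkpoint below is
    # a public stand-in tokenizer, not one of the models under test.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
    prompt = "What is being said in this audio?"

    # By default, encode() prepends bos_token_id...
    with_bos = tokenizer.encode(prompt)
    assert with_bos[0] == tokenizer.bos_token_id

    # ...but the multimodal processor adds bos_token itself, so reference ids
    # built with the default would carry a duplicate bos and never match the
    # processor's output.
    without_bos = tokenizer.encode(prompt, add_special_tokens=False)
    assert with_bos[1:] == without_bos
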
tests/models/registry.py (2 changes: 1 addition & 1 deletion)

@@ -282,7 +282,7 @@ def check_available_online(
     "Qwen2VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-VL-2B-Instruct"),  # noqa: E501
     "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-VL-3B-Instruct",  # noqa: E501
                                                           min_transformers_version="4.49"),  # noqa: E501
-    "UltravoxModel": _HfExamplesInfo("fixie-ai/ultravox-v0_5-llama-3_2-1b",
+    "UltravoxModel": _HfExamplesInfo("fixie-ai/ultravox-v0_4",
                                      trust_remote_code=True),
     # [Encoder-decoder]
     "MllamaForConditionalGeneration": _HfExamplesInfo("meta-llama/Llama-3.2-11B-Vision-Instruct"),  # noqa: E501
vllm/model_executor/models/ultravox.py (13 changes: 2 additions & 11 deletions)

@@ -146,7 +146,8 @@ def _call_hf_processor(
     ) -> BatchFeature:
         # Text-only input not supported in composite processor
         if not mm_data or not mm_data.get("audios", []):
-            prompt_ids = self.info.get_tokenizer().encode(prompt)
+            prompt_ids = self.info.get_tokenizer().encode(
+                prompt, add_special_tokens=False)
             prompt_ids = self._apply_hf_processor_tokens_only(prompt_ids)
             return BatchFeature(dict(input_ids=[prompt_ids]), tensor_type="pt")

@@ -185,16 +186,6 @@ def _call_hf_processor(
         )
         return BatchFeature(combined_outputs)

-    def _apply_hf_processor_tokens_only(
-        self,
-        prompt_tokens: list[int],
-    ) -> list[int]:
-        # HF processor omits bos_token_id by setting add_special_tokens=False
-        tokenizer = self.info.get_tokenizer()
-        assert prompt_tokens[0] == tokenizer.bos_token_id
-
-        return prompt_tokens[1:]
-
     def _get_mm_fields_config(
         self,
         hf_inputs: BatchFeature,
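
Taken together, the two hunks trade a strip-after-encode pattern for not adding the token in the first place. A minimal sketch of why the two are equivalent, again using a public stand-in tokenizer rather than the actual Ultravox one:

    # Sketch only: dropping a leading bos_token_id after a default encode
    # (the deleted _apply_hf_processor_tokens_only override) yields the same
    # ids as encoding with add_special_tokens=False (the new call site).
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
    prompt = "Describe the audio clip."

    # Old approach: encode with special tokens, then assert-and-strip the bos.
    tokens = tokenizer.encode(prompt)
    assert tokens[0] == tokenizer.bos_token_id
    stripped_ids = tokens[1:]

    # New approach: never add special tokens to begin with.
    plain_ids = tokenizer.encode(prompt, add_special_tokens=False)

    assert stripped_ids == plain_ids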