File tree Expand file tree Collapse file tree 3 files changed +6
-15
lines changed
vllm/model_executor/models Expand file tree Collapse file tree 3 files changed +6
-15
lines changed Original file line number Diff line number Diff line change @@ -83,8 +83,8 @@ def _test_processing_correctness(
8383 }
8484
8585 tokenizer_encode_kwargs = {}
86- if model_config .hf_config .model_type in ("mllama" , "whisper" ):
87- # For some encoder-decoder models, tokenizer will always add bos_token
86+ if model_config .hf_config .model_type in ("mllama" , "whisper" , "ultravox" ):
87+ # For some multimodal models, tokenizer will always add bos_token
8888 # at the beginning of prompt by default, causing hf_processor to output
8989 # incorrect token ids. So we need to use `add_special_tokens=False` here
9090 # to leave bos_token to be added by the processor.
@@ -172,7 +172,7 @@ def _test_processing_correctness(
172172 "Qwen/Qwen2-VL-2B-Instruct" ,
173173 "Qwen/Qwen2.5-VL-3B-Instruct" ,
174174 "Qwen/Qwen2-Audio-7B-Instruct" ,
175- "fixie-ai/ultravox-v0_5-llama-3_2-1b " ,
175+ "fixie-ai/ultravox-v0_4 " ,
176176 "openai/whisper-large-v3" ,
177177])
178178@pytest .mark .parametrize ("hit_rate" , [0.3 , 0.5 , 1.0 ])
Original file line number Diff line number Diff line change @@ -284,7 +284,7 @@ def check_available_online(
284284 "Qwen2VLForConditionalGeneration" : _HfExamplesInfo ("Qwen/Qwen2-VL-2B-Instruct" ), # noqa: E501
285285 "Qwen2_5_VLForConditionalGeneration" : _HfExamplesInfo ("Qwen/Qwen2.5-VL-3B-Instruct" , # noqa: E501
286286 min_transformers_version = "4.49" ), # noqa: E501
287- "UltravoxModel" : _HfExamplesInfo ("fixie-ai/ultravox-v0_5-llama-3_2-1b " ,
287+ "UltravoxModel" : _HfExamplesInfo ("fixie-ai/ultravox-v0_4 " ,
288288 trust_remote_code = True ),
289289 # [Encoder-decoder]
290290 "MllamaForConditionalGeneration" : _HfExamplesInfo ("meta-llama/Llama-3.2-11B-Vision-Instruct" ), # noqa: E501
Original file line number Diff line number Diff line change @@ -146,7 +146,8 @@ def _call_hf_processor(
146146 ) -> BatchFeature :
147147 # Text-only input not supported in composite processor
148148 if not mm_data or not mm_data .get ("audios" , []):
149- prompt_ids = self .info .get_tokenizer ().encode (prompt )
149+ prompt_ids = self .info .get_tokenizer ().encode (
150+ prompt , add_special_tokens = False )
150151 prompt_ids = self ._apply_hf_processor_tokens_only (prompt_ids )
151152 return BatchFeature (dict (input_ids = [prompt_ids ]), tensor_type = "pt" )
152153
@@ -185,16 +186,6 @@ def _call_hf_processor(
185186 )
186187 return BatchFeature (combined_outputs )
187188
188- def _apply_hf_processor_tokens_only (
189- self ,
190- prompt_tokens : list [int ],
191- ) -> list [int ]:
192- # HF processor omits bos_token_id by setting add_special_tokens=False
193- tokenizer = self .info .get_tokenizer ()
194- assert prompt_tokens [0 ] == tokenizer .bos_token_id
195-
196- return prompt_tokens [1 :]
197-
198189 def _get_mm_fields_config (
199190 self ,
200191 hf_inputs : BatchFeature ,
You can’t perform that action at this time.
0 commit comments