@@ -431,7 +431,7 @@ def _apply_hf_processor_main(
431431 tokenization_kwargs : Mapping [str , object ],
432432 * ,
433433 enable_hf_prompt_update : bool ,
434- ) -> tuple [list [int ], MultiModalKwargs , bool ]:
434+ ) -> tuple [list [int ], BatchFeature , bool ]:
435435 """
436436 Qwen2.5-Omni reimplements this function to handle text only.
437437 """
@@ -448,20 +448,20 @@ def _apply_hf_processor_main(
448448 else :
449449 prompt_ids = self ._apply_hf_processor_tokens_only (prompt )
450450
451- mm_kwargs = self ._apply_hf_processor_mm_only (
451+ mm_processed_data = self ._apply_hf_processor_mm_only (
452452 mm_items = mm_items ,
453453 hf_processor_mm_kwargs = hf_processor_mm_kwargs ,
454454 tokenization_kwargs = tokenization_kwargs ,
455455 )
456456
457- return prompt_ids , mm_kwargs , False
457+ return prompt_ids , mm_processed_data , False
458458
459459 def _apply_hf_processor_mm_only (
460460 self ,
461461 mm_items : MultiModalDataItems ,
462462 hf_processor_mm_kwargs : Mapping [str , object ],
463463 tokenization_kwargs : Mapping [str , object ],
464- ) -> MultiModalKwargs :
464+ ) -> BatchFeature :
465465 """
466466 Qwen2.5-Omni reimplements this function to handle `use_audio_in_video`.
467467 """
@@ -473,14 +473,14 @@ def _apply_hf_processor_mm_only(
473473 assert "audio" in mm_counts
474474 mm_counts ["audio" ] -= mm_counts ["video" ]
475475
476- _ , mm_kwargs , _ = self ._apply_hf_processor_text_mm (
476+ _ , mm_processed_data , _ = self ._apply_hf_processor_text_mm (
477477 prompt_text = self .dummy_inputs .get_dummy_text (mm_counts ),
478478 mm_items = mm_items ,
479479 hf_processor_mm_kwargs = hf_processor_mm_kwargs ,
480480 tokenization_kwargs = tokenization_kwargs ,
481481 )
482482
483- return mm_kwargs
483+ return mm_processed_data
484484
485485 def _validate_mm_placeholders (
486486 self ,
0 commit comments