Skip to content

Commit 8c9da6b

Browse files
[Core] Simplify mm processing cache (#22457)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
1 parent: 399d2a1 · commit: 8c9da6b

File tree

4 files changed

+97
-206
lines changed

4 files changed

+97
-206
lines changed

vllm/model_executor/models/qwen2_5_omni_thinker.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -431,7 +431,7 @@ def _apply_hf_processor_main(
431431
tokenization_kwargs: Mapping[str, object],
432432
*,
433433
enable_hf_prompt_update: bool,
434-
) -> tuple[list[int], MultiModalKwargs, bool]:
434+
) -> tuple[list[int], BatchFeature, bool]:
435435
"""
436436
Qwen2.5-Omni reimplements this function to handle text only.
437437
"""
@@ -448,20 +448,20 @@ def _apply_hf_processor_main(
448448
else:
449449
prompt_ids = self._apply_hf_processor_tokens_only(prompt)
450450

451-
mm_kwargs = self._apply_hf_processor_mm_only(
451+
mm_processed_data = self._apply_hf_processor_mm_only(
452452
mm_items=mm_items,
453453
hf_processor_mm_kwargs=hf_processor_mm_kwargs,
454454
tokenization_kwargs=tokenization_kwargs,
455455
)
456456

457-
return prompt_ids, mm_kwargs, False
457+
return prompt_ids, mm_processed_data, False
458458

459459
def _apply_hf_processor_mm_only(
460460
self,
461461
mm_items: MultiModalDataItems,
462462
hf_processor_mm_kwargs: Mapping[str, object],
463463
tokenization_kwargs: Mapping[str, object],
464-
) -> MultiModalKwargs:
464+
) -> BatchFeature:
465465
"""
466466
Qwen2.5-Omni reimplements this function to handle `use_audio_in_video`.
467467
"""
@@ -473,14 +473,14 @@ def _apply_hf_processor_mm_only(
473473
assert "audio" in mm_counts
474474
mm_counts["audio"] -= mm_counts["video"]
475475

476-
_, mm_kwargs, _ = self._apply_hf_processor_text_mm(
476+
_, mm_processed_data, _ = self._apply_hf_processor_text_mm(
477477
prompt_text=self.dummy_inputs.get_dummy_text(mm_counts),
478478
mm_items=mm_items,
479479
hf_processor_mm_kwargs=hf_processor_mm_kwargs,
480480
tokenization_kwargs=tokenization_kwargs,
481481
)
482482

483-
return mm_kwargs
483+
return mm_processed_data
484484

485485
def _validate_mm_placeholders(
486486
self,

vllm/model_executor/models/transformers.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,8 @@
2222
import regex as re
2323
import torch
2424
from torch import nn
25-
from transformers import AutoModel, PretrainedConfig, PreTrainedModel
25+
from transformers import (AutoModel, BatchFeature, PretrainedConfig,
26+
PreTrainedModel)
2627
from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS
2728

2829
from vllm.attention import Attention
@@ -269,7 +270,7 @@ def _apply_hf_processor_text_mm(
269270
mm_items: MultiModalDataItems,
270271
hf_processor_mm_kwargs: Mapping[str, object],
271272
tokenization_kwargs: Mapping[str, object],
272-
):
273+
) -> tuple[list[int], BatchFeature, bool]:
273274
"""
274275
Apply the HF processor on the prompt text and multi-modal data
275276
together.

0 commit comments

Comments
 (0)