Diff summary: 2 files changed, +5 −1 lines — a supported-models table (Markdown docs) and a model file under tensorrt_llm/_torch/models.
  | LLaVA-NeXT        | Yes | Yes | Yes | No |
  | Llama 4           | Yes | Yes | No  | No |
  | Mistral-Small-3.1 | Yes | Yes | No  | No |
+ | Nano-v2-VLM       | Yes | Yes | Yes | No |
  | Phi-4-multimodal  | Yes | Yes | No  | No |
  | Qwen2-VL          | Yes | Yes | Yes | No |
  | Qwen2.5-VL        | Yes | Yes | Yes | No |
  from ..attention_backend import AttentionMetadata
  from ..model_config import ModelConfig
  from .modeling_auto import AutoModelForCausalLM
- from .modeling_multimodal_utils import fuse_input_embeds
+ from .modeling_multimodal_utils import (find_uncached_mm_embeds,
+                                         fuse_input_embeds)
  from .modeling_radio import RADIOVisionModel
  from .modeling_utils import register_auto_model
@@ -394,6 +395,8 @@ def forward(
          multimodal_param.multimodal_data["multimodal_embedding"]
          for multimodal_param in multimodal_params
      ]
+     mm_embedding = find_uncached_mm_embeds(
+         mm_embedding, multimodal_params[:num_context_requests])
      input_ids, input_embeds = fuse_input_embeds(
          self.llm.model.embed_tokens,
          input_ids,
You can’t perform that action at this time.
0 commit comments