We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 3320334 · commit 10532e5 — Copy full SHA for 10532e5
vllm/multimodal/profiling.py
@@ -355,7 +355,11 @@ def _get_mm_max_tokens(
355
mm_counts=mm_counts,
356
)
357
if max_tokens_per_item is not None:
358
- return max_tokens_per_item
+ return {
359
+ modality: max_tokens
360
+ for modality, max_tokens in max_tokens_per_item.items()
361
+ if mm_counts.get(modality, 0) > 0
362
+ }
363
364
mm_inputs = self._get_dummy_mm_inputs(seq_len, mm_counts)
365
return self._get_mm_num_tokens(mm_inputs, mm_embeddings_only=mm_embeddings_only)
@@ -375,5 +379,4 @@ def get_mm_max_contiguous_tokens(
375
379
This is important to take into account when profiling and
376
380
initializing the encoder cache size.
377
381
"""
378
-
382
return self._get_mm_max_tokens(seq_len, mm_counts, mm_embeddings_only=False)
0 commit comments