
Commit 0d4b49d

[Core] Don't use cache during multi-modal profiling (vllm-project#14336)

DarkLight1337 authored and DamonFool committed
1 parent c34beef

File tree

2 files changed: +15 −5 lines

vllm/inputs/registry.py

Lines changed: 3 additions & 1 deletion

@@ -331,7 +331,9 @@ def dummy_data_for_profiling(
 
         if mm_registry.has_processor(model_config):
             tokenizer = cached_tokenizer_from_config(model_config)
-            processor = mm_registry.create_processor(model_config, tokenizer)
+            processor = mm_registry.create_processor(model_config,
+                                                     tokenizer,
+                                                     disable_cache=True)
             profiler = MultiModalProfiler(processor)
             dummy_data = profiler.get_dummy_data(
                 seq_len, is_encoder_data=is_encoder_data)
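
The same one-line change recurs at every profiling call site below: the processor is built with caching explicitly disabled, since profiling pushes synthetic dummy inputs through the processor exactly once, and caching their results would only fill the shared cache with entries that real requests never hit. A minimal, self-contained sketch of that effect (the process function and cache layout here are illustrative, not vllm's actual implementation):

from typing import Optional


def process(item: str, cache: Optional[dict]) -> str:
    """Toy preprocessor that memoizes results in an optional shared cache."""
    if cache is not None and item in cache:
        return cache[item]
    result = f"processed:{item}"  # stand-in for real multi-modal preprocessing
    if cache is not None:
        cache[item] = result
    return result


shared_cache: dict = {}

# Serving path: repeated real inputs reuse cached results.
process("user_image", shared_cache)
process("user_image", shared_cache)

# Profiling path: dummy inputs are seen exactly once, so they are
# processed with cache=None and never touch the shared cache.
process("dummy_image", cache=None)

assert "dummy_image" not in shared_cache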

vllm/multimodal/registry.py

Lines changed: 12 additions & 4 deletions

@@ -257,7 +257,9 @@ def get_max_tokens_per_item_by_modality(
         """
         if self.has_processor(model_config):
             tokenizer = cached_tokenizer_from_config(model_config)
-            processor = self.create_processor(model_config, tokenizer)
+            processor = self.create_processor(model_config,
+                                              tokenizer,
+                                              disable_cache=True)
             seq_len = model_config.max_model_len
             mm_limits = self.get_mm_limits_per_prompt(model_config)
             return processor.info.get_mm_max_tokens_per_item(

@@ -372,7 +374,9 @@ def get_mm_limits_per_prompt(
         """
         if self.has_processor(model_config):
             tokenizer = cached_tokenizer_from_config(model_config)
-            processor = self.create_processor(model_config, tokenizer)
+            processor = self.create_processor(model_config,
+                                              tokenizer,
+                                              disable_cache=True)
             profiler = MultiModalProfiler(processor)
             return profiler.get_mm_limits()
 

@@ -433,18 +437,22 @@ def create_processor(
         self,
         model_config: "ModelConfig",
         tokenizer: AnyTokenizer,
+        *,
+        disable_cache: Optional[bool] = None,
     ) -> BaseMultiModalProcessor[BaseProcessingInfo]:
         """
         Create a multi-modal processor for a specific model and tokenizer.
 
         See also:
             :ref:`mm-processing`
         """
+        if disable_cache is None:
+            disable_cache = model_config.disable_mm_preprocessor_cache
+
         model_cls = self._get_model_cls(model_config)
         factories = self._processor_factories[model_cls]
 
         ctx = InputProcessingContext(model_config, tokenizer)
-        cache = (None if model_config.disable_mm_preprocessor_cache else
-                 self._processing_cache)
+        cache = None if disable_cache else self._processing_cache
 
         return factories.build_processor(ctx, cache=cache)
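
The new keyword-only disable_cache parameter is tri-state: True or False overrides the config, while the default None falls back to model_config.disable_mm_preprocessor_cache, preserving the old behavior for existing callers. A minimal sketch of this resolution pattern, using a stand-in Config class rather than vllm's real ModelConfig, and returning the cache object itself instead of a built processor:

from dataclasses import dataclass
from typing import Optional


@dataclass
class Config:
    """Stand-in for ModelConfig (illustrative only)."""
    disable_mm_preprocessor_cache: bool = False


_processing_cache: dict = {}  # stand-in for the registry's shared cache


def create_processor(config: Config, *,
                     disable_cache: Optional[bool] = None) -> Optional[dict]:
    """Return the cache the processor would be built with."""
    # Tri-state flag: None defers to the per-model config default.
    if disable_cache is None:
        disable_cache = config.disable_mm_preprocessor_cache
    return None if disable_cache else _processing_cache


# Normal path: caching follows the config (enabled by default).
assert create_processor(Config()) is _processing_cache
# Profiling path: caching forced off regardless of the config.
assert create_processor(Config(), disable_cache=True) is None

Keeping None as the default means call sites that do not care about caching continue to defer to the per-model config, while the profiler can opt out unconditionally.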
