[TPU][V1] Make --disable_chunked_mm_input mandatory for serving MM models (vllm-project#16483)

NickLucche · yangw-dev · commit 6bfa06641d6c · 2025-04-21T10:08:13.000-07:00
Signed-off-by: NickLucche <nlucches@redhat.com>
Signed-off-by: Yang Wang <elainewy@meta.com>
diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py
@@ -120,6 +120,13 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         assert not vllm_config.speculative_config, (
             "Speculative decoding is not yet supported for TPU backend")
 
+        if scheduler_config.is_multimodal_model and not \
+            scheduler_config.disable_chunked_mm_input:
+            logger.warning("TPU does not support running Multimodal models"\
+            " without setting `--disable_chunked_mm_input`. " \
+            "Forcing --disable_chunked_mm_input.")
+            scheduler_config.disable_chunked_mm_input = True
+
     @classmethod
     def is_pin_memory_available(cls):
         logger.warning("Pin memory is not supported on TPU.")