[V1] TPU - Enable prefix caching by default (#14773)

alexm-redhat · web-flow · commit 7888e1d0a3eb · 2025-03-13T20:40:05.000-07:00
diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py
@@ -108,12 +108,6 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
                     parallel_config.worker_cls = \
                         "vllm.worker.tpu_worker.TPUWorker"
 
-        # Adjust scheduler config for V1
-        # TODO: Add support for these
-        if envs.VLLM_USE_V1 and vllm_config.cache_config.enable_prefix_caching:
-            logger.warning("[V1][TPU] Disable prefix caching")
-            vllm_config.cache_config.enable_prefix_caching = False
-
         assert not vllm_config.speculative_config, (
             "Speculative decoding is not yet supported for TPU backend")