diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index ca8734d28b45..ffea9bb35513 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -389,7 +389,7 @@ def compile_or_warm_up_model(self) -> None: f"utilize gpu memory. Current kv cache memory in use is " f"{int(self.available_kv_cache_memory_bytes)} bytes.") - logger.info(msg) + logger.debug(msg) # Warm up sampler and preallocate memory buffer for logits and other # sampling related tensors of max possible shape to avoid memory