From a22c678799f1505b4bf415f425b4c5c654ff5ba1 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Tue, 4 Mar 2025 17:14:28 +0800 Subject: [PATCH 1/5] add logs for inferring the device Signed-off-by: youkaichao --- vllm/platforms/__init__.py | 62 ++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/vllm/platforms/__init__.py b/vllm/platforms/__init__.py index 48cf8f7a323a..6188791d4abc 100644 --- a/vllm/platforms/__init__.py +++ b/vllm/platforms/__init__.py @@ -32,6 +32,7 @@ def vllm_version_matches_substr(substr: str) -> bool: def tpu_platform_plugin() -> Optional[str]: is_tpu = False + logger.debug("Checking if TPU platform is available.") try: # While it's technically possible to install libtpu on a # non-TPU machine, this is a very uncommon scenario. Therefore, @@ -39,7 +40,9 @@ def tpu_platform_plugin() -> Optional[str]: # has TPUs. import libtpu # noqa: F401 is_tpu = True - except Exception: + logger.debug("Confirmed TPU platform is available.") + except Exception as e: + logger.debug("TPU platform is not available because of %s", str(e)) pass return "vllm.platforms.tpu.TpuPlatform" if is_tpu else None @@ -47,7 +50,7 @@ def tpu_platform_plugin() -> Optional[str]: def cuda_platform_plugin() -> Optional[str]: is_cuda = False - + logger.debug("Checking if CUDA platform is available.") try: from vllm.utils import import_pynvml pynvml = import_pynvml() @@ -60,9 +63,19 @@ def cuda_platform_plugin() -> Optional[str]: # on a GPU machine, even if in a cpu build. is_cuda = (pynvml.nvmlDeviceGetCount() > 0 and not vllm_version_matches_substr("cpu")) + if pynvml.nvmlDeviceGetCount() <= 0: + logger.debug( + "Cuda platform is not available because no GPU is found.") + if vllm_version_matches_substr("cpu"): + logger.debug("Cuda platform is not available because" + " vLLM is built with CPU.") + if is_cuda: + logger.debug("Confirmed CUDA platform is available.") finally: pynvml.nvmlShutdown() except Exception as e: + logger.debug("exception %s happens when checking CUDA platform", + str(e)) if "nvml" not in e.__class__.__name__.lower(): # If the error is not related to NVML, re-raise it. raise e @@ -75,23 +88,29 @@ def cuda_is_jetson() -> bool: or os.path.exists("/sys/class/tegra-firmware") if cuda_is_jetson(): + logger.debug("Confirmed CUDA platform is available on Jetson.") is_cuda = True + else: + logger.debug("CUDA platform is not available because of %s", + str(e)) return "vllm.platforms.cuda.CudaPlatform" if is_cuda else None def rocm_platform_plugin() -> Optional[str]: is_rocm = False - + logger.debug("Checking if ROCm platform is available.") try: import amdsmi amdsmi.amdsmi_init() try: if len(amdsmi.amdsmi_get_processor_handles()) > 0: is_rocm = True + logger.debug("Confirmed ROCm platform is available.") finally: amdsmi.amdsmi_shut_down() - except Exception: + except Exception as e: + logger.debug("ROCm platform is not available because of %s", str(e)) pass return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None @@ -99,10 +118,17 @@ def rocm_platform_plugin() -> Optional[str]: def hpu_platform_plugin() -> Optional[str]: is_hpu = False + logger.debug("Checking if HPU platform is available.") try: from importlib import util is_hpu = util.find_spec('habana_frameworks') is not None - except Exception: + if is_hpu: + logger.debug("Confirmed HPU platform is available.") + else: + logger.debug("HPU platform is not available because " + "habana_frameworks is not found.") + except Exception as e: + logger.debug("HPU platform is not available because of %s", str(e)) pass return "vllm.platforms.hpu.HpuPlatform" if is_hpu else None @@ -110,7 +136,7 @@ def hpu_platform_plugin() -> Optional[str]: def xpu_platform_plugin() -> Optional[str]: is_xpu = False - + logger.debug("Checking if XPU platform is available.") try: # installed IPEX if the machine has XPUs. import intel_extension_for_pytorch # noqa: F401 @@ -118,7 +144,9 @@ def xpu_platform_plugin() -> Optional[str]: import torch if hasattr(torch, 'xpu') and torch.xpu.is_available(): is_xpu = True - except Exception: + logger.debug("Confirmed XPU platform is available.") + except Exception as e: + logger.debug("XPU platform is not available because of %s", str(e)) pass return "vllm.platforms.xpu.XPUPlatform" if is_xpu else None @@ -126,13 +154,21 @@ def xpu_platform_plugin() -> Optional[str]: def cpu_platform_plugin() -> Optional[str]: is_cpu = False + logger.debug("Checking if CPU platform is available.") try: is_cpu = vllm_version_matches_substr("cpu") + if is_cpu: + logger.debug("Confirmed CPU platform is available because" + " vLLM is built with CPU.") if not is_cpu: import platform is_cpu = platform.machine().lower().startswith("arm") + if is_cpu: + logger.debug("Confirmed CPU platform is available" + " because the machine is ARM.") - except Exception: + except Exception as e: + logger.debug("CPU platform is not available because of %s", str(e)) pass return "vllm.platforms.cpu.CpuPlatform" if is_cpu else None @@ -140,10 +176,14 @@ def cpu_platform_plugin() -> Optional[str]: def neuron_platform_plugin() -> Optional[str]: is_neuron = False + logger.debug("Checking if Neuron platform is available.") try: import transformers_neuronx # noqa: F401 is_neuron = True - except ImportError: + logger.debug("Confirmed Neuron platform is available because" + " transformers_neuronx is found.") + except ImportError as e: + logger.debug("Neuron platform is not available because of %s", str(e)) pass return "vllm.platforms.neuron.NeuronPlatform" if is_neuron else None @@ -151,8 +191,12 @@ def neuron_platform_plugin() -> Optional[str]: def openvino_platform_plugin() -> Optional[str]: is_openvino = False + logger.debug("Checking if OpenVINO platform is available.") with suppress(Exception): is_openvino = vllm_version_matches_substr("openvino") + if is_openvino: + logger.debug("Confirmed OpenVINO platform is available" + " because vLLM is built with OpenVINO.") return "vllm.platforms.openvino.OpenVinoPlatform" if is_openvino else None From 6ba4a1978ab020c8397640f4e2a5d44f280e8a96 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Tue, 4 Mar 2025 17:21:08 +0800 Subject: [PATCH 2/5] improve Signed-off-by: youkaichao --- vllm/platforms/__init__.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/vllm/platforms/__init__.py b/vllm/platforms/__init__.py index 6188791d4abc..7e3dc4772bdb 100644 --- a/vllm/platforms/__init__.py +++ b/vllm/platforms/__init__.py @@ -42,7 +42,7 @@ def tpu_platform_plugin() -> Optional[str]: is_tpu = True logger.debug("Confirmed TPU platform is available.") except Exception as e: - logger.debug("TPU platform is not available because of %s", str(e)) + logger.debug("TPU platform is not available because: %s", str(e)) pass return "vllm.platforms.tpu.TpuPlatform" if is_tpu else None @@ -91,8 +91,7 @@ def cuda_is_jetson() -> bool: logger.debug("Confirmed CUDA platform is available on Jetson.") is_cuda = True else: - logger.debug("CUDA platform is not available because of %s", - str(e)) + logger.debug("CUDA platform is not available because: %s", str(e)) return "vllm.platforms.cuda.CudaPlatform" if is_cuda else None @@ -110,7 +109,7 @@ def rocm_platform_plugin() -> Optional[str]: finally: amdsmi.amdsmi_shut_down() except Exception as e: - logger.debug("ROCm platform is not available because of %s", str(e)) + logger.debug("ROCm platform is not available because: %s", str(e)) pass return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None @@ -128,7 +127,7 @@ def hpu_platform_plugin() -> Optional[str]: logger.debug("HPU platform is not available because " "habana_frameworks is not found.") except Exception as e: - logger.debug("HPU platform is not available because of %s", str(e)) + logger.debug("HPU platform is not available because: %s", str(e)) pass return "vllm.platforms.hpu.HpuPlatform" if is_hpu else None @@ -146,7 +145,7 @@ def xpu_platform_plugin() -> Optional[str]: is_xpu = True logger.debug("Confirmed XPU platform is available.") except Exception as e: - logger.debug("XPU platform is not available because of %s", str(e)) + logger.debug("XPU platform is not available because: %s", str(e)) pass return "vllm.platforms.xpu.XPUPlatform" if is_xpu else None @@ -168,7 +167,7 @@ def cpu_platform_plugin() -> Optional[str]: " because the machine is ARM.") except Exception as e: - logger.debug("CPU platform is not available because of %s", str(e)) + logger.debug("CPU platform is not available because: %s", str(e)) pass return "vllm.platforms.cpu.CpuPlatform" if is_cpu else None @@ -183,7 +182,7 @@ def neuron_platform_plugin() -> Optional[str]: logger.debug("Confirmed Neuron platform is available because" " transformers_neuronx is found.") except ImportError as e: - logger.debug("Neuron platform is not available because of %s", str(e)) + logger.debug("Neuron platform is not available because: %s", str(e)) pass return "vllm.platforms.neuron.NeuronPlatform" if is_neuron else None From ae20d27cd6672bd885e48a82a30000299ca4d34c Mon Sep 17 00:00:00 2001 From: youkaichao Date: Tue, 4 Mar 2025 17:23:38 +0800 Subject: [PATCH 3/5] openvino Signed-off-by: youkaichao --- vllm/platforms/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vllm/platforms/__init__.py b/vllm/platforms/__init__.py index 7e3dc4772bdb..6bf8c1c5d902 100644 --- a/vllm/platforms/__init__.py +++ b/vllm/platforms/__init__.py @@ -196,6 +196,9 @@ def openvino_platform_plugin() -> Optional[str]: if is_openvino: logger.debug("Confirmed OpenVINO platform is available" " because vLLM is built with OpenVINO.") + if not is_openvino: + logger.debug("OpenVINO platform is not available because" + " vLLM is not built with OpenVINO.") return "vllm.platforms.openvino.OpenVinoPlatform" if is_openvino else None From d24529907d1b52152e027a3a75417c2a17bdc240 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Tue, 4 Mar 2025 17:27:25 +0800 Subject: [PATCH 4/5] unify cuda Signed-off-by: youkaichao --- vllm/platforms/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/platforms/__init__.py b/vllm/platforms/__init__.py index 6bf8c1c5d902..8a82d51224e4 100644 --- a/vllm/platforms/__init__.py +++ b/vllm/platforms/__init__.py @@ -65,9 +65,9 @@ def cuda_platform_plugin() -> Optional[str]: and not vllm_version_matches_substr("cpu")) if pynvml.nvmlDeviceGetCount() <= 0: logger.debug( - "Cuda platform is not available because no GPU is found.") + "CUDA platform is not available because no GPU is found.") if vllm_version_matches_substr("cpu"): - logger.debug("Cuda platform is not available because" + logger.debug("CUDA platform is not available because" " vLLM is built with CPU.") if is_cuda: logger.debug("Confirmed CUDA platform is available.") From 1aef02c5e26bab9656f282bfc19943e12a1ce387 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Tue, 4 Mar 2025 17:28:40 +0800 Subject: [PATCH 5/5] improve logging Signed-off-by: youkaichao --- vllm/platforms/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/platforms/__init__.py b/vllm/platforms/__init__.py index 8a82d51224e4..89e69c7f5780 100644 --- a/vllm/platforms/__init__.py +++ b/vllm/platforms/__init__.py @@ -74,7 +74,7 @@ def cuda_platform_plugin() -> Optional[str]: finally: pynvml.nvmlShutdown() except Exception as e: - logger.debug("exception %s happens when checking CUDA platform", + logger.debug("Exception happens when checking CUDA platform: %s", str(e)) if "nvml" not in e.__class__.__name__.lower(): # If the error is not related to NVML, re-raise it.