Skip to content

Commit c9189cd

Browse files
youkaichao authored and shreyankg committed
[platform] add debug logging during inferring the device type (vllm-project#14195)
Signed-off-by: youkaichao <youkaichao@gmail.com>
1 parent 319ccfc commit c9189cd

File tree

1 file changed

+55
-9
lines changed

1 file changed

+55
-9
lines changed

vllm/platforms/__init__.py

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,22 +32,25 @@ def vllm_version_matches_substr(substr: str) -> bool:
3232

3333
def tpu_platform_plugin() -> Optional[str]:
    """Probe for the TPU platform by trying to import ``libtpu``.

    Returns:
        ``"vllm.platforms.tpu.TpuPlatform"`` when ``libtpu`` imports
        successfully, otherwise ``None``.
    """
    logger.debug("Checking if TPU platform is available.")
    try:
        # While it's technically possible to install libtpu on a
        # non-TPU machine, this is a very uncommon scenario. Therefore,
        # we assume that libtpu is installed if and only if the machine
        # has TPUs.
        import libtpu  # noqa: F401
    except Exception as e:
        # Best-effort probe: any failure while importing means "no TPU".
        # The exception is passed as a lazy %-argument so it is only
        # formatted when debug logging is actually enabled.
        logger.debug("TPU platform is not available because: %s", e)
        return None

    logger.debug("Confirmed TPU platform is available.")
    return "vllm.platforms.tpu.TpuPlatform"
4649

4750

4851
def cuda_platform_plugin() -> Optional[str]:
4952
is_cuda = False
50-
53+
logger.debug("Checking if CUDA platform is available.")
5154
try:
5255
from vllm.utils import import_pynvml
5356
pynvml = import_pynvml()
@@ -60,9 +63,19 @@ def cuda_platform_plugin() -> Optional[str]:
6063
# on a GPU machine, even if in a cpu build.
6164
is_cuda = (pynvml.nvmlDeviceGetCount() > 0
6265
and not vllm_version_matches_substr("cpu"))
66+
if pynvml.nvmlDeviceGetCount() <= 0:
67+
logger.debug(
68+
"CUDA platform is not available because no GPU is found.")
69+
if vllm_version_matches_substr("cpu"):
70+
logger.debug("CUDA platform is not available because"
71+
" vLLM is built with CPU.")
72+
if is_cuda:
73+
logger.debug("Confirmed CUDA platform is available.")
6374
finally:
6475
pynvml.nvmlShutdown()
6576
except Exception as e:
77+
logger.debug("Exception happens when checking CUDA platform: %s",
78+
str(e))
6679
if "nvml" not in e.__class__.__name__.lower():
6780
# If the error is not related to NVML, re-raise it.
6881
raise e
@@ -75,50 +88,64 @@ def cuda_is_jetson() -> bool:
7588
or os.path.exists("/sys/class/tegra-firmware")
7689

7790
if cuda_is_jetson():
91+
logger.debug("Confirmed CUDA platform is available on Jetson.")
7892
is_cuda = True
93+
else:
94+
logger.debug("CUDA platform is not available because: %s", str(e))
7995

8096
return "vllm.platforms.cuda.CudaPlatform" if is_cuda else None
8197

8298

8399
def rocm_platform_plugin() -> Optional[str]:
    """Probe for the ROCm platform through the ``amdsmi`` library.

    The probe initialises ``amdsmi``, asks for the processor handles and
    always shuts the library down again before returning.

    Returns:
        ``"vllm.platforms.rocm.RocmPlatform"`` when ``amdsmi`` reports at
        least one processor handle, otherwise ``None``.
    """
    is_rocm = False
    logger.debug("Checking if ROCm platform is available.")
    try:
        import amdsmi
        amdsmi.amdsmi_init()
        try:
            if len(amdsmi.amdsmi_get_processor_handles()) > 0:
                is_rocm = True
                logger.debug("Confirmed ROCm platform is available.")
            else:
                # Explain the negative result too, so the debug log always
                # says why ROCm was rejected.
                logger.debug("ROCm platform is not available because"
                             " no GPU is found.")
        finally:
            # Pair every successful amdsmi_init() with a shutdown.
            amdsmi.amdsmi_shut_down()
    except Exception as e:
        # Best-effort probe: a missing or failing amdsmi means "no ROCm".
        # The exception is a lazy %-argument, formatted only if needed.
        logger.debug("ROCm platform is not available because: %s", e)

    return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None
98116

99117

100118
def hpu_platform_plugin() -> Optional[str]:
    """Probe for the HPU platform by locating ``habana_frameworks``.

    The package is only looked up with ``importlib.util.find_spec``; it is
    not imported by this function.

    Returns:
        ``"vllm.platforms.hpu.HpuPlatform"`` when a spec for
        ``habana_frameworks`` is found, otherwise ``None``.
    """
    is_hpu = False
    logger.debug("Checking if HPU platform is available.")
    try:
        from importlib import util
        is_hpu = util.find_spec('habana_frameworks') is not None
        if is_hpu:
            logger.debug("Confirmed HPU platform is available.")
        else:
            logger.debug("HPU platform is not available because "
                         "habana_frameworks is not found.")
    except Exception as e:
        # Best-effort probe: a failing lookup is treated as "no HPU".
        # The exception is a lazy %-argument, formatted only if needed.
        logger.debug("HPU platform is not available because: %s", e)

    return "vllm.platforms.hpu.HpuPlatform" if is_hpu else None
109134

110135

111136
def xpu_platform_plugin() -> Optional[str]:
    """Probe for the XPU platform via IPEX, oneCCL bindings and ``torch.xpu``.

    Returns:
        ``"vllm.platforms.xpu.XPUPlatform"`` when both Intel extension
        packages import and ``torch.xpu.is_available()`` is true,
        otherwise ``None``.
    """
    is_xpu = False
    logger.debug("Checking if XPU platform is available.")
    try:
        # installed IPEX if the machine has XPUs.
        import intel_extension_for_pytorch  # noqa: F401
        import oneccl_bindings_for_pytorch  # noqa: F401
        import torch
        if hasattr(torch, 'xpu') and torch.xpu.is_available():
            is_xpu = True
            logger.debug("Confirmed XPU platform is available.")
        else:
            # Explain the negative result too: the packages imported, but
            # torch does not report a usable XPU device.
            logger.debug("XPU platform is not available because"
                         " torch.xpu is not available.")
    except Exception as e:
        # Best-effort probe: any import/runtime failure means "no XPU".
        # The exception is a lazy %-argument, formatted only if needed.
        logger.debug("XPU platform is not available because: %s", e)

    return "vllm.platforms.xpu.XPUPlatform" if is_xpu else None
@@ -137,35 +164,54 @@ def metal_platform_plugin() -> Optional[str]:
137164

138165
def cpu_platform_plugin() -> Optional[str]:
    """Probe for the CPU platform.

    The CPU platform is selected when the vLLM version string contains
    ``"cpu"`` or, failing that, when ``platform.machine()`` starts with
    ``"arm"`` (case-insensitive).

    Returns:
        ``"vllm.platforms.cpu.CpuPlatform"`` when either check succeeds,
        otherwise ``None``.
    """
    is_cpu = False
    logger.debug("Checking if CPU platform is available.")
    try:
        is_cpu = vllm_version_matches_substr("cpu")
        if is_cpu:
            logger.debug("Confirmed CPU platform is available because"
                         " vLLM is built with CPU.")
        else:
            import platform
            is_cpu = platform.machine().lower().startswith("arm")
            if is_cpu:
                logger.debug("Confirmed CPU platform is available"
                             " because the machine is ARM.")
            else:
                # Explain the negative result too, so the debug log always
                # says why the CPU platform was rejected.
                logger.debug("CPU platform is not available because"
                             " vLLM is not built with CPU and the machine"
                             " is not ARM.")
    except Exception as e:
        # Best-effort probe: a failing check is treated as "no CPU
        # platform". The exception is a lazy %-argument.
        logger.debug("CPU platform is not available because: %s", e)

    return "vllm.platforms.cpu.CpuPlatform" if is_cpu else None
152187

153188

154189
def neuron_platform_plugin() -> Optional[str]:
    """Probe for the Neuron platform by importing ``transformers_neuronx``.

    Only ``ImportError`` is handled; any other exception raised while
    importing the package propagates to the caller.

    Returns:
        ``"vllm.platforms.neuron.NeuronPlatform"`` when the import
        succeeds, ``None`` when it raises ``ImportError``.
    """
    logger.debug("Checking if Neuron platform is available.")
    try:
        import transformers_neuronx  # noqa: F401
    except ImportError as e:
        # The exception is a lazy %-argument, formatted only when debug
        # logging is enabled.
        logger.debug("Neuron platform is not available because: %s", e)
        return None

    logger.debug("Confirmed Neuron platform is available because"
                 " transformers_neuronx is found.")
    return "vllm.platforms.neuron.NeuronPlatform"
163202

164203

165204
def openvino_platform_plugin() -> Optional[str]:
    """Probe for the OpenVINO platform from the vLLM version string.

    Returns:
        ``"vllm.platforms.openvino.OpenVinoPlatform"`` when the vLLM
        version string contains ``"openvino"``, otherwise ``None``
        (including when the version check itself raises).
    """
    is_openvino = False
    logger.debug("Checking if OpenVINO platform is available.")
    try:
        is_openvino = vllm_version_matches_substr("openvino")
    except Exception as e:
        # Still best-effort, but report the real cause instead of silently
        # suppressing it and then blaming the build flavour.
        logger.debug("OpenVINO platform is not available because: %s", e)
    else:
        if is_openvino:
            logger.debug("Confirmed OpenVINO platform is available"
                         " because vLLM is built with OpenVINO.")
        else:
            logger.debug("OpenVINO platform is not available because"
                         " vLLM is not built with OpenVINO.")

    return "vllm.platforms.openvino.OpenVinoPlatform" if is_openvino else None
171217

0 commit comments

Comments
 (0)