1 file changed: +5 −5 lines
lines changed Original file line number Diff line number Diff line change 3535from vllm .test_utils import MODEL_WEIGHTS_S3_BUCKET , MODELS_ON_S3
3636from vllm .transformers_utils .utils import check_gguf_file
3737from vllm .usage .usage_lib import UsageContext
38- from vllm .utils import FlexibleArgumentParser , is_in_ray_actor
38+ from vllm .utils import FlexibleArgumentParser , GiB_bytes , is_in_ray_actor
3939
4040# yapf: enable
4141
@@ -1625,13 +1625,13 @@ def _set_default_args_v1(self, usage_context: UsageContext) -> None:
         # values for non-H100/H200 GPUs.
         try:
             from vllm.platforms import current_platform
-            device_name = current_platform.get_device_name().lower()
+            device_memory = current_platform.get_device_total_memory()
         except Exception:
             # This is only used to set default_max_num_batched_tokens
-            device_name = "no-device"
+            device_memory = 0
 
-        if "h100" in device_name or "h200" in device_name:
-            # For H100 and H200, we use larger default values.
+        if device_memory >= 70 * GiB_bytes:
+            # For GPUs like H100 and MI300x, use larger default values.
             default_max_num_batched_tokens = {
                 UsageContext.LLM_CLASS: 16384,
                 UsageContext.OPENAI_API_SERVER: 8192,
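
The change replaces a device-name string match with a memory threshold, so any accelerator reporting at least 70 GiB of total memory (80 GB-class cards such as H100, H200, or MI300x) picks up the larger defaults, without needing to enumerate device names. Below is a minimal standalone sketch of the heuristic. It uses torch to query device memory where the real code calls current_platform.get_device_total_memory(); the function name and the small-device fallback value are illustrative, not vLLM's actual code.

import torch

GiB_bytes = 1 << 30  # same constant as vllm.utils.GiB_bytes


def pick_default_max_num_batched_tokens(is_api_server: bool) -> int:
    # Sketch only: the real code goes through
    # current_platform.get_device_total_memory().
    try:
        device_memory = torch.cuda.get_device_properties(0).total_memory
    except Exception:
        # No visible device; this value only steers a default, so 0 is safe.
        device_memory = 0

    if device_memory >= 70 * GiB_bytes:
        # 80 GB-class GPUs clear the 70 GiB bar even though they report
        # slightly less than a full 80 GiB of usable memory.
        return 8192 if is_api_server else 16384
    # The smaller-device defaults fall outside the hunk shown above;
    # 2048 is a placeholder, not vLLM's actual value.
    return 2048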