From 4b3657af29eded71542beff0495f8f6a6e915b82 Mon Sep 17 00:00:00 2001 From: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com> Date: Fri, 12 Jul 2024 00:30:46 -0400 Subject: [PATCH] [ROCm][AMD] unify CUDA_VISIBLE_DEVICES usage in cuda/rocm (#6352) --- Dockerfile.rocm | 14 +++++++------- tests/distributed/test_utils.py | 7 +------ vllm/config.py | 9 +-------- vllm/utils.py | 4 ---- vllm/worker/worker_base.py | 10 +--------- 5 files changed, 10 insertions(+), 34 deletions(-) diff --git a/Dockerfile.rocm b/Dockerfile.rocm index 1b89b892bbf1c..befb0499f2e68 100644 --- a/Dockerfile.rocm +++ b/Dockerfile.rocm @@ -52,25 +52,25 @@ RUN pip install --upgrade pip # Remove sccache so it doesn't interfere with ccache # TODO: implement sccache support across components RUN apt-get purge -y sccache; pip uninstall -y sccache; rm -f "$(which sccache)" -# Install torch == 2.4.0 on ROCm +# Install torch == 2.5.0 on ROCm RUN case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \ *"rocm-5.7"*) \ pip uninstall -y torch torchaudio torchvision \ && pip install --no-cache-dir --pre \ - torch==2.4.0.dev20240612 torchaudio==2.4.0.dev20240612 \ - torchvision==0.19.0.dev20240612 \ + torch==2.5.0.dev20240710 torchaudio==2.4.0.dev20240710 \ + torchvision==0.20.0.dev20240710 \ --index-url https://download.pytorch.org/whl/nightly/rocm5.7;; \ *"rocm-6.0"*) \ pip uninstall -y torch torchaudio torchvision \ && pip install --no-cache-dir --pre \ - torch==2.4.0.dev20240612 torchaudio==2.4.0.dev20240612 \ - torchvision==0.19.0.dev20240612 \ + torch==2.5.0.dev20240710 torchaudio==2.4.0.dev20240710 \ + torchvision==0.20.0.dev20240710 \ --index-url https://download.pytorch.org/whl/nightly/rocm6.0;; \ *"rocm-6.1"*) \ pip uninstall -y torch torchaudio torchvision \ && pip install --no-cache-dir --pre \ - torch==2.4.0.dev20240612 torchaudio==2.4.0.dev20240612 \ - torchvision==0.19.0.dev20240612 \ + torch==2.5.0.dev20240710 torchaudio==2.4.0.dev20240710 \ + torchvision==0.20.0.dev20240710 \ --index-url https://download.pytorch.org/whl/nightly/rocm6.1;; \ *) ;; esac diff --git a/tests/distributed/test_utils.py b/tests/distributed/test_utils.py index 9ff11b0d27b11..a51a9909f6f41 100644 --- a/tests/distributed/test_utils.py +++ b/tests/distributed/test_utils.py @@ -1,7 +1,7 @@ import ray import vllm.envs as envs -from vllm.utils import (cuda_device_count_stateless, is_hip, +from vllm.utils import (cuda_device_count_stateless, update_environment_variables) @@ -22,11 +22,6 @@ def get_cuda_visible_devices(self): def test_cuda_device_count_stateless(): """Test that cuda_device_count_stateless changes return value if CUDA_VISIBLE_DEVICES is changed.""" - if is_hip(): - # Set HIP_VISIBLE_DEVICES == CUDA_VISIBLE_DEVICES. Conversion - # is handled by `update_environment_variables` - update_environment_variables( - {"CUDA_VISIBLE_DEVICES": envs.CUDA_VISIBLE_DEVICES}) actor = _CUDADeviceCountStatelessTestActor.options( # type: ignore num_gpus=2).remote() assert sorted(ray.get( diff --git a/vllm/config.py b/vllm/config.py index d333a042fe5af..de7bb3943a45f 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -6,7 +6,6 @@ import torch from transformers import PretrainedConfig -import vllm.envs as envs from vllm.logger import init_logger from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS from vllm.model_executor.models import ModelRegistry @@ -14,7 +13,7 @@ from vllm.transformers_utils.config import get_config, get_hf_text_config from vllm.utils import (cuda_device_count_stateless, get_cpu_memory, is_cpu, is_hip, is_neuron, is_openvino, is_tpu, is_xpu, - print_warning_once, update_environment_variables) + print_warning_once) if TYPE_CHECKING: from ray.util.placement_group import PlacementGroup @@ -695,12 +694,6 @@ def __init__( self.distributed_executor_backend = backend logger.info("Defaulting to use %s for distributed inference", backend) - # If CUDA_VISIBLE_DEVICES is set on ROCm prior to vLLM init, - # propagate changes to HIP_VISIBLE_DEVICES (conversion handled by - # the update_environment_variables function) - if is_hip() and envs.CUDA_VISIBLE_DEVICES: - update_environment_variables( - {"CUDA_VISIBLE_DEVICES": envs.CUDA_VISIBLE_DEVICES}) self._verify_args() self.rank = 0 diff --git a/vllm/utils.py b/vllm/utils.py index a3d15d7979228..8be1528230b5f 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -386,10 +386,6 @@ def get_open_port() -> int: def update_environment_variables(envs: Dict[str, str]): - if is_hip() and "CUDA_VISIBLE_DEVICES" in envs: - # Propagate changes to CUDA_VISIBLE_DEVICES to - # ROCm's HIP_VISIBLE_DEVICES as well - envs["HIP_VISIBLE_DEVICES"] = envs["CUDA_VISIBLE_DEVICES"] for k, v in envs.items(): if k in os.environ and os.environ[k] != v: logger.warning( diff --git a/vllm/worker/worker_base.py b/vllm/worker/worker_base.py index b082f45344863..93ffea9106501 100644 --- a/vllm/worker/worker_base.py +++ b/vllm/worker/worker_base.py @@ -11,7 +11,7 @@ from vllm.lora.request import LoRARequest from vllm.sequence import (ExecuteModelRequest, IntermediateTensors, SamplerOutput) -from vllm.utils import (enable_trace_function_call_for_thread, is_hip, +from vllm.utils import (enable_trace_function_call_for_thread, update_environment_variables) from vllm.worker.model_runner_base import ModelRunnerBase, ModelRunnerInputBase @@ -309,14 +309,6 @@ def update_environment_variables(envs: Dict[str, str]) -> None: # overwriting CUDA_VISIBLE_DEVICES is desired behavior # suppress the warning in `update_environment_variables` del os.environ[key] - if is_hip(): - hip_env_var = "HIP_VISIBLE_DEVICES" - if hip_env_var in os.environ: - logger.warning( - "Ignoring pre-set environment variable `%s=%s` as " - "%s has also been set, which takes precedence.", - hip_env_var, os.environ[hip_env_var], key) - os.environ.pop(hip_env_var, None) update_environment_variables(envs) def init_worker(self, *args, **kwargs):