|
1 | 1 | #!/bin/bash |
| 2 | +# |
| 3 | +# Launch a Ray cluster inside Docker for vLLM inference. |
| 4 | +# |
| 5 | +# This script can start either a head node or a worker node, depending on the |
| 6 | +# --head or --worker flag provided as the third positional argument. |
| 7 | +# |
| 8 | +# Usage: |
| 9 | +# 1. Designate one machine as the head node and execute: |
| 10 | +# bash run_cluster.sh \ |
| 11 | +# vllm/vllm-openai \ |
| 12 | +# <head_node_ip> \ |
| 13 | +# --head \ |
| 14 | +# /abs/path/to/huggingface/cache \ |
| 15 | +# -e VLLM_HOST_IP=<head_node_ip> |
| 16 | +# |
| 17 | +# 2. On every worker machine, execute: |
| 18 | +# bash run_cluster.sh \ |
| 19 | +# vllm/vllm-openai \ |
| 20 | +# <head_node_ip> \ |
| 21 | +# --worker \ |
| 22 | +# /abs/path/to/huggingface/cache \ |
| 23 | +# -e VLLM_HOST_IP=<worker_node_ip> |
| 24 | +# |
| 25 | +# Each worker requires a unique VLLM_HOST_IP value. |
| 26 | +# Keep each terminal session open. Closing a session stops the associated Ray |
| 27 | +# node and thereby shuts down the entire cluster. |
| 28 | +# Every machine must be reachable at the supplied IP address. |
| 29 | +# |
| 30 | +# The container is named "node-<random_suffix>". To open a shell inside |
| 31 | +# a container after launch, use: |
| 32 | +# docker exec -it node-<random_suffix> /bin/bash |
| 33 | +# |
| 34 | +# Then, you can execute vLLM commands on the Ray cluster as if it were a |
| 35 | +# single machine, e.g. vllm serve ... |
| 36 | +# |
| 37 | +# To stop the container, use: |
| 38 | +# docker stop node-<random_suffix> |
2 | 39 |
|
3 | | -# Check for minimum number of required arguments |
| 40 | +# Check for minimum number of required arguments. |
4 | 41 | if [ $# -lt 4 ]; then |
5 | | - echo "Usage: $0 docker_image head_node_address --head|--worker path_to_hf_home [additional_args...]" |
| 42 | + echo "Usage: $0 docker_image head_node_ip --head|--worker path_to_hf_home [additional_args...]" |
6 | 43 | exit 1 |
7 | 44 | fi |
8 | 45 |
|
9 | | -# Assign the first three arguments and shift them away |
| 46 | +# Extract the mandatory positional arguments and remove them from $@. |
10 | 47 | DOCKER_IMAGE="$1" |
11 | 48 | HEAD_NODE_ADDRESS="$2" |
12 | | -NODE_TYPE="$3" # Should be --head or --worker |
| 49 | +NODE_TYPE="$3" # Should be --head or --worker. |
13 | 50 | PATH_TO_HF_HOME="$4" |
14 | 51 | shift 4 |
15 | 52 |
|
16 | | -# Additional arguments are passed directly to the Docker command |
| 53 | +# Preserve any extra arguments so they can be forwarded to Docker. |
17 | 54 | ADDITIONAL_ARGS=("$@") |
18 | 55 |
|
19 | | -# Validate node type |
| 56 | +# Validate the NODE_TYPE argument. |
20 | 57 | if [ "${NODE_TYPE}" != "--head" ] && [ "${NODE_TYPE}" != "--worker" ]; then |
21 | 58 | echo "Error: Node type must be --head or --worker" |
22 | 59 | exit 1 |
23 | 60 | fi |
24 | 61 |
|
25 | | -# Define a function to cleanup on EXIT signal |
| 62 | +# Generate a container name with a random suffix (${RANDOM}, 0-32767) so name collisions are unlikely. |
| 63 | +# Docker container names must be unique on each host. |
| 64 | +# The random suffix allows multiple Ray containers to run simultaneously on the same machine, |
| 65 | +# for example, on a multi-GPU machine. |
| 66 | +CONTAINER_NAME="node-${RANDOM}" |
| 67 | + |
| 68 | +# Define a cleanup routine that stops and removes the container when the script exits. |
| 69 | +# This prevents orphaned containers from accumulating if the script is interrupted. |
26 | 70 | cleanup() { |
27 | | - docker stop node |
28 | | - docker rm node |
| 71 | + docker stop "${CONTAINER_NAME}" |
| 72 | + docker rm "${CONTAINER_NAME}" |
29 | 73 | } |
30 | 74 | trap cleanup EXIT |
31 | 75 |
|
32 | | -# Command setup for head or worker node |
| 76 | +# Build the Ray start command based on the node role. |
| 77 | +# The head node manages the cluster and accepts connections on port 6379, |
| 78 | +# while workers connect to the head's address. |
33 | 79 | RAY_START_CMD="ray start --block" |
34 | 80 | if [ "${NODE_TYPE}" == "--head" ]; then |
35 | 81 | RAY_START_CMD+=" --head --port=6379" |
36 | 82 | else |
37 | 83 | RAY_START_CMD+=" --address=${HEAD_NODE_ADDRESS}:6379" |
38 | 84 | fi |
39 | 85 |
|
40 | | -# Run the docker command with the user specified parameters and additional arguments |
| 86 | +# Launch the container with the assembled parameters. |
| 87 | +# --network host: Allows Ray nodes to communicate directly via host networking |
| 88 | +# --shm-size 10.24g: Sets the container's shared memory (/dev/shm) to 10.24 GB, up from Docker's 64 MB default |
| 89 | +# --gpus all: Gives container access to all GPUs on the host |
| 90 | +# -v PATH_TO_HF_HOME: Mounts the host Hugging Face cache at /root/.cache/huggingface to avoid re-downloading models |
41 | 91 | docker run \ |
42 | 92 | --entrypoint /bin/bash \ |
43 | 93 | --network host \ |
44 | | - --name node \ |
| 94 | + --name "${CONTAINER_NAME}" \ |
45 | 95 | --shm-size 10.24g \ |
46 | 96 | --gpus all \ |
47 | 97 | -v "${PATH_TO_HF_HOME}:/root/.cache/huggingface" \ |
|
0 commit comments