Skip to content

Commit 2c642fd

Browse files
authored
fix: vllm deployment examples (#2062)
1 parent 1958b3a commit 2c642fd

File tree

3 files changed: 5 additions and 5 deletions.

components/backends/vllm/deploy/agg_router.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -80,4 +80,4 @@ spec:
80  80    image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4
81  81    workingDir: /workspace/components/backends/vllm
82  82    args:
83      - - "python3 components/main.py --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log"
    83  + - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log"

components/backends/vllm/deploy/disagg.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ spec:
80  80    image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4
81  81    workingDir: /workspace/components/backends/vllm
82  82    args:
83      - - "python3 components/main.py --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log"
    83  + - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log"
84  84    VllmPrefillWorker:
85  85    dynamoNamespace: vllm-v1-disagg
86  86    envFromSecret: hf-token-secret
@@ -119,4 +119,4 @@ spec:
119 119   image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4
120 120   workingDir: /workspace/components/backends/vllm
121 121   args:
122     - - "python3 components/main.py --model Qwen/Qwen3-0.6B --enforce-eager --is-prefill-worker 2>&1 | tee /tmp/vllm.log"
    122 + - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager --is-prefill-worker 2>&1 | tee /tmp/vllm.log"

components/backends/vllm/deploy/disagg_planner.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ spec:
80  80    image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4
81  81    workingDir: /workspace/components/backends/vllm
82  82    args:
83      - - "python3 components/main.py --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log"
    83  + - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log"
84  84    VllmPrefillWorker:
85  85    dynamoNamespace: vllm-v1-disagg-planner
86  86    envFromSecret: hf-token-secret
@@ -119,4 +119,4 @@ spec:
119 119   image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4
120 120   workingDir: /workspace/components/backends/vllm
121 121   args:
122     - - "python3 components/main.py --model Qwen/Qwen3-0.6B --enforce-eager --is-prefill-worker 2>&1 | tee /tmp/vllm.log"
    122 + - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager --is-prefill-worker 2>&1 | tee /tmp/vllm.log"

0 commit comments

Comments
 (0)