Skip to content

Commit 1d4ae1c

Browse files
committed
fix: sglang examples launch and deploy
1 parent 2c642fd commit 1d4ae1c

File tree

3 files changed: +86 −2 lines changed
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# DynamoGraphDeployment "sglang-agg": declares two services that share the
# "sglang-agg" dynamoNamespace -- a Frontend (componentType: main) and a
# single SGLangDecodeWorker (componentType: worker) that requests one GPU.
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: sglang-agg
spec:
  services:
    Frontend:
      # Liveness is checked with an HTTP GET on /health, port 8000.
      livenessProbe:
        httpGet:
          path: /health
          port: 8000
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      # `exit 0` always succeeds, so this probe does not actually test
      # readiness; it only delays the first "ready" report by
      # initialDelaySeconds.
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-agg
      componentType: main
      replicas: 1
      resources:
        requests:
          cpu: "5"
          memory: "10Gi"
        limits:
          cpu: "5"
          memory: "10Gi"
      extraPodSpec:
        mainContainer:
          # Presumably a placeholder -- replace with your own registry/tag.
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          # The single args string is a shell command line (it uses `&&`),
          # so it must be run through `sh -c`.
          command: ["sh", "-c"]
          args:
            # NOTE(review): clear_namespace is given `--namespace dynamo`
            # while dynamoNamespace above is `sglang-agg` -- confirm which
            # namespace is intended.
            - "python3 -m dynamo.sglang.utils.clear_namespace --namespace dynamo && python3 -m dynamo.frontend"
    SGLangDecodeWorker:
      # Presumably supplies the Hugging Face token used to download the
      # model -- verify against the secret's contents.
      envFromSecret: hf-token-secret
      # Both probes run `exit 0`, which always succeeds, so neither checks
      # the worker process itself.
      # NOTE(review): unlike the other probes in this file, this one sets no
      # initialDelaySeconds -- confirm that is intentional.
      livenessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-agg
      componentType: worker
      replicas: 1
      resources:
        requests:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
        limits:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          # Presumably a placeholder -- replace with your own registry/tag.
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          # Explicit shell wrapper, matching the Frontend container: the
          # args entry is one whole command line in a single string, which
          # only works when a shell splits it. Without `command`, the
          # string would be passed to the image entrypoint as one argument.
          command: ["sh", "-c"]
          args:
            - "python3 -m dynamo.sglang.worker --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B --page-size 16 --tp 1 --trust-remote-code --skip-tokenizer-init"

components/backends/sglang/launch/agg.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,4 @@ python3 -m dynamo.sglang.worker \
2525
--page-size 16 \
2626
--tp 1 \
2727
--trust-remote-code \
28-
--skip-tokenizer-init \
28+
--skip-tokenizer-init

components/backends/sglang/launch/agg_router.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,4 @@ python3 -m dynamo.sglang.worker \
2525
--page-size 16 \
2626
--tp 1 \
2727
--trust-remote-code \
28-
--skip-tokenizer-init \
28+
--skip-tokenizer-init

0 commit comments

Comments
 (0)