Skip to content

Commit cfc6178

Browse files
authored
feat: add sglang disagg deployment examples (#2137)
1 parent f809659 commit cfc6178

File tree

6 files changed: 256 additions and 5 deletions.

components/backends/sglang/deploy/agg.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ spec:
4242
workingDir: /workspace/components/backends/sglang
4343
command: ["sh", "-c"]
4444
args:
45-
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace dynamo && python3 -m dynamo.frontend"
45+
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-agg && python3 -m dynamo.frontend --http-port=8000"
4646
SGLangDecodeWorker:
4747
envFromSecret: hf-token-secret
4848
livenessProbe:
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Example DynamoGraphDeployment named "sglang-agg-router".
# It declares two services that share the dynamoNamespace "sglang-agg-router":
#   - Frontend:           runs dynamo.frontend on HTTP port 8000 with `--router-mode kv`.
#   - SGLangDecodeWorker: runs a single dynamo.sglang.worker with one GPU.
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: sglang-agg-router
spec:
  services:
    Frontend:
      # Liveness is an HTTP GET on /health; the port matches the --http-port
      # value passed to dynamo.frontend in `args` below.
      livenessProbe:
        httpGet:
          path: /health
          port: 8000
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      # Readiness runs `exit 0`, so the probe command itself always succeeds;
      # it does not check the frontend.
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-agg-router
      componentType: main
      replicas: 1
      # Requests equal limits for both CPU and memory.
      resources:
        requests:
          cpu: "5"
          memory: "10Gi"
        limits:
          cpu: "5"
          memory: "10Gi"
      extraPodSpec:
        mainContainer:
          # NOTE(review): looks like a placeholder image reference — replace
          # with a real registry/tag before deploying.
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          # Run through `sh -c` so the two python invocations can be chained
          # with `&&`: the clear_namespace utility runs first for this
          # namespace, and the frontend starts only if it exits successfully.
          command: ["sh", "-c"]
          args:
            - "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-agg-router && python3 -m dynamo.frontend --http-port=8000 --router-mode kv"
    SGLangDecodeWorker:
      # NOTE(review): presumably a secret holding a Hugging Face token used to
      # download the model — confirm it exists in the target namespace.
      envFromSecret: hf-token-secret
      # Both probes run `exit 0`, so the probe commands always succeed. Unlike
      # the readiness probe, the liveness probe sets no initialDelaySeconds.
      livenessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-agg-router
      componentType: worker
      replicas: 1
      # Requests equal limits; one GPU is requested.
      resources:
        requests:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
        limits:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          # NOTE(review): looks like a placeholder image reference — replace
          # with a real registry/tag before deploying.
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          # NOTE(review): unlike Frontend, no `command` is set here, so these
          # args start with "python3" themselves — confirm the image
          # entrypoint (or the operator) executes them as a command line.
          args:
            - "python3"
            - "-m"
            - "dynamo.sglang.worker"
            - "--model-path"
            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            - "--served-model-name"
            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            - "--page-size"
            - "16"
            - "--tp"
            - "1"
            - "--trust-remote-code"
            - "--skip-tokenizer-init"
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Example DynamoGraphDeployment named "sglang-disagg".
# It declares three services that share the dynamoNamespace "sglang-disagg":
#   - Frontend:            runs dynamo.frontend on HTTP port 8000.
#   - SGLangDecodeWorker:  dynamo.sglang.worker with `--disaggregation-mode decode`.
#   - SGLangPrefillWorker: dynamo.sglang.worker with `--disaggregation-mode prefill`.
# Both workers pass `--disaggregation-transfer-backend nixl` and are otherwise
# configured identically (same model, page size, tp, resources and probes).
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: sglang-disagg
spec:
  services:
    Frontend:
      # Liveness is an HTTP GET on /health; the port matches the --http-port
      # value passed to dynamo.frontend in `args` below.
      livenessProbe:
        httpGet:
          path: /health
          port: 8000
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      # Readiness runs `exit 0`, so the probe command itself always succeeds;
      # it does not check the frontend.
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-disagg
      componentType: main
      replicas: 1
      # Requests equal limits for both CPU and memory.
      resources:
        requests:
          cpu: "5"
          memory: "10Gi"
        limits:
          cpu: "5"
          memory: "10Gi"
      extraPodSpec:
        mainContainer:
          # NOTE(review): looks like a placeholder image reference — replace
          # with a real registry/tag before deploying.
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          # Run through `sh -c` so the two python invocations can be chained
          # with `&&`: the clear_namespace utility runs first for this
          # namespace, and the frontend starts only if it exits successfully.
          command: ["sh", "-c"]
          args:
            - "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg && python3 -m dynamo.frontend --http-port=8000"
    SGLangDecodeWorker:
      # NOTE(review): presumably a secret holding a Hugging Face token used to
      # download the model — confirm it exists in the target namespace.
      envFromSecret: hf-token-secret
      # Both probes run `exit 0`, so the probe commands always succeed. Unlike
      # the readiness probe, the liveness probe sets no initialDelaySeconds.
      livenessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-disagg
      componentType: worker
      replicas: 1
      # Requests equal limits; one GPU is requested.
      resources:
        requests:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
        limits:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          # NOTE(review): looks like a placeholder image reference — replace
          # with a real registry/tag before deploying.
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          # NOTE(review): unlike Frontend, no `command` is set here, so these
          # args start with "python3" themselves — confirm the image
          # entrypoint (or the operator) executes them as a command line.
          args:
            - "python3"
            - "-m"
            - "dynamo.sglang.worker"
            - "--model-path"
            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            - "--served-model-name"
            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            - "--page-size"
            - "16"
            - "--tp"
            - "1"
            - "--trust-remote-code"
            - "--skip-tokenizer-init"
            # Decode side of the disaggregated pair.
            - "--disaggregation-mode"
            - "decode"
            - "--disaggregation-transfer-backend"
            - "nixl"
    SGLangPrefillWorker:
      # NOTE(review): presumably a secret holding a Hugging Face token used to
      # download the model — confirm it exists in the target namespace.
      envFromSecret: hf-token-secret
      # Both probes run `exit 0`, so the probe commands always succeed. Unlike
      # the readiness probe, the liveness probe sets no initialDelaySeconds.
      livenessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-disagg
      componentType: worker
      replicas: 1
      # Requests equal limits; one GPU is requested.
      resources:
        requests:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
        limits:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          # NOTE(review): looks like a placeholder image reference — replace
          # with a real registry/tag before deploying.
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          # NOTE(review): unlike Frontend, no `command` is set here, so these
          # args start with "python3" themselves — confirm the image
          # entrypoint (or the operator) executes them as a command line.
          args:
            - "python3"
            - "-m"
            - "dynamo.sglang.worker"
            - "--model-path"
            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            - "--served-model-name"
            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            - "--page-size"
            - "16"
            - "--tp"
            - "1"
            - "--trust-remote-code"
            - "--skip-tokenizer-init"
            # Prefill side of the disaggregated pair.
            - "--disaggregation-mode"
            - "prefill"
            - "--disaggregation-transfer-backend"
            - "nixl"

components/backends/sglang/launch/agg.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ trap cleanup EXIT INT TERM
1515
python3 -m dynamo.sglang.utils.clear_namespace --namespace dynamo
1616

1717
# run ingress
18-
dynamo run in=http out=dyn --http-port=8000 &
18+
python3 -m dynamo.frontend --http-port=8000 &
1919
DYNAMO_PID=$!
2020

2121
# run worker

components/backends/sglang/launch/disagg.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ trap cleanup EXIT INT TERM
1515
python3 -m dynamo.sglang.utils.clear_namespace --namespace dynamo
1616

1717
# run ingress
18-
dynamo run in=http out=dyn --http-port=8000 &
18+
python3 -m dynamo.frontend --http-port=8000 &
1919
DYNAMO_PID=$!
2020

2121
# run prefill worker

components/backends/sglang/launch/disagg_dp_attn.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ trap cleanup EXIT INT TERM
1515
python3 -m dynamo.sglang.utils.clear_namespace --namespace dynamo
1616

1717
# run ingress
18-
dynamo run in=http out=dyn --http-port=8000 &
18+
python3 -m dynamo.frontend --http-port=8000 &
1919
DYNAMO_PID=$!
2020

2121
# run prefill worker
@@ -33,7 +33,7 @@ python3 -m dynamo.sglang.worker \
3333
PREFILL_PID=$!
3434

3535
# run decode worker
36-
CUDA_VISIBLE_DEVICES=2,3 python3 dynamo.sglang.decode_worker \
36+
CUDA_VISIBLE_DEVICES=2,3 python3 -m dynamo.sglang.decode_worker \
3737
--model-path silence09/DeepSeek-R1-Small-2layers \
3838
--served-model-name silence09/DeepSeek-R1-Small-2layers \
3939
--tp 2 \

0 commit comments

Comments
 (0)