Skip to content

Commit 1bda07a

Browse files
committed
feat: add disagg examples
1 parent 222245e commit 1bda07a

File tree

3 files changed

+252
-1
lines changed

3 files changed

+252
-1
lines changed

components/backends/sglang/deploy/agg.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ spec:
4242
workingDir: /workspace/components/backends/sglang
4343
command: ["sh", "-c"]
4444
args:
45-
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace dynamo && python3 -m dynamo.frontend"
45+
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace dynamo && dynamo run in=http out=dyn --http-port=8000"
4646
SGLangDecodeWorker:
4747
envFromSecret: hf-token-secret
4848
livenessProbe:
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Example DynamoGraphDeployment "sglang-agg-router": an HTTP frontend started
# with `--router-mode kv` plus one SGLang worker that requests a single GPU.
# NOTE(review): the captured diff carried no leading whitespace, so the nesting
# below is reconstructed from key order -- confirm against the CRD schema.
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: sglang-agg-router
spec:
  services:
    Frontend:
      dynamoNamespace: sglang-agg-router
      componentType: main
      replicas: 1
      # Requests equal limits for both CPU and memory.
      resources:
        limits:
          cpu: "5"
          memory: "10Gi"
        requests:
          cpu: "5"
          memory: "10Gi"
      # Liveness polls GET /health on port 8000, the same port handed to
      # --http-port in the container args below.
      livenessProbe:
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
        httpGet:
          path: /health
          port: 8000
      # Readiness only runs `exit 0`, so it passes whenever a shell can be
      # spawned in the container; it does not query the frontend itself.
      readinessProbe:
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
      extraPodSpec:
        mainContainer:
          # NOTE(review): image looks like a placeholder -- confirm the real
          # registry and tag before deploying.
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command:
            - "sh"
            - "-c"
          args:
            # One shell line: clear the namespace first and start the frontend
            # only if that succeeds (`&&`).
            # NOTE(review): this clears namespace `dynamo`, while the service
            # declares dynamoNamespace `sglang-agg-router` -- confirm intended.
            - >-
              python3 -m dynamo.sglang.utils.clear_namespace --namespace dynamo
              && dynamo run in=http out=dyn --router-mode kv --http-port=8000
    SGLangDecodeWorker:
      dynamoNamespace: sglang-agg-router
      componentType: worker
      replicas: 1
      envFromSecret: hf-token-secret
      # Requests equal limits; one GPU per replica.
      resources:
        limits:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
        requests:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
      # Both worker probes run `exit 0`. The liveness probe sets no
      # initialDelaySeconds, unlike the readiness probe.
      livenessProbe:
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
      readinessProbe:
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
      extraPodSpec:
        mainContainer:
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          # Only `args` is given here (no `command` override), one token per
          # list item.
          args:
            - "python3"
            - "-m"
            - "dynamo.sglang.worker"
            # Model path and served name are the same identifier.
            - "--model-path"
            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            - "--served-model-name"
            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            - "--page-size"
            - "16"
            - "--tp"
            - "1"
            - "--trust-remote-code"
            - "--skip-tokenizer-init"
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Example DynamoGraphDeployment for a disaggregated SGLang graph: one HTTP
# frontend, one worker started with `--disaggregation-mode decode` and one
# started with `--disaggregation-mode prefill`. Each worker requests one GPU.
# NOTE(review): the captured diff carried no leading whitespace, so the nesting
# below is reconstructed from key order -- confirm against the CRD schema.
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  # Same identifier as the dynamoNamespace of every service below, so this
  # resource does not share a name with an aggregated `sglang-agg` deployment.
  name: sglang-disagg
spec:
  services:
    Frontend:
      # Liveness polls GET /health on port 8000, the same port handed to
      # --http-port in the container args below.
      livenessProbe:
        httpGet:
          path: /health
          port: 8000
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      # Readiness only runs `exit 0`, so it passes whenever a shell can be
      # spawned in the container; it does not query the frontend itself.
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-disagg
      componentType: main
      replicas: 1
      # Requests equal limits for both CPU and memory.
      resources:
        requests:
          cpu: "5"
          memory: "10Gi"
        limits:
          cpu: "5"
          memory: "10Gi"
      extraPodSpec:
        mainContainer:
          # NOTE(review): image looks like a placeholder -- confirm the real
          # registry and tag before deploying.
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command: ["sh", "-c"]
          args:
            # One shell line: clear the namespace first and start the frontend
            # only if that succeeds (`&&`).
            # NOTE(review): this clears namespace `dynamo`, while the service
            # declares dynamoNamespace `sglang-disagg` -- confirm intended.
            - "python3 -m dynamo.sglang.utils.clear_namespace --namespace dynamo && dynamo run in=http out=dyn --http-port=8000"
    SGLangDecodeWorker:
      envFromSecret: hf-token-secret
      # Both worker probes run `exit 0`. The liveness probe sets no
      # initialDelaySeconds, unlike the readiness probe.
      livenessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-disagg
      componentType: worker
      replicas: 1
      # Requests equal limits; one GPU per replica.
      resources:
        requests:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
        limits:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          # Only `args` is given here (no `command` override), one token per
          # list item.
          args:
            - "python3"
            - "-m"
            - "dynamo.sglang.worker"
            - "--model-path"
            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            - "--served-model-name"
            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            - "--page-size"
            - "16"
            - "--tp"
            - "1"
            - "--trust-remote-code"
            - "--skip-tokenizer-init"
            # Decode side; uses the same transfer backend as the prefill
            # worker below.
            - "--disaggregation-mode"
            - "decode"
            - "--disaggregation-transfer-backend"
            - "nixl"
    SGLangPrefillWorker:
      envFromSecret: hf-token-secret
      # Probe settings are identical to SGLangDecodeWorker.
      livenessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-disagg
      componentType: worker
      replicas: 1
      # Requests equal limits; one GPU per replica.
      resources:
        requests:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
        limits:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          # Same model and flags as the decode worker; only the
          # --disaggregation-mode value differs.
          args:
            - "python3"
            - "-m"
            - "dynamo.sglang.worker"
            - "--model-path"
            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            - "--served-model-name"
            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            - "--page-size"
            - "16"
            - "--tp"
            - "1"
            - "--trust-remote-code"
            - "--skip-tokenizer-init"
            - "--disaggregation-mode"
            - "prefill"
            - "--disaggregation-transfer-backend"
            - "nixl"

0 commit comments

Comments
 (0)