# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# DynamoGraphDeployment "sglang-disagg": one HTTP frontend plus two SGLang
# workers that run the same model with --disaggregation-mode set to "decode"
# and "prefill" respectively. Every service runs a single replica in the
# "sglang-disagg" dynamoNamespace.
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: sglang-disagg
spec:
  services:
    # Frontend: started with --http-port=8000; the liveness probe below polls
    # /health on that same port.
    Frontend:
      livenessProbe:
        httpGet:
          path: /health
          port: 8000
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      # Placeholder probe: `exit 0` always succeeds, so it does not reflect
      # whether the frontend can actually serve requests.
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-disagg
      componentType: main
      replicas: 1
      # Requests equal limits for both CPU and memory.
      resources:
        requests:
          cpu: "5"
          memory: "10Gi"
        limits:
          cpu: "5"
          memory: "10Gi"
      extraPodSpec:
        mainContainer:
          # Placeholder image reference; replace registry and tag before applying.
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command: ["sh", "-c"]
          args:
            # Single shell string (folded, no trailing newline). `&&` means the
            # frontend starts only if clear_namespace exits successfully.
            # NOTE(review): presumably clear_namespace removes stale state for
            # the sglang-disagg namespace -- confirm against that module.
            - >-
              python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg
              && python3 -m dynamo.frontend --http-port=8000

    # Decode-side worker: dynamo.sglang.worker with --disaggregation-mode decode.
    SGLangDecodeWorker:
      # NOTE(review): presumably supplies Hugging Face credentials for the
      # model download -- confirm what keys hf-token-secret holds.
      envFromSecret: hf-token-secret
      # Placeholder probes: `exit 0` always succeeds, so neither probe checks
      # the worker process itself.
      livenessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-disagg
      componentType: worker
      replicas: 1
      # Requests equal limits; one GPU per worker.
      resources:
        requests:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
        limits:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          # Placeholder image reference; replace registry and tag before applying.
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          # No `command` is set for this container, so the full command line,
          # including the python3 executable, is carried in `args`.
          # NOTE(review): confirm the image entrypoint / operator executes
          # these args as the process command line.
          # Each token is its own list item and must not carry surrounding
          # whitespace; numeric values stay quoted so they remain strings.
          args:
            - "python3"
            - "-m"
            - "dynamo.sglang.worker"
            - "--model-path"
            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            - "--served-model-name"
            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            - "--page-size"
            - "16"
            - "--tp"
            - "1"
            - "--trust-remote-code"
            - "--skip-tokenizer-init"
            - "--disaggregation-mode"
            - "decode"
            - "--disaggregation-transfer-backend"
            - "nixl"

    # Prefill-side worker: identical to SGLangDecodeWorker except for
    # --disaggregation-mode prefill.
    SGLangPrefillWorker:
      # NOTE(review): presumably supplies Hugging Face credentials for the
      # model download -- confirm what keys hf-token-secret holds.
      envFromSecret: hf-token-secret
      # Placeholder probes: `exit 0` always succeeds, so neither probe checks
      # the worker process itself.
      livenessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-disagg
      componentType: worker
      replicas: 1
      # Requests equal limits; one GPU per worker.
      resources:
        requests:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
        limits:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          # Placeholder image reference; replace registry and tag before applying.
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          # No `command` is set for this container, so the full command line,
          # including the python3 executable, is carried in `args`.
          # NOTE(review): confirm the image entrypoint / operator executes
          # these args as the process command line.
          # Each token is its own list item and must not carry surrounding
          # whitespace; numeric values stay quoted so they remain strings.
          args:
            - "python3"
            - "-m"
            - "dynamo.sglang.worker"
            - "--model-path"
            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            - "--served-model-name"
            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            - "--page-size"
            - "16"
            - "--tp"
            - "1"
            - "--trust-remote-code"
            - "--skip-tokenizer-init"
            - "--disaggregation-mode"
            - "prefill"
            - "--disaggregation-transfer-backend"
            - "nixl"