Skip to content

Commit 81c2780

Browse files
fix: operator defaults (#2398)
Signed-off-by: mohammedabdulwahhab <furkhan324@berkeley.edu>
1 parent 9ddb3ef commit 81c2780

26 files changed: +544 / -1036 lines changed

components/backends/sglang/deploy/agg.yaml

Lines changed: 1 addition & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -8,26 +8,8 @@ metadata:
88
spec:
99
services:
1010
Frontend:
11-
livenessProbe:
12-
httpGet:
13-
path: /health
14-
port: 8000
15-
initialDelaySeconds: 20
16-
periodSeconds: 5
17-
timeoutSeconds: 5
18-
failureThreshold: 3
19-
readinessProbe:
20-
exec:
21-
command:
22-
- /bin/sh
23-
- -c
24-
- 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
25-
initialDelaySeconds: 60
26-
periodSeconds: 60
27-
timeoutSeconds: 30
28-
failureThreshold: 10
2911
dynamoNamespace: sglang-agg
30-
componentType: main
12+
componentType: frontend
3113
replicas: 1
3214
resources:
3315
requests:
@@ -45,21 +27,6 @@ spec:
4527
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-agg && python3 -m dynamo.frontend --http-port=8000"
4628
SGLangDecodeWorker:
4729
envFromSecret: hf-token-secret
48-
livenessProbe:
49-
httpGet:
50-
path: /live
51-
port: 9090
52-
periodSeconds: 5
53-
timeoutSeconds: 30
54-
failureThreshold: 1
55-
readinessProbe:
56-
exec:
57-
httpGet:
58-
path: /health
59-
port: 9090
60-
periodSeconds: 10
61-
timeoutSeconds: 30
62-
failureThreshold: 60
6330
dynamoNamespace: sglang-agg
6431
componentType: worker
6532
replicas: 1
@@ -72,21 +39,8 @@ spec:
7239
cpu: "32"
7340
memory: "80Gi"
7441
gpu: "1"
75-
envs:
76-
- name: DYN_SYSTEM_ENABLED
77-
value: "true"
78-
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
79-
value: "[\"generate\"]"
80-
- name: DYN_SYSTEM_PORT
81-
value: "9090"
8242
extraPodSpec:
8343
mainContainer:
84-
startupProbe:
85-
httpGet:
86-
path: /live
87-
port: 9090
88-
periodSeconds: 10
89-
failureThreshold: 60
9044
image: my-registry/sglang-runtime:my-tag
9145
workingDir: /workspace/components/backends/sglang
9246
command:

components/backends/sglang/deploy/agg_router.yaml

Lines changed: 1 addition & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -8,26 +8,8 @@ metadata:
88
spec:
99
services:
1010
Frontend:
11-
livenessProbe:
12-
httpGet:
13-
path: /health
14-
port: 8000
15-
initialDelaySeconds: 20
16-
periodSeconds: 5
17-
timeoutSeconds: 5
18-
failureThreshold: 3
19-
readinessProbe:
20-
exec:
21-
command:
22-
- /bin/sh
23-
- -c
24-
- 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
25-
initialDelaySeconds: 60
26-
periodSeconds: 60
27-
timeoutSeconds: 30
28-
failureThreshold: 10
2911
dynamoNamespace: sglang-agg-router
30-
componentType: main
12+
componentType: frontend
3113
replicas: 1
3214
resources:
3315
requests:
@@ -45,21 +27,6 @@ spec:
4527
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-agg-router && python3 -m dynamo.frontend --http-port=8000 --router-mode kv"
4628
SGLangDecodeWorker:
4729
envFromSecret: hf-token-secret
48-
livenessProbe:
49-
httpGet:
50-
path: /live
51-
port: 9090
52-
periodSeconds: 5
53-
timeoutSeconds: 30
54-
failureThreshold: 1
55-
readinessProbe:
56-
exec:
57-
httpGet:
58-
path: /health
59-
port: 9090
60-
periodSeconds: 10
61-
timeoutSeconds: 30
62-
failureThreshold: 60
6330
dynamoNamespace: sglang-agg-router
6431
componentType: worker
6532
replicas: 1
@@ -72,21 +39,8 @@ spec:
7239
cpu: "32"
7340
memory: "80Gi"
7441
gpu: "1"
75-
envs:
76-
- name: DYN_SYSTEM_ENABLED
77-
value: "true"
78-
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
79-
value: "[\"generate\"]"
80-
- name: DYN_SYSTEM_PORT
81-
value: "9090"
8242
extraPodSpec:
8343
mainContainer:
84-
startupProbe:
85-
httpGet:
86-
path: /live
87-
port: 9090
88-
periodSeconds: 10
89-
failureThreshold: 60
9044
image: my-registry/sglang-runtime:my-tag
9145
workingDir: /workspace/components/backends/sglang
9246
command:

components/backends/sglang/deploy/disagg.yaml

Lines changed: 1 addition & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -8,26 +8,8 @@ metadata:
88
spec:
99
services:
1010
Frontend:
11-
livenessProbe:
12-
httpGet:
13-
path: /health
14-
port: 8000
15-
initialDelaySeconds: 20
16-
periodSeconds: 5
17-
timeoutSeconds: 5
18-
failureThreshold: 3
19-
readinessProbe:
20-
exec:
21-
command:
22-
- /bin/sh
23-
- -c
24-
- 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
25-
initialDelaySeconds: 60
26-
periodSeconds: 60
27-
timeoutSeconds: 30
28-
failureThreshold: 10
2911
dynamoNamespace: sglang-disagg
30-
componentType: main
12+
componentType: frontend
3113
replicas: 1
3214
resources:
3315
requests:
@@ -45,21 +27,6 @@ spec:
4527
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg && python3 -m dynamo.frontend --http-port=8000"
4628
SGLangDecodeWorker:
4729
envFromSecret: hf-token-secret
48-
livenessProbe:
49-
httpGet:
50-
path: /live
51-
port: 9090
52-
periodSeconds: 5
53-
timeoutSeconds: 30
54-
failureThreshold: 1
55-
readinessProbe:
56-
exec:
57-
httpGet:
58-
path: /health
59-
port: 9090
60-
periodSeconds: 10
61-
timeoutSeconds: 30
62-
failureThreshold: 60
6330
dynamoNamespace: sglang-disagg
6431
componentType: worker
6532
replicas: 1
@@ -72,21 +39,8 @@ spec:
7239
cpu: "32"
7340
memory: "80Gi"
7441
gpu: "1"
75-
envs:
76-
- name: DYN_SYSTEM_ENABLED
77-
value: "true"
78-
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
79-
value: "[\"generate\"]"
80-
- name: DYN_SYSTEM_PORT
81-
value: "9090"
8242
extraPodSpec:
8343
mainContainer:
84-
startupProbe:
85-
httpGet:
86-
path: /live
87-
port: 9090
88-
periodSeconds: 10
89-
failureThreshold: 60
9044
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0808-07
9145
workingDir: /workspace/components/backends/sglang
9246
command:
@@ -112,21 +66,6 @@ spec:
11266
- "nixl"
11367
SGLangPrefillWorker:
11468
envFromSecret: hf-token-secret
115-
livenessProbe:
116-
httpGet:
117-
path: /live
118-
port: 9090
119-
periodSeconds: 5
120-
timeoutSeconds: 30
121-
failureThreshold: 1
122-
readinessProbe:
123-
exec:
124-
httpGet:
125-
path: /health
126-
port: 9090
127-
periodSeconds: 10
128-
timeoutSeconds: 30
129-
failureThreshold: 60
13069
dynamoNamespace: sglang-disagg
13170
componentType: worker
13271
replicas: 1
@@ -139,21 +78,8 @@ spec:
13978
cpu: "32"
14079
memory: "80Gi"
14180
gpu: "1"
142-
envs:
143-
- name: DYN_SYSTEM_ENABLED
144-
value: "true"
145-
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
146-
value: "[\"generate\"]"
147-
- name: DYN_SYSTEM_PORT
148-
value: "9090"
14981
extraPodSpec:
15082
mainContainer:
151-
startupProbe:
152-
httpGet:
153-
path: /health
154-
port: 9090
155-
periodSeconds: 10
156-
failureThreshold: 60
15783
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0808-07
15884
workingDir: /workspace/components/backends/sglang
15985
command:

components/backends/sglang/deploy/disagg_planner.yaml

Lines changed: 3 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -16,25 +16,7 @@ spec:
1616
services:
1717
Frontend:
1818
dynamoNamespace: dynamo
19-
livenessProbe:
20-
httpGet:
21-
path: /health
22-
port: 8000
23-
initialDelaySeconds: 20
24-
periodSeconds: 5
25-
timeoutSeconds: 5
26-
failureThreshold: 3
27-
readinessProbe:
28-
exec:
29-
command:
30-
- /bin/sh
31-
- -c
32-
- 'curl -s http://localhost:8000/health | jq -e ".status == \"healthy\""'
33-
initialDelaySeconds: 60
34-
periodSeconds: 60
35-
timeoutSeconds: 30
36-
failureThreshold: 10
37-
componentType: main
19+
componentType: frontend
3820
replicas: 1
3921
resources:
4022
requests:
@@ -97,9 +79,9 @@ spec:
9779
- --backend=sglang
9880
- --adjustment-interval=60
9981
- --profile-results-dir=/workspace/profiling_results
100-
Prometheus:
82+
Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
10183
dynamoNamespace: dynamo
102-
componentType: main
84+
componentType: frontend
10385
replicas: 1
10486
envs:
10587
- name: PYTHONPATH
@@ -142,20 +124,6 @@ spec:
142124
SGLangDecodeWorker:
143125
dynamoNamespace: dynamo
144126
envFromSecret: hf-token-secret
145-
livenessProbe:
146-
httpGet:
147-
path: /live
148-
port: 9090
149-
periodSeconds: 5
150-
timeoutSeconds: 30
151-
failureThreshold: 1
152-
readinessProbe:
153-
httpGet:
154-
path: /health
155-
port: 9090
156-
periodSeconds: 10
157-
timeoutSeconds: 30
158-
failureThreshold: 60
159127
componentType: worker
160128
replicas: 2
161129
resources:
@@ -167,21 +135,8 @@ spec:
167135
cpu: "32"
168136
memory: "80Gi"
169137
gpu: "1"
170-
envs:
171-
- name: DYN_SYSTEM_ENABLED
172-
value: "true"
173-
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
174-
value: "[\"generate\"]"
175-
- name: DYN_SYSTEM_PORT
176-
value: "9090"
177138
extraPodSpec:
178139
mainContainer:
179-
startupProbe:
180-
httpGet:
181-
path: /live
182-
port: 9090
183-
periodSeconds: 10
184-
failureThreshold: 60
185140
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
186141
workingDir: /workspace/components/backends/sglang
187142
args:
@@ -205,20 +160,6 @@ spec:
205160
SGLangPrefillWorker:
206161
dynamoNamespace: dynamo
207162
envFromSecret: hf-token-secret
208-
livenessProbe:
209-
httpGet:
210-
path: /live
211-
port: 9090
212-
periodSeconds: 5
213-
timeoutSeconds: 30
214-
failureThreshold: 1
215-
readinessProbe:
216-
httpGet:
217-
path: /health
218-
port: 9090
219-
periodSeconds: 10
220-
timeoutSeconds: 30
221-
failureThreshold: 60
222163
componentType: worker
223164
replicas: 2
224165
resources:
@@ -230,21 +171,8 @@ spec:
230171
cpu: "32"
231172
memory: "80Gi"
232173
gpu: "1"
233-
envs:
234-
- name: DYN_SYSTEM_ENABLED
235-
value: "true"
236-
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
237-
value: "[\"generate\"]"
238-
- name: DYN_SYSTEM_PORT
239-
value: "9090"
240174
extraPodSpec:
241175
mainContainer:
242-
startupProbe:
243-
httpGet:
244-
path: /health
245-
port: 9090
246-
periodSeconds: 10
247-
failureThreshold: 60
248176
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
249177
workingDir: /workspace/components/backends/sglang
250178
args:

0 commit comments

Comments
 (0)