Skip to content

Commit a2874fd

Browse files
feat: add possibility to use grove in dynamo graph helm chart (#1954)
1 parent fe718fd commit a2874fd

File tree

8 files changed

+150
-0
lines changed

8 files changed

+150
-0
lines changed

components/backends/vllm/deploy/agg.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ spec:
4040
mainContainer:
4141
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
4242
workingDir: /workspace/components/backends/vllm
43+
command:
44+
- /bin/sh
45+
- -c
4346
args:
4447
- "python3 -m dynamo.frontend --http-port 8000"
4548
VllmDecodeWorker:

components/backends/vllm/deploy/agg_router.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ spec:
4040
mainContainer:
4141
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
4242
workingDir: /workspace/components/backends/vllm
43+
command:
44+
- /bin/sh
45+
- -c
4346
args:
4447
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
4548
VllmDecodeWorker:

components/backends/vllm/deploy/disagg.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ spec:
4040
mainContainer:
4141
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
4242
workingDir: /workspace/components/backends/vllm
43+
command:
44+
- /bin/sh
45+
- -c
4346
args:
4447
- "python3 -m dynamo.frontend --http-port 8000"
4548
VllmDecodeWorker:

components/backends/vllm/deploy/disagg_planner.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ spec:
4040
mainContainer:
4141
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
4242
workingDir: /workspace/components/backends/vllm
43+
command:
44+
- /bin/sh
45+
- -c
4346
args:
4447
- "python3 -m dynamo.frontend --http-port 8000"
4548
VllmDecodeWorker:
@@ -79,6 +82,9 @@ spec:
7982
mainContainer:
8083
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
8184
workingDir: /workspace/components/backends/vllm
85+
command:
86+
- /bin/sh
87+
- -c
8288
args:
8389
- "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log"
8490
VllmPrefillWorker:

components/backends/vllm/deploy/disagg_router.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ spec:
4040
mainContainer:
4141
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
4242
workingDir: /workspace/components/backends/vllm
43+
command:
44+
- /bin/sh
45+
- -c
4346
args:
4447
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
4548
VllmDecodeWorker:

deploy/helm/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,21 @@ This approach allows you to install Dynamo directly using a DynamoGraphDeploymen
2626
- Kubernetes 1.16+
2727
- ETCD v3.5+ (without auth)
2828
- NATS v2.10+ (with jetstream enabled)
29+
- Grove v0.1.0+ (optional; required only when deploying with `deploymentType=grove`)
2930

3031
### Basic Installation
3132

3233
Here is how you would install a VLLM inference backend example.
3334

3435
```bash
3536
helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./components/backends/vllm/deploy/agg.yaml
37+
```
38+
### Installation using Grove
39+
40+
Same example as above, but using Grove PodGangSet resources.
41+
42+
```bash
43+
helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./components/backends/vllm/deploy/agg.yaml --set deploymentType=grove
3644
```
3745

3846
### Customizable Properties
@@ -54,6 +62,7 @@ helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud \
5462
| `imagePullSecrets` | Array of image pull secrets for accessing private registries | `imagePullSecrets[0].name=docker-secret-1` |
5563
| `etcdAddr` | Address of the etcd service | `dynamo-platform-etcd:2379` |
5664
| `natsAddr` | Address of the NATS messaging service | `nats://dynamo-platform-nats:4222` |
65+
| `deploymentType` | Type of deployment to use. Can be `basic` or `grove`. If not specified, `basic` is used. | `deploymentType=grove` |
5766

5867

5968

deploy/helm/chart/templates/deployment.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15+
# if deploymentType is empty, or explicitly set to basic, use basic as default
16+
{{- if or (not .Values.deploymentType) (eq .Values.deploymentType "basic") -}}
1517
{{- range $serviceName, $serviceSpec := .Values.spec.services }}
1618
---
1719
apiVersion: apps/v1
@@ -117,3 +119,4 @@ spec:
117119
scheme: HTTP
118120
{{- end }}
119121
{{- end }}
122+
{{- end }}
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
{{- /*
Grove deployment: renders one PodGangSet named after the release, with one
clique per entry in .Values.spec.services.

Rendered only when .Values.deploymentType is "grove". An unset deploymentType
is defaulted to "basic" before the comparison so that `eq` never receives a
nil operand, and nothing is emitted in that case.

Values read:
  .Values.spec.services            map of serviceName -> serviceSpec
  .Values.imagePullSecrets         optional list, copied verbatim
  .Values.dynamoPort               optional, DYNAMO_PORT defaults to 8000
  .Values.healthPort               optional, health port defaults to 5000
  .Values.etcdAddr / .natsAddr     optional, exported as env vars when set
*/ -}}
{{- if eq (.Values.deploymentType | default "basic") "grove" }}
---
apiVersion: grove.io/v1alpha1
kind: PodGangSet
metadata:
  {{- /* Quoted so a digit-only release name is not parsed as a number. */}}
  name: {{ $.Release.Name | quote }}
  labels:
    app: {{ $.Release.Name | quote }}
spec:
  replicas: 1
  template:
    cliques:
    {{- range $serviceName, $serviceSpec := .Values.spec.services }}
    {{- /* Clique and role names are the lower-cased service key. */}}
    {{- $cliqueName := $serviceName | lower }}
    {{- $mainContainer := $serviceSpec.extraPodSpec.mainContainer }}
    - name: {{ $cliqueName | quote }}
      spec:
        roleName: {{ $cliqueName | quote }}
        replicas: {{ $serviceSpec.replicas }}
        podSpec:
          {{- if $.Values.imagePullSecrets }}
          imagePullSecrets:
            {{- toYaml $.Values.imagePullSecrets | nindent 12 }}
          {{- end }}
          containers:
          - name: main
            image: {{ $mainContainer.image | quote }}
            {{- if $serviceSpec.resources }}
            {{- /* The same cpu/memory/gpu values are used for requests and limits. */}}
            resources:
              requests:
                {{- if $serviceSpec.resources.cpu }}
                cpu: "{{ $serviceSpec.resources.cpu }}"
                {{- end }}
                {{- if $serviceSpec.resources.memory }}
                memory: "{{ $serviceSpec.resources.memory }}"
                {{- end }}
                {{- if $serviceSpec.resources.gpu }}
                nvidia.com/gpu: "{{ $serviceSpec.resources.gpu }}"
                {{- end }}
              limits:
                {{- if $serviceSpec.resources.cpu }}
                cpu: "{{ $serviceSpec.resources.cpu }}"
                {{- end }}
                {{- if $serviceSpec.resources.memory }}
                memory: "{{ $serviceSpec.resources.memory }}"
                {{- end }}
                {{- if $serviceSpec.resources.gpu }}
                nvidia.com/gpu: "{{ $serviceSpec.resources.gpu }}"
                {{- end }}
            {{- end }}
            workingDir: {{ $mainContainer.workingDir | quote }}
            {{- if $mainContainer.command }}
            command:
              {{- toYaml $mainContainer.command | nindent 14 }}
            {{- end }}
            {{- if $mainContainer.args }}
            args:
              {{- toYaml $mainContainer.args | nindent 14 }}
            {{- end }}
            env:
            - name: DYNAMO_PORT
              value: "{{ $.Values.dynamoPort | default 8000 }}"
            {{- if $.Values.etcdAddr }}
            - name: ETCD_ENDPOINTS
              value: "{{ $.Values.etcdAddr }}"
            {{- end }}
            {{- if $.Values.natsAddr }}
            - name: NATS_SERVER
              value: "{{ $.Values.natsAddr }}"
            {{- end }}
            {{- if $serviceSpec.envFromSecret }}
            envFrom:
            - secretRef:
                name: {{ $serviceSpec.envFromSecret | quote }}
            {{- end }}
            ports:
            - name: health
              containerPort: {{ $.Values.healthPort | default 5000 }}
            {{- /* A probe supplied on the service replaces the default probe entirely. */}}
            livenessProbe:
              {{- if $serviceSpec.livenessProbe }}
              {{- toYaml $serviceSpec.livenessProbe | nindent 14 }}
              {{- else }}
              initialDelaySeconds: 60
              periodSeconds: 60
              timeoutSeconds: 5
              failureThreshold: 10
              successThreshold: 1
              httpGet:
                path: /healthz
                port: health
                scheme: HTTP
              {{- end }}
            readinessProbe:
              {{- if $serviceSpec.readinessProbe }}
              {{- toYaml $serviceSpec.readinessProbe | nindent 14 }}
              {{- else }}
              initialDelaySeconds: 60
              periodSeconds: 60
              timeoutSeconds: 5
              failureThreshold: 10
              successThreshold: 1
              httpGet:
                path: /readyz
                port: health
                scheme: HTTP
              {{- end }}
    {{- end }}
{{- end }}

0 commit comments

Comments
 (0)