Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions components/backends/vllm/deploy/agg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ spec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
VllmDecodeWorker:
Expand Down
3 changes: 3 additions & 0 deletions components/backends/vllm/deploy/agg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ spec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
VllmDecodeWorker:
Expand Down
3 changes: 3 additions & 0 deletions components/backends/vllm/deploy/disagg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ spec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
VllmDecodeWorker:
Expand Down
6 changes: 6 additions & 0 deletions components/backends/vllm/deploy/disagg_planner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ spec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
VllmDecodeWorker:
Expand Down Expand Up @@ -79,6 +82,9 @@ spec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log"
VllmPrefillWorker:
Expand Down
3 changes: 3 additions & 0 deletions components/backends/vllm/deploy/disagg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ spec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
VllmDecodeWorker:
Expand Down
9 changes: 9 additions & 0 deletions deploy/helm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,21 @@ This approach allows you to install Dynamo directly using a DynamoGraphDeploymen
- Kubernetes 1.16+
- ETCD v3.5+ (without auth)
- NATS v2.10+ (with jetstream enabled)
- Grove v0.1.0+ (optional; required only when deploying with `deploymentType=grove`)

### Basic Installation

The following command installs an example vLLM inference backend:

```bash
helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./components/backends/vllm/deploy/agg.yaml
```

### Installation using Grove

This installs the same example as above, but renders it as a Grove PodGangSet resource instead of the default (`basic`) resources.

```bash
helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./components/backends/vllm/deploy/agg.yaml --set deploymentType=grove
```

### Customizable Properties
Expand All @@ -54,6 +62,7 @@ helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud \
| `imagePullSecrets` | Array of image pull secrets for accessing private registries | `imagePullSecrets[0].name=docker-secret-1` |
| `etcdAddr` | Address of the etcd service | `dynamo-platform-etcd:2379` |
| `natsAddr` | Address of the NATS messaging service | `nats://dynamo-platform-nats:4222` |
| `deploymentType` | Type of deployment to use. Can be `basic` or `grove`. If not specified, `basic` is used. | `deploymentType=grove` |



3 changes: 3 additions & 0 deletions deploy/helm/chart/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Render the resources below only when deploymentType is unset/empty or explicitly set to "basic" (the default).
{{- if or (not .Values.deploymentType) (eq .Values.deploymentType "basic") -}}
{{- range $serviceName, $serviceSpec := .Values.spec.services }}
---
apiVersion: apps/v1
Expand Down Expand Up @@ -117,3 +119,4 @@ spec:
scheme: HTTP
{{- end }}
{{- end }}
{{- end }}
120 changes: 120 additions & 0 deletions deploy/helm/chart/templates/grove-podgangset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{{- /*
Grove deployment: renders one PodGangSet named after the Helm release, with one
clique per entry in .Values.spec.services. Each clique runs a single container
called "main" built from that service's extraPodSpec.mainContainer.

Values read by this template:
  deploymentType     gate; this file renders only when it equals "grove".
                     An unset/empty value is treated as "basic" (renders nothing).
  spec.services      map of service name -> service spec (replicas, resources,
                     extraPodSpec.mainContainer, envFromSecret, probes).
  imagePullSecrets   optional list, copied verbatim into every pod spec.
  dynamoPort         DYNAMO_PORT env value (default 8000).
  healthPort         container port named "health" (default 5000).
  etcdAddr/natsAddr  optional; exported as ETCD_ENDPOINTS / NATS_SERVER.
*/}}
{{- /* Make the documented default explicit instead of comparing a possibly-nil value. */}}
{{- $deploymentType := .Values.deploymentType | default "basic" }}
{{- if eq $deploymentType "grove" }}
---
apiVersion: grove.io/v1alpha1
kind: PodGangSet
metadata:
  name: {{ $.Release.Name | quote }}
  labels:
    app: {{ $.Release.Name | quote }}
spec:
  replicas: 1
  template:
    cliques:
    {{- range $serviceName, $serviceSpec := .Values.spec.services }}
    {{- /* Shorthand for the per-service container definition supplied in values. */}}
    {{- $main := $serviceSpec.extraPodSpec.mainContainer }}
    - name: {{ $serviceName | lower | quote }}
      spec:
        roleName: {{ $serviceName | lower | quote }}
        replicas: {{ $serviceSpec.replicas }}
        podSpec:
          {{- if $.Values.imagePullSecrets }}
          imagePullSecrets:
            {{- $.Values.imagePullSecrets | toYaml | nindent 12 }}
          {{- end }}
          containers:
          - name: main
            image: {{ $main.image | quote }}
            {{- if $serviceSpec.resources }}
            {{- /* requests and limits are intentionally identical: the same values feed both. */}}
            resources:
              requests:
                {{- if $serviceSpec.resources.cpu }}
                cpu: {{ $serviceSpec.resources.cpu | quote }}
                {{- end }}
                {{- if $serviceSpec.resources.memory }}
                memory: {{ $serviceSpec.resources.memory | quote }}
                {{- end }}
                {{- if $serviceSpec.resources.gpu }}
                nvidia.com/gpu: {{ $serviceSpec.resources.gpu | quote }}
                {{- end }}
              limits:
                {{- if $serviceSpec.resources.cpu }}
                cpu: {{ $serviceSpec.resources.cpu | quote }}
                {{- end }}
                {{- if $serviceSpec.resources.memory }}
                memory: {{ $serviceSpec.resources.memory | quote }}
                {{- end }}
                {{- if $serviceSpec.resources.gpu }}
                nvidia.com/gpu: {{ $serviceSpec.resources.gpu | quote }}
                {{- end }}
            {{- end }}
            {{- /* Omit workingDir entirely when unset rather than rendering a null value. */}}
            {{- if $main.workingDir }}
            workingDir: {{ $main.workingDir | quote }}
            {{- end }}
            {{- if $main.command }}
            command:
              {{- $main.command | toYaml | nindent 14 }}
            {{- end }}
            {{- if $main.args }}
            args:
              {{- $main.args | toYaml | nindent 14 }}
            {{- end }}
            env:
            - name: DYNAMO_PORT
              value: {{ $.Values.dynamoPort | default 8000 | quote }}
            {{- if $.Values.etcdAddr }}
            - name: ETCD_ENDPOINTS
              value: {{ $.Values.etcdAddr | quote }}
            {{- end }}
            {{- if $.Values.natsAddr }}
            - name: NATS_SERVER
              value: {{ $.Values.natsAddr | quote }}
            {{- end }}
            {{- if $serviceSpec.envFromSecret }}
            envFrom:
            - secretRef:
                name: {{ $serviceSpec.envFromSecret | quote }}
            {{- end }}
            ports:
            - name: health
              containerPort: {{ $.Values.healthPort | default 5000 }}
            {{- /* A probe supplied in values replaces the built-in default wholesale (no merge). */}}
            livenessProbe:
              {{- if $serviceSpec.livenessProbe }}
              {{- $serviceSpec.livenessProbe | toYaml | nindent 14 }}
              {{- else }}
              initialDelaySeconds: 60
              periodSeconds: 60
              timeoutSeconds: 5
              failureThreshold: 10
              successThreshold: 1
              httpGet:
                path: /healthz
                port: health
                scheme: HTTP
              {{- end }}
            readinessProbe:
              {{- if $serviceSpec.readinessProbe }}
              {{- $serviceSpec.readinessProbe | toYaml | nindent 14 }}
              {{- else }}
              initialDelaySeconds: 60
              periodSeconds: 60
              timeoutSeconds: 5
              failureThreshold: 10
              successThreshold: 1
              httpGet:
                path: /readyz
                port: health
                scheme: HTTP
              {{- end }}
    {{- end }}
{{- end }}
Loading