diff --git a/components/backends/vllm/deploy/agg.yaml b/components/backends/vllm/deploy/agg.yaml index 1e65d9e83c..154b00d654 100644 --- a/components/backends/vllm/deploy/agg.yaml +++ b/components/backends/vllm/deploy/agg.yaml @@ -40,6 +40,9 @@ spec: mainContainer: image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4 workingDir: /workspace/components/backends/vllm + command: + - /bin/sh + - -c args: - "python3 -m dynamo.frontend --http-port 8000" VllmDecodeWorker: diff --git a/components/backends/vllm/deploy/agg_router.yaml b/components/backends/vllm/deploy/agg_router.yaml index ebe09f56d7..07ac0b4dac 100644 --- a/components/backends/vllm/deploy/agg_router.yaml +++ b/components/backends/vllm/deploy/agg_router.yaml @@ -40,6 +40,9 @@ spec: mainContainer: image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4 workingDir: /workspace/components/backends/vllm + command: + - /bin/sh + - -c args: - "python3 -m dynamo.frontend --http-port 8000 --router-mode kv" VllmDecodeWorker: diff --git a/components/backends/vllm/deploy/disagg.yaml b/components/backends/vllm/deploy/disagg.yaml index bec1b3b4b9..edd01b64a7 100644 --- a/components/backends/vllm/deploy/disagg.yaml +++ b/components/backends/vllm/deploy/disagg.yaml @@ -40,6 +40,9 @@ spec: mainContainer: image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4 workingDir: /workspace/components/backends/vllm + command: + - /bin/sh + - -c args: - "python3 -m dynamo.frontend --http-port 8000" VllmDecodeWorker: diff --git a/components/backends/vllm/deploy/disagg_planner.yaml b/components/backends/vllm/deploy/disagg_planner.yaml index 61d84f76e3..a3886256ca 100644 --- a/components/backends/vllm/deploy/disagg_planner.yaml +++ b/components/backends/vllm/deploy/disagg_planner.yaml @@ -40,6 +40,9 @@ spec: mainContainer: image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4 workingDir: /workspace/components/backends/vllm + command: + - /bin/sh + - -c args: - "python3 -m dynamo.frontend --http-port 8000" 
VllmDecodeWorker: @@ -79,6 +82,9 @@ spec: mainContainer: image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4 workingDir: /workspace/components/backends/vllm + command: + - /bin/sh + - -c args: - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager 2>&1 | tee /tmp/vllm.log" VllmPrefillWorker: diff --git a/components/backends/vllm/deploy/disagg_router.yaml b/components/backends/vllm/deploy/disagg_router.yaml index bab4c70cad..c86419736d 100644 --- a/components/backends/vllm/deploy/disagg_router.yaml +++ b/components/backends/vllm/deploy/disagg_router.yaml @@ -40,6 +40,9 @@ spec: mainContainer: image: nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime:dep-216.4 workingDir: /workspace/components/backends/vllm + command: + - /bin/sh + - -c args: - "python3 -m dynamo.frontend --http-port 8000 --router-mode kv" VllmDecodeWorker: diff --git a/deploy/helm/README.md b/deploy/helm/README.md index 7c1b404108..704a11e3db 100644 --- a/deploy/helm/README.md +++ b/deploy/helm/README.md @@ -26,6 +26,7 @@ This approach allows you to install Dynamo directly using a DynamoGraphDeploymen - Kubernetes 1.16+ - ETCD v3.5+ (without auth) - NATS v2.10+ (with jetstream enabled) +- Grove v0.1.0+ (optional if deploying using Grove) ### Basic Installation @@ -33,6 +34,14 @@ Here is how you would install a VLLM inference backend example. ```bash helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./components/backends/vllm/deploy/agg.yaml +``` + +### Installation using Grove + +Same example as above, but using Grove PodGangSet resources. 
+ +```bash +helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud -f ./components/backends/vllm/deploy/agg.yaml --set deploymentType=grove ``` ### Customizable Properties @@ -54,6 +62,7 @@ helm upgrade --install dynamo-graph ./deploy/helm/chart -n dynamo-cloud \ | `imagePullSecrets` | Array of image pull secrets for accessing private registries | `imagePullSecrets[0].name=docker-secret-1` | | `etcdAddr` | Address of the etcd service | `dynamo-platform-etcd:2379` | | `natsAddr` | Address of the NATS messaging service | `nats://dynamo-platform-nats:4222` | +| `deploymentType` | Type of deployment to use. Can be `basic` or `grove`. If not specified, `basic` is used. | `deploymentType=grove` | diff --git a/deploy/helm/chart/templates/deployment.yaml b/deploy/helm/chart/templates/deployment.yaml index 8d7be4c6bc..51579d3254 100644 --- a/deploy/helm/chart/templates/deployment.yaml +++ b/deploy/helm/chart/templates/deployment.yaml @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# if deploymentType is empty, or explicitly set to basic, use basic as default +{{- if or (not .Values.deploymentType) (eq .Values.deploymentType "basic") -}} {{- range $serviceName, $serviceSpec := .Values.spec.services }} --- apiVersion: apps/v1 @@ -117,3 +119,4 @@ spec: scheme: HTTP {{- end }} {{- end }} +{{- end }} \ No newline at end of file diff --git a/deploy/helm/chart/templates/grove-podgangset.yaml b/deploy/helm/chart/templates/grove-podgangset.yaml new file mode 100644 index 0000000000..fa5689db1b --- /dev/null +++ b/deploy/helm/chart/templates/grove-podgangset.yaml @@ -0,0 +1,120 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +{{- if eq (.Values.deploymentType | default "basic") "grove" }} +--- +apiVersion: grove.io/v1alpha1 +kind: PodGangSet +metadata: + name: {{ $.Release.Name }} + labels: + app: {{ $.Release.Name }} +spec: + replicas: 1 + template: + cliques: + {{- range $serviceName, $serviceSpec := .Values.spec.services }} + - name: {{ $serviceName | lower }} + spec: + roleName: {{ $serviceName | lower }} + replicas: {{ $serviceSpec.replicas }} + podSpec: + {{- if $.Values.imagePullSecrets }} + imagePullSecrets: + {{ $.Values.imagePullSecrets | toYaml | nindent 12 }} + {{- end }} + containers: + - name: main + image: {{ $serviceSpec.extraPodSpec.mainContainer.image }} + {{- if $serviceSpec.resources }} + resources: + requests: + {{- if $serviceSpec.resources.cpu }} + cpu: "{{ $serviceSpec.resources.cpu }}" + {{- end }} + {{- if $serviceSpec.resources.memory }} + memory: "{{ $serviceSpec.resources.memory }}" + {{- end }} + {{- if $serviceSpec.resources.gpu }} + nvidia.com/gpu: "{{ $serviceSpec.resources.gpu }}" + {{- end }} + limits: + {{- if $serviceSpec.resources.cpu }} + cpu: "{{ $serviceSpec.resources.cpu }}" + {{- end }} + {{- if $serviceSpec.resources.memory }} + memory: "{{ $serviceSpec.resources.memory }}" + {{- end }} + {{- if $serviceSpec.resources.gpu }} + nvidia.com/gpu: "{{ $serviceSpec.resources.gpu }}" + {{- end }} + {{- end }} + workingDir: {{ $serviceSpec.extraPodSpec.mainContainer.workingDir }} + {{- if 
$serviceSpec.extraPodSpec.mainContainer.command }} + command: + {{- $serviceSpec.extraPodSpec.mainContainer.command | toYaml | nindent 14 }} + {{- end }} + {{- if $serviceSpec.extraPodSpec.mainContainer.args }} + args: + {{- $serviceSpec.extraPodSpec.mainContainer.args | toYaml | nindent 14 }} + {{- end }} + env: + - name: DYNAMO_PORT + value: "{{ $.Values.dynamoPort | default 8000 }}" + {{- if $.Values.etcdAddr }} + - name: ETCD_ENDPOINTS + value: "{{ $.Values.etcdAddr }}" + {{- end }} + {{- if $.Values.natsAddr }} + - name: NATS_SERVER + value: "{{ $.Values.natsAddr }}" + {{- end }} + {{- if $serviceSpec.envFromSecret }} + envFrom: + - secretRef: + name: {{ $serviceSpec.envFromSecret }} + {{- end }} + ports: + - name: health + containerPort: {{ $.Values.healthPort | default 5000 }} + livenessProbe: + {{- if $serviceSpec.livenessProbe }} + {{ $serviceSpec.livenessProbe | toYaml | nindent 14 }} + {{- else }} + initialDelaySeconds: 60 + periodSeconds: 60 + timeoutSeconds: 5 + failureThreshold: 10 + successThreshold: 1 + httpGet: + path: /healthz + port: health + scheme: HTTP + {{- end }} + readinessProbe: + {{- if $serviceSpec.readinessProbe }} + {{ $serviceSpec.readinessProbe | toYaml | nindent 14 }} + {{- else }} + initialDelaySeconds: 60 + periodSeconds: 60 + timeoutSeconds: 5 + failureThreshold: 10 + successThreshold: 1 + httpGet: + path: /readyz + port: health + scheme: HTTP + {{- end }} + {{- end }} +{{- end }}