ai-dynamo
diff --git a/‎components/backends/sglang/deploy/disagg-multinode.yaml‎
Lines changed: 109 additions & 0 deletions b/‎components/backends/sglang/deploy/disagg-multinode.yaml‎
Lines changed: 109 additions & 0 deletions
diff --git a/‎deploy/cloud/helm/crds/templates/nvidia.com_dynamocomponentdeployments.yaml‎
Lines changed: 21 additions & 8 deletions b/‎deploy/cloud/helm/crds/templates/nvidia.com_dynamocomponentdeployments.yaml‎
Lines changed: 21 additions & 8 deletions
diff --git a/‎deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeployments.yaml‎
Lines changed: 21 additions & 8 deletions b/‎deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeployments.yaml‎
Lines changed: 21 additions & 8 deletions
diff --git a/‎deploy/cloud/operator/Makefile‎
Lines changed: 3 additions & 1 deletion b/‎deploy/cloud/operator/Makefile‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎deploy/cloud/operator/api/dynamo/common/common.go‎
Lines changed: 3 additions & 4 deletions b/‎deploy/cloud/operator/api/dynamo/common/common.go‎
Lines changed: 3 additions & 4 deletions
diff --git a/‎deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go‎
Lines changed: 26 additions & 0 deletions b/‎deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go‎
Lines changed: 20 additions & 0 deletions b/‎deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎deploy/cloud/operator/config/crd/bases/nvidia.com_dynamocomponentdeployments.yaml‎
Lines changed: 21 additions & 8 deletions b/‎deploy/cloud/operator/config/crd/bases/nvidia.com_dynamocomponentdeployments.yaml‎
Lines changed: 21 additions & 8 deletions
@@ -0,0 +1,109 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoGraphDeployment
+metadata:
+  name: sglang-disagg-multinode
+spec:
+  envs:  # graph-level environment variables (presumably applied to every service below — confirm)
+  - name: HF_TOKEN
+    valueFrom:
+      secretKeyRef:
+        name: hf-token-secret  # same Secret the worker services reference via envFromSecret
+        key: HF_TOKEN
+  - name: GLOO_SOCKET_IFNAME
+    value: "eth0"  # NOTE(review): assumes the pod network interface is named eth0 — adjust for your cluster
+  backendFramework: sglang
+  services:  # one entry per component: Frontend, decode, prefill
+    Frontend:  # runs dynamo.frontend (see args)
+      dynamoNamespace: sglang-disagg-multinode  # same value as the --namespace passed to clear_namespace in args
+      componentType: main
+      replicas: 1
+      extraPodSpec:
+        mainContainer:
+          image: my-registry/sglang-runtime:my-tag  # placeholder — replace with your registry and tag
+          workingDir: /workspace/components/backends/sglang
+          command: ["sh", "-c"]  # args carries one shell command string
+          args:  # runs clear_namespace first; dynamo.frontend (HTTP port 8000) starts only if it succeeds (&&)
+            - "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg-multinode && python3 -m dynamo.frontend --http-port=8000"
+    decode:  # worker started with --disaggregation-mode decode
+      multinode:
+        nodeCount: 2  # 2 nodes x 4 GPUs (limits.gpu) = 8 GPUs, matching --tp-size 8
+      envFromSecret: hf-token-secret  # same Secret that spec.envs reads HF_TOKEN from
+      dynamoNamespace: sglang-disagg-multinode  # same namespace as the Frontend service
+      componentType: worker
+      replicas: 1
+      resources:
+        requests:
+          cpu: "10"
+          memory: "40Gi"
+        limits:
+          cpu: "10"
+          memory: "40Gi"
+          gpu: "4"  # per node; total GPUs = nodeCount * gpu
+      extraPodSpec:
+        mainContainer:
+          image: my-registry/sglang-runtime:my-tag  # placeholder — replace with your registry and tag
+          workingDir: /workspace/components/backends/sglang
+          command: ["sh", "-c"]  # NOTE(review): sh -c executes only its first argument as the script; presumably the operator joins the args below into one command string — confirm
+          args:
+            - "python3"
+            - "-m"
+            - "dynamo.sglang.decode_worker"
+            - "--model-path"
+            - "meta-llama/Llama-3.3-70B-Instruct"
+            - "--served-model-name"
+            - "meta-llama/Llama-3.3-70B-Instruct"
+            - "--tp-size"
+            - "8"  # equals nodeCount (2) * limits.gpu (4)
+            - "--trust-remote-code"
+            - "--skip-tokenizer-init"
+            - "--disaggregation-mode"
+            - "decode"
+            - "--disaggregation-transfer-backend"
+            - "nixl"
+            - "--disaggregation-bootstrap-port"
+            - "30001"  # same bootstrap port value as the prefill service
+            - "--mem-fraction-static"
+            - "0.82"
+    prefill:  # worker started with --disaggregation-mode prefill
+      multinode:
+        nodeCount: 2  # 2 nodes x 4 GPUs (limits.gpu) = 8 GPUs, matching --tp-size 8
+      envFromSecret: hf-token-secret  # same Secret that spec.envs reads HF_TOKEN from
+      dynamoNamespace: sglang-disagg-multinode  # same namespace as Frontend and decode, so the whole graph shares one Dynamo namespace
+      componentType: worker
+      replicas: 1
+      resources:
+        requests:
+          cpu: "10"
+          memory: "40Gi"
+        limits:
+          cpu: "10"
+          memory: "40Gi"
+          gpu: "4"  # per node; total GPUs = nodeCount * gpu
+      extraPodSpec:
+        mainContainer:
+          image: my-registry/sglang-runtime:my-tag  # placeholder — replace with your registry and tag
+          workingDir: /workspace/components/backends/sglang
+          command: ["sh", "-c"]  # NOTE(review): sh -c executes only its first argument as the script; presumably the operator joins the args below into one command string — confirm
+          args:
+            - "python3"
+            - "-m"
+            - "dynamo.sglang.worker"
+            - "--model-path"
+            - "meta-llama/Llama-3.3-70B-Instruct"
+            - "--served-model-name"
+            - "meta-llama/Llama-3.3-70B-Instruct"
+            - "--tp-size"
+            - "8"  # equals nodeCount (2) * limits.gpu (4)
+            - "--trust-remote-code"
+            - "--skip-tokenizer-init"
+            - "--disaggregation-mode"
+            - "prefill"
+            - "--disaggregation-transfer-backend"
+            - "nixl"
+            - "--disaggregation-bootstrap-port"
+            - "30001"  # same bootstrap port value as the decode service
+            - "--mem-fraction-static"
+            - "0.82"
@@ -10009,6 +10009,21 @@ spec:
                       format: int32
                       type: integer
                   type: object
+                multinode:
+                  description: Multinode is the configuration for multinode components.
+                  properties:
+                    nodeCount:
+                      default: 2
+                      description: |-
+                        Indicates the number of nodes to deploy for multinode components.
+                        Total number of GPUs is NumberOfNodes * GPU limit.
+                        Must be greater than 1.
+                      format: int32
+                      minimum: 2
+                      type: integer
+                  required:
+                    - nodeCount
+                  type: object
                 pvc:
                   description: PVC config describing volumes to be mounted by the component.
                   properties:
@@ -10199,13 +10214,12 @@ spec:
                             type: string
                           type: object
                         gpu:
-                          description: GPU is the number of GPUs to request per node.
+                          description: |-
+                            Indicates the number of GPUs to request.
+                            total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
                           type: string
                         memory:
                           type: string
-                        nodes:
-                          description: Nodes is the number of nodes to request. Total number of GPUs will be GPU * Nodes.
-                          type: string
                       type: object
                     requests:
                       properties:
@@ -10216,13 +10230,12 @@ spec:
                             type: string
                           type: object
                         gpu:
-                          description: GPU is the number of GPUs to request per node.
+                          description: |-
+                            Indicates the number of GPUs to request.
+                            total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
                           type: string
                         memory:
                           type: string
-                        nodes:
-                          description: Nodes is the number of nodes to request. Total number of GPUs will be GPU * Nodes.
-                          type: string
                       type: object
                   type: object
                 serviceName:
 
@@ -10108,6 +10108,21 @@ spec:
                             format: int32
                             type: integer
                         type: object
+                      multinode:
+                        description: Multinode is the configuration for multinode components.
+                        properties:
+                          nodeCount:
+                            default: 2
+                            description: |-
+                              Indicates the number of nodes to deploy for multinode components.
+                              Total number of GPUs is NumberOfNodes * GPU limit.
+                              Must be greater than 1.
+                            format: int32
+                            minimum: 2
+                            type: integer
+                        required:
+                          - nodeCount
+                        type: object
                       pvc:
                         description: PVC config describing volumes to be mounted by the component.
                         properties:
@@ -10298,13 +10313,12 @@ spec:
                                   type: string
                                 type: object
                               gpu:
-                                description: GPU is the number of GPUs to request per node.
+                                description: |-
+                                  Indicates the number of GPUs to request.
+                                  total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
                                 type: string
                               memory:
                                 type: string
-                              nodes:
-                                description: Nodes is the number of nodes to request. Total number of GPUs will be GPU * Nodes.
-                                type: string
                             type: object
                           requests:
                             properties:
@@ -10315,13 +10329,12 @@ spec:
                                   type: string
                                 type: object
                               gpu:
-                                description: GPU is the number of GPUs to request per node.
+                                description: |-
+                                  Indicates the number of GPUs to request.
+                                  total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
                                 type: string
                               memory:
                                 type: string
-                              nodes:
-                                description: Nodes is the number of nodes to request. Total number of GPUs will be GPU * Nodes.
-                                type: string
                             type: object
                         type: object
                       serviceName:
 
@@ -97,7 +97,9 @@ manifests: controller-gen ensure-yq ## Generate WebhookConfiguration, ClusterRol
 			yq eval '.metadata.annotations."helm.sh/resource-policy" = "keep"' -i "$$file"; \
 		fi; \
 	done
-	cp config/crd/bases/*.yaml ../helm/crds/templates/
+	if [ -d "../helm/crds/templates/" ]; then \
+		cp config/crd/bases/*.yaml ../helm/crds/templates/; \
+	fi
 
 .PHONY: generate
 generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
 
@@ -25,10 +25,9 @@ import (
 type ResourceItem struct {
 	CPU    string `json:"cpu,omitempty"`
 	Memory string `json:"memory,omitempty"`
-	// GPU is the number of GPUs to request per node.
-	GPU string `json:"gpu,omitempty"`
-	// Nodes is the number of nodes to request. Total number of GPUs will be GPU * Nodes.
-	Nodes  string            `json:"nodes,omitempty"`
+	// Indicates the number of GPUs to request.
+	// total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
+	GPU    string            `json:"gpu,omitempty"`
 	Custom map[string]string `json:"custom,omitempty"`
 }
 
 
@@ -106,6 +106,17 @@ type DynamoComponentDeploymentSharedSpec struct {
 	ReadinessProbe *corev1.Probe `json:"readinessProbe,omitempty"`
 	// Replicas is the desired number of Pods for this component when autoscaling is not used.
 	Replicas *int32 `json:"replicas,omitempty"`
+	// Multinode is the configuration for multinode components.
+	Multinode *MultinodeSpec `json:"multinode,omitempty"`
+}
+
+// MultinodeSpec is the configuration for a component deployed across multiple nodes.
+type MultinodeSpec struct {
+	// +kubebuilder:default=2
+	// Indicates the number of nodes to deploy for multinode components.
+	// Total number of GPUs is NumberOfNodes * GPU limit.
+	// Must be greater than 1.
+	// +kubebuilder:validation:Minimum=2
+	NodeCount int32 `json:"nodeCount"`
 }
 
 type IngressTLSSpec struct {
@@ -234,3 +245,18 @@ func (s *DynamoComponentDeployment) SetDynamoDeploymentConfig(config []byte) {
 		Value: string(config),
 	})
 }
+
+// IsMultinode reports whether this component is deployed on more than one node.
+func (s *DynamoComponentDeployment) IsMultinode() bool {
+	nodes := s.GetNumberOfNodes()
+	return nodes > 1
+}
+
+// GetNumberOfNodes returns the node count reported by this deployment's Spec.
+func (s *DynamoComponentDeployment) GetNumberOfNodes() int32 {
+	nodeCount := s.Spec.GetNumberOfNodes()
+	return nodeCount
+}
+
+// GetNumberOfNodes returns Multinode.NodeCount when a multinode configuration
+// is set, and 1 otherwise.
+func (s *DynamoComponentDeploymentSharedSpec) GetNumberOfNodes() int32 {
+	// No multinode section means the component runs on a single node.
+	if s.Multinode == nil {
+		return 1
+	}
+	return s.Multinode.NodeCount
+}
@@ -10009,6 +10009,21 @@ spec:
                       format: int32
                       type: integer
                   type: object
+                multinode:
+                  description: Multinode is the configuration for multinode components.
+                  properties:
+                    nodeCount:
+                      default: 2
+                      description: |-
+                        Indicates the number of nodes to deploy for multinode components.
+                        Total number of GPUs is NumberOfNodes * GPU limit.
+                        Must be greater than 1.
+                      format: int32
+                      minimum: 2
+                      type: integer
+                  required:
+                    - nodeCount
+                  type: object
                 pvc:
                   description: PVC config describing volumes to be mounted by the component.
                   properties:
@@ -10199,13 +10214,12 @@ spec:
                             type: string
                           type: object
                         gpu:
-                          description: GPU is the number of GPUs to request per node.
+                          description: |-
+                            Indicates the number of GPUs to request.
+                            total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
                           type: string
                         memory:
                           type: string
-                        nodes:
-                          description: Nodes is the number of nodes to request. Total number of GPUs will be GPU * Nodes.
-                          type: string
                       type: object
                     requests:
                       properties:
@@ -10216,13 +10230,12 @@ spec:
                             type: string
                           type: object
                         gpu:
-                          description: GPU is the number of GPUs to request per node.
+                          description: |-
+                            Indicates the number of GPUs to request.
+                            total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
                           type: string
                         memory:
                           type: string
-                        nodes:
-                          description: Nodes is the number of nodes to request. Total number of GPUs will be GPU * Nodes.
-                          type: string
                       type: object
                   type: object
                 serviceName: