Skip to content

Commit 63d43ca

Browse files
julienmancuso authored and hhzhang16 committed
feat: extract deploymentType as interface (#2405)
Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
1 parent 6ac8aaf commit 63d43ca

23 files changed

+977
-850
lines changed
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Example DynamoGraphDeployment: SGLang disaggregated serving with one
# Frontend plus separate decode and prefill workers. Each worker is a
# multinode component (nodeCount: 2) with a limit of 4 GPUs; per the CRD
# description the total is nodeCount * GPU limit = 8, matching `--tp-size 8`.
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: sglang-disagg-multinode
spec:
  envs:
    # HF_TOKEN is read from the `HF_TOKEN` key of the `hf-token-secret` Secret.
    - name: HF_TOKEN
      valueFrom:
        secretKeyRef:
          name: hf-token-secret
          key: HF_TOKEN
    - name: GLOO_SOCKET_IFNAME
      value: "eth0"
  backendFramework: sglang
  services:
    Frontend:
      dynamoNamespace: sglang-disagg-multinode
      componentType: main
      replicas: 1
      extraPodSpec:
        mainContainer:
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command: ["sh", "-c"]
          args:
            # Clears the namespace first, then starts the HTTP frontend on port 8000.
            - "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg-multinode && python3 -m dynamo.frontend --http-port=8000"
    decode:
      multinode:
        nodeCount: 2
      envFromSecret: hf-token-secret
      dynamoNamespace: sglang-disagg-multinode
      componentType: worker
      replicas: 1
      resources:
        requests:
          cpu: "10"
          memory: "40Gi"
        limits:
          cpu: "10"
          memory: "40Gi"
          gpu: "4"
      extraPodSpec:
        mainContainer:
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command: ["sh", "-c"]
          # NOTE(review): with `sh -c`, only the first args element is the
          # command string. Presumably the operator joins these args into a
          # single command for multinode workers - TODO confirm.
          args:
            - "python3"
            - "-m"
            - "dynamo.sglang.decode_worker"
            - "--model-path"
            - "meta-llama/Llama-3.3-70B-Instruct"
            - "--served-model-name"
            - "meta-llama/Llama-3.3-70B-Instruct"
            - "--tp-size"
            - "8"
            - "--trust-remote-code"
            - "--skip-tokenizer-init"
            - "--disaggregation-mode"
            - "decode"
            - "--disaggregation-transfer-backend"
            - "nixl"
            - "--disaggregation-bootstrap-port"
            - "30001"
            - "--mem-fraction-static"
            - "0.82"
    prefill:
      multinode:
        nodeCount: 2
      envFromSecret: hf-token-secret
      # Same Dynamo namespace as the Frontend and decode services (was
      # `sglang-disagg`, which differed from every other service in this graph
      # and from the namespace the Frontend clears on startup).
      dynamoNamespace: sglang-disagg-multinode
      componentType: worker
      replicas: 1
      resources:
        requests:
          cpu: "10"
          memory: "40Gi"
        limits:
          cpu: "10"
          memory: "40Gi"
          gpu: "4"
      extraPodSpec:
        mainContainer:
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command: ["sh", "-c"]
          # NOTE(review): with `sh -c`, only the first args element is the
          # command string. Presumably the operator joins these args into a
          # single command for multinode workers - TODO confirm.
          args:
            - "python3"
            - "-m"
            - "dynamo.sglang.worker"
            - "--model-path"
            - "meta-llama/Llama-3.3-70B-Instruct"
            - "--served-model-name"
            - "meta-llama/Llama-3.3-70B-Instruct"
            - "--tp-size"
            - "8"
            - "--trust-remote-code"
            - "--skip-tokenizer-init"
            - "--disaggregation-mode"
            - "prefill"
            - "--disaggregation-transfer-backend"
            - "nixl"
            - "--disaggregation-bootstrap-port"
            - "30001"
            - "--mem-fraction-static"
            - "0.82"

deploy/cloud/helm/crds/templates/nvidia.com_dynamocomponentdeployments.yaml

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10009,6 +10009,21 @@ spec:
1000910009
format: int32
1001010010
type: integer
1001110011
type: object
10012+
multinode:
10013+
description: Multinode is the configuration for multinode components.
10014+
properties:
10015+
nodeCount:
10016+
default: 2
10017+
description: |-
10018+
Indicates the number of nodes to deploy for multinode components.
10019+
Total number of GPUs is NumberOfNodes * GPU limit.
10020+
Must be greater than 1.
10021+
format: int32
10022+
minimum: 2
10023+
type: integer
10024+
required:
10025+
- nodeCount
10026+
type: object
1001210027
pvc:
1001310028
description: PVC config describing volumes to be mounted by the component.
1001410029
properties:
@@ -10199,13 +10214,12 @@ spec:
1019910214
type: string
1020010215
type: object
1020110216
gpu:
10202-
description: GPU is the number of GPUs to request per node.
10217+
description: |-
10218+
Indicates the number of GPUs to request.
10219+
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
1020310220
type: string
1020410221
memory:
1020510222
type: string
10206-
nodes:
10207-
description: Nodes is the number of nodes to request. Total number of GPUs will be GPU * Nodes.
10208-
type: string
1020910223
type: object
1021010224
requests:
1021110225
properties:
@@ -10216,13 +10230,12 @@ spec:
1021610230
type: string
1021710231
type: object
1021810232
gpu:
10219-
description: GPU is the number of GPUs to request per node.
10233+
description: |-
10234+
Indicates the number of GPUs to request.
10235+
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
1022010236
type: string
1022110237
memory:
1022210238
type: string
10223-
nodes:
10224-
description: Nodes is the number of nodes to request. Total number of GPUs will be GPU * Nodes.
10225-
type: string
1022610239
type: object
1022710240
type: object
1022810241
serviceName:

deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeployments.yaml

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10108,6 +10108,21 @@ spec:
1010810108
format: int32
1010910109
type: integer
1011010110
type: object
10111+
multinode:
10112+
description: Multinode is the configuration for multinode components.
10113+
properties:
10114+
nodeCount:
10115+
default: 2
10116+
description: |-
10117+
Indicates the number of nodes to deploy for multinode components.
10118+
Total number of GPUs is NumberOfNodes * GPU limit.
10119+
Must be greater than 1.
10120+
format: int32
10121+
minimum: 2
10122+
type: integer
10123+
required:
10124+
- nodeCount
10125+
type: object
1011110126
pvc:
1011210127
description: PVC config describing volumes to be mounted by the component.
1011310128
properties:
@@ -10298,13 +10313,12 @@ spec:
1029810313
type: string
1029910314
type: object
1030010315
gpu:
10301-
description: GPU is the number of GPUs to request per node.
10316+
description: |-
10317+
Indicates the number of GPUs to request.
10318+
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
1030210319
type: string
1030310320
memory:
1030410321
type: string
10305-
nodes:
10306-
description: Nodes is the number of nodes to request. Total number of GPUs will be GPU * Nodes.
10307-
type: string
1030810322
type: object
1030910323
requests:
1031010324
properties:
@@ -10315,13 +10329,12 @@ spec:
1031510329
type: string
1031610330
type: object
1031710331
gpu:
10318-
description: GPU is the number of GPUs to request per node.
10332+
description: |-
10333+
Indicates the number of GPUs to request.
10334+
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
1031910335
type: string
1032010336
memory:
1032110337
type: string
10322-
nodes:
10323-
description: Nodes is the number of nodes to request. Total number of GPUs will be GPU * Nodes.
10324-
type: string
1032510338
type: object
1032610339
type: object
1032710340
serviceName:

deploy/cloud/operator/Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,9 @@ manifests: controller-gen ensure-yq ## Generate WebhookConfiguration, ClusterRol
9797
yq eval '.metadata.annotations."helm.sh/resource-policy" = "keep"' -i "$$file"; \
9898
fi; \
9999
done
100-
cp config/crd/bases/*.yaml ../helm/crds/templates/
100+
if [ -d "../helm/crds/templates/" ]; then \
101+
cp config/crd/bases/*.yaml ../helm/crds/templates/; \
102+
fi
101103

102104
.PHONY: generate
103105
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.

deploy/cloud/operator/api/dynamo/common/common.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,9 @@ import (
2525
type ResourceItem struct {
2626
CPU string `json:"cpu,omitempty"`
2727
Memory string `json:"memory,omitempty"`
28-
// GPU is the number of GPUs to request per node.
29-
GPU string `json:"gpu,omitempty"`
30-
// Nodes is the number of nodes to request. Total number of GPUs will be GPU * Nodes.
31-
Nodes string `json:"nodes,omitempty"`
28+
// Indicates the number of GPUs to request.
29+
// total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
30+
GPU string `json:"gpu,omitempty"`
3231
Custom map[string]string `json:"custom,omitempty"`
3332
}
3433

deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,17 @@ type DynamoComponentDeploymentSharedSpec struct {
106106
ReadinessProbe *corev1.Probe `json:"readinessProbe,omitempty"`
107107
// Replicas is the desired number of Pods for this component when autoscaling is not used.
108108
Replicas *int32 `json:"replicas,omitempty"`
109+
// Multinode is the configuration for multinode components.
110+
Multinode *MultinodeSpec `json:"multinode,omitempty"`
111+
}
112+
113+
// MultinodeSpec is the configuration for a component deployed across
// several nodes.
type MultinodeSpec struct {
	// Indicates the number of nodes to deploy for multinode components.
	// Total number of GPUs is NumberOfNodes * GPU limit.
	// Must be greater than 1.
	// +kubebuilder:default=2
	// +kubebuilder:validation:Minimum=2
	NodeCount int32 `json:"nodeCount"`
}
110121

111122
type IngressTLSSpec struct {
@@ -234,3 +245,18 @@ func (s *DynamoComponentDeployment) SetDynamoDeploymentConfig(config []byte) {
234245
Value: string(config),
235246
})
236247
}
248+
249+
func (s *DynamoComponentDeployment) IsMultinode() bool {
250+
return s.GetNumberOfNodes() > 1
251+
}
252+
253+
func (s *DynamoComponentDeployment) GetNumberOfNodes() int32 {
254+
return s.Spec.GetNumberOfNodes()
255+
}
256+
257+
func (s *DynamoComponentDeploymentSharedSpec) GetNumberOfNodes() int32 {
258+
if s.Multinode != nil {
259+
return s.Multinode.NodeCount
260+
}
261+
return 1
262+
}

deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 20 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

deploy/cloud/operator/config/crd/bases/nvidia.com_dynamocomponentdeployments.yaml

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10009,6 +10009,21 @@ spec:
1000910009
format: int32
1001010010
type: integer
1001110011
type: object
10012+
multinode:
10013+
description: Multinode is the configuration for multinode components.
10014+
properties:
10015+
nodeCount:
10016+
default: 2
10017+
description: |-
10018+
Indicates the number of nodes to deploy for multinode components.
10019+
Total number of GPUs is NumberOfNodes * GPU limit.
10020+
Must be greater than 1.
10021+
format: int32
10022+
minimum: 2
10023+
type: integer
10024+
required:
10025+
- nodeCount
10026+
type: object
1001210027
pvc:
1001310028
description: PVC config describing volumes to be mounted by the component.
1001410029
properties:
@@ -10199,13 +10214,12 @@ spec:
1019910214
type: string
1020010215
type: object
1020110216
gpu:
10202-
description: GPU is the number of GPUs to request per node.
10217+
description: |-
10218+
Indicates the number of GPUs to request.
10219+
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
1020310220
type: string
1020410221
memory:
1020510222
type: string
10206-
nodes:
10207-
description: Nodes is the number of nodes to request. Total number of GPUs will be GPU * Nodes.
10208-
type: string
1020910223
type: object
1021010224
requests:
1021110225
properties:
@@ -10216,13 +10230,12 @@ spec:
1021610230
type: string
1021710231
type: object
1021810232
gpu:
10219-
description: GPU is the number of GPUs to request per node.
10233+
description: |-
10234+
Indicates the number of GPUs to request.
10235+
total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
1022010236
type: string
1022110237
memory:
1022210238
type: string
10223-
nodes:
10224-
description: Nodes is the number of nodes to request. Total number of GPUs will be GPU * Nodes.
10225-
type: string
1022610239
type: object
1022710240
type: object
1022810241
serviceName:

0 commit comments

Comments
 (0)