@@ -25,21 +25,65 @@ import (
25
25
26
26
// GPUNodeSpec defines the desired state of GPUNode.
27
27
type GPUNodeSpec struct {
28
- // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
29
- // Important: Run "make" to regenerate code after modifying this file
28
// ManageMode holds a GPUNodeManageMode value; it is serialized as "manageMode" and omitted when empty.
+ ManageMode GPUNodeManageMode `json:"manageMode,omitempty"`
30
29
31
- // Foo is an example field of GPUNode. Edit gpunode_types.go to remove/update
32
- Foo string `json:"foo,omitempty"`
30
+ // GPUCardIndices lists the indices of the GPU cards to use when not every card on the node
31
+ // should be used. It defaults to empty, which onboards all GPU cards to the pool.
32
+ GPUCardIndices []int `json:"gpuCardIndices,omitempty"`
33
33
}
34
34
35
// GPUNodeManageMode is the string type of the GPUNodeSpec.ManageMode field.
+ type GPUNodeManageMode string
36
+
37
// Declared values of GPUNodeManageMode.
// NOTE(review): the identifier suffixes do not line up with the string values
// (None = "manual", Auto = "selected", Manual = "provisioned"). Confirm which side
// is intended before relying on the names; changing either one alters the public API.
+ const (
38
+ GPUNodeManageModeNone GPUNodeManageMode = "manual"
39
+ GPUNodeManageModeAuto GPUNodeManageMode = "selected"
40
+ GPUNodeManageModeManual GPUNodeManageMode = "provisioned"
41
+ )
42
+
35
43
// GPUNodeStatus defines the observed state of GPUNode.
36
44
type GPUNodeStatus struct {
37
- // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
38
- // Important: Run "make" to regenerate code after modifying this file
45
// Phase reuses the TensorFusionClusterPhase type, which is declared outside this view.
+ Phase TensorFusionClusterPhase `json:"phase,omitempty"`
46
+
47
// Conditions is a list of metav1.Condition entries.
+ Conditions []metav1.Condition `json:"conditions,omitempty"`
48
+
49
// The Total* and Available* pairs each combine an int32 TFlops figure with a string VRAM figure.
// NOTE(review): the format and units of the VRAM strings are not visible here — confirm with the code that sets them.
+ TotalTFlops int32 `json:"totalTFlops,omitempty"`
50
+ TotalVRAM string `json:"totalVRAM,omitempty"`
51
+
52
+ AvailableTFlops int32 `json:"availableTFlops,omitempty"`
53
+ AvailableVRAM string `json:"availableVRAM,omitempty"`
54
+
55
// NOTE(review): HypervisorStatus and NodeInfo are struct values. encoding/json's omitempty
// never omits a struct value, so both keys are always emitted — confirm that is intended.
+ HypervisorStatus NodeHypervisorStatus `json:"hypervisorStatus,omitempty"`
56
+
57
+ NodeInfo GPUNodeInfo `json:"nodeInfo,omitempty"`
58
+
59
// LoadedModels is a list of strings; presumably model identifiers — TODO confirm against the writer of this field.
+ LoadedModels []string `json:"loadedModels,omitempty"`
60
+
61
// TotalGPUs and ManagedGPUs are int32 counts, and ManagedGPUResourceIDs is a list of string IDs.
// NOTE(review): what makes a GPU "managed" is not defined in this view — confirm against the controller.
+ TotalGPUs int32 `json:"totalGPUs,omitempty"`
62
+ ManagedGPUs int32 `json:"managedGPUs,omitempty"`
63
+ ManagedGPUResourceIDs []string `json:"managedGPUResourceIDs,omitempty"`
64
+ }
65
+
66
// GPUNodeInfo is the value type of GPUNodeStatus.NodeInfo. Every field carries
// omitempty, so zero-valued strings and a zero GPUCount are left out of the JSON.
+ type GPUNodeInfo struct {
67
+ Hostname string `json:"hostname,omitempty"`
68
+ IP string `json:"ip,omitempty"`
69
// NOTE(review): "Kernal" looks like a misspelling of "Kernel". Renaming would change both
// the Go field and the "kernalVersion" JSON key, so it needs a coordinated API change.
+ KernalVersion string `json:"kernalVersion,omitempty"`
70
+ OSImage string `json:"osImage,omitempty"`
71
+ GPUDriverVersion string `json:"gpuDriverVersion,omitempty"`
72
+ GPUModel string `json:"gpuModel,omitempty"`
73
+ GPUCount int32 `json:"gpuCount,omitempty"`
74
+ OperatingSystem string `json:"operatingSystem,omitempty"`
75
+ Architecture string `json:"architecture,omitempty"`
76
+ }
77
+
78
// NodeHypervisorStatus is the value type of GPUNodeStatus.HypervisorStatus.
+ type NodeHypervisorStatus struct {
79
// HypervisorState is a free-form string here; the accepted values are not declared in this view.
+ HypervisorState string `json:"hypervisorState,omitempty"`
80
+ HypervisorVersion string `json:"hypervisorVersion,omitempty"`
81
// NOTE(review): metav1.Time is declared outside this view; if it is a struct type,
// omitempty has no effect on this non-pointer field — confirm the intended encoding of an unset heartbeat.
+ LastHeartbeatTime metav1.Time `json:"lastHeartbeatTime,omitempty"`
39
82
}
40
83
41
84
// +kubebuilder:object:root=true
42
85
// +kubebuilder:subresource:status
86
+ // +kubebuilder:resource:scope=Cluster
43
87
44
88
// GPUNode is the Schema for the gpunodes API.
45
89
type GPUNode struct {
0 commit comments