Skip to content

Commit b79b3da

Browse files
Code2Life0x5457dependabot[bot]
authored
feat: init cluster manager and node manager types (#6)
* feat: init cluster manager and node manager types * fix: nested type and raw object issue * fix: typing enhancement * feat: fix webhook bug, create tfconn in pod controller (#7) * chore(deps): bump golang.org/x/crypto from 0.24.0 to 0.31.0 (#9) Bumps [golang.org/x/crypto](https://github.com/golang/crypto) from 0.24.0 to 0.31.0. - [Commits](golang/crypto@v0.24.0...v0.31.0) --- updated-dependencies: - dependency-name: golang.org/x/crypto dependency-type: indirect ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * fix: typing enhancement * fix: go lint issues --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: 0x5457 <0x5457@protonmail.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
1 parent 6edbf18 commit b79b3da

28 files changed

+4143
-202
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,5 @@ go.work
2525
*.swp
2626
*.swo
2727
*~
28+
29+
.DS_Store

PROJECT

+8
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,12 @@ resources:
6363
kind: GPUNodeClass
6464
path: github.com/NexusGPU/tensor-fusion-operator/api/v1
6565
version: v1
66+
- api:
67+
crdVersion: v1
68+
namespaced: true
69+
controller: true
70+
domain: tensor-fusion.ai
71+
kind: SchedulingConfigTemplate
72+
path: github.com/NexusGPU/tensor-fusion-operator/api/v1
73+
version: v1
6674
version: "3"

README.md

+7
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@
1212
- kubectl version v1.11.3+.
1313
- Access to a Kubernetes v1.11.3+ cluster.
1414

15+
### Add new API
16+
17+
```bash
18+
kubebuilder create api --group "" --version v1 --kind SchedulingConfigTemplate --namespaced=false
19+
```
20+
1521
### To Deploy on the cluster
1622
**Build and push your image to the location specified by `IMG`:**
1723

@@ -112,3 +118,4 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
112118
See the License for the specific language governing permissions and
113119
limitations under the License.
114120

121+

api/v1/base_types.go

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
package v1
2+
3+
type NameNamespace struct {
4+
Name string `json:"name,omitempty"`
5+
Namespace string `json:"namespace,omitempty"`
6+
}

api/v1/gpunode_types.go

+50-6
Original file line numberDiff line numberDiff line change
@@ -25,21 +25,65 @@ import (
2525

2626
// GPUNodeSpec defines the desired state of GPUNode.
2727
type GPUNodeSpec struct {
28-
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
29-
// Important: Run "make" to regenerate code after modifying this file
28+
ManageMode GPUNodeManageMode `json:"manageMode,omitempty"`
3029

31-
// Foo is an example field of GPUNode. Edit gpunode_types.go to remove/update
32-
Foo string `json:"foo,omitempty"`
30+
// GPUCardIndices lists the indices of the GPU cards to use when only some cards should be onboarded.
31+
// Defaults to empty, which onboards all GPU cards to the pool.
32+
GPUCardIndices []int `json:"gpuCardIndices,omitempty"`
3333
}
3434

35+
type GPUNodeManageMode string
36+
37+
const (
38+
GPUNodeManageModeNone GPUNodeManageMode = "manual"
39+
GPUNodeManageModeAuto GPUNodeManageMode = "selected"
40+
GPUNodeManageModeManual GPUNodeManageMode = "provisioned"
41+
)
42+
3543
// GPUNodeStatus defines the observed state of GPUNode.
3644
type GPUNodeStatus struct {
37-
// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
38-
// Important: Run "make" to regenerate code after modifying this file
45+
Phase TensorFusionClusterPhase `json:"phase,omitempty"`
46+
47+
Conditions []metav1.Condition `json:"conditions,omitempty"`
48+
49+
TotalTFlops int32 `json:"totalTFlops,omitempty"`
50+
TotalVRAM string `json:"totalVRAM,omitempty"`
51+
52+
AvailableTFlops int32 `json:"availableTFlops,omitempty"`
53+
AvailableVRAM string `json:"availableVRAM,omitempty"`
54+
55+
HypervisorStatus NodeHypervisorStatus `json:"hypervisorStatus,omitempty"`
56+
57+
NodeInfo GPUNodeInfo `json:"nodeInfo,omitempty"`
58+
59+
LoadedModels []string `json:"loadedModels,omitempty"`
60+
61+
TotalGPUs int32 `json:"totalGPUs,omitempty"`
62+
ManagedGPUs int32 `json:"managedGPUs,omitempty"`
63+
ManagedGPUResourceIDs []string `json:"managedGPUResourceIDs,omitempty"`
64+
}
65+
66+
type GPUNodeInfo struct {
67+
Hostname string `json:"hostname,omitempty"`
68+
IP string `json:"ip,omitempty"`
69+
KernalVersion string `json:"kernalVersion,omitempty"`
70+
OSImage string `json:"osImage,omitempty"`
71+
GPUDriverVersion string `json:"gpuDriverVersion,omitempty"`
72+
GPUModel string `json:"gpuModel,omitempty"`
73+
GPUCount int32 `json:"gpuCount,omitempty"`
74+
OperatingSystem string `json:"operatingSystem,omitempty"`
75+
Architecture string `json:"architecture,omitempty"`
76+
}
77+
78+
type NodeHypervisorStatus struct {
79+
HypervisorState string `json:"hypervisorState,omitempty"`
80+
HypervisorVersion string `json:"hypervisorVersion,omitempty"`
81+
LastHeartbeatTime metav1.Time `json:"lastHeartbeatTime,omitempty"`
3982
}
4083

4184
// +kubebuilder:object:root=true
4285
// +kubebuilder:subresource:status
86+
// +kubebuilder:resource:scope=Cluster
4387

4488
// GPUNode is the Schema for the gpunodes API.
4589
type GPUNode struct {

api/v1/gpunodeclass_types.go

+45-6
Original file line numberDiff line numberDiff line change
@@ -25,21 +25,60 @@ import (
2525

2626
// GPUNodeClassSpec defines the desired state of GPUNodeClass.
2727
type GPUNodeClassSpec struct {
28-
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
29-
// Important: Run "make" to regenerate code after modifying this file
28+
OSImageFamily string `json:"osImageFamily,omitempty"` // The AMI family to use
3029

31-
// Foo is an example field of GPUNodeClass. Edit gpunodeclass_types.go to remove/update
32-
Foo string `json:"foo,omitempty"`
30+
OSImageSelectorTerms []NodeClassOSImageSelectorTerms `json:"osImageSelectorTerms,omitempty"`
31+
32+
BlockDeviceMappings []NodeClassBlockDeviceMappings `json:"blockDeviceMappings,omitempty"` // Block device mappings for the instance
33+
34+
InstanceProfile string `json:"instanceProfile,omitempty"` // The instance profile to use
35+
36+
MetadataOptions NodeClassMetadataOptions `json:"metadataOptions,omitempty"`
37+
38+
SecurityGroupSelectorTerms []NodeClassItemIDSelectorTerms `json:"securityGroupSelectorTerms,omitempty"`
39+
40+
SubnetSelectorTerms []NodeClassItemIDSelectorTerms `json:"subnetSelectorTerms,omitempty"` // Terms to select subnets
41+
42+
Tags map[string]string `json:"tags,omitempty"` // Tags associated with the resource
43+
44+
UserData string `json:"userData,omitempty"` // User data script for the instance
45+
}
46+
47+
type NodeClassItemIDSelectorTerms struct {
48+
ID string `json:"id,omitempty"` // The ID of the resource to select (e.g. a security group or subnet)
49+
}
50+
51+
type NodeClassMetadataOptions struct {
52+
HttpEndpoint string `json:"httpEndpoint,omitempty"` // Whether the HTTP metadata endpoint is enabled
53+
HttpProtocolIPv6 string `json:"httpProtocolIPv6,omitempty"` // Whether IPv6 is enabled for the HTTP metadata endpoint
54+
HttpPutResponseHopLimit int `json:"httpPutResponseHopLimit,omitempty"` // The hop limit for HTTP PUT responses
55+
HttpTokens string `json:"httpTokens,omitempty"` // The HTTP tokens required for metadata access
56+
}
57+
58+
type NodeClassOSImageSelectorTerms struct {
59+
Name string `json:"name,omitempty"`
60+
Owner string `json:"owner,omitempty"`
61+
}
62+
63+
type NodeClassBlockDeviceMappings struct {
64+
DeviceName string `json:"deviceName,omitempty"` // The device name for the block device
65+
Ebs NodeClassEbsSettings `json:"ebs,omitempty"`
66+
}
67+
68+
type NodeClassEbsSettings struct {
69+
DeleteOnTermination bool `json:"deleteOnTermination,omitempty"` // Whether to delete the EBS volume on termination
70+
Encrypted bool `json:"encrypted,omitempty"` // Whether the EBS volume is encrypted
71+
VolumeSize string `json:"volumeSize,omitempty"` // The size of the EBS volume
72+
VolumeType string `json:"volumeType,omitempty"` // The type of the EBS volume
3373
}
3474

3575
// GPUNodeClassStatus defines the observed state of GPUNodeClass.
3676
type GPUNodeClassStatus struct {
37-
// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
38-
// Important: Run "make" to regenerate code after modifying this file
3977
}
4078

4179
// +kubebuilder:object:root=true
4280
// +kubebuilder:subresource:status
81+
// +kubebuilder:resource:scope=Cluster
4382

4483
// GPUNodeClass is the Schema for the gpunodeclasses API.
4584
type GPUNodeClass struct {

0 commit comments

Comments
 (0)