Skip to content

Commit 08f75f6

Browse files
authored
Add Inferentia to cortex cluster info cmd (#1354)
1 parent 8e62740 commit 08f75f6

File tree

3 files changed: +23 -10 lines changed

Diff for: cli/cmd/cluster.go

+7 -2
Original file line number | Diff line number | Diff line change
@@ -641,12 +641,15 @@ func printInfoNodes(infoResponse *schema.InfoResponse) {
641641
numAPIInstances := len(infoResponse.NodeInfos)
642642

643643
var totalReplicas int
644-
var doesClusterHaveGPUs bool
644+
var doesClusterHaveGPUs, doesClusterHaveInfs bool
645645
for _, nodeInfo := range infoResponse.NodeInfos {
646646
totalReplicas += nodeInfo.NumReplicas
647647
if nodeInfo.ComputeUserCapacity.GPU > 0 {
648648
doesClusterHaveGPUs = true
649649
}
650+
if nodeInfo.ComputeUserCapacity.Inf > 0 {
651+
doesClusterHaveInfs = true
652+
}
650653
}
651654

652655
var pendingReplicasStr string
@@ -667,6 +670,7 @@ func printInfoNodes(infoResponse *schema.InfoResponse) {
667670
{Title: "CPU (requested / total allocatable)"},
668671
{Title: "memory (requested / total allocatable)"},
669672
{Title: "GPU (requested / total allocatable)", Hidden: !doesClusterHaveGPUs},
673+
{Title: "Inf (requested / total allocatable)", Hidden: !doesClusterHaveInfs},
670674
}
671675

672676
var rows [][]interface{}
@@ -679,7 +683,8 @@ func printInfoNodes(infoResponse *schema.InfoResponse) {
679683
cpuStr := nodeInfo.ComputeUserRequested.CPU.MilliString() + " / " + nodeInfo.ComputeUserCapacity.CPU.MilliString()
680684
memStr := nodeInfo.ComputeUserRequested.Mem.String() + " / " + nodeInfo.ComputeUserCapacity.Mem.String()
681685
gpuStr := s.Int64(nodeInfo.ComputeUserRequested.GPU) + " / " + s.Int64(nodeInfo.ComputeUserCapacity.GPU)
682-
rows = append(rows, []interface{}{nodeInfo.InstanceType, lifecycle, nodeInfo.NumReplicas, cpuStr, memStr, gpuStr})
686+
infStr := s.Int64(nodeInfo.ComputeUserRequested.Inf) + " / " + s.Int64(nodeInfo.ComputeUserCapacity.Inf)
687+
rows = append(rows, []interface{}{nodeInfo.InstanceType, lifecycle, nodeInfo.NumReplicas, cpuStr, memStr, gpuStr, infStr})
683688
}
684689

685690
t := table.Table{

Diff for: pkg/lib/k8s/pod.go

+10 -7
Original file line number | Diff line number | Diff line change
@@ -371,18 +371,18 @@ func PodMap(pods []kcore.Pod) map[string]kcore.Pod {
371371
}
372372

373373
func PodComputesEqual(podSpec1, podSpec2 *kcore.PodSpec) bool {
374-
cpu1, mem1, gpu1 := TotalPodCompute(podSpec1)
375-
cpu2, mem2, gpu2 := TotalPodCompute(podSpec2)
376-
return cpu1.Equal(cpu2) && mem1.Equal(mem2) && gpu1 == gpu2
374+
cpu1, mem1, gpu1, inf1 := TotalPodCompute(podSpec1)
375+
cpu2, mem2, gpu2, inf2 := TotalPodCompute(podSpec2)
376+
return cpu1.Equal(cpu2) && mem1.Equal(mem2) && gpu1 == gpu2 && inf1 == inf2
377377
}
378378

379-
func TotalPodCompute(podSpec *kcore.PodSpec) (Quantity, Quantity, int64) {
379+
func TotalPodCompute(podSpec *kcore.PodSpec) (Quantity, Quantity, int64, int64) {
380380
totalCPU := Quantity{}
381381
totalMem := Quantity{}
382-
var totalGPU int64
382+
var totalGPU, totalInf int64
383383

384384
if podSpec == nil {
385-
return totalCPU, totalMem, totalGPU
385+
return totalCPU, totalMem, totalGPU, totalInf
386386
}
387387

388388
for _, container := range podSpec.Containers {
@@ -395,9 +395,12 @@ func TotalPodCompute(podSpec *kcore.PodSpec) (Quantity, Quantity, int64) {
395395
if gpu, ok := requests["nvidia.com/gpu"]; ok {
396396
totalGPU += gpu.Value()
397397
}
398+
if inf, ok := requests["aws.amazon.com/neuron"]; ok {
399+
totalInf += inf.Value()
400+
}
398401
}
399402

400-
return totalCPU, totalMem, totalGPU
403+
return totalCPU, totalMem, totalGPU, totalInf
401404
}
402405

403406
// Example of running a shell command: []string{"/bin/bash", "-c", "ps aux | grep my-proc"}

Diff for: pkg/operator/endpoints/info.go

+6 -1
Original file line number | Diff line number | Diff line change
@@ -111,20 +111,23 @@ func getNodeInfos() ([]schema.NodeInfo, int, error) {
111111
node.NumReplicas++
112112
}
113113

114-
cpu, mem, gpu := k8s.TotalPodCompute(&pod.Spec)
114+
cpu, mem, gpu, inf := k8s.TotalPodCompute(&pod.Spec)
115115

116116
node.ComputeAvailable.CPU.SubQty(cpu)
117117
node.ComputeAvailable.Mem.SubQty(mem)
118118
node.ComputeAvailable.GPU -= gpu
119+
node.ComputeAvailable.Inf -= inf
119120

120121
if isAPIPod {
121122
node.ComputeUserRequested.CPU.AddQty(cpu)
122123
node.ComputeUserRequested.Mem.AddQty(mem)
123124
node.ComputeUserRequested.GPU += gpu
125+
node.ComputeUserRequested.Inf += inf
124126
} else {
125127
node.ComputeUserCapacity.CPU.SubQty(cpu)
126128
node.ComputeUserCapacity.Mem.SubQty(mem)
127129
node.ComputeUserCapacity.GPU -= gpu
130+
node.ComputeUserCapacity.Inf -= inf
128131
}
129132
}
130133

@@ -145,10 +148,12 @@ func getNodeInfos() ([]schema.NodeInfo, int, error) {
145148

146149
func nodeComputeAllocatable(node *kcore.Node) userconfig.Compute {
147150
gpuQty := node.Status.Allocatable["nvidia.com/gpu"]
151+
infQty := node.Status.Allocatable["aws.amazon.com/neuron"]
148152

149153
return userconfig.Compute{
150154
CPU: k8s.WrapQuantity(*node.Status.Allocatable.Cpu()),
151155
Mem: k8s.WrapQuantity(*node.Status.Allocatable.Memory()),
152156
GPU: (&gpuQty).Value(),
157+
Inf: (&infQty).Value(),
153158
}
154159
}

0 commit comments

Comments
 (0)