Add Inferentia to cortex cluster info cmd (#1354)

RobertLucian · web-flow · commit 08f75f6df949 · 2020-09-15T06:55:37.000+03:00
diff --git a/cli/cmd/cluster.go b/cli/cmd/cluster.go
@@ -641,12 +641,15 @@ func printInfoNodes(infoResponse *schema.InfoResponse) {
 	numAPIInstances := len(infoResponse.NodeInfos)
 
 	var totalReplicas int
-	var doesClusterHaveGPUs bool
+	var doesClusterHaveGPUs, doesClusterHaveInfs bool
 	for _, nodeInfo := range infoResponse.NodeInfos {
 		totalReplicas += nodeInfo.NumReplicas
 		if nodeInfo.ComputeUserCapacity.GPU > 0 {
 			doesClusterHaveGPUs = true
 		}
+		if nodeInfo.ComputeUserCapacity.Inf > 0 {
+			doesClusterHaveInfs = true
+		}
 	}
 
 	var pendingReplicasStr string
@@ -667,6 +670,7 @@ func printInfoNodes(infoResponse *schema.InfoResponse) {
 		{Title: "CPU (requested / total allocatable)"},
 		{Title: "memory (requested / total allocatable)"},
 		{Title: "GPU (requested / total allocatable)", Hidden: !doesClusterHaveGPUs},
+		{Title: "Inf (requested / total allocatable)", Hidden: !doesClusterHaveInfs},
 	}
 
 	var rows [][]interface{}
@@ -679,7 +683,8 @@ func printInfoNodes(infoResponse *schema.InfoResponse) {
 		cpuStr := nodeInfo.ComputeUserRequested.CPU.MilliString() + " / " + nodeInfo.ComputeUserCapacity.CPU.MilliString()
 		memStr := nodeInfo.ComputeUserRequested.Mem.String() + " / " + nodeInfo.ComputeUserCapacity.Mem.String()
 		gpuStr := s.Int64(nodeInfo.ComputeUserRequested.GPU) + " / " + s.Int64(nodeInfo.ComputeUserCapacity.GPU)
-		rows = append(rows, []interface{}{nodeInfo.InstanceType, lifecycle, nodeInfo.NumReplicas, cpuStr, memStr, gpuStr})
+		infStr := s.Int64(nodeInfo.ComputeUserRequested.Inf) + " / " + s.Int64(nodeInfo.ComputeUserCapacity.Inf)
+		rows = append(rows, []interface{}{nodeInfo.InstanceType, lifecycle, nodeInfo.NumReplicas, cpuStr, memStr, gpuStr, infStr})
 	}
 
 	t := table.Table{
diff --git a/pkg/lib/k8s/pod.go b/pkg/lib/k8s/pod.go
@@ -371,18 +371,18 @@ func PodMap(pods []kcore.Pod) map[string]kcore.Pod {
 }
 
 func PodComputesEqual(podSpec1, podSpec2 *kcore.PodSpec) bool {
-	cpu1, mem1, gpu1 := TotalPodCompute(podSpec1)
-	cpu2, mem2, gpu2 := TotalPodCompute(podSpec2)
-	return cpu1.Equal(cpu2) && mem1.Equal(mem2) && gpu1 == gpu2
+	cpu1, mem1, gpu1, inf1 := TotalPodCompute(podSpec1)
+	cpu2, mem2, gpu2, inf2 := TotalPodCompute(podSpec2)
+	return cpu1.Equal(cpu2) && mem1.Equal(mem2) && gpu1 == gpu2 && inf1 == inf2
 }
 
-func TotalPodCompute(podSpec *kcore.PodSpec) (Quantity, Quantity, int64) {
+func TotalPodCompute(podSpec *kcore.PodSpec) (Quantity, Quantity, int64, int64) {
 	totalCPU := Quantity{}
 	totalMem := Quantity{}
-	var totalGPU int64
+	var totalGPU, totalInf int64
 
 	if podSpec == nil {
-		return totalCPU, totalMem, totalGPU
+		return totalCPU, totalMem, totalGPU, totalInf
 	}
 
 	for _, container := range podSpec.Containers {
@@ -395,9 +395,12 @@ func TotalPodCompute(podSpec *kcore.PodSpec) (Quantity, Quantity, int64) {
 		if gpu, ok := requests["nvidia.com/gpu"]; ok {
 			totalGPU += gpu.Value()
 		}
+		if inf, ok := requests["aws.amazon.com/neuron"]; ok {
+			totalInf += inf.Value()
+		}
 	}
 
-	return totalCPU, totalMem, totalGPU
+	return totalCPU, totalMem, totalGPU, totalInf
 }
 
 // Example of running a shell command: []string{"/bin/bash", "-c", "ps aux | grep my-proc"}
diff --git a/pkg/operator/endpoints/info.go b/pkg/operator/endpoints/info.go
@@ -111,20 +111,23 @@ func getNodeInfos() ([]schema.NodeInfo, int, error) {
 			node.NumReplicas++
 		}
 
-		cpu, mem, gpu := k8s.TotalPodCompute(&pod.Spec)
+		cpu, mem, gpu, inf := k8s.TotalPodCompute(&pod.Spec)
 
 		node.ComputeAvailable.CPU.SubQty(cpu)
 		node.ComputeAvailable.Mem.SubQty(mem)
 		node.ComputeAvailable.GPU -= gpu
+		node.ComputeAvailable.Inf -= inf
 
 		if isAPIPod {
 			node.ComputeUserRequested.CPU.AddQty(cpu)
 			node.ComputeUserRequested.Mem.AddQty(mem)
 			node.ComputeUserRequested.GPU += gpu
+			node.ComputeUserRequested.Inf += inf
 		} else {
 			node.ComputeUserCapacity.CPU.SubQty(cpu)
 			node.ComputeUserCapacity.Mem.SubQty(mem)
 			node.ComputeUserCapacity.GPU -= gpu
+			node.ComputeUserCapacity.Inf -= inf
 		}
 	}
 
@@ -145,10 +148,12 @@ func getNodeInfos() ([]schema.NodeInfo, int, error) {
 
 func nodeComputeAllocatable(node *kcore.Node) userconfig.Compute {
 	gpuQty := node.Status.Allocatable["nvidia.com/gpu"]
+	infQty := node.Status.Allocatable["aws.amazon.com/neuron"]
 
 	return userconfig.Compute{
 		CPU: k8s.WrapQuantity(*node.Status.Allocatable.Cpu()),
 		Mem: k8s.WrapQuantity(*node.Status.Allocatable.Memory()),
 		GPU: (&gpuQty).Value(),
+		Inf: (&infQty).Value(),
 	}
 }

Original file line number	Diff line number	Diff line change
`@@ -371,18 +371,18 @@ func PodMap(pods []kcore.Pod) map[string]kcore.Pod {`
`371`	`371`	`}`
`372`	`372`
`373`	`373`	`func PodComputesEqual(podSpec1, podSpec2 *kcore.PodSpec) bool {`
`374`		`- cpu1, mem1, gpu1 := TotalPodCompute(podSpec1)`
`375`		`- cpu2, mem2, gpu2 := TotalPodCompute(podSpec2)`
`376`		`- return cpu1.Equal(cpu2) && mem1.Equal(mem2) && gpu1 == gpu2`
	`374`	`+ cpu1, mem1, gpu1, inf1 := TotalPodCompute(podSpec1)`
	`375`	`+ cpu2, mem2, gpu2, inf2 := TotalPodCompute(podSpec2)`
	`376`	`+ return cpu1.Equal(cpu2) && mem1.Equal(mem2) && gpu1 == gpu2 && inf1 == inf2`
`377`	`377`	`}`
`378`	`378`
`379`		`-func TotalPodCompute(podSpec *kcore.PodSpec) (Quantity, Quantity, int64) {`
	`379`	`+func TotalPodCompute(podSpec *kcore.PodSpec) (Quantity, Quantity, int64, int64) {`
`380`	`380`	`totalCPU := Quantity{}`
`381`	`381`	`totalMem := Quantity{}`
`382`		`- var totalGPU int64`
	`382`	`+ var totalGPU, totalInf int64`
`383`	`383`
`384`	`384`	`if podSpec == nil {`
`385`		`- return totalCPU, totalMem, totalGPU`
	`385`	`+ return totalCPU, totalMem, totalGPU, totalInf`
`386`	`386`	`}`
`387`	`387`
`388`	`388`	`for _, container := range podSpec.Containers {`
`@@ -395,9 +395,12 @@ func TotalPodCompute(podSpec *kcore.PodSpec) (Quantity, Quantity, int64) {`
`395`	`395`	`if gpu, ok := requests["nvidia.com/gpu"]; ok {`
`396`	`396`	`totalGPU += gpu.Value()`
`397`	`397`	`}`
	`398`	`+ if inf, ok := requests["aws.amazon.com/neuron"]; ok {`
	`399`	`+ totalInf += inf.Value()`
	`400`	`+ }`
`398`	`401`	`}`
`399`	`402`
`400`		`- return totalCPU, totalMem, totalGPU`
	`403`	`+ return totalCPU, totalMem, totalGPU, totalInf`
`401`	`404`	`}`
`402`	`405`
`403`	`406`	`// Example of running a shell command: []string{"/bin/bash", "-c", "ps aux | grep my-proc"}`