Continue working on gpu sharing

volcano-sh · Jul 1, 2020 · 38cec5c · 38cec5c
1 parent c2ca8dd
commit 38cec5c
Show file tree

Hide file tree

Showing 9 changed files with 290 additions and 225 deletions.
diff --git a/installer/helm/chart/volcano/templates/scheduler.yaml b/installer/helm/chart/volcano/templates/scheduler.yaml
@@ -31,7 +31,7 @@ rules:
     verbs: ["create", "list", "watch", "update", "patch"]
   - apiGroups: [""]
     resources: ["pods", "pods/status"]
-    verbs: ["create", "get", "list", "watch", "update", "bind", "updateStatus", "delete"]
+    verbs: ["create", "get", "list", "watch", "update", "patch", "bind", "updateStatus", "delete"]
   - apiGroups: [""]
     resources: ["pods/binding"]
     verbs: ["create"]

diff --git a/installer/volcano-development.yaml b/installer/volcano-development.yaml
@@ -50,7 +50,7 @@ rules:
     verbs: ["create", "list", "watch", "update", "patch"]
   - apiGroups: [""]
     resources: ["pods", "pods/status"]
-    verbs: ["create", "get", "list", "watch", "update", "bind", "updateStatus", "delete"]
+    verbs: ["create", "get", "list", "watch", "update", "patch", "bind", "updateStatus", "delete"]
   - apiGroups: [""]
     resources: ["pods/binding"]
     verbs: ["create"]

diff --git a/pkg/scheduler/api/device_info.go b/pkg/scheduler/api/device_info.go
@@ -1,5 +1,5 @@
 /*
-Copyright 2017 The Kubernetes Authors.
+Copyright 2020 The Volcano Authors.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,47 +17,56 @@ limitations under the License.
 package api
 
 import (
-	"strconv"
-
 	v1 "k8s.io/api/core/v1"
 )
 
-type DeviceInfo struct {
-	Id             int
-	PodMap         map[string]*v1.Pod
-	GPUTotalMemory uint
+// GPUDevice includes the GPU ID, its memory and the pods that are sharing it.
+type GPUDevice struct {
+	// GPU ID
+	ID int
+	// The pods that are sharing this GPU
+	PodMap map[string]*v1.Pod
+	// Memory per card
+	Memory uint
 }
 
-func (di *DeviceInfo) GetPods() []*v1.Pod {
-	pods := []*v1.Pod{}
-	for _, pod := range di.PodMap {
-		pods = append(pods, pod)
+// NewGPUDevice creates a GPUDevice with the given ID and memory and an empty pod map.
+func NewGPUDevice(id int, mem uint) *GPUDevice {
+	return &GPUDevice{
+		ID:     id,
+		Memory: mem,
+		PodMap: map[string]*v1.Pod{},
 	}
-	return pods
 }
 
-func NewDeviceInfo(id int, mem uint) *DeviceInfo {
-	return &DeviceInfo{
-		Id:             id,
-		GPUTotalMemory: mem,
-		PodMap:         map[string]*v1.Pod{},
-	}
-}
-
-func (di *DeviceInfo) GetUsedGPUMemory() uint {
+// getUsedGPUMemory sums the GPU resource requested by the device's pods, skipping pods that have succeeded or failed.
+func (g *GPUDevice) getUsedGPUMemory() uint {
 	res := uint(0)
-	for _, pod := range di.PodMap {
+	for _, pod := range g.PodMap {
 		if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
 			continue
 		} else {
-			if len(pod.ObjectMeta.Annotations) > 0 {
-				mem, found := pod.ObjectMeta.Annotations["volcano.sh/pod-gpu-memory"]
-				if found {
-					m, _ := strconv.Atoi(mem)
-					res += uint(m)
-				}
-			}
+			gpuRequest := GetGPUResourceOfPod(pod)
+			res += gpuRequest
 		}
 	}
 	return res
 }
+
+// GetGPUResourceOfPod returns the GPU resource required by the pod, summed across all of its containers.
+func GetGPUResourceOfPod(pod *v1.Pod) uint {
+	var mem uint
+	for _, container := range pod.Spec.Containers {
+		mem += getGPUResourceOfContainer(&container)
+	}
+	return mem
+}
+
+// getGPUResourceOfContainer returns the GPU resource required by the container, read from its VolcanoGPUResource limit.
+func getGPUResourceOfContainer(container *v1.Container) uint {
+	var mem uint
+	if val, ok := container.Resources.Limits[VolcanoGPUResource]; ok {
+		mem = uint(val.Value())
+	}
+	return mem
+}