Skip to content

Commit

Permalink
Continue working on gpu sharing
Browse files Browse the repository at this point in the history
  • Loading branch information
hzxuzhonghu committed Jul 1, 2020
1 parent c2ca8dd commit 38cec5c
Show file tree
Hide file tree
Showing 9 changed files with 290 additions and 225 deletions.
2 changes: 1 addition & 1 deletion installer/helm/chart/volcano/templates/scheduler.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ rules:
verbs: ["create", "list", "watch", "update", "patch"]
- apiGroups: [""]
resources: ["pods", "pods/status"]
verbs: ["create", "get", "list", "watch", "update", "bind", "updateStatus", "delete"]
verbs: ["create", "get", "list", "watch", "update", "patch", "bind", "updateStatus", "delete"]
- apiGroups: [""]
resources: ["pods/binding"]
verbs: ["create"]
Expand Down
2 changes: 1 addition & 1 deletion installer/volcano-development.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ rules:
verbs: ["create", "list", "watch", "update", "patch"]
- apiGroups: [""]
resources: ["pods", "pods/status"]
verbs: ["create", "get", "list", "watch", "update", "bind", "updateStatus", "delete"]
verbs: ["create", "get", "list", "watch", "update", "patch", "bind", "updateStatus", "delete"]
- apiGroups: [""]
resources: ["pods/binding"]
verbs: ["create"]
Expand Down
67 changes: 38 additions & 29 deletions pkg/scheduler/api/device_info.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright 2017 The Kubernetes Authors.
Copyright 2020 The Volcano Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand All @@ -17,47 +17,56 @@ limitations under the License.
package api

import (
"strconv"

v1 "k8s.io/api/core/v1"
)

type DeviceInfo struct {
Id int
PodMap map[string]*v1.Pod
GPUTotalMemory uint
// GPUDevice describes a single GPU card: its ID, its memory, and the pods
// that are sharing it.
type GPUDevice struct {
// ID is the GPU ID.
ID int
// PodMap holds the pods that are sharing this GPU.
// NOTE(review): the string key is presumably a pod identifier (UID or
// namespace/name) — confirm against the code that populates the map.
PodMap map[string]*v1.Pod
// Memory is the memory per card.
Memory uint
}

func (di *DeviceInfo) GetPods() []*v1.Pod {
pods := []*v1.Pod{}
for _, pod := range di.PodMap {
pods = append(pods, pod)
// NewGPUDevice creates a device
func NewGPUDevice(id int, mem uint) *GPUDevice {
return &GPUDevice{
ID: id,
Memory: mem,
PodMap: map[string]*v1.Pod{},
}
return pods
}

// NewDeviceInfo builds a DeviceInfo for the GPU with the given id and total
// memory. The returned value starts with an empty, non-nil PodMap so pods can
// be inserted without further initialization.
func NewDeviceInfo(id int, mem uint) *DeviceInfo {
	info := new(DeviceInfo)
	info.Id = id
	info.GPUTotalMemory = mem
	info.PodMap = make(map[string]*v1.Pod)
	return info
}

func (di *DeviceInfo) GetUsedGPUMemory() uint {
// getUsedGPUMemory calculates the used memory of the device.
func (g *GPUDevice) getUsedGPUMemory() uint {
res := uint(0)
for _, pod := range di.PodMap {
for _, pod := range g.PodMap {
if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
continue
} else {
if len(pod.ObjectMeta.Annotations) > 0 {
mem, found := pod.ObjectMeta.Annotations["volcano.sh/pod-gpu-memory"]
if found {
m, _ := strconv.Atoi(mem)
res += uint(m)
}
}
gpuRequest := GetGPUResourceOfPod(pod)
res += gpuRequest
}
}
return res
}

// GetGPUResourceOfPod returns the GPU resource required by the pod, computed
// as the sum of the per-container GPU resource over pod.Spec.Containers.
// A pod with no containers yields 0.
func GetGPUResourceOfPod(pod *v1.Pod) uint {
	var mem uint
	// Index into the slice instead of ranging by value: this avoids copying
	// every v1.Container and avoids taking the address of the loop variable.
	for i := range pod.Spec.Containers {
		mem += getGPUResourceOfContainer(&pod.Spec.Containers[i])
	}
	return mem
}

// getGPUResourceOfContainer returns the GPU resource required by the
// container, read from the VolcanoGPUResource entry of its resource limits.
// It returns 0 when the limit is absent or negative.
func getGPUResourceOfContainer(container *v1.Container) uint {
	val, ok := container.Resources.Limits[VolcanoGPUResource]
	if !ok {
		return 0
	}
	// Value() yields an int64; converting a negative number to uint would
	// wrap around to a huge amount, so clamp it to zero instead.
	n := val.Value()
	if n < 0 {
		return 0
	}
	return uint(n)
}
Loading

0 comments on commit 38cec5c

Please sign in to comment.