-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
When a cpu gets online, the docker daemon updates the cgroups of its containers. It resolves the issues: - moby#27453 - opencontainers/runc#1119 The extention can be used with the flags --cgroup-parent and --cpuset-cpus Signed-off-by: Alice Frosi <alice@linux.vnet.ibm.com>
- Loading branch information
Alice Frosi
authored and
Alice Frosi
committed
Aug 21, 2017
1 parent
1454015
commit 160961e
Showing
8 changed files
with
578 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,259 @@ | ||
// +build linux | ||
|
||
package daemon | ||
|
||
import ( | ||
"io/ioutil" | ||
"os" | ||
"path/filepath" | ||
"strings" | ||
|
||
"github.com/docker/docker/api/types/container" | ||
"github.com/docker/docker/daemon/cpuhotplug" | ||
"github.com/sirupsen/logrus" | ||
) | ||
|
||
const cpusetDir = "/sys/fs/cgroup/cpuset" | ||
|
||
type containerUpdate struct { | ||
cparent string // Cgroup parent to update | ||
done chan error // Channel for synch | ||
} | ||
|
||
type CpuHotPlug struct { | ||
UdevChan chan struct{} // Channel for cpu events | ||
ContChan chan containerUpdate // Channel for started containers | ||
} | ||
|
||
func (cpuhotplug *CpuHotPlug) Close() { | ||
close(cpuhotplug.ContChan) | ||
} | ||
|
||
func (daemon *Daemon) updateRecCgroup(path, cpuset string) error { | ||
|
||
file := filepath.Join(path, "cpuset.cpus") | ||
|
||
if _, err := os.Stat(file); os.IsNotExist(err) { | ||
//If the parent does not exist nothing to update | ||
return nil | ||
} | ||
|
||
if err := ioutil.WriteFile(file, []byte(cpuset), 0777); err != nil { | ||
logrus.Warnf("Error update cgroup in %s", file) | ||
return err | ||
} | ||
entries, err := ioutil.ReadDir(path) | ||
|
||
if err != nil { | ||
logrus.Warnf("Error update recursively cgroup parent %s", err) | ||
} | ||
|
||
if entries == nil { | ||
return nil | ||
} | ||
|
||
// We update recursively until we find that the directory belongs | ||
// to a container or the cgroup parent branch is finshed | ||
for _, e := range entries { | ||
if !e.IsDir() { | ||
continue | ||
} | ||
|
||
// The directory belongs to a container. | ||
// We don't update at this point in time the container. | ||
if daemon.Exists(e.Name()) { | ||
continue | ||
} | ||
|
||
if err := daemon.updateRecCgroup(filepath.Join(path, e.Name()), cpuset); err != nil { | ||
return err | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
// updateParentCgroups updates the cgroup parent | ||
func (daemon *Daemon) updateParentCgroups(parent string) error { | ||
cpuset, err := cpuhotplug.ReadCurrentCpuset() | ||
if err != nil { | ||
return err | ||
} | ||
//Default parent cgroup name is docker | ||
if parent == "" { | ||
parent = "docker" | ||
} | ||
logrus.Debugf("cpuhotplug: updated parent cgroup %s", parent) | ||
|
||
return daemon.updateRecCgroup(filepath.Join(cpusetDir, parent), cpuset) | ||
} | ||
|
||
// updateContainerCpuset updates the cpuset of a container | ||
func (daemon *Daemon) updateContainerCpuset(id, cpuset string) error { | ||
resources := container.Resources{ | ||
CpusetCpus: cpuset, | ||
} | ||
return daemon.containerd.UpdateResources(id, toContainerdResources(resources)) | ||
} | ||
|
||
// syncUpdateParentCgroup synchronizes the update for the cgroup parent when a container | ||
// is started. When a container is started its cgroup parent needs to be update as well. | ||
// If the cgroup parent was already created but no containers were presented, it may not be | ||
// correctly updated. | ||
func (daemon *Daemon) syncUpdateParentCgroup(cparent string) error { | ||
done := make(chan error, 1) | ||
c := strings.Split(cparent, "/")[0] | ||
daemon.cpuHotplug.ContChan <- containerUpdate{ | ||
cparent: c, | ||
done: done, | ||
} | ||
err := <-done | ||
if err != nil { | ||
logrus.Errorf("Cgroup parent error %s", err) | ||
} | ||
|
||
return err | ||
|
||
} | ||
|
||
// performCpuHotplug listens to the cpu events and update the cpuset accordingly | ||
func (daemon *Daemon) performCpuHotplug() { | ||
//Enable CPU event filter | ||
daemon.cpuHotplug.UdevChan = make(chan struct{}) | ||
cpuhotplug.ListenToCpuEvent(daemon.cpuHotplug.UdevChan) | ||
logrus.Infof("Started cpuhotplug") | ||
|
||
//Initialize channel for incoming started container | ||
daemon.cpuHotplug.ContChan = make(chan containerUpdate) | ||
var oldCgroup = " " | ||
|
||
go func() { | ||
for { | ||
// Priority queue high to a started container over a cpu event | ||
// High priority queue: started container | ||
select { | ||
case w := <-daemon.cpuHotplug.ContChan: | ||
// if no cpu went online and we have already update in a | ||
// previous iteration the cgroup parent tree | ||
// we just skip it | ||
if oldCgroup != w.cparent { | ||
err := daemon.updateParentCgroups(w.cparent) | ||
oldCgroup = w.cparent | ||
w.done <- err | ||
} | ||
w.done <- nil | ||
default: | ||
} | ||
|
||
// Check if a cpu went online or a container is started | ||
select { | ||
// Cgroup parent update for started container | ||
case w := <-daemon.cpuHotplug.ContChan: | ||
if oldCgroup != w.cparent { | ||
err := daemon.updateParentCgroups(w.cparent) | ||
oldCgroup = w.cparent | ||
w.done <- err | ||
} | ||
w.done <- nil | ||
// CPU events | ||
case <-daemon.cpuHotplug.UdevChan: | ||
daemon.updateCpusetContainers() | ||
oldCgroup = " " | ||
} | ||
} | ||
}() | ||
|
||
} | ||
|
||
// updateRestrictedCpuset updates the cpuset of a restricted container. | ||
func (daemon *Daemon) updateRestrictedCpuset(id, originalCpuset string) error { | ||
//Read which cpus are online | ||
currentCpusSet, err := cpuhotplug.ReadCurrentCpuset() | ||
if err != nil { | ||
return err | ||
} | ||
|
||
return daemon.updateContainerCpuset(id, cpuhotplug.NewCpusetRestrictedCont(currentCpusSet, originalCpuset)) | ||
} | ||
|
||
// updateCpusetContainers updates all Docker containers and their cgroup | ||
// parent. | ||
func (daemon *Daemon) updateCpusetContainers() { | ||
|
||
// List of cgroup parent to update | ||
var listParentCgroup []string | ||
for _, c := range daemon.containers.List() { | ||
|
||
if !c.IsRunning() { | ||
continue | ||
} | ||
|
||
containerJson, err := daemon.ContainerInspectCurrent(c.Name, false) | ||
if err != nil { | ||
continue | ||
} | ||
cgroupParent := containerJson.ContainerJSONBase.HostConfig.CgroupParent | ||
// Find all the Parent cgroup of the container | ||
// Avoid duplicate | ||
// We just need the first directory then the cgroup parent tree will be | ||
// recursively updated | ||
s := strings.Split(cgroupParent, "/")[0] | ||
|
||
found := false | ||
// Check if the cgroup parent needs to be inserted | ||
for _, e := range listParentCgroup { | ||
if e == s { | ||
found = true | ||
break | ||
} | ||
} | ||
|
||
if !found { | ||
listParentCgroup = append(listParentCgroup, s) | ||
} | ||
|
||
} | ||
|
||
// Update cgroup parent | ||
for _, e := range listParentCgroup { | ||
if err := daemon.updateParentCgroups(e); err != nil { | ||
logrus.Errorf("Error %s updating cgroup parent %s", err, e) | ||
} | ||
} | ||
|
||
// Update the running containers | ||
for _, c := range daemon.containers.List() { | ||
|
||
if !c.IsRunning() { | ||
continue | ||
} | ||
//Get original cpuset | ||
containerJson, err := daemon.ContainerInspectCurrent(c.Name, false) | ||
if err != nil { | ||
logrus.Warnf("A problem has occured with container %s: %s\n", err, c.ID) | ||
continue | ||
} | ||
|
||
cpuset := containerJson.ContainerJSONBase.HostConfig.Resources.CpusetCpus | ||
|
||
//Unrestricted container | ||
if cpuset == "" { | ||
updatedCpuset, err := cpuhotplug.ReadCurrentCpuset() | ||
if err != nil { | ||
logrus.Warnf("Container %s err: %s", c.ID, err) | ||
} | ||
if err := daemon.updateContainerCpuset(c.ID, updatedCpuset); err == nil { | ||
logrus.Debugf("Container %s updated succesfully", c.ID) | ||
} else { | ||
logrus.Warnf("Container %s err: %s", c.ID, err) | ||
} | ||
continue | ||
} | ||
|
||
//Restricted container | ||
if err := daemon.updateRestrictedCpuset(c.ID, cpuset); err == nil { | ||
logrus.Debugf("Container %s updated succesfully", c.ID) | ||
} else { | ||
logrus.Warnf("Restricted container %s err: %s", c.ID, err) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# Docker Cpuhotplug extention | ||
|
||
Docker uses *cgroups* in order to achieve isolation and limits container resources. | ||
|
||
However, the Docker cpuset.cpus is not automatically updated when a cpu is set offline and online again. The goal of this docker extention is to update the cpuset of the docker daemon and each Docker container. | ||
|
||
A container is defined *restricted*, if it was started with the flag `--cpuset-cpus`, otherwise is called *unrestricted*. A restricted container mantains the initial cpuset restriction and it is update consequently. | ||
|
||
In linux system the updated cpuset can be find in `/sys/fs/cgroup/cpuset/cpuset.cpus` and it holds the information or the currently online cpus. | ||
The each cgroup and subcgroup has its own cpuset.cpus. *Cgroup parent* is a cgroup that contains another cgroup. Subcgroups can have only a subset or the entire subset of their cgroup parent cpuset. | ||
|
||
The docker daemon has an additional option `--exec-opt native.cgroupdriver`. The default value is *cgroupdriver* and the name structure looks like these in the following example. | ||
However, the cgroup manage could be handles also by *systemd* using this option. The name cgroup strcture is different from that used by the *cgroupdriver*. | ||
For a first prototype we just consider the default option. Hence, the cgroup are managed by the *cgroupdriver*. | ||
|
||
The default cgroup parent for docker is called *docker* and the its path is `/sys/fs/cgroup/cpuset/docker/cpuset.cpus`. Each container has a subfolder in `/sys/fs/cgroup/cpuset/docker/` that holds its own cpuset. | ||
|
||
Additionally, a container can create another cgroup parent with the option `--cgroup-parent string`. The cgroup-parent can be an entire path and each folder of the path has its own cpuset. | ||
|
||
Example | ||
```sh | ||
docker run -td s390x/ubuntu bash | ||
docker run -td --cgroup-parent level1/level2 s390x/ubuntu bash | ||
``` | ||
The cgroup structure looks like: | ||
|
||
```sh | ||
/sys/fs/cgroup/cpuset/ | ||
DEFAULT cpuset.cpus | ||
____________ / ___________ \ | ||
| / | \ | ||
| /docker | /level1 | ||
| cpuset.cpus | cpuset.cpus | ||
| / | \ | ||
| / | \ | ||
| /7e2..a12 | /level2 | ||
| cpuset.cpus | cpuset.cpus | ||
| | \ | ||
|___________________________| \ | ||
/2f9..a45 | ||
cpuset.cpus | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// +build linux | ||
package cpuhotplug | ||
|
||
import ( | ||
"fmt" | ||
"os/exec" | ||
"testing" | ||
"time" | ||
) | ||
|
||
const numEvent = 15 | ||
|
||
func TestListenToCpuEvent(t *testing.T) { | ||
|
||
test := make(chan struct{}) | ||
done := make(chan struct{}) | ||
ListenToCpuEvent(test) | ||
|
||
// Catch the cpu event | ||
go func(done chan struct{}) { | ||
i := numEvent - 1 | ||
for range test { | ||
fmt.Printf("Waiting for %d events\n", i-1) | ||
i-- | ||
if i == 0 { | ||
done <- struct{}{} | ||
} | ||
} | ||
}(done) | ||
|
||
// Trigger the cpu events | ||
go func() { | ||
for i := 0; i < numEvent; i++ { | ||
//trigger cpu events | ||
if err := exec.Command("chcpu", "-d", "1").Run(); err != nil { | ||
fmt.Printf("Error %d offnline cpu\n", err) | ||
} | ||
time.Sleep(100 * time.Millisecond) | ||
if err := exec.Command("chcpu", "-e", " 1").Run(); err != nil { | ||
fmt.Printf("Error %d online cpu\n", err) | ||
} | ||
fmt.Printf("CPU event %d\n", i) | ||
} | ||
}() | ||
<-done | ||
close(test) | ||
} |
Oops, something went wrong.