Skip to content

Commit

Permalink
bugfix: fix containerd pid may be reuse
Browse files Browse the repository at this point in the history
Signed-off-by: Michael Wan <zirenwan@gmail.com>
  • Loading branch information
HusterWan committed Jan 23, 2019
1 parent 71beb00 commit 463c541
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 1 deletion.
38 changes: 37 additions & 1 deletion ctrd/supervisord/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,30 @@ func (d *Daemon) healthPostCheck() error {
return fmt.Errorf("health post check failed")
}

func (d *Daemon) isContainerdProcess(pid int) (bool, error) {
if !utils.IsProcessAlive(pid) {
return false, nil
}

// get process path by pid, if readlink -f command exit code not equals 0,
// we can confirm the pid is not owned by containerd.
output, err := os.Readlink(fmt.Sprintf("/proc/%d/exe", pid))
if err != nil {
logrus.WithField("module", "ctrd").WithField("containerd-pid", pid).Infof("got err: %v", err)
return false, nil
}
processPath := strings.TrimSpace(string(output))

// get containerd process path
output, err = exec.LookPath(d.binaryName)
if err != nil {
return false, err
}
containerdPath := strings.TrimSpace(string(output))

return processPath == containerdPath, nil
}

func (d *Daemon) runContainerd() error {
pid, err := d.getContainerdPid()
if err != nil {
Expand All @@ -178,6 +202,10 @@ func (d *Daemon) runContainerd() error {
// 2. how to make sure the address is the same one?
if pid != -1 {
logrus.WithField("module", "ctrd").WithField("containerd-pid", pid).Infof("containerd is still running")
if err := d.setContainerdPid(pid); err != nil {
utils.KillProcess(d.pid)
return fmt.Errorf("failed to save the pid into %s: %v", d.pidPath(), err)
}
return nil
}

Expand Down Expand Up @@ -253,10 +281,18 @@ func (d *Daemon) getContainerdPid() (int, error) {
return -1, err
}

if utils.IsProcessAlive(int(pid)) {
isAlive, err := d.isContainerdProcess(int(pid))
if err != nil {
return -1, err
} else if !isAlive {
logrus.WithField("module", "ctrd").WithField("ctrd-supervisord", pid).Infof("previous containerd pid not exist, delete the pid file")
os.RemoveAll(d.pidPath())
return -1, nil
} else {
return int(pid), nil
}
}

return -1, nil
}

Expand Down
24 changes: 24 additions & 0 deletions test/z_cli_daemon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -676,3 +676,27 @@ func (suite *PouchDaemonSuite) TestDaemonWithSystemdCgroupDriver(c *check.C) {
defer RunWithSpecifiedDaemon(dcfg, "rm", "-f", cname)
ret.Assert(c, icmd.Success)
}

// TestContainerdPIDReuse tests even though old containerd pid being reused, we can still
// pull up the containerd instance.
func (suite *PouchDaemonSuite) TestContainerdPIDReuse(c *check.C) {
cfgFile := filepath.Join("/tmp", c.TestName())
c.Assert(CreateConfigFile(cfgFile, nil), check.IsNil)
defer os.RemoveAll(cfgFile)

// prepare config file for pouchd
cfg := daemon.NewConfig()
cfg.NewArgs("--config-file", cfgFile)

containerdPidPath := filepath.Join("/tmp/test/pouch/containerd/state", "containerd.pid")

// set containerd pid to 1 to make sure the pid must be alive
err := ioutil.WriteFile(containerdPidPath, []byte(fmt.Sprintf("%d", 1)), 0660)
if err != nil {
c.Errorf("failed to write pid to file: %v", containerdPidPath)
}

// make sure pouchd can successfully start
c.Assert(cfg.StartDaemon(), check.IsNil)
defer cfg.KillDaemon()
}

0 comments on commit 463c541

Please sign in to comment.