diff --git a/ctrd/supervisord/daemon.go b/ctrd/supervisord/daemon.go index 28682314ca..48a6d90b35 100644 --- a/ctrd/supervisord/daemon.go +++ b/ctrd/supervisord/daemon.go @@ -167,6 +167,30 @@ func (d *Daemon) healthPostCheck() error { return fmt.Errorf("health post check failed") } +func (d *Daemon) isContainerdProcess(pid int) (bool, error) { + if !utils.IsProcessAlive(pid) { + return false, nil + } + + // get process path by pid, if readlink -f command exit code not equals 0, + // we can confirm the pid is not owned by containerd. + output, err := os.Readlink(fmt.Sprintf("/proc/%d/exe", pid)) + if err != nil { + logrus.WithField("module", "ctrd").WithField("containerd-pid", pid).Infof("got err: %v", err) + return false, nil + } + processPath := strings.TrimSpace(string(output)) + + // get containerd process path + output, err = exec.LookPath(d.binaryName) + if err != nil { + return false, err + } + containerdPath := strings.TrimSpace(string(output)) + + return processPath == containerdPath, nil +} + func (d *Daemon) runContainerd() error { pid, err := d.getContainerdPid() if err != nil { @@ -178,6 +202,10 @@ func (d *Daemon) runContainerd() error { // 2. how to make sure the address is the same one? if pid != -1 { logrus.WithField("module", "ctrd").WithField("containerd-pid", pid).Infof("containerd is still running") + if err := d.setContainerdPid(pid); err != nil { + utils.KillProcess(d.pid) + return fmt.Errorf("failed to save the pid into %s: %v", d.pidPath(), err) + } return nil } @@ -253,10 +281,18 @@ func (d *Daemon) getContainerdPid() (int, error) { return -1, err } - if utils.IsProcessAlive(int(pid)) { + isAlive, err := d.isContainerdProcess(int(pid)) + if err != nil { + return -1, err + } else if !isAlive { + logrus.WithField("module", "ctrd").WithField("ctrd-supervisord", pid).Infof("previous containerd pid not exist, delete the pid file") + os.RemoveAll(d.pidPath()) + return -1, nil + } else { return int(pid), nil } } + return -1, nil } diff --git a/test/z_cli_daemon_test.go b/test/z_cli_daemon_test.go index ab3342a6c1..d459ec163a 100644 --- a/test/z_cli_daemon_test.go +++ b/test/z_cli_daemon_test.go @@ -676,3 +676,27 @@ func (suite *PouchDaemonSuite) TestDaemonWithSystemdCgroupDriver(c *check.C) { defer RunWithSpecifiedDaemon(dcfg, "rm", "-f", cname) ret.Assert(c, icmd.Success) } + +// TestContainerdPIDReuse tests even though old containerd pid being reused, we can still +// pull up the containerd instance. +func (suite *PouchDaemonSuite) TestContainerdPIDReuse(c *check.C) { + cfgFile := filepath.Join("/tmp", c.TestName()) + c.Assert(CreateConfigFile(cfgFile, nil), check.IsNil) + defer os.RemoveAll(cfgFile) + + // prepare config file for pouchd + cfg := daemon.NewConfig() + cfg.NewArgs("--config-file", cfgFile) + + containerdPidPath := filepath.Join("/tmp/test/pouch/containerd/state", "containerd.pid") + + // set containerd pid to 1 to make sure the pid must be alive + err := ioutil.WriteFile(containerdPidPath, []byte(fmt.Sprintf("%d", 1)), 0660) + if err != nil { + c.Errorf("failed to write pid to file: %v", containerdPidPath) + } + + // make sure pouchd can successfully start + c.Assert(cfg.StartDaemon(), check.IsNil) + defer cfg.KillDaemon() +}