diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index b12f2355c7a..2d224babea9 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -832,6 +832,25 @@ void send_mountsources(int sockfd, pid_t child, char *mountsources, size_t mount bail("failed to close container mount namespace fd %d", container_mntns_fd); } +void try_unshare(int flags, const char *msg) +{ + write_log(DEBUG, "unshare %s", msg); + /* + * Kernels prior to v4.3 may return EINVAL on unshare when another process + * reads runc's /proc/$PID/status or /proc/$PID/maps. To work around this, + * retry on EINVAL a few times. + */ + int retries = 5; + for (; retries > 0; retries--) { + if (unshare(flags) == 0) { + return; + } + if (errno != EINVAL) + break; + } + bail("failed to unshare %s", msg); +} + void nsexec(void) { int pipenum; @@ -1170,9 +1189,7 @@ void nsexec(void) * problem. */ if (config.cloneflags & CLONE_NEWUSER) { - write_log(DEBUG, "unshare user namespace"); - if (unshare(CLONE_NEWUSER) < 0) - bail("failed to unshare user namespace"); + try_unshare(CLONE_NEWUSER, "user namespace"); config.cloneflags &= ~CLONE_NEWUSER; /* @@ -1224,9 +1241,7 @@ void nsexec(void) * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID) * was broken, so we'll just do it the long way anyway. */ - write_log(DEBUG, "unshare remaining namespace (except cgroupns)"); - if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0) - bail("failed to unshare remaining namespaces (except cgroupns)"); + try_unshare(config.cloneflags & ~CLONE_NEWCGROUP, "remaining namespaces (except cgroupns)"); /* Ask our parent to send the mount sources fds. */ if (config.mountsources) { @@ -1344,8 +1359,7 @@ void nsexec(void) } if (config.cloneflags & CLONE_NEWCGROUP) { - if (unshare(CLONE_NEWCGROUP) < 0) - bail("failed to unshare cgroup namespace"); + try_unshare(CLONE_NEWCGROUP, "cgroup namespace"); } write_log(DEBUG, "signal completion to stage-0");