Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rootless: move join namespace inside child process #5889

Merged
merged 2 commits into from
Apr 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 31 additions & 14 deletions pkg/rootless/rootless_linux.c
Original file line number Diff line number Diff line change
Expand Up @@ -535,8 +535,36 @@ create_pause_process (const char *pause_pid_file_path, char **argv)
}
}

static void
join_namespace_or_die (int pid_to_join, const char *ns_file)
{
char ns_path[PATH_MAX];
int ret;
int fd;

ret = snprintf (ns_path, PATH_MAX, "/proc/%d/ns/%s", pid_to_join, ns_file);
if (ret == PATH_MAX)
{
fprintf (stderr, "internal error: namespace path too long\n");
_exit (EXIT_FAILURE);
}

fd = open (ns_path, O_CLOEXEC | O_RDONLY);
if (fd < 0)
{
fprintf (stderr, "cannot open: %s\n", ns_path);
_exit (EXIT_FAILURE);
}
if (setns (fd, 0) < 0)
{
fprintf (stderr, "cannot set namespace to %s: %s\n", ns_path, strerror (errno));
_exit (EXIT_FAILURE);
}
close (fd);
}

int
reexec_userns_join (int userns, int mountns, char *pause_pid_file_path)
reexec_userns_join (int pid_to_join, char *pause_pid_file_path)
{
char uid[16];
char gid[16];
Expand Down Expand Up @@ -606,19 +634,8 @@ reexec_userns_join (int userns, int mountns, char *pause_pid_file_path)
_exit (EXIT_FAILURE);
}

if (setns (userns, 0) < 0)
{
fprintf (stderr, "cannot setns: %s\n", strerror (errno));
_exit (EXIT_FAILURE);
}
close (userns);

if (mountns >= 0 && setns (mountns, 0) < 0)
{
fprintf (stderr, "cannot setns: %s\n", strerror (errno));
_exit (EXIT_FAILURE);
}
close (mountns);
join_namespace_or_die (pid_to_join, "user");
join_namespace_or_die (pid_to_join, "mnt");

if (syscall_setresgid (0, 0, 0) < 0)
{
Expand Down
113 changes: 2 additions & 111 deletions pkg/rootless/rootless_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ extern uid_t rootless_uid();
extern uid_t rootless_gid();
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd);
extern int reexec_in_user_namespace_wait(int pid, int options);
extern int reexec_userns_join(int userns, int mountns, char *pause_pid_file_path);
extern int reexec_userns_join(int pid, char *pause_pid_file_path);
*/
import "C"

Expand Down Expand Up @@ -124,91 +124,6 @@ func tryMappingTool(tool string, pid int, hostID int, mappings []idtools.IDMap)
return nil
}

func readUserNs(path string) (string, error) {
b := make([]byte, 256)
_, err := unix.Readlink(path, b)
if err != nil {
return "", err
}
return string(b), nil
}

func readUserNsFd(fd uintptr) (string, error) {
return readUserNs(fmt.Sprintf("/proc/self/fd/%d", fd))
}

func getParentUserNs(fd uintptr) (uintptr, error) {
const nsGetParent = 0xb702
ret, _, errno := unix.Syscall(unix.SYS_IOCTL, fd, uintptr(nsGetParent), 0)
if errno != 0 {
return 0, errno
}
return (uintptr)(unsafe.Pointer(ret)), nil
}

// getUserNSFirstChild returns an open FD for the first direct child user namespace that created the process
// Each container creates a new user namespace where the runtime runs. The current process in the container
// might have created new user namespaces that are child of the initial namespace we created.
// This function finds the initial namespace created for the container that is a child of the current namespace.
//
// current ns
// / \
// TARGET -> a [other containers]
// /
// b
// /
// NS READ USING THE PID -> c
func getUserNSFirstChild(fd uintptr) (*os.File, error) {
currentNS, err := readUserNs("/proc/self/ns/user")
if err != nil {
return nil, err
}

ns, err := readUserNsFd(fd)
if err != nil {
return nil, errors.Wrapf(err, "cannot read user namespace")
}
if ns == currentNS {
return nil, errors.New("process running in the same user namespace")
}

for {
nextFd, err := getParentUserNs(fd)
if err != nil {
if err == unix.ENOTTY {
return os.NewFile(fd, "userns child"), nil
}
return nil, errors.Wrapf(err, "cannot get parent user namespace")
}

ns, err = readUserNsFd(nextFd)
if err != nil {
return nil, errors.Wrapf(err, "cannot read user namespace")
}

if ns == currentNS {
if err := unix.Close(int(nextFd)); err != nil {
return nil, err
}

// Drop O_CLOEXEC for the fd.
_, _, errno := unix.Syscall(unix.SYS_FCNTL, fd, unix.F_SETFD, 0)
if errno != 0 {
if err := unix.Close(int(fd)); err != nil {
logrus.Errorf("failed to close file descriptor %d", fd)
}
return nil, errno
}

return os.NewFile(fd, "userns child"), nil
}
if err := unix.Close(int(fd)); err != nil {
return nil, err
}
fd = nextFd
}
}

// joinUserAndMountNS re-exec podman in a new userNS and join the user and mount
// namespace of the specified PID without looking up its parent. Useful to join directly
// the conmon process.
Expand All @@ -220,31 +135,7 @@ func joinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
cPausePid := C.CString(pausePid)
defer C.free(unsafe.Pointer(cPausePid))

userNS, err := os.Open(fmt.Sprintf("/proc/%d/ns/user", pid))
if err != nil {
return false, -1, err
}
defer func() {
if err := userNS.Close(); err != nil {
logrus.Errorf("unable to close namespace: %q", err)
}
}()

mountNS, err := os.Open(fmt.Sprintf("/proc/%d/ns/mnt", pid))
if err != nil {
return false, -1, err
}
defer func() {
if err := mountNS.Close(); err != nil {
logrus.Errorf("unable to close namespace: %q", err)
}
}()

fd, err := getUserNSFirstChild(userNS.Fd())
if err != nil {
return false, -1, err
}
pidC := C.reexec_userns_join(C.int(fd.Fd()), C.int(mountNS.Fd()), cPausePid)
pidC := C.reexec_userns_join(C.int(pid), cPausePid)
if int(pidC) < 0 {
return false, -1, errors.Errorf("cannot re-exec process")
}
Expand Down