Skip to content
This repository has been archived by the owner on Jun 20, 2024. It is now read-only.

use nsenter instead of ip netns exec #458

Closed
rade opened this issue Mar 16, 2015 · 6 comments · Fixed by #580
Closed

use nsenter instead of ip netns exec #458

rade opened this issue Mar 16, 2015 · 6 comments · Fixed by #580
Assignees
Labels
Milestone

Comments

@rade
Copy link
Member

rade commented Mar 16, 2015

By replacing our use of ip netns exec with nsenter we could remove the current network namespace symlinking hack.

The only downside is that nsenter is a fairly recent command; for example, it first appeared in Ubuntu 14.10. This is less of an issue post #388 though.

@rade rade added the chore label Mar 16, 2015
@awh awh self-assigned this Apr 17, 2015
@awh
Copy link
Contributor

awh commented Apr 17, 2015

It would seem that ip netns exec is not the only command that requires the symlink; the command ip link set $GUEST_IFNAME netns $NETNS (used in connect_container_to_bridge) fails with RTNETLINK answers: No such process unless the symlink exists. strace output of the failing command invoked via ./weave launch:

...
socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 3
setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0
setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0
bind(3, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(3, {sa_family=AF_NETLINK, pid=92, groups=00000000}, [12]) = 0
sendto(3, " \0\0\0\20\0\5\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 32, 0, NULL, 0) = 32
recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"4\0\0\0\2\0\0\0\0\0\0\0\\\0\0\0\355\377\377\377 \0\0\0\20\0\5\0\0\0\0\0"..., 8192}], msg_controllen=0, msg_flags=0}, 0) = 52
open("/var/run/netns/11772", O_RDONLY)  = -1 ENOENT (No such file or directory)
socket(PF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0) = 4
ioctl(4, SIOCGIFINDEX, {ifr_name="vethwepg11772", ifr_index=38}) = 0
close(4)                                = 0
sendmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"(\0\0\0\20\0\5\0\372 1U\0\0\0\0\0\0\0\0&\0\0\0\0\0\0\0\0\0\0\0"..., 40}], msg_controllen=0, msg_flags=0}, 0) = 40
recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"<\0\0\0\2\0\0\0\372 1U\\\0\0\0\375\377\377\377(\0\0\0\20\0\5\0\372 1U"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 60
writev(2, [{"RTNETLINK answers: No such proce"..., 35}, {NULL, 0}], 2RTNETLINK answers: No such process) = 35
exit_group(2)                           = ?
+++ exited with 2 +++

Compared to when the symlink is in place:

...
socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 3
setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0
setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0
bind(3, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(3, {sa_family=AF_NETLINK, pid=89, groups=00000000}, [12]) = 0
sendto(3, " \0\0\0\20\0\5\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 32, 0, NULL, 0) = 32
recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"4\0\0\0\2\0\0\0\0\0\0\0Y\0\0\0\355\377\377\377 \0\0\0\20\0\5\0\0\0\0\0"..., 8192}], msg_controllen=0, msg_flags=0}, 0) = 52
open("/var/run/netns/12573", O_RDONLY)  = 4
socket(PF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0) = 5
ioctl(5, SIOCGIFINDEX, {ifr_name="vethwepg12573", ifr_index=42}) = 0
close(5)                                = 0
sendmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"(\0\0\0\20\0\5\0\326!1U\0\0\0\0\0\0\0\0*\0\0\0\0\0\0\0\0\0\0\0"..., 40}], msg_controllen=0, msg_flags=0}, 0) = 40
recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"$\0\0\0\2\0\0\0\326!1UY\0\0\0\0\0\0\0(\0\0\0\20\0\5\0\326!1U"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 36
exit_group(0)                           = ?
+++ exited with 0 +++

@awh
Copy link
Contributor

awh commented Apr 17, 2015

Bizarrely, if I run it locally with sudo ./weave --local launch without the symlink, the command works, even though you can see that the attempt to open the symlink still fails:

...
socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, 0) = 3
setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0
setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0
bind(3, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(3, {sa_family=AF_NETLINK, pid=13731, groups=00000000}, [12]) = 0
sendto(3, " \0\0\0\20\0\5\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 32, 0, NULL, 0) = 32
recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"4\0\0\0\2\0\0\0\0\0\0\0\2435\0\0\355\377\377\377 \0\0\0\20\0\5\0\0\0\0\0"..., 8192}], msg_controllen=0, msg_flags=0}, 0) = 52
access("/proc/net", R_OK)               = 0
access("/proc/net/unix", R_OK)          = 0
socket(PF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0) = 4
ioctl(4, SIOCGIFINDEX, {ifr_name="vethwepg13687", ifr_index=46}) = 0
close(4)                                = 0
open("/var/run/netns/13687", O_RDONLY)  = -1 ENOENT (No such file or directory)
socket(PF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0) = 4
ioctl(4, SIOCGIFINDEX, {ifr_name="vethwepg13687", ifr_index=46}) = 0
close(4)                                = 0
sendmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"(\0\0\0\20\0\5\0\33%1U\0\0\0\0\0\0\0\0.\0\0\0\0\0\0\0\0\0\0\0"..., 40}], msg_controllen=0, msg_flags=0}, 0) = 40
recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"$\0\0\0\2\0\0\0\33%1U\2435\0\0\0\0\0\0(\0\0\0\20\0\5\0\33%1U"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 36
exit_group(0)                           = ?
+++ exited with 0 +++
b5d9d403ea923a6f87785128bc569dfd9adf38c38dbbd4a6ddb695b9fddf64e7

@awh
Copy link
Contributor

awh commented Apr 17, 2015

Context:

vagrant@vagrant-ubuntu-utopic-64:~$ uname -a
Linux vagrant-ubuntu-utopic-64 3.16.0-34-generic #47-Ubuntu SMP Fri Apr 10 18:02:58 UTC 2015 x86_64 x86_64 x86_64 GNU/Linux
vagrant@vagrant-ubuntu-utopic-64:~/weave$ cat /etc/lsb-release
DISTRIB_ID=Ubuntu
DISTRIB_RELEASE=14.10
DISTRIB_CODENAME=utopic
DISTRIB_DESCRIPTION="Ubuntu 14.10"
vagrant@vagrant-ubuntu-utopic-64:~/weave$ nsenter --version
nsenter from util-linux 2.25.1
vagrant@vagrant-ubuntu-utopic-64:~/weave$ docker run -ti --entrypoint /bin/sh zettio/weaveexec
/home/weave # cat /etc/alpine-release
3.1.2
/home/weave # nsenter --version
nsenter from util-linux 2.25.2

@awh
Copy link
Contributor

awh commented Apr 17, 2015

Perhaps the netlink call in question is looking in the process table for the container PID if nsenter doesn't supply the network namespace explicitly (e.g. when it can't look it up through the symlink), and this lookup fails inside the container as the process space is isolated?

@awh
Copy link
Contributor

awh commented Apr 17, 2015

Appears to be failing at https://github.com/torvalds/linux/blob/v3.16/net/core/net_namespace.c#L373 (ESRCH is No such process)

@awh
Copy link
Contributor

awh commented Apr 17, 2015

Ahhh, it's also possible to pass a $PROCFS/$PID/ns/net path into ip link set ... netns ..., fixing the issue.

@rade rade closed this as completed in #580 Apr 21, 2015
rade added a commit that referenced this issue Apr 21, 2015
Replace `ip netns exec` with `nsenter`

Closes #458.
@rade rade added this to the 0.11.0 milestone Jan 15, 2016
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
Projects
None yet
Development

Successfully merging a pull request may close this issue.

2 participants