Skip to content

Commit d4bf3ba

Browse files
committed
Merge branch 'libbpf: add support for'
Mariusz Dudek says: ==================== From: Mariusz Dudek <mariuszx.dudek@intel.com> This patch series adds support for separation of eBPF program load and xsk socket creation. In, for example, a Kubernetes environment you can have an AF_XDP CNI or daemonset that is responsible for launching pods that execute an application using AF_XDP sockets. It is desirable that the pod runs with as low privileges as possible, CAP_NET_RAW in this case, and that all operations that require privileges are contained in the CNI or daemonset. In this case, you have to be able to separate eBPF program load from xsk socket creation. Currently, this will not work with the xsk_socket__create APIs because you need to have CAP_NET_ADMIN privileges to load the eBPF program and CAP_SYS_ADMIN privileges to create and update xsk_bpf_maps. To be exact, xsk_set_bpf_maps does not need those privileges, but it takes the prog_fd and xsks_map_fd, and those are known only to the process that loaded the eBPF program. The API bpf_prog_get_fd_by_id, which looks up the fd of the prog using a prog_id, and bpf_map_get_fd_by_id, which looks up xsks_map_fd using a map_id, both require CAP_SYS_ADMIN. With this patch, the pod can be run with the CAP_NET_RAW capability only. In case your umem is larger than or equal to the process limit for MEMLOCK, you need to either increase the limit or have the CAP_IPC_LOCK capability. Without this patch, in case of insufficient rights, EPERM is returned by xsk_socket__create. To resolve this privileges issue two new APIs are introduced: - xsk_setup_xdp_prog - loads the built-in XDP program. It can also return xsks_map_fd, which is needed by an unprivileged process to update xsks_map with the AF_XDP socket "fd" - xsk_socket__update_xskmap - inserts an AF_XDP socket into an xskmap for a particular xsk_socket Usage example: int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd) int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd); Inserts AF_XDP socket "fd" into the xskmap. The first patch introduces the new APIs. 
The second patch provides a new sample application working as a control process, and a modification to the existing xdpsock application so that it works with fewer privileges. This patch set is based on bpf-next commit ba05817 (net, xdp, xsk: fix __sk_mark_napi_id_once napi_id error) Since v5 - fixed sample/bpf/xdpsock_user.c to resolve merge conflicts Since v4 - sample/bpf/Makefile issues fixed Since v3: - force_set_map flag removed - leaking of xsk struct fixed - unified function error returning policy implemented Since v2: - new APIs moved into LIBBPF_0.3.0 section - struct bpf_prog_cfg_opts removed - loading own eBPF program via xsk_setup_xdp_prog functionality removed Since v1: - struct bpf_prog_cfg improved for backward/forward compatibility - API xsk_update_xskmap renamed to xsk_socket__update_xskmap - commit message formatting fixed ==================== Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2 parents 9e83f54 + d6482b4 commit d4bf3ba

File tree

7 files changed

+425
-19
lines changed

7 files changed

+425
-19
lines changed

samples/bpf/Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ tprogs-y += syscall_tp
4848
tprogs-y += cpustat
4949
tprogs-y += xdp_adjust_tail
5050
tprogs-y += xdpsock
51+
tprogs-y += xdpsock_ctrl_proc
5152
tprogs-y += xsk_fwd
5253
tprogs-y += xdp_fwd
5354
tprogs-y += task_fd_query
@@ -105,6 +106,7 @@ syscall_tp-objs := syscall_tp_user.o
105106
cpustat-objs := cpustat_user.o
106107
xdp_adjust_tail-objs := xdp_adjust_tail_user.o
107108
xdpsock-objs := xdpsock_user.o
109+
xdpsock_ctrl_proc-objs := xdpsock_ctrl_proc.o
108110
xsk_fwd-objs := xsk_fwd.o
109111
xdp_fwd-objs := xdp_fwd_user.o
110112
task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS)
@@ -202,7 +204,7 @@ TPROGLDLIBS_tracex4 += -lrt
202204
TPROGLDLIBS_trace_output += -lrt
203205
TPROGLDLIBS_map_perf_test += -lrt
204206
TPROGLDLIBS_test_overhead += -lrt
205-
TPROGLDLIBS_xdpsock += -pthread
207+
TPROGLDLIBS_xdpsock += -pthread -lcap
206208
TPROGLDLIBS_xsk_fwd += -pthread
207209

208210
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:

samples/bpf/xdpsock.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,12 @@
88

99
#define MAX_SOCKS 4
1010

11+
#define SOCKET_NAME "sock_cal_bpf_fd"
12+
#define MAX_NUM_OF_CLIENTS 10
13+
14+
#define CLOSE_CONN 1
15+
16+
typedef __u64 u64;
17+
typedef __u32 u32;
18+
1119
#endif /* XDPSOCK_H */

samples/bpf/xdpsock_ctrl_proc.c

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/* Copyright(c) 2017 - 2018 Intel Corporation. */
3+
4+
#include <errno.h>
#include <getopt.h>
#include <libgen.h>
#include <net/if.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

#include <bpf/bpf.h>
#include <bpf/xsk.h>
#include "xdpsock.h"
17+
18+
/* Interface name selected with -i/--interface; empty until parsed. */
static const char *opt_if = "";

/* Long-option table for getopt_long(); terminated by an all-zero entry. */
static struct option long_options[] = {
	{
		.name = "interface",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'i',
	},
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 },
};
24+
25+
/*
 * Print the command-line help to stderr and terminate the process.
 *
 * @prog: program name substituted into the "Usage:" line
 *
 * Does not return; exits with status 0.
 */
static void usage(const char *prog)
{
	/*
	 * The help text is used directly as the format string so that the
	 * "%s" in the Usage line is replaced by @prog; printing it through
	 * "%s\n" would emit a literal "%s" instead. The final "\n" keeps the
	 * trailing blank line the text has always ended with.
	 */
	fprintf(stderr,
		" Usage: %s [OPTIONS]\n"
		" Options:\n"
		" -i, --interface=n Run on interface n\n"
		"\n"
		"\n",
		prog);

	exit(0);
}
36+
37+
static void parse_command_line(int argc, char **argv)
38+
{
39+
int option_index, c;
40+
41+
opterr = 0;
42+
43+
for (;;) {
44+
c = getopt_long(argc, argv, "i:",
45+
long_options, &option_index);
46+
if (c == -1)
47+
break;
48+
49+
switch (c) {
50+
case 'i':
51+
opt_if = optarg;
52+
break;
53+
default:
54+
usage(basename(argv[0]));
55+
}
56+
}
57+
}
58+
59+
/*
 * Hand a file descriptor to the peer of a UNIX domain socket.
 *
 * The descriptor is carried as SCM_RIGHTS ancillary data; a single
 * zero-valued int is sent as the ordinary payload alongside it.
 *
 * @sock: connected socket to send on
 * @fd:   descriptor to pass to the peer
 *
 * Returns the sendmsg() result (number of payload bytes sent) on success,
 * -1 when @fd is negative, or -errno when sendmsg() fails.
 */
static int send_xsks_map_fd(int sock, int fd)
{
	/* The union forces the control buffer to cmsghdr alignment. */
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} control;
	struct cmsghdr *cmsg;
	struct msghdr msg;
	struct iovec iov;
	int value = 0;
	int ret;

	if (fd < 0) {
		fprintf(stderr, "Incorrect fd = %d\n", fd);
		return -1;
	}

	memset(&control, 0, sizeof(control));
	memset(&msg, 0, sizeof(msg));

	iov.iov_base = &value;
	iov.iov_len = sizeof(int);

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_flags = 0;
	msg.msg_control = control.buf;
	msg.msg_controllen = sizeof(control.buf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));

	/* Copy rather than store through a cast pointer into the buffer. */
	memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));

	ret = sendmsg(sock, &msg, 0);
	if (ret == -1) {
		/* Capture errno first: fprintf() may overwrite it. */
		int saved_errno = errno;

		fprintf(stderr, "Sendmsg failed with %s", strerror(saved_errno));
		return -saved_errno;
	}

	return ret;
}
97+
98+
int
99+
main(int argc, char **argv)
100+
{
101+
struct sockaddr_un server;
102+
int listening = 1;
103+
int rval, msgsock;
104+
int ifindex = 0;
105+
int flag = 1;
106+
int cmd = 0;
107+
int sock;
108+
int err;
109+
int xsks_map_fd;
110+
111+
parse_command_line(argc, argv);
112+
113+
ifindex = if_nametoindex(opt_if);
114+
if (ifindex == 0) {
115+
fprintf(stderr, "Unable to get ifindex for Interface %s. Reason:%s",
116+
opt_if, strerror(errno));
117+
return -errno;
118+
}
119+
120+
sock = socket(AF_UNIX, SOCK_STREAM, 0);
121+
if (sock < 0) {
122+
fprintf(stderr, "Opening socket stream failed: %s", strerror(errno));
123+
return -errno;
124+
}
125+
126+
server.sun_family = AF_UNIX;
127+
strcpy(server.sun_path, SOCKET_NAME);
128+
129+
setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &flag, sizeof(int));
130+
131+
if (bind(sock, (struct sockaddr *)&server, sizeof(struct sockaddr_un))) {
132+
fprintf(stderr, "Binding to socket stream failed: %s", strerror(errno));
133+
return -errno;
134+
}
135+
136+
listen(sock, MAX_NUM_OF_CLIENTS);
137+
138+
err = xsk_setup_xdp_prog(ifindex, &xsks_map_fd);
139+
if (err) {
140+
fprintf(stderr, "Setup of xdp program failed\n");
141+
goto close_sock;
142+
}
143+
144+
while (listening) {
145+
msgsock = accept(sock, 0, 0);
146+
if (msgsock == -1) {
147+
fprintf(stderr, "Error accepting connection: %s", strerror(errno));
148+
err = -errno;
149+
goto close_sock;
150+
}
151+
err = send_xsks_map_fd(msgsock, xsks_map_fd);
152+
if (err <= 0) {
153+
fprintf(stderr, "Error %d sending xsks_map_fd\n", err);
154+
goto cleanup;
155+
}
156+
do {
157+
rval = read(msgsock, &cmd, sizeof(int));
158+
if (rval < 0) {
159+
fprintf(stderr, "Error reading stream message");
160+
} else {
161+
if (cmd != CLOSE_CONN)
162+
fprintf(stderr, "Recv unknown cmd = %d\n", cmd);
163+
listening = 0;
164+
break;
165+
}
166+
} while (rval > 0);
167+
}
168+
close(msgsock);
169+
close(sock);
170+
unlink(SOCKET_NAME);
171+
172+
/* Unset fd for given ifindex */
173+
err = bpf_set_link_xdp_fd(ifindex, -1, 0);
174+
if (err) {
175+
fprintf(stderr, "Error when unsetting bpf prog_fd for ifindex(%d)\n", ifindex);
176+
return err;
177+
}
178+
179+
return 0;
180+
181+
cleanup:
182+
close(msgsock);
183+
close_sock:
184+
close(sock);
185+
unlink(SOCKET_NAME);
186+
return err;
187+
}

0 commit comments

Comments
 (0)