Skip to content

Commit

Permalink
Merge branch 'sockmap-fixes'
Browse files Browse the repository at this point in the history
John Fastabend says:

====================
sockmap fixes for net

The following implements a set of fixes for sockmap and changes the
API slightly in a few places to reduce preempt_disable/enable scope.
We do this here in net because it requires an API change and this
avoids getting stuck with legacy API going forward.

The short description:

Access to skb mark is removed, it is problematic when we add
features in the future because mark is a union and used by the
TCP/socket code internally. We don't want to expose this to the
BPF programs or let programs change the values.

The other change is caching metadata in the skb itself between
when the BPF program returns a redirect code and the core code
implements the redirect. This avoids having per cpu metadata.

Finally, tighten restriction on using sockmap to CAP_NET_ADMIN and
only SOCK_STREAM sockets.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
davem330 committed Oct 20, 2017
2 parents 1cc276c + 9ef2a8c commit e95c6cf
Show file tree
Hide file tree
Showing 11 changed files with 74 additions and 34 deletions.
2 changes: 1 addition & 1 deletion include/linux/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ void xdp_do_flush_map(void);
void bpf_warn_invalid_xdp_action(u32 act);
void bpf_warn_invalid_xdp_redirect(u32 ifindex);

struct sock *do_sk_redirect_map(void);
struct sock *do_sk_redirect_map(struct sk_buff *skb);

#ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable;
Expand Down
5 changes: 5 additions & 0 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,11 @@ struct tcp_skb_cb {
struct inet6_skb_parm h6;
#endif
} header; /* For incoming skbs */
struct {
__u32 key;
__u32 flags;
struct bpf_map *map;
} bpf;
};
};

Expand Down
3 changes: 3 additions & 0 deletions kernel/bpf/devmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
int err = -EINVAL;
u64 cost;

if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);

/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
Expand Down
28 changes: 18 additions & 10 deletions kernel/bpf/sockmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include <linux/workqueue.h>
#include <linux/list.h>
#include <net/strparser.h>
#include <net/tcp.h>

struct bpf_stab {
struct bpf_map map;
Expand Down Expand Up @@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
return SK_DROP;

skb_orphan(skb);
/* We need to ensure that BPF metadata for maps is also cleared
* when we orphan the skb so that we don't have the possibility
* to reference a stale map.
*/
TCP_SKB_CB(skb)->bpf.map = NULL;
skb->sk = psock->sock;
bpf_compute_data_end(skb);
preempt_disable();
rc = (*prog->bpf_func)(skb, prog->insnsi);
preempt_enable();
skb->sk = NULL;

return rc;
Expand All @@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
struct sock *sk;
int rc;

/* Because we use per cpu values to feed input from sock redirect
* in BPF program to do_sk_redirect_map() call we need to ensure we
* are not preempted. RCU read lock is not sufficient in this case
* with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
*/
preempt_disable();
rc = smap_verdict_func(psock, skb);
switch (rc) {
case SK_REDIRECT:
sk = do_sk_redirect_map();
preempt_enable();
sk = do_sk_redirect_map(skb);
if (likely(sk)) {
struct smap_psock *peer = smap_psock_sk(sk);

Expand All @@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
/* Fall through and free skb otherwise */
case SK_DROP:
default:
if (rc != SK_REDIRECT)
preempt_enable();
kfree_skb(skb);
}
}
Expand Down Expand Up @@ -487,6 +486,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
int err = -EINVAL;
u64 cost;

if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);

/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
Expand Down Expand Up @@ -840,6 +842,12 @@ static int sock_map_update_elem(struct bpf_map *map,
return -EINVAL;
}

if (skops.sk->sk_type != SOCK_STREAM ||
skops.sk->sk_protocol != IPPROTO_TCP) {
fput(socket->file);
return -EOPNOTSUPP;
}

err = sock_map_ctx_update_elem(&skops, map, key, flags);
fput(socket->file);
return err;
Expand Down
31 changes: 16 additions & 15 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};

BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags)
BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
struct bpf_map *, map, u32, key, u64, flags)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);

if (unlikely(flags))
return SK_ABORTED;

ri->ifindex = key;
ri->flags = flags;
ri->map = map;
tcb->bpf.key = key;
tcb->bpf.flags = flags;
tcb->bpf.map = map;

return SK_REDIRECT;
}

struct sock *do_sk_redirect_map(void)
struct sock *do_sk_redirect_map(struct sk_buff *skb)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
struct sock *sk = NULL;

if (ri->map) {
sk = __sock_map_lookup_elem(ri->map, ri->ifindex);
if (tcb->bpf.map) {
sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);

ri->ifindex = 0;
ri->map = NULL;
/* we do not clear flags for future lookup */
tcb->bpf.key = 0;
tcb->bpf.map = NULL;
}

return sk;
Expand All @@ -1873,9 +1873,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
.func = bpf_sk_redirect_map,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_ANYTHING,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_ANYTHING,
};

BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
Expand Down Expand Up @@ -3683,7 +3684,6 @@ static bool sk_skb_is_valid_access(int off, int size,
{
if (type == BPF_WRITE) {
switch (off) {
case bpf_ctx_range(struct __sk_buff, mark):
case bpf_ctx_range(struct __sk_buff, tc_index):
case bpf_ctx_range(struct __sk_buff, priority):
break;
Expand All @@ -3693,6 +3693,7 @@ static bool sk_skb_is_valid_access(int off, int size,
}

switch (off) {
case bpf_ctx_range(struct __sk_buff, mark):
case bpf_ctx_range(struct __sk_buff, tc_classid):
return false;
case bpf_ctx_range(struct __sk_buff, data):
Expand Down
2 changes: 1 addition & 1 deletion samples/sockmap/sockmap_kern.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ int bpf_prog2(struct __sk_buff *skb)
ret = 1;

bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
return bpf_sk_redirect_map(&sock_map, ret, 0);
return bpf_sk_redirect_map(skb, &sock_map, ret, 0);
}

SEC("sockops")
Expand Down
3 changes: 2 additions & 1 deletion tools/include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -569,9 +569,10 @@ union bpf_attr {
* @flags: reserved for future use
* Return: 0 on success or negative error code
*
* int bpf_sk_redirect_map(map, key, flags)
* int bpf_sk_redirect_map(skb, map, key, flags)
* Redirect skb to a sock in map using key as a lookup key for the
* sock in map.
* @skb: pointer to skb
* @map: pointer to sockmap
* @key: key to lookup sock in map
* @flags: reserved for future use
Expand Down
2 changes: 1 addition & 1 deletion tools/testing/selftests/bpf/bpf_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
int optlen) =
(void *) BPF_FUNC_setsockopt;
static int (*bpf_sk_redirect_map)(void *map, int key, int flags) =
static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
(void *) BPF_FUNC_sk_redirect_map;
static int (*bpf_sock_map_update)(void *map, void *key, void *value,
unsigned long long flags) =
Expand Down
4 changes: 2 additions & 2 deletions tools/testing/selftests/bpf/sockmap_verdict_prog.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ int bpf_prog2(struct __sk_buff *skb)
bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk);

if (!map)
return bpf_sk_redirect_map(&sock_map_rx, sk, 0);
return bpf_sk_redirect_map(&sock_map_tx, sk, 0);
return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0);
return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0);
}

char _license[] SEC("license") = "GPL";
12 changes: 11 additions & 1 deletion tools/testing/selftests/bpf/test_maps.c
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ static void test_sockmap(int tasks, void *data)
int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc;
struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
int ports[] = {50200, 50201, 50202, 50204};
int err, i, fd, sfd[6] = {0xdeadbeef};
int err, i, fd, udp, sfd[6] = {0xdeadbeef};
u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
int parse_prog, verdict_prog;
struct sockaddr_in addr;
Expand Down Expand Up @@ -548,6 +548,16 @@ static void test_sockmap(int tasks, void *data)
goto out_sockmap;
}

/* Test update with unsupported UDP socket */
udp = socket(AF_INET, SOCK_DGRAM, 0);
i = 0;
err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
if (!err) {
printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
i, udp);
goto out_sockmap;
}

/* Test update without programs */
for (i = 0; i < 6; i++) {
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
Expand Down
16 changes: 14 additions & 2 deletions tools/testing/selftests/bpf/test_verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -1130,15 +1130,27 @@ static struct bpf_test tests[] = {
.errstr = "invalid bpf_context access",
},
{
"check skb->mark is writeable by SK_SKB",
"invalid access of skb->mark for SK_SKB",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, mark)),
BPF_EXIT_INSN(),
},
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_SKB,
.errstr = "invalid bpf_context access",
},
{
"check skb->mark is not writeable by SK_SKB",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
offsetof(struct __sk_buff, mark)),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_SKB,
.errstr = "invalid bpf_context access",
},
{
"check skb->tc_index is writeable by SK_SKB",
Expand Down

0 comments on commit e95c6cf

Please sign in to comment.