-
Notifications
You must be signed in to change notification settings - Fork 47
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
bpf: introduce new bpf AF_XDP map type BPF_MAP_TYPE_XSKMAP
The xskmap is yet another BPF map, very much inspired by dev/cpu/sockmap, and is a holder of AF_XDP sockets. A user application adds AF_XDP sockets into the map, and by using the bpf_redirect_map helper, an XDP program can redirect XDP frames to an AF_XDP socket. Note that a socket that is bound to certain ifindex/queue index will *only* accept XDP frames from that netdev/queue index. If an XDP program tries to redirect from a netdev/queue index other than what the socket is bound to, the frame will not be received on the socket. A socket can reside in multiple maps. v3: Fixed race and simplified code. v2: Removed one indirection in map lookup. Signed-off-by: Björn Töpel <bjorn.topel@intel.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
- Loading branch information
Björn Töpel
authored and
Alexei Starovoitov
committed
May 3, 2018
1 parent
c497176
commit fbfc504
Showing
8 changed files
with
289 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,239 @@ | ||
// SPDX-License-Identifier: GPL-2.0 | ||
/* XSKMAP used for AF_XDP sockets | ||
* Copyright(c) 2018 Intel Corporation. | ||
* | ||
* This program is free software; you can redistribute it and/or modify it | ||
* under the terms and conditions of the GNU General Public License, | ||
* version 2, as published by the Free Software Foundation. | ||
* | ||
* This program is distributed in the hope it will be useful, but WITHOUT | ||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
* more details. | ||
*/ | ||
|
||
#include <linux/bpf.h> | ||
#include <linux/capability.h> | ||
#include <net/xdp_sock.h> | ||
#include <linux/slab.h> | ||
#include <linux/sched.h> | ||
|
||
struct xsk_map { | ||
struct bpf_map map; | ||
struct xdp_sock **xsk_map; | ||
struct list_head __percpu *flush_list; | ||
}; | ||
|
||
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) | ||
{ | ||
int cpu, err = -EINVAL; | ||
struct xsk_map *m; | ||
u64 cost; | ||
|
||
if (!capable(CAP_NET_ADMIN)) | ||
return ERR_PTR(-EPERM); | ||
|
||
if (attr->max_entries == 0 || attr->key_size != 4 || | ||
attr->value_size != 4 || | ||
attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)) | ||
return ERR_PTR(-EINVAL); | ||
|
||
m = kzalloc(sizeof(*m), GFP_USER); | ||
if (!m) | ||
return ERR_PTR(-ENOMEM); | ||
|
||
bpf_map_init_from_attr(&m->map, attr); | ||
|
||
cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *); | ||
cost += sizeof(struct list_head) * num_possible_cpus(); | ||
if (cost >= U32_MAX - PAGE_SIZE) | ||
goto free_m; | ||
|
||
m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; | ||
|
||
/* Notice returns -EPERM on if map size is larger than memlock limit */ | ||
err = bpf_map_precharge_memlock(m->map.pages); | ||
if (err) | ||
goto free_m; | ||
|
||
m->flush_list = alloc_percpu(struct list_head); | ||
if (!m->flush_list) | ||
goto free_m; | ||
|
||
for_each_possible_cpu(cpu) | ||
INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu)); | ||
|
||
m->xsk_map = bpf_map_area_alloc(m->map.max_entries * | ||
sizeof(struct xdp_sock *), | ||
m->map.numa_node); | ||
if (!m->xsk_map) | ||
goto free_percpu; | ||
return &m->map; | ||
|
||
free_percpu: | ||
free_percpu(m->flush_list); | ||
free_m: | ||
kfree(m); | ||
return ERR_PTR(err); | ||
} | ||
|
||
static void xsk_map_free(struct bpf_map *map) | ||
{ | ||
struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
int i; | ||
|
||
synchronize_net(); | ||
|
||
for (i = 0; i < map->max_entries; i++) { | ||
struct xdp_sock *xs; | ||
|
||
xs = m->xsk_map[i]; | ||
if (!xs) | ||
continue; | ||
|
||
sock_put((struct sock *)xs); | ||
} | ||
|
||
free_percpu(m->flush_list); | ||
bpf_map_area_free(m->xsk_map); | ||
kfree(m); | ||
} | ||
|
||
static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key) | ||
{ | ||
struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
u32 index = key ? *(u32 *)key : U32_MAX; | ||
u32 *next = next_key; | ||
|
||
if (index >= m->map.max_entries) { | ||
*next = 0; | ||
return 0; | ||
} | ||
|
||
if (index == m->map.max_entries - 1) | ||
return -ENOENT; | ||
*next = index + 1; | ||
return 0; | ||
} | ||
|
||
/* Return the socket stored at @key, or NULL when @key is out of range or
 * the slot is empty. The slot is sampled once with READ_ONCE().
 */
struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
{
	struct xsk_map *xmap = container_of(map, struct xsk_map, map);

	if (key < map->max_entries)
		return READ_ONCE(xmap->xsk_map[key]);

	return NULL;
}
|
||
int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, | ||
struct xdp_sock *xs) | ||
{ | ||
struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
struct list_head *flush_list = this_cpu_ptr(m->flush_list); | ||
int err; | ||
|
||
err = xsk_rcv(xs, xdp); | ||
if (err) | ||
return err; | ||
|
||
if (!xs->flush_node.prev) | ||
list_add(&xs->flush_node, flush_list); | ||
|
||
return 0; | ||
} | ||
|
||
void __xsk_map_flush(struct bpf_map *map) | ||
{ | ||
struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
struct list_head *flush_list = this_cpu_ptr(m->flush_list); | ||
struct xdp_sock *xs, *tmp; | ||
|
||
list_for_each_entry_safe(xs, tmp, flush_list, flush_node) { | ||
xsk_flush(xs); | ||
__list_del(xs->flush_node.prev, xs->flush_node.next); | ||
xs->flush_node.prev = NULL; | ||
} | ||
} | ||
|
||
static void *xsk_map_lookup_elem(struct bpf_map *map, void *key) | ||
{ | ||
return NULL; | ||
} | ||
|
||
static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, | ||
u64 map_flags) | ||
{ | ||
struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
u32 i = *(u32 *)key, fd = *(u32 *)value; | ||
struct xdp_sock *xs, *old_xs; | ||
struct socket *sock; | ||
int err; | ||
|
||
if (unlikely(map_flags > BPF_EXIST)) | ||
return -EINVAL; | ||
if (unlikely(i >= m->map.max_entries)) | ||
return -E2BIG; | ||
if (unlikely(map_flags == BPF_NOEXIST)) | ||
return -EEXIST; | ||
|
||
sock = sockfd_lookup(fd, &err); | ||
if (!sock) | ||
return err; | ||
|
||
if (sock->sk->sk_family != PF_XDP) { | ||
sockfd_put(sock); | ||
return -EOPNOTSUPP; | ||
} | ||
|
||
xs = (struct xdp_sock *)sock->sk; | ||
|
||
if (!xsk_is_setup_for_bpf_map(xs)) { | ||
sockfd_put(sock); | ||
return -EOPNOTSUPP; | ||
} | ||
|
||
sock_hold(sock->sk); | ||
|
||
old_xs = xchg(&m->xsk_map[i], xs); | ||
if (old_xs) { | ||
/* Make sure we've flushed everything. */ | ||
synchronize_net(); | ||
sock_put((struct sock *)old_xs); | ||
} | ||
|
||
sockfd_put(sock); | ||
return 0; | ||
} | ||
|
||
static int xsk_map_delete_elem(struct bpf_map *map, void *key) | ||
{ | ||
struct xsk_map *m = container_of(map, struct xsk_map, map); | ||
struct xdp_sock *old_xs; | ||
int k = *(u32 *)key; | ||
|
||
if (k >= map->max_entries) | ||
return -EINVAL; | ||
|
||
old_xs = xchg(&m->xsk_map[k], NULL); | ||
if (old_xs) { | ||
/* Make sure we've flushed everything. */ | ||
synchronize_net(); | ||
sock_put((struct sock *)old_xs); | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
const struct bpf_map_ops xsk_map_ops = { | ||
.map_alloc = xsk_map_alloc, | ||
.map_free = xsk_map_free, | ||
.map_get_next_key = xsk_map_get_next_key, | ||
.map_lookup_elem = xsk_map_lookup_elem, | ||
.map_update_elem = xsk_map_update_elem, | ||
.map_delete_elem = xsk_map_delete_elem, | ||
}; | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters