Skip to content

Commit

Permalink
xsk: Support tx_metadata_len
Browse files Browse the repository at this point in the history
For zerocopy mode, tx_desc->addr can point to an arbitrary offset
and carry some TX metadata in the headroom. For copy mode, there
is no way currently to populate skb metadata.

Introduce a new tx_metadata_len umem config option that indicates how many
bytes to treat as metadata. The metadata bytes are placed immediately before
the tx_desc address (same as in the RX case).

The size of the metadata has mostly the same constraints as XDP:
- less than 256 bytes
- 8-byte aligned (compared to 4-byte alignment on xdp, due to 8-byte
  timestamp in the completion)
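- non-zero when TX metadata is used (a length of zero is still accepted by
  the check in xdp_umem_reg() and reserves no metadata bytes)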

This data is not interpreted in any way right now.

Reviewed-by: Song Yoong Siang <yoong.siang.song@intel.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20231127190319.1190813-2-sdf@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
fomichev authored and Alexei Starovoitov committed Nov 29, 2023
1 parent 40d0eb0 commit 341ac98
Show file tree
Hide file tree
Showing 8 changed files with 30 additions and 8 deletions.
1 change: 1 addition & 0 deletions include/net/xdp_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ struct xdp_umem {
struct user_struct *user;
refcount_t users;
u8 flags;
u8 tx_metadata_len;
bool zc;
struct page **pgs;
int id;
Expand Down
1 change: 1 addition & 0 deletions include/net/xsk_buff_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ struct xsk_buff_pool {
u32 chunk_size;
u32 chunk_shift;
u32 frame_len;
u8 tx_metadata_len; /* inherited from umem */
u8 cached_need_wakeup;
bool uses_need_wakeup;
bool dma_need_sync;
Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/if_xdp.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ struct xdp_umem_reg {
__u32 chunk_size;
__u32 headroom;
__u32 flags;
__u32 tx_metadata_len;
};

struct xdp_statistics {
Expand Down
4 changes: 4 additions & 0 deletions net/xdp/xdp_umem.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
return -EINVAL;

if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8)
return -EINVAL;

umem->size = size;
umem->headroom = headroom;
umem->chunk_size = chunk_size;
Expand All @@ -207,6 +210,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->pgs = NULL;
umem->user = NULL;
umem->flags = mr->flags;
umem->tx_metadata_len = mr->tx_metadata_len;

INIT_LIST_HEAD(&umem->xsk_dma_list);
refcount_set(&umem->users, 1);
Expand Down
12 changes: 11 additions & 1 deletion net/xdp/xsk.c
Original file line number Diff line number Diff line change
Expand Up @@ -1283,6 +1283,14 @@ struct xdp_umem_reg_v1 {
__u32 headroom;
};

/*
 * UMEM registration layout that ends at 'flags', i.e. without the trailing
 * tx_metadata_len field that this change appends to struct xdp_umem_reg.
 *
 * xsk_setsockopt() uses sizeof() of this struct as a size threshold: an
 * optlen that is at least this large but smaller than sizeof(mr) makes it
 * copy only sizeof(struct xdp_umem_reg_v2) bytes from the caller.
 *
 * NOTE(review): presumably 'mr' is a struct xdp_umem_reg that is zeroed
 * before the copy, so the missing tx_metadata_len reads as 0 -- confirm in
 * xsk_setsockopt().
 */
struct xdp_umem_reg_v2 {
__u64 addr; /* Start of packet data area */
__u64 len; /* Length of packet data area */
__u32 chunk_size;
__u32 headroom;
__u32 flags;
};

static int xsk_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
Expand Down Expand Up @@ -1326,8 +1334,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,

if (optlen < sizeof(struct xdp_umem_reg_v1))
return -EINVAL;
else if (optlen < sizeof(mr))
else if (optlen < sizeof(struct xdp_umem_reg_v2))
mr_size = sizeof(struct xdp_umem_reg_v1);
else if (optlen < sizeof(mr))
mr_size = sizeof(struct xdp_umem_reg_v2);

if (copy_from_sockptr(&mr, optval, mr_size))
return -EFAULT;
Expand Down
1 change: 1 addition & 0 deletions net/xdp/xsk_buff_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
XDP_PACKET_HEADROOM;
pool->umem = umem;
pool->addrs = umem->addrs;
pool->tx_metadata_len = umem->tx_metadata_len;
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
Expand Down
17 changes: 10 additions & 7 deletions net/xdp/xsk_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,15 +143,17 @@ static inline bool xp_unused_options_set(u32 options)
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
u64 offset = desc->addr & (pool->chunk_size - 1);
u64 addr = desc->addr - pool->tx_metadata_len;
u64 len = desc->len + pool->tx_metadata_len;
u64 offset = addr & (pool->chunk_size - 1);

if (!desc->len)
return false;

if (offset + desc->len > pool->chunk_size)
if (offset + len > pool->chunk_size)
return false;

if (desc->addr >= pool->addrs_cnt)
if (addr >= pool->addrs_cnt)
return false;

if (xp_unused_options_set(desc->options))
Expand All @@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len;
u64 len = desc->len + pool->tx_metadata_len;

if (!desc->len)
return false;

if (desc->len > pool->chunk_size)
if (len > pool->chunk_size)
return false;

if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt ||
xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt ||
xp_desc_crosses_non_contig_pg(pool, addr, len))
return false;

if (xp_unused_options_set(desc->options))
Expand Down
1 change: 1 addition & 0 deletions tools/include/uapi/linux/if_xdp.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ struct xdp_umem_reg {
__u32 chunk_size;
__u32 headroom;
__u32 flags;
__u32 tx_metadata_len;
};

struct xdp_statistics {
Expand Down

0 comments on commit 341ac98

Please sign in to comment.