
Commit fd448bb

netoptimizer authored and kernel-patches-bot committed
bpf: run devmap xdp_prog on flush instead of bulk enqueue
This changes the devmap XDP program support to run the program when the
bulk queue is flushed instead of before the frame is enqueued. This has
a couple of benefits:

- It "sorts" the packets by destination devmap entry, and then runs the
  same BPF program on all the packets in sequence. This ensures that we
  keep the XDP program and destination device properties hot in I-cache.

- It makes the multicast implementation simpler because it can just
  enqueue packets using bq_enqueue() without having to deal with the
  devmap program at all.

The drawback is that if the devmap program drops the packet, the enqueue
step is redundant. However, arguably this is mostly visible in a
micro-benchmark, and with more mixed traffic the I-cache benefit should
win out.

The bq_xmit_all() logic is also refactored and the error label is
removed: when bq_xmit_all() is called from bq_enqueue(), another packet
will always be enqueued immediately after, so clearing dev_rx, xdp_prog
and flush_node in bq_xmit_all() is redundant. Let's move the clear to
__dev_flush(), and only check them once in bq_enqueue() since they are
all modified together.

The performance impact of just this patch, using xdp_redirect_map in
samples/bpf and sending packets via the pktgen cmd:
./pktgen_sample03_burst_single_flow.sh -i eno1 -d $dst_ip -m $dst_mac -t 10 -s 64

There is about +/- 0.1M deviation for native testing; the performance
improved for the base case, but dropped back somewhat with an xdp devmap
prog attached.

Version          | Test                        | Generic | Native | Native + 2nd xdp_prog
5.10 rc6         | xdp_redirect_map i40e->i40e |    2.0M |   9.1M |  8.0M
5.10 rc6         | xdp_redirect_map i40e->veth |    1.7M |  11.0M |  9.7M
5.10 rc6 + patch | xdp_redirect_map i40e->i40e |    2.0M |   9.5M |  7.5M
5.10 rc6 + patch | xdp_redirect_map i40e->veth |    1.7M |  11.6M |  9.1M

Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
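For readers unfamiliar with the feature being optimized, below is a minimal, illustrative sketch (not part of this commit) of a devmap whose entries carry a second XDP program, i.e. the xdp_prog that this patch now runs at flush time. It assumes the BPF_XDP_DEVMAP attach type and struct bpf_devmap_val map values; the map/program names and the SEC() naming are placeholders and the exact section convention depends on the libbpf version in use.

/* Hypothetical example, not from this patch: a devmap whose entries can
 * carry a second ("egress") XDP program; this commit moves where that
 * program runs (bq_xmit_all() at flush time instead of per-frame enqueue).
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(max_entries, 8);
	__type(key, __u32);
	__type(value, struct bpf_devmap_val); /* ifindex + optional prog fd */
} tx_port SEC(".maps");

/* Second-level program, loaded with expected_attach_type BPF_XDP_DEVMAP
 * and referenced via bpf_devmap_val.bpf_prog.fd when updating the map. */
SEC("xdp_devmap/egress")
int devmap_egress_prog(struct xdp_md *ctx)
{
	return XDP_PASS; /* e.g. rewrite headers for the egress device */
}

/* Ingress program that redirects into the devmap as usual. */
SEC("xdp")
int xdp_redirect_prog(struct xdp_md *ctx)
{
	return bpf_redirect_map(&tx_port, 0, 0);
}

char _license[] SEC("license") = "GPL";

With such a setup, every frame redirected through tx_port has devmap_egress_prog run on it; before this patch that happened per frame in dev_map_enqueue(), afterwards it happens once per bulk in bq_xmit_all().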
1 parent bb21fad commit fd448bb

File tree

1 file changed: +84 -62 lines changed

kernel/bpf/devmap.c

Lines changed: 84 additions & 62 deletions
@@ -57,6 +57,7 @@ struct xdp_dev_bulk_queue {
 	struct list_head flush_node;
 	struct net_device *dev;
 	struct net_device *dev_rx;
+	struct bpf_prog *xdp_prog;
 	unsigned int count;
 };
 
@@ -327,46 +328,92 @@ bool dev_map_can_have_prog(struct bpf_map *map)
 	return false;
 }
 
+static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
+				struct xdp_frame **frames, int n,
+				struct net_device *dev)
+{
+	struct xdp_txq_info txq = { .dev = dev };
+	struct xdp_buff xdp;
+	int i, nframes = 0;
+
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		u32 act;
+		int err;
+
+		xdp_convert_frame_to_buff(xdpf, &xdp);
+		xdp.txq = &txq;
+
+		act = bpf_prog_run_xdp(xdp_prog, &xdp);
+		switch (act) {
+		case XDP_PASS:
+			err = xdp_update_frame_from_buff(&xdp, xdpf);
+			if (unlikely(err < 0))
+				xdp_return_frame_rx_napi(xdpf);
+			else
+				frames[nframes++] = xdpf;
+			break;
+		default:
+			bpf_warn_invalid_xdp_action(act);
+			fallthrough;
+		case XDP_ABORTED:
+			trace_xdp_exception(dev, xdp_prog, act);
+			fallthrough;
+		case XDP_DROP:
+			xdp_return_frame_rx_napi(xdpf);
+			break;
+		}
+	}
+	return nframes; /* sent frames count */
+}
+
 static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 {
 	struct net_device *dev = bq->dev;
-	int sent = 0, drops = 0, err = 0;
+	unsigned int cnt = bq->count;
+	int drops = 0, err = 0;
+	int to_send = cnt;
+	int sent = cnt;
 	int i;
 
-	if (unlikely(!bq->count))
+	if (unlikely(!cnt))
 		return;
 
-	for (i = 0; i < bq->count; i++) {
+	for (i = 0; i < cnt; i++) {
 		struct xdp_frame *xdpf = bq->q[i];
 
 		prefetch(xdpf);
 	}
 
-	sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
+	if (bq->xdp_prog) {
+		to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
+		if (!to_send) {
+			sent = 0;
+			goto out;
+		}
+		drops = cnt - to_send;
+	}
+
+	sent = dev->netdev_ops->ndo_xdp_xmit(dev, to_send, bq->q, flags);
 	if (sent < 0) {
 		err = sent;
 		sent = 0;
-		goto error;
+
+		/* If ndo_xdp_xmit fails with an errno, no frames have been
+		 * xmit'ed and it's our responsibility to them free all.
+		 */
+		for (i = 0; i < cnt - drops; i++) {
+			struct xdp_frame *xdpf = bq->q[i];
+
+			xdp_return_frame_rx_napi(xdpf);
+		}
 	}
-	drops = bq->count - sent;
 out:
+	drops = cnt - sent;
 	bq->count = 0;
 
 	trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, drops, err);
-	bq->dev_rx = NULL;
-	__list_del_clearprev(&bq->flush_node);
 	return;
-error:
-	/* If ndo_xdp_xmit fails with an errno, no frames have been
-	 * xmit'ed and it's our responsibility to them free all.
-	 */
-	for (i = 0; i < bq->count; i++) {
-		struct xdp_frame *xdpf = bq->q[i];
-
-		xdp_return_frame_rx_napi(xdpf);
-		drops++;
-	}
-	goto out;
 }
 
 /* __dev_flush is called from xdp_do_flush() which _must_ be signaled
@@ -384,8 +431,12 @@ void __dev_flush(void)
 	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
 	struct xdp_dev_bulk_queue *bq, *tmp;
 
-	list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
+	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
 		bq_xmit_all(bq, XDP_XMIT_FLUSH);
+		bq->dev_rx = NULL;
+		bq->xdp_prog = NULL;
+		__list_del_clearprev(&bq->flush_node);
+	}
 }
 
 /* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
@@ -408,7 +459,7 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
  * Thus, safe percpu variable access.
  */
 static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
-		       struct net_device *dev_rx)
+		       struct net_device *dev_rx, struct bpf_prog *xdp_prog)
 {
 	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
 	struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
@@ -419,18 +470,22 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 	/* Ingress dev_rx will be the same for all xdp_frame's in
 	 * bulk_queue, because bq stored per-CPU and must be flushed
 	 * from net_device drivers NAPI func end.
+	 *
+	 * Do the same with xdp_prog and flush_list since these fields
+	 * are only ever modified together.
 	 */
-	if (!bq->dev_rx)
+	if (!bq->dev_rx) {
 		bq->dev_rx = dev_rx;
+		bq->xdp_prog = xdp_prog;
+		list_add(&bq->flush_node, flush_list);
+	}
 
 	bq->q[bq->count++] = xdpf;
-
-	if (!bq->flush_node.prev)
-		list_add(&bq->flush_node, flush_list);
 }
 
 static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
-				struct net_device *dev_rx)
+				struct net_device *dev_rx,
+				struct bpf_prog *xdp_prog)
 {
 	struct xdp_frame *xdpf;
 	int err;
@@ -446,55 +501,22 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
 
-	bq_enqueue(dev, xdpf, dev_rx);
+	bq_enqueue(dev, xdpf, dev_rx, xdp_prog);
 	return 0;
 }
 
-static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
-					 struct xdp_buff *xdp,
-					 struct bpf_prog *xdp_prog)
-{
-	struct xdp_txq_info txq = { .dev = dev };
-	u32 act;
-
-	xdp_set_data_meta_invalid(xdp);
-	xdp->txq = &txq;
-
-	act = bpf_prog_run_xdp(xdp_prog, xdp);
-	switch (act) {
-	case XDP_PASS:
-		return xdp;
-	case XDP_DROP:
-		break;
-	default:
-		bpf_warn_invalid_xdp_action(act);
-		fallthrough;
-	case XDP_ABORTED:
-		trace_xdp_exception(dev, xdp_prog, act);
-		break;
-	}
-
-	xdp_return_buff(xdp);
-	return NULL;
-}
-
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 		    struct net_device *dev_rx)
 {
-	return __xdp_enqueue(dev, xdp, dev_rx);
+	return __xdp_enqueue(dev, xdp, dev_rx, NULL);
 }
 
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 		    struct net_device *dev_rx)
 {
 	struct net_device *dev = dst->dev;
 
-	if (dst->xdp_prog) {
-		xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
-		if (!xdp)
-			return 0;
-	}
-	return __xdp_enqueue(dev, xdp, dev_rx);
+	return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
}
 
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
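For context on when the new flush-time path runs, here is an illustrative driver-side sketch (not part of the diff above, names hypothetical): frames redirected during a NAPI poll sit in the per-CPU xdp_dev_bulk_queue until the driver calls xdp_do_flush() at the end of its poll, which reaches __dev_flush() and thus bq_xmit_all() above.

/* Illustrative sketch, not from this commit: the per-CPU devmap bulk
 * queue is only drained when the driver signals the end of its NAPI
 * poll with xdp_do_flush(), which after this patch is also where the
 * devmap xdp_prog runs on the batched frames.
 */
#include <linux/netdevice.h>
#include <linux/filter.h>

static int example_napi_poll(struct napi_struct *napi, int budget)
{
	int done = 0;

	while (done < budget) {
		/* ...receive a frame, build an xdp_buff, run the ingress
		 * XDP program; XDP_REDIRECT frames land in the per-CPU
		 * xdp_dev_bulk_queue via bq_enqueue()...
		 */
		done++;
	}

	/* Drain the flush list: __dev_flush() -> bq_xmit_all(), which now
	 * runs bq->xdp_prog on the whole batch before ndo_xdp_xmit().
	 */
	xdp_do_flush();

	return done;
}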
