Skip to content

Commit 5574ff7

Browse files
magnus-karlsson authored and anguy11 committed
i40e: optimize AF_XDP Tx completion path
Improve the performance of the AF_XDP zero-copy Tx completion path. When there are no XDP buffers being sent using XDP_TX or XDP_REDIRECT, we do not have to go through the SW ring to clean up any entries since the AF_XDP path does not use these. In these cases, just fast-forward the next-to-use counter and skip going through the SW ring. The limit on the maximum number of entries to complete is also removed since the algorithm is now O(1). To simplify the code path, the maximum number of entries to complete for the XDP path is therefore also increased from 256 to 512 (the default number of Tx HW descriptors). This should be fine since the completion in the XDP path is faster than in the SKB path that has 256 as the maximum number. This patch provides around 4% throughput improvement for the l2fwd application in xdpsock on my machine. Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com> Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
1 parent 753f388 commit 5574ff7

File tree

4 files changed

+27
-23
lines changed

4 files changed

+27
-23
lines changed

drivers/net/ethernet/intel/i40e/i40e_txrx.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2580,7 +2580,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
25802580
*/
25812581
i40e_for_each_ring(ring, q_vector->tx) {
25822582
bool wd = ring->xsk_umem ?
2583-
i40e_clean_xdp_tx_irq(vsi, ring, budget) :
2583+
i40e_clean_xdp_tx_irq(vsi, ring) :
25842584
i40e_clean_tx_irq(vsi, ring, budget);
25852585

25862586
if (!wd) {
@@ -3538,6 +3538,7 @@ static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
35383538
*/
35393539
smp_wmb();
35403540

3541+
xdp_ring->xdp_tx_active++;
35413542
i++;
35423543
if (i == xdp_ring->count)
35433544
i = 0;

drivers/net/ethernet/intel/i40e/i40e_txrx.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ struct i40e_ring {
347347
/* used in interrupt processing */
348348
u16 next_to_use;
349349
u16 next_to_clean;
350+
u16 xdp_tx_active;
350351

351352
u8 atr_sample_rate;
352353
u8 atr_count;

drivers/net/ethernet/intel/i40e/i40e_xsk.c

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
378378
**/
379379
static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
380380
{
381+
unsigned int sent_frames = 0, total_bytes = 0;
381382
struct i40e_tx_desc *tx_desc = NULL;
382383
struct i40e_tx_buffer *tx_bi;
383384
bool work_done = true;
@@ -408,6 +409,9 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
408409
| I40E_TX_DESC_CMD_EOP,
409410
0, desc.len, 0);
410411

412+
sent_frames++;
413+
total_bytes += tx_bi->bytecount;
414+
411415
xdp_ring->next_to_use++;
412416
if (xdp_ring->next_to_use == xdp_ring->count)
413417
xdp_ring->next_to_use = 0;
@@ -420,6 +424,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
420424
i40e_xdp_ring_update_tail(xdp_ring);
421425

422426
xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
427+
i40e_update_tx_stats(xdp_ring, sent_frames, total_bytes);
423428
}
424429

425430
return !!budget && work_done;
@@ -434,6 +439,7 @@ static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring,
434439
struct i40e_tx_buffer *tx_bi)
435440
{
436441
xdp_return_frame(tx_bi->xdpf);
442+
tx_ring->xdp_tx_active--;
437443
dma_unmap_single(tx_ring->dev,
438444
dma_unmap_addr(tx_bi, dma),
439445
dma_unmap_len(tx_bi, len), DMA_TO_DEVICE);
@@ -447,63 +453,60 @@ static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring,
447453
*
448454
* Returns true if cleanup/tranmission is done.
449455
**/
450-
bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi,
451-
struct i40e_ring *tx_ring, int napi_budget)
456+
bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
452457
{
453-
unsigned int ntc, total_bytes = 0, budget = vsi->work_limit;
454-
u32 i, completed_frames, frames_ready, xsk_frames = 0;
458+
unsigned int ntc, budget = vsi->work_limit;
455459
struct xdp_umem *umem = tx_ring->xsk_umem;
460+
u32 i, completed_frames, xsk_frames = 0;
456461
u32 head_idx = i40e_get_head(tx_ring);
457-
bool work_done = true, xmit_done;
458462
struct i40e_tx_buffer *tx_bi;
463+
bool xmit_done;
459464

460465
if (head_idx < tx_ring->next_to_clean)
461466
head_idx += tx_ring->count;
462-
frames_ready = head_idx - tx_ring->next_to_clean;
467+
completed_frames = head_idx - tx_ring->next_to_clean;
463468

464-
if (frames_ready == 0) {
469+
if (completed_frames == 0)
465470
goto out_xmit;
466-
} else if (frames_ready > budget) {
467-
completed_frames = budget;
468-
work_done = false;
469-
} else {
470-
completed_frames = frames_ready;
471+
472+
if (likely(!tx_ring->xdp_tx_active)) {
473+
xsk_frames = completed_frames;
474+
goto skip;
471475
}
472476

473477
ntc = tx_ring->next_to_clean;
474478

475479
for (i = 0; i < completed_frames; i++) {
476480
tx_bi = &tx_ring->tx_bi[ntc];
477481

478-
if (tx_bi->xdpf)
482+
if (tx_bi->xdpf) {
479483
i40e_clean_xdp_tx_buffer(tx_ring, tx_bi);
480-
else
484+
tx_bi->xdpf = NULL;
485+
} else {
481486
xsk_frames++;
482-
483-
tx_bi->xdpf = NULL;
484-
total_bytes += tx_bi->bytecount;
487+
}
485488

486489
if (++ntc >= tx_ring->count)
487490
ntc = 0;
488491
}
489492

493+
skip:
490494
tx_ring->next_to_clean += completed_frames;
491495
if (unlikely(tx_ring->next_to_clean >= tx_ring->count))
492496
tx_ring->next_to_clean -= tx_ring->count;
493497

494498
if (xsk_frames)
495499
xsk_umem_complete_tx(umem, xsk_frames);
496500

497-
i40e_arm_wb(tx_ring, vsi, budget);
498-
i40e_update_tx_stats(tx_ring, completed_frames, total_bytes);
501+
i40e_arm_wb(tx_ring, vsi, completed_frames);
499502

500503
out_xmit:
501504
if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem))
502505
xsk_set_tx_need_wakeup(tx_ring->xsk_umem);
503506

504507
xmit_done = i40e_xmit_zc(tx_ring, budget);
505508

506-
return work_done && xmit_done;
509+
return xmit_done;
507510
}
508511

509512
/**

drivers/net/ethernet/intel/i40e/i40e_xsk.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
1515
bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);
1616
int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
1717

18-
bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi,
19-
struct i40e_ring *tx_ring, int napi_budget);
18+
bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
2019
int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
2120
int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring);
2221
void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);

0 commit comments

Comments
 (0)