Skip to content

Commit

Permalink
mlxsw: pci: Use page pool for Rx buffers allocation
Browse files Browse the repository at this point in the history
As part of driver init, all Rx queues are filled with buffers for
hardware usage. Later, when a packet is received, a new buffer should be
allocated to be used by hardware instead of the received buffer.
Packet's processing time includes allocation time, which can be improved
using page pool.

Using page pool, DMA mapping is done only for first allocation of buffers.
As subsequent buffers allocation avoid DMA mapping, it results in
performance improvement. The purpose of page pool is to allocate pages fast
from cache without locking. This lockless guarantee naturally comes from
running under a NAPI.

Use page pool to allocate the data buffer only, so hardware will use it to
fill the packet. At completion time, attach the data buffer (now filled
with packet payload) to new SKB which is allocated around the received
buffer. SKB building at completion time prevents cache miss for each
packet, as now the SKB is allocated right before packets will be handled by
networking stack.

Page pool for each Rx queue enhances Rx side performance by reclaiming
buffers back to each queue specific pool. This change significantly
improves driver performance, CPU can handle about 345% of the packets per
second it previously handled.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/1cf788a8f43c70aae6d526018ef77becb27ad6d3.1718709196.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Amit Cohen authored and kuba-moo committed Jun 20, 2024
1 parent 5642c6a commit b5b60bb
Showing 1 changed file with 64 additions and 39 deletions.
103 changes: 64 additions & 39 deletions drivers/net/ethernet/mellanox/mlxsw/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ struct mlxsw_pci_mem_item {
};

struct mlxsw_pci_queue_elem_info {
struct page *page;
char *elem; /* pointer to actual dma mapped element mem chunk */
union {
struct {
Expand Down Expand Up @@ -346,6 +347,19 @@ static void mlxsw_pci_sdq_fini(struct mlxsw_pci *mlxsw_pci,
(MLXSW_PCI_SKB_HEADROOM + \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

static void
mlxsw_pci_wqe_rx_frag_set(struct mlxsw_pci *mlxsw_pci, struct page *page,
char *wqe, int index, size_t frag_len)
{
dma_addr_t mapaddr;

mapaddr = page_pool_get_dma_addr(page);
mapaddr += MLXSW_PCI_SKB_HEADROOM;

mlxsw_pci_wqe_address_set(wqe, index, mapaddr);
mlxsw_pci_wqe_byte_count_set(wqe, index, frag_len);
}

static int mlxsw_pci_wqe_frag_map(struct mlxsw_pci *mlxsw_pci, char *wqe,
int index, char *frag_data, size_t frag_len,
int direction)
Expand Down Expand Up @@ -375,43 +389,46 @@ static void mlxsw_pci_wqe_frag_unmap(struct mlxsw_pci *mlxsw_pci, char *wqe,
dma_unmap_single(&pdev->dev, mapaddr, frag_len, direction);
}

static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci,
struct mlxsw_pci_queue_elem_info *elem_info,
gfp_t gfp)
static struct sk_buff *mlxsw_pci_rdq_build_skb(struct page *page,
u16 byte_count)
{
void *data = page_address(page);
unsigned int allocated_size;
struct sk_buff *skb;

allocated_size = page_size(page);
skb = napi_build_skb(data, allocated_size);
if (unlikely(!skb))
return ERR_PTR(-ENOMEM);

skb_reserve(skb, MLXSW_PCI_SKB_HEADROOM);
skb_put(skb, byte_count);
return skb;
}

static int mlxsw_pci_rdq_page_alloc(struct mlxsw_pci_queue *q,
struct mlxsw_pci_queue_elem_info *elem_info)
{
struct mlxsw_pci_queue *cq = q->u.rdq.cq;
size_t buf_len = MLXSW_PORT_MAX_MTU;
char *wqe = elem_info->elem;
struct sk_buff *skb;
int err;
struct page *page;

skb = __netdev_alloc_skb_ip_align(NULL, buf_len, gfp);
if (!skb)
page = page_pool_dev_alloc_pages(cq->u.cq.page_pool);
if (unlikely(!page))
return -ENOMEM;

err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data,
buf_len, DMA_FROM_DEVICE);
if (err)
goto err_frag_map;

elem_info->u.rdq.skb = skb;
mlxsw_pci_wqe_rx_frag_set(q->pci, page, wqe, 0, buf_len);
elem_info->page = page;
return 0;

err_frag_map:
dev_kfree_skb_any(skb);
return err;
}

static void mlxsw_pci_rdq_skb_free(struct mlxsw_pci *mlxsw_pci,
struct mlxsw_pci_queue_elem_info *elem_info)
static void mlxsw_pci_rdq_page_free(struct mlxsw_pci_queue *q,
struct mlxsw_pci_queue_elem_info *elem_info)
{
struct sk_buff *skb;
char *wqe;
struct mlxsw_pci_queue *cq = q->u.rdq.cq;

skb = elem_info->u.rdq.skb;
wqe = elem_info->elem;

mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
dev_kfree_skb_any(skb);
page_pool_put_page(cq->u.cq.page_pool, elem_info->page, -1, false);
}

static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
Expand Down Expand Up @@ -452,7 +469,7 @@ static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
for (i = 0; i < q->count; i++) {
elem_info = mlxsw_pci_queue_elem_info_producer_get(q);
BUG_ON(!elem_info);
err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info, GFP_KERNEL);
err = mlxsw_pci_rdq_page_alloc(q, elem_info);
if (err)
goto rollback;
/* Everything is set up, ring doorbell to pass elem to HW */
Expand All @@ -465,7 +482,7 @@ static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
rollback:
for (i--; i >= 0; i--) {
elem_info = mlxsw_pci_queue_elem_info_get(q, i);
mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info);
mlxsw_pci_rdq_page_free(q, elem_info);
}
q->u.rdq.cq = NULL;
cq->u.cq.dq = NULL;
Expand All @@ -483,7 +500,7 @@ static void mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci,
mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num);
for (i = 0; i < q->count; i++) {
elem_info = mlxsw_pci_queue_elem_info_get(q, i);
mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info);
mlxsw_pci_rdq_page_free(q, elem_info);
}
}

Expand Down Expand Up @@ -618,26 +635,38 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
{
struct pci_dev *pdev = mlxsw_pci->pdev;
struct mlxsw_pci_queue_elem_info *elem_info;
struct mlxsw_pci_queue *cq = q->u.rdq.cq;
struct mlxsw_rx_info rx_info = {};
char wqe[MLXSW_PCI_WQE_SIZE];
struct sk_buff *skb;
struct page *page;
u16 byte_count;
int err;

elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
skb = elem_info->u.rdq.skb;
memcpy(wqe, elem_info->elem, MLXSW_PCI_WQE_SIZE);

if (q->consumer_counter++ != consumer_counter_limit)
dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n");

err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info, GFP_ATOMIC);
byte_count = mlxsw_pci_cqe_byte_count_get(cqe);
if (mlxsw_pci_cqe_crc_get(cqe_v, cqe))
byte_count -= ETH_FCS_LEN;

page = elem_info->page;

err = mlxsw_pci_rdq_page_alloc(q, elem_info);
if (err) {
dev_err_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n");
dev_err_ratelimited(&pdev->dev, "Failed to alloc page\n");
goto out;
}

skb = mlxsw_pci_rdq_build_skb(page, byte_count);
if (IS_ERR(skb)) {
dev_err_ratelimited(&pdev->dev, "Failed to build skb for RDQ\n");
page_pool_recycle_direct(cq->u.cq.page_pool, page);
goto out;
}

mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
skb_mark_for_recycle(skb);

if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) {
rx_info.is_lag = true;
Expand Down Expand Up @@ -670,10 +699,6 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,

mlxsw_pci_skb_cb_ts_set(mlxsw_pci, skb, cqe_v, cqe);

byte_count = mlxsw_pci_cqe_byte_count_get(cqe);
if (mlxsw_pci_cqe_crc_get(cqe_v, cqe))
byte_count -= ETH_FCS_LEN;
skb_put(skb, byte_count);
mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info);

out:
Expand Down

0 comments on commit b5b60bb

Please sign in to comment.