providers/mana: CQ polling
Implement polling of CQs and arming CQs.
Add processing of CQEs for RC QPs.

Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
Konstantin Taranov committed May 15, 2024
1 parent 140ad50 commit 4b0e953
Showing 4 changed files with 228 additions and 5 deletions.
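For context, the new entry points are reached through the standard libibverbs calls: ibv_req_notify_cq() is routed to mana_arm_cq() (see the mana.c hunk below) and ibv_poll_cq() to mana_poll_cq(). The following consumer-side sketch is not part of this commit; it assumes the CQ was created on a completion channel and trims error handling to early returns.

#include <stdio.h>
#include <infiniband/verbs.h>

/* Illustrative only: drain a CQ, then re-arm it and wait for the next event. */
static void drain_and_rearm(struct ibv_cq *cq, struct ibv_comp_channel *ch)
{
	struct ibv_wc wc[16];
	struct ibv_cq *ev_cq;
	void *ev_ctx;
	int n;

	do {
		n = ibv_poll_cq(cq, 16, wc);	/* serviced by mana_poll_cq() */
		for (int i = 0; i < n; i++)
			if (wc[i].status != IBV_WC_SUCCESS)
				fprintf(stderr, "wr_id %llu: %s\n",
					(unsigned long long)wc[i].wr_id,
					ibv_wc_status_str(wc[i].status));
	} while (n > 0);

	/* solicited_only must be 0: mana_arm_cq() rejects solicited arming */
	if (ibv_req_notify_cq(cq, 0))
		return;
	if (ibv_get_cq_event(ch, &ev_cq, &ev_ctx))
		return;
	ibv_ack_cq_events(ev_cq, 1);
}

After the event fires, the application re-enters the poll loop; as the diff below shows, the provider keeps already-processed CQEs as ready work completions in its shadow queues until the caller supplies enough ibv_wc slots.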
229 changes: 224 additions & 5 deletions providers/mana/cq.c
@@ -19,7 +19,9 @@
#include <kernel-abi/mana-abi.h>

#include "mana.h"

#include "gdma.h"
#include "doorbells.h"
#include "rollback.h"

#define INITIALIZED_OWNER_BIT(log2_num_entries) (1UL << (log2_num_entries))

DECLARE_DRV_CMD(mana_create_cq, IB_USER_VERBS_CMD_CREATE_CQ,
@@ -131,10 +133,227 @@ int mana_destroy_cq(struct ibv_cq *ibcq)
return ret;
}

int mana_arm_cq(struct ibv_cq *ibcq, int solicited)
{
struct mana_cq *cq = container_of(ibcq, struct mana_cq, ibcq);

if (solicited)
return -EOPNOTSUPP;
if (cq->cqid == UINT32_MAX)
return -EINVAL;

gdma_ring_cq_doorbell(cq);
return 0;
}

/* Requester (send) side: an armed completion reports the latest ACKed PSN,
 * so retire every posted send WQE whose last packet falls at or before it. */
static inline uint32_t handle_rc_requester_cqe(struct mana_qp *qp, struct gdma_cqe *cqe)
{
struct mana_gdma_queue *recv_queue = &qp->rc_qp.queues[USER_RC_RECV_QUEUE_REQUESTER];
struct mana_gdma_queue *send_queue = &qp->rc_qp.queues[USER_RC_SEND_QUEUE_REQUESTER];
uint32_t syndrome = cqe->rdma_cqe.rc_armed_completion.syndrome;
uint32_t psn = cqe->rdma_cqe.rc_armed_completion.psn;
struct rc_sq_shadow_wqe *shadow_wqe;
uint32_t wcs = 0;

if (!IB_IS_ACK(syndrome))
return 0;

if (!PSN_GT(psn, qp->rc_qp.sq_highest_completed_psn))
return 0;

qp->rc_qp.sq_highest_completed_psn = psn;

if (!PSN_LT(psn, qp->rc_qp.sq_psn))
return 0;

while ((shadow_wqe = (struct rc_sq_shadow_wqe *)
shadow_queue_get_next_to_complete(&qp->shadow_sq)) != NULL) {
if (PSN_LT(psn, shadow_wqe->end_psn))
break;

send_queue->cons_idx += shadow_wqe->header.posted_wqe_size_in_bu;
send_queue->cons_idx &= GDMA_QUEUE_OFFSET_MASK;

recv_queue->cons_idx += shadow_wqe->read_posted_wqe_size_in_bu;
recv_queue->cons_idx &= GDMA_QUEUE_OFFSET_MASK;

uint32_t offset = shadow_wqe->header.unmasked_queue_offset +
shadow_wqe->header.posted_wqe_size_in_bu;
mana_ib_update_shared_mem_left_offset(qp, offset & GDMA_QUEUE_OFFSET_MASK);

shadow_queue_advance_next_to_complete(&qp->shadow_sq);
wcs++;
}

uint32_t prev_psn = PSN_DEC(qp->rc_qp.sq_psn);

if (qp->rc_qp.sq_highest_completed_psn == prev_psn)
gdma_arm_normal_cqe(recv_queue, qp->rc_qp.sq_psn);
else
gdma_arm_normal_cqe(recv_queue, prev_psn);

return wcs;
}

/* Responder (recv) side: match the CQE against the oldest outstanding recv WQE
 * by queue offset and record its length, immediate data and completion flags. */
static inline uint32_t handle_rc_responder_cqe(struct mana_qp *qp, struct gdma_cqe *cqe)
{
struct mana_gdma_queue *recv_queue = &qp->rc_qp.queues[USER_RC_RECV_QUEUE_RESPONDER];
struct rc_rq_shadow_wqe *shadow_wqe;

shadow_wqe = (struct rc_rq_shadow_wqe *)shadow_queue_get_next_to_complete(&qp->shadow_rq);
if (!shadow_wqe)
return 0;

uint32_t offset_cqe = cqe->rdma_cqe.rc_recv.rx_wqe_offset / GDMA_WQE_ALIGNMENT_UNIT_SIZE;
uint32_t offset_wqe = shadow_wqe->header.unmasked_queue_offset & GDMA_QUEUE_OFFSET_MASK;

if (offset_cqe != offset_wqe)
return 0;

shadow_wqe->byte_len = cqe->rdma_cqe.rc_recv.msg_len;
shadow_wqe->imm_or_rkey = cqe->rdma_cqe.rc_recv.imm_data;

switch (cqe->rdma_cqe.cqe_type) {
case CQE_TYPE_RC_WRITE_IMM:
shadow_wqe->header.opcode = IBV_WC_RECV_RDMA_WITH_IMM;
SWITCH_FALLTHROUGH;
case CQE_TYPE_RC_SEND_IMM:
shadow_wqe->header.flags |= IBV_WC_WITH_IMM;
break;
case CQE_TYPE_RC_SEND_INV:
shadow_wqe->header.flags |= IBV_WC_WITH_INV;
break;
default:
break;
}

recv_queue->cons_idx += shadow_wqe->header.posted_wqe_size_in_bu;
recv_queue->cons_idx &= GDMA_QUEUE_OFFSET_MASK;

shadow_queue_advance_next_to_complete(&qp->shadow_rq);
return 1;
}

static inline uint32_t mana_handle_cqe(struct mana_context *ctx, struct gdma_cqe *cqe)
{
struct mana_qp *qp;

if (cqe->is_sq) // impossible for rc
return 0;

qp = mana_get_qp_from_rq(ctx, cqe->wqid);
if (!qp)
return 0;

if (cqe->rdma_cqe.cqe_type == CQE_TYPE_ARMED_CMPL)
return handle_rc_requester_cqe(qp, cqe);
else
return handle_rc_responder_cqe(qp, cqe);
}

/* Copy out the CQE at cq->head if the device has published one:
 * returns 1 on success, 0 if the queue is empty, -1 on overflow. */
static inline int gdma_read_cqe(struct mana_cq *cq, struct gdma_cqe *cqe)
{
uint32_t new_entry_owner_bits;
uint32_t old_entry_owner_bits;
struct gdma_cqe *current_cqe;
uint32_t owner_bits;

current_cqe = ((struct gdma_cqe *)cq->buf) + (cq->head % cq->cqe);
new_entry_owner_bits = (cq->head / cq->cqe) & CQ_OWNER_MASK;
old_entry_owner_bits = (cq->head / cq->cqe - 1) & CQ_OWNER_MASK;
owner_bits = current_cqe->owner_bits;

if (owner_bits == old_entry_owner_bits)
return 0; /* no new entry */
if (owner_bits != new_entry_owner_bits)
return -1; /*overflow detected*/

udma_from_device_barrier();
*cqe = *current_cqe;
cq->head++;
return 1;
}

static void fill_verbs_from_shadow_wqe(struct mana_qp *qp, struct ibv_wc *wc,
const struct shadow_wqe_header *shadow_wqe)
{
const struct rc_rq_shadow_wqe *rc_wqe = (const struct rc_rq_shadow_wqe *)shadow_wqe;

wc->wr_id = shadow_wqe->wr_id;
wc->status = IBV_WC_SUCCESS;
wc->opcode = shadow_wqe->opcode;
wc->vendor_err = 0;
wc->wc_flags = shadow_wqe->flags;
wc->qp_num = qp->ibqp.qp.qp_num;
wc->pkey_index = 0;

if (shadow_wqe->opcode & IBV_WC_RECV) {
wc->byte_len = rc_wqe->byte_len;
wc->imm_data = htobe32(rc_wqe->imm_or_rkey);
}
}

static int mana_process_completions(struct mana_cq *cq, int nwc, struct ibv_wc *wc)
{
struct shadow_wqe_header *shadow_wqe;
struct mana_qp *qp;
int wc_index = 0;

/* process send shadow queue completions */
list_for_each(&cq->send_qp_list, qp, send_cq_node) {
while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_sq))
!= NULL) {
if (wc_index >= nwc)
goto out;

fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
shadow_queue_advance_consumer(&qp->shadow_sq);
wc_index++;
}
}

/* process recv shadow queue completions */
list_for_each(&cq->recv_qp_list, qp, recv_cq_node) {
while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_rq))
!= NULL) {
if (wc_index >= nwc)
goto out;

fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
shadow_queue_advance_consumer(&qp->shadow_rq);
wc_index++;
}
}

out:
return wc_index;
}

int mana_poll_cq(struct ibv_cq *ibcq, int nwc, struct ibv_wc *wc)
{
struct mana_cq *cq = container_of(ibcq, struct mana_cq, ibcq);
struct mana_context *ctx = to_mctx(ibcq->context);
struct gdma_cqe gdma_cqe;
int num_polled = 0;
int ret;

pthread_spin_lock(&cq->lock);

while (cq->ready_wcs < nwc) {
ret = gdma_read_cqe(cq, &gdma_cqe);
if (ret < 0) {
num_polled = -1;
goto out;
}
if (ret == 0)
break;
cq->ready_wcs += mana_handle_cqe(ctx, &gdma_cqe);
}

num_polled = mana_process_completions(cq, nwc, wc);
cq->ready_wcs -= num_polled;
out:
pthread_spin_unlock(&cq->lock);

return num_polled;
}
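
The empty-versus-overflow decision in gdma_read_cqe() above depends only on the 3-bit owner field the device writes into each CQE, compared against the pass number implied by cq->head. Here is a standalone restatement of that arithmetic (illustrative helper, not part of the commit):

#include <stdint.h>

#define CQ_OWNER_MASK 0x7	/* same value as the new define in doorbells.h */

enum cqe_state { CQE_EMPTY, CQE_NEW, CQE_OVERFLOW };

/* Illustrative only: classify the CQE at 'head' on a ring of num_cqe entries. */
static enum cqe_state classify_cqe(uint32_t head, uint32_t num_cqe, uint32_t owner_bits)
{
	uint32_t cur_pass  = (head / num_cqe) & CQ_OWNER_MASK;		/* expected on this pass */
	uint32_t prev_pass = (head / num_cqe - 1) & CQ_OWNER_MASK;	/* already consumed */

	if (owner_bits == prev_pass)
		return CQE_EMPTY;	/* device has not written this slot yet */
	if (owner_bits != cur_pass)
		return CQE_OVERFLOW;	/* device lapped the consumer */
	return CQE_NEW;
}

Each time the consumer completes a full pass over the ring, the expected owner value advances by one (mod 8); a value one behind means the slot still holds the previous pass's entry, and anything else means the device has lapped the consumer and overwritten unread CQEs.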
1 change: 1 addition & 0 deletions providers/mana/doorbells.h
@@ -11,6 +11,7 @@
#include "mana.h"

#define GDMA_CQE_OWNER_BITS 3
#define CQ_OWNER_MASK ((1 << (GDMA_CQE_OWNER_BITS)) - 1)

#define DOORBELL_OFFSET_SQ 0x0
#define DOORBELL_OFFSET_RQ 0x400
1 change: 1 addition & 0 deletions providers/mana/mana.c
@@ -239,6 +239,7 @@ static const struct verbs_context_ops mana_ctx_ops = {
.query_device_ex = mana_query_device_ex,
.query_port = mana_query_port,
.reg_mr = mana_reg_mr,
.req_notify_cq = mana_arm_cq,
};

static struct verbs_device *mana_device_alloc(struct verbs_sysfs_dev *sysfs_dev)
2 changes: 2 additions & 0 deletions providers/mana/mana.h
@@ -240,5 +240,7 @@ int mana_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
int mana_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
struct ibv_send_wr **bad);

int mana_arm_cq(struct ibv_cq *ibcq, int solicited);

struct mana_qp *mana_get_qp_from_rq(struct mana_context *ctx, uint32_t qpn);
#endif
