Skip to content

Commit

Permalink
prov/efa: Fix a memory leak in efa_rdm_ep_post_handshake
Browse files Browse the repository at this point in the history
efa_rdm_ep_post_handshake failed to clean up allocated txe
in the error path, this patch fixes this issue.

Signed-off-by: Shi Jin <sjina@amazon.com>
  • Loading branch information
shijin-aws committed Aug 8, 2024
1 parent 5e42d69 commit 4df951a
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 0 deletions.
2 changes: 2 additions & 0 deletions prov/efa/src/rdm/efa_rdm_ep_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,7 @@ ssize_t efa_rdm_ep_post_handshake(struct efa_rdm_ep *ep, struct efa_rdm_peer *pe
pkt_entry = efa_rdm_pke_alloc(ep, ep->efa_tx_pkt_pool, EFA_RDM_PKE_FROM_EFA_TX_POOL);
if (OFI_UNLIKELY(!pkt_entry)) {
EFA_WARN(FI_LOG_EP_CTRL, "PKE entries exhausted.\n");
efa_rdm_txe_release(txe);
return -FI_EAGAIN;
}

Expand All @@ -618,6 +619,7 @@ ssize_t efa_rdm_ep_post_handshake(struct efa_rdm_ep *ep, struct efa_rdm_peer *pe
ret = efa_rdm_pke_sendv(&pkt_entry, 1, 0);
if (OFI_UNLIKELY(ret)) {
efa_rdm_pke_release_tx(pkt_entry);
efa_rdm_txe_release(txe);
}
return ret;
}
Expand Down
63 changes: 63 additions & 0 deletions prov/efa/test/efa_unit_test_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -1107,3 +1107,66 @@ void test_efa_rdm_ep_zcpy_recv_cancel(struct efa_resource **state)
assert_int_equal(fi_cancel((struct fid *)resource->ep, &cancel_context), -FI_EOPNOTSUPP);

}

/**
* @brief when efa_rdm_ep_post_handshake_error failed due to pkt pool exhaustion,
* make sure both txe is cleaned
*
* @param[in] state struct efa_resource that is managed by the framework
*/
void test_efa_rdm_ep_post_handshake_error_handling_pke_exhaustion(struct efa_resource **state)
{
struct efa_rdm_ep *efa_rdm_ep;
struct efa_rdm_peer *peer;
struct efa_resource *resource = *state;
struct efa_ep_addr raw_addr = {0};
size_t raw_addr_len = sizeof(struct efa_ep_addr);
fi_addr_t peer_addr;
int err, numaddr;
struct efa_rdm_pke **pkt_entry_vec;
int i;

efa_unit_test_resource_construct(resource, FI_EP_RDM);

/* create a fake peer */
err = fi_getname(&resource->ep->fid, &raw_addr, &raw_addr_len);
assert_int_equal(err, 0);
raw_addr.qpn = 1;
raw_addr.qkey = 0x1234;
numaddr = fi_av_insert(resource->av, &raw_addr, 1, &peer_addr, 0, NULL);
assert_int_equal(numaddr, 1);

efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid);
/* close shm_ep to force efa_rdm_ep to use efa device to send */
if (efa_rdm_ep->shm_ep) {
err = fi_close(&efa_rdm_ep->shm_ep->fid);
assert_int_equal(err, 0);
efa_rdm_ep->shm_ep = NULL;
}
/* set peer->flag to EFA_RDM_PEER_REQ_SENT will make efa_rdm_atomic() think
* a REQ packet has been sent to the peer (so no need to send again)
* handshake has not been received, so we do not know whether the peer support DC
*/
peer = efa_rdm_ep_get_peer(efa_rdm_ep, peer_addr);
peer->flags = EFA_RDM_PEER_REQ_SENT;
peer->is_local = false;

pkt_entry_vec = calloc(efa_rdm_ep->tx_size, sizeof(struct efa_rdm_pke *));
assert_non_null(pkt_entry_vec);

/* Exhaust the tx pkt pool */
for (i = 0; i < efa_rdm_ep->tx_size; i++) {
pkt_entry_vec[i] = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_tx_pkt_pool, EFA_RDM_PKE_FROM_EFA_TX_POOL);
assert_non_null(pkt_entry_vec[i]);
}

/* txe list should be empty before and after the failed handshake post call */
assert_true(dlist_empty(&efa_rdm_ep->txe_list));
assert_int_equal(efa_rdm_ep_post_handshake(efa_rdm_ep, peer), -FI_EAGAIN);
assert_true(dlist_empty(&efa_rdm_ep->txe_list));

for (i = 0; i < efa_rdm_ep->tx_size; i++)
efa_rdm_pke_release_tx(pkt_entry_vec[i]);

free(pkt_entry_vec);
}
1 change: 1 addition & 0 deletions prov/efa/test/efa_unit_tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ int main(void)
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_user_p2p_not_supported_zcpy_rx_happy, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_close_discard_posted_recv, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_zcpy_recv_cancel, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_post_handshake_error_handling_pke_exhaustion, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_dgram_cq_read_empty_cq, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_empty_cq, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_failed_poll, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
Expand Down
1 change: 1 addition & 0 deletions prov/efa/test/efa_unit_tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ void test_efa_rdm_ep_user_zcpy_rx_unhappy_due_to_sas();
void test_efa_rdm_ep_user_p2p_not_supported_zcpy_rx_happy();
void test_efa_rdm_ep_close_discard_posted_recv();
void test_efa_rdm_ep_zcpy_recv_cancel();
void test_efa_rdm_ep_post_handshake_error_handling_pke_exhaustion();
void test_dgram_cq_read_empty_cq();
void test_ibv_cq_ex_read_empty_cq();
void test_ibv_cq_ex_read_failed_poll();
Expand Down

0 comments on commit 4df951a

Please sign in to comment.