Skip to content

Commit

Permalink
[v1.22.x] prov/efa: Fix the unexp_pkt clean up.
Browse files Browse the repository at this point in the history
Currently there are two issues with the rxe->unexp_pkt:

1. rxe->unexp_pkt is assigned to a pkt entry when
the rxe was allocated as an unexpected rx entry.
After user posts a receive and such rxe is matched,
it should clean this unexp_pkt field when processing
the pkts. Otherwise it will cause a double free error
in `efa_rdm_rxe_handle_error` which will release the
unexp_pkt again if it's not NULL.

2. rxe->unexp_pkt is a linked list for multi-req protocols
(medium, runting). The current code just call efa_rdm_pke_release_rx
to release it, which is wrong, because efa_rdm_pke_release_rx
requires the pkt entry to be unlinked. This patch introduces
a new function efa_rdm_pke_release_rx_list to unlink and release
each pkt entry in the linked list.

Signed-off-by: Shi Jin <sjina@amazon.com>
(cherry picked from commit 36b974d)
  • Loading branch information
shijin-aws committed Feb 13, 2025
1 parent 9c7aa79 commit 7117a10
Show file tree
Hide file tree
Showing 11 changed files with 149 additions and 2 deletions.
1 change: 1 addition & 0 deletions prov/efa/Makefile.include
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ prov_efa_test_efa_unit_test_LDFLAGS = $(cmocka_rpath) $(efa_LDFLAGS) $(cmocka_LD
-Wl,--wrap=efadv_query_device \
-Wl,--wrap=ofi_copy_from_hmem_iov \
-Wl,--wrap=efa_rdm_pke_read \
-Wl,--wrap=efa_rdm_pke_proc_matched_rtm \
-Wl,--wrap=efa_device_support_unsolicited_write_recv

if HAVE_EFADV_CQ_EX
Expand Down
2 changes: 1 addition & 1 deletion prov/efa/src/rdm/efa_rdm_ope.c
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ void efa_rdm_rxe_handle_error(struct efa_rdm_ope *rxe, int err, int prov_errno)
dlist_remove(&rxe->queued_entry);

if (rxe->unexp_pkt) {
efa_rdm_pke_release_rx(rxe->unexp_pkt);
efa_rdm_pke_release_rx_list(rxe->unexp_pkt);
rxe->unexp_pkt = NULL;
}

Expand Down
21 changes: 21 additions & 0 deletions prov/efa/src/rdm/efa_rdm_pke.c
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,27 @@ void efa_rdm_pke_release_rx(struct efa_rdm_pke *pkt_entry)
efa_rdm_pke_release(pkt_entry);
}

/**
* @brief Release all rx packet entries that are linked
* This can happen when medium / runting protocol is used.
* This function will unlink the pkt entries in the list before
* releasing each pkt entry as it is required by efa_rdm_pke_release_rx
* (see above).
* @param pkt_entry the head of the pkt entry linked list
*/
void efa_rdm_pke_release_rx_list(struct efa_rdm_pke *pkt_entry)
{
struct efa_rdm_pke *curr, *next;

curr = pkt_entry;
while (curr) {
next = curr->next;
curr->next = NULL;
efa_rdm_pke_release_rx(curr);
curr = next;
}
}

void efa_rdm_pke_copy(struct efa_rdm_pke *dest,
struct efa_rdm_pke *src)
{
Expand Down
2 changes: 2 additions & 0 deletions prov/efa/src/rdm/efa_rdm_pke.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,8 @@ void efa_rdm_pke_release_tx(struct efa_rdm_pke *pkt_entry);

void efa_rdm_pke_release_rx(struct efa_rdm_pke *pkt_entry);

void efa_rdm_pke_release_rx_list(struct efa_rdm_pke *pkt_entry);

void efa_rdm_pke_release(struct efa_rdm_pke *pkt_entry);

void efa_rdm_pke_append(struct efa_rdm_pke *dst,
Expand Down
9 changes: 8 additions & 1 deletion prov/efa/src/rdm/efa_rdm_srx.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ static int efa_rdm_srx_start(struct fi_peer_rx_entry *peer_rxe)

rxe->state = EFA_RDM_RXE_MATCHED;

/**
* Since the rxe is now matched, we need to clean the unexp_pkt
* as the pkts are now processed.
*/
rxe->unexp_pkt = NULL;

ret = efa_rdm_pke_proc_matched_rtm(pkt_entry);
if (OFI_UNLIKELY(ret)) {
/* If we run out of memory registrations, we fall back to
Expand Down Expand Up @@ -101,7 +107,8 @@ static int efa_rdm_srx_discard(struct fi_peer_rx_entry *peer_rxe)
EFA_WARN(FI_LOG_EP_CTRL,
"Discarding unmatched unexpected rxe: %p pkt_entry %p\n",
rxe, rxe->unexp_pkt);
efa_rdm_pke_release_rx(rxe->unexp_pkt);
efa_rdm_pke_release_rx_list(rxe->unexp_pkt);
rxe->unexp_pkt = NULL;
efa_rdm_rxe_release_internal(rxe);
return FI_SUCCESS;
}
Expand Down
11 changes: 11 additions & 0 deletions prov/efa/test/efa_unit_test_mocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,11 @@ int efa_mock_efa_rdm_pke_read_return_mock(struct efa_rdm_ope *ope)
return mock();
}

ssize_t efa_mock_efa_rdm_pke_proc_matched_rtm_no_op(struct efa_rdm_pke *pkt_entry)
{
return FI_SUCCESS;
}

bool efa_mock_efa_device_support_unsolicited_write_recv()
{
return mock();
Expand All @@ -220,6 +225,7 @@ struct efa_unit_test_mocks g_efa_unit_test_mocks = {
#endif
.ofi_copy_from_hmem_iov = __real_ofi_copy_from_hmem_iov,
.efa_rdm_pke_read = __real_efa_rdm_pke_read,
.efa_rdm_pke_proc_matched_rtm = __real_efa_rdm_pke_proc_matched_rtm,
.efa_device_support_unsolicited_write_recv = __real_efa_device_support_unsolicited_write_recv,
.ibv_is_fork_initialized = __real_ibv_is_fork_initialized,
#if HAVE_EFADV_QUERY_MR
Expand Down Expand Up @@ -336,6 +342,11 @@ int __wrap_efa_rdm_pke_read(struct efa_rdm_ope *ope)
return g_efa_unit_test_mocks.efa_rdm_pke_read(ope);
}

int __wrap_efa_rdm_pke_proc_matched_rtm(struct efa_rdm_pke *pkt_entry)
{
return g_efa_unit_test_mocks.efa_rdm_pke_proc_matched_rtm(pkt_entry);
}

bool __wrap_efa_device_support_unsolicited_write_recv(void)
{
return g_efa_unit_test_mocks.efa_device_support_unsolicited_write_recv();
Expand Down
6 changes: 6 additions & 0 deletions prov/efa/test/efa_unit_test_mocks.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ bool __real_efa_device_support_unsolicited_write_recv();

int efa_mock_efa_rdm_pke_read_return_mock(struct efa_rdm_ope *ope);

ssize_t __real_efa_rdm_pke_proc_matched_rtm(struct efa_rdm_pke *pkt_entry);

ssize_t efa_mock_efa_rdm_pke_proc_matched_rtm_no_op(struct efa_rdm_pke *pkt_entry);

bool efa_mock_efa_device_support_unsolicited_write_recv(void);

struct efa_unit_test_mocks
Expand Down Expand Up @@ -120,6 +124,8 @@ struct efa_unit_test_mocks

int (*efa_rdm_pke_read)(struct efa_rdm_ope *ope);

ssize_t (*efa_rdm_pke_proc_matched_rtm)(struct efa_rdm_pke *pkt_entry);

bool (*efa_device_support_unsolicited_write_recv)(void);

enum ibv_fork_status (*ibv_is_fork_initialized)(void);
Expand Down
40 changes: 40 additions & 0 deletions prov/efa/test/efa_unit_test_pke.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,43 @@ void test_efa_rdm_pke_handle_longcts_rtm_send_completion(struct efa_resource **s

efa_rdm_pke_release_tx(pkt_entry);
}

/**
* @brief Test the efa_rdm_pke_release_rx_list function
*
* @param state
*/
void test_efa_rdm_pke_release_rx_list(struct efa_resource **state)
{
struct efa_resource *resource = *state;
struct efa_rdm_ep *efa_rdm_ep;
struct efa_rdm_pke *pke, *curr;
int i;

efa_unit_test_resource_construct(resource, FI_EP_RDM);

efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid);

/* Fake a rx pkt entry */
pke = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_rx_pkt_pool, EFA_RDM_PKE_FROM_EFA_RX_POOL);
assert_non_null(pke);
efa_rdm_ep->efa_rx_pkts_posted = efa_rdm_ep_get_rx_pool_size(efa_rdm_ep);

/* link multiple pkes to this pke */
for (i = 1; i < 10; i++) {
curr = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_rx_pkt_pool, EFA_RDM_PKE_FROM_EFA_RX_POOL);
assert_non_null(curr);
efa_rdm_pke_append(pke, curr);
}

/* Release all entries in the linked list */
efa_rdm_pke_release_rx_list(pke);

/**
* Now the rx pkt buffer pool should be empty so we can destroy it
* Otherwise there will be an assertion error on the use cnt is
* is non-zero
*/
ofi_bufpool_destroy(efa_rdm_ep->efa_rx_pkt_pool);
efa_rdm_ep->efa_rx_pkt_pool = NULL;
}
54 changes: 54 additions & 0 deletions prov/efa/test/efa_unit_test_srx.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "efa_unit_tests.h"
#include "ofi_util.h"
#include "efa_rdm_ep.h"
#include "efa_rdm_msg.h"

/**
* @brief This test validates whether the default min_multi_recv size is correctly
Expand Down Expand Up @@ -67,3 +68,56 @@ void test_efa_srx_lock(struct efa_resource **state)
util_domain.domain_fid.fid);
assert_true(((void *) srx_ctx->lock == (void *) &efa_domain->srx_lock));
}


/**
* @brief Test srx's start ops is updating the rxe status correctly
*
* @param state
*/
void test_efa_srx_unexp_pkt(struct efa_resource **state)
{
struct efa_resource *resource = *state;
struct efa_rdm_ep *efa_rdm_ep;
struct util_srx_ctx *srx_ctx;
struct efa_rdm_ope *rxe;
struct efa_rdm_pke *pke;
struct efa_unit_test_eager_rtm_pkt_attr pke_attr = {
.msg_id = 0,
.connid = 0x1234
};

g_efa_unit_test_mocks.efa_rdm_pke_proc_matched_rtm = &efa_mock_efa_rdm_pke_proc_matched_rtm_no_op;

efa_unit_test_resource_construct(resource, FI_EP_RDM);

efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid);
srx_ctx = efa_rdm_ep_get_peer_srx_ctx(efa_rdm_ep);

/* Fake a rx pkt entry */
pke = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_rx_pkt_pool, EFA_RDM_PKE_FROM_EFA_RX_POOL);
assert_non_null(pke);
efa_rdm_ep->efa_rx_pkts_posted = efa_rdm_ep_get_rx_pool_size(efa_rdm_ep);
pke->addr = FI_ADDR_UNSPEC;

efa_unit_test_eager_msgrtm_pkt_construct(pke, &pke_attr);
/**
* Allocate an rxe with the rx pkt.
* Since there is no recv posted, the rxe must be unexpected
*/
ofi_genlock_lock(srx_ctx->lock);
rxe = efa_rdm_msg_alloc_rxe_for_msgrtm(efa_rdm_ep, &pke);
assert_true(rxe->state == EFA_RDM_RXE_UNEXP);
assert_true(rxe->unexp_pkt == pke);

/* Start progressing the unexpected rxe */
srx_ctx->peer_srx.peer_ops->start_msg(rxe->peer_rxe);

/* Make sure rxe is updated as mateched and unexp_pkt is NULL */
assert_true(rxe->state == EFA_RDM_RXE_MATCHED);
assert_true(rxe->unexp_pkt == NULL);

efa_rdm_pke_release_rx(pke);
efa_rdm_rxe_release(rxe);
ofi_genlock_unlock(srx_ctx->lock);
}
3 changes: 3 additions & 0 deletions prov/efa/test/efa_unit_tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ static int efa_unit_test_mocks_teardown(void **state)
#endif
.ofi_copy_from_hmem_iov = __real_ofi_copy_from_hmem_iov,
.efa_rdm_pke_read = __real_efa_rdm_pke_read,
.efa_rdm_pke_proc_matched_rtm = __real_efa_rdm_pke_proc_matched_rtm,
.efa_device_support_unsolicited_write_recv = __real_efa_device_support_unsolicited_write_recv,
.ibv_is_fork_initialized = __real_ibv_is_fork_initialized,
};
Expand Down Expand Up @@ -161,6 +162,7 @@ int main(void)
cmocka_unit_test_setup_teardown(test_efa_srx_min_multi_recv_size, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_srx_cq, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_srx_lock, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_srx_unexp_pkt, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rnr_queue_and_resend, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ope_prepare_to_post_send_with_no_enough_tx_pkts, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ope_prepare_to_post_send_host_memory, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
Expand All @@ -186,6 +188,7 @@ int main(void)
cmocka_unit_test_setup_teardown(test_efa_rdm_peer_select_readbase_rtm_no_runt, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_peer_select_readbase_rtm_do_runt, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_pke_get_available_copy_methods_align128, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_pke_release_rx_list, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_domain_open_ops_wrong_name, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_domain_open_ops_mr_query, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_cq_ibv_cq_poll_list_same_tx_rx_cq_single_ep, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
Expand Down
2 changes: 2 additions & 0 deletions prov/efa/test/efa_unit_tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ void test_efa_use_device_rdma_opt_old();
void test_efa_srx_min_multi_recv_size();
void test_efa_srx_cq();
void test_efa_srx_lock();
void test_efa_srx_unexp_pkt();
void test_efa_rnr_queue_and_resend();
void test_efa_rdm_ope_prepare_to_post_send_with_no_enough_tx_pkts();
void test_efa_rdm_ope_prepare_to_post_send_host_memory();
Expand Down Expand Up @@ -209,6 +210,7 @@ void test_efa_rdm_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep();
void test_efa_rdm_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep();
void test_efa_cntr_post_initial_rx_pkts();
void test_efa_rdm_pke_handle_longcts_rtm_send_completion();
void test_efa_rdm_pke_release_rx_list();
void test_efa_rdm_peer_reorder_expected_msg_id();
void test_efa_rdm_peer_reorder_smaller_msg_id();
void test_efa_rdm_peer_reorder_larger_msg_id();
Expand Down

0 comments on commit 7117a10

Please sign in to comment.