From c9a7a3529c2a78a03efc23676b73ad776a861441 Mon Sep 17 00:00:00 2001 From: Shi Jin Date: Tue, 30 Jul 2024 19:15:37 +0000 Subject: [PATCH] [v1.22.x] prov/efa: Make fi_cancel return EOPNOTSUPP for zero copy receive mode. A receive cannot be safely cancelled in zero-copy receive mode because the recv cannot be cancelled at the HW level. Make fi_cancel return EOPNOTSUPP instead of adding hacks that do not faithfully emulate cancellation. Signed-off-by: Shi Jin (cherry picked from commit 01c0f6cf44074d7d58c86183e19bda5310308f8e) --- man/fi_efa.7.md | 2 ++ prov/efa/docs/efa_rdm_protocol_v4.md | 4 ++++ prov/efa/src/rdm/efa_rdm_ep_fiops.c | 5 +++++ prov/efa/test/efa_unit_test_ep.c | 33 +++++++++++++++++++++++++++- prov/efa/test/efa_unit_tests.c | 1 + prov/efa/test/efa_unit_tests.h | 1 + 6 files changed, 45 insertions(+), 1 deletion(-) diff --git a/man/fi_efa.7.md b/man/fi_efa.7.md index f2bb7a3e9ce..365905f3002 100644 --- a/man/fi_efa.7.md +++ b/man/fi_efa.7.md @@ -84,6 +84,8 @@ No support for counters for the DGRAM endpoint. No support for inject. +No support for `fi_cancel()` for the [zero-copy receive mode](https://github.com/ofiwg/libfabric/blob/main/prov/efa/docs/efa_rdm_protocol_v4.md#48-user-receive-qp-feature--request-and-zero-copy-receive). + When using FI_HMEM for AWS Neuron or Habana SynapseAI buffers, the provider requires peer to peer transaction support between the EFA and the FI_HMEM device. Therefore, the FI_HMEM_P2P_DISABLED option is not supported by the EFA diff --git a/prov/efa/docs/efa_rdm_protocol_v4.md b/prov/efa/docs/efa_rdm_protocol_v4.md index 2cbd80c561e..33954a25777 100644 --- a/prov/efa/docs/efa_rdm_protocol_v4.md +++ b/prov/efa/docs/efa_rdm_protocol_v4.md @@ -1599,6 +1599,10 @@ If the receiver supports it, sender will then send packets with user data to the there is no ordering or tagging requirement, and the receiver already knows the sender, sender can send packets without any headers in the payload. 
If the receiver doesn't support this extra feature, the sender will continue send packets with headers to the receiver's default QP. + +On the receiver side, the user recv buffer is posted directly to the user recv QP when the user +calls fi_recv(). Currently, such a receive cannot be cancelled, so fi_cancel() is not supported in +zero-copy receive mode. If a receiver gets RTM packets delivered to its default QP, it raises an error because it requests all RTM packets must be delivered to its user recv QP. diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index af719fbf1e6..9c187d5c33b 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -1299,6 +1299,11 @@ ssize_t efa_rdm_ep_cancel(fid_t fid_ep, void *context) struct efa_rdm_ep *ep; ep = container_of(fid_ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid.fid); + if (ep->use_zcpy_rx) { + EFA_WARN(FI_LOG_EP_CTRL, "fi_cancel is not supported in zero-copy receive mode.\n"); + return -FI_EOPNOTSUPP; + } + return ep->peer_srx_ep->ops->cancel(&ep->peer_srx_ep->fid, context); } diff --git a/prov/efa/test/efa_unit_test_ep.c b/prov/efa/test/efa_unit_test_ep.c index 8ba0041620e..d05672d4c6f 100644 --- a/prov/efa/test/efa_unit_test_ep.c +++ b/prov/efa/test/efa_unit_test_ep.c @@ -1017,4 +1017,35 @@ void test_efa_rdm_ep_close_discard_posted_recv(struct efa_resource **state) /* Reset to NULL to avoid test reaper closing again */ resource->ep = NULL; -} \ No newline at end of file +} + +void test_efa_rdm_ep_zcpy_recv_cancel(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + struct fi_context cancel_context = {0}; + struct efa_unit_test_buff recv_buff; + + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + assert_non_null(resource->hints); + + resource->hints->tx_attr->msg_order = FI_ORDER_NONE; + resource->hints->rx_attr->msg_order = FI_ORDER_NONE; + resource->hints->caps = FI_MSG; + + /* enable zero-copy recv mode in ep */ + 
test_efa_rdm_ep_use_zcpy_rx_impl(resource, true); + + /* Construct a recv buffer with mr */ + efa_unit_test_buff_construct(&recv_buff, resource, 16); + + assert_int_equal(fi_recv(resource->ep, recv_buff.buff, recv_buff.size, fi_mr_desc(recv_buff.mr), FI_ADDR_UNSPEC, &cancel_context), 0); + + assert_int_equal(fi_cancel((struct fid *)resource->ep, &cancel_context), -FI_EOPNOTSUPP); + + /** + * the buf is still posted to rdma-core, so unregistering mr can + * return non-zero. Currently ignore this failure. + */ + (void) fi_close(&recv_buff.mr->fid); + free(recv_buff.buff); +} diff --git a/prov/efa/test/efa_unit_tests.c b/prov/efa/test/efa_unit_tests.c index dbf582f1434..f45748dea4e 100644 --- a/prov/efa/test/efa_unit_tests.c +++ b/prov/efa/test/efa_unit_tests.c @@ -103,6 +103,7 @@ int main(void) cmocka_unit_test_setup_teardown(test_efa_rdm_ep_user_zcpy_rx_happy, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_user_zcpy_rx_unhappy_due_to_sas, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_close_discard_posted_recv, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_ep_zcpy_recv_cancel, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_dgram_cq_read_empty_cq, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_empty_cq, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_failed_poll, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), diff --git a/prov/efa/test/efa_unit_tests.h b/prov/efa/test/efa_unit_tests.h index 59972007d82..a029daae424 100644 --- a/prov/efa/test/efa_unit_tests.h +++ b/prov/efa/test/efa_unit_tests.h @@ -117,6 +117,7 @@ void test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_bad(); void 
test_efa_rdm_ep_user_zcpy_rx_happy(); void test_efa_rdm_ep_user_zcpy_rx_unhappy_due_to_sas(); void test_efa_rdm_ep_close_discard_posted_recv(); +void test_efa_rdm_ep_zcpy_recv_cancel(); void test_dgram_cq_read_empty_cq(); void test_ibv_cq_ex_read_empty_cq(); void test_ibv_cq_ex_read_failed_poll();