From a4e75cee899dadd9e3fce0988708f9934c20e3ff Mon Sep 17 00:00:00 2001 From: Shi Jin Date: Wed, 31 Jul 2024 21:59:47 +0000 Subject: [PATCH] [v1.22.x] prov/efa: Require shm to be disabled for using zero-copy recv Zero-copy receive cannot work with mixed efa and shm traffic, so this patch makes disabling shm a requirement for using zero-copy recv. Signed-off-by: Shi Jin (cherry picked from commit 591094d546dd5809f4c05571986fa99bc791dcdb) --- fabtests/pytest/efa/test_rdm.py | 8 ++++++-- prov/efa/docs/efa_rdm_protocol_v4.md | 2 ++ prov/efa/src/rdm/efa_rdm_ep_fiops.c | 10 ++++++++-- prov/efa/test/efa_unit_test_ep.c | 6 ++++++ 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/fabtests/pytest/efa/test_rdm.py b/fabtests/pytest/efa/test_rdm.py index abd6453c1a0..0c68b898478 100644 --- a/fabtests/pytest/efa/test_rdm.py +++ b/fabtests/pytest/efa/test_rdm.py @@ -114,12 +114,16 @@ def test_rdm_pingpong_1G(cmdline_args, completion_semantic): def test_rdm_pingpong_zcpy_recv(cmdline_args, memory_type, zcpy_recv_max_msg_size, zcpy_recv_message_size): if cmdline_args.server_id == cmdline_args.client_id: pytest.skip("no zero copy recv for intra-node communication") - efa_run_client_server_test(cmdline_args, f"fi_rdm_pingpong --max-msg-size {zcpy_recv_max_msg_size}", + cmdline_args_copy = copy.copy(cmdline_args) + cmdline_args_copy.append_environ("FI_EFA_ENABLE_SHM_TRANSFER=0") + efa_run_client_server_test(cmdline_args_copy, f"fi_rdm_pingpong --max-msg-size {zcpy_recv_max_msg_size}", "short", "transmit_complete", memory_type, zcpy_recv_message_size) @pytest.mark.functional def test_rdm_bw_zcpy_recv(cmdline_args, memory_type, zcpy_recv_max_msg_size, zcpy_recv_message_size): if cmdline_args.server_id == cmdline_args.client_id: pytest.skip("no zero copy recv for intra-node communication") - efa_run_client_server_test(cmdline_args, f"fi_rdm_bw --max-msg-size {zcpy_recv_max_msg_size}", + cmdline_args_copy = copy.copy(cmdline_args) + 
cmdline_args_copy.append_environ("FI_EFA_ENABLE_SHM_TRANSFER=0") + efa_run_client_server_test(cmdline_args_copy, f"fi_rdm_bw --max-msg-size {zcpy_recv_max_msg_size}", "short", "transmit_complete", memory_type, zcpy_recv_message_size) diff --git a/prov/efa/docs/efa_rdm_protocol_v4.md b/prov/efa/docs/efa_rdm_protocol_v4.md index 33954a25777..9f0b457a1bf 100644 --- a/prov/efa/docs/efa_rdm_protocol_v4.md +++ b/prov/efa/docs/efa_rdm_protocol_v4.md @@ -1358,6 +1358,8 @@ buffer at a later time. However, if an application has the following set of requ 2. Only sends/receives eager messages 3. Does not use tagged send 4. Does not require `FI_DIRECTED_RECV` (the ability to receive only from certain addresses) + 5. Does not use Libfabric's shared memory communication, e.g. by setting `FI_OPT_SHARED_MEMORY_PERMITTED` as false + via `fi_setopt`. it should be possible to receive data directly using the application buffer since, under such conditions, the receiver does not have special requirements on the data it is going to receive, and it will thus accept any diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index 9c187d5c33b..0a6e1997314 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -455,6 +455,12 @@ void efa_rdm_ep_set_use_zcpy_rx(struct efa_rdm_ep *ep) goto out; } + if (ep->shm_ep) { + EFA_INFO(FI_LOG_EP_CTRL, "Libfabric SHM is not turned off, zero-copy receive protocol will be disabled\n"); + ep->use_zcpy_rx = false; + goto out; + } + out: EFA_INFO(FI_LOG_EP_CTRL, "efa_rdm_ep->use_zcpy_rx = %d\n", ep->use_zcpy_rx); @@ -1211,6 +1217,8 @@ static int efa_rdm_ep_ctrl(struct fid *fid, int command, void *arg) if (ret) return ret; + efa_rdm_ep_update_shm(ep); + efa_rdm_ep_set_use_zcpy_rx(ep); ret = efa_rdm_ep_create_base_ep_ibv_qp(ep); @@ -1241,8 +1249,6 @@ static int efa_rdm_ep_ctrl(struct fid *fid, int command, void *arg) EFA_INFO(FI_LOG_EP_CTRL, "libfabric %s efa endpoint created! 
address: %s\n", fi_tostr("1", FI_TYPE_VERSION), ep_addr_str); - efa_rdm_ep_update_shm(ep); - /* Enable shm provider endpoint & post recv buff. * Once core ep enabled, 18 bytes efa_addr (16 bytes raw + 2 bytes qpn) is set. * We convert the address to 'gid_qpn' format, and set it as shm ep name, so diff --git a/prov/efa/test/efa_unit_test_ep.c b/prov/efa/test/efa_unit_test_ep.c index c0907ffed59..ea9908a34fc 100644 --- a/prov/efa/test/efa_unit_test_ep.c +++ b/prov/efa/test/efa_unit_test_ep.c @@ -946,6 +946,7 @@ static void test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, bool expected_use_zcpy_rx) { struct efa_rdm_ep *ep; size_t max_msg_size = 1000; + bool shm_permitted = false; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), resource->hints, false, true); @@ -955,6 +956,11 @@ test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, bool expected_us /* Set sufficiently small max_msg_size */ assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_MAX_MSG_SIZE, &max_msg_size, sizeof max_msg_size), 0); + + /* Disable shm */ + assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_SHARED_MEMORY_PERMITTED, + &shm_permitted, sizeof shm_permitted), 0); + assert_true(ep->max_msg_size == max_msg_size); assert_int_equal(fi_enable(resource->ep), 0); assert_true(ep->use_zcpy_rx == expected_use_zcpy_rx);