diff --git a/fabtests/pytest/efa/test_rdm.py b/fabtests/pytest/efa/test_rdm.py index abd6453c1a0..0c68b898478 100644 --- a/fabtests/pytest/efa/test_rdm.py +++ b/fabtests/pytest/efa/test_rdm.py @@ -114,12 +114,16 @@ def test_rdm_pingpong_1G(cmdline_args, completion_semantic): def test_rdm_pingpong_zcpy_recv(cmdline_args, memory_type, zcpy_recv_max_msg_size, zcpy_recv_message_size): if cmdline_args.server_id == cmdline_args.client_id: pytest.skip("no zero copy recv for intra-node communication") - efa_run_client_server_test(cmdline_args, f"fi_rdm_pingpong --max-msg-size {zcpy_recv_max_msg_size}", + cmdline_args_copy = copy.copy(cmdline_args) + cmdline_args_copy.append_environ("FI_EFA_ENABLE_SHM_TRANSFER=0") + efa_run_client_server_test(cmdline_args_copy, f"fi_rdm_pingpong --max-msg-size {zcpy_recv_max_msg_size}", "short", "transmit_complete", memory_type, zcpy_recv_message_size) @pytest.mark.functional def test_rdm_bw_zcpy_recv(cmdline_args, memory_type, zcpy_recv_max_msg_size, zcpy_recv_message_size): if cmdline_args.server_id == cmdline_args.client_id: pytest.skip("no zero copy recv for intra-node communication") - efa_run_client_server_test(cmdline_args, f"fi_rdm_bw --max-msg-size {zcpy_recv_max_msg_size}", + cmdline_args_copy = copy.copy(cmdline_args) + cmdline_args_copy.append_environ("FI_EFA_ENABLE_SHM_TRANSFER=0") + efa_run_client_server_test(cmdline_args_copy, f"fi_rdm_bw --max-msg-size {zcpy_recv_max_msg_size}", "short", "transmit_complete", memory_type, zcpy_recv_message_size) diff --git a/prov/efa/docs/efa_rdm_protocol_v4.md b/prov/efa/docs/efa_rdm_protocol_v4.md index 33954a25777..9f0b457a1bf 100644 --- a/prov/efa/docs/efa_rdm_protocol_v4.md +++ b/prov/efa/docs/efa_rdm_protocol_v4.md @@ -1358,6 +1358,8 @@ buffer at a later time. However, if an application has the following set of requ 2. Only sends/receives eager messages 3. Does not use tagged send 4. Does not require `FI_DIRECTED_RECV` (the ability to receive only from certain addresses) + 5. 
Does not use Libfabric's shared memory communication, e.g. by setting `FI_OPT_SHARED_MEMORY_PERMITTED` to false + via `fi_setopt`. it should be possible to receive data directly using the application buffer since, under such conditions, the receiver does not have special requirements on the data it is going to receive, and it will thus accept any diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index 9c187d5c33b..0a6e1997314 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -455,6 +455,12 @@ void efa_rdm_ep_set_use_zcpy_rx(struct efa_rdm_ep *ep) goto out; } + if (ep->shm_ep) { + EFA_INFO(FI_LOG_EP_CTRL, "Libfabric SHM is not turned off, zero-copy receive protocol will be disabled\n"); + ep->use_zcpy_rx = false; + goto out; + } + out: EFA_INFO(FI_LOG_EP_CTRL, "efa_rdm_ep->use_zcpy_rx = %d\n", ep->use_zcpy_rx); @@ -1211,6 +1217,8 @@ static int efa_rdm_ep_ctrl(struct fid *fid, int command, void *arg) if (ret) return ret; + efa_rdm_ep_update_shm(ep); + efa_rdm_ep_set_use_zcpy_rx(ep); ret = efa_rdm_ep_create_base_ep_ibv_qp(ep); @@ -1241,8 +1249,6 @@ static int efa_rdm_ep_ctrl(struct fid *fid, int command, void *arg) EFA_INFO(FI_LOG_EP_CTRL, "libfabric %s efa endpoint created! address: %s\n", fi_tostr("1", FI_TYPE_VERSION), ep_addr_str); - efa_rdm_ep_update_shm(ep); - /* Enable shm provider endpoint & post recv buff. * Once core ep enabled, 18 bytes efa_addr (16 bytes raw + 2 bytes qpn) is set. 
* We convert the address to 'gid_qpn' format, and set it as shm ep name, so diff --git a/prov/efa/test/efa_unit_test_ep.c b/prov/efa/test/efa_unit_test_ep.c index c0907ffed59..ea9908a34fc 100644 --- a/prov/efa/test/efa_unit_test_ep.c +++ b/prov/efa/test/efa_unit_test_ep.c @@ -946,6 +946,7 @@ static void test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, bool expected_use_zcpy_rx) { struct efa_rdm_ep *ep; size_t max_msg_size = 1000; + bool shm_permitted = false; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), resource->hints, false, true); @@ -955,6 +956,11 @@ test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, bool expected_us /* Set sufficiently small max_msg_size */ assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_MAX_MSG_SIZE, &max_msg_size, sizeof max_msg_size), 0); + + /* Disable shm */ + assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_SHARED_MEMORY_PERMITTED, + &shm_permitted, sizeof shm_permitted), 0); + assert_true(ep->max_msg_size == max_msg_size); assert_int_equal(fi_enable(resource->ep), 0); assert_true(ep->use_zcpy_rx == expected_use_zcpy_rx);