Skip to content

Commit

Permalink
prov/efa: Require shm to be disabled for using zero-copy recv
Browse files Browse the repository at this point in the history
Zero-copy receive cannot work with mixed efa and shm traffic,
so this patch makes disabling shm a requirement for using
zero-copy recv.

Signed-off-by: Shi Jin <sjina@amazon.com>
  • Loading branch information
shijin-aws committed Jul 31, 2024
1 parent 9387629 commit 18b7b18
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 4 deletions.
8 changes: 6 additions & 2 deletions fabtests/pytest/efa/test_rdm.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,16 @@ def test_rdm_pingpong_1G(cmdline_args, completion_semantic):
def test_rdm_pingpong_zcpy_recv(cmdline_args, memory_type, zcpy_recv_max_msg_size, zcpy_recv_message_size):
if cmdline_args.server_id == cmdline_args.client_id:
pytest.skip("no zero copy recv for intra-node communication")
efa_run_client_server_test(cmdline_args, f"fi_rdm_pingpong --max-msg-size {zcpy_recv_max_msg_size}",
cmdline_args_copy = copy.copy(cmdline_args)
cmdline_args_copy.append_environ("FI_EFA_ENABLE_SHM_TRANSFER=0")
efa_run_client_server_test(cmdline_args_copy, f"fi_rdm_pingpong --max-msg-size {zcpy_recv_max_msg_size}",
"short", "transmit_complete", memory_type, zcpy_recv_message_size)

@pytest.mark.functional
def test_rdm_bw_zcpy_recv(cmdline_args, memory_type, zcpy_recv_max_msg_size, zcpy_recv_message_size):
if cmdline_args.server_id == cmdline_args.client_id:
pytest.skip("no zero copy recv for intra-node communication")
efa_run_client_server_test(cmdline_args, f"fi_rdm_bw --max-msg-size {zcpy_recv_max_msg_size}",
cmdline_args_copy = copy.copy(cmdline_args)
cmdline_args_copy.append_environ("FI_EFA_ENABLE_SHM_TRANSFER=0")
efa_run_client_server_test(cmdline_args_copy, f"fi_rdm_bw --max-msg-size {zcpy_recv_max_msg_size}",
"short", "transmit_complete", memory_type, zcpy_recv_message_size)
10 changes: 8 additions & 2 deletions prov/efa/src/rdm/efa_rdm_ep_fiops.c
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,12 @@ void efa_rdm_ep_set_use_zcpy_rx(struct efa_rdm_ep *ep)
goto out;
}

if (ep->shm_ep) {
EFA_INFO(FI_LOG_EP_CTRL, "Libfabric SHM is not turned off, zero-copy receive protocol will be disabled\n");
ep->use_zcpy_rx = false;
goto out;
}

out:
EFA_INFO(FI_LOG_EP_CTRL, "efa_rdm_ep->use_zcpy_rx = %d\n",
ep->use_zcpy_rx);
Expand Down Expand Up @@ -1225,6 +1231,8 @@ static int efa_rdm_ep_ctrl(struct fid *fid, int command, void *arg)
if (ret)
return ret;

efa_rdm_ep_update_shm(ep);

efa_rdm_ep_set_use_zcpy_rx(ep);

ret = efa_rdm_ep_create_base_ep_ibv_qp(ep);
Expand Down Expand Up @@ -1255,8 +1263,6 @@ static int efa_rdm_ep_ctrl(struct fid *fid, int command, void *arg)
EFA_INFO(FI_LOG_EP_CTRL, "libfabric %s efa endpoint created! address: %s\n",
fi_tostr("1", FI_TYPE_VERSION), ep_addr_str);

efa_rdm_ep_update_shm(ep);

/* Enable shm provider endpoint & post recv buff.
* Once core ep enabled, 18 bytes efa_addr (16 bytes raw + 2 bytes qpn) is set.
* We convert the address to 'gid_qpn' format, and set it as shm ep name, so
Expand Down
6 changes: 6 additions & 0 deletions prov/efa/test/efa_unit_test_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -946,6 +946,7 @@ static void
test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, bool expected_use_zcpy_rx) {
struct efa_rdm_ep *ep;
size_t max_msg_size = 1000;
bool shm_permitted = false;

efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14),
resource->hints, false, true);
Expand All @@ -955,6 +956,11 @@ test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, bool expected_us
/* Set sufficiently small max_msg_size */
assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_MAX_MSG_SIZE,
&max_msg_size, sizeof max_msg_size), 0);

/* Disable shm */
assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_SHARED_MEMORY_PERMITTED,
&shm_permitted, sizeof shm_permitted), 0);

assert_true(ep->max_msg_size == max_msg_size);
assert_int_equal(fi_enable(resource->ep), 0);
assert_true(ep->use_zcpy_rx == expected_use_zcpy_rx);
Expand Down

0 comments on commit 18b7b18

Please sign in to comment.