Skip to content

Commit

Permalink
prov/efa: disable zero-copy receive if p2p is not supported
Browse files Browse the repository at this point in the history
Zero-copy receive protocol requires posting application receive buffer
directly to EFA device. This includes HMEM device memory, which requires
P2P support. Hence if P2P is not supported but required we have to
disable zcpy recv.

Signed-off-by: Wenduo Wang <wenduwan@amazon.com>
(cherry picked from commit 3146608)
  • Loading branch information
wenduwan authored and shijin-aws committed Aug 1, 2024
1 parent 0f68516 commit 99c614d
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 9 deletions.
9 changes: 8 additions & 1 deletion man/fi_efa.7.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,14 @@ No support for counters for the DGRAM endpoint.

No support for inject.

No support for `fi_cancel()` for the [zero-copy receive mode](https://github.com/ofiwg/libfabric/blob/main/prov/efa/docs/efa_rdm_protocol_v4.md#48-user-receive-qp-feature--request-and-zero-copy-receive).
## [zero-copy receive mode](../prov/efa/docs/efa_rdm_protocol_v4.md#48-user-receive-qp-feature--request-and-zero-copy-receive)
- The receive operation cannot be cancelled via `fi_cancel()`.
- Zero-copy receive mode can be enabled only if SHM transfer is disabled.
- Unless the application explicitly disables P2P (e.g. via `FI_HMEM_P2P_DISABLED`),
  zero-copy receive can be enabled only if every available FI_HMEM device
  supports P2P.



When using FI_HMEM for AWS Neuron or Habana SynapseAI buffers, the provider
requires peer to peer transaction support between the EFA and the FI_HMEM
Expand Down
17 changes: 17 additions & 0 deletions prov/efa/src/rdm/efa_rdm_ep_fiops.c
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,8 @@ static struct fi_ops efa_rdm_ep_base_ops = {
static inline
void efa_rdm_ep_set_use_zcpy_rx(struct efa_rdm_ep *ep)
{
enum fi_hmem_iface iface;
struct efa_hmem_info *hmem_info;
uint64_t unsupported_caps = FI_DIRECTED_RECV | FI_TAGGED | FI_ATOMIC;

ep->use_zcpy_rx = true;
Expand Down Expand Up @@ -461,6 +463,21 @@ void efa_rdm_ep_set_use_zcpy_rx(struct efa_rdm_ep *ep)
goto out;
}

/* Zero-copy receive requires P2P support. Disable it if any initialized HMEM iface does not support P2P. */
for (iface = FI_HMEM_SYSTEM; iface < OFI_HMEM_MAX; ++iface) {
hmem_info = &ep->base_ep.domain->hmem_info[iface];
if (hmem_info->initialized &&
!hmem_info->p2p_disabled_by_user &&
!hmem_info->p2p_supported_by_device) {
EFA_INFO(FI_LOG_EP_CTRL,
"%s does not support P2P, zero-copy receive "
"protocol will be disabled\n",
fi_tostr(&iface, FI_TYPE_HMEM_IFACE));
ep->use_zcpy_rx = false;
goto out;
}
}

out:
EFA_INFO(FI_LOG_EP_CTRL, "efa_rdm_ep->use_zcpy_rx = %d\n",
ep->use_zcpy_rx);
Expand Down
72 changes: 66 additions & 6 deletions prov/efa/test/efa_unit_test_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -942,15 +942,39 @@ void test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_bad(struct efa_resourc

#endif

static void
test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, bool expected_use_zcpy_rx) {
static void test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource,
bool cuda_p2p_disabled,
bool cuda_p2p_supported,
bool expected_use_zcpy_rx)
{
struct efa_domain *efa_domain;
struct efa_rdm_ep *ep;
size_t max_msg_size = 1000;
bool shm_permitted = false;

efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14),
resource->hints, false, true);

efa_domain = container_of(resource->domain, struct efa_domain,
util_domain.domain_fid.fid);

/* System memory P2P should always be enabled */
assert_true(efa_domain->hmem_info[FI_HMEM_SYSTEM].initialized);
assert_false(efa_domain->hmem_info[FI_HMEM_SYSTEM].p2p_disabled_by_user);
assert_true(efa_domain->hmem_info[FI_HMEM_SYSTEM].p2p_supported_by_device);

/**
* We want to be able to run this test on any platform:
* 1. Fake CUDA support.
* 2. Disable all other hmem ifaces.
*/
efa_domain->hmem_info[FI_HMEM_CUDA].initialized = true;
efa_domain->hmem_info[FI_HMEM_CUDA].p2p_disabled_by_user = cuda_p2p_disabled;
efa_domain->hmem_info[FI_HMEM_CUDA].p2p_supported_by_device = cuda_p2p_supported;

efa_domain->hmem_info[FI_HMEM_NEURON].initialized = false;
efa_domain->hmem_info[FI_HMEM_SYNAPSEAI].initialized = false;

ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid);

/* Set sufficiently small max_msg_size */
Expand All @@ -973,7 +997,25 @@ test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, bool expected_us
* 3. app's max msg size is smaller than mtu_size - prefix_size
* 4. app doesn't use FI_DIRECTED_RECV, FI_TAGGED, FI_ATOMIC capability
*/
void test_efa_rdm_ep_user_zcpy_rx_happy(struct efa_resource **state)
void test_efa_rdm_ep_user_zcpy_rx_disabled(struct efa_resource **state)
{
	/*
	 * Flags forwarded to test_efa_rdm_ep_use_zcpy_rx_impl(), named here so
	 * the call site does not read as three anonymous booleans.
	 *
	 * NOTE(review): the test name says "disabled", yet the expectation
	 * passed to the helper is that zero-copy receive is in use.
	 */
	const bool cuda_p2p_disabled = false;
	const bool cuda_p2p_supported = true;
	const bool expect_zcpy_rx = true;
	struct efa_resource *res = *state;

	res->hints = efa_unit_test_alloc_hints(FI_EP_RDM);
	assert_non_null(res->hints);

	/* FI_MSG only, FI_MSG_PREFIX mode, no message ordering on rx or tx. */
	res->hints->caps = FI_MSG;
	res->hints->mode = FI_MSG_PREFIX;
	res->hints->rx_attr->msg_order = FI_ORDER_NONE;
	res->hints->tx_attr->msg_order = FI_ORDER_NONE;

	test_efa_rdm_ep_use_zcpy_rx_impl(res, cuda_p2p_disabled,
					 cuda_p2p_supported, expect_zcpy_rx);
}

/**
* @brief Verify zcpy_rx is enabled if CUDA P2P is explicitly disabled
*/
void test_efa_rdm_ep_user_disable_p2p_zcpy_rx_happy(struct efa_resource **state)
{
struct efa_resource *resource = *state;

Expand All @@ -985,7 +1027,7 @@ void test_efa_rdm_ep_user_zcpy_rx_happy(struct efa_resource **state)
resource->hints->mode = FI_MSG_PREFIX;
resource->hints->caps = FI_MSG;

test_efa_rdm_ep_use_zcpy_rx_impl(resource, true);
test_efa_rdm_ep_use_zcpy_rx_impl(resource, true, false, true);
}

/**
Expand All @@ -1003,7 +1045,25 @@ void test_efa_rdm_ep_user_zcpy_rx_unhappy_due_to_sas(struct efa_resource **state
resource->hints->mode = FI_MSG_PREFIX;
resource->hints->caps = FI_MSG;

test_efa_rdm_ep_use_zcpy_rx_impl(resource, false);
test_efa_rdm_ep_use_zcpy_rx_impl(resource, false, true, false);
}

/**
 * @brief Verify zcpy_rx is disabled when CUDA P2P has not been disabled by
 * the user but is reported as unsupported by the device.
 *
 * NOTE(review): despite the "_happy" suffix, the expectation passed to the
 * helper is that zero-copy receive is NOT in use.
 */
void test_efa_rdm_ep_user_p2p_not_supported_zcpy_rx_happy(struct efa_resource **state)
{
	/* Flags forwarded to test_efa_rdm_ep_use_zcpy_rx_impl(). */
	const bool cuda_p2p_disabled = false;
	const bool cuda_p2p_supported = false;
	const bool expect_zcpy_rx = false;
	struct efa_resource *res = *state;

	res->hints = efa_unit_test_alloc_hints(FI_EP_RDM);
	assert_non_null(res->hints);

	/* FI_MSG only, FI_MSG_PREFIX mode, no message ordering on rx or tx. */
	res->hints->caps = FI_MSG;
	res->hints->mode = FI_MSG_PREFIX;
	res->hints->rx_attr->msg_order = FI_ORDER_NONE;
	res->hints->tx_attr->msg_order = FI_ORDER_NONE;

	test_efa_rdm_ep_use_zcpy_rx_impl(res, cuda_p2p_disabled,
					 cuda_p2p_supported, expect_zcpy_rx);
}

void test_efa_rdm_ep_close_discard_posted_recv(struct efa_resource **state)
Expand Down Expand Up @@ -1039,7 +1099,7 @@ void test_efa_rdm_ep_zcpy_recv_cancel(struct efa_resource **state)
resource->hints->caps = FI_MSG;

/* enable zero-copy recv mode in ep */
test_efa_rdm_ep_use_zcpy_rx_impl(resource, true);
test_efa_rdm_ep_use_zcpy_rx_impl(resource, false, true, true);

/* fi_recv should work with a recv buffer with NULL desc */
assert_int_equal(fi_recv(resource->ep, recv_buff, 16, NULL, FI_ADDR_UNSPEC, &cancel_context), 0);
Expand Down
4 changes: 3 additions & 1 deletion prov/efa/test/efa_unit_tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,10 @@ int main(void)
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_send_with_shm_no_copy, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_rma_without_caps, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_atomic_without_caps, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_user_zcpy_rx_happy, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_user_zcpy_rx_disabled, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_user_disable_p2p_zcpy_rx_happy, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_user_zcpy_rx_unhappy_due_to_sas, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_user_p2p_not_supported_zcpy_rx_happy, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_close_discard_posted_recv, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_zcpy_recv_cancel, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_dgram_cq_read_empty_cq, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
Expand Down
4 changes: 3 additions & 1 deletion prov/efa/test/efa_unit_tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,10 @@ void test_efa_rdm_ep_atomic_without_caps();
void test_efa_rdm_ep_setopt_shared_memory_permitted();
void test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_good();
void test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_bad();
void test_efa_rdm_ep_user_zcpy_rx_happy();
void test_efa_rdm_ep_user_zcpy_rx_disabled();
void test_efa_rdm_ep_user_disable_p2p_zcpy_rx_happy();
void test_efa_rdm_ep_user_zcpy_rx_unhappy_due_to_sas();
void test_efa_rdm_ep_user_p2p_not_supported_zcpy_rx_happy();
void test_efa_rdm_ep_close_discard_posted_recv();
void test_efa_rdm_ep_zcpy_recv_cancel();
void test_dgram_cq_read_empty_cq();
Expand Down

0 comments on commit 99c614d

Please sign in to comment.