From 99c614da79e9fe89a46e5c193126987f99a24f5a Mon Sep 17 00:00:00 2001 From: Wenduo Wang Date: Thu, 1 Aug 2024 04:33:03 +0000 Subject: [PATCH] prov/efa: disable zero-copy receive if p2p is not supported Zero-copy receive protocol requires posting application receive buffer directly to EFA device. This includes HMEM device memory, which requires P2P support. Hence if P2P is not supported but required we have to disable zcpy recv. Signed-off-by: Wenduo Wang (cherry picked from commit 3146608fd6485424abdd863232ee11c35596b665) --- man/fi_efa.7.md | 9 +++- prov/efa/src/rdm/efa_rdm_ep_fiops.c | 17 +++++++ prov/efa/test/efa_unit_test_ep.c | 72 ++++++++++++++++++++++++++--- prov/efa/test/efa_unit_tests.c | 4 +- prov/efa/test/efa_unit_tests.h | 4 +- 5 files changed, 97 insertions(+), 9 deletions(-) diff --git a/man/fi_efa.7.md b/man/fi_efa.7.md index a7705098403..02ef1d80b73 100644 --- a/man/fi_efa.7.md +++ b/man/fi_efa.7.md @@ -84,7 +84,14 @@ No support for counters for the DGRAM endpoint. No support for inject. -No support for `fi_cancel()` for the [zero-copy receive mode](https://github.com/ofiwg/libfabric/blob/main/prov/efa/docs/efa_rdm_protocol_v4.md#48-user-receive-qp-feature--request-and-zero-copy-receive). +## [zero-copy receive mode](../prov/efa/docs/efa_rdm_protocol_v4.md#48-user-receive-qp-feature--request-and-zero-copy-receive) +- The receive operation cannot be cancelled via `fi_cancel()`. +- Zero-copy receive mode can be enabled only if SHM transfer is disabled. +- Unless the application explicitly disables P2P, e.g. via FI_HMEM_P2P_DISABLED, + zero-copy receive can be enabled only if available FI_HMEM devices all have + P2P support. 
+ + When using FI_HMEM for AWS Neuron or Habana SynapseAI buffers, the provider requires peer to peer transaction support between the EFA and the FI_HMEM diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index 0a6e1997314..9c3f7f6d147 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -422,6 +422,8 @@ static struct fi_ops efa_rdm_ep_base_ops = { static inline void efa_rdm_ep_set_use_zcpy_rx(struct efa_rdm_ep *ep) { + enum fi_hmem_iface iface; + struct efa_hmem_info *hmem_info; uint64_t unsupported_caps = FI_DIRECTED_RECV | FI_TAGGED | FI_ATOMIC; ep->use_zcpy_rx = true; @@ -461,6 +463,21 @@ void efa_rdm_ep_set_use_zcpy_rx(struct efa_rdm_ep *ep) goto out; } + /* Zero-copy receive requires P2P support. Disable it if any initialized HMEM iface does not support P2P. */ + for (iface = FI_HMEM_SYSTEM; iface < OFI_HMEM_MAX; ++iface) { + hmem_info = &ep->base_ep.domain->hmem_info[iface]; + if (hmem_info->initialized && + !hmem_info->p2p_disabled_by_user && + !hmem_info->p2p_supported_by_device) { + EFA_INFO(FI_LOG_EP_CTRL, + "%s does not support P2P, zero-copy receive " + "protocol will be disabled\n", + fi_tostr(&iface, FI_TYPE_HMEM_IFACE)); + ep->use_zcpy_rx = false; + goto out; + } + } + out: EFA_INFO(FI_LOG_EP_CTRL, "efa_rdm_ep->use_zcpy_rx = %d\n", ep->use_zcpy_rx); diff --git a/prov/efa/test/efa_unit_test_ep.c b/prov/efa/test/efa_unit_test_ep.c index ea9908a34fc..5a31f6cd22a 100644 --- a/prov/efa/test/efa_unit_test_ep.c +++ b/prov/efa/test/efa_unit_test_ep.c @@ -942,8 +942,12 @@ void test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_bad(struct efa_resourc #endif -static void -test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, bool expected_use_zcpy_rx) { +static void test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, + bool cuda_p2p_disabled, + bool cuda_p2p_supported, + bool expected_use_zcpy_rx) +{ + struct efa_domain *efa_domain; struct efa_rdm_ep *ep; 
size_t max_msg_size = 1000; bool shm_permitted = false; @@ -951,6 +955,26 @@ test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, bool expected_us efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), resource->hints, false, true); + efa_domain = container_of(resource->domain, struct efa_domain, + util_domain.domain_fid.fid); + + /* System memory P2P should always be enabled */ + assert_true(efa_domain->hmem_info[FI_HMEM_SYSTEM].initialized); + assert_false(efa_domain->hmem_info[FI_HMEM_SYSTEM].p2p_disabled_by_user); + assert_true(efa_domain->hmem_info[FI_HMEM_SYSTEM].p2p_supported_by_device); + + /** + * We want to be able to run this test on any platform: + * 1. Fake CUDA support. + * 2. Disable all other hmem ifaces. + */ + efa_domain->hmem_info[FI_HMEM_CUDA].initialized = true; + efa_domain->hmem_info[FI_HMEM_CUDA].p2p_disabled_by_user = cuda_p2p_disabled; + efa_domain->hmem_info[FI_HMEM_CUDA].p2p_supported_by_device = cuda_p2p_supported; + + efa_domain->hmem_info[FI_HMEM_NEURON].initialized = false; + efa_domain->hmem_info[FI_HMEM_SYNAPSEAI].initialized = false; + ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); /* Set sufficiently small max_msg_size */ @@ -973,7 +997,25 @@ test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, bool expected_us * 3. app's max msg size is smaller than mtu_size - prefix_size * 4. 
app doesn't use FI_DIRECTED_RECV, FI_TAGGED, FI_ATOMIC capability */ -void test_efa_rdm_ep_user_zcpy_rx_happy(struct efa_resource **state) +void test_efa_rdm_ep_user_zcpy_rx_disabled(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + assert_non_null(resource->hints); + + resource->hints->tx_attr->msg_order = FI_ORDER_NONE; + resource->hints->rx_attr->msg_order = FI_ORDER_NONE; + resource->hints->mode = FI_MSG_PREFIX; + resource->hints->caps = FI_MSG; + + test_efa_rdm_ep_use_zcpy_rx_impl(resource, false, true, true); +} + +/** + * @brief Verify zcpy_rx is enabled if CUDA P2P is explicitly disabled + */ +void test_efa_rdm_ep_user_disable_p2p_zcpy_rx_happy(struct efa_resource **state) { struct efa_resource *resource = *state; @@ -985,7 +1027,7 @@ void test_efa_rdm_ep_user_zcpy_rx_happy(struct efa_resource **state) resource->hints->mode = FI_MSG_PREFIX; resource->hints->caps = FI_MSG; - test_efa_rdm_ep_use_zcpy_rx_impl(resource, true); + test_efa_rdm_ep_use_zcpy_rx_impl(resource, true, false, true); } /** @@ -1003,7 +1045,25 @@ void test_efa_rdm_ep_user_zcpy_rx_unhappy_due_to_sas(struct efa_resource **state resource->hints->mode = FI_MSG_PREFIX; resource->hints->caps = FI_MSG; - test_efa_rdm_ep_use_zcpy_rx_impl(resource, false); + test_efa_rdm_ep_use_zcpy_rx_impl(resource, false, true, false); +} + +/** + * @brief Verify zcpy_rx is disabled if CUDA P2P is enabled but not supported + */ +void test_efa_rdm_ep_user_p2p_not_supported_zcpy_rx_happy(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); + assert_non_null(resource->hints); + + resource->hints->tx_attr->msg_order = FI_ORDER_NONE; + resource->hints->rx_attr->msg_order = FI_ORDER_NONE; + resource->hints->mode = FI_MSG_PREFIX; + resource->hints->caps = FI_MSG; + + test_efa_rdm_ep_use_zcpy_rx_impl(resource, false, false, false); } void 
test_efa_rdm_ep_close_discard_posted_recv(struct efa_resource **state) @@ -1039,7 +1099,7 @@ void test_efa_rdm_ep_zcpy_recv_cancel(struct efa_resource **state) resource->hints->caps = FI_MSG; /* enable zero-copy recv mode in ep */ - test_efa_rdm_ep_use_zcpy_rx_impl(resource, true); + test_efa_rdm_ep_use_zcpy_rx_impl(resource, false, true, true); /* fi_recv should work with a recv buffer with NULL desc */ assert_int_equal(fi_recv(resource->ep, recv_buff, 16, NULL, FI_ADDR_UNSPEC, &cancel_context), 0); diff --git a/prov/efa/test/efa_unit_tests.c b/prov/efa/test/efa_unit_tests.c index b2aeb1faf47..1836028c57d 100644 --- a/prov/efa/test/efa_unit_tests.c +++ b/prov/efa/test/efa_unit_tests.c @@ -100,8 +100,10 @@ int main(void) cmocka_unit_test_setup_teardown(test_efa_rdm_ep_send_with_shm_no_copy, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_rma_without_caps, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_atomic_without_caps, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), - cmocka_unit_test_setup_teardown(test_efa_rdm_ep_user_zcpy_rx_happy, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_ep_user_zcpy_rx_disabled, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_ep_user_disable_p2p_zcpy_rx_happy, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_user_zcpy_rx_unhappy_due_to_sas, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_ep_user_p2p_not_supported_zcpy_rx_happy, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_close_discard_posted_recv, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_zcpy_recv_cancel, 
efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_dgram_cq_read_empty_cq, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), diff --git a/prov/efa/test/efa_unit_tests.h b/prov/efa/test/efa_unit_tests.h index 30e02771653..0f18231eda0 100644 --- a/prov/efa/test/efa_unit_tests.h +++ b/prov/efa/test/efa_unit_tests.h @@ -114,8 +114,10 @@ void test_efa_rdm_ep_atomic_without_caps(); void test_efa_rdm_ep_setopt_shared_memory_permitted(); void test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_good(); void test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_bad(); -void test_efa_rdm_ep_user_zcpy_rx_happy(); +void test_efa_rdm_ep_user_zcpy_rx_disabled(); +void test_efa_rdm_ep_user_disable_p2p_zcpy_rx_happy(); void test_efa_rdm_ep_user_zcpy_rx_unhappy_due_to_sas(); +void test_efa_rdm_ep_user_p2p_not_supported_zcpy_rx_happy(); void test_efa_rdm_ep_close_discard_posted_recv(); void test_efa_rdm_ep_zcpy_recv_cancel(); void test_dgram_cq_read_empty_cq();