From b0197d91d828401ba0b7b8593e026f2215cb70eb Mon Sep 17 00:00:00 2001 From: Wei Zhang Date: Sun, 26 Sep 2021 03:37:20 +0000 Subject: [PATCH] osc/rdma: use btl/self for self communication as last resort ompi_osc_rdma_peer_btl_endpoint() is used to select the btl and endpoint used to communicate with a peer. This patch changes ompi_osc_rdma_peer_btl_endpoint() so that, if no btl/endpoint has been selected for self communication and the bml has btl/self, btl/self is used for self communication. It also changes ompi_osc_rdma_new_peer(): currently, if no btl can be found and the peer is self, the function still continues. This patch makes the function fail in this case, because the ability to do self communication is essential for osc/rdma. Signed-off-by: Wei Zhang --- ompi/mca/osc/rdma/osc_rdma_peer.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/ompi/mca/osc/rdma/osc_rdma_peer.c b/ompi/mca/osc/rdma/osc_rdma_peer.c index 30592d873d2..2543ef4eecb 100644 --- a/ompi/mca/osc/rdma/osc_rdma_peer.c +++ b/ompi/mca/osc/rdma/osc_rdma_peer.c @@ -73,7 +73,19 @@ static int ompi_osc_rdma_peer_btl_endpoint (struct ompi_osc_rdma_module_t *modul } } - /* unlikely but can happen when creating a peer for self */ + if (peer_id == ompi_comm_rank (module->comm)) { + for (int btl_index = 0 ; btl_index < num_btls ; ++btl_index) { + struct mca_btl_base_module_t *btl; + + btl = bml_endpoint->btl_rdma.bml_btls[btl_index].btl; + if (strcmp(btl->btl_component->btl_version.mca_component_name, "self")==0) { + *btl_out = btl; + *endpoint = bml_endpoint->btl_eager.bml_btls[btl_index].btl_endpoint; + return OMPI_SUCCESS; + } + } + } + return OMPI_ERR_UNREACH; } @@ -86,9 +98,7 @@ int ompi_osc_rdma_new_peer (struct ompi_osc_rdma_module_t *module, int peer_id, /* find a btl/endpoint to use for this peer */ int ret = ompi_osc_rdma_peer_btl_endpoint (module, peer_id, &btl, &endpoint); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret && - 
!(module->selected_btls[0]->btl_atomic_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB) && - (peer_id != ompi_comm_rank (module->comm)))) { + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return ret; }