From 7690f4027a98a4bde09779bd0ff9273a735d9340 Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Tue, 8 Dec 2015 08:38:01 +0200 Subject: [PATCH] Yet one more fix to intercommunicator splitting logic. Previous commit f2794740 reverts Nathan's changes. However, it turns out that I was unable to trace his logic until I started investigating the icsplit hang. The bug was triggered when splitting an intercommunicator produced a group where one side of the communicator was empty (icsplit, intercom create #2). In this case remote_size == 0 and there is no way to distinguish between an inter- and an intra-communicator. Conclusion: We do need to distinguish between intra- and inter-communicators, so we should use ompi_mpi_group_null.group. --- ompi/communicator/comm.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index 346b87546fe..31db8047326 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -148,7 +148,7 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, local_size = ompi_group_size (local_group); } - if (NULL != remote_group) { + if ( (NULL != remote_group) && (&ompi_mpi_group_null.group != remote_group) ) { remote_size = ompi_group_size (remote_group); } @@ -177,10 +177,10 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, newcomm->c_my_rank = newcomm->c_local_group->grp_my_rank; /* Set remote group and duplicate the local comm, if applicable */ - if (0 < remote_size) { + if ( NULL != remote_group ) { ompi_communicator_t *old_localcomm; - if (NULL == remote_group) { + if (&ompi_mpi_group_null.group == remote_group) { ret = ompi_group_incl(oldcomm->c_remote_group, remote_size, remote_ranks, &newcomm->c_remote_group); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { @@ -432,7 +432,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, int rc=OMPI_SUCCESS; ompi_communicator_t *newcomp = NULL; int *lranks=NULL, *rranks=NULL; - ompi_group_t * local_group=NULL; + ompi_group_t 
* local_group=NULL, *remote_group=NULL; ompi_comm_allgatherfct *allgatherfct=NULL; @@ -508,6 +508,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, /* Step 2: determine all the information for the remote group */ /* --------------------------------------------------------- */ if ( inter ) { + remote_group = &ompi_mpi_group_null.group; rsize = comm->c_remote_group->grp_proc_count; rresults = (int *) malloc ( rsize * 2 * sizeof(int)); if ( NULL == rresults ) { @@ -591,7 +592,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key, comm->error_handler,/* error handler */ pass_on_topo, local_group, /* local group */ - NULL); /* remote group */ + remote_group); /* remote group */ if ( NULL == newcomp ) { rc = MPI_ERR_INTERN;