Skip to content

Commit

Permalink
Yet one more fix to intercommunicator splitting logic.
Browse files Browse the repository at this point in the history
Previous commit f279474 reverts Nathan's changes. However, it turns out
that I was unable to follow his logic until I started investigating the
icsplit hang. The bug was triggered when splitting an intercommunicator
produced a group where one side of the communicator was empty (icsplit,
intercom create #2). In this case remote_size == 0 and there is no way to
distinguish between an inter- and an intra-communicator.
Conclusion: we do need to distinguish between intra- and inter-communicators,
so we should use ompi_mpi_group_null.group (passed as the remote group when
splitting an inter-communicator).
  • Loading branch information
artpol84 committed Dec 8, 2015
1 parent 63d8feb commit 7690f40
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions ompi/communicator/comm.c
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm,
local_size = ompi_group_size (local_group);
}

if (NULL != remote_group) {
if ( (NULL != remote_group) && (&ompi_mpi_group_null.group != remote_group) ) {
remote_size = ompi_group_size (remote_group);
}

Expand Down Expand Up @@ -177,10 +177,10 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm,
newcomm->c_my_rank = newcomm->c_local_group->grp_my_rank;

/* Set remote group and duplicate the local comm, if applicable */
if (0 < remote_size) {
if ( NULL != remote_group ) {
ompi_communicator_t *old_localcomm;

if (NULL == remote_group) {
if (&ompi_mpi_group_null.group == remote_group) {
ret = ompi_group_incl(oldcomm->c_remote_group, remote_size,
remote_ranks, &newcomm->c_remote_group);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
Expand Down Expand Up @@ -432,7 +432,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key,
int rc=OMPI_SUCCESS;
ompi_communicator_t *newcomp = NULL;
int *lranks=NULL, *rranks=NULL;
ompi_group_t * local_group=NULL;
ompi_group_t * local_group=NULL, *remote_group=NULL;

ompi_comm_allgatherfct *allgatherfct=NULL;

Expand Down Expand Up @@ -508,6 +508,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key,
/* Step 2: determine all the information for the remote group */
/* --------------------------------------------------------- */
if ( inter ) {
remote_group = &ompi_mpi_group_null.group;
rsize = comm->c_remote_group->grp_proc_count;
rresults = (int *) malloc ( rsize * 2 * sizeof(int));
if ( NULL == rresults ) {
Expand Down Expand Up @@ -591,7 +592,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key,
comm->error_handler,/* error handler */
pass_on_topo,
local_group, /* local group */
NULL); /* remote group */
remote_group); /* remote group */

if ( NULL == newcomp ) {
rc = MPI_ERR_INTERN;
Expand Down

0 comments on commit 7690f40

Please sign in to comment.