Skip to content

Commit

Permalink
Merge pull request #11688 from bosilca/topic/fix_split_type_for_intercomm
Browse files Browse the repository at this point in the history

Topic/fix split type for intercomm
  • Loading branch information
bosilca authored Aug 18, 2023
2 parents 980bed2 + 74de336 commit 76b91ce
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 10 deletions.
10 changes: 10 additions & 0 deletions ompi/communicator/comm.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,16 @@ int ompi_comm_set_nb (ompi_communicator_t **ncomm, ompi_communicator_t *oldcomm,
newcomm->c_assertions = 0;

/* Set remote group and duplicate the local comm, if applicable */
if ((NULL == remote_group) && (NULL != remote_ranks)) {
/* determine how the list of local_rank can be stored most
efficiently */
ret = ompi_group_incl(oldcomm->c_remote_group, remote_size,
remote_ranks, &newcomm->c_remote_group);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
remote_group = newcomm->c_remote_group;
}
if ( NULL != remote_group ) {
ompi_communicator_t *old_localcomm;

Expand Down
21 changes: 11 additions & 10 deletions ompi/mca/coll/inter/coll_inter_allreduce.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ mca_coll_inter_allreduce_inter(const void *sbuf, void *rbuf, int count,
mca_coll_base_module_t *module)
{
int err, rank, root = 0;
char *tmpbuf = NULL, *pml_buffer = NULL;
char *tmpbuf = NULL, *pml_buffer = NULL, *source;
ptrdiff_t gap, span;

rank = ompi_comm_rank(comm);
Expand All @@ -58,20 +58,21 @@ mca_coll_inter_allreduce_inter(const void *sbuf, void *rbuf, int count,

tmpbuf = (char *) malloc(span);
if (NULL == tmpbuf) {
return OMPI_ERR_OUT_OF_RESOURCE;
return OMPI_ERR_OUT_OF_RESOURCE;
}
pml_buffer = tmpbuf - gap;
source = (MPI_IN_PLACE == sbuf) ? rbuf : sbuf;

err = comm->c_local_comm->c_coll->coll_reduce(sbuf, pml_buffer, count,
dtype, op, root,
comm->c_local_comm,
comm->c_local_comm->c_coll->coll_reduce_module);
err = comm->c_local_comm->c_coll->coll_reduce(source, pml_buffer, count,
dtype, op, root,
comm->c_local_comm,
comm->c_local_comm->c_coll->coll_reduce_module);
if (OMPI_SUCCESS != err) {
goto exit;
goto exit;
}

if (rank == root) {
/* Do a send-recv between the two root procs. to avoid deadlock */
/* Do a send-recv between the two root procs. to avoid deadlock */
err = ompi_coll_base_sendrecv_actual(pml_buffer, count, dtype, 0,
MCA_COLL_BASE_TAG_ALLREDUCE,
rbuf, count, dtype, 0,
Expand All @@ -84,8 +85,8 @@ mca_coll_inter_allreduce_inter(const void *sbuf, void *rbuf, int count,

/* bcast the message to all the local processes */
err = comm->c_local_comm->c_coll->coll_bcast(rbuf, count, dtype,
root, comm->c_local_comm,
comm->c_local_comm->c_coll->coll_bcast_module);
root, comm->c_local_comm,
comm->c_local_comm->c_coll->coll_bcast_module);
if (OMPI_SUCCESS != err) {
goto exit;
}
Expand Down

0 comments on commit 76b91ce

Please sign in to comment.