Skip to content

Commit

Permalink
ompi/coll/cuda: Implement reduce local
Browse files Browse the repository at this point in the history
Signed-off-by: Akshay Venkatesh <akvenkatesh@nvidia.com>
(cherry picked from commit e25e897)
  • Loading branch information
Akshay-Venkatesh committed Oct 1, 2024
1 parent da2c8fd commit e3ad86e
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 0 deletions.
5 changes: 5 additions & 0 deletions ompi/mca/coll/cuda/coll_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ int mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);

/*
 * reduce_local entry point of the coll/cuda component (defined in
 * coll_cuda_reduce.c).  Combines count elements of type dtype from sbuf
 * into rbuf using op.  Returns OMPI_SUCCESS or a negative error code.
 */
int mca_coll_cuda_reduce_local(const void *sbuf, void *rbuf, size_t count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
mca_coll_base_module_t *module);

int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
Expand Down
3 changes: 3 additions & 0 deletions ompi/mca/coll/cuda/coll_cuda_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ static void mca_coll_cuda_module_destruct(mca_coll_cuda_module_t *module)
{
OBJ_RELEASE(module->c_coll.coll_allreduce_module);
OBJ_RELEASE(module->c_coll.coll_reduce_module);
OBJ_RELEASE(module->c_coll.coll_reduce_local_module);
OBJ_RELEASE(module->c_coll.coll_reduce_scatter_block_module);
OBJ_RELEASE(module->c_coll.coll_scatter_module);
/* If the exscan module is not NULL, then this was an
Expand Down Expand Up @@ -103,6 +104,7 @@ mca_coll_cuda_comm_query(struct ompi_communicator_t *comm,
cuda_module->super.coll_gather = NULL;
cuda_module->super.coll_gatherv = NULL;
cuda_module->super.coll_reduce = mca_coll_cuda_reduce;
cuda_module->super.coll_reduce_local = mca_coll_cuda_reduce_local;
cuda_module->super.coll_reduce_scatter = NULL;
cuda_module->super.coll_reduce_scatter_block = mca_coll_cuda_reduce_scatter_block;
cuda_module->super.coll_scan = mca_coll_cuda_scan;
Expand Down Expand Up @@ -135,6 +137,7 @@ int mca_coll_cuda_module_enable(mca_coll_base_module_t *module,

CHECK_AND_RETAIN(comm, s, allreduce);
CHECK_AND_RETAIN(comm, s, reduce);
CHECK_AND_RETAIN(comm, s, reduce_local);
CHECK_AND_RETAIN(comm, s, reduce_scatter_block);
CHECK_AND_RETAIN(comm, s, scatter);
if (!OMPI_COMM_IS_INTER(comm)) {
Expand Down
57 changes: 57 additions & 0 deletions ompi/mca/coll/cuda/coll_cuda_reduce.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,60 @@ mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count,
}
return rc;
}

/*
 * mca_coll_cuda_reduce_local
 *
 * Apply `op` element-wise to `count` elements of `dtype`, combining `sbuf`
 * into `rbuf`.  Each buffer is first classified with
 * mca_coll_cuda_check_buf(); a positive result is treated as "this buffer
 * must be staged" (presumably because it is accelerator memory -- confirm
 * against mca_coll_cuda_check_buf).  Staged buffers are copied into
 * temporary malloc'ed memory, the reduction runs on the temporaries via
 * ompi_op_reduce(), and a staged result is copied back to the caller's
 * rbuf afterwards.
 *
 * Parameters:
 *   sbuf   - input operand (read only)
 *   rbuf   - input/output operand; receives the result
 *   count  - number of elements in each buffer
 *   dtype  - datatype of the elements
 *   op     - reduction operation
 *   module - collective module (unused here)
 *
 * Returns:
 *   OMPI_SUCCESS on success,
 *   the negative value reported by mca_coll_cuda_check_buf() if either
 *   buffer cannot be classified,
 *   OMPI_ERR_OUT_OF_RESOURCE if a staging buffer cannot be allocated.
 *
 * All staging memory is released on every exit path.
 */
int
mca_coll_cuda_reduce_local(const void *sbuf, void *rbuf, size_t count,
                           struct ompi_datatype_t *dtype,
                           struct ompi_op_t *op,
                           mca_coll_base_module_t *module)
{
    ptrdiff_t gap;
    char *rbuf1 = NULL, *sbuf1 = NULL;
    void *user_rbuf = NULL;
    size_t bufsize;
    int rc;

    /* Bytes needed to hold `count` elements; `gap` receives the offset
     * that opal_datatype_span() reports for the start of the data. */
    bufsize = opal_datatype_span(&dtype->super, count, &gap);

    rc = mca_coll_cuda_check_buf((void *)sbuf);
    if (rc < 0) {
        /* Nothing allocated yet, safe to return directly. */
        return rc;
    }

    if ((MPI_IN_PLACE != sbuf) && (rc > 0)) {
        sbuf1 = (char *)malloc(bufsize);
        if (NULL == sbuf1) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        /* NOTE(review): the copy starts at sbuf while the staged pointer is
         * shifted by -gap; for datatypes with a non-zero gap this may need
         * to read from sbuf + gap -- confirm against the sibling
         * collectives in this component before changing. */
        mca_coll_cuda_memcpy(sbuf1, sbuf, bufsize);
        sbuf = sbuf1 - gap;
    }

    rc = mca_coll_cuda_check_buf(rbuf);
    if (rc < 0) {
        /* sbuf1 may already be allocated: release it before returning. */
        goto cleanup;
    }

    if (rc > 0) {
        rbuf1 = (char *)malloc(bufsize);
        if (NULL == rbuf1) {
            rc = OMPI_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        mca_coll_cuda_memcpy(rbuf1, rbuf, bufsize);
        user_rbuf = rbuf; /* save away original buffer */
        rbuf = rbuf1 - gap;
    }

    ompi_op_reduce(op, (void *)sbuf, rbuf, count, dtype);
    rc = OMPI_SUCCESS;

    if (NULL != rbuf1) {
        /* Publish the staged result back into the caller's buffer. */
        mca_coll_cuda_memcpy(user_rbuf, rbuf1, bufsize);
    }

cleanup:
    /* free(NULL) is a no-op, so no guards are needed. */
    free(rbuf1);
    free(sbuf1);
    return rc;
}

0 comments on commit e3ad86e

Please sign in to comment.