Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
ba221e1
coll/base/allgatherv: fix MPI_IN_PLACE processing
mkurnosov Jul 27, 2018
d9d84d5
coll/libnbc: fix NBC_Unpack()
ggouaillardet Sep 12, 2018
2692840
Always return a valid error code from collective operations
abouteiller Jan 26, 2018
547fb3d
libnbc: remove some stale/dead code
jsquyres Sep 12, 2018
65990af
coll/libnbc: add recursive doubling algorithm for MPI_Iscan
mkurnosov Sep 20, 2018
de5e435
coll/libnbc: add recursive doubling algorithm for MPI_Iexscan
mkurnosov Sep 17, 2018
6f6d818
coll libnbc: Remove dead code
bwbarrett Sep 29, 2018
a318f11
coll/libnbc: add Rabenseifner's algorithm for MPI_Ireduce
mkurnosov Sep 10, 2018
6971dab
coll/libnbc: add knomial tree algorithm for MPI_Ibcast
mkurnosov Sep 25, 2018
91a4b4c
coll/libnbc: add recursive doubling algorithm for MPI_Iallgather
mkurnosov Oct 11, 2018
fd29cce
Remove few warnings in libnbc identified by clang-1000.11.45.2
bosilca Oct 17, 2018
5ee1fb6
coll/libnbc: add Rabenseifner's algorithm for MPI_Iallreduce
mkurnosov Oct 6, 2018
bf1c8bb
coll/libnbc/ireduce: silence Coverity warning CID 1440360
mkurnosov Oct 18, 2018
ba11f31
coll/libnbc: remove debug output
mkurnosov Oct 26, 2018
2891a23
coll/libnbc: add recursive doubling algorithm for MPI_Iallreduce
alex-anenkov May 19, 2019
f51bd8c
Coll/hcoll: adding scatterv interface
Jan 17, 2019
221fad6
coll/cuda: remove unnecessary references to ORTE
ggouaillardet Sep 10, 2019
7eb9416
COLL/TUNED: Add linear scatter using isend for mlnx platform
brminich Oct 24, 2019
03758b1
coll/tuned: Fix typos
wckzhang Apr 14, 2020
db6ed18
coll/tuned: Add NULL check to prevent segfault
wckzhang Apr 14, 2020
339ee63
dist: Add Collectives backports to NEWS
bwbarrett May 31, 2020
7987a7f
common_ofi: fix preprocessor macro typo
jsquyres Jun 26, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ included in the vX.Y.Z section and be denoted as:
- OFI/libfabric: Added support for multiple NICs
- OFI/libfabric: Added support for Scalable Endpoints
- OFI/libfabric: Added btl for one-sided support
- libnbc: Added numerous performance-improving algorithms

4.0.4 -- June, 2020
-----------------------
Expand Down
5 changes: 5 additions & 0 deletions contrib/platform/mellanox/optimized.conf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2019 Mellanox Technologies. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand Down Expand Up @@ -84,4 +85,8 @@ bml_r2_show_unreach_errors = 0
coll_tuned_alltoall_large_msg = 250000
coll_tuned_alltoall_min_procs = 2048
coll_tuned_alltoall_algorithm_max_requests = 8
coll_tuned_scatter_intermediate_msg = 8192
coll_tuned_scatter_large_msg = 250000
coll_tuned_scatter_min_procs = 1048510
coll_tuned_scatter_algorithm_max_requests = 64

12 changes: 0 additions & 12 deletions ompi/mca/coll/base/coll_base_allgatherv.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,6 @@ int ompi_coll_base_allgatherv_intra_bruck(const void *sbuf, int scount,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgather_intra_bruck rank %d", rank));

err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

err = ompi_datatype_get_extent (rdtype, &rlb, &rext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

Expand Down Expand Up @@ -238,9 +235,6 @@ int ompi_coll_base_allgatherv_intra_ring(const void *sbuf, int scount,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgatherv_intra_ring rank %d", rank));

err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

err = ompi_datatype_get_extent (rdtype, &rlb, &rext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

Expand Down Expand Up @@ -381,9 +375,6 @@ ompi_coll_base_allgatherv_intra_neighborexchange(const void *sbuf, int scount,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgatherv_intra_neighborexchange rank %d", rank));

err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

err = ompi_datatype_get_extent (rdtype, &rlb, &rext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

Expand Down Expand Up @@ -520,9 +511,6 @@ int ompi_coll_base_allgatherv_intra_two_procs(const void *sbuf, int scount,
return MPI_ERR_UNSUPPORTED_OPERATION;
}

err = ompi_datatype_get_extent (sdtype, &lb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

err = ompi_datatype_get_extent (rdtype, &lb, &rext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

Expand Down
6 changes: 4 additions & 2 deletions ompi/mca/coll/base/coll_base_allreduce.c
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ ompi_coll_base_allreduce_intra_ring(const void *sbuf, void *rbuf, int count,
char *tmpsend = NULL, *tmprecv = NULL, *inbuf[2] = {NULL, NULL};
ptrdiff_t true_lb, true_extent, lb, extent;
ptrdiff_t block_offset, max_real_segsize;
ompi_request_t *reqs[2] = {NULL, NULL};
ompi_request_t *reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL};

size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
Expand Down Expand Up @@ -528,6 +528,7 @@ ompi_coll_base_allreduce_intra_ring(const void *sbuf, void *rbuf, int count,
error_hndl:
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n",
__FILE__, line, rank, ret));
ompi_coll_base_free_reqs(reqs, 2);
(void)line; // silence compiler warning
if (NULL != inbuf[0]) free(inbuf[0]);
if (NULL != inbuf[1]) free(inbuf[1]);
Expand Down Expand Up @@ -627,7 +628,7 @@ ompi_coll_base_allreduce_intra_ring_segmented(const void *sbuf, void *rbuf, int
size_t typelng;
char *tmpsend = NULL, *tmprecv = NULL, *inbuf[2] = {NULL, NULL};
ptrdiff_t block_offset, max_real_segsize;
ompi_request_t *reqs[2] = {NULL, NULL};
ompi_request_t *reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL};
ptrdiff_t lb, extent, gap;

size = ompi_comm_size(comm);
Expand Down Expand Up @@ -847,6 +848,7 @@ ompi_coll_base_allreduce_intra_ring_segmented(const void *sbuf, void *rbuf, int
error_hndl:
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n",
__FILE__, line, rank, ret));
ompi_coll_base_free_reqs(reqs, 2);
(void)line; // silence compiler warning
if (NULL != inbuf[0]) free(inbuf[0]);
if (NULL != inbuf[1]) free(inbuf[1]);
Expand Down
21 changes: 20 additions & 1 deletion ompi/mca/coll/base/coll_base_alltoall.c
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@ int ompi_coll_base_alltoall_intra_linear_sync(const void *sbuf, int scount,
if (0 < total_reqs) {
reqs = ompi_coll_base_comm_get_reqs(module->base_data, 2 * total_reqs);
if (NULL == reqs) { error = -1; line = __LINE__; goto error_hndl; }
reqs[0] = reqs[1] = MPI_REQUEST_NULL;
}

prcv = (char *) rbuf;
Expand Down Expand Up @@ -468,6 +469,15 @@ int ompi_coll_base_alltoall_intra_linear_sync(const void *sbuf, int scount,
return MPI_SUCCESS;

error_hndl:
/* find a real error code */
if (MPI_ERR_IN_STATUS == error) {
for( ri = 0; ri < nreqs; ri++ ) {
if (MPI_REQUEST_NULL == reqs[ri]) continue;
if (MPI_ERR_PENDING == reqs[ri]->req_status.MPI_ERROR) continue;
error = reqs[ri]->req_status.MPI_ERROR;
break;
}
}
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, error,
rank));
Expand Down Expand Up @@ -661,7 +671,16 @@ int ompi_coll_base_alltoall_intra_basic_linear(const void *sbuf, int scount,
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

err_hndl:
if( MPI_SUCCESS != err ) {
if (MPI_SUCCESS != err) {
/* find a real error code */
if (MPI_ERR_IN_STATUS == err) {
for( i = 0; i < nreqs; i++ ) {
if (MPI_REQUEST_NULL == req[i]) continue;
if (MPI_ERR_PENDING == req[i]->req_status.MPI_ERROR) continue;
err = req[i]->req_status.MPI_ERROR;
break;
}
}
OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank) );
(void)line; // silence compiler warning
Expand Down
11 changes: 10 additions & 1 deletion ompi/mca/coll/base/coll_base_alltoallv.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2016 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -276,6 +276,15 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts
err = ompi_request_wait_all(nreqs, reqs, MPI_STATUSES_IGNORE);

err_hndl:
/* find a real error code */
if (MPI_ERR_IN_STATUS == err) {
for( i = 0; i < nreqs; i++ ) {
if (MPI_REQUEST_NULL == reqs[i]) continue;
if (MPI_ERR_PENDING == reqs[i]->req_status.MPI_ERROR) continue;
err = reqs[i]->req_status.MPI_ERROR;
break;
}
}
/* Free the requests in all cases as they are persistent */
ompi_coll_base_free_reqs(reqs, nreqs);

Expand Down
49 changes: 36 additions & 13 deletions ompi/mca/coll/base/coll_base_barrier.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2016 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -102,8 +102,10 @@ int ompi_coll_base_barrier_intra_doublering(struct ompi_communicator_t *comm,
{
int rank, size, err = 0, line = 0, left, right;

rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
if( 1 == size )
return OMPI_SUCCESS;
rank = ompi_comm_rank(comm);

OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"ompi_coll_base_barrier_intra_doublering rank %d", rank));

Expand Down Expand Up @@ -172,8 +174,10 @@ int ompi_coll_base_barrier_intra_recursivedoubling(struct ompi_communicator_t *c
{
int rank, size, adjsize, err, line, mask, remote;

rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
if( 1 == size )
return OMPI_SUCCESS;
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_barrier_intra_recursivedoubling rank %d",
rank));
Expand Down Expand Up @@ -251,8 +255,10 @@ int ompi_coll_base_barrier_intra_bruck(struct ompi_communicator_t *comm,
{
int rank, size, distance, to, from, err, line = 0;

rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
if( 1 == size )
return MPI_SUCCESS;
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_barrier_intra_bruck rank %d", rank));

Expand Down Expand Up @@ -285,16 +291,19 @@ int ompi_coll_base_barrier_intra_bruck(struct ompi_communicator_t *comm,
int ompi_coll_base_barrier_intra_two_procs(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int remote, err;
int remote, size, err;

size = ompi_comm_size(comm);
if( 1 == size )
return MPI_SUCCESS;
if( 2 != ompi_comm_size(comm) ) {
return MPI_ERR_UNSUPPORTED_OPERATION;
}

remote = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_barrier_intra_two_procs rank %d", remote));

if (2 != ompi_comm_size(comm)) {
return MPI_ERR_UNSUPPORTED_OPERATION;
}

remote = (remote + 1) & 0x1;

err = ompi_coll_base_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
Expand Down Expand Up @@ -324,8 +333,10 @@ int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm,
int i, err, rank, size, line;
ompi_request_t** requests = NULL;

rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
if( 1 == size )
return MPI_SUCCESS;
rank = ompi_comm_rank(comm);

/* All non-root send & receive zero-length message. */
if (rank > 0) {
Expand Down Expand Up @@ -367,11 +378,21 @@ int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm,
/* All done */
return MPI_SUCCESS;
err_hndl:
if( NULL != requests ) {
/* find a real error code */
if (MPI_ERR_IN_STATUS == err) {
for( i = 0; i < size; i++ ) {
if (MPI_REQUEST_NULL == requests[i]) continue;
if (MPI_ERR_PENDING == requests[i]->req_status.MPI_ERROR) continue;
err = requests[i]->req_status.MPI_ERROR;
break;
}
}
ompi_coll_base_free_reqs(requests, size);
}
OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank) );
(void)line; // silence compiler warning
if( NULL != requests )
ompi_coll_base_free_reqs(requests, size);
return err;
}
/* copied function (with appropriate renaming) ends here */
Expand All @@ -385,8 +406,10 @@ int ompi_coll_base_barrier_intra_tree(struct ompi_communicator_t *comm,
{
int rank, size, depth, err, jump, partner;

rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
if( 1 == size )
return MPI_SUCCESS;
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_barrier_intra_tree %d",
rank));
Expand Down
41 changes: 34 additions & 7 deletions ompi/mca/coll/base/coll_base_bcast.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2016 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -214,13 +214,29 @@ ompi_coll_base_bcast_intra_generic( void* buffer,
return (MPI_SUCCESS);

error_hndl:
OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank) );
(void)line; // silence compiler warnings
if (MPI_ERR_IN_STATUS == err) {
for( req_index = 0; req_index < 2; req_index++ ) {
if (MPI_REQUEST_NULL == recv_reqs[req_index]) continue;
if (MPI_ERR_PENDING == recv_reqs[req_index]->req_status.MPI_ERROR) continue;
err = recv_reqs[req_index]->req_status.MPI_ERROR;
break;
}
}
ompi_coll_base_free_reqs( recv_reqs, 2);
if( NULL != send_reqs ) {
if (MPI_ERR_IN_STATUS == err) {
for( req_index = 0; req_index < tree->tree_nextsize; req_index++ ) {
if (MPI_REQUEST_NULL == send_reqs[req_index]) continue;
if (MPI_ERR_PENDING == send_reqs[req_index]->req_status.MPI_ERROR) continue;
err = send_reqs[req_index]->req_status.MPI_ERROR;
break;
}
}
ompi_coll_base_free_reqs(send_reqs, tree->tree_nextsize);
}
OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank) );
(void)line; // silence compiler warnings

return err;
}
Expand Down Expand Up @@ -630,7 +646,9 @@ ompi_coll_base_bcast_intra_basic_linear(void *buff, int count,

/* Root sends data to all others. */
preq = reqs = ompi_coll_base_comm_get_reqs(module->base_data, size-1);
if( NULL == reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE; goto err_hndl; }
if( NULL == reqs ) {
return OMPI_ERR_OUT_OF_RESOURCE;
}

for (i = 0; i < size; ++i) {
if (i == rank) {
Expand All @@ -649,12 +667,21 @@ ompi_coll_base_bcast_intra_basic_linear(void *buff, int count,
* care what the error was -- just that there *was* an error. The
* PML will finish all requests, even if one or more of them fail.
* i.e., by the end of this call, all the requests are free-able.
* So free them anyway -- even if there was an error, and return
* the error after we free everything. */
* So free them anyway -- even if there was an error.
* Note we still need to get the actual error, as collective
* operations cannot return MPI_ERR_IN_STATUS.
*/

err = ompi_request_wait_all(i, reqs, MPI_STATUSES_IGNORE);
err_hndl:
if( MPI_SUCCESS != err ) { /* Free the reqs */
/* first find the real error code */
for( preq = reqs; preq < reqs+i; preq++ ) {
if (MPI_REQUEST_NULL == *preq) continue;
if (MPI_ERR_PENDING == (*preq)->req_status.MPI_ERROR) continue;
err = (*preq)->req_status.MPI_ERROR;
break;
}
ompi_coll_base_free_reqs(reqs, i);
}

Expand Down
2 changes: 2 additions & 0 deletions ompi/mca/coll/base/coll_base_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
* Copyright (c) 2017 FUJITSU LIMITED. All rights reserved.
* Copyright (c) 2019 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -291,6 +292,7 @@ int ompi_coll_base_scan_intra_recursivedoubling(SCAN_ARGS);
/* Scatter */
int ompi_coll_base_scatter_intra_basic_linear(SCATTER_ARGS);
int ompi_coll_base_scatter_intra_binomial(SCATTER_ARGS);
int ompi_coll_base_scatter_intra_linear_nb(SCATTER_ARGS, int max_reqs);

/* ScatterV */

Expand Down
9 changes: 9 additions & 0 deletions ompi/mca/coll/base/coll_base_gather.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,15 @@ ompi_coll_base_gather_intra_linear_sync(const void *sbuf, int scount,
return MPI_SUCCESS;
error_hndl:
if (NULL != reqs) {
/* find a real error code */
if (MPI_ERR_IN_STATUS == ret) {
for( i = 0; i < size; i++ ) {
if (MPI_REQUEST_NULL == reqs[i]) continue;
if (MPI_ERR_PENDING == reqs[i]->req_status.MPI_ERROR) continue;
ret = reqs[i]->req_status.MPI_ERROR;
break;
}
}
ompi_coll_base_free_reqs(reqs, size);
}
OPAL_OUTPUT (( ompi_coll_base_framework.framework_output,
Expand Down
Loading