diff --git a/ompi/mca/coll/libnbc/coll_libnbc.h b/ompi/mca/coll/libnbc/coll_libnbc.h index 66f0f75e66d..724b86d678a 100644 --- a/ompi/mca/coll/libnbc/coll_libnbc.h +++ b/ompi/mca/coll/libnbc/coll_libnbc.h @@ -15,6 +15,7 @@ * reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -67,6 +68,8 @@ BEGIN_C_DECLS /* number of implemented collective functions */ #define NBC_NUM_COLL 17 +extern bool libnbc_ibcast_skip_dt_decision; + struct ompi_coll_libnbc_component_t { mca_coll_base_component_2_0_0_t super; opal_free_list_t requests; diff --git a/ompi/mca/coll/libnbc/coll_libnbc_component.c b/ompi/mca/coll/libnbc/coll_libnbc_component.c index e4602d68a5d..8670181bb71 100644 --- a/ompi/mca/coll/libnbc/coll_libnbc_component.c +++ b/ompi/mca/coll/libnbc/coll_libnbc_component.c @@ -39,6 +39,7 @@ const char *mca_coll_libnbc_component_version_string = static int libnbc_priority = 10; +bool libnbc_ibcast_skip_dt_decision = true; static int libnbc_open(void); @@ -131,6 +132,27 @@ libnbc_register(void) MCA_BASE_VAR_SCOPE_READONLY, &libnbc_priority); + /* ibcast decision function can make the wrong decision if a legal + * non-uniform data type signature is used. This has resulted in the + * collective operation failing, and possibly producing wrong answers. + * We are investigating a fix for this problem, but it is taking a while. + * https://github.com/open-mpi/ompi/issues/2256 + * https://github.com/open-mpi/ompi/issues/1763 + * As a result we are adding an MCA parameter to make a conservative + * decision to avoid this issue. If the user knows that their application + * does not use data types in this way, then they can set this parameter + * to get the old behavior. Once the issue is truly fixed, then this + * parameter can be removed. 
+ */ + libnbc_ibcast_skip_dt_decision = true; + (void) mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version, + "ibcast_skip_dt_decision", + "In ibcast only use size of communicator to choose algorithm, exclude data type signature. Set to 'false' to use data type signature in decision. WARNING: If you set this to 'false' then your application should not use non-uniform data type signatures in calls to ibcast.", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &libnbc_ibcast_skip_dt_decision); + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ibcast.c b/ompi/mca/coll/libnbc/nbc_ibcast.c index 5f0470a1337..840e6cdce9b 100644 --- a/ompi/mca/coll/libnbc/nbc_ibcast.c +++ b/ompi/mca/coll/libnbc/nbc_ibcast.c @@ -9,6 +9,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. * * Author(s): Torsten Hoefler * @@ -65,16 +66,26 @@ int ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype datatype, int segsize = 16384; /* algorithm selection */ - if (p <= 4) { - alg = NBC_BCAST_LINEAR; - } else if (size * count < 65536) { - alg = NBC_BCAST_BINOMIAL; - } else if (size * count < 524288) { - alg = NBC_BCAST_CHAIN; - segsize = 8192; - } else { - alg = NBC_BCAST_CHAIN; - segsize = 32768; + if( libnbc_ibcast_skip_dt_decision ) { + if (p <= 4) { + alg = NBC_BCAST_LINEAR; + } + else { + alg = NBC_BCAST_BINOMIAL; + } + } + else { + if (p <= 4) { + alg = NBC_BCAST_LINEAR; + } else if (size * count < 65536) { + alg = NBC_BCAST_BINOMIAL; + } else if (size * count < 524288) { + alg = NBC_BCAST_CHAIN; + segsize = 8192; + } else { + alg = NBC_BCAST_CHAIN; + segsize = 32768; + } } #ifdef NBC_CACHE_SCHEDULE