Skip to content

Commit

Permalink
Address Github issue open-mpi#11532 by translating legacy parameters …
Browse files Browse the repository at this point in the history
…for direct launches

Borrow code from the OMPI schizo module in PRRTE that translates legacy
MCA parameters when an application is direct launched (PRRTE will translate
legacy parameters when natively launched).

Signed-off-by: Quincey Koziol <qkoziol@amazon.com>
  • Loading branch information
qkoziol committed Aug 14, 2023
1 parent 8514e71 commit b95d112
Show file tree
Hide file tree
Showing 2 changed files with 298 additions and 5 deletions.
7 changes: 3 additions & 4 deletions config/ompi_setup_prrte.m4
Original file line number Diff line number Diff line change
Expand Up @@ -242,11 +242,12 @@ dnl Try to find an external prrte with sufficient version.
AC_DEFUN([_OMPI_SETUP_PRRTE_EXTERNAL], [
OPAL_VAR_SCOPE_PUSH([ompi_prte_min_version ompi_prte_min_num_version setup_prrte_external_happy opal_prrte_CPPFLAGS_save])
opal_prrte_CPPFLAGS_save=$CPPFLAGS
AS_IF([test -n "${with_prrte}" -a "${with_prrte}" != "yes" -a "${with_prrte}" != "no"],
[OPAL_FLAGS_APPEND_UNIQ([CPPFLAGS], ["-I${with_prrte}/include"])])
AC_CHECK_HEADER([prte_framework_names.h], [setup_prrte_external_happy=yes],
[setup_prrte_external_happy=no])
AC_CHECK_HEADER([prte.h], [setup_prrte_external_happy=yes],
[setup_prrte_external_happy=no])
Expand All @@ -266,8 +267,6 @@ AC_DEFUN([_OMPI_SETUP_PRRTE_EXTERNAL], [
AS_IF([test "${ompi_setup_prrte_cv_version_happy}" = "no"],
[setup_prrte_external_happy="no"])])
CPPFLAGS="$opal_prrte_CPPFLAGS_save"
# If an external build and the user told us where to find PRRTE,
# find prterun and save that path.
prterun_path=
Expand Down
296 changes: 295 additions & 1 deletion opal/mca/pmix/base/pmix_base_fns.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,19 @@

#include "opal/class/opal_pointer_array.h"
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "opal/util/os_path.h"
#include "opal/util/output.h"
#include "opal/util/printf.h"
#include "opal/util/proc.h"
#include "opal_stdint.h"

#include "opal/mca/base/mca_base_vari.h"
#include "opal/mca/pmix/base/base.h"

#include "pmix_framework_names.h"
#include "prte_framework_names.h"

int opal_pmix_base_exchange(pmix_info_t *indat, pmix_pdata_t *outdat, int timeout)
{
pmix_status_t rc;
Expand Down Expand Up @@ -73,6 +80,288 @@ int opal_pmix_base_exchange(pmix_info_t *indat, pmix_pdata_t *outdat, int timeou
return opal_pmix_convert_status(rc);
}

// Comma-separated list of PRRTE framework names, as provided by the
// PRTE_FRAMEWORK_NAMES macro.
static const char *prte_framework_names = PRTE_FRAMEWORK_NAMES;
// argv-style array obtained by splitting prte_framework_names on ','.
// Populated by setup_prte_frameworks(); NULL until then.
static char **prte_frameworks_tocheck = NULL;
// Set once setup_prte_frameworks() has run, so the split happens only once.
static bool prte_frameworks_setup = false;

/*
 * Lazily build the list of PRRTE framework names.
 *
 * The first call splits prte_framework_names into
 * prte_frameworks_tocheck; every later call is a no-op.
 */
static void setup_prte_frameworks(void)
{
    if (!prte_frameworks_setup) {
        // Flag is raised before the split, so the split is attempted
        // exactly once.
        prte_frameworks_setup = true;
        prte_frameworks_tocheck = opal_argv_split(prte_framework_names, ',');
    }
}

/*
 * Decide whether an MCA parameter name belongs to PRRTE.
 *
 * The text before the first '_' in `param` is treated as the project or
 * framework name. The parameter is considered a PRRTE parameter when that
 * name is exactly "prte" or exactly one of the entries in
 * prte_frameworks_tocheck.
 *
 * @param param  MCA parameter name without any "<PROJECT>_MCA_" prefix
 *               (e.g. "plm_rsh_agent").
 * @return true if the leading component names PRRTE or one of its
 *         frameworks; false otherwise, including when `param` is NULL,
 *         has no '_' separator, or starts with '_'.
 */
static bool check_prte_param(char *param)
{
    const char *sep;
    size_t len;
    size_t n;

    if (NULL == param) {
        return false;
    }

    setup_prte_frameworks();

    // Without a separator there is no framework component to compare;
    // computing (sep - param) on a NULL result would be undefined.
    sep = strchr(param, '_');
    if (NULL == sep || sep == param) {
        return false;
    }
    len = (size_t)(sep - param);

    // Require an exact match of the whole component: comparing only the
    // first `len` bytes would let "p_x" match "prte".
    if (len == strlen("prte") && 0 == strncmp(param, "prte", len)) {
        return true;
    }

    // The split may not have produced an array at all.
    if (NULL == prte_frameworks_tocheck) {
        return false;
    }
    for (n = 0; NULL != prte_frameworks_tocheck[n]; n++) {
        if (len == strlen(prte_frameworks_tocheck[n])
            && 0 == strncmp(param, prte_frameworks_tocheck[n], len)) {
            return true;
        }
    }
    return false;
}

// Built-in comma-separated list of PMIx framework names, as provided by
// the PMIX_FRAMEWORK_NAMES macro. Used when PMIX_MCA_PREFIXES is not set.
static const char *pmix_framework_names = PMIX_FRAMEWORK_NAMES;
// argv-style array obtained by splitting the framework list on ','.
// Populated by setup_pmix_frameworks(); NULL until then.
static char **pmix_frameworks_tocheck = NULL;
// Set once setup_pmix_frameworks() has run, so the split happens only once.
static bool pmix_frameworks_setup = false;

/*
 * Lazily build the list of PMIx framework names.
 *
 * On the first call, the list is taken from the PMIX_MCA_PREFIXES
 * environment variable when it is set, otherwise from
 * pmix_framework_names, and split into pmix_frameworks_tocheck.
 * Every later call is a no-op.
 */
static void setup_pmix_frameworks(void)
{
    const char *list;

    if (pmix_frameworks_setup) {
        return;
    }
    pmix_frameworks_setup = true;

    // The environment, when present, takes priority over the built-in list.
    list = getenv("PMIX_MCA_PREFIXES");
    pmix_frameworks_tocheck = opal_argv_split((NULL != list) ? list : pmix_framework_names, ',');
}

/*
 * Decide whether an MCA parameter name belongs to PMIx.
 *
 * The text before the first '_' in `param` is treated as the project or
 * framework name. The parameter is considered a PMIx parameter when that
 * name is exactly "pmix" or exactly one of the entries in
 * pmix_frameworks_tocheck.
 *
 * @param param  MCA parameter name without any "<PROJECT>_MCA_" prefix
 *               (e.g. "ptl_base_foo").
 * @return true if the leading component names PMIx or one of its
 *         frameworks; false otherwise, including when `param` is NULL,
 *         has no '_' separator, or starts with '_'.
 */
static bool check_pmix_param(char *param)
{
    const char *sep;
    size_t len;
    size_t n;

    if (NULL == param) {
        return false;
    }

    setup_pmix_frameworks();

    // Without a separator there is no framework component to compare;
    // computing (sep - param) on a NULL result would be undefined.
    sep = strchr(param, '_');
    if (NULL == sep || sep == param) {
        return false;
    }
    len = (size_t)(sep - param);

    // Require an exact match of the whole component: comparing only the
    // first `len` bytes would let "p_x" match "pmix".
    if (len == strlen("pmix") && 0 == strncmp(param, "pmix", len)) {
        return true;
    }

    // The split may not have produced an array at all.
    if (NULL == pmix_frameworks_tocheck) {
        return false;
    }
    for (n = 0; NULL != pmix_frameworks_tocheck[n]; n++) {
        if (len == strlen(pmix_frameworks_tocheck[n])
            && 0 == strncmp(param, pmix_frameworks_tocheck[n], len)) {
            return true;
        }
    }
    return false;
}

/*
 * Handle legacy parameters whose framework overlaps with a PRRTE framework.
 *
 * If `var` starts with one of the prefixes "dl_", "oob_", "hwloc_", "if_"
 * or "reachable_", the corresponding PRTE_MCA_* environment variable is set
 * to `value`, unless that variable already exists in the environment
 * (setenv() is called with overwrite disabled).
 *
 * Mapping of the leading component:
 *   dl_*        -> PRTE_MCA_prtedl_*
 *   oob_*       -> PRTE_MCA_oob_*
 *   hwloc_*     -> PRTE_MCA_hwloc_*
 *   if_*        -> PRTE_MCA_prteif_*
 *   reachable_* -> PRTE_MCA_prtereachable_*
 *
 * @param var    MCA parameter name without any "<PROJECT>_MCA_" prefix.
 * @param value  Value to export for the translated name.
 * @return true if `var` matched one of the prefixes above, false otherwise.
 */
static bool check_prte_overlap(char *var, char *value)
{
    char *tmp = NULL;

    if (0 == strncmp(var, "dl_", 3)) {
        // need to convert dl to prtedl
        opal_asprintf(&tmp, "PRTE_MCA_prtedl_%s", var + 3);
    } else if (0 == strncmp(var, "oob_", 4)) {
        opal_asprintf(&tmp, "PRTE_MCA_%s", var);
    } else if (0 == strncmp(var, "hwloc_", 6)) {
        opal_asprintf(&tmp, "PRTE_MCA_%s", var);
    } else if (0 == strncmp(var, "if_", 3)) {
        // need to convert if to prteif
        opal_asprintf(&tmp, "PRTE_MCA_prteif_%s", var + 3);
    } else if (0 == strncmp(var, "reachable_", strlen("reachable_"))) {
        // need to convert reachable to prtereachable; skip the whole
        // "reachable_" prefix (underscore included) so the result does
        // not contain a doubled underscore
        opal_asprintf(&tmp, "PRTE_MCA_prtereachable_%s", var + strlen("reachable_"));
    } else {
        return false;
    }

    // tmp stays NULL if the name could not be formatted; in that case
    // there is nothing to export, but the parameter was still recognized.
    if (NULL != tmp) {
        // set it, but don't overwrite if they already
        // have a value in our environment
        setenv(tmp, value, false);
        free(tmp);
    }
    return true;
}


/*
 * Handle legacy parameters whose framework overlaps with a PMIx framework.
 *
 * If `var` starts with one of the prefixes "dl_", "oob_", "hwloc_" or
 * "if_", the corresponding PMIX_MCA_* environment variable is set to
 * `value`, unless that variable already exists in the environment
 * (setenv() is called with overwrite disabled).
 *
 * Mapping of the leading component:
 *   dl_*    -> PMIX_MCA_pdl_*
 *   oob_*   -> PMIX_MCA_ptl_*
 *   hwloc_* -> PMIX_MCA_hwloc_*
 *   if_*    -> PMIX_MCA_pif_*
 *
 * @param var    MCA parameter name without any "<PROJECT>_MCA_" prefix.
 * @param value  Value to export for the translated name.
 * @return true if `var` matched one of the prefixes above, false otherwise.
 */
static bool check_pmix_overlap(char *var, char *value)
{
    char *envname;

    // Pick the translated name; anything that is not an overlapping
    // framework is reported back to the caller untouched.
    if (0 == strncmp(var, "dl_", 3)) {
        // need to convert dl to pdl
        opal_asprintf(&envname, "PMIX_MCA_pdl_%s", var + 3);
    } else if (0 == strncmp(var, "oob_", 4)) {
        // need to convert oob to ptl
        opal_asprintf(&envname, "PMIX_MCA_ptl_%s", var + 4);
    } else if (0 == strncmp(var, "hwloc_", 6)) {
        opal_asprintf(&envname, "PMIX_MCA_%s", var);
    } else if (0 == strncmp(var, "if_", 3)) {
        // need to convert if to pif
        opal_asprintf(&envname, "PMIX_MCA_pif_%s", var + 3);
    } else {
        return false;
    }

    // set it, but don't overwrite if they already
    // have a value in our environment
    setenv(envname, value, false);
    free(envname);
    return true;
}

// NOTE: This code is fundamentally the same (modulo PMIX <-> OPAL)
//       as the translate_params() routine in the PRRTE repo's
//       src/mca/schizo/ompi/schizo_ompi.c file.  If there are
//       changes here, there are likely to be changes there.

/*
 * Translate one legacy MCA parameter into its PRRTE and/or PMIx
 * environment variable(s).
 *
 * Existing environment values are never overwritten.
 *
 * @param name   Parameter name without any "<PROJECT>_MCA_" prefix.
 * @param value  Parameter value.
 */
static void translate_legacy_param(char *name, char *value)
{
    char *tmp = NULL;

    // Nothing can be exported without both a name and a value.
    if (NULL == name || NULL == value) {
        return;
    }

    if (check_prte_overlap(name, value)) {
        // overlapping framework: it may also need a PMIx equivalent
        check_pmix_overlap(name, value);
    } else if (check_prte_param(name)) {
        opal_asprintf(&tmp, "PRTE_MCA_%s", name);
        if (NULL != tmp) {
            // set it, but don't overwrite if they already
            // have a value in our environment
            setenv(tmp, value, false);
            free(tmp);
        }
        // check for pmix overlap
        check_pmix_overlap(name, value);
    } else if (check_pmix_param(name)) {
        opal_asprintf(&tmp, "PMIX_MCA_%s", name);
        if (NULL != tmp) {
            // set it, but don't overwrite if they already
            // have a value in our environment
            setenv(tmp, value, false);
            free(tmp);
        }
    }
}

/*
 * Parse an MCA parameter file and translate every entry found in it.
 *
 * @param file  Path of the parameter file; the caller keeps ownership.
 *              A NULL path is ignored.
 */
static void translate_legacy_paramfile(char *file)
{
    opal_list_t params;
    mca_base_var_file_value_t *fv;

    if (NULL == file) {
        return;
    }

    OBJ_CONSTRUCT(&params, opal_list_t);
    mca_base_parse_paramfile(file, &params);
    OPAL_LIST_FOREACH (fv, &params, mca_base_var_file_value_t) {
        translate_legacy_param(fv->mbvfv_var, fv->mbvfv_value);
    }
    OPAL_LIST_DESTRUCT(&params);
}

/*
 * Translate legacy OMPI MCA parameters into PRTE_MCA_* / PMIX_MCA_*
 * environment variables for a direct-launched process.
 *
 * Sources are the OMPI_MCA_* environment variables, the user's
 * $HOME/.openmpi/mca-params.conf, and, when OMPIHOME is set,
 * $OMPIHOME/etc/openmpi-mca-params.conf.
 */
static void translate_params(void)
{
    char *evar, *e2;
    char *file;
    const char *home;
    const size_t len = strlen("OMPI_MCA_");
    int n;

    /* Since we are direct launched, we need to check the OMPI default
     * MCA params to see if there is something relating to PRRTE
     * in them - this would be "old" references to things from
     * ORTE, as well as a few OPAL references that also impact us
     *
     * NOTE: we do this in the following precedence order. Note
     * that we do not overwrite at any step - this is so that we
     * don't overwrite something previously set by the user. So
     * the order of execution is the opposite of the intended
     * precedence order.
     *
     * 1. check the environmental parameters for OMPI_MCA values
     *    that need to be translated
     *
     * 2. the user's home directory file as it should
     *    overwrite the system default file, but not the
     *    envars
     *
     * 3. the system default parameter file
     */
    for (n = 0; NULL != environ[n]; n++) {
        if (0 != strncmp(environ[n], "OMPI_MCA_", len)) {
            continue;
        }
        e2 = strdup(environ[n]);
        if (NULL == e2) {
            continue;
        }
        // Split at the FIRST '=': the name cannot contain '=', but the
        // value can, so everything after the first '=' is the value.
        evar = strchr(e2, '=');
        if (NULL != evar) {
            *evar = '\0';
            ++evar;
            translate_legacy_param(&e2[len], evar);
        }
        free(e2);
    }

    /* try to get user's home directory */
    home = opal_home_directory();
    if (NULL != home) {
        file = opal_os_path(false, home, ".openmpi", "mca-params.conf", NULL);
        translate_legacy_paramfile(file);
        free(file);
    }

    /* check if the user has set OMPIHOME in their environment */
    if (NULL != (evar = getenv("OMPIHOME"))) {
        /* look for the default MCA param file; its entries are handled
         * the same way as the other two sources, PMIx-only parameters
         * included */
        file = opal_os_path(false, evar, "etc", "openmpi-mca-params.conf", NULL);
        translate_legacy_paramfile(file);
        free(file);
    }
}

typedef struct {
opal_list_item_t super;
pmix_nspace_t nspace;
Expand All @@ -85,8 +374,13 @@ static opal_list_t localnspaces;
void opal_pmix_setup_nspace_tracker(void)
{
/* check if we were launched by PRRTE */
if (NULL != getenv("PRRTE_LAUNCHED")) {
if (NULL != getenv("PRTE_LAUNCHED")) {
opal_process_info.nativelaunch = true;
} else {
// When direct launched, translate MCA parameters from older releases
// into newer versions here, since PRRTE isn't involved. (When
// natively launched, PRRTE will already have translated the params)
translate_params();
}

OBJ_CONSTRUCT(&localnspaces, opal_list_t);
Expand Down

0 comments on commit b95d112

Please sign in to comment.