Skip to content

Pass oversubscribe status to MPI layer #8998

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion 3rd-party/prrte
Submodule prrte updated 52 files
+1 −0 .gitignore
+1 −0 contrib/dist/make_dist_tarball
+43 −22 examples/dynamic.c
+1 −1 examples/fault.c
+1 −1 examples/launcher.c
+1 −1 src/mca/base/prte_mca_base_var.c
+5 −4 src/mca/errmgr/detector/errmgr_detector.c
+2 −9 src/mca/errmgr/dvm/errmgr_dvm.c
+4 −3 src/mca/ess/base/ess_base_std_prted.c
+3 −4 src/mca/grpcomm/base/grpcomm_base_stubs.c
+18 −6 src/mca/grpcomm/bmg/grpcomm_bmg_module.c
+2 −2 src/mca/iof/base/base.h
+1 −8 src/mca/iof/base/iof_base_frame.c
+84 −40 src/mca/iof/base/iof_base_output.c
+14 −13 src/mca/iof/base/iof_base_setup.c
+1 −0 src/mca/iof/base/iof_base_setup.h
+8 −2 src/mca/iof/hnp/iof_hnp.c
+4 −5 src/mca/iof/hnp/iof_hnp_read.c
+8 −2 src/mca/iof/prted/iof_prted.c
+1 −1 src/mca/odls/alps/odls_alps_module.c
+6 −0 src/mca/odls/base/odls_base_default_fns.c
+1 −1 src/mca/odls/default/odls_default_module.c
+11 −13 src/mca/plm/base/plm_base_launch_support.c
+68 −93 src/mca/plm/plm_types.h
+11 −20 src/mca/propagate/prperror/propagate_prperror.c
+12 −4 src/mca/ras/base/ras_base_allocate.c
+2 −3 src/mca/ras/base/ras_base_node.c
+27 −3 src/mca/rmaps/base/help-prte-rmaps-base.txt
+2 −1 src/mca/rmaps/base/rmaps_base_map_job.c
+1 −0 src/mca/schizo/base/base.h
+207 −0 src/mca/schizo/base/schizo_base_frame.c
+8 −10 src/mca/schizo/ompi/schizo_ompi.c
+7 −113 src/mca/schizo/prte/schizo_prte.c
+32 −16 src/mca/state/dvm/state_dvm.c
+84 −27 src/pmix/pmix.c
+9 −0 src/prted/pmix/pmix_server_dyn.c
+1 −1 src/prted/pmix/pmix_server_gen.c
+6 −0 src/prted/pmix/pmix_server_register_fns.c
+2 −2 src/prted/prted_comm.c
+9 −7 src/runtime/prte_mca_params.c
+95 −59 src/tools/prte/prte.c
+96 −57 src/tools/prun/prun.c
+2 −0 src/util/attr.c
+1 −0 src/util/attr.h
+6 −2 src/util/dash_host/dash_host.c
+26 −11 src/util/hostfile/hostfile.c
+12 −6 src/util/proc_info.c
+2 −1 test/Makefile
+10 −0 test/hello.c
+1 −1 test/loop_spawn.c
+87 −0 test/reinit.c
+40 −30 test/simple_spawn.c
5 changes: 5 additions & 0 deletions ompi/runtime/ompi_mpi_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,11 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
OMPI_TIMING_IMPORT_OPAL("rte_init");

ompi_rte_initialized = true;
/* if the runtime reported that we are oversubscribed, turn on
 * yield_when_idle so that we yield the processor while waiting
 * for MPI communication */
if (ompi_mpi_oversubscribed) {
ompi_mpi_yield_when_idle = true;
}

/* Register the default errhandler callback */
/* we want to go first */
Expand Down
16 changes: 2 additions & 14 deletions ompi/runtime/ompi_mpi_params.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021 Triad National Security, LLC. All rights
* reserved.
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -91,7 +92,7 @@ static bool show_default_mca_params = false;
static bool show_file_mca_params = false;
static bool show_enviro_mca_params = false;
static bool show_override_mca_params = false;
static bool ompi_mpi_oversubscribe = false;
bool ompi_mpi_oversubscribed = false;

#if OPAL_ENABLE_FT_MPI
int ompi_ftmpi_output_handle = 0;
Expand Down Expand Up @@ -147,20 +148,7 @@ int ompi_mpi_register_params(void)
ompi_mpi_param_check = false;
}

/*
* opal_progress: decide whether to yield and the event library
* tick rate
*/
ompi_mpi_oversubscribe = false;
(void) mca_base_var_register("ompi", "mpi", NULL, "oversubscribe",
"Internal MCA parameter set by the runtime environment when oversubscribing nodes",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mpi_oversubscribe);

/* yield if the node is oversubscribed and allow users to override */
ompi_mpi_yield_when_idle |= ompi_mpi_oversubscribe;
(void) mca_base_var_register("ompi", "mpi", NULL, "yield_when_idle",
"Yield the processor when waiting for MPI communication (for MPI processes, will default to 1 when oversubscribing nodes)",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
Expand Down
10 changes: 10 additions & 0 deletions ompi/runtime/ompi_rte.c
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,16 @@ int ompi_rte_init(int *pargc, char ***pargv)
}
}

#ifdef PMIX_NODE_OVERSUBSCRIBED
/* Look up the PMIX_NODE_OVERSUBSCRIBED key for our own job, using a
 * wildcard vpid. The data pointer passed to the macro is NULL, so only
 * the returned status is inspected here.
 * NOTE(review): the flag is set whenever the lookup succeeds; the
 * boolean stored under the key is never read. Confirm that the runtime
 * only publishes this key when the node really is oversubscribed. */
pname.jobid = opal_process_info.my_name.jobid;
pname.vpid = OPAL_VPID_WILDCARD;
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, PMIX_NODE_OVERSUBSCRIBED, &pname,
NULL, PMIX_BOOL);
if (PMIX_SUCCESS == ret) {
/* key was found: record that we are oversubscribed */
ompi_mpi_oversubscribed = true;
}
#endif

return OPAL_SUCCESS;

error:
Expand Down
7 changes: 7 additions & 0 deletions ompi/runtime/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2021 Triad National Security, LLC. All rights
* reserved.
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -198,6 +199,12 @@ OMPI_DECLSPEC int ompi_mpi_register_params(void);
*/
int ompi_show_all_mca_params(int32_t, int, char *);

/**
 * Whether we are running in an oversubscribed environment.
 *
 * Defaults to false. It is set to true in ompi_rte_init() when the
 * PMIX_NODE_OVERSUBSCRIBED key is successfully retrieved from PMIx
 * (only when that key is defined at build time), and is read by
 * ompi_mpi_init() to turn on ompi_mpi_yield_when_idle.
 */
OMPI_DECLSPEC extern bool ompi_mpi_oversubscribed;

END_C_DECLS

#endif /* OMPI_RUNTIME_PARAMS_H */
2 changes: 1 addition & 1 deletion opal/mca/pmix/pmix-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ typedef struct {
(r) = PMIX_ERR_NOT_FOUND; \
} else if (_kv->type != (t)) { \
(r) = PMIX_ERR_TYPE_MISMATCH; \
} else if (PMIX_SUCCESS == (r)) { \
} else if (PMIX_SUCCESS == (r) && NULL != (d)) { \
PMIX_VALUE_UNLOAD((r), _kv, (void **) (d), &_sz); \
} \
if (NULL != _kv) { \
Expand Down