Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix second pass on map-by-obj #1463

Merged
merged 1 commit into from
Aug 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/mca/rmaps/base/help-prte-rmaps-base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ slots that were requested by the application:

%s

Either request fewer slots for your application, or make more slots
Either request fewer procs for your application, or make more slots
available for use.

A "slot" is the PRRTE term for an allocatable unit where we can
Expand Down
53 changes: 36 additions & 17 deletions src/mca/rmaps/round_robin/rmaps_rr_mappers.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ int prte_rmaps_rr_byslot(prte_job_t *jdata,
}
}

if (!options->oversubscribe) {
/* since oversubscribe is not allowed, cap our usage
* at the number of available slots. */
if (node->slots_available < options->nprocs) {
options->nprocs = node->slots_available;
}
}

/* if the number of procs is greater than the number of CPUs
* on this node, but less or equal to the number of slots,
* then we are not oversubscribed but we are overloaded. If
Expand Down Expand Up @@ -240,6 +248,14 @@ int prte_rmaps_rr_bynode(prte_job_t *jdata,
{
prte_rmaps_base_get_cpuset(jdata, node, options);

if (!options->oversubscribe) {
/* since oversubscribe is not allowed, cap our usage
* at the number of available slots. */
if (node->slots_available < options->nprocs) {
options->nprocs = node->slots_available;
}
}

/* if the number of procs is greater than the number of CPUs
* on this node, but less or equal to the number of slots,
* then we are not oversubscribed but we are overloaded. If
Expand All @@ -259,19 +275,6 @@ int prte_rmaps_rr_bynode(prte_job_t *jdata,
continue;
}

/* if oversubscribe is specified, then just ignore the
* number of slots on each node and assign this number.
* Note that oversubscribe automatically dictates that
* we do not bind, so binding can also be ignored */

if (!options->oversubscribe) {
/* since oversubscribe is not allowed, cap our usage
* at the number of available slots */
if (node->slots_available < options->nprocs) {
options->nprocs = node->slots_available;
}
}

PRTE_OUTPUT_VERBOSE((10, prte_rmaps_base_framework.framework_output,
"%s NODE %s ASSIGNING %d PROCS",
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME),
Expand Down Expand Up @@ -374,6 +377,14 @@ int prte_rmaps_rr_bycpu(prte_job_t *jdata, prte_app_context_t *app,
}
}

if (!options->oversubscribe) {
/* oversubscribe is not allowed, so cap our usage
* at the number of available slots. */
if (node->slots_available < options->nprocs) {
options->nprocs = node->slots_available;
}
}

/* if the number of procs is greater than the number of CPUs
* on this node, but less or equal to the number of slots,
* then we are not oversubscribed but we are overloaded. If
Expand Down Expand Up @@ -576,8 +587,14 @@ int prte_rmaps_rr_byobj(prte_job_t *jdata, prte_app_context_t *app,
}
}
}
prte_output_verbose(2, prte_rmaps_base_framework.framework_output,
"mca:rmaps:rr: assigning nprocs %d", nprocs);

if (!options->oversubscribe) {
/* since oversubscribe is not allowed, cap our usage
* at the number of available slots. */
if (node->slots_available < nprocs) {
nprocs = node->slots_available;
}
}

/* if the number of procs is greater than the number of CPUs
* on this node, but less or equal to the number of slots,
Expand All @@ -592,6 +609,9 @@ int prte_rmaps_rr_byobj(prte_job_t *jdata, prte_app_context_t *app,
jdata->map->binding = PRTE_BIND_TO_NONE;
}

prte_output_verbose(2, prte_rmaps_base_framework.framework_output,
"mca:rmaps:rr: assigning nprocs %d", nprocs);

nodefull = false;
if (span) {
/* if we are mapping spanned, then we loop over
Expand Down Expand Up @@ -700,8 +720,7 @@ int prte_rmaps_rr_byobj(prte_job_t *jdata, prte_app_context_t *app,
* handling the oversubscription. Figure out how many procs
* to add to each of them.
*/
balance = (float) ((int) app->num_procs - nprocs_mapped)
/ (float) total_nobjs;
balance = (float) ((int) app->num_procs - nprocs_mapped) / (float) options->total_nobjs;
extra_procs_to_assign = (int) balance;
if (0 < (balance - (float) extra_procs_to_assign)) {
/* compute how many nodes need an extra proc */
Expand Down