Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/docs/prrte-rst-content/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ dist_rst_DATA = \
cli-no-app-prefix.rst \
cli-rank-by.rst \
cli-runtime-options.rst \
cli-set-env.rst \
cli-stream-buffering.rst \
cli-tune.rst \
cli-unset-env.rst \
Expand Down
8 changes: 6 additions & 2 deletions src/docs/prrte-rst-content/cli-display.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.. -*- rst -*-

Copyright (c) 2022-2023 Nanook Consulting. All rights reserved.
Copyright (c) 2022-2025 Nanook Consulting All rights reserved.
Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved.

$COPYRIGHT$
Expand Down Expand Up @@ -50,4 +50,8 @@ colon (``:``) and any combination of one or more of the following
is easily parsed by machines. Note that ``PARSABLE`` is also accepted as
a typical spelling for the qualifier.

Provided qualifiers will apply to *all* of the display directives.
* ``PHYSICAL`` directs that the output of the ``BINDINGS`` option be displayed
using physical (instead of logical) CPU IDs.

Provided qualifiers will apply to *all* of the display directives unless
noted.
16 changes: 16 additions & 0 deletions src/docs/prrte-rst-content/cli-set-env.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
.. -*- rst -*-

Copyright (c) 2022-2025 Nanook Consulting All rights reserved.
Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved.

$COPYRIGHT$

Additional copyrights may follow

$HEADER$

.. The following line is included so that Sphinx won't complain
about this file not being directly included in some toctree

Set the named environment variable to the specified value. This will overwrite the
existing value, if it exists. Equivalent to the ``-x foo=val`` option.
14 changes: 14 additions & 0 deletions src/hwloc/help-prte-hwloc-base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,17 @@ The specified binding lies above the mapping object type:
Binding level: %s

Please correct the map/bind directives and try again.
#
[pu-not-found]
Construction of the binding output string failed due to an inability
to obtain a processor unit object:

PU number: %u

There will be no impact to your application, so we will continue
but will not be able to output the binding locations.
#
[too-many-sites]
At least one process in your application is bound to too many sites
for us to report in a string. There will be no impact to your application,
so we will continue but will not be able to output the binding locations.
3 changes: 2 additions & 1 deletion src/hwloc/hwloc-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
*
* Copyright (c) 2021-2024 Nanook Consulting All rights reserved.
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
* $COPYRIGHT$
*
Expand Down Expand Up @@ -332,6 +332,7 @@ PRTE_EXPORT int prte_hwloc_base_memory_set(prte_hwloc_base_memory_segment_t *seg
*/
PRTE_EXPORT char *prte_hwloc_base_cset2str(hwloc_const_cpuset_t cpuset,
bool use_hwthread_cpus,
bool physical,
hwloc_topology_t topo);

PRTE_EXPORT void prte_hwloc_get_binding_info(hwloc_const_cpuset_t cpuset,
Expand Down
191 changes: 179 additions & 12 deletions src/hwloc/hwloc_base_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -1352,12 +1352,180 @@ void prte_hwloc_get_binding_info(hwloc_const_cpuset_t cpuset,
}
}

/*
 * qsort() comparator for arrays of unsigned int (ascending order).
 *
 * Returns a negative value, zero, or a positive value when *a is less
 * than, equal to, or greater than *b. The result is computed from two
 * comparisons rather than a subtraction: an unsigned difference wraps
 * around and, once converted to int, carries the wrong sign whenever
 * the two values are more than INT_MAX apart.
 */
static int compare_unsigned(const void *a, const void *b)
{
    const unsigned lhs = *(const unsigned *) a;
    const unsigned rhs = *(const unsigned *) b;

    return (lhs > rhs) - (lhs < rhs);
}

/*
 * Append one list entry to the string under construction in "answer".
 *
 * The entry is the single value "lo" when lo == hi, otherwise the range
 * "lo-hi". Every entry except the first is preceded by a comma. "*len"
 * is the current string length and is advanced on success. The string
 * is always left NUL-terminated.
 *
 * Returns false if the entry does not fit in the "size" bytes available.
 */
static bool build_map_append(char *answer, size_t size, size_t *len,
                             unsigned lo, unsigned hi, bool first)
{
    const char *sep = first ? "" : ",";
    size_t room;
    int rc;

    if (*len >= size) {
        return false;
    }
    room = size - *len;

    if (lo == hi) {
        rc = snprintf(&answer[*len], room, "%s%u", sep, lo);
    } else {
        rc = snprintf(&answer[*len], room, "%s%u-%u", sep, lo, hi);
    }
    // snprintf reports the length it wanted to write; >= room means truncation
    if (rc < 0 || (size_t) rc >= room) {
        return false;
    }
    *len += (size_t) rc;
    return true;
}

/*
 * Generate a string describing the locations set in a hwloc cpuset.
 *
 * The result has the form "<prefix><list>", where <prefix> is one of
 * "core:P", "core:L", "hwt:P" or "hwt:L" (cores vs. hardware threads,
 * physical vs. logical IDs) and <list> is a sorted, comma-separated
 * list of IDs in which runs of consecutive IDs are collapsed into
 * "first-last" ranges, e.g. "core:L0-3,8".
 *
 * @param answer             buffer receiving the NUL-terminated result
 * @param size               size of "answer" in bytes
 * @param bitmap             cpuset whose set bits are to be reported
 * @param use_hwthread_cpus  report hardware threads rather than cores
 * @param physical           report os_index values instead of logical_index
 * @param bits_as_cores      each bit in "bitmap" denotes a core, not a PU
 * @param topo               topology used to look up the objects
 *
 * @return true if "answer" holds a complete string. false if the bitmap
 *         was empty, an object could not be found, more than 2047
 *         distinct locations were present, or the string did not fit in
 *         "answer" - the contents of "answer" are then unspecified.
 */
static bool build_map(char *answer, size_t size,
                      hwloc_const_cpuset_t bitmap,
                      bool use_hwthread_cpus,
                      bool physical, bool bits_as_cores,
                      hwloc_topology_t topo)
{
    unsigned indices[2048], id, val, start, end;
    int nsites = 0, n, rc;
    hwloc_obj_t pu;
    const char *prefix;
    bool first, unique;
    size_t len;

    if (NULL == answer || 0 == size) {
        return false;
    }

    if (bits_as_cores || !use_hwthread_cpus) {
        prefix = physical ? "core:P" : "core:L";
    } else {
        prefix = physical ? "hwt:P" : "hwt:L";
    }

    // hwloc returns -1 once the bitmap is exhausted (or if it is empty)
    for (id = hwloc_bitmap_first(bitmap);
         id != (unsigned)-1;
         id = hwloc_bitmap_next(bitmap, id)) {
        // id is the physical ID for the given PU
        if (bits_as_cores) {
            /* NOTE(review): hwloc_get_obj_by_type() looks objects up by
             * logical index - confirm that bit positions equal logical
             * core indices when bits_as_cores is set */
            pu = hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE, id);
        } else if (!use_hwthread_cpus) {
            // the id's are for threads, but we want cores
            pu = hwloc_get_pu_obj_by_os_index(topo, id);
            // go upward to find the core that contains this pu
            while (NULL != pu && pu->type != HWLOC_OBJ_CORE) {
                pu = pu->parent;
            }
            if (NULL == pu) {
                return false;
            }
        } else {
            pu = hwloc_get_pu_obj_by_os_index(topo, id);
        }
        if (NULL == pu) {
            pmix_show_help("help-prte-hwloc-base.txt", "pu-not-found", true, id);
            return false;
        }
        // record either the physical or the logical site
        val = physical ? pu->os_index : pu->logical_index;

        // add it uniquely to the array of indices - it could be a duplicate
        // if we are looking for cores
        unique = true;
        for (n = 0; n < nsites; n++) {
            if (indices[n] == val) {
                unique = false;
                break;
            }
        }
        if (unique) {
            indices[nsites] = val;
            ++nsites;
            if (2048 == nsites) {
                pmix_show_help("help-prte-hwloc-base.txt", "too-many-sites", true);
                return false;
            }
        }
    }

    /* this should never happen as it would mean that the bitmap was
     * empty, which is something we checked before calling this function */
    if (0 == nsites) {
        return false;
    }

    // sort them so that consecutive IDs become adjacent
    qsort(indices, nsites, sizeof(unsigned), compare_unsigned);

    // prep the answer
    rc = snprintf(answer, size, "%s", prefix);
    if (rc < 0 || (size_t) rc >= size) {
        pmix_show_help("help-prte-hwloc-base.txt", "too-many-sites", true);
        return false;
    }
    len = (size_t) rc;

    // walk the sorted values, emitting each run [start,end] once it ends
    start = indices[0];
    end = indices[0];
    first = true;
    for (n = 1; n < nsites; n++) {
        if (1 == (indices[n] - end)) {
            // still inside a run of consecutive values
            end = indices[n];
            continue;
        }
        // the run ended - output it and begin a new one at this value
        if (!build_map_append(answer, size, &len, start, end, first)) {
            pmix_show_help("help-prte-hwloc-base.txt", "too-many-sites", true);
            return false;
        }
        first = false;
        start = indices[n];
        end = indices[n];
    }

    // output the final run (a single value if start == end)
    if (!build_map_append(answer, size, &len, start, end, first)) {
        pmix_show_help("help-prte-hwloc-base.txt", "too-many-sites", true);
        return false;
    }
    return true;
}

/*
* Make a prettyprint string for a hwloc_cpuset_t
*/
char *prte_hwloc_base_cset2str(hwloc_const_cpuset_t cpuset,
bool use_hwthread_cpus,
bool physical,
hwloc_topology_t topo)
{
int n, npkgs, npus, ncores;
Expand All @@ -1366,6 +1534,7 @@ char *prte_hwloc_base_cset2str(hwloc_const_cpuset_t cpuset,
char **output = NULL, *result;
hwloc_obj_t pkg;
bool bits_as_cores = false;
bool complete;

/* if the cpuset is all zero, then something is wrong */
if (hwloc_bitmap_iszero(cpuset)) {
Expand Down Expand Up @@ -1403,19 +1572,17 @@ char *prte_hwloc_base_cset2str(hwloc_const_cpuset_t cpuset,
if (hwloc_bitmap_iszero(avail)) {
continue;
}
if (bits_as_cores) {
/* can just use the hwloc fn directly */
hwloc_bitmap_list_snprintf(tmp, 2048, avail);
snprintf(ans, 4096, "package[%d][core:%s]", n, tmp);
} else if (use_hwthread_cpus) {
/* can just use the hwloc fn directly */
hwloc_bitmap_list_snprintf(tmp, 2048, avail);
snprintf(ans, 4096, "package[%d][hwt:%s]", n, tmp);
// build the map for this cpuset
complete = build_map(tmp, 2048, avail, use_hwthread_cpus,
physical, bits_as_cores, topo);
if (complete) {
if (physical) {
snprintf(ans, 4096, "package[%d][%s]", n, tmp);
} else {
snprintf(ans, 4096, "package[%d][%s]", n, tmp);
}
} else {
prte_hwloc_build_map(topo, avail, use_hwthread_cpus | bits_as_cores, coreset);
/* now print out the string */
hwloc_bitmap_list_snprintf(tmp, 2048, coreset);
snprintf(ans, 4096, "package[%d][core:%s]", n, tmp);
snprintf(ans, 4096, "package[%d][N/A]", n);
}
PMIX_ARGV_APPEND_NOSIZE_COMPAT(&output, ans);
}
Expand Down
4 changes: 3 additions & 1 deletion src/mca/odls/base/odls_base_bind.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ static void report_binding(prte_job_t *jobdat, int rank)
char *tmp1;
hwloc_cpuset_t mycpus;
bool use_hwthread_cpus;
bool physical;

/* check for type of cpu being used */
if (prte_get_attribute(&jobdat->attributes, PRTE_JOB_HWT_CPUS, NULL, PMIX_BOOL)) {
Expand All @@ -76,7 +77,8 @@ static void report_binding(prte_job_t *jobdat, int rank)
if (hwloc_get_cpubind(prte_hwloc_topology, mycpus, HWLOC_CPUBIND_PROCESS) < 0) {
pmix_output(0, "Rank %d is not bound", rank);
} else {
tmp1 = prte_hwloc_base_cset2str(mycpus, use_hwthread_cpus, prte_hwloc_topology);
physical = prte_get_attribute(&jobdat->attributes, PRTE_JOB_REPORT_PHYSICAL_CPUS, NULL, PMIX_BOOL);
tmp1 = prte_hwloc_base_cset2str(mycpus, use_hwthread_cpus, physical, prte_hwloc_topology);
pmix_output(0, "Rank %d bound to %s", rank, tmp1);
free(tmp1);
}
Expand Down
5 changes: 4 additions & 1 deletion src/mca/rmaps/base/rmaps_base_binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,10 @@ static int bind_generic(prte_job_t *jdata, prte_proc_t *proc,
hwloc_bitmap_list_asprintf(&proc->cpuset, tgtcpus); // bind to the entire target object
if (4 < pmix_output_get_verbosity(prte_rmaps_base_framework.framework_output)) {
char *tmp1;
tmp1 = prte_hwloc_base_cset2str(trg_obj->cpuset, options->use_hwthreads, node->topology->topo);
bool physical;
physical = prte_get_attribute(&jdata->attributes, PRTE_JOB_REPORT_PHYSICAL_CPUS, NULL, PMIX_BOOL);
tmp1 = prte_hwloc_base_cset2str(trg_obj->cpuset, options->use_hwthreads,
physical, node->topology->topo);
pmix_output(prte_rmaps_base_framework.framework_output, "%s BOUND PROC %s[%s] TO %s",
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&proc->name),
node->name, tmp1);
Expand Down
14 changes: 10 additions & 4 deletions src/mca/rmaps/base/rmaps_base_map_job.c
Original file line number Diff line number Diff line change
Expand Up @@ -1028,7 +1028,10 @@ void prte_rmaps_base_report_bindings(prte_job_t *jdata,
char **cache = NULL;
char *out, *tmp;
pmix_proc_t source;
bool physical;

// see if we are to report physical (vs logical) cpu IDs
physical = prte_get_attribute(&jdata->attributes, PRTE_JOB_REPORT_PHYSICAL_CPUS, NULL, PMIX_BOOL);
for (n=0; n < jdata->procs->size; n++) {
proc = (prte_proc_t*)pmix_pointer_array_get_item(jdata->procs, n);
if (NULL == proc) {
Expand All @@ -1041,6 +1044,7 @@ void prte_rmaps_base_report_bindings(prte_job_t *jdata,
hwloc_bitmap_list_sscanf(prte_rmaps_base.available, proc->cpuset);
tmp = prte_hwloc_base_cset2str(prte_rmaps_base.available,
options->use_hwthreads,
physical,
proc->node->topology->topo);
pmix_asprintf(&out, "Proc %s Node %s bound to %s",
PRTE_NAME_PRINT(&proc->name),
Expand All @@ -1050,12 +1054,14 @@ void prte_rmaps_base_report_bindings(prte_job_t *jdata,
PMIX_ARGV_APPEND_NOSIZE_COMPAT(&cache, out);
free(out);
}

if (NULL == cache) {
out = strdup("Error: job has no procs");
} else {
/* add a blank line with \n on it so IOF will output the last line */
PMIX_ARGV_APPEND_NOSIZE_COMPAT(&cache, "");
out = PMIX_ARGV_JOIN_COMPAT(cache, '\n');
PMIX_ARGV_FREE_COMPAT(cache);
}
PMIX_LOAD_PROCID(&source, jdata->nspace, PMIX_RANK_WILDCARD);
prte_iof_base_output(&source, PMIX_FWD_STDOUT_CHANNEL, out);
Expand Down Expand Up @@ -1366,8 +1372,8 @@ static void inherit_env_directives(prte_job_t *jdata,
}

// if it doesn't exist, then inherit it
prte_prepend_attribute(&jdata->attributes, attr->key, PRTE_ATTR_GLOBAL,
envar, PMIX_ENVAR);
prte_set_attribute(&jdata->attributes, attr->key, PRTE_ATTR_GLOBAL,
envar, PMIX_ENVAR);
}

/* There is no one-to-one correlation between the apps, but we can
Expand Down Expand Up @@ -1419,8 +1425,8 @@ static void inherit_env_directives(prte_job_t *jdata,
}

// if it doesn't exist, then inherit it
prte_prepend_attribute(&app2->attributes, attr->key, PRTE_ATTR_GLOBAL,
envar, PMIX_ENVAR);
prte_set_attribute(&app2->attributes, attr->key, PRTE_ATTR_GLOBAL,
envar, PMIX_ENVAR);
}
}

Expand Down
Loading
Loading