From 0b0ad2768b33d4e899b1081764c076642f2cff06 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 19 Mar 2019 13:52:52 -0700 Subject: [PATCH 1/3] Backport: Ensure that nodes are always used in order provided Corresponds to following commits to OMPI master: 35a597178dbf8cfc6b177f53ee891ecb5de8941b 2794ae43b32a05a54ee58b1c3ff6fbac0f07611f aed06e68b9753971312d2c74db76b34a0b37151e 5aa775c02e4d4f8375641ed10fcdc9d8d311ee78 Signed-off-by: Ralph Castain --- opal/mca/compress/base/Makefile.am | 4 +- opal/mca/compress/base/base.h | 7 + opal/mca/compress/base/compress_base_fns.c | 9 +- opal/mca/compress/base/compress_base_frame.c | 91 ++ opal/mca/compress/base/compress_base_select.c | 17 +- opal/mca/compress/bzip/Makefile.am | 38 - opal/mca/compress/bzip/compress_bzip.h | 63 - .../compress/bzip/compress_bzip_component.c | 149 -- opal/mca/compress/bzip/compress_bzip_module.c | 239 --- opal/mca/compress/compress.h | 19 + opal/mca/compress/gzip/Makefile.am | 38 - opal/mca/compress/gzip/compress_gzip.h | 63 - opal/mca/compress/gzip/compress_gzip_module.c | 242 ---- opal/mca/compress/gzip/owner.txt | 7 - opal/mca/compress/zlib/Makefile.am | 42 + opal/mca/compress/zlib/compress_zlib.c | 133 ++ opal/mca/compress/zlib/compress_zlib.h | 66 + .../compress_zlib_component.c} | 95 +- opal/mca/compress/zlib/configure.m4 | 102 ++ opal/mca/compress/{bzip => zlib}/owner.txt | 0 orte/mca/ess/base/ess_base_std_orted.c | 54 +- orte/mca/ess/hnp/ess_hnp_module.c | 13 +- orte/mca/grpcomm/base/grpcomm_base_stubs.c | 8 +- orte/mca/grpcomm/direct/grpcomm_direct.c | 39 +- orte/mca/odls/base/odls_base_default_fns.c | 156 +- orte/mca/odls/odls_types.h | 5 +- orte/mca/plm/base/plm_base_launch_support.c | 143 +- orte/mca/regx/Makefile.am | 30 - orte/mca/regx/base/Makefile.am | 18 - orte/mca/regx/base/base.h | 74 - orte/mca/regx/base/owner.txt | 7 - orte/mca/regx/base/regx_base_default_fns.c | 1281 ----------------- orte/mca/regx/base/regx_base_frame.c | 77 - orte/mca/regx/base/regx_base_select.c | 61 - 
orte/mca/regx/fwd/Makefile.am | 36 - orte/mca/regx/fwd/owner.txt | 7 - orte/mca/regx/fwd/regx_fwd.c | 300 ---- orte/mca/regx/fwd/regx_fwd.h | 28 - orte/mca/regx/fwd/regx_fwd_component.c | 44 - orte/mca/regx/regx.h | 127 -- orte/mca/regx/reverse/Makefile.am | 36 - orte/mca/regx/reverse/owner.txt | 7 - orte/mca/regx/reverse/regx_reverse.c | 319 ---- orte/mca/regx/reverse/regx_reverse.h | 28 - .../mca/regx/reverse/regx_reverse_component.c | 44 - orte/orted/orted_comm.c | 36 +- orte/orted/orted_main.c | 37 +- orte/util/Makefile.am | 8 +- orte/util/compress.c | 117 -- orte/util/compress.h | 53 - orte/util/nidmap.c | 1170 +++++++++++++++ orte/util/nidmap.h | 52 + 52 files changed, 1961 insertions(+), 3878 deletions(-) create mode 100644 opal/mca/compress/base/compress_base_frame.c delete mode 100644 opal/mca/compress/bzip/Makefile.am delete mode 100644 opal/mca/compress/bzip/compress_bzip.h delete mode 100644 opal/mca/compress/bzip/compress_bzip_component.c delete mode 100644 opal/mca/compress/bzip/compress_bzip_module.c delete mode 100644 opal/mca/compress/gzip/Makefile.am delete mode 100644 opal/mca/compress/gzip/compress_gzip.h delete mode 100644 opal/mca/compress/gzip/compress_gzip_module.c delete mode 100644 opal/mca/compress/gzip/owner.txt create mode 100644 opal/mca/compress/zlib/Makefile.am create mode 100644 opal/mca/compress/zlib/compress_zlib.c create mode 100644 opal/mca/compress/zlib/compress_zlib.h rename opal/mca/compress/{gzip/compress_gzip_component.c => zlib/compress_zlib_component.c} (51%) create mode 100644 opal/mca/compress/zlib/configure.m4 rename opal/mca/compress/{bzip => zlib}/owner.txt (100%) delete mode 100644 orte/mca/regx/Makefile.am delete mode 100644 orte/mca/regx/base/Makefile.am delete mode 100644 orte/mca/regx/base/base.h delete mode 100644 orte/mca/regx/base/owner.txt delete mode 100644 orte/mca/regx/base/regx_base_default_fns.c delete mode 100644 orte/mca/regx/base/regx_base_frame.c delete mode 100644 
orte/mca/regx/base/regx_base_select.c delete mode 100644 orte/mca/regx/fwd/Makefile.am delete mode 100644 orte/mca/regx/fwd/owner.txt delete mode 100644 orte/mca/regx/fwd/regx_fwd.c delete mode 100644 orte/mca/regx/fwd/regx_fwd.h delete mode 100644 orte/mca/regx/fwd/regx_fwd_component.c delete mode 100644 orte/mca/regx/regx.h delete mode 100644 orte/mca/regx/reverse/Makefile.am delete mode 100644 orte/mca/regx/reverse/owner.txt delete mode 100644 orte/mca/regx/reverse/regx_reverse.c delete mode 100644 orte/mca/regx/reverse/regx_reverse.h delete mode 100644 orte/mca/regx/reverse/regx_reverse_component.c delete mode 100644 orte/util/compress.c delete mode 100644 orte/util/compress.h create mode 100644 orte/util/nidmap.c create mode 100644 orte/util/nidmap.h diff --git a/opal/mca/compress/base/Makefile.am b/opal/mca/compress/base/Makefile.am index 385d0b3fed0..47c168bd056 100644 --- a/opal/mca/compress/base/Makefile.am +++ b/opal/mca/compress/base/Makefile.am @@ -3,6 +3,7 @@ # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -14,7 +15,6 @@ headers += \ base/base.h libmca_compress_la_SOURCES += \ - base/compress_base_open.c \ - base/compress_base_close.c \ + base/compress_base_frame.c \ base/compress_base_select.c \ base/compress_base_fns.c diff --git a/opal/mca/compress/base/base.h b/opal/mca/compress/base/base.h index df84fe083af..02dedb3ed51 100644 --- a/opal/mca/compress/base/base.h +++ b/opal/mca/compress/base/base.h @@ -3,6 +3,7 @@ * University Research and Technology * Corporation. All rights reserved. * + * Copyright (c) 2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -27,6 +28,12 @@ extern "C" { #endif +typedef struct { + size_t compress_limit; +} opal_compress_base_t; + +OPAL_DECLSPEC extern opal_compress_base_t opal_compress_base; + /** * Initialize the COMPRESS MCA framework * diff --git a/opal/mca/compress/base/compress_base_fns.c b/opal/mca/compress/base/compress_base_fns.c index 1187d8ee391..70733ca3408 100644 --- a/opal/mca/compress/base/compress_base_fns.c +++ b/opal/mca/compress/base/compress_base_fns.c @@ -4,6 +4,8 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * + * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,6 +36,7 @@ #include "opal/util/os_dirpath.h" #include "opal/util/output.h" #include "opal/util/argv.h" +#include "opal/util/printf.h" #include "opal/mca/compress/compress.h" #include "opal/mca/compress/base/base.h" @@ -54,12 +57,12 @@ int opal_compress_base_tar_create(char ** target) pid_t child_pid = 0; int status = 0; - asprintf(&tar_target, "%s.tar", *target); + opal_asprintf(&tar_target, "%s.tar", *target); child_pid = fork(); if( 0 == child_pid ) { /* Child */ char *cmd; - asprintf(&cmd, "tar -cf %s %s", tar_target, *target); + opal_asprintf(&cmd, "tar -cf %s %s", tar_target, *target); argv = opal_argv_split(cmd, ' '); status = execvp(argv[0], argv); @@ -101,7 +104,7 @@ int opal_compress_base_tar_extract(char ** target) child_pid = fork(); if( 0 == child_pid ) { /* Child */ char *cmd; - asprintf(&cmd, "tar -xf %s", *target); + opal_asprintf(&cmd, "tar -xf %s", *target); argv = opal_argv_split(cmd, ' '); status = execvp(argv[0], argv); diff --git a/opal/mca/compress/base/compress_base_frame.c b/opal/mca/compress/base/compress_base_frame.c new file mode 100644 index 00000000000..c46a43bcc9d --- /dev/null +++ 
b/opal/mca/compress/base/compress_base_frame.c @@ -0,0 +1,91 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2011-2013 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/mca/base/base.h" +#include "opal/mca/compress/base/base.h" + +#include "opal/mca/compress/base/static-components.h" + +/* + * Globals + */ +static bool compress_block(uint8_t *inbytes, + size_t inlen, + uint8_t **outbytes, + size_t *olen) +{ + return false; +} + +static bool decompress_block(uint8_t **outbytes, size_t olen, + uint8_t *inbytes, size_t len) +{ + return false; +} + +opal_compress_base_module_t opal_compress = { + NULL, /* init */ + NULL, /* finalize */ + NULL, /* compress */ + NULL, /* compress_nb */ + NULL, /* decompress */ + NULL, /* decompress_nb */ + compress_block, + decompress_block +}; +opal_compress_base_t opal_compress_base = {0}; + +opal_compress_base_component_t opal_compress_base_selected_component = {{0}}; + +static int opal_compress_base_register(mca_base_register_flag_t flags); + +MCA_BASE_FRAMEWORK_DECLARE(opal, compress, "COMPRESS MCA", + opal_compress_base_register, opal_compress_base_open, + opal_compress_base_close, mca_compress_base_static_components, 0); + +static int opal_compress_base_register(mca_base_register_flag_t flags) +{ + opal_compress_base.compress_limit = 4096; + (void) mca_base_var_register("opal", "compress", "base", "limit", + "Threshold beyond which data will be compressed", + MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_READONLY, &opal_compress_base.compress_limit); + + return 
OPAL_SUCCESS; +} + +/** + * Function for finding and opening either all MCA components, + * or the one that was specifically requested via a MCA parameter. + */ +int opal_compress_base_open(mca_base_open_flag_t flags) +{ + /* Open up all available components */ + return mca_base_framework_components_open(&opal_compress_base_framework, flags); +} + +int opal_compress_base_close(void) +{ + /* Call the component's finalize routine */ + if( NULL != opal_compress.finalize ) { + opal_compress.finalize(); + } + + /* Close all available modules that are open */ + return mca_base_framework_components_close (&opal_compress_base_framework, NULL); +} diff --git a/opal/mca/compress/base/compress_base_select.c b/opal/mca/compress/base/compress_base_select.c index 6e98f33a275..b9fdadbe626 100644 --- a/opal/mca/compress/base/compress_base_select.c +++ b/opal/mca/compress/base/compress_base_select.c @@ -7,6 +7,7 @@ * * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,17 +30,10 @@ int opal_compress_base_select(void) { - int ret, exit_status = OPAL_SUCCESS; + int ret = OPAL_SUCCESS; opal_compress_base_component_t *best_component = NULL; opal_compress_base_module_t *best_module = NULL; - /* Compression currently only used with C/R */ - if( !opal_cr_is_enabled ) { - opal_output_verbose(10, opal_compress_base_framework.framework_output, - "compress:open: FT is not enabled, skipping!"); - return OPAL_SUCCESS; - } - /* * Select the best component */ @@ -47,8 +41,8 @@ int opal_compress_base_select(void) &opal_compress_base_framework.framework_components, (mca_base_module_t **) &best_module, (mca_base_component_t **) &best_component, NULL) ) { - /* This will only happen if no component was selected */ - exit_status = OPAL_ERROR; + /* This will only happen if no component was selected, + * in which case we use the default one */ goto cleanup; } @@ -58,12 +52,11 @@ int opal_compress_base_select(void) /* Initialize the winner */ if (NULL != best_module) { if (OPAL_SUCCESS != (ret = best_module->init()) ) { - exit_status = ret; goto cleanup; } opal_compress = *best_module; } cleanup: - return exit_status; + return ret; } diff --git a/opal/mca/compress/bzip/Makefile.am b/opal/mca/compress/bzip/Makefile.am deleted file mode 100644 index 90b9c363750..00000000000 --- a/opal/mca/compress/bzip/Makefile.am +++ /dev/null @@ -1,38 +0,0 @@ -# -# Copyright (c) 2004-2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - compress_bzip.h \ - compress_bzip_component.c \ - compress_bzip_module.c - -# Make the output library in this directory, and name it either -# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la -# (for static builds).
- -if MCA_BUILD_opal_compress_bzip_DSO -component_noinst = -component_install = mca_compress_bzip.la -else -component_noinst = libmca_compress_bzip.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_compress_bzip_la_SOURCES = $(sources) -mca_compress_bzip_la_LDFLAGS = -module -avoid-version -mca_compress_bzip_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_compress_bzip_la_SOURCES = $(sources) -libmca_compress_bzip_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/compress/bzip/compress_bzip.h b/opal/mca/compress/bzip/compress_bzip.h deleted file mode 100644 index e329037fe70..00000000000 --- a/opal/mca/compress/bzip/compress_bzip.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * BZIP COMPRESS component - * - * Uses the bzip library - */ - -#ifndef MCA_COMPRESS_BZIP_EXPORT_H -#define MCA_COMPRESS_BZIP_EXPORT_H - -#include "opal_config.h" - -#include "opal/util/output.h" - -#include "opal/mca/mca.h" -#include "opal/mca/compress/compress.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - /* - * Local Component structures - */ - struct opal_compress_bzip_component_t { - opal_compress_base_component_t super; /** Base COMPRESS component */ - - }; - typedef struct opal_compress_bzip_component_t opal_compress_bzip_component_t; - OPAL_MODULE_DECLSPEC extern opal_compress_bzip_component_t mca_compress_bzip_component; - - int opal_compress_bzip_component_query(mca_base_module_t **module, int *priority); - - /* - * Module functions - */ - int opal_compress_bzip_module_init(void); - int opal_compress_bzip_module_finalize(void); - - /* - * Actual funcationality - */ - int opal_compress_bzip_compress(char *fname, char **cname, 
char **postfix); - int opal_compress_bzip_compress_nb(char *fname, char **cname, char **postfix, pid_t *child_pid); - int opal_compress_bzip_decompress(char *cname, char **fname); - int opal_compress_bzip_decompress_nb(char *cname, char **fname, pid_t *child_pid); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif /* MCA_COMPRESS_BZIP_EXPORT_H */ diff --git a/opal/mca/compress/bzip/compress_bzip_component.c b/opal/mca/compress/bzip/compress_bzip_component.c deleted file mode 100644 index 2d0d1493c24..00000000000 --- a/opal/mca/compress/bzip/compress_bzip_component.c +++ /dev/null @@ -1,149 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "opal/constants.h" -#include "opal/mca/compress/compress.h" -#include "opal/mca/compress/base/base.h" -#include "compress_bzip.h" - -/* - * Public string for version number - */ -const char *opal_compress_bzip_component_version_string = -"OPAL COMPRESS bzip MCA component version " OPAL_VERSION; - -/* - * Local functionality - */ -static int compress_bzip_register (void); -static int compress_bzip_open(void); -static int compress_bzip_close(void); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -opal_compress_bzip_component_t mca_compress_bzip_component = { - /* First do the base component stuff */ - { - /* Handle the general mca_component_t struct containing - * meta information about the component itbzip - */ - .base_version = { - OPAL_COMPRESS_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "bzip", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - 
OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = compress_bzip_open, - .mca_close_component = compress_bzip_close, - .mca_query_component = opal_compress_bzip_component_query, - .mca_register_component_params = compress_bzip_register - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .verbose = 0, - .output_handle = -1, - } -}; - -/* - * Bzip module - */ -static opal_compress_base_module_t loc_module = { - /** Initialization Function */ - opal_compress_bzip_module_init, - /** Finalization Function */ - opal_compress_bzip_module_finalize, - - /** Compress Function */ - opal_compress_bzip_compress, - opal_compress_bzip_compress_nb, - - /** Decompress Function */ - opal_compress_bzip_decompress, - opal_compress_bzip_decompress_nb -}; - -static int compress_bzip_register (void) -{ - int ret; - - mca_compress_bzip_component.super.priority = 10; - ret = mca_base_component_var_register (&mca_compress_bzip_component.super.base_version, - "priority", "Priority of the COMPRESS bzip component " - "(default: 10)", MCA_BASE_VAR_TYPE_INT, NULL, 0, - MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_compress_bzip_component.super.priority); - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register (&mca_compress_bzip_component.super.base_version, - "verbose", - "Verbose level for the COMPRESS bzip component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_compress_bzip_component.super.verbose); - return (0 > ret) ? 
ret : OPAL_SUCCESS; -} - -static int compress_bzip_open(void) -{ - /* If there is a custom verbose level for this component than use it - * otherwise take our parents level and output channel - */ - if ( 0 != mca_compress_bzip_component.super.verbose) { - mca_compress_bzip_component.super.output_handle = opal_output_open(NULL); - opal_output_set_verbosity(mca_compress_bzip_component.super.output_handle, - mca_compress_bzip_component.super.verbose); - } else { - mca_compress_bzip_component.super.output_handle = opal_compress_base_framework.framework_output; - } - - /* - * Debug output - */ - opal_output_verbose(10, mca_compress_bzip_component.super.output_handle, - "compress:bzip: open()"); - opal_output_verbose(20, mca_compress_bzip_component.super.output_handle, - "compress:bzip: open: priority = %d", - mca_compress_bzip_component.super.priority); - opal_output_verbose(20, mca_compress_bzip_component.super.output_handle, - "compress:bzip: open: verbosity = %d", - mca_compress_bzip_component.super.verbose); - return OPAL_SUCCESS; -} - -static int compress_bzip_close(void) -{ - return OPAL_SUCCESS; -} - -int opal_compress_bzip_component_query(mca_base_module_t **module, int *priority) -{ - *module = (mca_base_module_t *)&loc_module; - *priority = mca_compress_bzip_component.super.priority; - - return OPAL_SUCCESS; -} - diff --git a/opal/mca/compress/bzip/compress_bzip_module.c b/opal/mca/compress/bzip/compress_bzip_module.c deleted file mode 100644 index 96a2fb39ef6..00000000000 --- a/opal/mca/compress/bzip/compress_bzip_module.c +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#include -#include -#if HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ - -#include "opal/util/opal_environ.h" -#include "opal/util/output.h" -#include "opal/util/argv.h" -#include "opal/util/opal_environ.h" - -#include "opal/constants.h" -#include "opal/util/basename.h" - -#include "opal/mca/compress/compress.h" -#include "opal/mca/compress/base/base.h" -#include "opal/runtime/opal_cr.h" - -#include "compress_bzip.h" - -static bool is_directory(char *fname ); - -int opal_compress_bzip_module_init(void) -{ - return OPAL_SUCCESS; -} - -int opal_compress_bzip_module_finalize(void) -{ - return OPAL_SUCCESS; -} - -int opal_compress_bzip_compress(char * fname, char **cname, char **postfix) -{ - pid_t child_pid = 0; - int status = 0; - - opal_output_verbose(10, mca_compress_bzip_component.super.output_handle, - "compress:bzip: compress(%s)", - fname); - - opal_compress_bzip_compress_nb(fname, cname, postfix, &child_pid); - waitpid(child_pid, &status, 0); - - if( WIFEXITED(status) ) { - return OPAL_SUCCESS; - } else { - return OPAL_ERROR; - } -} - -int opal_compress_bzip_compress_nb(char * fname, char **cname, char **postfix, pid_t *child_pid) -{ - char **argv = NULL; - char * base_fname = NULL; - char * dir_fname = NULL; - int status; - bool is_dir; - - is_dir = is_directory(fname); - - *child_pid = fork(); - if( *child_pid == 0 ) { /* Child */ - char * cmd; - - dir_fname = opal_dirname(fname); - base_fname = opal_basename(fname); - - chdir(dir_fname); - - if( is_dir ) { -#if 0 - opal_compress_base_tar_create(&base_fname); - asprintf(cname, "%s.bz2", base_fname); - asprintf(&cmd, "bzip2 %s", base_fname); -#else - asprintf(cname, "%s.tar.bz2", base_fname); - asprintf(&cmd, "tar -jcf %s %s", *cname, base_fname); -#endif - } else { - asprintf(cname, "%s.bz2", base_fname); - asprintf(&cmd, "bzip2 %s", base_fname); - } - - 
opal_output_verbose(10, mca_compress_bzip_component.super.output_handle, - "compress:bzip: compress_nb(%s -> [%s])", - fname, *cname); - opal_output_verbose(10, mca_compress_bzip_component.super.output_handle, - "compress:bzip: compress_nb() command [%s]", - cmd); - - argv = opal_argv_split(cmd, ' '); - status = execvp(argv[0], argv); - - opal_output(0, "compress:bzip: compress_nb: Failed to exec child [%s] status = %d\n", cmd, status); - exit(OPAL_ERROR); - } - else if( *child_pid > 0 ) { - if( is_dir ) { - *postfix = strdup(".tar.bz2"); - } else { - *postfix = strdup(".bz2"); - } - asprintf(cname, "%s%s", fname, *postfix); - } - else { - return OPAL_ERROR; - } - - return OPAL_SUCCESS; -} - -int opal_compress_bzip_decompress(char * cname, char **fname) -{ - pid_t child_pid = 0; - int status = 0; - - opal_output_verbose(10, mca_compress_bzip_component.super.output_handle, - "compress:bzip: decompress(%s)", - cname); - - opal_compress_bzip_decompress_nb(cname, fname, &child_pid); - waitpid(child_pid, &status, 0); - - if( WIFEXITED(status) ) { - return OPAL_SUCCESS; - } else { - return OPAL_ERROR; - } -} - -int opal_compress_bzip_decompress_nb(char * cname, char **fname, pid_t *child_pid) -{ - char **argv = NULL; - char * dir_cname = NULL; - pid_t loc_pid = 0; - int status; - bool is_tar = false; - - if( 0 == strncmp(&(cname[strlen(cname)-8]), ".tar.bz2", strlen(".tar.bz2")) ) { - is_tar = true; - } - - *fname = strdup(cname); - if( is_tar ) { - (*fname)[strlen(cname)-8] = '\0'; - } else { - (*fname)[strlen(cname)-4] = '\0'; - } - - opal_output_verbose(10, mca_compress_bzip_component.super.output_handle, - "compress:bzip: decompress_nb(%s -> [%s])", - cname, *fname); - - *child_pid = fork(); - if( *child_pid == 0 ) { /* Child */ - dir_cname = opal_dirname(cname); - - chdir(dir_cname); - - /* Fork(bunzip) */ - loc_pid = fork(); - if( loc_pid == 0 ) { /* Child */ - char * cmd; - asprintf(&cmd, "bunzip2 %s", cname); - - opal_output_verbose(10, 
mca_compress_bzip_component.super.output_handle, - "compress:bzip: decompress_nb() command [%s]", - cmd); - - argv = opal_argv_split(cmd, ' '); - status = execvp(argv[0], argv); - - opal_output(0, "compress:bzip: decompress_nb: Failed to exec child [%s] status = %d\n", cmd, status); - exit(OPAL_ERROR); - } - else if( loc_pid > 0 ) { /* Parent */ - waitpid(loc_pid, &status, 0); - if( !WIFEXITED(status) ) { - opal_output(0, "compress:bzip: decompress_nb: Failed to bunzip the file [%s] status = %d\n", cname, status); - exit(OPAL_ERROR); - } - } - else { - exit(OPAL_ERROR); - } - - /* tar_decompress */ - if( is_tar ) { - /* Strip off '.bz2' leaving just '.tar' */ - cname[strlen(cname)-4] = '\0'; - opal_compress_base_tar_extract(&cname); - } - - /* Once this child is done, then directly exit */ - exit(OPAL_SUCCESS); - } - else if( *child_pid > 0 ) { - ; - } - else { - return OPAL_ERROR; - } - - return OPAL_SUCCESS; -} - -static bool is_directory(char *fname ) { - struct stat file_status; - int rc; - - if(0 != (rc = stat(fname, &file_status) ) ) { - return false; - } - if(S_ISDIR(file_status.st_mode)) { - return true; - } - - return false; -} diff --git a/opal/mca/compress/compress.h b/opal/mca/compress/compress.h index 8b5ba6a7d4a..74295526d60 100644 --- a/opal/mca/compress/compress.h +++ b/opal/mca/compress/compress.h @@ -6,6 +6,7 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * + * Copyright (c) 2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -82,6 +83,20 @@ typedef int (*opal_compress_base_module_decompress_fn_t) typedef int (*opal_compress_base_module_decompress_nb_fn_t) (char * cname, char **fname, pid_t *child_pid); +/** + * Compress a string + * + * Arguments: + * + */ +typedef bool (*opal_compress_base_module_compress_string_fn_t)(uint8_t *inbytes, + size_t inlen, + uint8_t **outbytes, + size_t *olen); +typedef bool (*opal_compress_base_module_decompress_string_fn_t)(uint8_t **outbytes, size_t olen, + uint8_t *inbytes, size_t len); + + /** * Structure for COMPRESS components. */ @@ -117,6 +132,10 @@ struct opal_compress_base_module_1_0_0_t { /** Decompress Interface */ opal_compress_base_module_decompress_fn_t decompress; opal_compress_base_module_decompress_nb_fn_t decompress_nb; + + /* COMPRESS STRING */ + opal_compress_base_module_compress_string_fn_t compress_block; + opal_compress_base_module_decompress_string_fn_t decompress_block; }; typedef struct opal_compress_base_module_1_0_0_t opal_compress_base_module_1_0_0_t; typedef struct opal_compress_base_module_1_0_0_t opal_compress_base_module_t; diff --git a/opal/mca/compress/gzip/Makefile.am b/opal/mca/compress/gzip/Makefile.am deleted file mode 100644 index 40ee38cf091..00000000000 --- a/opal/mca/compress/gzip/Makefile.am +++ /dev/null @@ -1,38 +0,0 @@ -# -# Copyright (c) 2004-2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - compress_gzip.h \ - compress_gzip_component.c \ - compress_gzip_module.c - -# Make the output library in this directory, and name it either -# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la -# (for static builds).
- -if MCA_BUILD_opal_compress_gzip_DSO -component_noinst = -component_install = mca_compress_gzip.la -else -component_noinst = libmca_compress_gzip.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_compress_gzip_la_SOURCES = $(sources) -mca_compress_gzip_la_LDFLAGS = -module -avoid-version -mca_compress_gzip_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_compress_gzip_la_SOURCES = $(sources) -libmca_compress_gzip_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/compress/gzip/compress_gzip.h b/opal/mca/compress/gzip/compress_gzip.h deleted file mode 100644 index 29102476bcf..00000000000 --- a/opal/mca/compress/gzip/compress_gzip.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * GZIP COMPRESS component - * - * Uses the gzip library - */ - -#ifndef MCA_COMPRESS_GZIP_EXPORT_H -#define MCA_COMPRESS_GZIP_EXPORT_H - -#include "opal_config.h" - -#include "opal/util/output.h" - -#include "opal/mca/mca.h" -#include "opal/mca/compress/compress.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - /* - * Local Component structures - */ - struct opal_compress_gzip_component_t { - opal_compress_base_component_t super; /** Base COMPRESS component */ - - }; - typedef struct opal_compress_gzip_component_t opal_compress_gzip_component_t; - OPAL_MODULE_DECLSPEC extern opal_compress_gzip_component_t mca_compress_gzip_component; - - int opal_compress_gzip_component_query(mca_base_module_t **module, int *priority); - - /* - * Module functions - */ - int opal_compress_gzip_module_init(void); - int opal_compress_gzip_module_finalize(void); - - /* - * Actual funcationality - */ - int opal_compress_gzip_compress(char *fname, char **cname, 
char **postfix); - int opal_compress_gzip_compress_nb(char *fname, char **cname, char **postfix, pid_t *child_pid); - int opal_compress_gzip_decompress(char *cname, char **fname); - int opal_compress_gzip_decompress_nb(char *cname, char **fname, pid_t *child_pid); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif /* MCA_COMPRESS_GZIP_EXPORT_H */ diff --git a/opal/mca/compress/gzip/compress_gzip_module.c b/opal/mca/compress/gzip/compress_gzip_module.c deleted file mode 100644 index a9003f80c95..00000000000 --- a/opal/mca/compress/gzip/compress_gzip_module.c +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#include -#include -#if HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ - -#include "opal/util/opal_environ.h" -#include "opal/util/output.h" -#include "opal/util/argv.h" -#include "opal/util/opal_environ.h" - -#include "opal/constants.h" -#include "opal/util/basename.h" - -#include "opal/mca/compress/compress.h" -#include "opal/mca/compress/base/base.h" -#include "opal/runtime/opal_cr.h" - -#include "compress_gzip.h" - -static bool is_directory(char *fname ); - -int opal_compress_gzip_module_init(void) -{ - return OPAL_SUCCESS; -} - -int opal_compress_gzip_module_finalize(void) -{ - return OPAL_SUCCESS; -} - -int opal_compress_gzip_compress(char * fname, char **cname, char **postfix) -{ - pid_t child_pid = 0; - int status = 0; - - opal_output_verbose(10, mca_compress_gzip_component.super.output_handle, - "compress:gzip: compress(%s)", - fname); - - 
opal_compress_gzip_compress_nb(fname, cname, postfix, &child_pid); - waitpid(child_pid, &status, 0); - - if( WIFEXITED(status) ) { - return OPAL_SUCCESS; - } else { - return OPAL_ERROR; - } -} - -int opal_compress_gzip_compress_nb(char * fname, char **cname, char **postfix, pid_t *child_pid) -{ - char **argv = NULL; - char * base_fname = NULL; - char * dir_fname = NULL; - int status; - bool is_dir; - - is_dir = is_directory(fname); - - *child_pid = fork(); - if( *child_pid == 0 ) { /* Child */ - char * cmd = NULL; - - dir_fname = opal_dirname(fname); - base_fname = opal_basename(fname); - - chdir(dir_fname); - - if( is_dir ) { -#if 0 - opal_compress_base_tar_create(&base_fname); - asprintf(cname, "%s.gz", base_fname); - asprintf(&cmd, "gzip %s", base_fname); -#else - asprintf(cname, "%s.tar.gz", base_fname); - asprintf(&cmd, "tar -zcf %s %s", *cname, base_fname); -#endif - } else { - asprintf(cname, "%s.gz", base_fname); - asprintf(&cmd, "gzip %s", base_fname); - } - - opal_output_verbose(10, mca_compress_gzip_component.super.output_handle, - "compress:gzip: compress_nb(%s -> [%s])", - fname, *cname); - opal_output_verbose(10, mca_compress_gzip_component.super.output_handle, - "compress:gzip: compress_nb() command [%s]", - cmd); - - argv = opal_argv_split(cmd, ' '); - status = execvp(argv[0], argv); - - opal_output(0, "compress:gzip: compress_nb: Failed to exec child [%s] status = %d\n", cmd, status); - exit(OPAL_ERROR); - } - else if( *child_pid > 0 ) { - if( is_dir ) { - *postfix = strdup(".tar.gz"); - } else { - *postfix = strdup(".gz"); - } - asprintf(cname, "%s%s", fname, *postfix); - - } - else { - return OPAL_ERROR; - } - - return OPAL_SUCCESS; -} - -int opal_compress_gzip_decompress(char * cname, char **fname) -{ - pid_t child_pid = 0; - int status = 0; - - opal_output_verbose(10, mca_compress_gzip_component.super.output_handle, - "compress:gzip: decompress(%s)", - cname); - - opal_compress_gzip_decompress_nb(cname, fname, &child_pid); - waitpid(child_pid, 
&status, 0); - - if( WIFEXITED(status) ) { - return OPAL_SUCCESS; - } else { - return OPAL_ERROR; - } -} - -int opal_compress_gzip_decompress_nb(char * cname, char **fname, pid_t *child_pid) -{ - char **argv = NULL; - char * dir_cname = NULL; - pid_t loc_pid = 0; - int status; - bool is_tar = false; - - if( 0 == strncmp(&(cname[strlen(cname)-7]), ".tar.gz", strlen(".tar.gz")) ) { - is_tar = true; - } - - *fname = strdup(cname); - if( is_tar ) { - /* Strip off '.tar.gz' */ - (*fname)[strlen(cname)-7] = '\0'; - } else { - /* Strip off '.gz' */ - (*fname)[strlen(cname)-3] = '\0'; - } - - opal_output_verbose(10, mca_compress_gzip_component.super.output_handle, - "compress:gzip: decompress_nb(%s -> [%s])", - cname, *fname); - - *child_pid = fork(); - if( *child_pid == 0 ) { /* Child */ - char * cmd; - dir_cname = opal_dirname(cname); - - chdir(dir_cname); - - /* Fork(gunzip) */ - loc_pid = fork(); - if( loc_pid == 0 ) { /* Child */ - asprintf(&cmd, "gunzip %s", cname); - - opal_output_verbose(10, mca_compress_gzip_component.super.output_handle, - "compress:gzip: decompress_nb() command [%s]", - cmd); - - argv = opal_argv_split(cmd, ' '); - status = execvp(argv[0], argv); - - opal_output(0, "compress:gzip: decompress_nb: Failed to exec child [%s] status = %d\n", cmd, status); - exit(OPAL_ERROR); - } - else if( loc_pid > 0 ) { /* Parent */ - waitpid(loc_pid, &status, 0); - if( !WIFEXITED(status) ) { - opal_output(0, "compress:gzip: decompress_nb: Failed to bunzip the file [%s] status = %d\n", cname, status); - exit(OPAL_ERROR); - } - } - else { - exit(OPAL_ERROR); - } - - /* tar_decompress */ - if( is_tar ) { - /* Strip off '.gz' leaving just '.tar' */ - cname[strlen(cname)-3] = '\0'; - opal_compress_base_tar_extract(&cname); - } - - /* Once this child is done, then directly exit */ - exit(OPAL_SUCCESS); - } - else if( *child_pid > 0 ) { - ; - } - else { - return OPAL_ERROR; - } - - return OPAL_SUCCESS; -} - -static bool is_directory(char *fname ) { - struct stat 
file_status; - int rc; - - if(0 != (rc = stat(fname, &file_status) ) ) { - return false; - } - if(S_ISDIR(file_status.st_mode)) { - return true; - } - - return false; -} diff --git a/opal/mca/compress/gzip/owner.txt b/opal/mca/compress/gzip/owner.txt deleted file mode 100644 index b1efc765f07..00000000000 --- a/opal/mca/compress/gzip/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner:project -status:maintenance diff --git a/opal/mca/compress/zlib/Makefile.am b/opal/mca/compress/zlib/Makefile.am new file mode 100644 index 00000000000..d9e2da948eb --- /dev/null +++ b/opal/mca/compress/zlib/Makefile.am @@ -0,0 +1,42 @@ +# +# Copyright (c) 2004-2010 The Trustees of Indiana University. +# All rights reserved. +# Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2019 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(compress_zlib_CPPFLAGS) + +sources = \ + compress_zlib.h \ + compress_zlib_component.c \ + compress_zlib.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). 
+ +if MCA_BUILD_opal_compress_zlib_DSO +component_noinst = +component_install = mca_compress_zlib.la +else +component_noinst = libmca_compress_zlib.la +component_install = +endif + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_compress_zlib_la_SOURCES = $(sources) +mca_compress_zlib_la_LDFLAGS = -module -avoid-version $(compress_zlib_LDFLAGS) +mca_compress_zlib_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la $(compress_zlib_LIBS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_compress_zlib_la_SOURCES = $(sources) +libmca_compress_zlib_la_LDFLAGS = -module -avoid-version $(compress_zlib_LDFLAGS) +libmca_compress_zlib_la_LIBADD = $(compress_zlib_LIBS) diff --git a/opal/mca/compress/zlib/compress_zlib.c b/opal/mca/compress/zlib/compress_zlib.c new file mode 100644 index 00000000000..850fa6aa2c5 --- /dev/null +++ b/opal/mca/compress/zlib/compress_zlib.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include +#include +#include +#include +#if HAVE_UNISTD_H +#include +#endif /* HAVE_UNISTD_H */ +#include + +#include "opal/util/opal_environ.h" +#include "opal/util/output.h" +#include "opal/util/argv.h" +#include "opal/util/opal_environ.h" +#include "opal/util/printf.h" + +#include "opal/constants.h" +#include "opal/util/basename.h" + +#include "opal/mca/compress/compress.h" +#include "opal/mca/compress/base/base.h" + +#include "compress_zlib.h" + +int opal_compress_zlib_module_init(void) +{ + return OPAL_SUCCESS; +} + +int opal_compress_zlib_module_finalize(void) +{ + return OPAL_SUCCESS; +} + +bool opal_compress_zlib_compress_block(uint8_t *inbytes, + size_t inlen, + uint8_t **outbytes, + size_t *olen) +{ + z_stream strm; + size_t len; + uint8_t *tmp; + + if (inlen < opal_compress_base.compress_limit) { + return false; + } + opal_output_verbose(2, opal_compress_base_framework.framework_output, + "COMPRESSING"); + + /* set default output */ + *outbytes = NULL; + *olen = 0; + + /* setup the stream */ + memset (&strm, 0, sizeof (strm)); + deflateInit (&strm, 9); + + /* get an upper bound on the required output storage */ + len = deflateBound(&strm, inlen); + if (NULL == (tmp = (uint8_t*)malloc(len))) { + return false; + } + strm.next_in = inbytes; + strm.avail_in = inlen; + + /* allocating the upper bound guarantees zlib will + * always successfully compress into the available space */ + strm.avail_out = len; + strm.next_out = tmp; + + deflate (&strm, Z_FINISH); + deflateEnd (&strm); + + *outbytes = tmp; + *olen = len - strm.avail_out; + opal_output_verbose(2, opal_compress_base_framework.framework_output, + "\tINSIZE %d OUTSIZE %d", (int)inlen, (int)*olen); + return true; // we did the compression +} + +bool opal_compress_zlib_uncompress_block(uint8_t **outbytes, size_t olen, + uint8_t *inbytes, size_t len) +{ + uint8_t *dest; + z_stream strm; + + /* set 
the default error answer */ + *outbytes = NULL; + opal_output_verbose(2, opal_compress_base_framework.framework_output, "DECOMPRESS"); + + /* setting destination to the fully decompressed size */ + dest = (uint8_t*)malloc(olen); + if (NULL == dest) { + return false; + } + + memset (&strm, 0, sizeof (strm)); + if (Z_OK != inflateInit(&strm)) { + free(dest); + return false; + } + strm.avail_in = len; + strm.next_in = inbytes; + strm.avail_out = olen; + strm.next_out = dest; + + if (Z_STREAM_END != inflate (&strm, Z_FINISH)) { + opal_output(0, "\tDECOMPRESS FAILED: %s", strm.msg); + } + inflateEnd (&strm); + *outbytes = dest; + opal_output_verbose(2, opal_compress_base_framework.framework_output, + "\tINSIZE: %d OUTSIZE %d", (int)len, (int)olen); + return true; +} diff --git a/opal/mca/compress/zlib/compress_zlib.h b/opal/mca/compress/zlib/compress_zlib.h new file mode 100644 index 00000000000..44e08d54080 --- /dev/null +++ b/opal/mca/compress/zlib/compress_zlib.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + * ZLIB COMPRESS component + * + * Uses the zlib library + */ + +#ifndef MCA_COMPRESS_ZLIB_EXPORT_H +#define MCA_COMPRESS_ZLIB_EXPORT_H + +#include "opal_config.h" + +#include "opal/util/output.h" + +#include "opal/mca/mca.h" +#include "opal/mca/compress/compress.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + + /* + * Local Component structures + */ + struct opal_compress_zlib_component_t { + opal_compress_base_component_t super; /** Base COMPRESS component */ + + }; + typedef struct opal_compress_zlib_component_t opal_compress_zlib_component_t; + extern opal_compress_zlib_component_t mca_compress_zlib_component; + + int opal_compress_zlib_component_query(mca_base_module_t **module, int *priority); + + /* + * Module functions + */ + int opal_compress_zlib_module_init(void); + int opal_compress_zlib_module_finalize(void); + + /* + * Actual functionality + */ + bool opal_compress_zlib_compress_block(uint8_t *inbytes, + size_t inlen, + uint8_t **outbytes, + size_t *olen); + bool opal_compress_zlib_uncompress_block(uint8_t **outbytes, size_t olen, + uint8_t *inbytes, size_t len); + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + +#endif /* MCA_COMPRESS_ZLIB_EXPORT_H */ diff --git a/opal/mca/compress/gzip/compress_gzip_component.c b/opal/mca/compress/zlib/compress_zlib_component.c similarity index 51% rename from opal/mca/compress/gzip/compress_gzip_component.c rename to opal/mca/compress/zlib/compress_zlib_component.c index 62be24d71b9..9e2e38b6fb3 100644 --- a/opal/mca/compress/gzip/compress_gzip_component.c +++ b/opal/mca/compress/zlib/compress_zlib_component.c @@ -4,6 +4,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -16,44 +17,44 @@ #include "opal/constants.h" #include "opal/mca/compress/compress.h" #include "opal/mca/compress/base/base.h" -#include "compress_gzip.h" +#include "compress_zlib.h" /* * Public string for version number */ -const char *opal_compress_gzip_component_version_string = -"OPAL COMPRESS gzip MCA component version " OPAL_VERSION; +const char *opal_compress_zlib_component_version_string = +"OPAL COMPRESS zlib MCA component version " OPAL_VERSION; /* * Local functionality */ -static int compress_gzip_register (void); -static int compress_gzip_open(void); -static int compress_gzip_close(void); +static int compress_zlib_register (void); +static int compress_zlib_open(void); +static int compress_zlib_close(void); /* * Instantiate the public struct with all of our public information * and pointer to our public functions in it */ -opal_compress_gzip_component_t mca_compress_gzip_component = { +opal_compress_zlib_component_t mca_compress_zlib_component = { /* First do the base component stuff */ { /* Handle the general mca_component_t struct containing - * meta information about the component itgzip + * meta information about the component itself */ .base_version = { OPAL_COMPRESS_BASE_VERSION_2_0_0, /* Component name and version */ - .mca_component_name = "gzip", + .mca_component_name = "zlib", MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), + OPAL_RELEASE_VERSION), /* Component open and close functions */ - .mca_open_component = compress_gzip_open, - .mca_close_component = compress_gzip_close, - .mca_query_component = opal_compress_gzip_component_query, - .mca_register_component_params = compress_gzip_register + .mca_open_component = compress_zlib_open, + .mca_close_component = compress_zlib_close, + .mca_query_component = opal_compress_zlib_component_query, + .mca_register_component_params = compress_zlib_register }, .base_data = { /* The component is 
checkpoint ready */ @@ -66,84 +67,82 @@ opal_compress_gzip_component_t mca_compress_gzip_component = { }; /* - * Gzip module + * Zlib module */ static opal_compress_base_module_t loc_module = { /** Initialization Function */ - opal_compress_gzip_module_init, + .init = opal_compress_zlib_module_init, /** Finalization Function */ - opal_compress_gzip_module_finalize, + .finalize = opal_compress_zlib_module_finalize, /** Compress Function */ - opal_compress_gzip_compress, - opal_compress_gzip_compress_nb, + .compress_block = opal_compress_zlib_compress_block, /** Decompress Function */ - opal_compress_gzip_decompress, - opal_compress_gzip_decompress_nb + .decompress_block = opal_compress_zlib_uncompress_block, }; -static int compress_gzip_register (void) +static int compress_zlib_register (void) { int ret; - mca_compress_gzip_component.super.priority = 15; - ret = mca_base_component_var_register (&mca_compress_gzip_component.super.base_version, - "priority", "Priority of the COMPRESS gzip component " - "(default: 15)", MCA_BASE_VAR_TYPE_INT, NULL, 0, + mca_compress_zlib_component.super.priority = 50; + ret = mca_base_component_var_register (&mca_compress_zlib_component.super.base_version, + "priority", "Priority of the COMPRESS zlib component " + "(default: 50)", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_compress_gzip_component.super.priority); + &mca_compress_zlib_component.super.priority); if (0 > ret) { return ret; } - mca_compress_gzip_component.super.verbose = 0; - ret = mca_base_component_var_register (&mca_compress_gzip_component.super.base_version, + mca_compress_zlib_component.super.verbose = 0; + ret = mca_base_component_var_register (&mca_compress_zlib_component.super.base_version, "verbose", - "Verbose level for the COMPRESS gzip component", + "Verbose level for the COMPRESS zlib component", MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, 
MCA_BASE_VAR_SCOPE_LOCAL, - &mca_compress_gzip_component.super.verbose); + &mca_compress_zlib_component.super.verbose); return (0 > ret) ? ret : OPAL_SUCCESS; } -static int compress_gzip_open(void) +static int compress_zlib_open(void) { /* If there is a custom verbose level for this component than use it * otherwise take our parents level and output channel */ - if ( 0 != mca_compress_gzip_component.super.verbose) { - mca_compress_gzip_component.super.output_handle = opal_output_open(NULL); - opal_output_set_verbosity(mca_compress_gzip_component.super.output_handle, - mca_compress_gzip_component.super.verbose); + if ( 0 != mca_compress_zlib_component.super.verbose) { + mca_compress_zlib_component.super.output_handle = opal_output_open(NULL); + opal_output_set_verbosity(mca_compress_zlib_component.super.output_handle, + mca_compress_zlib_component.super.verbose); } else { - mca_compress_gzip_component.super.output_handle = opal_compress_base_framework.framework_output; + mca_compress_zlib_component.super.output_handle = opal_compress_base_framework.framework_output; } /* * Debug output */ - opal_output_verbose(10, mca_compress_gzip_component.super.output_handle, - "compress:gzip: open()"); - opal_output_verbose(20, mca_compress_gzip_component.super.output_handle, - "compress:gzip: open: priority = %d", - mca_compress_gzip_component.super.priority); - opal_output_verbose(20, mca_compress_gzip_component.super.output_handle, - "compress:gzip: open: verbosity = %d", - mca_compress_gzip_component.super.verbose); + opal_output_verbose(10, mca_compress_zlib_component.super.output_handle, + "compress:zlib: open()"); + opal_output_verbose(20, mca_compress_zlib_component.super.output_handle, + "compress:zlib: open: priority = %d", + mca_compress_zlib_component.super.priority); + opal_output_verbose(20, mca_compress_zlib_component.super.output_handle, + "compress:zlib: open: verbosity = %d", + mca_compress_zlib_component.super.verbose); return OPAL_SUCCESS; } -static int 
compress_gzip_close(void) +static int compress_zlib_close(void) { return OPAL_SUCCESS; } -int opal_compress_gzip_component_query(mca_base_module_t **module, int *priority) +int opal_compress_zlib_component_query(mca_base_module_t **module, int *priority) { *module = (mca_base_module_t *)&loc_module; - *priority = mca_compress_gzip_component.super.priority; + *priority = mca_compress_zlib_component.super.priority; return OPAL_SUCCESS; } diff --git a/opal/mca/compress/zlib/configure.m4 b/opal/mca/compress/zlib/configure.m4 new file mode 100644 index 00000000000..426d8889f18 --- /dev/null +++ b/opal/mca/compress/zlib/configure.m4 @@ -0,0 +1,102 @@ +# -*- shell-script -*- +# +# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_compress_zlib_CONFIG([action-if-can-compile], +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_opal_compress_zlib_CONFIG],[ + AC_CONFIG_FILES([opal/mca/compress/zlib/Makefile]) + + OPAL_VAR_SCOPE_PUSH([opal_zlib_dir opal_zlib_libdir opal_zlib_standard_lib_location opal_zlib_standard_header_location opal_check_zlib_save_CPPFLAGS opal_check_zlib_save_LDFLAGS opal_check_zlib_save_LIBS]) + + AC_ARG_WITH([zlib], + [AC_HELP_STRING([--with-zlib=DIR], + [Search for zlib headers and libraries in DIR ])]) + + AC_ARG_WITH([zlib-libdir], + [AC_HELP_STRING([--with-zlib-libdir=DIR], + [Search for zlib libraries in DIR ])]) + + opal_check_zlib_save_CPPFLAGS="$CPPFLAGS" + opal_check_zlib_save_LDFLAGS="$LDFLAGS" + opal_check_zlib_save_LIBS="$LIBS" + + opal_zlib_support=0 + + if test "$with_zlib" != "no"; then + AC_MSG_CHECKING([for zlib in]) + if test ! 
-z "$with_zlib" && test "$with_zlib" != "yes"; then + opal_zlib_dir=$with_zlib + opal_zlib_source=$with_zlib + opal_zlib_standard_header_location=no + opal_zlib_standard_lib_location=no + AS_IF([test -z "$with_zlib_libdir" || test "$with_zlib_libdir" = "yes"], + [if test -d $with_zlib/lib; then + opal_zlib_libdir=$with_zlib/lib + elif test -d $with_zlib/lib64; then + opal_zlib_libdir=$with_zlib/lib64 + else + AC_MSG_RESULT([Could not find $with_zlib/lib or $with_zlib/lib64]) + AC_MSG_ERROR([Can not continue]) + fi + AC_MSG_RESULT([$opal_zlib_dir and $opal_zlib_libdir])], + [AC_MSG_RESULT([$with_zlib_libdir])]) + else + AC_MSG_RESULT([(default search paths)]) + opal_zlib_source=standard + opal_zlib_standard_header_location=yes + opal_zlib_standard_lib_location=yes + fi + AS_IF([test ! -z "$with_zlib_libdir" && test "$with_zlib_libdir" != "yes"], + [opal_zlib_libdir="$with_zlib_libdir" + opal_zlib_standard_lib_location=no]) + + OPAL_CHECK_PACKAGE([compress_zlib], + [zlib.h], + [z], + [deflate], + [-lz], + [$opal_zlib_dir], + [$opal_zlib_libdir], + [opal_zlib_support=1], + [opal_zlib_support=0]) + fi + + if test ! 
-z "$with_zlib" && test "$with_zlib" != "no" && test "$opal_zlib_support" != "1"; then + AC_MSG_WARN([ZLIB SUPPORT REQUESTED AND NOT FOUND]) + AC_MSG_ERROR([CANNOT CONTINUE]) + fi + + AC_MSG_CHECKING([will zlib support be built]) + if test "$opal_zlib_support" != "1"; then + AC_MSG_RESULT([no]) + else + AC_MSG_RESULT([yes]) + fi + + CPPFLAGS="$opal_check_zlib_save_CPPFLAGS" + LDFLAGS="$opal_check_zlib_save_LDFLAGS" + LIBS="$opal_check_zlib_save_LIBS" + + AS_IF([test "$opal_zlib_support" = "1"], + [$1 + OPAL_SUMMARY_ADD([[External Packages]],[[ZLIB]], [opal_zlib], [yes ($opal_zlib_source)])], + [$2]) + + # substitute in the things needed to build this component + AC_SUBST([compress_zlib_CFLAGS]) + AC_SUBST([compress_zlib_CPPFLAGS]) + AC_SUBST([compress_zlib_LDFLAGS]) + AC_SUBST([compress_zlib_LIBS]) + + OPAL_VAR_SCOPE_POP +])dnl diff --git a/opal/mca/compress/bzip/owner.txt b/opal/mca/compress/zlib/owner.txt similarity index 100% rename from opal/mca/compress/bzip/owner.txt rename to opal/mca/compress/zlib/owner.txt diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 7f505338800..0abcb2f142b 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -14,7 +14,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -58,7 +58,6 @@ #include "orte/mca/iof/base/base.h" #include "orte/mca/plm/base/base.h" #include "orte/mca/odls/base/base.h" -#include "orte/mca/regx/base/base.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rmaps/base/base.h" #include "orte/mca/filem/base/base.h" @@ -516,17 +515,6 @@ int orte_ess_base_orted_setup(void) error = "orte_rmaps_base_select"; goto error; } - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_regx_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_regx_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_regx_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_regx_base_select"; - goto error; - } - /* if a topology file was given, then the rmaps framework open * will have reset our topology. Ensure we always get the right @@ -543,46 +531,6 @@ int orte_ess_base_orted_setup(void) opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); } - /* if we were given the host list, then we need to setup - * the daemon info so the RML can function properly - * without requiring a wireup stage. 
This must be done - * after we enable_comm as that function determines our - * own port, which we need in order to construct the nidmap - */ - if (NULL != orte_node_regex) { - if (ORTE_SUCCESS != (ret = orte_regx.nidmap_parse(orte_node_regex))) { - ORTE_ERROR_LOG(ret); - error = "construct nidmap"; - goto error; - } - /* be sure to update the routing tree so any tree spawn operation - * properly gets the number of children underneath us */ - orte_routed.update_routing_plan(NULL); - } - - if (orte_static_ports || orte_fwd_mpirun_port) { - if (NULL == orte_node_regex) { - /* we didn't get the node info */ - error = "cannot construct daemon map for static ports - no node map info"; - goto error; - } - /* extract the node info from the environment and - * build a nidmap from it - this will update the - * routing plan as well - */ - if (ORTE_SUCCESS != (ret = orte_regx.build_daemon_nidmap())) { - ORTE_ERROR_LOG(ret); - error = "construct daemon map from static ports"; - goto error; - } - /* be sure to update the routing tree so the initial "phone home" - * to mpirun goes through the tree if static ports were enabled - */ - orte_routed.update_routing_plan(NULL); - /* routing can be enabled */ - orte_routed_base.routing_enabled = true; - } - /* Now provide a chance for the PLM * to perform any module-specific init functions. This * needs to occur AFTER the communications are setup diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 70f79e67bff..c5ee0102426 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -14,7 +14,7 @@ * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017-2018 Research Organization for Information Science * and Technology (RIST). 
All rights reserved. * $COPYRIGHT$ @@ -67,7 +67,6 @@ #include "orte/mca/grpcomm/base/base.h" #include "orte/mca/iof/base/base.h" #include "orte/mca/ras/base/base.h" -#include "orte/mca/regx/base/base.h" #include "orte/mca/plm/base/base.h" #include "orte/mca/plm/plm.h" #include "orte/mca/odls/base/base.h" @@ -556,16 +555,6 @@ static int rte_init(void) error = "orte_rmaps_base_find_available"; goto error; } - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_regx_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_regx_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_regx_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_regx_base_select"; - goto error; - } /* if a topology file was given, then the rmaps framework open * will have reset our topology. Ensure we always get the right diff --git a/orte/mca/grpcomm/base/grpcomm_base_stubs.c b/orte/mca/grpcomm/base/grpcomm_base_stubs.c index 91fbb1ef5a8..5e350b0f77c 100644 --- a/orte/mca/grpcomm/base/grpcomm_base_stubs.c +++ b/orte/mca/grpcomm/base/grpcomm_base_stubs.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -32,8 +32,8 @@ #include "opal/dss/dss.h" +#include "opal/mca/compress/compress.h" -#include "orte/util/compress.h" #include "orte/util/proc_info.h" #include "orte/util/error_strings.h" #include "orte/mca/errmgr/errmgr.h" @@ -506,8 +506,8 @@ static int pack_xcast(orte_grpcomm_signature_t *sig, } /* see if we want to compress this message */ - if (orte_util_compress_block((uint8_t*)data.base_ptr, data.bytes_used, - &cmpdata, &cmplen)) { + if (opal_compress.compress_block((uint8_t*)data.base_ptr, data.bytes_used, + &cmpdata, &cmplen)) { /* the data was compressed - mark that we compressed it */ flag = 1; if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &flag, 1, OPAL_INT8))) { diff --git a/orte/mca/grpcomm/direct/grpcomm_direct.c b/orte/mca/grpcomm/direct/grpcomm_direct.c index 530e2ced01c..6d5b44573d0 100644 --- a/orte/mca/grpcomm/direct/grpcomm_direct.c +++ b/orte/mca/grpcomm/direct/grpcomm_direct.c @@ -5,7 +5,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All * rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -23,16 +23,16 @@ #include "opal/dss/dss.h" #include "opal/class/opal_list.h" +#include "opal/mca/compress/compress.h" #include "opal/mca/pmix/pmix.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/regx/regx.h" #include "orte/mca/rml/base/base.h" #include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/routed/base/base.h" #include "orte/mca/state/state.h" -#include "orte/util/compress.h" #include "orte/util/name_fns.h" +#include "orte/util/nidmap.h" #include "orte/util/proc_info.h" #include "orte/mca/grpcomm/base/base.h" @@ -271,7 +271,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, opal_list_t coll; orte_grpcomm_signature_t *sig; orte_rml_tag_t tag; - char *rtmod, *nidmap; + char *rtmod; size_t inlen, cmplen; uint8_t *packed_data, *cmpdata; int32_t nvals, i; @@ -336,8 +336,8 @@ static void xcast_recv(int status, orte_process_name_t* sender, return; } /* decompress the data */ - if (orte_util_uncompress_block(&cmpdata, cmplen, - packed_data, inlen)) { + if (opal_compress.decompress_block(&cmpdata, cmplen, + packed_data, inlen)) { /* the data has been uncompressed */ opal_dss.load(&datbuf, cmpdata, cmplen); data = &datbuf; @@ -409,38 +409,17 @@ static void xcast_recv(int status, orte_process_name_t* sender, ORTE_ERROR_LOG(ret); goto relay; } - /* unpack the nidmap string - may be NULL */ - cnt = 1; - if (OPAL_SUCCESS != (ret = opal_dss.unpack(data, &nidmap, &cnt, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - goto relay; - } - if (NULL != nidmap) { - if (ORTE_SUCCESS != (ret = orte_regx.nidmap_parse(nidmap))) { - ORTE_ERROR_LOG(ret); - goto relay; - } - free(nidmap); - } - /* see if they included info on node capabilities */ + /* unpack flag indicating if nidmap included */ cnt = 1; if (OPAL_SUCCESS != (ret = opal_dss.unpack(data, &flag, &cnt, OPAL_INT8))) { ORTE_ERROR_LOG(ret); goto relay; } - if (0 != flag) { - /* update our local nidmap, if required - the decode function - * knows what to do - */ - 
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:direct:xcast updating daemon nidmap", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - if (ORTE_SUCCESS != (ret = orte_regx.decode_daemon_nodemap(data))) { + if (1 == flag) { + if (ORTE_SUCCESS != (ret = orte_util_decode_nidmap(data))) { ORTE_ERROR_LOG(ret); goto relay; } - if (!ORTE_PROC_IS_HNP) { /* update the routing plan - the HNP already did * it when it computed the VM, so don't waste time diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 34b6268f603..8e60df8a24a 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -14,7 +14,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2011-2018 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. 
@@ -66,7 +66,6 @@ #include "orte/mca/ess/base/base.h" #include "orte/mca/grpcomm/base/base.h" #include "orte/mca/plm/base/base.h" -#include "orte/mca/regx/regx.h" #include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/rmaps/rmaps_types.h" #include "orte/mca/rmaps/base/base.h" @@ -79,6 +78,7 @@ #include "orte/util/context_fns.h" #include "orte/util/name_fns.h" +#include "orte/util/nidmap.h" #include "orte/util/session_dir.h" #include "orte/util/proc_info.h" #include "orte/util/show_help.h" @@ -148,7 +148,6 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, int8_t flag; void *nptr; uint32_t key; - char *nidmap; orte_proc_t *dmn, *proc; opal_value_t *val = NULL, *kv; opal_list_t *modex; @@ -167,33 +166,20 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, return ORTE_SUCCESS; } - /* if we couldn't provide the allocation regex on the orted - * cmd line, then we need to provide all the info here */ - if (!orte_nidmap_communicated) { - if (ORTE_SUCCESS != (rc = orte_regx.nidmap_create(orte_node_pool, &nidmap))) { - ORTE_ERROR_LOG(rc); - return rc; - } - orte_nidmap_communicated = true; - } else { - nidmap = NULL; - } - opal_dss.pack(buffer, &nidmap, 1, OPAL_STRING); - if (NULL != nidmap) { - free(nidmap); - } - /* if we haven't already done so, provide the info on the * capabilities of each node */ if (1 < orte_process_info.num_procs && (!orte_node_info_communicated || orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCHED_DAEMONS, NULL, OPAL_BOOL))) { + /* mark that we did include this info */ flag = 1; opal_dss.pack(buffer, &flag, 1, OPAL_INT8); - if (ORTE_SUCCESS != (rc = orte_regx.encode_nodemap(buffer))) { + /* load the nidmap */ + if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, buffer))) { ORTE_ERROR_LOG(rc); return rc; } + /* get wireup info for daemons */ if (NULL == (jptr = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) { ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); @@ -257,74 +243,71 @@ int 
orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, OBJ_RELEASE(val); } } - /* if we didn't rollup the connection info, then we have - * to provide a complete map of connection info */ - if (!orte_static_ports && !orte_fwd_mpirun_port) { - for (v=1; v < jptr->procs->size; v++) { - if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(jptr->procs, v))) { - continue; - } - val = NULL; - if (opal_pmix.legacy_get()) { - if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, OPAL_PMIX_PROC_URI, NULL, &val)) || NULL == val) { + /* provide a complete map of connection info */ + for (v=1; v < jptr->procs->size; v++) { + if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(jptr->procs, v))) { + continue; + } + val = NULL; + if (opal_pmix.legacy_get()) { + if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, OPAL_PMIX_PROC_URI, NULL, &val)) || NULL == val) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buffer); + OBJ_RELEASE(wireup); + return rc; + } else { + /* pack the name of the daemon */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buffer); + OBJ_RELEASE(wireup); + return rc; + } + /* pack the URI */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &val->data.string, 1, OPAL_STRING))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buffer); OBJ_RELEASE(wireup); return rc; - } else { - /* pack the name of the daemon */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - OBJ_RELEASE(wireup); - return rc; - } - /* pack the URI */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &val->data.string, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - OBJ_RELEASE(wireup); - return rc; - } - OBJ_RELEASE(val); } + OBJ_RELEASE(val); + } + } else { + if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, NULL, NULL, &val)) || NULL == val) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buffer); + return rc; } else { - if 
(OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, NULL, NULL, &val)) || NULL == val) { + /* the data is returned as a list of key-value pairs in the opal_value_t */ + if (OPAL_PTR != val->type) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + OBJ_RELEASE(buffer); + return ORTE_ERR_NOT_FOUND; + } + if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buffer); + OBJ_RELEASE(wireup); return rc; - } else { - /* the data is returned as a list of key-value pairs in the opal_value_t */ - if (OPAL_PTR != val->type) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(buffer); - return ORTE_ERR_NOT_FOUND; - } - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - OBJ_RELEASE(wireup); - return rc; - } - modex = (opal_list_t*)val->data.ptr; - numbytes = (int32_t)opal_list_get_size(modex); - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &numbytes, 1, OPAL_INT32))) { + } + modex = (opal_list_t*)val->data.ptr; + numbytes = (int32_t)opal_list_get_size(modex); + if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &numbytes, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buffer); + OBJ_RELEASE(wireup); + return rc; + } + OPAL_LIST_FOREACH(kv, modex, opal_value_t) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &kv, 1, OPAL_VALUE))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buffer); OBJ_RELEASE(wireup); return rc; } - OPAL_LIST_FOREACH(kv, modex, opal_value_t) { - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &kv, 1, OPAL_VALUE))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - OBJ_RELEASE(wireup); - return rc; - } - } - OPAL_LIST_RELEASE(modex); - OBJ_RELEASE(val); } + OPAL_LIST_RELEASE(modex); + OBJ_RELEASE(val); } } } @@ -417,17 +400,11 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, } if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { - /* compute and pack the ppn regex */ - if (ORTE_SUCCESS 
!= (rc = orte_regx.generate_ppn(jdata, &nidmap))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &nidmap, 1, OPAL_STRING))) { + /* compute and pack the ppn */ + if (ORTE_SUCCESS != (rc = orte_util_generate_ppn(jdata, buffer))) { ORTE_ERROR_LOG(rc); - free(nidmap); return rc; } - free(nidmap); } /* get any application prep info */ @@ -472,7 +449,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, orte_proc_t *pptr, *dmn; orte_app_context_t *app; int8_t flag; - char *ppn; opal_value_t *kv; opal_list_t local_support, cache; opal_pmix_lock_t lock; @@ -610,29 +586,21 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, * and sent us the complete array of procs in the orte_job_t, so we * don't need to do anything more here */ if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { - /* extract the ppn regex */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ppn, &cnt, OPAL_STRING))) { + /* load the ppn info into the job and node arrays - the + * function will ignore the data on the HNP as it already + * has the info */ + if (ORTE_SUCCESS != (rc = orte_util_decode_ppn(jdata, buffer))) { ORTE_ERROR_LOG(rc); goto REPORT_ERROR; } if (!ORTE_PROC_IS_HNP) { - /* populate the node array of the job map and the proc array of - * the job object so we know how many procs are on each node */ - if (ORTE_SUCCESS != (rc = orte_regx.parse_ppn(jdata, ppn))) { - ORTE_ERROR_LOG(rc); - free(ppn); - goto REPORT_ERROR; - } /* now assign locations to the procs */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) { ORTE_ERROR_LOG(rc); - free(ppn); goto REPORT_ERROR; } } - free(ppn); /* compute the ranks and add the proc objects * to the jdata->procs array */ diff --git a/orte/mca/odls/odls_types.h b/orte/mca/odls/odls_types.h index 539f9a6ef5e..aabbb34b3ba 100644 --- a/orte/mca/odls/odls_types.h +++ b/orte/mca/odls/odls_types.h @@ -12,7 
+12,7 @@ * Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -93,6 +93,9 @@ typedef uint8_t orte_daemon_cmd_flag_t; /* tell DVM daemons to cleanup resources from job */ #define ORTE_DAEMON_DVM_CLEANUP_JOB_CMD (orte_daemon_cmd_flag_t) 34 +/* pass node info */ +#define ORTE_DAEMON_PASS_NODE_INFO_CMD (orte_daemon_cmd_flag_t) 35 + /* * Struct written up the pipe from the child to the parent. */ diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 5c851da5bf5..52a02cddf4c 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -13,7 +13,7 @@ * Copyright (c) 2009 Institut National de Recherche en Informatique * et Automatique. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
@@ -41,10 +41,12 @@ #include "opal/util/opal_environ.h" #include "opal/class/opal_pointer_array.h" #include "opal/dss/dss.h" +#include "opal/mca/compress/compress.h" #include "opal/mca/hwloc/hwloc-internal.h" #include "opal/mca/pmix/pmix.h" #include "orte/util/dash_host/dash_host.h" +#include "orte/util/nidmap.h" #include "orte/util/session_dir.h" #include "orte/util/show_help.h" #include "orte/mca/errmgr/errmgr.h" @@ -52,7 +54,6 @@ #include "orte/mca/iof/base/base.h" #include "orte/mca/odls/base/base.h" #include "orte/mca/ras/base/base.h" -#include "orte/mca/regx/regx.h" #include "orte/mca/rmaps/rmaps.h" #include "orte/mca/rmaps/base/base.h" #include "orte/mca/rml/rml.h" @@ -71,7 +72,6 @@ #include "orte/runtime/runtime.h" #include "orte/runtime/orte_locks.h" #include "orte/runtime/orte_quit.h" -#include "orte/util/compress.h" #include "orte/util/name_fns.h" #include "orte/util/pre_condition_transports.h" #include "orte/util/proc_info.h" @@ -129,7 +129,11 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata) orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; orte_topology_t *t; orte_node_t *node; - int i; + int i, rc; + uint8_t u8; + opal_buffer_t buf; + orte_grpcomm_signature_t *sig; + orte_daemon_cmd_flag_t command = ORTE_DAEMON_PASS_NODE_INFO_CMD; ORTE_ACQUIRE_OBJECT(caddy); @@ -176,6 +180,78 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata) /* ensure we update the routing plan */ orte_routed.update_routing_plan(NULL); + /* prep the buffer */ + OBJ_CONSTRUCT(&buf, opal_buffer_t); + /* load the command */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &command, 1, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buf); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + + + /* if we did not execute a tree-spawn, then the daemons do + * not currently have a nidmap for the job - in that case, + * send one to them */ + if (!orte_nidmap_communicated) { + u8 = 1; + if 
(ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &u8, 1, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buf); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + if (OPAL_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, &buf))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buf); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + orte_nidmap_communicated = true; + } else { + u8 = 0; + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &u8, 1, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buf); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + } + + /* we always send the topologies and the #slots on each node. Note + * that we cannot send the #slots until after the above step since, + * for unmanaged allocations, we might have just determined it! */ + if (OPAL_SUCCESS != (rc = orte_util_pass_node_info(&buf))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buf); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + + /* goes to all daemons */ + sig = OBJ_NEW(orte_grpcomm_signature_t); + sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); + sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid; + sig->signature[0].vpid = ORTE_VPID_WILDCARD; + sig->sz = 1; + if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, &buf))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(sig); + OBJ_DESTRUCT(&buf); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + OBJ_DESTRUCT(&buf); + /* maintain accounting */ + OBJ_RELEASE(sig); + /* progress the job */ caddy->jdata->state = ORTE_JOB_STATE_DAEMONS_REPORTED; ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_VM_READY); @@ -579,9 +655,9 @@ void orte_plm_base_send_launch_msg(int fd, short args, void *cbdata) uint8_t *cmpdata; size_t cmplen; /* report the size of the launch message */ - compressed = 
orte_util_compress_block((uint8_t*)jdata->launch_msg.base_ptr, - jdata->launch_msg.bytes_used, - &cmpdata, &cmplen); + compressed = opal_compress.compress_block((uint8_t*)jdata->launch_msg.base_ptr, + jdata->launch_msg.bytes_used, + &cmpdata, &cmplen); if (compressed) { opal_output(0, "LAUNCH MSG RAW SIZE: %d COMPRESSED SIZE: %d", (int)jdata->launch_msg.bytes_used, (int)cmplen); @@ -856,8 +932,8 @@ void orte_plm_base_daemon_topology(int status, orte_process_name_t* sender, goto CLEANUP; } /* decompress the data */ - if (orte_util_uncompress_block(&cmpdata, cmplen, - packed_data, inlen)) { + if (opal_compress.decompress_block(&cmpdata, cmplen, + packed_data, inlen)) { /* the data has been uncompressed */ opal_dss.load(&datbuf, cmpdata, cmplen); data = &datbuf; @@ -1183,8 +1259,8 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, goto CLEANUP; } /* decompress the data */ - if (orte_util_uncompress_block(&cmpdata, cmplen, - packed_data, inlen)) { + if (opal_compress.decompress_block(&cmpdata, cmplen, + packed_data, inlen)) { /* the data has been uncompressed */ opal_dss.load(&datbuf, cmpdata, cmplen); data = &datbuf; @@ -1514,45 +1590,10 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, opal_argv_append(argc, argv, param); free(param); - /* convert the nodes with daemons to a regex */ - param = NULL; - if (ORTE_SUCCESS != (rc = orte_regx.nidmap_create(orte_node_pool, &param))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (NULL != orte_node_regex) { - free(orte_node_regex); - } - orte_node_regex = param; - /* if this is too long, then we'll have to do it with - * a phone home operation instead */ - if (strlen(param) < orte_plm_globals.node_regex_threshold) { - opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "orte_node_regex"); - opal_argv_append(argc, argv, orte_node_regex); - /* mark that the nidmap has been communicated */ - orte_nidmap_communicated = true; - } - - if 
(!orte_static_ports && !orte_fwd_mpirun_port) { - /* if we are using static ports, or we are forwarding - * mpirun's port, then we would have built all the - * connection info and so there is nothing to be passed. - * Otherwise, we have to pass the HNP uri so we can - * phone home */ - opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "orte_hnp_uri"); - opal_argv_append(argc, argv, orte_process_info.my_hnp_uri); - } - - /* if requested, pass our port */ - if (orte_fwd_mpirun_port) { - asprintf(&param, "%d", orte_process_info.my_port); - opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "oob_tcp_static_ipv4_ports"); - opal_argv_append(argc, argv, param); - free(param); - } + /* pass the HNP uri */ + opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); + opal_argv_append(argc, argv, "orte_hnp_uri"); + opal_argv_append(argc, argv, orte_process_info.my_hnp_uri); /* if --xterm was specified, pass that along */ if (NULL != orte_xterm) { @@ -2135,7 +2176,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata) opal_list_remove_item(&nodes, item); OBJ_RELEASE(item); } else { - /* The filtering logic sets this flag only for nodes which + /* The filtering logic sets this flag only for nodes which * are kept after filtering. This flag will be subsequently * used in rmaps components and must be reset here */ ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); diff --git a/orte/mca/regx/Makefile.am b/orte/mca/regx/Makefile.am deleted file mode 100644 index 8248d9f4647..00000000000 --- a/orte/mca/regx/Makefile.am +++ /dev/null @@ -1,30 +0,0 @@ -# -# Copyright (c) 2015-2018 Intel, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libmca_regx.la -libmca_regx_la_SOURCES = - -# pkgdata setup -dist_ortedata_DATA = - -# local files -headers = regx.h -libmca_regx_la_SOURCES += $(headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -ortedir = $(orteincludedir)/$(subdir) -nobase_orte_HEADERS = $(headers) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/orte/mca/regx/base/Makefile.am b/orte/mca/regx/base/Makefile.am deleted file mode 100644 index cee4dd7cebc..00000000000 --- a/orte/mca/regx/base/Makefile.am +++ /dev/null @@ -1,18 +0,0 @@ -# -# Copyright (c) 2015-2018 Intel, Inc. All rights reserved. -# Copyright (c) 2018 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -headers += \ - base/base.h - -libmca_regx_la_SOURCES += \ - base/regx_base_default_fns.c \ - base/regx_base_frame.c \ - base/regx_base_select.c diff --git a/orte/mca/regx/base/base.h b/orte/mca/regx/base/base.h deleted file mode 100644 index a1d34e67c77..00000000000 --- a/orte/mca/regx/base/base.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * regx framework base functionality. 
- */ - -#ifndef ORTE_MCA_REGX_BASE_H -#define ORTE_MCA_REGX_BASE_H - -/* - * includes - */ -#include "orte_config.h" -#include "orte/types.h" - -#include "opal/class/opal_list.h" -#include "orte/mca/mca.h" - -#include "orte/runtime/orte_globals.h" - -#include "orte/mca/regx/regx.h" - -BEGIN_C_DECLS - -/* - * MCA Framework - */ -ORTE_DECLSPEC extern mca_base_framework_t orte_regx_base_framework; -/* select all components */ -ORTE_DECLSPEC int orte_regx_base_select(void); - -/* - * common stuff - */ -typedef struct { - opal_list_item_t super; - int vpid; - int cnt; - int slots; - orte_topology_t *t; -} orte_regex_range_t; - -OBJ_CLASS_DECLARATION(orte_regex_range_t); - -typedef struct { - /* list object */ - opal_list_item_t super; - char *prefix; - char *suffix; - int num_digits; - opal_list_t ranges; -} orte_regex_node_t; -END_C_DECLS - -OBJ_CLASS_DECLARATION(orte_regex_node_t); - -ORTE_DECLSPEC extern int orte_regx_base_nidmap_parse(char *regex); - -ORTE_DECLSPEC extern int orte_regx_base_encode_nodemap(opal_buffer_t *buffer); - -ORTE_DECLSPEC int orte_regx_base_decode_daemon_nodemap(opal_buffer_t *buffer); - -ORTE_DECLSPEC int orte_regx_base_generate_ppn(orte_job_t *jdata, char **ppn); - -ORTE_DECLSPEC int orte_regx_base_parse_ppn(orte_job_t *jdata, char *regex); - -ORTE_DECLSPEC int orte_regx_base_extract_node_names(char *regexp, char ***names); -#endif diff --git a/orte/mca/regx/base/owner.txt b/orte/mca/regx/base/owner.txt deleted file mode 100644 index 85b4416d206..00000000000 --- a/orte/mca/regx/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: INTEL -status: active diff --git a/orte/mca/regx/base/regx_base_default_fns.c b/orte/mca/regx/base/regx_base_default_fns.c deleted file mode 100644 index 6b70f78cad0..00000000000 --- a/orte/mca/regx/base/regx_base_default_fns.c +++ /dev/null @@ -1,1281 +0,0 @@ -/* - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include "orte_config.h" -#include "orte/types.h" -#include "opal/types.h" - -#ifdef HAVE_UNISTD_H -#include -#endif -#include - -#include "opal/util/argv.h" -#include "opal/util/basename.h" -#include "opal/util/opal_environ.h" - -#include "orte/runtime/orte_globals.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/routed/routed.h" -#include "orte/mca/regx/base/base.h" - -static void range_construct(orte_regex_range_t *ptr) -{ - ptr->vpid = 0; - ptr->cnt = 0; -} -OBJ_CLASS_INSTANCE(orte_regex_range_t, - opal_list_item_t, - range_construct, NULL); - -static void orte_regex_node_construct(orte_regex_node_t *ptr) -{ - ptr->prefix = NULL; - ptr->suffix = NULL; - ptr->num_digits = 0; - OBJ_CONSTRUCT(&ptr->ranges, opal_list_t); -} - -static void orte_regex_node_destruct(orte_regex_node_t *ptr) -{ - opal_list_item_t *item; - - if (NULL != ptr->prefix) { - free(ptr->prefix); - } - if (NULL != ptr->suffix) { - free(ptr->suffix); - } - - while (NULL != (item = opal_list_remove_first(&ptr->ranges))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&ptr->ranges); -} - -OBJ_CLASS_INSTANCE(orte_regex_node_t, - opal_list_item_t, - orte_regex_node_construct, - orte_regex_node_destruct); - -int orte_regx_base_nidmap_parse(char *regex) -{ - char *nodelist, *vpids, *ptr; - char **nodes, **dvpids; - int rc, n, cnt; 
- orte_regex_range_t *rng; - opal_list_t dids; - orte_job_t *daemons; - orte_node_t *nd; - orte_proc_t *proc; - - /* if we are the HNP, we don't need to parse this */ - if (ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - } - - /* split the regex into its node and vpid parts */ - nodelist = regex; - vpids = strchr(regex, '@'); - if (NULL == vpids) { - /* indicates the regex got mangled somewhere */ - return ORTE_ERR_BAD_PARAM; - } - *vpids = '\0'; // terminate the nodelist string - ++vpids; // step over the separator - if (NULL == vpids || '\0' == *vpids) { - /* indicates the regex got mangled somewhere */ - return ORTE_ERR_BAD_PARAM; - } - - /* decompress the nodes regex */ - nodes = NULL; - if (ORTE_SUCCESS != (rc = orte_regx.extract_node_names(nodelist, &nodes))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (NULL == nodes) { - /* should not happen */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - - /* decompress the vpids */ - OBJ_CONSTRUCT(&dids, opal_list_t); - dvpids = opal_argv_split(vpids, ','); - for (n=0; NULL != dvpids[n]; n++) { - rng = OBJ_NEW(orte_regex_range_t); - opal_list_append(&dids, &rng->super); - /* check for a count */ - if (NULL != (ptr = strchr(dvpids[n], '('))) { - dvpids[n][strlen(dvpids[n])-1] = '\0'; // remove trailing paren - *ptr = '\0'; - ++ptr; - rng->cnt = strtoul(ptr, NULL, 10); - } else { - rng->cnt = 1; - } - /* convert the number */ - rng->vpid = strtoul(dvpids[n], NULL, 10); - } - opal_argv_free(dvpids); - - /* get the daemon job object */ - daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); - - /* create the node pool array - this will include - * _all_ nodes known to the allocation */ - rng = (orte_regex_range_t*)opal_list_get_first(&dids); - cnt = 0; - for (n=0; NULL != nodes[n]; n++) { - nd = OBJ_NEW(orte_node_t); - nd->name = nodes[n]; - opal_pointer_array_set_item(orte_node_pool, n, nd); - /* see if it has a daemon on it */ - if (-1 != rng->vpid) { - /* we have a daemon, so let's 
create the tracker for it */ - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, rng->vpid+cnt))) { - proc = OBJ_NEW(orte_proc_t); - proc->name.jobid = ORTE_PROC_MY_NAME->jobid; - proc->name.vpid = rng->vpid + cnt; - proc->state = ORTE_PROC_STATE_RUNNING; - ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE); - daemons->num_procs++; - opal_pointer_array_set_item(daemons->procs, proc->name.vpid, proc); - } - nd->index = proc->name.vpid; - OBJ_RETAIN(nd); - proc->node = nd; - OBJ_RETAIN(proc); - nd->daemon = proc; - } - ++cnt; - if (rng->cnt <= cnt) { - rng = (orte_regex_range_t*)opal_list_get_next(&rng->super); - if (NULL == rng) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - cnt = 0; - } - } - - /* update num procs */ - if (orte_process_info.num_procs != daemons->num_procs) { - orte_process_info.num_procs = daemons->num_procs; - /* need to update the routing plan */ - orte_routed.update_routing_plan(NULL); - } - - if (orte_process_info.max_procs < orte_process_info.num_procs) { - orte_process_info.max_procs = orte_process_info.num_procs; - } - - if (0 < opal_output_get_verbosity(orte_regx_base_framework.framework_output)) { - int i; - for (i=0; i < orte_node_pool->size; i++) { - if (NULL == (nd = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { - continue; - } - opal_output(0, "%s node[%d].name %s daemon %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i, - (NULL == nd->name) ? "NULL" : nd->name, - (NULL == nd->daemon) ? 
"NONE" : ORTE_VPID_PRINT(nd->daemon->name.vpid)); - } - } - - return ORTE_SUCCESS; -} - -int orte_regx_base_encode_nodemap(opal_buffer_t *buffer) -{ - int n; - bool test; - orte_regex_range_t *rng, *slt, *tp, *flg; - opal_list_t slots, topos, flags; - opal_list_item_t *item; - char *tmp, *tmp2; - orte_node_t *nptr; - int rc; - uint8_t ui8; - orte_topology_t *ortetopo; - - /* setup the list of results */ - OBJ_CONSTRUCT(&slots, opal_list_t); - OBJ_CONSTRUCT(&topos, opal_list_t); - OBJ_CONSTRUCT(&flags, opal_list_t); - - slt = NULL; - tp = NULL; - flg = NULL; - - /* pack a flag indicating if the HNP was included in the allocation */ - if (orte_hnp_is_allocated) { - ui8 = 1; - } else { - ui8 = 0; - } - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &ui8, 1, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack a flag indicating if we are in a managed allocation */ - if (orte_managed_allocation) { - ui8 = 1; - } else { - ui8 = 0; - } - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &ui8, 1, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* handle the topologies - as the most common case by far - * is to have homogeneous topologies, we only send them - * if something is different. We know that the HNP is - * the first topology, and that any differing topology - * on the compute nodes must follow. 
So send the topologies - * if and only if: - * - * (a) the HNP is being used to house application procs and - * there is more than one topology on our list; or - * - * (b) the HNP is not being used, but there are more than - * two topologies on our list, thus indicating that - * there are multiple topologies on the compute nodes - */ - nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); - if (!orte_hnp_is_allocated || (ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) { - /* assign a NULL topology so we still account for our presence, - * but don't cause us to send topology info when not needed */ - tp = OBJ_NEW(orte_regex_range_t); - tp->t = NULL; - tp->cnt = 1; - } else { - /* there is always one topology - our own - so start with it */ - tp = OBJ_NEW(orte_regex_range_t); - tp->t = nptr->topology; - tp->cnt = 1; - } - opal_list_append(&topos, &tp->super); - - opal_output_verbose(5, orte_regx_base_framework.framework_output, - "%s STARTING WITH TOPOLOGY FOR NODE %s: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - nptr->name, (NULL == tp->t) ? 
"NULL" : tp->t->sig); - - /* likewise, we have slots */ - slt = OBJ_NEW(orte_regex_range_t); - slt->slots = nptr->slots; - slt->cnt = 1; - opal_list_append(&slots, &slt->super); - - /* and flags */ - flg = OBJ_NEW(orte_regex_range_t); - if (ORTE_FLAG_TEST(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN)) { - flg->slots = 1; - } else { - flg->slots = 0; - } - flg->cnt = 1; - opal_list_append(&flags, &flg->super); - - for (n=1; n < orte_node_pool->size; n++) { - if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { - continue; - } - /* check the #slots */ - /* is this the next in line */ - if (nptr->slots == slt->slots) { - slt->cnt++; - } else { - /* need to start another range */ - slt = OBJ_NEW(orte_regex_range_t); - slt->slots = nptr->slots; - slt->cnt = 1; - opal_list_append(&slots, &slt->super); - } - /* check the topologies */ - if (NULL != tp->t && NULL == nptr->topology) { - /* we don't know this topology, likely because - * we don't have a daemon on the node */ - tp = OBJ_NEW(orte_regex_range_t); - tp->t = NULL; - tp->cnt = 1; - opal_output_verbose(5, orte_regx_base_framework.framework_output, - "%s ADD TOPOLOGY FOR NODE %s: NULL", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nptr->name); - opal_list_append(&topos, &tp->super); - } else { - /* is this the next in line */ - if (tp->t == nptr->topology) { - tp->cnt++; - opal_output_verbose(5, orte_regx_base_framework.framework_output, - "%s CONTINUE TOPOLOGY RANGE (%d) WITH NODE %s: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - tp->cnt, nptr->name, - (NULL == tp->t) ? 
"N/A" : tp->t->sig); - } else { - /* need to start another range */ - tp = OBJ_NEW(orte_regex_range_t); - tp->t = nptr->topology; - tp->cnt = 1; - opal_output_verbose(5, orte_regx_base_framework.framework_output, - "%s STARTING NEW TOPOLOGY RANGE WITH NODE %s: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - nptr->name, tp->t->sig); - opal_list_append(&topos, &tp->super); - } - } - /* check the flags */ - test = ORTE_FLAG_TEST(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN); - /* is this the next in line */ - if ((test && 1 == flg->slots) || - (!test && 0 == flg->slots)) { - flg->cnt++; - } else { - /* need to start another range */ - flg = OBJ_NEW(orte_regex_range_t); - if (test) { - flg->slots = 1; - } else { - flg->slots = 0; - } - flg->cnt = 1; - opal_list_append(&flags, &flg->super); - } - } - - /* pass #slots on each node */ - tmp = NULL; - while (NULL != (item = opal_list_remove_first(&slots))) { - rng = (orte_regex_range_t*)item; - if (NULL == tmp) { - asprintf(&tmp, "%d[%d]", rng->cnt, rng->slots); - } else { - asprintf(&tmp2, "%s,%d[%d]", tmp, rng->cnt, rng->slots); - free(tmp); - tmp = tmp2; - } - OBJ_RELEASE(rng); - } - OPAL_LIST_DESTRUCT(&slots); - opal_output_verbose(1, orte_regx_base_framework.framework_output, - "%s SLOT ASSIGNMENTS: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp); - /* pack the string */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (NULL != tmp) { - free(tmp); - } - - /* do the same to pass the flags for each node */ - tmp = NULL; - while (NULL != (item = opal_list_remove_first(&flags))) { - rng = (orte_regex_range_t*)item; - if (NULL == tmp) { - asprintf(&tmp, "%d[%d]", rng->cnt, rng->slots); - } else { - asprintf(&tmp2, "%s,%d[%d]", tmp, rng->cnt, rng->slots); - free(tmp); - tmp = tmp2; - } - OBJ_RELEASE(rng); - } - OPAL_LIST_DESTRUCT(&flags); - - /* pack the string */ - opal_output_verbose(1, orte_regx_base_framework.framework_output, - "%s FLAG ASSIGNMENTS: %s", - 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp); - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (NULL != tmp) { - free(tmp); - } - - /* don't try to be cute - there aren't going to be that many - * topologies, so just scan the list and see if they are the - * same, excluding any NULL values */ - ortetopo = NULL; - test = false; - OPAL_LIST_FOREACH(rng, &topos, orte_regex_range_t) { - if (NULL == rng->t) { - continue; - } - if (NULL == ortetopo) { - ortetopo = rng->t; - } else if (0 != strcmp(ortetopo->sig, rng->t->sig)) { - /* we have a difference, so send them */ - test = true; - } - } - tmp = NULL; - if (test) { - opal_buffer_t bucket, *bptr; - OBJ_CONSTRUCT(&bucket, opal_buffer_t); - while (NULL != (item = opal_list_remove_first(&topos))) { - rng = (orte_regex_range_t*)item; - opal_output_verbose(5, orte_regx_base_framework.framework_output, - "%s PASSING TOPOLOGY %s RANGE %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (NULL == rng->t) ? 
"NULL" : rng->t->sig, rng->cnt); - if (NULL == tmp) { - asprintf(&tmp, "%d", rng->cnt); - } else { - asprintf(&tmp2, "%s,%d", tmp, rng->cnt); - free(tmp); - tmp = tmp2; - } - if (NULL == rng->t) { - /* need to account for NULL topology */ - opal_output_verbose(1, orte_regx_base_framework.framework_output, - "%s PACKING NULL TOPOLOGY", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - tmp2 = NULL; - if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &tmp2, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(rng); - OPAL_LIST_DESTRUCT(&topos); - OBJ_DESTRUCT(&bucket); - free(tmp); - return rc; - } - } else { - opal_output_verbose(1, orte_regx_base_framework.framework_output, - "%s PACKING TOPOLOGY: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rng->t->sig); - /* pack this topology string */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &rng->t->sig, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(rng); - OPAL_LIST_DESTRUCT(&topos); - OBJ_DESTRUCT(&bucket); - free(tmp); - return rc; - } - /* pack the topology itself */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &rng->t->topo, 1, OPAL_HWLOC_TOPO))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(rng); - OPAL_LIST_DESTRUCT(&topos); - OBJ_DESTRUCT(&bucket); - free(tmp); - return rc; - } - } - OBJ_RELEASE(rng); - } - OPAL_LIST_DESTRUCT(&topos); - /* pack the string */ - opal_output_verbose(1, orte_regx_base_framework.framework_output, - "%s TOPOLOGY ASSIGNMENTS: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp); - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&bucket); - free(tmp); - return rc; - } - free(tmp); - - /* now pack the topologies */ - bptr = &bucket; - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &bptr, 1, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&bucket); - return rc; - } - OBJ_DESTRUCT(&bucket); - } else { - opal_output_verbose(1, orte_regx_base_framework.framework_output, - "%s NOT PASSING TOPOLOGIES", - 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - /* need to pack the NULL just to terminate the region */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - return ORTE_SUCCESS; -} - -int orte_regx_base_decode_daemon_nodemap(opal_buffer_t *buffer) -{ - int n, nn, rc, cnt, offset; - orte_node_t *node; - char *slots=NULL, *topos=NULL, *flags=NULL; - char *rmndr, **tmp; - opal_list_t slts, flgs;; - opal_buffer_t *bptr=NULL; - orte_topology_t *t2; - orte_regex_range_t *rng, *srng, *frng; - uint8_t ui8; - - OBJ_CONSTRUCT(&slts, opal_list_t); - OBJ_CONSTRUCT(&flgs, opal_list_t); - - /* unpack the flag indicating if the HNP was allocated */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ui8, &n, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (0 == ui8) { - orte_hnp_is_allocated = false; - } else { - orte_hnp_is_allocated = true; - } - - /* unpack the flag indicating we are in a managed allocation */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ui8, &n, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (0 == ui8) { - orte_managed_allocation = false; - } else { - orte_managed_allocation = true; - } - - /* unpack the slots regex */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &slots, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - /* this is not allowed to be NULL */ - if (NULL == slots) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - rc = ORTE_ERR_BAD_PARAM; - goto cleanup; - } - - /* unpack the flags regex */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &flags, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - /* this is not allowed to be NULL */ - if (NULL == flags) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - rc = ORTE_ERR_BAD_PARAM; - goto cleanup; - } - - /* unpack the topos regex - this may not have been - * provided (e.g., for a homogeneous machine) */ - n = 1; - if (ORTE_SUCCESS != (rc = 
opal_dss.unpack(buffer, &topos, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (NULL != topos) { - /* need to unpack the topologies */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &bptr, &n, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - } - - /* if we are the HNP, then we just discard these strings as we already - * have a complete picture - but we needed to unpack them in order to - * maintain sync in the unpacking order */ - if (ORTE_PROC_IS_HNP) { - rc = ORTE_SUCCESS; - goto cleanup; - } - - /* decompress the slots */ - tmp = opal_argv_split(slots, ','); - for (n=0; NULL != tmp[n]; n++) { - rng = OBJ_NEW(orte_regex_range_t); - opal_list_append(&slts, &rng->super); - /* find the '[' as that delimits the value */ - rmndr = strchr(tmp[n], '['); - if (NULL == rmndr) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - rc = ORTE_ERR_BAD_PARAM; - opal_argv_free(tmp); - goto cleanup; - } - *rmndr = '\0'; - ++rmndr; - /* convert that number as this is the number of - * slots for this range */ - rng->slots = strtoul(rmndr, NULL, 10); - /* convert the initial number as that is the cnt */ - rng->cnt = strtoul(tmp[n], NULL, 10); - } - opal_argv_free(tmp); - - /* decompress the flags */ - tmp = opal_argv_split(flags, ','); - for (n=0; NULL != tmp[n]; n++) { - rng = OBJ_NEW(orte_regex_range_t); - opal_list_append(&flgs, &rng->super); - /* find the '[' as that delimits the value */ - rmndr = strchr(tmp[n], '['); - if (NULL == rmndr) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - opal_argv_free(tmp); - rc = ORTE_ERR_BAD_PARAM; - goto cleanup; - } - *rmndr = '\0'; - ++rmndr; - /* check the value - it is just one character */ - if ('1' == *rmndr) { - rng->slots = 1; - } else { - rng->slots = 0; - } - /* convert the initial number as that is the cnt */ - rng->cnt = strtoul(tmp[n], NULL, 10); - } - opal_argv_free(tmp); - free(flags); - - /* update the node array */ - srng = (orte_regex_range_t*)opal_list_get_first(&slts); - frng = 
(orte_regex_range_t*)opal_list_get_first(&flgs); - for (n=0; n < orte_node_pool->size; n++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { - continue; - } - /* set the number of slots */ - node->slots = srng->slots; - srng->cnt--; - if (0 == srng->cnt) { - srng = (orte_regex_range_t*)opal_list_get_next(&srng->super); - } - /* set the flags */ - if (0 == frng->slots) { - ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_SLOTS_GIVEN); - } else { - ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN); - } - frng->cnt--; - if (0 == frng->cnt) { - frng = (orte_regex_range_t*)opal_list_get_next(&frng->super); - } - } - - /* if no topology info was passed, then everyone shares our topology */ - if (NULL == bptr) { - /* our topology is first in the array */ - t2 = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0); - opal_output_verbose(1, orte_regx_base_framework.framework_output, - "%s ASSIGNING ALL TOPOLOGIES TO: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), t2->sig); - for (n=0; n < orte_node_pool->size; n++) { - if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { - if (NULL == node->topology) { - OBJ_RETAIN(t2); - node->topology = t2; - } - } - } - } else { - char *sig; - hwloc_topology_t topo; - /* decompress the topology regex */ - tmp = opal_argv_split(topos, ','); - /* there must be a topology definition for each range */ - offset = 0; - for (nn=0; NULL != tmp[nn]; nn++) { - cnt = strtoul(tmp[nn], NULL, 10); - /* unpack the signature */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(bptr, &sig, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - opal_argv_free(tmp); - OBJ_RELEASE(bptr); - goto cleanup; - } - if (NULL == sig) { - /* the nodes in this range have not reported a topology, - * so skip them */ - offset += cnt; - continue; - } - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(bptr, &topo, &n, OPAL_HWLOC_TOPO))) { - ORTE_ERROR_LOG(rc); - opal_argv_free(tmp); - OBJ_RELEASE(bptr); - 
free(sig); - goto cleanup; - } - /* see if we already have this topology - could be an update */ - t2 = NULL; - for (n=0; n < orte_node_topologies->size; n++) { - if (NULL == (t2 = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, n))) { - continue; - } - if (0 == strcmp(t2->sig, sig)) { - /* found a match */ - free(sig); - opal_hwloc_base_free_topology(topo); - sig = NULL; - break; - } - } - if (NULL != sig || NULL == t2) { - /* new topology - record it */ - t2 = OBJ_NEW(orte_topology_t); - t2->sig = sig; - t2->topo = topo; - opal_pointer_array_add(orte_node_topologies, t2); - } - /* point each of the nodes in this range to this topology */ - n=0; - while (n < cnt && (n+offset) < orte_node_pool->size) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n+offset))) { - continue; - } - opal_output_verbose(1, orte_regx_base_framework.framework_output, - "%s ASSIGNING NODE %s WITH TOPO: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - node->name, t2->sig); - if (NULL == node->topology) { - OBJ_RETAIN(t2); - node->topology = t2; - } - ++n; - } - offset += cnt; - } - OBJ_RELEASE(bptr); - opal_argv_free(tmp); - } - - cleanup: - OPAL_LIST_DESTRUCT(&slts); - OPAL_LIST_DESTRUCT(&flgs); - return rc; -} - -int orte_regx_base_generate_ppn(orte_job_t *jdata, char **ppn) -{ - orte_nidmap_regex_t *prng, **actives; - opal_list_t *prk; - orte_node_t *nptr; - orte_proc_t *proc; - size_t n; - int *cnt, i, k; - char *tmp2, *ptmp, **cache = NULL; - - /* create an array of lists to handle the number of app_contexts in this job */ - prk = (opal_list_t*)malloc(jdata->num_apps * sizeof(opal_list_t)); - cnt = (int*)malloc(jdata->num_apps * sizeof(int)); - actives = (orte_nidmap_regex_t**)malloc(jdata->num_apps * sizeof(orte_nidmap_regex_t*)); - for (n=0; n < jdata->num_apps; n++) { - OBJ_CONSTRUCT(&prk[n], opal_list_t); - actives[n] = NULL; - } - - /* we provide a complete map in the regex, with an entry for every - * node in the pool */ - for 
(i=0; i < orte_node_pool->size; i++) { - if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { - continue; - } - /* if a daemon has been assigned, then count how many procs - * for each app_context from the specified job are assigned to this node */ - memset(cnt, 0, jdata->num_apps * sizeof(int)); - if (NULL != nptr->daemon) { - for (k=0; k < nptr->procs->size; k++) { - if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(nptr->procs, k))) { - if (proc->name.jobid == jdata->jobid) { - ++cnt[proc->app_idx]; - } - } - } - } - /* track the #procs on this node */ - for (n=0; n < jdata->num_apps; n++) { - if (NULL == actives[n]) { - /* just starting */ - actives[n] = OBJ_NEW(orte_nidmap_regex_t); - actives[n]->nprocs = cnt[n]; - actives[n]->cnt = 1; - opal_list_append(&prk[n], &actives[n]->super); - } else { - /* is this the next in line */ - if (cnt[n] == actives[n]->nprocs) { - actives[n]->cnt++; - } else { - /* need to start another range */ - actives[n] = OBJ_NEW(orte_nidmap_regex_t); - actives[n]->nprocs = cnt[n]; - actives[n]->cnt = 1; - opal_list_append(&prk[n], &actives[n]->super); - } - } - } - } - - /* construct the regex from the found ranges for each app_context */ - ptmp = NULL; - for (n=0; n < jdata->num_apps; n++) { - OPAL_LIST_FOREACH(prng, &prk[n], orte_nidmap_regex_t) { - if (1 < prng->cnt) { - if (NULL == ptmp) { - asprintf(&ptmp, "%u(%u)", prng->nprocs, prng->cnt); - } else { - asprintf(&tmp2, "%s,%u(%u)", ptmp, prng->nprocs, prng->cnt); - free(ptmp); - ptmp = tmp2; - } - } else { - if (NULL == ptmp) { - asprintf(&ptmp, "%u", prng->nprocs); - } else { - asprintf(&tmp2, "%s,%u", ptmp, prng->nprocs); - free(ptmp); - ptmp = tmp2; - } - } - } - OPAL_LIST_DESTRUCT(&prk[n]); // releases all the actives objects - if (NULL != ptmp) { - opal_argv_append_nosize(&cache, ptmp); - free(ptmp); - ptmp = NULL; - } - } - free(prk); - free(cnt); - free(actives); - - *ppn = opal_argv_join(cache, '@'); - opal_argv_free(cache); - - 
return ORTE_SUCCESS; -} - -int orte_regx_base_parse_ppn(orte_job_t *jdata, char *regex) -{ - orte_node_t *node; - orte_proc_t *proc; - int n, k, m, cnt; - char **tmp, *ptr, **ppn; - orte_nidmap_regex_t *rng; - opal_list_t trk; - int rc = ORTE_SUCCESS; - - /* split the regex by app_context */ - tmp = opal_argv_split(regex, '@'); - - /* for each app_context, set the ppn */ - for (n=0; NULL != tmp[n]; n++) { - ppn = opal_argv_split(tmp[n], ','); - /* decompress the ppn */ - OBJ_CONSTRUCT(&trk, opal_list_t); - for (m=0; NULL != ppn[m]; m++) { - rng = OBJ_NEW(orte_nidmap_regex_t); - opal_list_append(&trk, &rng->super); - /* check for a count */ - if (NULL != (ptr = strchr(ppn[m], '('))) { - ppn[m][strlen(ppn[m])-1] = '\0'; // remove trailing paren - *ptr = '\0'; - ++ptr; - rng->cnt = strtoul(ptr, NULL, 10); - } else { - rng->cnt = 1; - } - /* convert the number */ - rng->nprocs = strtoul(ppn[m], NULL, 10); - } - opal_argv_free(ppn); - - /* cycle thru our node pool and add the indicated number of procs - * to each node */ - rng = (orte_nidmap_regex_t*)opal_list_get_first(&trk); - cnt = 0; - for (m=0; m < orte_node_pool->size; m++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, m))) { - continue; - } - /* see if it has any procs for this job and app_context */ - if (0 < rng->nprocs) { - /* add this node to the job map if it isn't already there */ - if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { - OBJ_RETAIN(node); - ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - opal_pointer_array_add(jdata->map->nodes, node); - } - /* create a proc object for each one */ - for (k=0; k < rng->nprocs; k++) { - proc = OBJ_NEW(orte_proc_t); - proc->name.jobid = jdata->jobid; - /* leave the vpid undefined as this will be determined - * later when we do the overall ranking */ - proc->app_idx = n; - proc->parent = node->daemon->name.vpid; - OBJ_RETAIN(node); - proc->node = node; - /* flag the proc as ready for launch */ - proc->state = 
ORTE_PROC_STATE_INIT; - opal_pointer_array_add(node->procs, proc); - /* we will add the proc to the jdata array when we - * compute its rank */ - } - node->num_procs += rng->nprocs; - } - ++cnt; - if (rng->cnt <= cnt) { - rng = (orte_nidmap_regex_t*)opal_list_get_next(&rng->super); - if (NULL == rng) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - opal_argv_free(tmp); - rc = ORTE_ERR_NOT_FOUND; - goto complete; - } - cnt = 0; - } - } - OPAL_LIST_DESTRUCT(&trk); - } - opal_argv_free(tmp); - - complete: - /* reset any node map flags we used so the next job will start clean */ - for (n=0; n < jdata->map->nodes->size; n++) { - if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, n))) { - ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); - } - } - - return rc; -} - - -static int regex_parse_node_range(char *base, char *range, int num_digits, char *suffix, char ***names); - -/* - * Parse one or more ranges in a set - * - * @param base The base text of the node name - * @param *ranges A pointer to a range. This can contain multiple ranges - * (i.e. 
"1-3,10" or "5" or "9,0100-0130,250") - * @param ***names An argv array to add the newly discovered nodes to - */ -static int regex_parse_node_ranges(char *base, char *ranges, int num_digits, char *suffix, char ***names) -{ - int i, len, ret; - char *start, *orig; - - /* Look for commas, the separator between ranges */ - - len = strlen(ranges); - for (orig = start = ranges, i = 0; i < len; ++i) { - if (',' == ranges[i]) { - ranges[i] = '\0'; - ret = regex_parse_node_range(base, start, num_digits, suffix, names); - if (ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - return ret; - } - start = ranges + i + 1; - } - } - - /* Pick up the last range, if it exists */ - - if (start < orig + len) { - - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, - "%s regex:parse:ranges: parse range %s (2)", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), start)); - - ret = regex_parse_node_range(base, start, num_digits, suffix, names); - if (ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - /* All done */ - return ORTE_SUCCESS; -} - - -/* - * Parse a single range in a set and add the full names of the nodes - * found to the names argv - * - * @param base The base text of the node name - * @param *ranges A pointer to a single range. (i.e. 
"1-3" or "5") - * @param ***names An argv array to add the newly discovered nodes to - */ -static int regex_parse_node_range(char *base, char *range, int num_digits, char *suffix, char ***names) -{ - char *str, tmp[132]; - size_t i, k, start, end; - size_t base_len, len; - bool found; - int ret; - - if (NULL == base || NULL == range) { - return ORTE_ERROR; - } - - len = strlen(range); - base_len = strlen(base); - /* Silence compiler warnings; start and end are always assigned - properly, below */ - start = end = 0; - - /* Look for the beginning of the first number */ - - for (found = false, i = 0; i < len; ++i) { - if (isdigit((int) range[i])) { - if (!found) { - start = atoi(range + i); - found = true; - break; - } - } - } - if (!found) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - - /* Look for the end of the first number */ - - for (found = false; i < len; ++i) { - if (!isdigit(range[i])) { - break; - } - } - - /* Was there no range, just a single number? */ - - if (i >= len) { - end = start; - found = true; - } else { - /* Nope, there was a range. 
Look for the beginning of the second - * number - */ - for (; i < len; ++i) { - if (isdigit(range[i])) { - end = strtol(range + i, NULL, 10); - found = true; - break; - } - } - } - if (!found) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - - /* Make strings for all values in the range */ - - len = base_len + num_digits + 32; - if (NULL != suffix) { - len += strlen(suffix); - } - str = (char *) malloc(len); - if (NULL == str) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - for (i = start; i <= end; ++i) { - memset(str, 0, len); - strcpy(str, base); - /* we need to zero-pad the digits */ - for (k=0; k < (size_t)num_digits; k++) { - str[k+base_len] = '0'; - } - memset(tmp, 0, 132); - snprintf(tmp, 132, "%lu", (unsigned long)i); - for (k=0; k < strlen(tmp); k++) { - str[base_len + num_digits - k - 1] = tmp[strlen(tmp)-k-1]; - } - /* if there is a suffix, add it */ - if (NULL != suffix) { - strcat(str, suffix); - } - ret = opal_argv_append_nosize(names, str); - if(ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - free(str); - return ret; - } - } - free(str); - - /* All done */ - return ORTE_SUCCESS; -} - -static int regex_parse_node_range(char *base, char *range, int num_digits, char *suffix, char ***names); - -int orte_regx_base_extract_node_names(char *regexp, char ***names) -{ - int i, j, k, len, ret; - char *base; - char *orig, *suffix; - bool found_range = false; - bool more_to_come = false; - int num_digits; - - if (NULL == regexp) { - *names = NULL; - return ORTE_SUCCESS; - } - - orig = base = strdup(regexp); - if (NULL == base) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, - "%s regex:extract:nodenames: checking nodelist: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - regexp)); - - do { - /* Find the base */ - len = strlen(base); - for (i = 0; i <= len; ++i) { - if (base[i] == '[') { - /* we found a range. 
this gets dealt with below */ - base[i] = '\0'; - found_range = true; - break; - } - if (base[i] == ',') { - /* we found a singleton node, and there are more to come */ - base[i] = '\0'; - found_range = false; - more_to_come = true; - break; - } - if (base[i] == '\0') { - /* we found a singleton node */ - found_range = false; - more_to_come = false; - break; - } - } - if (i == 0 && !found_range) { - /* we found a special character at the beginning of the string */ - orte_show_help("help-regex.txt", "regex:special-char", true, regexp); - free(orig); - return ORTE_ERR_BAD_PARAM; - } - - if (found_range) { - /* If we found a range, get the number of digits in the numbers */ - i++; /* step over the [ */ - for (j=i; j < len; j++) { - if (base[j] == ':') { - base[j] = '\0'; - break; - } - } - if (j >= len) { - /* we didn't find the number of digits */ - orte_show_help("help-regex.txt", "regex:num-digits-missing", true, regexp); - free(orig); - return ORTE_ERR_BAD_PARAM; - } - num_digits = strtol(&base[i], NULL, 10); - i = j + 1; /* step over the : */ - /* now find the end of the range */ - for (j = i; j < len; ++j) { - if (base[j] == ']') { - base[j] = '\0'; - break; - } - } - if (j >= len) { - /* we didn't find the end of the range */ - orte_show_help("help-regex.txt", "regex:end-range-missing", true, regexp); - free(orig); - return ORTE_ERR_BAD_PARAM; - } - /* check for a suffix */ - if (j+1 < len && base[j+1] != ',') { - /* find the next comma, if present */ - for (k=j+1; k < len && base[k] != ','; k++); - if (k < len) { - base[k] = '\0'; - } - suffix = strdup(&base[j+1]); - if (k < len) { - base[k] = ','; - } - j = k-1; - } else { - suffix = NULL; - } - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, - "%s regex:extract:nodenames: parsing range %s %s %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - base, base + i, suffix)); - - ret = regex_parse_node_ranges(base, base + i, num_digits, suffix, names); - if (NULL != suffix) { - free(suffix); - } - if (ORTE_SUCCESS != ret) { - 
orte_show_help("help-regex.txt", "regex:bad-value", true, regexp); - free(orig); - return ret; - } - if (j+1 < len && base[j + 1] == ',') { - more_to_come = true; - base = &base[j + 2]; - } else { - more_to_come = false; - } - } else { - /* If we didn't find a range, just add the node */ - if(ORTE_SUCCESS != (ret = opal_argv_append_nosize(names, base))) { - ORTE_ERROR_LOG(ret); - free(orig); - return ret; - } - /* step over the comma */ - i++; - /* set base equal to the (possible) next base to look at */ - base = &base[i]; - } - } while(more_to_come); - - free(orig); - - /* All done */ - return ret; -} diff --git a/orte/mca/regx/base/regx_base_frame.c b/orte/mca/regx/base/regx_base_frame.c deleted file mode 100644 index 61a8f0bf61e..00000000000 --- a/orte/mca/regx/base/regx_base_frame.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include - -#include "orte/mca/mca.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" - -#include "orte/runtime/orte_globals.h" -#include "orte/util/show_help.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/regx/base/base.h" -/* - * The following file was created by configure. It contains extern - * statements and the definition of an array of pointers to each - * component's public mca_base_component_t struct. 
- */ - -#include "orte/mca/regx/base/static-components.h" - -/* - * Global variables - */ -orte_regx_base_module_t orte_regx = {0}; - -static int orte_regx_base_close(void) -{ - /* give the selected module a chance to finalize */ - if (NULL != orte_regx.finalize) { - orte_regx.finalize(); - } - return mca_base_framework_components_close(&orte_regx_base_framework, NULL); -} - -/** - * Function for finding and opening either all MCA components, or the one - * that was specifically requested via a MCA parameter. - */ -static int orte_regx_base_open(mca_base_open_flag_t flags) -{ - int rc; - - /* Open up all available components */ - rc = mca_base_framework_components_open(&orte_regx_base_framework, flags); - - /* All done */ - return rc; -} - -MCA_BASE_FRAMEWORK_DECLARE(orte, regx, "ORTE Regx Subsystem", NULL, - orte_regx_base_open, orte_regx_base_close, - mca_regx_base_static_components, 0); - -/* OBJECT INSTANTIATIONS */ -static void nrcon(orte_nidmap_regex_t *p) -{ - p->ctx = 0; - p->nprocs = -1; - p->cnt = 0; -} -OBJ_CLASS_INSTANCE(orte_nidmap_regex_t, - opal_list_item_t, - nrcon, NULL); diff --git a/orte/mca/regx/base/regx_base_select.c b/orte/mca/regx/base/regx_base_select.c deleted file mode 100644 index 35cd36dc2a6..00000000000 --- a/orte/mca/regx/base/regx_base_select.c +++ /dev/null @@ -1,61 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * Copyright (c) 2018 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/constants.h" - -#include "orte/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "orte/mca/regx/base/base.h" - - -/** - * Function for selecting one component from all those that are - * available. - */ -int orte_regx_base_select(void) -{ - orte_regx_base_component_t *best_component = NULL; - orte_regx_base_module_t *best_module = NULL; - int rc = ORTE_SUCCESS; - - /* - * Select the best component - */ - if (OPAL_SUCCESS != mca_base_select("regx", orte_regx_base_framework.framework_output, - &orte_regx_base_framework.framework_components, - (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component, NULL)) { - /* This will only happen if no component was selected */ - return ORTE_ERR_NOT_FOUND; - } - - /* Save the winner */ - orte_regx = *best_module; - /* give it a chance to init */ - if (NULL != orte_regx.init) { - rc = orte_regx.init(); - } - return rc; -} diff --git a/orte/mca/regx/fwd/Makefile.am b/orte/mca/regx/fwd/Makefile.am deleted file mode 100644 index 44cd769f236..00000000000 --- a/orte/mca/regx/fwd/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2016-2018 Intel, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - regx_fwd_component.c \ - regx_fwd.h \ - regx_fwd.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_orte_regx_fwd_DSO -component_noinst = -component_install = mca_regx_fwd.la -else -component_noinst = libmca_regx_fwd.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_regx_fwd_la_SOURCES = $(sources) -mca_regx_fwd_la_LDFLAGS = -module -avoid-version -mca_regx_fwd_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_regx_fwd_la_SOURCES = $(sources) -libmca_regx_fwd_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/regx/fwd/owner.txt b/orte/mca/regx/fwd/owner.txt deleted file mode 100644 index 85b4416d206..00000000000 --- a/orte/mca/regx/fwd/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: INTEL -status: active diff --git a/orte/mca/regx/fwd/regx_fwd.c b/orte/mca/regx/fwd/regx_fwd.c deleted file mode 100644 index c5c4d5d9030..00000000000 --- a/orte/mca/regx/fwd/regx_fwd.c +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include "orte_config.h" -#include "orte/types.h" -#include "opal/types.h" - -#ifdef HAVE_UNISTD_H -#include -#endif -#include - -#include "opal/util/argv.h" -#include "opal/util/basename.h" -#include "opal/util/opal_environ.h" - -#include "orte/runtime/orte_globals.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/routed/routed.h" -#include "orte/mca/regx/base/base.h" - -#include "regx_fwd.h" - -static int nidmap_create(opal_pointer_array_t *pool, char **regex); - -orte_regx_base_module_t orte_regx_fwd_module = { - .nidmap_create = nidmap_create, - .nidmap_parse = orte_regx_base_nidmap_parse, - .extract_node_names = orte_regx_base_extract_node_names, - .encode_nodemap = orte_regx_base_encode_nodemap, - .decode_daemon_nodemap = orte_regx_base_decode_daemon_nodemap, - .generate_ppn = orte_regx_base_generate_ppn, - .parse_ppn = orte_regx_base_parse_ppn -}; - -static int nidmap_create(opal_pointer_array_t *pool, char **regex) -{ - char *node; - char prefix[ORTE_MAX_NODE_PREFIX]; - int i, j, n, len, startnum, nodenum, numdigits; - bool found; - char *suffix, *sfx, *nodenames; - orte_regex_node_t *ndreg; - orte_regex_range_t *range, *rng; - opal_list_t nodenms, dvpids; - opal_list_item_t *item, *itm2; - char **regexargs = NULL, *tmp, *tmp2; - orte_node_t *nptr; - orte_vpid_t vpid; - - OBJ_CONSTRUCT(&nodenms, opal_list_t); - OBJ_CONSTRUCT(&dvpids, opal_list_t); - - rng = NULL; - for (n=0; n < pool->size; n++) { - if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(pool, n))) { - continue; - } - /* if no daemon has been assigned, then this node is not being used */ - if (NULL == nptr->daemon) { - vpid = -1; // indicates no daemon assigned - } else { - vpid = nptr->daemon->name.vpid; - } - /* deal with the daemon vpid - see if it is next in the - * current range */ 
- if (NULL == rng) { - /* just starting */ - rng = OBJ_NEW(orte_regex_range_t); - rng->vpid = vpid; - rng->cnt = 1; - opal_list_append(&dvpids, &rng->super); - } else if (UINT32_MAX == vpid) { - if (-1 == rng->vpid) { - rng->cnt++; - } else { - /* need to start another range */ - rng = OBJ_NEW(orte_regex_range_t); - rng->vpid = vpid; - rng->cnt = 1; - opal_list_append(&dvpids, &rng->super); - } - } else if (-1 == rng->vpid) { - /* need to start another range */ - rng = OBJ_NEW(orte_regex_range_t); - rng->vpid = vpid; - rng->cnt = 1; - opal_list_append(&dvpids, &rng->super); - } else { - /* is this the next in line */ - if (vpid == (orte_vpid_t)(rng->vpid + rng->cnt)) { - rng->cnt++; - } else { - /* need to start another range */ - rng = OBJ_NEW(orte_regex_range_t); - rng->vpid = vpid; - rng->cnt = 1; - opal_list_append(&dvpids, &rng->super); - } - } - node = nptr->name; - /* determine this node's prefix by looking for first digit char */ - len = strlen(node); - startnum = -1; - memset(prefix, 0, ORTE_MAX_NODE_PREFIX); - for (i=0, j=0; i < len; i++) { - /* valid hostname characters are ascii letters, digits and the '-' character. */ - if (isdigit(node[i])) { - /* count the size of the numeric field - but don't - * add the digits to the prefix - */ - if (startnum < 0) { - /* okay, this defines end of the prefix */ - startnum = i; - } - continue; - } - /* this must be either an alpha, a '.', or '-' */ - if (!isalpha(node[i]) && '-' != node[i] && '.' 
!= node[i]) { - orte_show_help("help-regex.txt", "regex:invalid-name", true, node); - return ORTE_ERR_SILENT; - } - if (startnum < 0) { - prefix[j++] = node[i]; - } - } - if (startnum < 0) { - /* can't compress this name - just add it to the list */ - ndreg = OBJ_NEW(orte_regex_node_t); - ndreg->prefix = strdup(node); - opal_list_append(&nodenms, &ndreg->super); - continue; - } - /* convert the digits and get any suffix */ - nodenum = strtol(&node[startnum], &sfx, 10); - if (NULL != sfx) { - suffix = strdup(sfx); - numdigits = (int)(sfx - &node[startnum]); - } else { - suffix = NULL; - numdigits = (int)strlen(&node[startnum]); - } - /* is this node name already on our list? */ - found = false; - if (0 != opal_list_get_size(&nodenms)) { - ndreg = (orte_regex_node_t*)opal_list_get_last(&nodenms); - - if ((0 < strlen(prefix) && NULL == ndreg->prefix) || - (0 == strlen(prefix) && NULL != ndreg->prefix) || - (0 < strlen(prefix) && NULL != ndreg->prefix && - 0 != strcmp(prefix, ndreg->prefix)) || - (NULL == suffix && NULL != ndreg->suffix) || - (NULL != suffix && NULL == ndreg->suffix) || - (NULL != suffix && NULL != ndreg->suffix && - 0 != strcmp(suffix, ndreg->suffix)) || - (numdigits != ndreg->num_digits)) { - found = false; - } else { - /* found a match - flag it */ - found = true; - } - } - if (found) { - range = (orte_regex_range_t*)opal_list_get_last(&ndreg->ranges); - if (NULL == range) { - /* first range for this nodeid */ - range = OBJ_NEW(orte_regex_range_t); - range->vpid = nodenum; - range->cnt = 1; - opal_list_append(&ndreg->ranges, &range->super); - /* see if the node number is out of sequence */ - } else if (nodenum != (range->vpid + range->cnt)) { - /* start a new range */ - range = OBJ_NEW(orte_regex_range_t); - range->vpid = nodenum; - range->cnt = 1; - opal_list_append(&ndreg->ranges, &range->super); - } else { - /* everything matches - just increment the cnt */ - range->cnt++; - } - } else { - /* need to add it */ - ndreg = 
OBJ_NEW(orte_regex_node_t); - if (0 < strlen(prefix)) { - ndreg->prefix = strdup(prefix); - } - if (NULL != suffix) { - ndreg->suffix = strdup(suffix); - } - ndreg->num_digits = numdigits; - opal_list_append(&nodenms, &ndreg->super); - /* record the first range for this nodeid - we took - * care of names we can't compress above - */ - range = OBJ_NEW(orte_regex_range_t); - range->vpid = nodenum; - range->cnt = 1; - opal_list_append(&ndreg->ranges, &range->super); - } - if (NULL != suffix) { - free(suffix); - } - } - /* begin constructing the regular expression */ - while (NULL != (item = opal_list_remove_first(&nodenms))) { - ndreg = (orte_regex_node_t*)item; - - /* if no ranges, then just add the name */ - if (0 == opal_list_get_size(&ndreg->ranges)) { - if (NULL != ndreg->prefix) { - /* solitary node */ - asprintf(&tmp, "%s", ndreg->prefix); - opal_argv_append_nosize(®exargs, tmp); - free(tmp); - } - OBJ_RELEASE(ndreg); - continue; - } - /* start the regex for this nodeid with the prefix */ - if (NULL != ndreg->prefix) { - asprintf(&tmp, "%s[%d:", ndreg->prefix, ndreg->num_digits); - } else { - asprintf(&tmp, "[%d:", ndreg->num_digits); - } - /* add the ranges */ - while (NULL != (itm2 = opal_list_remove_first(&ndreg->ranges))) { - range = (orte_regex_range_t*)itm2; - if (1 == range->cnt) { - asprintf(&tmp2, "%s%u,", tmp, range->vpid); - } else { - asprintf(&tmp2, "%s%u-%u,", tmp, range->vpid, range->vpid + range->cnt - 1); - } - free(tmp); - tmp = tmp2; - OBJ_RELEASE(range); - } - /* replace the final comma */ - tmp[strlen(tmp)-1] = ']'; - if (NULL != ndreg->suffix) { - /* add in the suffix, if provided */ - asprintf(&tmp2, "%s%s", tmp, ndreg->suffix); - free(tmp); - tmp = tmp2; - } - opal_argv_append_nosize(®exargs, tmp); - free(tmp); - OBJ_RELEASE(ndreg); - } - - /* assemble final result */ - nodenames = opal_argv_join(regexargs, ','); - /* cleanup */ - opal_argv_free(regexargs); - OBJ_DESTRUCT(&nodenms); - - /* do the same for the vpids */ - tmp = NULL; - 
while (NULL != (item = opal_list_remove_first(&dvpids))) { - rng = (orte_regex_range_t*)item; - if (1 < rng->cnt) { - if (NULL == tmp) { - asprintf(&tmp, "%u(%u)", rng->vpid, rng->cnt); - } else { - asprintf(&tmp2, "%s,%u(%u)", tmp, rng->vpid, rng->cnt); - free(tmp); - tmp = tmp2; - } - } else { - if (NULL == tmp) { - asprintf(&tmp, "%u", rng->vpid); - } else { - asprintf(&tmp2, "%s,%u", tmp, rng->vpid); - free(tmp); - tmp = tmp2; - } - } - OBJ_RELEASE(rng); - } - OPAL_LIST_DESTRUCT(&dvpids); - - /* now concatenate the results into one string */ - asprintf(&tmp2, "%s@%s", nodenames, tmp); - free(nodenames); - free(tmp); - *regex = tmp2; - return ORTE_SUCCESS; -} diff --git a/orte/mca/regx/fwd/regx_fwd.h b/orte/mca/regx/fwd/regx_fwd.h deleted file mode 100644 index c6f4a966d1b..00000000000 --- a/orte/mca/regx/fwd/regx_fwd.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef _MCA_REGX_FwD_H_ -#define _MCA_REGX_FwD_H_ - -#include "orte_config.h" - -#include "orte/types.h" - -#include "opal/mca/base/base.h" -#include "orte/mca/regx/regx.h" - - -BEGIN_C_DECLS - -ORTE_MODULE_DECLSPEC extern orte_regx_base_component_t mca_regx_fwd_component; -extern orte_regx_base_module_t orte_regx_fwd_module; - -END_C_DECLS - -#endif /* MCA_REGX_FwD_H_ */ diff --git a/orte/mca/regx/fwd/regx_fwd_component.c b/orte/mca/regx/fwd/regx_fwd_component.c deleted file mode 100644 index 822cf622dd2..00000000000 --- a/orte/mca/regx/fwd/regx_fwd_component.c +++ /dev/null @@ -1,44 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/types.h" -#include "opal/types.h" - -#include "opal/util/show_help.h" - -#include "orte/mca/regx/regx.h" -#include "regx_fwd.h" - -static int component_query(mca_base_module_t **module, int *priority); - -/* - * Struct of function pointers and all that to let us be initialized - */ -orte_regx_base_component_t mca_regx_fwd_component = { - .base_version = { - MCA_REGX_BASE_VERSION_1_0_0, - .mca_component_name = "fwd", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_query_component = component_query, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int component_query(mca_base_module_t **module, int *priority) -{ - *module = (mca_base_module_t*)&orte_regx_fwd_module; - *priority = 10; - return ORTE_SUCCESS; -} diff --git a/orte/mca/regx/regx.h b/orte/mca/regx/regx.h deleted file mode 100644 index 2d3630e0b68..00000000000 --- a/orte/mca/regx/regx.h +++ /dev/null @@ -1,127 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open RTE Personality Framework (regx) - * - * Multi-select framework so that multiple personalities can be - * simultaneously supported - * - */ - -#ifndef ORTE_MCA_REGX_H -#define ORTE_MCA_REGX_H - -#include "orte_config.h" -#include "orte/types.h" - -#include "opal/class/opal_pointer_array.h" -#include "opal/dss/dss_types.h" -#include "orte/mca/mca.h" - -#include "orte/runtime/orte_globals.h" - - -BEGIN_C_DECLS - -/* - * regx module functions - */ - -#define ORTE_MAX_NODE_PREFIX 50 -#define ORTE_CONTIG_NODE_CMD 0x01 -#define ORTE_NON_CONTIG_NODE_CMD 0x02 - -/** -* REGX module functions - the modules are accessed via -* the base stub functions -*/ -typedef struct { - opal_list_item_t super; - int ctx; - int nprocs; - int cnt; -} orte_nidmap_regex_t; -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_nidmap_regex_t); - -/* initialize the module - allow it to do whatever one-time - * things it requires */ -typedef int (*orte_regx_base_module_init_fn_t)(void); - -typedef int (*orte_regx_base_module_nidmap_create_fn_t)(opal_pointer_array_t *pool, char **regex); -typedef int (*orte_regx_base_module_nidmap_parse_fn_t)(char *regex); -typedef int (*orte_regx_base_module_extract_node_names_fn_t)(char *regexp, char ***names); - -/* create a regular expression describing the nodes in the - * allocation */ -typedef int (*orte_regx_base_module_encode_nodemap_fn_t)(opal_buffer_t *buffer); - -/* decode a regular expression created by the encode function - * into the orte_node_pool array */ -typedef int (*orte_regx_base_module_decode_daemon_nodemap_fn_t)(opal_buffer_t *buffer); - -typedef int (*orte_regx_base_module_build_daemon_nidmap_fn_t)(void); - -/* create a regular expression describing the ppn for a job */ -typedef int (*orte_regx_base_module_generate_ppn_fn_t)(orte_job_t *jdata, char **ppn); - -/* decode the ppn */ -typedef int (*orte_regx_base_module_parse_ppn_fn_t)(orte_job_t 
*jdata, char *ppn); - - -/* give the component a chance to cleanup */ -typedef void (*orte_regx_base_module_finalize_fn_t)(void); - -/* - * regx module version 1.0.0 - */ -typedef struct { - orte_regx_base_module_init_fn_t init; - orte_regx_base_module_nidmap_create_fn_t nidmap_create; - orte_regx_base_module_nidmap_parse_fn_t nidmap_parse; - orte_regx_base_module_extract_node_names_fn_t extract_node_names; - orte_regx_base_module_encode_nodemap_fn_t encode_nodemap; - orte_regx_base_module_decode_daemon_nodemap_fn_t decode_daemon_nodemap; - orte_regx_base_module_build_daemon_nidmap_fn_t build_daemon_nidmap; - orte_regx_base_module_generate_ppn_fn_t generate_ppn; - orte_regx_base_module_parse_ppn_fn_t parse_ppn; - orte_regx_base_module_finalize_fn_t finalize; -} orte_regx_base_module_t; - -ORTE_DECLSPEC extern orte_regx_base_module_t orte_regx; - -/* - * regx component - */ - -/** - * regx component version 1.0.0 - */ -typedef struct { - /** Base MCA structure */ - mca_base_component_t base_version; - /** Base MCA data */ - mca_base_component_data_t base_data; -} orte_regx_base_component_t; - -/** - * Macro for use in components that are of type regx - */ -#define MCA_REGX_BASE_VERSION_1_0_0 \ - ORTE_MCA_BASE_VERSION_2_1_0("regx", 1, 0, 0) - - -END_C_DECLS - -#endif diff --git a/orte/mca/regx/reverse/Makefile.am b/orte/mca/regx/reverse/Makefile.am deleted file mode 100644 index d5272544510..00000000000 --- a/orte/mca/regx/reverse/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2016-2018 Intel, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - regx_reverse_component.c \ - regx_reverse.h \ - regx_reverse.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_orte_regx_reverse_DSO -component_noinst = -component_install = mca_regx_reverse.la -else -component_noinst = libmca_regx_reverse.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_regx_reverse_la_SOURCES = $(sources) -mca_regx_reverse_la_LDFLAGS = -module -avoid-version -mca_regx_reverse_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_regx_reverse_la_SOURCES = $(sources) -libmca_regx_reverse_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/regx/reverse/owner.txt b/orte/mca/regx/reverse/owner.txt deleted file mode 100644 index 2fd247dddb1..00000000000 --- a/orte/mca/regx/reverse/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: IBM -status: active diff --git a/orte/mca/regx/reverse/regx_reverse.c b/orte/mca/regx/reverse/regx_reverse.c deleted file mode 100644 index b6c3ba187a0..00000000000 --- a/orte/mca/regx/reverse/regx_reverse.c +++ /dev/null @@ -1,319 +0,0 @@ -/* - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2018 IBM Corporation. All rights reserved. - * Copyright (c) 2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include "orte_config.h" -#include "orte/types.h" -#include "opal/types.h" - -#ifdef HAVE_UNISTD_H -#include -#endif -#include - -#include "opal/util/argv.h" -#include "opal/util/basename.h" -#include "opal/util/opal_environ.h" - -#include "orte/runtime/orte_globals.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/routed/routed.h" -#include "orte/mca/regx/base/base.h" - -#include "regx_reverse.h" - -static int nidmap_create(opal_pointer_array_t *pool, char **regex); - -orte_regx_base_module_t orte_regx_reverse_module = { - .nidmap_create = nidmap_create, - .nidmap_parse = orte_regx_base_nidmap_parse, - .extract_node_names = orte_regx_base_extract_node_names, - .encode_nodemap = orte_regx_base_encode_nodemap, - .decode_daemon_nodemap = orte_regx_base_decode_daemon_nodemap, - .generate_ppn = orte_regx_base_generate_ppn, - .parse_ppn = orte_regx_base_parse_ppn -}; - -static int nidmap_create(opal_pointer_array_t *pool, char **regex) -{ - char *node; - char prefix[ORTE_MAX_NODE_PREFIX]; - int i, j, n, len, startnum, nodenum, numdigits; - bool found; - char *suffix, *sfx, *nodenames; - orte_regex_node_t *ndreg; - orte_regex_range_t *range, *rng; - opal_list_t nodenms, dvpids; - opal_list_item_t *item, *itm2; - char **regexargs = NULL, *tmp, *tmp2; - orte_node_t *nptr; - orte_vpid_t vpid; - - OBJ_CONSTRUCT(&nodenms, opal_list_t); - OBJ_CONSTRUCT(&dvpids, opal_list_t); - - rng = NULL; - for (n=0; n < pool->size; n++) { - if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(pool, n))) { - continue; - } - /* if no daemon has been assigned, then this node is not being used */ - if (NULL == nptr->daemon) { - vpid = -1; // indicates no daemon assigned - } else { - vpid = nptr->daemon->name.vpid; - } - /* deal with the daemon vpid - see if it is next in the - * current 
range */ - if (NULL == rng) { - /* just starting */ - rng = OBJ_NEW(orte_regex_range_t); - rng->vpid = vpid; - rng->cnt = 1; - opal_list_append(&dvpids, &rng->super); - } else if (UINT32_MAX == vpid) { - if (-1 == rng->vpid) { - rng->cnt++; - } else { - /* need to start another range */ - rng = OBJ_NEW(orte_regex_range_t); - rng->vpid = vpid; - rng->cnt = 1; - opal_list_append(&dvpids, &rng->super); - } - } else if (-1 == rng->vpid) { - /* need to start another range */ - rng = OBJ_NEW(orte_regex_range_t); - rng->vpid = vpid; - rng->cnt = 1; - opal_list_append(&dvpids, &rng->super); - } else { - /* is this the next in line */ - if (vpid == (orte_vpid_t)(rng->vpid + rng->cnt)) { - rng->cnt++; - } else { - /* need to start another range */ - rng = OBJ_NEW(orte_regex_range_t); - rng->vpid = vpid; - rng->cnt = 1; - opal_list_append(&dvpids, &rng->super); - } - } - node = nptr->name; - opal_output_verbose(5, orte_regx_base_framework.framework_output, - "%s PROCESS NODE <%s>", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - node); - /* determine this node's prefix by looking for first digit char */ - len = strlen(node); - startnum = -1; - memset(prefix, 0, ORTE_MAX_NODE_PREFIX); - numdigits = 0; - - /* Valid hostname characters are: - * - ascii letters, digits, and the '-' character. - * Determine the prefix in reverse to better support hostnames like: - * c712f6n01, c699c086 where there are sets of digits, and the lowest - * set changes most frequently. - */ - startnum = -1; - memset(prefix, 0, ORTE_MAX_NODE_PREFIX); - numdigits = 0; - for (i=len-1; i >= 0; i--) { - // Count all of the digits - if( isdigit(node[i]) ) { - numdigits++; - continue; - } - else { - // At this point everything at and above position 'i' is prefix. 
- for( j = 0; j <= i; ++j) { - prefix[j] = node[j]; - } - if (numdigits) { - startnum = j; - } - break; - } - } - - opal_output_verbose(5, orte_regx_base_framework.framework_output, - "%s PROCESS NODE <%s> : reverse / prefix \"%s\" / numdigits %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - node, prefix, numdigits); - - if (startnum < 0) { - /* can't compress this name - just add it to the list */ - ndreg = OBJ_NEW(orte_regex_node_t); - ndreg->prefix = strdup(node); - opal_list_append(&nodenms, &ndreg->super); - continue; - } - /* convert the digits and get any suffix */ - nodenum = strtol(&node[startnum], &sfx, 10); - if (NULL != sfx) { - suffix = strdup(sfx); - } else { - suffix = NULL; - } - /* is this node name already on our list? */ - found = false; - if (0 != opal_list_get_size(&nodenms)) { - ndreg = (orte_regex_node_t*)opal_list_get_last(&nodenms); - - if ((0 < strlen(prefix) && NULL == ndreg->prefix) || - (0 == strlen(prefix) && NULL != ndreg->prefix) || - (0 < strlen(prefix) && NULL != ndreg->prefix && - 0 != strcmp(prefix, ndreg->prefix)) || - (NULL == suffix && NULL != ndreg->suffix) || - (NULL != suffix && NULL == ndreg->suffix) || - (NULL != suffix && NULL != ndreg->suffix && - 0 != strcmp(suffix, ndreg->suffix)) || - (numdigits != ndreg->num_digits)) { - found = false; - } else { - /* found a match - flag it */ - found = true; - } - } - if (found) { - /* get the last range on this nodeid - we do this - * to preserve order - */ - range = (orte_regex_range_t*)opal_list_get_last(&ndreg->ranges); - if (NULL == range) { - /* first range for this nodeid */ - range = OBJ_NEW(orte_regex_range_t); - range->vpid = nodenum; - range->cnt = 1; - opal_list_append(&ndreg->ranges, &range->super); - /* see if the node number is out of sequence */ - } else if (nodenum != (range->vpid + range->cnt)) { - /* start a new range */ - range = OBJ_NEW(orte_regex_range_t); - range->vpid = nodenum; - range->cnt = 1; - opal_list_append(&ndreg->ranges, &range->super); - } else { - 
/* everything matches - just increment the cnt */ - range->cnt++; - } - } else { - /* need to add it */ - ndreg = OBJ_NEW(orte_regex_node_t); - if (0 < strlen(prefix)) { - ndreg->prefix = strdup(prefix); - } - if (NULL != suffix) { - ndreg->suffix = strdup(suffix); - } - ndreg->num_digits = numdigits; - opal_list_append(&nodenms, &ndreg->super); - /* record the first range for this nodeid - we took - * care of names we can't compress above - */ - range = OBJ_NEW(orte_regex_range_t); - range->vpid = nodenum; - range->cnt = 1; - opal_list_append(&ndreg->ranges, &range->super); - } - if (NULL != suffix) { - free(suffix); - } - } - /* begin constructing the regular expression */ - while (NULL != (item = opal_list_remove_first(&nodenms))) { - ndreg = (orte_regex_node_t*)item; - - /* if no ranges, then just add the name */ - if (0 == opal_list_get_size(&ndreg->ranges)) { - if (NULL != ndreg->prefix) { - /* solitary node */ - asprintf(&tmp, "%s", ndreg->prefix); - opal_argv_append_nosize(®exargs, tmp); - free(tmp); - } - OBJ_RELEASE(ndreg); - continue; - } - /* start the regex for this nodeid with the prefix */ - if (NULL != ndreg->prefix) { - asprintf(&tmp, "%s[%d:", ndreg->prefix, ndreg->num_digits); - } else { - asprintf(&tmp, "[%d:", ndreg->num_digits); - } - /* add the ranges */ - while (NULL != (itm2 = opal_list_remove_first(&ndreg->ranges))) { - range = (orte_regex_range_t*)itm2; - if (1 == range->cnt) { - asprintf(&tmp2, "%s%u,", tmp, range->vpid); - } else { - asprintf(&tmp2, "%s%u-%u,", tmp, range->vpid, range->vpid + range->cnt - 1); - } - free(tmp); - tmp = tmp2; - OBJ_RELEASE(range); - } - /* replace the final comma */ - tmp[strlen(tmp)-1] = ']'; - if (NULL != ndreg->suffix) { - /* add in the suffix, if provided */ - asprintf(&tmp2, "%s%s", tmp, ndreg->suffix); - free(tmp); - tmp = tmp2; - } - opal_argv_append_nosize(®exargs, tmp); - free(tmp); - OBJ_RELEASE(ndreg); - } - - /* assemble final result */ - nodenames = opal_argv_join(regexargs, ','); - /* cleanup 
*/ - opal_argv_free(regexargs); - OBJ_DESTRUCT(&nodenms); - - /* do the same for the vpids */ - tmp = NULL; - while (NULL != (item = opal_list_remove_first(&dvpids))) { - rng = (orte_regex_range_t*)item; - if (1 < rng->cnt) { - if (NULL == tmp) { - asprintf(&tmp, "%u(%u)", rng->vpid, rng->cnt); - } else { - asprintf(&tmp2, "%s,%u(%u)", tmp, rng->vpid, rng->cnt); - free(tmp); - tmp = tmp2; - } - } else { - if (NULL == tmp) { - asprintf(&tmp, "%u", rng->vpid); - } else { - asprintf(&tmp2, "%s,%u", tmp, rng->vpid); - free(tmp); - tmp = tmp2; - } - } - OBJ_RELEASE(rng); - } - OPAL_LIST_DESTRUCT(&dvpids); - - /* now concatenate the results into one string */ - asprintf(&tmp2, "%s@%s", nodenames, tmp); - free(nodenames); - free(tmp); - *regex = tmp2; - return ORTE_SUCCESS; -} diff --git a/orte/mca/regx/reverse/regx_reverse.h b/orte/mca/regx/reverse/regx_reverse.h deleted file mode 100644 index e16954d1627..00000000000 --- a/orte/mca/regx/reverse/regx_reverse.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef _MCA_REGX_REVERSE_H_ -#define _MCA_REGX_REVERSE_H_ - -#include "orte_config.h" - -#include "orte/types.h" - -#include "opal/mca/base/base.h" -#include "orte/mca/regx/regx.h" - - -BEGIN_C_DECLS - -ORTE_MODULE_DECLSPEC extern orte_regx_base_component_t mca_regx_reverse_component; -extern orte_regx_base_module_t orte_regx_reverse_module; - -END_C_DECLS - -#endif /* MCA_REGX_ORTE_H_ */ diff --git a/orte/mca/regx/reverse/regx_reverse_component.c b/orte/mca/regx/reverse/regx_reverse_component.c deleted file mode 100644 index a4921b91deb..00000000000 --- a/orte/mca/regx/reverse/regx_reverse_component.c +++ /dev/null @@ -1,44 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/types.h" -#include "opal/types.h" - -#include "opal/util/show_help.h" - -#include "orte/mca/regx/regx.h" -#include "regx_reverse.h" - -static int component_query(mca_base_module_t **module, int *priority); - -/* - * Struct of function pointers and all that to let us be initialized - */ -orte_regx_base_component_t mca_regx_reverse_component = { - .base_version = { - MCA_REGX_BASE_VERSION_1_0_0, - .mca_component_name = "reverse", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_query_component = component_query, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int component_query(mca_base_module_t **module, int *priority) -{ - *module = (mca_base_module_t*)&orte_regx_reverse_module; - *priority = 1; - return ORTE_SUCCESS; -} diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c index 3f52cfee225..a329f63caca 100644 --- a/orte/orted/orted_comm.c +++ b/orte/orted/orted_comm.c @@ -14,7 +14,7 @@ * reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -47,6 +47,7 @@ #include "opal/mca/event/event.h" #include "opal/mca/base/base.h" +#include "opal/mca/compress/compress.h" #include "opal/mca/pstat/pstat.h" #include "opal/util/output.h" #include "opal/util/opal_environ.h" @@ -58,7 +59,7 @@ #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" #include "orte/util/name_fns.h" -#include "orte/util/compress.h" +#include "orte/util/nidmap.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/grpcomm/base/base.h" @@ -126,7 +127,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, char *coprocessors; orte_job_map_t *map; int8_t flag; - uint8_t *cmpdata; + uint8_t *cmpdata, u8; size_t cmplen; /* unpack the command */ @@ -241,6 +242,31 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, } break; + case ORTE_DAEMON_PASS_NODE_INFO_CMD: + if (orte_debug_daemons_flag) { + opal_output(0, "%s orted_cmd: received pass_node_info", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + } + if (!ORTE_PROC_IS_HNP) { + n = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &u8, &n, OPAL_UINT8))) { + ORTE_ERROR_LOG(ret); + goto CLEANUP; + } + if (1 == u8) { + if (ORTE_SUCCESS != (ret = orte_util_decode_nidmap(buffer))) { + ORTE_ERROR_LOG(ret); + goto CLEANUP; + } + } + if (ORTE_SUCCESS != (ret = orte_util_parse_node_info(buffer))) { + ORTE_ERROR_LOG(ret); + goto CLEANUP; + } + } + break; + + /**** ADD_LOCAL_PROCS ****/ case ORTE_DAEMON_ADD_LOCAL_PROCS: case ORTE_DAEMON_DVM_ADD_PROCS: @@ -639,8 +665,8 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, free(coprocessors); } answer = OBJ_NEW(opal_buffer_t); - if (orte_util_compress_block((uint8_t*)data.base_ptr, data.bytes_used, - &cmpdata, &cmplen)) { + if (opal_compress.compress_block((uint8_t*)data.base_ptr, data.bytes_used, + &cmpdata, &cmplen)) { /* the data was compressed - mark that we compressed it */ flag = 1; if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &flag, 1, OPAL_INT8))) { diff --git 
a/orte/orted/orted_main.c b/orte/orted/orted_main.c index 89191c4352c..1280b066ce4 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -16,7 +16,7 @@ * Copyright (c) 2009 Institut National de Recherche en Informatique * et Automatique. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -63,6 +63,7 @@ #include "opal/util/fd.h" #include "opal/runtime/opal.h" #include "opal/mca/base/mca_base_var.h" +#include "opal/mca/compress/compress.h" #include "opal/util/daemon_init.h" #include "opal/dss/dss.h" #include "opal/mca/hwloc/hwloc-internal.h" @@ -72,17 +73,16 @@ #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" #include "orte/util/name_fns.h" +#include "orte/util/nidmap.h" #include "orte/util/parse_options.h" #include "orte/mca/rml/base/rml_contact.h" #include "orte/util/pre_condition_transports.h" -#include "orte/util/compress.h" #include "orte/util/threads.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/ess/ess.h" #include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/grpcomm/base/base.h" -#include "orte/mca/regx/regx.h" #include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml_types.h" #include "orte/mca/odls/odls.h" @@ -221,10 +221,6 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = { NULL, OPAL_CMD_LINE_TYPE_BOOL, "Whether to report process bindings to stderr" }, - { "orte_node_regex", '\0', "nodes", "nodes", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Regular expression defining nodes in system" }, - /* End of list */ { NULL, '\0', NULL, NULL, 0, NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } @@ -755,11 +751,9 @@ int orte_daemon(int argc, char *argv[]) * a little time in the launch phase by "warming up" the * connection to our parent while 
we wait for our children */ buffer = OBJ_NEW(opal_buffer_t); // zero-byte message - if (NULL == orte_node_regex) { - orte_rml.recv_buffer_nb(ORTE_PROC_MY_PARENT, ORTE_RML_TAG_NODE_REGEX_REPORT, - ORTE_RML_PERSISTENT, node_regex_report, &node_regex_waiting); - node_regex_waiting = true; - } + orte_rml.recv_buffer_nb(ORTE_PROC_MY_PARENT, ORTE_RML_TAG_NODE_REGEX_REPORT, + ORTE_RML_PERSISTENT, node_regex_report, &node_regex_waiting); + node_regex_waiting = true; if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, ORTE_PROC_MY_PARENT, buffer, ORTE_RML_TAG_WARMUP_CONNECTION, @@ -917,8 +911,8 @@ int orte_daemon(int argc, char *argv[]) if (ORTE_SUCCESS != (ret = opal_dss.pack(&data, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) { ORTE_ERROR_LOG(ret); } - if (orte_util_compress_block((uint8_t*)data.base_ptr, data.bytes_used, - &cmpdata, &cmplen)) { + if (opal_compress.compress_block((uint8_t*)data.base_ptr, data.bytes_used, + &cmpdata, &cmplen)) { /* the data was compressed - mark that we compressed it */ flag = 1; if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &flag, 1, OPAL_INT8))) { @@ -1174,20 +1168,11 @@ static void report_orted() { static void node_regex_report(int status, orte_process_name_t* sender, opal_buffer_t *buffer, orte_rml_tag_t tag, void *cbdata) { - int rc, n=1; - char * regex; - assert(NULL == orte_node_regex); + int rc; bool * active = (bool *)cbdata; - /* extract the node regex if needed, and update the routing tree */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, ®ex, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return; - } - orte_node_regex = regex; - - if (ORTE_SUCCESS != (rc = orte_regx.nidmap_parse(orte_node_regex))) { + /* extract the node info if needed, and update the routing tree */ + if (ORTE_SUCCESS != (rc = orte_util_decode_nidmap(buffer))) { ORTE_ERROR_LOG(rc); return; } diff --git a/orte/util/Makefile.am b/orte/util/Makefile.am index d54503b3bb0..5e050c3bf2d 100644 --- a/orte/util/Makefile.am +++ 
b/orte/util/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. # Copyright (c) 2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ @@ -58,8 +58,8 @@ headers += \ util/comm/comm.h \ util/attr.h \ util/listener.h \ - util/compress.h \ - util/threads.h + util/threads.h \ + util/nidmap.h lib@ORTE_LIB_PREFIX@open_rte_la_SOURCES += \ util/error_strings.c \ @@ -77,7 +77,7 @@ lib@ORTE_LIB_PREFIX@open_rte_la_SOURCES += \ util/comm/comm.c \ util/attr.c \ util/listener.c \ - util/compress.c + util/nidmap.c # Remove the generated man pages distclean-local: diff --git a/orte/util/compress.c b/orte/util/compress.c deleted file mode 100644 index d899f2d7f19..00000000000 --- a/orte/util/compress.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include - - -#include -#ifdef HAVE_STRING_H -#include -#endif -#ifdef HAVE_ZLIB_H -#include -#endif - -#include "opal/util/output.h" -#include "compress.h" - -#if OPAL_HAVE_ZLIB -bool orte_util_compress_block(uint8_t *inbytes, - size_t inlen, - uint8_t **outbytes, - size_t *olen) -{ - z_stream strm; - size_t len; - uint8_t *tmp; - - if (inlen < ORTE_COMPRESS_LIMIT) { - return false; - } - - /* set default output */ - *outbytes = NULL; - *olen = 0; - - /* setup the stream */ - memset (&strm, 0, sizeof (strm)); - deflateInit (&strm, 9); - - /* get an upper bound on the required output storage */ - len = deflateBound(&strm, inlen); - if (NULL == (tmp = (uint8_t*)malloc(len))) { - return false; - } - strm.next_in = inbytes; - strm.avail_in = inlen; - - /* allocating the upper bound guarantees zlib will - * always successfully compress into the available space */ - strm.avail_out = len; - strm.next_out = tmp; - - deflate (&strm, Z_FINISH); - deflateEnd (&strm); - - *outbytes = tmp; - *olen = len - strm.avail_out; - return true; // we did the compression -} -#else -bool orte_util_compress_block(uint8_t *inbytes, - size_t inlen, - uint8_t **outbytes, - size_t *olen) -{ - return false; // we did not compress -} -#endif - -#if OPAL_HAVE_ZLIB -bool orte_util_uncompress_block(uint8_t **outbytes, size_t olen, - uint8_t *inbytes, size_t len) -{ - uint8_t *dest; - z_stream strm; - - /* set the default error answer */ - *outbytes = NULL; - - /* setting destination to the fully decompressed size */ - dest = (uint8_t*)malloc(olen); - if (NULL == dest) { - return false; - } - - memset (&strm, 0, sizeof (strm)); - if (Z_OK != inflateInit(&strm)) { - free(dest); - return false; - } - strm.avail_in = len; - strm.next_in = inbytes; - strm.avail_out = olen; - strm.next_out = dest; - - if (Z_STREAM_END != inflate (&strm, Z_FINISH)) { - opal_output(0, "\tDECOMPRESS FAILED: %s", strm.msg); - } - inflateEnd 
(&strm); - *outbytes = dest; - return true; -} -#else -bool orte_util_uncompress_block(uint8_t **outbytes, size_t olen, - uint8_t *inbytes, size_t len) -{ - return false; -} -#endif diff --git a/orte/util/compress.h b/orte/util/compress.h deleted file mode 100644 index 5ba3faf46e8..00000000000 --- a/orte/util/compress.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * Compress/decompress long data blocks - */ - -#ifndef ORTE_COMPRESS_H -#define ORTE_COMPRESS_H - -#include - - -BEGIN_C_DECLS - -/* define a limit for compression */ -#define ORTE_COMPRESS_LIMIT 4096 - -/** - * Compress a string into a byte object using Zlib - */ -ORTE_DECLSPEC bool orte_util_compress_block(uint8_t *inbytes, - size_t inlen, - uint8_t **outbytes, - size_t *olen); - -/** - * Decompress a byte object - */ -ORTE_DECLSPEC bool orte_util_uncompress_block(uint8_t **outbytes, size_t olen, - uint8_t *inbytes, size_t len); - -END_C_DECLS - -#endif /* ORTE_COMPRESS_H */ diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c new file mode 100644 index 00000000000..c1d67fde9b4 --- /dev/null +++ b/orte/util/nidmap.c @@ -0,0 +1,1170 @@ +/* + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ */
+
+#include "orte_config.h"
+#include "orte/types.h"
+#include "opal/types.h"
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+
+#include "opal/dss/dss_types.h"
+#include "opal/mca/compress/compress.h"
+#include "opal/util/argv.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+#include "orte/mca/rmaps/base/base.h"
+#include "orte/mca/routed/routed.h"
+#include "orte/runtime/orte_globals.h"
+
+#include "orte/util/nidmap.h"
+
+/*
+ * Pack a description of the node pool into "buffer". The layout written
+ * here is the one read back by orte_util_decode_nidmap():
+ *
+ *   uint8   1 if the HNP is part of the allocation, else 0
+ *   uint8   1 if this is a managed allocation, else 0
+ *   bool    node-name blob is compressed
+ *   size_t  uncompressed size of the blob (only when compressed)
+ *   bo      comma-separated, NUL-terminated list of node names
+ *   bool    vpid blob is compressed
+ *   int     number of bytes used per vpid (1, 2 or 4)
+ *   size_t  uncompressed size of the blob (only when compressed)
+ *   bo      one vpid per packed name; an all-ones value means "no daemon"
+ *
+ * Returns ORTE_SUCCESS, or the code of the first operation that failed.
+ */
+int orte_util_nidmap_create(opal_pointer_array_t *pool,
+                            opal_buffer_t *buffer)
+{
+    char *raw = NULL;
+    uint8_t *vpids = NULL, u8;
+    uint16_t u16;
+    uint32_t u32;
+    int n, ndaemons, rc, nbytes;
+    bool compressed;
+    char **names = NULL;
+    orte_node_t *nptr;
+    opal_byte_object_t bo, *boptr;
+    size_t sz;
+
+    /* pack a flag indicating if the HNP was included in the allocation */
+    if (orte_hnp_is_allocated) {
+        u8 = 1;
+    } else {
+        u8 = 0;
+    }
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &u8, 1, OPAL_UINT8))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    /* pack a flag indicating if we are in a managed allocation */
+    if (orte_managed_allocation) {
+        u8 = 1;
+    } else {
+        u8 = 0;
+    }
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &u8, 1, OPAL_UINT8))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    /* daemon vpids start from 0 and increase linearly by one
+     * up to the number of nodes in the system. The vpid is
+     * a 32-bit value. We don't know how many of the nodes
+     * in the system have daemons - we may not be using them
+     * all just yet. However, even the largest systems won't
+     * have more than a million nodes for quite some time,
+     * so for now we'll just allocate enough space to hold
+     * them all. Someone can optimize this further later.
+     *
+     * The all-ones value of each width is reserved to mean
+     * "no daemon on this node", so a width is only usable when
+     * the largest possible vpid (size-1) stays below it */
+    if (pool->size <= UINT8_MAX) {
+        nbytes = 1;
+    } else if (pool->size <= UINT16_MAX) {
+        nbytes = 2;
+    } else {
+        nbytes = 4;
+    }
+    /* never request a zero-byte allocation */
+    vpids = (uint8_t*)malloc((size_t)nbytes * (size_t)(0 < pool->size ? pool->size : 1));
+    if (NULL == vpids) {
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+
+    ndaemons = 0;
+    for (n=0; n < pool->size; n++) {
+        if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(pool, n))) {
+            continue;
+        }
+        /* add the hostname to the argv */
+        opal_argv_append_nosize(&names, nptr->name);
+        /* store the vpid */
+        if (1 == nbytes) {
+            if (NULL == nptr->daemon) {
+                vpids[ndaemons] = UINT8_MAX;
+            } else {
+                vpids[ndaemons] = nptr->daemon->name.vpid;
+            }
+        } else if (2 == nbytes) {
+            if (NULL == nptr->daemon) {
+                u16 = UINT16_MAX;
+            } else {
+                u16 = nptr->daemon->name.vpid;
+            }
+            memcpy(&vpids[nbytes*ndaemons], &u16, 2);
+        } else {
+            if (NULL == nptr->daemon) {
+                u32 = UINT32_MAX;
+            } else {
+                u32 = nptr->daemon->name.vpid;
+            }
+            memcpy(&vpids[nbytes*ndaemons], &u32, 4);
+        }
+        ++ndaemons;
+    }
+
+    /* construct the string of node names for compression */
+    raw = opal_argv_join(names, ',');
+    if (NULL == raw) {
+        rc = ORTE_ERR_OUT_OF_RESOURCE;
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+    if (opal_compress.compress_block((uint8_t*)raw, strlen(raw)+1,
+                                     (uint8_t**)&bo.bytes, &sz)) {
+        /* mark that this was compressed */
+        compressed = true;
+        bo.size = sz;
+    } else {
+        /* mark that this was not compressed */
+        compressed = false;
+        bo.bytes = (uint8_t*)raw;
+        bo.size = strlen(raw)+1;
+    }
+    /* indicate compression */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &compressed, 1, OPAL_BOOL))) {
+        ORTE_ERROR_LOG(rc);
+        if (compressed) {
+            free(bo.bytes);
+        }
+        goto cleanup;
+    }
+    /* if compressed, provide the uncompressed size */
+    if (compressed) {
+        sz = strlen(raw)+1;
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &sz, 1, OPAL_SIZE))) {
+            ORTE_ERROR_LOG(rc);
+            free(bo.bytes);
+            goto cleanup;
+        }
+    }
+    /* add the object */
+    boptr = &bo;
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT))) {
+        ORTE_ERROR_LOG(rc);
+        if (compressed) {
+            free(bo.bytes);
+        }
+        goto cleanup;
+    }
+    if (compressed) {
+        free(bo.bytes);
+    }
+
+    /* compress the vpids */
+    if (opal_compress.compress_block(vpids, nbytes*ndaemons,
+                                     (uint8_t**)&bo.bytes, &sz)) {
+        /* mark that this was compressed */
+        compressed = true;
+        bo.size = sz;
+    } else {
+        /* mark that this was not compressed */
+        compressed = false;
+        bo.bytes = vpids;
+        bo.size = nbytes*ndaemons;
+    }
+    /* indicate compression */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &compressed, 1, OPAL_BOOL))) {
+        ORTE_ERROR_LOG(rc);
+        if (compressed) {
+            free(bo.bytes);
+        }
+        goto cleanup;
+    }
+    /* provide the #bytes/vpid */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &nbytes, 1, OPAL_INT))) {
+        ORTE_ERROR_LOG(rc);
+        if (compressed) {
+            free(bo.bytes);
+        }
+        goto cleanup;
+    }
+    /* if compressed, provide the uncompressed size */
+    if (compressed) {
+        sz = nbytes*ndaemons;
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &sz, 1, OPAL_SIZE))) {
+            ORTE_ERROR_LOG(rc);
+            free(bo.bytes);
+            goto cleanup;
+        }
+    }
+    /* add the object */
+    boptr = &bo;
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT))) {
+        ORTE_ERROR_LOG(rc);
+        if (compressed) {
+            free(bo.bytes);
+        }
+        goto cleanup;
+    }
+    if (compressed) {
+        free(bo.bytes);
+    }
+
+  cleanup:
+    if (NULL != names) {
+        opal_argv_free(names);
+    }
+    if (NULL != raw) {
+        free(raw);
+    }
+    if (NULL != vpids) {
+        free(vpids);
+    }
+
+    return rc;
+}
+
+/*
+ * Unpack the stream produced by orte_util_nidmap_create(). The two
+ * allocation flags are always applied to the globals. Unless we are the
+ * HNP, one orte_node_t per received name is then stored in
+ * orte_node_pool (at the position of the name in the list), and a daemon
+ * proc object is created/linked for every node whose vpid is not the
+ * "no daemon" marker.
+ *
+ * Returns ORTE_SUCCESS, or the code of the first operation that failed.
+ */
+int orte_util_decode_nidmap(opal_buffer_t *buf)
+{
+    uint8_t u8, *vp8 = NULL;
+    uint16_t *vp16 = NULL;
+    uint32_t *vp32 = NULL, vpid;
+    int cnt, rc, nbytes, n;
+    bool compressed;
+    size_t sz;
+    opal_byte_object_t *boptr;
+    char *raw = NULL, **names = NULL;
+    orte_node_t *nd;
+    orte_job_t *daemons;
+    orte_proc_t *proc;
+    orte_topology_t *t = NULL;
+
+    /* unpack the flag indicating if HNP is in allocation */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &u8, &cnt, OPAL_UINT8))) {
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+    if (1 == u8) {
+        orte_hnp_is_allocated = true;
+    } else {
+        orte_hnp_is_allocated = false;
+    }
+
+    /* unpack the flag indicating if we are in managed allocation */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &u8, &cnt, OPAL_UINT8))) {
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+    if (1 == u8) {
+        orte_managed_allocation = true;
+    } else {
+        orte_managed_allocation = false;
+    }
+
+    /* unpack compression flag for node names */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &compressed, &cnt, OPAL_BOOL))) {
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+
+    /* if compressed, get the uncompressed size */
+    if (compressed) {
+        cnt = 1;
+        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) {
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+    }
+
+    /* unpack the nodename object */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) {
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+
+    /* if compressed, decompress */
+    if (compressed) {
+        if (!opal_compress.decompress_block((uint8_t**)&raw, sz,
+                                            boptr->bytes, boptr->size)) {
+            ORTE_ERROR_LOG(ORTE_ERROR);
+            if (NULL != boptr->bytes) {
+                free(boptr->bytes);
+            }
+            free(boptr);
+            rc = ORTE_ERROR;
+            goto cleanup;
+        }
+    } else {
+        /* take ownership of the payload */
+        raw = (char*)boptr->bytes;
+        boptr->bytes = NULL;
+        boptr->size = 0;
+    }
+    if (NULL != boptr->bytes) {
+        free(boptr->bytes);
+    }
+    free(boptr);
+    /* an empty payload simply yields no names */
+    if (NULL != raw) {
+        names = opal_argv_split(raw, ',');
+        free(raw);
+    }
+
+
+    /* unpack compression flag for daemon vpids */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &compressed, &cnt, OPAL_BOOL))) {
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+
+    /* unpack the #bytes/vpid */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &nbytes, &cnt, OPAL_INT))) {
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+
+    /* if compressed, get the uncompressed size */
+    if (compressed) {
+        cnt = 1;
+        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) {
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+    }
+
+    /* unpack the vpid object */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) {
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+
+    /* if compressed, decompress */
+    if (compressed) {
+        if (!opal_compress.decompress_block((uint8_t**)&vp8, sz,
+                                            boptr->bytes, boptr->size)) {
+            ORTE_ERROR_LOG(ORTE_ERROR);
+            if (NULL != boptr->bytes) {
+                free(boptr->bytes);
+            }
+            free(boptr);
+            rc = ORTE_ERROR;
+            goto cleanup;
+        }
+    } else {
+        /* take ownership of the payload */
+        vp8 = (uint8_t*)boptr->bytes;
+        boptr->bytes = NULL;
+        boptr->size = 0;
+    }
+    if (NULL != boptr->bytes) {
+        free(boptr->bytes);
+    }
+    free(boptr);
+    /* view the array with the element width chosen by the sender; only
+     * one of vp8/vp16/vp32 is non-NULL so cleanup frees it exactly once */
+    if (2 == nbytes) {
+        vp16 = (uint16_t*)vp8;
+        vp8 = NULL;
+    } else if (4 == nbytes) {
+        vp32 = (uint32_t*)vp8;
+        vp8 = NULL;
+    }
+
+    /* if we are the HNP, we don't need any of this stuff */
+    if (ORTE_PROC_IS_HNP) {
+        goto cleanup;
+    }
+
+    /* get the daemon job object */
+    daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
+    if (NULL == daemons) {
+        rc = ORTE_ERR_NOT_FOUND;
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+
+    /* get our topology - the first one recorded, if any */
+    for (n=0; n < orte_node_topologies->size; n++) {
+        if (NULL != (t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, n))) {
+            break;
+        }
+    }
+
+    /* create the node pool array - this will include
+     * _all_ nodes known to the allocation */
+    for (n=0; NULL != names && NULL != names[n]; n++) {
+        /* add this name to the pool */
+        nd = OBJ_NEW(orte_node_t);
+        nd->name = strdup(names[n]);
+        nd->index = n;
+        opal_pointer_array_set_item(orte_node_pool, n, nd);
+        /* set the topology - always default to homogeneous
+         * as that is the most common scenario */
+        nd->topology = t;
+        /* see if it has a daemon on it */
+        if (1 == nbytes && NULL != vp8 && UINT8_MAX != vp8[n]) {
+            vpid = vp8[n];
+        } else if (2 == nbytes && NULL != vp16 && UINT16_MAX != vp16[n]) {
+            vpid = vp16[n];
+        } else if (4 == nbytes && NULL != vp32 && UINT32_MAX != vp32[n]) {
+            vpid = vp32[n];
+        } else {
+            vpid = UINT32_MAX;
+        }
+        if (UINT32_MAX != vpid) {
+            if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, vpid))) {
+                proc = OBJ_NEW(orte_proc_t);
+                proc->name.jobid = ORTE_PROC_MY_NAME->jobid;
+                proc->name.vpid = vpid;
+                proc->state = ORTE_PROC_STATE_RUNNING;
+                ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE);
+                daemons->num_procs++;
+                opal_pointer_array_set_item(daemons->procs, proc->name.vpid, proc);
+            }
+            /* node and daemon reference each other */
+            OBJ_RETAIN(nd);
+            proc->node = nd;
+            OBJ_RETAIN(proc);
+            nd->daemon = proc;
+        }
+    }
+
+    /* update num procs */
+    if (orte_process_info.num_procs != daemons->num_procs) {
+        orte_process_info.num_procs = daemons->num_procs;
+    }
+    /* need to update the routing plan */
+    orte_routed.update_routing_plan(NULL);
+
+    if (orte_process_info.max_procs < orte_process_info.num_procs) {
+        orte_process_info.max_procs = orte_process_info.num_procs;
+    }
+
+  cleanup:
+    if (NULL != vp8) {
+        free(vp8);
+    }
+    if (NULL != vp16) {
+        free(vp16);
+    }
+    if (NULL != vp32) {
+        free(vp32);
+    }
+    if (NULL != names) {
+        opal_argv_free(names);
+    }
+    return rc;
+}
+
+/* list item used to remember the order in which topologies were sent */
+typedef struct {
+    opal_list_item_t super;
+    orte_topology_t *t;
+} orte_tptr_trk_t;
+static OBJ_CLASS_INSTANCE(orte_tptr_trk_t,
+                          opal_list_item_t,
+                          NULL, NULL);
+
+/*
+ * Pack topology, slot and "slots given" information for every node in
+ * orte_node_pool. The layout is the one read back by
+ * orte_util_parse_node_info():
+ *
+ *   int8    number of topologies that follow (sent only when > 1)
+ *   [ntopos x (string signature, hwloc topology)]
+ *   [int8 1/0 = compressed/raw, size_t raw size if compressed,
+ *    bo with one topology index byte per node]   (only when > 1)
+ *   int8    slots: <0 uniform (count is the negated value),
+ *                  1 compressed array, 0 raw array
+ *   [size_t raw size if compressed, bo of uint16 slots per node]
+ *   int8    flags: -1 all set, -2 none set,
+ *                  2 compressed bitmap, 3 raw bitmap
+ *   [size_t raw size if compressed, bo bitmap, MSB-first, 1 bit per node]
+ *
+ * Per-node arrays are indexed by the position of the node among the
+ * non-NULL pool entries, which is how the receiver walks them.
+ *
+ * Returns ORTE_SUCCESS, or the code of the first operation that failed.
+ */
+int orte_util_pass_node_info(opal_buffer_t *buffer)
+{
+    uint16_t *slots=NULL, slot = UINT16_MAX;
+    uint8_t *flags=NULL, flag = UINT8_MAX, *topologies = NULL;
+    int8_t i8, ntopos;
+    int rc, n, m, nbitmap, nstart;
+    bool compressed, unislots = true, uniflags = true, unitopos = true;
+    orte_node_t *nptr;
+    opal_byte_object_t bo, *boptr;
+    size_t sz, nslots, ntopobytes = 0;
+    opal_buffer_t bucket;
+    orte_tptr_trk_t *trk;
+    opal_list_t topos;
+    orte_topology_t *t;
+
+    /* make room for the number of slots on each node - zero-filled
+     * so that no uninitialized bytes are ever put on the wire */
+    nslots = sizeof(uint16_t) * orte_node_pool->size;
+    slots = (uint16_t*)calloc(orte_node_pool->size + 1, sizeof(uint16_t));
+    /* and for the flags for each node - only need one bit/node */
+    nbitmap = (orte_node_pool->size / 8) + 1;
+    flags = (uint8_t*)calloc(1, nbitmap);
+    if (NULL == slots || NULL == flags) {
+        free(slots);
+        free(flags);
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+
+    /* handle the topologies - as the most common case by far
+     * is to have homogeneous topologies, we only send them
+     * if something is different. We know that the HNP is
+     * the first topology, and that any differing topology
+     * on the compute nodes must follow. So send the topologies
+     * if and only if:
+     *
+     * (a) the HNP is being used to house application procs and
+     *     there is more than one topology in our array; or
+     *
+     * (b) the HNP is not being used, but there are more than
+     *     two topologies in our array, thus indicating that
+     *     there are multiple topologies on the compute nodes
+     */
+    if (!orte_hnp_is_allocated || (ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) {
+        nstart = 1;
+    } else {
+        nstart = 0;
+    }
+    OBJ_CONSTRUCT(&topos, opal_list_t);
+    OBJ_CONSTRUCT(&bucket, opal_buffer_t);
+    for (n=nstart; n < orte_node_topologies->size; n++) {
+        if (NULL == (t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, n))) {
+            continue;
+        }
+        trk = OBJ_NEW(orte_tptr_trk_t);
+        trk->t = t;
+        opal_list_append(&topos, &trk->super);
+        /* pack this topology string */
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &t->sig, 1, OPAL_STRING))) {
+            ORTE_ERROR_LOG(rc);
+            OBJ_DESTRUCT(&bucket);
+            goto cleanup;
+        }
+        /* pack the topology itself */
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &t->topo, 1, OPAL_HWLOC_TOPO))) {
+            ORTE_ERROR_LOG(rc);
+            OBJ_DESTRUCT(&bucket);
+            goto cleanup;
+        }
+    }
+    /* pack the number of topologies in allocation */
+    ntopos = opal_list_get_size(&topos);
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &ntopos, 1, OPAL_INT8))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_DESTRUCT(&bucket);
+        goto cleanup;
+    }
+    if (1 < ntopos) {
+        /* need to send them along */
+        opal_dss.copy_payload(buffer, &bucket);
+        /* allocate space to report them - one index byte per node.
+         * The length is kept in its own variable as the pool size
+         * does not fit in the int8 topology counter */
+        ntopobytes = orte_node_pool->size;
+        topologies = (uint8_t*)calloc(ntopobytes + 1, 1);
+        if (NULL == topologies) {
+            rc = ORTE_ERR_OUT_OF_RESOURCE;
+            ORTE_ERROR_LOG(rc);
+            OBJ_DESTRUCT(&bucket);
+            goto cleanup;
+        }
+        unitopos = false;
+    }
+    OBJ_DESTRUCT(&bucket);
+
+    /* n walks the pool, m counts the nodes actually found */
+    for (n=0, m=0; n < orte_node_pool->size; n++) {
+        if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
+            continue;
+        }
+        /* store the topology, if required */
+        if (!unitopos) {
+            topologies[m] = 0;
+            if (0 == nstart || 0 < n) {
+                OPAL_LIST_FOREACH(trk, &topos, orte_tptr_trk_t) {
+                    if (trk->t == nptr->topology) {
+                        break;
+                    }
+                    topologies[m]++;
+                }
+            }
+        }
+        /* store the number of slots */
+        slots[m] = nptr->slots;
+        if (UINT16_MAX == slot) {
+            slot = nptr->slots;
+        } else if (slot != nptr->slots) {
+            unislots = false;
+        }
+        /* store the flag */
+        if (ORTE_FLAG_TEST(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
+            flags[m/8] |= (1 << (7 - (m % 8)));
+            if (UINT8_MAX == flag) {
+                flag = 1;
+            } else if (1 != flag) {
+                uniflags = false;
+            }
+        } else {
+            if (UINT8_MAX == flag) {
+                flag = 0;
+            } else if (0 != flag) {
+                uniflags = false;
+            }
+        }
+        ++m;
+    }
+
+    /* deal with the topology assignments */
+    if (!unitopos) {
+        if (opal_compress.compress_block((uint8_t*)topologies, ntopobytes,
+                                         (uint8_t**)&bo.bytes, &sz)) {
+            /* mark that this was compressed */
+            i8 = 1;
+            compressed = true;
+            bo.size = sz;
+        } else {
+            /* mark that this was not compressed */
+            i8 = 0;
+            compressed = false;
+            bo.bytes = topologies;
+            bo.size = ntopobytes;
+        }
+        /* indicate compression */
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &i8, 1, OPAL_INT8))) {
+            ORTE_ERROR_LOG(rc);
+            if (compressed) {
+                free(bo.bytes);
+            }
+            goto cleanup;
+        }
+        /* if compressed, provide the uncompressed size */
+        if (compressed) {
+            sz = ntopobytes;
+            if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &sz, 1, OPAL_SIZE))) {
+                ORTE_ERROR_LOG(rc);
+                free(bo.bytes);
+                goto cleanup;
+            }
+        }
+        /* add the object */
+        boptr = &bo;
+        rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT);
+        if (compressed) {
+            free(bo.bytes);
+        }
+        if (ORTE_SUCCESS != rc) {
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+    }
+
+    /* if we have uniform #slots, then just flag it - no
+     * need to pass anything. The count travels as a negated
+     * int8, so only 1..INT8_MAX can use the shortcut - anything
+     * else is sent as the per-node array */
+    if (unislots && 0 < slot && slot <= INT8_MAX) {
+        i8 = -1 * (int8_t)slot;
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &i8, 1, OPAL_INT8))) {
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+    } else {
+        if (opal_compress.compress_block((uint8_t*)slots, nslots,
+                                         (uint8_t**)&bo.bytes, &sz)) {
+            /* mark that this was compressed */
+            i8 = 1;
+            compressed = true;
+            bo.size = sz;
+        } else {
+            /* mark that this was not compressed */
+            i8 = 0;
+            compressed = false;
+            bo.bytes = (uint8_t*)slots;
+            bo.size = nslots;
+        }
+        /* indicate compression */
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &i8, 1, OPAL_INT8))) {
+            ORTE_ERROR_LOG(rc);
+            if (compressed) {
+                free(bo.bytes);
+            }
+            goto cleanup;
+        }
+        /* if compressed, provide the uncompressed size */
+        if (compressed) {
+            sz = nslots;
+            if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &sz, 1, OPAL_SIZE))) {
+                ORTE_ERROR_LOG(rc);
+                free(bo.bytes);
+                goto cleanup;
+            }
+        }
+        /* add the object */
+        boptr = &bo;
+        rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT);
+        if (compressed) {
+            free(bo.bytes);
+        }
+        if (ORTE_SUCCESS != rc) {
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+    }
+
+    /* if we have uniform flags, then just flag it - no
+     * need to pass anything */
+    if (uniflags) {
+        if (1 == flag) {
+            i8 = -1;
+        } else {
+            i8 = -2;
+        }
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &i8, 1, OPAL_INT8))) {
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+    } else {
+        if (opal_compress.compress_block(flags, nbitmap,
+                                         (uint8_t**)&bo.bytes, &sz)) {
+            /* mark that this was compressed */
+            i8 = 2;
+            compressed = true;
+            bo.size = sz;
+        } else {
+            /* mark that this was not compressed */
+            i8 = 3;
+            compressed = false;
+            bo.bytes = flags;
+            bo.size = nbitmap;
+        }
+        /* indicate compression */
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &i8, 1, OPAL_INT8))) {
+            ORTE_ERROR_LOG(rc);
+            if (compressed) {
+                free(bo.bytes);
+            }
+            goto cleanup;
+        }
+        /* if compressed, provide the uncompressed size */
+        if (compressed) {
+            sz = nbitmap;
+            if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &sz, 1, OPAL_SIZE))) {
+                ORTE_ERROR_LOG(rc);
+                free(bo.bytes);
+                goto cleanup;
+            }
+        }
+        /* add the object */
+        boptr = &bo;
+        rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT);
+        if (compressed) {
+            free(bo.bytes);
+        }
+        if (ORTE_SUCCESS != rc) {
+            ORTE_ERROR_LOG(rc);
+        }
+    }
+
+  cleanup:
+    /* releases the tracker items and destructs the list */
+    OPAL_LIST_DESTRUCT(&topos);
+    free(topologies);
+    free(slots);
+    free(flags);
+    return rc;
+}
+
+/*
+ * Unpack the stream produced by orte_util_pass_node_info() and apply it
+ * to the nodes in orte_node_pool. Per-node arrays are consumed in the
+ * order in which non-NULL nodes appear in the pool, and never beyond
+ * the number of entries actually received.
+ *
+ * Returns ORTE_SUCCESS, or the code of the first operation that failed.
+ */
+int orte_util_parse_node_info(opal_buffer_t *buf)
+{
+    int8_t i8, ntopos;
+    int rc = ORTE_SUCCESS, cnt, n, m;
+    orte_node_t *nptr;
+    size_t sz = 0, len;
+    opal_byte_object_t *boptr;
+    uint16_t *slots = NULL;
+    uint8_t *flags = NULL;
+    uint8_t *topologies = NULL;
+    orte_topology_t *t2, **tps = NULL;
+    hwloc_topology_t topo;
+    char *sig;
+
+    /* check to see if we have uniform topologies */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &i8, &cnt, OPAL_INT8))) {
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+    ntopos = i8;
+    /* we already defaulted to uniform topology, so only need to
+     * process this if it is non-uniform */
+    if (1 < ntopos) {
+        /* create an array to cache these - one entry per topology */
+        tps = (orte_topology_t**)calloc(ntopos, sizeof(orte_topology_t*));
+        if (NULL == tps) {
+            rc = ORTE_ERR_OUT_OF_RESOURCE;
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+        for (n=0; n < ntopos; n++) {
+            cnt = 1;
+            if (ORTE_SUCCESS != (rc = opal_dss.unpack(buf, &sig, &cnt, OPAL_STRING))) {
+                ORTE_ERROR_LOG(rc);
+                goto cleanup;
+            }
+            cnt = 1;
+            if (ORTE_SUCCESS != (rc = opal_dss.unpack(buf, &topo, &cnt, OPAL_HWLOC_TOPO))) {
+                ORTE_ERROR_LOG(rc);
+                free(sig);
+                goto cleanup;
+            }
+            /* new topology - record it */
+            t2 = OBJ_NEW(orte_topology_t);
+            t2->sig = sig;
+            t2->topo = topo;
+            opal_pointer_array_add(orte_node_topologies, t2);
+            /* keep a cached copy */
+            tps[n] = t2;
+        }
+        /* now get the array of assigned topologies - the sender
+         * leads with a flag telling us if it was compressed */
+        cnt = 1;
+        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &i8, &cnt, OPAL_INT8))) {
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+        /* if compressed, get the uncompressed size */
+        if (1 == i8) {
+            cnt = 1;
+            if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) {
+                ORTE_ERROR_LOG(rc);
+                goto cleanup;
+            }
+        }
+        /* unpack the topologies object */
+        cnt = 1;
+        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) {
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+        /* if compressed, decompress */
+        if (1 == i8) {
+            if (!opal_compress.decompress_block((uint8_t**)&topologies, sz,
+                                                boptr->bytes, boptr->size)) {
+                ORTE_ERROR_LOG(ORTE_ERROR);
+                if (NULL != boptr->bytes) {
+                    free(boptr->bytes);
+                }
+                free(boptr);
+                rc = ORTE_ERROR;
+                goto cleanup;
+            }
+            len = sz;
+        } else {
+            /* take ownership of the payload */
+            topologies = (uint8_t*)boptr->bytes;
+            len = (0 < boptr->size) ? (size_t)boptr->size : 0;
+            boptr->bytes = NULL;
+            boptr->size = 0;
+        }
+        if (NULL != boptr->bytes) {
+            free(boptr->bytes);
+        }
+        free(boptr);
+        /* cycle across the node pool and assign the values */
+        for (n=0, m=0; NULL != topologies && n < orte_node_pool->size && (size_t)m < len; n++) {
+            if (NULL != (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
+                /* ignore an index that does not name a received topology */
+                if (topologies[m] < ntopos) {
+                    nptr->topology = tps[topologies[m]];
+                }
+                ++m;
+            }
+        }
+    }
+
+    /* check to see if we have uniform slot assignments */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &i8, &cnt, OPAL_INT8))) {
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+
+    /* if so, then make every node the same */
+    if (0 > i8) {
+        i8 = -1 * i8;
+        for (n=0; n < orte_node_pool->size; n++) {
+            if (NULL != (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
+                nptr->slots = i8;
+            }
+        }
+    } else {
+        /* if compressed, get the uncompressed size */
+        if (1 == i8) {
+            cnt = 1;
+            if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) {
+                ORTE_ERROR_LOG(rc);
+                goto cleanup;
+            }
+        }
+        /* unpack the slots object */
+        cnt = 1;
+        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) {
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+        /* if compressed, decompress */
+        if (1 == i8) {
+            if (!opal_compress.decompress_block((uint8_t**)&slots, sz,
+                                                boptr->bytes, boptr->size)) {
+                ORTE_ERROR_LOG(ORTE_ERROR);
+                if (NULL != boptr->bytes) {
+                    free(boptr->bytes);
+                }
+                free(boptr);
+                rc = ORTE_ERROR;
+                goto cleanup;
+            }
+            len = sz;
+        } else {
+            /* take ownership of the payload */
+            slots = (uint16_t*)boptr->bytes;
+            len = (0 < boptr->size) ? (size_t)boptr->size : 0;
+            boptr->bytes = NULL;
+            boptr->size = 0;
+        }
+        if (NULL != boptr->bytes) {
+            free(boptr->bytes);
+        }
+        free(boptr);
+        /* convert the byte length into a number of entries */
+        len /= sizeof(uint16_t);
+        /* cycle across the node pool and assign the values */
+        for (n=0, m=0; NULL != slots && n < orte_node_pool->size && (size_t)m < len; n++) {
+            if (NULL != (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
+                nptr->slots = slots[m];
+                ++m;
+            }
+        }
+    }
+
+    /* check to see if we have uniform flag assignments */
+    cnt = 1;
+    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &i8, &cnt, OPAL_INT8))) {
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+
+    /* if so, then make every node the same */
+    if (0 > i8) {
+        /* -1 (all set) becomes 1, -2 (none set) becomes 0 */
+        i8 += 2;
+        for (n=0; n < orte_node_pool->size; n++) {
+            if (NULL != (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
+                if (i8) {
+                    ORTE_FLAG_SET(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN);
+                } else {
+                    ORTE_FLAG_UNSET(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN);
+                }
+            }
+        }
+    } else {
+        /* the sender marks a compressed bitmap with 2 and a raw one
+         * with 3 - if compressed, get the uncompressed size */
+        if (2 == i8) {
+            cnt = 1;
+            if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) {
+                ORTE_ERROR_LOG(rc);
+                goto cleanup;
+            }
+        }
+        /* unpack the flags object */
+        cnt = 1;
+        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) {
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+        /* if compressed, decompress */
+        if (2 == i8) {
+            if (!opal_compress.decompress_block((uint8_t**)&flags, sz,
+                                                boptr->bytes, boptr->size)) {
+                ORTE_ERROR_LOG(ORTE_ERROR);
+                if (NULL != boptr->bytes) {
+                    free(boptr->bytes);
+                }
+                free(boptr);
+                rc = ORTE_ERROR;
+                goto cleanup;
+            }
+            len = sz;
+        } else {
+            /* take ownership of the payload */
+            flags = (uint8_t*)boptr->bytes;
+            len = (0 < boptr->size) ? (size_t)boptr->size : 0;
+            boptr->bytes = NULL;
+            boptr->size = 0;
+        }
+        if (NULL != boptr->bytes) {
+            free(boptr->bytes);
+        }
+        free(boptr);
+        /* cycle across the node pool and assign the values - the
+         * bitmap holds one bit per node, most significant bit first */
+        for (n=0, m=0; NULL != flags && n < orte_node_pool->size && (size_t)(m/8) < len; n++) {
+            if (NULL != (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
+                if (flags[m/8] & (1 << (7 - (m % 8)))) {
+                    ORTE_FLAG_SET(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN);
+                } else {
+                    ORTE_FLAG_UNSET(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN);
+                }
+                ++m;
+            }
+        }
+    }
+
+  cleanup:
+    if (NULL != slots) {
+        free(slots);
+    }
+    if (NULL != flags) {
+        free(flags);
+    }
+    if (NULL != tps) {
+        free(tps);
+    }
+    if (NULL != topologies) {
+        free(topologies);
+    }
+    return rc;
+}
+
+
+/*
+ * For every app_context of the job, pack into "buf":
+ *
+ *   bool    payload is compressed
+ *   size_t  uncompressed payload size (only when compressed)
+ *   bo      sequence of (node index, uint16 #procs) pairs, one pair for
+ *           each mapped node that has a daemon and hosts at least one
+ *           proc of that app
+ *
+ * Read back by orte_util_decode_ppn().
+ *
+ * Returns ORTE_SUCCESS, or the code of the first operation that failed.
+ */
+int orte_util_generate_ppn(orte_job_t *jdata,
+                           opal_buffer_t *buf)
+{
+    uint16_t ppn;
+    uint8_t *bytes = NULL;
+    int32_t nbytes;
+    int rc = ORTE_SUCCESS;
+    orte_app_idx_t i;
+    int j, k;
+    opal_byte_object_t bo, *boptr;
+    bool compressed;
+    orte_node_t *nptr;
+    orte_proc_t *proc;
+    size_t sz;
+    opal_buffer_t bucket;
+
+    OBJ_CONSTRUCT(&bucket, opal_buffer_t);
+
+    for (i=0; i < jdata->num_apps; i++) {
+        /* for each app_context */
+        for (j=0; j < jdata->map->nodes->size; j++) {
+            if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, j))) {
+                continue;
+            }
+            if (NULL == nptr->daemon) {
+                continue;
+            }
+            /* count the procs of this job AND this app on the node -
+             * the receiver creates "ppn" procs for each app entry */
+            ppn = 0;
+            for (k=0; k < nptr->procs->size; k++) {
+                if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(nptr->procs, k))) {
+                    if (proc->name.jobid == jdata->jobid &&
+                        proc->app_idx == i) {
+                        ++ppn;
+                    }
+                }
+            }
+            if (0 < ppn) {
+                if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &nptr->index, 1, ORTE_STD_CNTR))) {
+                    ORTE_ERROR_LOG(rc);
+                    goto cleanup;
+                }
+                if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &ppn, 1, OPAL_UINT16))) {
+                    ORTE_ERROR_LOG(rc);
+                    goto cleanup;
+                }
+            }
+        }
+        /* take the payload out of the bucket - we now own "bytes"
+         * and the bucket is empty again for the next app */
+        opal_dss.unload(&bucket, (void**)&bytes, &nbytes);
+
+        if (opal_compress.compress_block(bytes, (size_t)nbytes,
+                                         (uint8_t**)&bo.bytes, &sz)) {
+            /* mark that this was compressed */
+            compressed = true;
+            bo.size = sz;
+        } else {
+            /* mark that this was not compressed */
+            compressed = false;
+            bo.bytes = bytes;
+            bo.size = nbytes;
+        }
+        /* indicate compression */
+        rc = opal_dss.pack(buf, &compressed, 1, OPAL_BOOL);
+        /* if compressed, provide the uncompressed size */
+        if (ORTE_SUCCESS == rc && compressed) {
+            sz = nbytes;
+            rc = opal_dss.pack(buf, &sz, 1, OPAL_SIZE);
+        }
+        /* add the object */
+        if (ORTE_SUCCESS == rc) {
+            boptr = &bo;
+            rc = opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT);
+        }
+        /* the data was copied into buf (or we failed) - either
+         * way the temporary copies are no longer needed */
+        if (compressed) {
+            free(bo.bytes);
+        }
+        free(bytes);
+        bytes = NULL;
+        if (ORTE_SUCCESS != rc) {
+            ORTE_ERROR_LOG(rc);
+            break;
+        }
+    }
+
+  cleanup:
+    OBJ_DESTRUCT(&bucket);
+    return rc;
+}
+
+/*
+ * Unpack the stream produced by orte_util_generate_ppn(). For every
+ * (node index, ppn) pair the node is added to the job map (once) and
+ * "ppn" proc objects are created on it; vpids are left undefined and
+ * are assigned later when the ranking is computed. On the HNP the
+ * payload is consumed and discarded.
+ *
+ * Returns ORTE_SUCCESS, or an error code if the buffer could not be
+ * read, decompression failed, or a referenced node/daemon is unknown.
+ */
+int orte_util_decode_ppn(orte_job_t *jdata,
+                         opal_buffer_t *buf)
+{
+    orte_std_cntr_t index;
+    orte_app_idx_t n;
+    int cnt, rc = ORTE_SUCCESS, m;
+    opal_byte_object_t *boptr;
+    bool compressed;
+    bool bucket_live = false;
+    uint8_t *bytes;
+    size_t sz = 0;
+    uint16_t ppn, k;
+    orte_node_t *node;
+    orte_proc_t *proc;
+    opal_buffer_t bucket;
+
+    /* reset any flags */
+    for (m=0; m < orte_node_pool->size; m++) {
+        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, m))) {
+            ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
+        }
+    }
+
+    for (n=0; n < jdata->num_apps; n++) {
+        /* unpack the compression flag */
+        cnt = 1;
+        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &compressed, &cnt, OPAL_BOOL))) {
+            ORTE_ERROR_LOG(rc);
+            goto done;
+        }
+        /* if compressed, unpack the raw size */
+        if (compressed) {
+            cnt = 1;
+            if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) {
+                ORTE_ERROR_LOG(rc);
+                goto done;
+            }
+        }
+        /* unpack the byte object describing this app */
+        cnt = 1;
+        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) {
+            ORTE_ERROR_LOG(rc);
+            goto done;
+        }
+
+        if (ORTE_PROC_IS_HNP) {
+            /* just discard it */
+            free(boptr->bytes);
+            free(boptr);
+            continue;
+        }
+
+        /* decompress if required */
+        if (compressed) {
+            if (!opal_compress.decompress_block(&bytes, sz,
+                                                boptr->bytes, boptr->size)) {
+                ORTE_ERROR_LOG(ORTE_ERROR);
+                /* a byte object is a plain struct, not an opal
+                 * object - release its storage directly */
+                free(boptr->bytes);
+                free(boptr);
+                rc = ORTE_ERROR;
+                goto done;
+            }
+        } else {
+            /* take ownership of the payload */
+            bytes = boptr->bytes;
+            sz = boptr->size;
+            boptr->bytes = NULL;
+            boptr->size = 0;
+        }
+        if (NULL != boptr->bytes) {
+            free(boptr->bytes);
+        }
+        free(boptr);
+
+        /* setup to unpack - the bucket takes over "bytes" */
+        OBJ_CONSTRUCT(&bucket, opal_buffer_t);
+        bucket_live = true;
+        opal_dss.load(&bucket, bytes, sz);
+
+        /* unpack each node and its ppn */
+        cnt = 1;
+        while (OPAL_SUCCESS == (rc = opal_dss.unpack(&bucket, &index, &cnt, ORTE_STD_CNTR))) {
+            /* get the corresponding node object */
+            if (0 > index ||
+                NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, index))) {
+                rc = ORTE_ERR_NOT_FOUND;
+                ORTE_ERROR_LOG(rc);
+                goto done;
+            }
+            /* the procs are parented to the node's daemon */
+            if (NULL == node->daemon) {
+                rc = ORTE_ERR_NOT_FOUND;
+                ORTE_ERROR_LOG(rc);
+                goto done;
+            }
+            /* add the node to the job map if not already assigned */
+            if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
+                OBJ_RETAIN(node);
+                opal_pointer_array_add(jdata->map->nodes, node);
+                ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
+            }
+            /* get the ppn */
+            cnt = 1;
+            if (OPAL_SUCCESS != (rc = opal_dss.unpack(&bucket, &ppn, &cnt, OPAL_UINT16))) {
+                ORTE_ERROR_LOG(rc);
+                goto done;
+            }
+            /* create a proc object for each one */
+            for (k=0; k < ppn; k++) {
+                proc = OBJ_NEW(orte_proc_t);
+                proc->name.jobid = jdata->jobid;
+                /* leave the vpid undefined as this will be determined
+                 * later when we do the overall ranking */
+                proc->app_idx = n;
+                proc->parent = node->daemon->name.vpid;
+                OBJ_RETAIN(node);
+                proc->node = node;
+                /* flag the proc as ready for launch */
+                proc->state = ORTE_PROC_STATE_INIT;
+                opal_pointer_array_add(node->procs, proc);
+                /* count each proc exactly once */
+                node->num_procs++;
+                /* we will add the proc to the jdata array when we
+                 * compute its rank */
+            }
+            cnt = 1;
+        }
+        OBJ_DESTRUCT(&bucket);
+        bucket_live = false;
+        /* running off the end of the payload is the normal way out of
+         * the loop - anything else is reported, but as before does not
+         * fail the call */
+        if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
+            ORTE_ERROR_LOG(rc);
+        }
+        rc = ORTE_SUCCESS;
+    }
+
+  done:
+    if (bucket_live) {
+        OBJ_DESTRUCT(&bucket);
+    }
+    /* reset any flags */
+    for (m=0; m < jdata->map->nodes->size; m++) {
+        node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m);
+        if (NULL != node) {
+            ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
+        }
+    }
+    return rc;
+}
diff --git a/orte/util/nidmap.h b/orte/util/nidmap.h
new file mode 100644
index 00000000000..ab728176aad
--- /dev/null
+++ b/orte/util/nidmap.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2006-2013 Los Alamos National Security, LLC.
+ *                         All rights reserved.
+ * Copyright (c) 2010-2011 Cisco Systems, Inc.  All rights reserved.
+ * Copyright (c) 2015-2019 Intel, Inc.  All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef ORTE_NIDMAP_H +#define ORTE_NIDMAP_H + +#include "orte_config.h" + +#include "opal/class/opal_pointer_array.h" +#include "opal/dss/dss_types.h" +#include "orte/runtime/orte_globals.h" + +/* pass info about the nodes in an allocation */ +ORTE_DECLSPEC int orte_util_nidmap_create(opal_pointer_array_t *pool, + opal_buffer_t *buf); + +ORTE_DECLSPEC int orte_util_decode_nidmap(opal_buffer_t *buf); + + +/* pass topology and #slots info */ +ORTE_DECLSPEC int orte_util_pass_node_info(opal_buffer_t *buf); + +ORTE_DECLSPEC int orte_util_parse_node_info(opal_buffer_t *buf); + + +/* pass info about node assignments for a specific job */ +ORTE_DECLSPEC int orte_util_generate_ppn(orte_job_t *jdata, + opal_buffer_t *buf); + +ORTE_DECLSPEC int orte_util_decode_ppn(orte_job_t *jdata, + opal_buffer_t *buf); + +#endif /* ORTE_NIDMAP_H */ From fbe380c9c228f32f6ce236b2ad54fc9d4b9a2999 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 19 Mar 2019 21:29:05 -0700 Subject: [PATCH 2/3] Update ignores Signed-off-by: Ralph Castain --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index d40aac1e60f..50cddfeacb7 100644 --- a/.gitignore +++ b/.gitignore @@ -199,6 +199,8 @@ ompi/mca/rte/orte/mpirun.1 ompi/mca/sharedfp/addproc/mca_sharedfp_addproc_control +ompi/mca/topo/treematch/config.h + ompi/mpi/c/profile/p*.c ompi/mpi/fortran/configure-fortran-output.h From 3fcbc297a2e7665bc0bc67870c7daab5811405b4 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 21 Mar 2019 11:15:11 -0700 Subject: [PATCH 3/3] Catch the remaining places where node_regex must be replaced Signed-off-by: Ralph Castain --- orte/mca/plm/base/plm_base_frame.c | 9 +-------- orte/mca/plm/base/plm_private.h | 3 +-- orte/mca/rml/base/rml_base_msg_handlers.c | 8 ++++---- orte/orted/orted_main.c | 4 ---- orte/runtime/orte_globals.c | 3 +-- orte/runtime/orte_globals.h | 3 +-- 
orte/runtime/orte_mca_params.c | 10 +--------- 7 files changed, 9 insertions(+), 31 deletions(-) diff --git a/orte/mca/plm/base/plm_base_frame.c b/orte/mca/plm/base/plm_base_frame.c index fde0dad522d..aac8f33825f 100644 --- a/orte/mca/plm/base/plm_base_frame.c +++ b/orte/mca/plm/base/plm_base_frame.c @@ -13,6 +13,7 @@ * All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -54,14 +55,6 @@ orte_plm_base_module_t orte_plm = {0}; static int mca_plm_base_register(mca_base_register_flag_t flags) { - orte_plm_globals.node_regex_threshold = 1024; - (void) mca_base_framework_var_register (&orte_plm_base_framework, "node_regex_threshold", - "Only pass the node regex on the orted command line if smaller than this threshold", - MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, - MCA_BASE_VAR_FLAG_INTERNAL, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_plm_globals.node_regex_threshold); return ORTE_SUCCESS; } diff --git a/orte/mca/plm/base/plm_private.h b/orte/mca/plm/base/plm_private.h index db779674def..45029a800a5 100644 --- a/orte/mca/plm/base/plm_private.h +++ b/orte/mca/plm/base/plm_private.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2017-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2017-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -61,7 +61,6 @@ typedef struct { opal_buffer_t tree_spawn_cmd; /* daemon nodes assigned at launch */ bool daemon_nodes_assigned_at_launch; - size_t node_regex_threshold; } orte_plm_globals_t; /** * Global instance of PLM framework data diff --git a/orte/mca/rml/base/rml_base_msg_handlers.c b/orte/mca/rml/base/rml_base_msg_handlers.c index 72a37cdae9f..1c6759911a9 100644 --- a/orte/mca/rml/base/rml_base_msg_handlers.c +++ b/orte/mca/rml/base/rml_base_msg_handlers.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -44,6 +44,7 @@ #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/util/name_fns.h" +#include "orte/util/nidmap.h" #include "orte/util/threads.h" #include "orte/mca/rml/rml.h" @@ -181,9 +182,8 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return; } - assert (NULL != orte_node_regex); - - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &orte_node_regex, 1, OPAL_STRING))) { + /* send the daemon map back to our child */ + if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, buffer))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buffer); return; diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index 1280b066ce4..188e7ec9790 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -1014,10 +1014,6 @@ int orte_daemon(int argc, char *argv[]) i += 2; } } - if (NULL != orte_node_regex) { - /* now launch any child daemons of ours */ - orte_plm.remote_spawn(); - } } if (orte_debug_daemons_flag) { diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index 4f043329d18..57c41d4a744 
100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -13,7 +13,7 @@ * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. @@ -159,7 +159,6 @@ char *orte_default_hostfile = NULL; bool orte_default_hostfile_given = false; char *orte_rankfile = NULL; int orte_num_allocated_nodes = 0; -char *orte_node_regex = NULL; char *orte_default_dash_host = NULL; /* tool communication controls */ diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index 3c93c6dbe2c..a9f2907ce6e 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -13,7 +13,7 @@ * Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2017-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -543,7 +543,6 @@ ORTE_DECLSPEC extern char *orte_default_hostfile; ORTE_DECLSPEC extern bool orte_default_hostfile_given; ORTE_DECLSPEC extern char *orte_rankfile; ORTE_DECLSPEC extern int orte_num_allocated_nodes; -ORTE_DECLSPEC extern char *orte_node_regex; ORTE_DECLSPEC extern char *orte_default_dash_host; /* PMI version control */ diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index 0053a663e44..12e08bd932b 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -13,7 +13,7 @@ * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. @@ -406,14 +406,6 @@ int orte_register_params(void) orte_default_dash_host = NULL; } - /* regex of nodes in system */ - orte_node_regex = NULL; - (void) mca_base_var_register ("orte", "orte", NULL, "node_regex", - "Regular expression defining nodes in the system", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &orte_node_regex); - /* whether or not to keep FQDN hostnames */ orte_keep_fqdn_hostnames = false; (void) mca_base_var_register ("orte", "orte", NULL, "keep_fqdn_hostnames",