diff --git a/ompi/mca/coll/sm/coll_sm_module.c b/ompi/mca/coll/sm/coll_sm_module.c index 4739217bc15..88672211bd4 100644 --- a/ompi/mca/coll/sm/coll_sm_module.c +++ b/ompi/mca/coll/sm/coll_sm_module.c @@ -15,7 +15,7 @@ * All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -124,9 +124,9 @@ static int mca_coll_sm_module_disable(mca_coll_base_module_t *module, struct omp { mca_coll_sm_module_t *sm_module = (mca_coll_sm_module_t*) module; if (NULL != sm_module->previous_reduce_module) { - sm_module->previous_reduce = NULL; + sm_module->previous_reduce = NULL; OBJ_RELEASE(sm_module->previous_reduce_module); - sm_module->previous_reduce_module = NULL; + sm_module->previous_reduce_module = NULL; } return OMPI_SUCCESS; } @@ -174,7 +174,7 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority) if (OMPI_COMM_IS_INTER(comm) || 1 == ompi_comm_size(comm) || ompi_group_have_remote_peers (comm->c_local_group)) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, "coll:sm:comm_query (%d/%s): intercomm, comm is too small, or not all peers local; disqualifying myself", comm->c_contextid, comm->c_name); - return NULL; + return NULL; } /* Get the priority level attached to this module. 
If priority is less @@ -183,7 +183,7 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority) if (mca_coll_sm_component.sm_priority <= 0) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, "coll:sm:comm_query (%d/%s): priority too low; disqualifying myself", comm->c_contextid, comm->c_name); - return NULL; + return NULL; } sm_module = OBJ_NEW(mca_coll_sm_module_t); @@ -246,6 +246,7 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, mca_coll_sm_comm_t *data = NULL; size_t control_size, frag_size; mca_coll_sm_component_t *c = &mca_coll_sm_component; + hwloc_topology_t topo; opal_hwloc_base_memory_segment_t *maffinity; int parent, min_child, num_children; unsigned char *base = NULL; @@ -446,7 +447,10 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, /* Setup memory affinity so that the pages that belong to this process are local to this process */ - opal_hwloc_base_memory_set(maffinity, j); + if (NULL != (topo = opal_hwloc_base_get_topology())) { + opal_hwloc_base_memory_set(topo, maffinity, j); + opal_hwloc_base_free_topology(topo); + } free(maffinity); /* Zero out the control structures that belong to this process */ diff --git a/ompi/mca/topo/treematch/topo_treematch_component.c b/ompi/mca/topo/treematch/topo_treematch_component.c index 221efd0a7ee..6062bf1ed31 100644 --- a/ompi/mca/topo/treematch/topo_treematch_component.c +++ b/ompi/mca/topo/treematch/topo_treematch_component.c @@ -4,6 +4,7 @@ * reserved. * Copyright (c) 2011-2015 INRIA. All rights reserved. * Copyright (c) 2011-2015 Université Bordeaux 1 + * Copyright (c) 2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -61,9 +62,6 @@ mca_topo_treematch_component_2_2_0_t mca_topo_treematch_component = static int init_query(bool enable_progress_threads, bool enable_mpi_threads) { - if(NULL == opal_hwloc_topology) { - return OPAL_ERR_NOT_SUPPORTED; - } return OMPI_SUCCESS; } @@ -97,4 +95,3 @@ static int mca_topo_treematch_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_topo_treematch_component.reorder_mode); return OMPI_SUCCESS; } - diff --git a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c index 4d4f4d3f03f..010ec6bd195 100644 --- a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c +++ b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c @@ -5,7 +5,7 @@ * reserved. * Copyright (c) 2011-2015 INRIA. All rights reserved. * Copyright (c) 2012-2015 Bordeaux Poytechnic Institute - * Copyright (c) 2015 Intel, Inc. All rights reserved + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. 
All rights @@ -145,6 +145,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, mca_topo_base_comm_dist_graph_2_2_0_t *topo = NULL; ompi_proc_t *proc = NULL; MPI_Request *reqs = NULL; + hwloc_topology_t mytopo = NULL; hwloc_cpuset_t set; hwloc_obj_t object,root_obj; hwloc_obj_t *tracker = NULL; @@ -256,14 +257,19 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, /* Then, we need to know if the processes are bound */ /* We make the hypothesis that all processes are in */ /* the same state : all bound or none bound */ - assert(NULL != opal_hwloc_topology); - root_obj = hwloc_get_root_obj(opal_hwloc_topology); + + /* ensure the topology is present */ + if (NULL == (mytopo = opal_hwloc_base_get_topology())) { + goto fallback; + } + + root_obj = hwloc_get_root_obj(mytopo); if (NULL == root_obj) goto fallback; /* if cpubind returns an error, it will be full anyway */ set = hwloc_bitmap_alloc_full(); - hwloc_get_cpubind(opal_hwloc_topology,set,0); - num_pus_in_node = hwloc_get_nbobjs_by_type(opal_hwloc_topology, HWLOC_OBJ_PU); + hwloc_get_cpubind(mytopo,set,0); + num_pus_in_node = hwloc_get_nbobjs_by_type(mytopo, HWLOC_OBJ_PU); if(hwloc_bitmap_isincluded(root_obj->cpuset,set)){ /* processes are not bound on the machine */ @@ -274,8 +280,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, /* we try to bind to cores or above objects if enough are present */ /* Not sure that cores are present in ALL nodes */ - depth = hwloc_get_type_or_above_depth(opal_hwloc_topology,HWLOC_OBJ_CORE); - num_objs_in_node = hwloc_get_nbobjs_by_depth(opal_hwloc_topology,depth); + depth = hwloc_get_type_or_above_depth(mytopo,HWLOC_OBJ_CORE); + num_objs_in_node = hwloc_get_nbobjs_by_depth(mytopo,depth); /* Check for oversubscribing */ oversubscribing_objs = check_oversubscribing(rank,num_nodes, @@ -295,7 +301,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, FALLBACK(); } 
else { obj_rank = ompi_process_info.my_local_rank%num_pus_in_node; - effective_depth = hwloc_topology_get_depth(opal_hwloc_topology) - 1; + effective_depth = hwloc_topology_get_depth(mytopo) - 1; num_objs_in_node = num_pus_in_node; #ifdef __DEBUG__ fprintf(stdout,"Process not bound : binding on PU#%i \n",obj_rank); @@ -304,22 +310,22 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, } else { obj_rank = ompi_process_info.my_local_rank%num_objs_in_node; effective_depth = depth; - object = hwloc_get_obj_by_depth(opal_hwloc_topology,effective_depth,obj_rank); + object = hwloc_get_obj_by_depth(mytopo,effective_depth,obj_rank); if( NULL == object) FALLBACK(); hwloc_bitmap_copy(set,object->cpuset); hwloc_bitmap_singlify(set); /* we don't want the process to move */ - hwloc_err = hwloc_set_cpubind(opal_hwloc_topology,set,0); + hwloc_err = hwloc_set_cpubind(mytopo,set,0); if( -1 == hwloc_err) FALLBACK(); #ifdef __DEBUG__ fprintf(stdout,"Process not bound : binding on OBJ#%i \n",obj_rank); #endif } } else { /* the processes are already bound */ - object = hwloc_get_obj_covering_cpuset(opal_hwloc_topology,set); + object = hwloc_get_obj_covering_cpuset(mytopo,set); obj_rank = object->logical_index; effective_depth = object->depth; - num_objs_in_node = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, effective_depth); + num_objs_in_node = hwloc_get_nbobjs_by_depth(mytopo, effective_depth); /* Check for oversubscribing */ oversubscribing_objs = check_oversubscribing(rank,num_nodes, @@ -346,7 +352,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, int *myhierarchy = (int *)calloc(array_size,sizeof(int)); for (i = 0; i < array_size ; i++) { - myhierarchy[i] = hwloc_get_nbobjs_by_depth(opal_hwloc_topology,i); + myhierarchy[i] = hwloc_get_nbobjs_by_depth(mytopo,i); #ifdef __DEBUG__ fprintf(stdout,"hierarchy[%i] = %i\n",i,myhierarchy[i]); #endif @@ -361,15 +367,15 @@ int 
mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, i = 1; while (i < array_size){ if(myhierarchy[i] != myhierarchy[i-1]) - tracker[idx++] = hwloc_get_obj_by_depth(opal_hwloc_topology,i-1,0); + tracker[idx++] = hwloc_get_obj_by_depth(mytopo,i-1,0); i++; } - tracker[idx] = hwloc_get_obj_by_depth(opal_hwloc_topology,effective_depth,0); + tracker[idx] = hwloc_get_obj_by_depth(mytopo,effective_depth,0); free(myhierarchy); #ifdef __DEBUG__ fprintf(stdout,">>>>>>>>>>>>>>>>>>>>> Effective depth is : %i (total depth %i)| num_levels %i\n", - effective_depth,hwloc_topology_get_depth(opal_hwloc_topology),numlevels); + effective_depth,hwloc_topology_get_depth(mytopo),numlevels); for(i = 0 ; i < numlevels ; i++) fprintf(stdout,"tracker[%i] : arity %i | depth %i\n",i,tracker[i]->arity,tracker[i]->depth); #endif @@ -379,17 +385,26 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, for(i = 1; i < num_procs_in_node; i++) { if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(&localrank_to_objnum[i],1,MPI_INT, - local_procs[i],111, comm_old,&reqs[i-1])))) + local_procs[i],111, comm_old,&reqs[i-1])))) { + opal_hwloc_base_free_topology(mytopo); + mytopo = NULL; return err; + } } if (OMPI_SUCCESS != ( err = ompi_request_wait_all(num_procs_in_node-1, - reqs,MPI_STATUSES_IGNORE))) + reqs,MPI_STATUSES_IGNORE))) { + opal_hwloc_base_free_topology(mytopo); + mytopo = NULL; return err; + } } else { /* sending my core number to my local master on the node */ if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(&obj_rank, 1, MPI_INT, local_procs[0], - 111, MCA_PML_BASE_SEND_STANDARD, comm_old)))) + 111, MCA_PML_BASE_SEND_STANDARD, comm_old)))) { + opal_hwloc_base_free_topology(mytopo); + mytopo = NULL; return err; + } } free(reqs); @@ -417,8 +432,11 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, if (OMPI_SUCCESS != (err = comm_old->c_coll.coll_gather(MPI_IN_PLACE, size, MPI_DOUBLE, local_pattern, size, MPI_DOUBLE, 0, 
comm_old, - comm_old->c_coll.coll_gather_module))) + comm_old->c_coll.coll_gather_module))) { + opal_hwloc_base_free_topology(mytopo); + mytopo = NULL; return err; + } } } else { local_pattern = (double *)calloc(size,sizeof(double)); @@ -430,8 +448,11 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, if (OMPI_SUCCESS != (err = comm_old->c_coll.coll_gather(local_pattern, size, MPI_DOUBLE, NULL,0,0, 0, comm_old, - comm_old->c_coll.coll_gather_module))) + comm_old->c_coll.coll_gather_module))) { + opal_hwloc_base_free_topology(mytopo); + mytopo = NULL; return err; + } } } @@ -448,7 +469,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, for(i = 0 ; i < num_objs_in_node ; i++) obj_to_rank_in_comm[i] = -1; for(i = 0 ; i < num_objs_in_node ; i++) { - object = hwloc_get_obj_by_depth(opal_hwloc_topology,effective_depth,i); + object = hwloc_get_obj_by_depth(mytopo,effective_depth,i); for( j = 0; j < num_procs_in_node ; j++ ) if(localrank_to_objnum[j] == (int)(object->logical_index)) break; @@ -796,7 +817,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, tm_topology->nb_nodes = (int *)calloc(tm_topology->nb_levels,sizeof(int)); tm_topology->node_id = (int **)malloc(tm_topology->nb_levels*sizeof(int *)); for(i = 0 ; i < tm_topology->nb_levels ; i++){ - int nb_objs = hwloc_get_nbobjs_by_depth(opal_hwloc_topology,tracker[i]->depth); + int nb_objs = hwloc_get_nbobjs_by_depth(mytopo,tracker[i]->depth); tm_topology->nb_nodes[i] = nb_objs; tm_topology->node_id[i] = (int*)malloc(sizeof(int)*nb_objs); tm_topology->arity[i] = tracker[i]->arity; @@ -864,16 +885,16 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, localcomm->c_coll.coll_bcast_module))) ERR_EXIT(err); - object = hwloc_get_obj_by_depth(opal_hwloc_topology, + object = hwloc_get_obj_by_depth(mytopo, effective_depth,matching[ompi_process_info.my_local_rank]); if( NULL == object) goto fallback; 
hwloc_bitmap_copy(set,object->cpuset); hwloc_bitmap_singlify(set); - hwloc_err = hwloc_set_cpubind(opal_hwloc_topology,set,0); + hwloc_err = hwloc_set_cpubind(mytopo,set,0); if( -1 == hwloc_err) goto fallback; /* Report new binding to ORTE/OPAL */ - /* hwloc_bitmap_list_asprintf(&orte_process_info.cpuset,set); */ + /* hwloc_bitmap_list_asprintf(&orte_process_info.cpuset,set); */ err = hwloc_bitmap_snprintf (set_as_string,64,set); #ifdef __DEBUG__ @@ -888,7 +909,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, (void)opal_pmix.store_local((opal_process_name_t*)ORTE_PROC_MY_NAME, &kv); OBJ_DESTRUCT(&kv); - locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, + locality = opal_hwloc_base_get_relative_locality(mytopo, orte_process_info.cpuset,set_as_string); OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_LOCALITY); @@ -912,6 +933,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, free(lrank_to_grank); } /* distributed reordering end */ + if (NULL != mytopo) { + opal_hwloc_base_free_topology(mytopo); + } if(rank == local_procs[0]) free(tracker); free(nodes_roots); diff --git a/ompi/mpiext/affinity/c/mpiext_affinity_str.c b/ompi/mpiext/affinity/c/mpiext_affinity_str.c index 62fa0ccda6b..501ad70e8fe 100644 --- a/ompi/mpiext/affinity/c/mpiext_affinity_str.c +++ b/ompi/mpiext/affinity/c/mpiext_affinity_str.c @@ -8,7 +8,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -42,12 +42,18 @@ static const char ompi_nobind_str[] = "Open MPI did not bind this process"; static const char not_bound_str[] = "Not bound (i.e., bound to all processors)"; -static int get_rsrc_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]); -static int get_rsrc_current_binding(char str[OMPI_AFFINITY_STRING_MAX]); -static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]); -static int get_layout_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]); -static int get_layout_current_binding(char str[OMPI_AFFINITY_STRING_MAX]); -static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX]); +static int get_rsrc_ompi_bound(hwloc_topology_t topo, + char str[OMPI_AFFINITY_STRING_MAX]); +static int get_rsrc_current_binding(hwloc_topology_t topo, + char str[OMPI_AFFINITY_STRING_MAX]); +static int get_rsrc_exists(hwloc_topology_t topo, + char str[OMPI_AFFINITY_STRING_MAX]); +static int get_layout_ompi_bound(hwloc_topology_t topo, + char str[OMPI_AFFINITY_STRING_MAX]); +static int get_layout_current_binding(hwloc_topology_t topo, + char str[OMPI_AFFINITY_STRING_MAX]); +static int get_layout_exists(hwloc_topology_t topo, + char str[OMPI_AFFINITY_STRING_MAX]); int OMPI_Affinity_str(ompi_affinity_fmt_t fmt_type, @@ -56,28 +62,33 @@ int OMPI_Affinity_str(ompi_affinity_fmt_t fmt_type, char exists[OMPI_AFFINITY_STRING_MAX]) { int ret; + hwloc_topology_t topo; memset(ompi_bound, 0, OMPI_AFFINITY_STRING_MAX); memset(current_binding, 0, OMPI_AFFINITY_STRING_MAX); /* If we have no hwloc support, return nothing */ - if (NULL == opal_hwloc_topology) { + if (NULL == (topo = opal_hwloc_base_get_topology())) { return MPI_SUCCESS; } /* Otherwise, return useful information */ switch (fmt_type) { case OMPI_AFFINITY_RSRC_STRING_FMT: - if (OMPI_SUCCESS != (ret = get_rsrc_ompi_bound(ompi_bound)) || - OMPI_SUCCESS != (ret = get_rsrc_current_binding(current_binding)) || - OMPI_SUCCESS != (ret = get_rsrc_exists(exists))) { + if (OMPI_SUCCESS != 
(ret = get_rsrc_ompi_bound(topo, ompi_bound)) || + OMPI_SUCCESS != (ret = get_rsrc_current_binding(topo, current_binding)) || + OMPI_SUCCESS != (ret = get_rsrc_exists(topo, exists))) { + opal_hwloc_base_free_topology(topo); + topo = NULL; return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, FUNC_NAME); } break; case OMPI_AFFINITY_LAYOUT_FMT: - if (OMPI_SUCCESS != (ret = get_layout_ompi_bound(ompi_bound)) || - OMPI_SUCCESS != (ret = get_layout_current_binding(current_binding)) || - OMPI_SUCCESS != (ret = get_layout_exists(exists))) { + if (OMPI_SUCCESS != (ret = get_layout_ompi_bound(topo, ompi_bound)) || + OMPI_SUCCESS != (ret = get_layout_current_binding(topo, current_binding)) || + OMPI_SUCCESS != (ret = get_layout_exists(topo, exists))) { + opal_hwloc_base_free_topology(topo); + topo = NULL; return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, FUNC_NAME); } break; @@ -85,6 +96,8 @@ int OMPI_Affinity_str(ompi_affinity_fmt_t fmt_type, return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); } + opal_hwloc_base_free_topology(topo); + topo = NULL; return MPI_SUCCESS; } @@ -93,7 +106,8 @@ int OMPI_Affinity_str(ompi_affinity_fmt_t fmt_type, /* * Where did OMPI bind this process? (prettyprint) */ -static int get_rsrc_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]) +static int get_rsrc_ompi_bound(hwloc_topology_t topo, + char str[OMPI_AFFINITY_STRING_MAX]) { int ret; @@ -107,7 +121,7 @@ static int get_rsrc_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]) ret = OPAL_ERR_NOT_BOUND; } else { ret = opal_hwloc_base_cset2str(str, OMPI_AFFINITY_STRING_MAX, - opal_hwloc_topology, + topo, orte_proc_applied_binding); } if (OPAL_ERR_NOT_BOUND == ret) { @@ -121,7 +135,8 @@ static int get_rsrc_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]) /* * Where is this process currently bound? 
(prettyprint) */ -static int get_rsrc_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) +static int get_rsrc_current_binding(hwloc_topology_t topo, + char str[OMPI_AFFINITY_STRING_MAX]) { int ret; hwloc_obj_t root; @@ -129,12 +144,12 @@ static int get_rsrc_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) bool bound = false; /* get our root object */ - root = hwloc_get_root_obj(opal_hwloc_topology); - rootset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, root); + root = hwloc_get_root_obj(topo); + rootset = opal_hwloc_base_get_available_cpus(topo, root); /* get our bindings */ boundset = hwloc_bitmap_alloc(); - if (hwloc_get_cpubind(opal_hwloc_topology, boundset, + if (hwloc_get_cpubind(topo, boundset, HWLOC_CPUBIND_PROCESS) < 0) { /* we are NOT bound if get_cpubind fails, nor can we be bound - the environment does not support it */ @@ -158,7 +173,7 @@ static int get_rsrc_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) /* If we are bound, print it out */ else { ret = opal_hwloc_base_cset2str(str, OMPI_AFFINITY_STRING_MAX, - opal_hwloc_topology, + topo, boundset); if (OPAL_ERR_NOT_BOUND == ret) { strncpy(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - 1); @@ -176,7 +191,8 @@ static int get_rsrc_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) * this is *everything* -- not just the ones that are available to * this process. 
*/ -static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) +static int get_rsrc_exists(hwloc_topology_t topo, + char str[OMPI_AFFINITY_STRING_MAX]) { bool first = true; int i, num_cores, num_pus; @@ -185,7 +201,7 @@ static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) hwloc_obj_t socket, core, c2; str[0] = '\0'; - for (socket = hwloc_get_obj_by_type(opal_hwloc_topology, + for (socket = hwloc_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0); NULL != socket; socket = socket->next_cousin) { /* If this isn't the first socket, add a delimiter */ @@ -200,15 +216,15 @@ static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) /* Find out how many cores are inside this socket, and get an object pointing to the first core. Also count how many PUs are in the first core. */ - num_cores = hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology, + num_cores = hwloc_get_nbobjs_inside_cpuset_by_type(topo, socket->cpuset, HWLOC_OBJ_CORE); - core = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology, + core = hwloc_get_obj_inside_cpuset_by_type(topo, socket->cpuset, HWLOC_OBJ_CORE, 0); if (NULL != core) { num_pus = - hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology, + hwloc_get_nbobjs_inside_cpuset_by_type(topo, core->cpuset, HWLOC_OBJ_PU); @@ -234,7 +250,7 @@ static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) /* Do all the cores have the same number of PUs? 
*/ for (c2 = core; NULL != c2; c2 = c2->next_cousin) { - if (hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology, + if (hwloc_get_nbobjs_inside_cpuset_by_type(topo, core->cpuset, HWLOC_OBJ_PU) != num_pus) { @@ -264,7 +280,7 @@ static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) } first = false; - i = hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology, + i = hwloc_get_nbobjs_inside_cpuset_by_type(topo, core->cpuset, HWLOC_OBJ_PU); snprintf(tmp, stmp, "%d", i); @@ -285,7 +301,8 @@ static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]) /* * Where did OMPI bind this process? (layout string) */ -static int get_layout_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]) +static int get_layout_ompi_bound(hwloc_topology_t topo, + char str[OMPI_AFFINITY_STRING_MAX]) { int ret; @@ -300,7 +317,7 @@ static int get_layout_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]) ret = OPAL_ERR_NOT_BOUND; } else { ret = opal_hwloc_base_cset2mapstr(str, OMPI_AFFINITY_STRING_MAX, - opal_hwloc_topology, + topo, orte_proc_applied_binding); } if (OPAL_ERR_NOT_BOUND == ret) { @@ -314,7 +331,8 @@ static int get_layout_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]) /* * Where is this process currently bound? 
(layout string) */ -static int get_layout_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) +static int get_layout_current_binding(hwloc_topology_t topo, + char str[OMPI_AFFINITY_STRING_MAX]) { int ret; hwloc_obj_t root; @@ -322,12 +340,12 @@ static int get_layout_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) bool bound = false; /* get our root object */ - root = hwloc_get_root_obj(opal_hwloc_topology); - rootset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, root); + root = hwloc_get_root_obj(topo); + rootset = opal_hwloc_base_get_available_cpus(topo, root); /* get our bindings */ boundset = hwloc_bitmap_alloc(); - if (hwloc_get_cpubind(opal_hwloc_topology, boundset, + if (hwloc_get_cpubind(topo, boundset, HWLOC_CPUBIND_PROCESS) < 0) { /* we are NOT bound if get_cpubind fails, nor can we be bound - the environment does not support it */ @@ -351,7 +369,7 @@ static int get_layout_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) /* If we are bound, print it out */ else { ret = opal_hwloc_base_cset2mapstr(str, OMPI_AFFINITY_STRING_MAX, - opal_hwloc_topology, + topo, boundset); if (OPAL_ERR_NOT_BOUND == ret) { strncpy(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - 1); @@ -373,7 +391,8 @@ static int get_layout_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) * / - signifies core * . 
- signifies PU */ -static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX]) +static int get_layout_exists(hwloc_topology_t topo, + char str[OMPI_AFFINITY_STRING_MAX]) { int core_index, pu_index; int len = OMPI_AFFINITY_STRING_MAX; @@ -382,7 +401,7 @@ static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX]) str[0] = '\0'; /* Iterate over all existing sockets */ - for (socket = hwloc_get_obj_by_type(opal_hwloc_topology, + for (socket = hwloc_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0); NULL != socket; socket = socket->next_cousin) { @@ -390,11 +409,11 @@ static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX]) /* Iterate over all existing cores in this socket */ core_index = 0; - for (core = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology, + for (core = hwloc_get_obj_inside_cpuset_by_type(topo, socket->cpuset, HWLOC_OBJ_CORE, core_index); NULL != core; - core = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology, + core = hwloc_get_obj_inside_cpuset_by_type(topo, socket->cpuset, HWLOC_OBJ_CORE, ++core_index)) { if (core_index > 0) { @@ -403,11 +422,11 @@ static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX]) /* Iterate over all existing PUs in this core */ pu_index = 0; - for (pu = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology, + for (pu = hwloc_get_obj_inside_cpuset_by_type(topo, core->cpuset, HWLOC_OBJ_PU, pu_index); NULL != pu; - pu = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology, + pu = hwloc_get_obj_inside_cpuset_by_type(topo, core->cpuset, HWLOC_OBJ_PU, ++pu_index)) { strncat(str, ".", len - strlen(str)); diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index d7d7a1bf972..af031927355 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -508,16 +508,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* check for timing request - get stop time and report elapsed time if so */ OPAL_TIMING_MNEXT((&tm,"time from completion of 
rte_init to modex")); - /* if hwloc is available but didn't get setup for some - * reason, do so now - */ - if (NULL == opal_hwloc_topology) { - if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { - error = "Topology init"; - goto error; - } - } - /* Register the default errhandler callback */ errtrk.status = OPAL_ERROR; errtrk.active = true; diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index d6c119f6ec5..c6a204771de 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -18,7 +18,7 @@ * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. 
@@ -1502,13 +1502,20 @@ static uint64_t read_module_param(char *file, uint64_t value, uint64_t max)
 static uint64_t calculate_total_mem (void)
 {
     hwloc_obj_t machine;
+    hwloc_topology_t topo;
+    uint64_t mem = 0;
 
-    machine = hwloc_get_next_obj_by_type (opal_hwloc_topology, HWLOC_OBJ_MACHINE, NULL);
-    if (NULL == machine) {
+    if (NULL == (topo = opal_hwloc_base_get_topology())) {
         return 0;
     }
+    machine = hwloc_get_next_obj_by_type (topo, HWLOC_OBJ_MACHINE, NULL);
+    if (NULL != machine) {
+        /* a missing machine object reports 0 instead of dereferencing NULL */
+        mem = machine->memory.total_memory;
+    }
 
-    return machine->memory.total_memory;
+    opal_hwloc_base_free_topology(topo);
+    return mem;
 }
 
 
@@ -2320,15 +2327,19 @@ static float get_ib_dev_distance(struct ibv_device *dev)
     int i;
     hwloc_cpuset_t my_cpuset = NULL, ibv_cpuset = NULL;
     hwloc_obj_t my_obj, ibv_obj, node_obj;
+    hwloc_topology_t topo = NULL;
 
     /* Note that this struct is owned by hwloc; there's no need to free
        it at the end of time */
     static const struct hwloc_distances_s *hwloc_distances = NULL;
 
+    if (NULL == (topo = opal_hwloc_base_get_topology())) {
+        goto out;
+    }
+
     if (NULL == hwloc_distances) {
         hwloc_distances =
-            hwloc_get_whole_distance_matrix_by_type(opal_hwloc_topology,
-                                                    HWLOC_OBJ_NODE);
+            hwloc_get_whole_distance_matrix_by_type(topo, HWLOC_OBJ_NODE);
     }
 
     /* If we got no info, just return 0 */
@@ -2341,10 +2352,10 @@ static float get_ib_dev_distance(struct ibv_device *dev)
     if (NULL == ibv_cpuset) {
         goto out;
     }
-    if (0 != hwloc_ibv_get_device_cpuset(opal_hwloc_topology, dev, ibv_cpuset)) {
+    if (0 != hwloc_ibv_get_device_cpuset(topo, dev, ibv_cpuset)) {
         goto out;
     }
-    ibv_obj = hwloc_get_obj_covering_cpuset(opal_hwloc_topology, ibv_cpuset);
+    ibv_obj = hwloc_get_obj_covering_cpuset(topo, ibv_cpuset);
     if (NULL == ibv_obj) {
         goto out;
     }
@@ -2390,10 +2401,10 @@ static float get_ib_dev_distance(struct ibv_device *dev)
     if (NULL == my_cpuset) {
         goto out;
     }
-    if (0 != hwloc_get_cpubind(opal_hwloc_topology, my_cpuset, 0)) {
+    if (0 != hwloc_get_cpubind(topo, my_cpuset, 0)) {
goto out; } - my_obj = hwloc_get_obj_covering_cpuset(opal_hwloc_topology, my_cpuset); + my_obj = hwloc_get_obj_covering_cpuset(topo, my_cpuset); if (NULL == my_obj) { goto out; } @@ -2427,11 +2438,11 @@ static float get_ib_dev_distance(struct ibv_device *dev) /* If the obj is above a NUMA node, then we're bound to more than one NUMA node. Find the max distance. */ i = 0; - for (node_obj = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology, + for (node_obj = hwloc_get_obj_inside_cpuset_by_type(topo, ibv_obj->cpuset, HWLOC_OBJ_NODE, i); NULL != node_obj; - node_obj = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology, + node_obj = hwloc_get_obj_inside_cpuset_by_type(topo, ibv_obj->cpuset, HWLOC_OBJ_NODE, ++i)) { @@ -2448,6 +2459,9 @@ static float get_ib_dev_distance(struct ibv_device *dev) } out: + if (NULL != topo) { + opal_hwloc_base_free_topology(topo); + } if (NULL != ibv_cpuset) { hwloc_bitmap_free(ibv_cpuset); } diff --git a/opal/mca/btl/sm/btl_sm.c b/opal/mca/btl/sm/btl_sm.c index d5a8d31e0ae..d5d4b109bcd 100644 --- a/opal/mca/btl/sm/btl_sm.c +++ b/opal/mca/btl/sm/btl_sm.c @@ -16,7 +16,7 @@ * All rights reserved. * Copyright (c) 2010-2012 IBM Corporation. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 ARM, Inc. All rights reserved. 
@@ -223,14 +223,15 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int my_mem_node, num_mem_nodes, i, rc; mca_common_sm_mpool_resources_t *res = NULL; mca_btl_sm_component_t* m = &mca_btl_sm_component; + hwloc_topology_t topo = NULL; /* Assume we don't have hwloc support and fill in dummy info */ mca_btl_sm_component.mem_node = my_mem_node = 0; mca_btl_sm_component.num_mem_nodes = num_mem_nodes = 1; /* If we have hwloc support, then get accurate information */ - if (NULL != opal_hwloc_topology) { - i = opal_hwloc_base_get_nbobjs_by_type(opal_hwloc_topology, + if (NULL != (topo = opal_hwloc_base_get_topology())) { + i = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_NODE, 0, OPAL_HWLOC_AVAILABLE); @@ -253,13 +254,13 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl, if (NULL != opal_process_info.cpuset) { /* count the number of NUMA nodes to which we are bound */ for (w=0; w < i; w++) { - if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, + if (NULL == (obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_NODE, 0, w, OPAL_HWLOC_AVAILABLE))) { continue; } /* get that NUMA node's available cpus */ - avail = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); + avail = opal_hwloc_base_get_available_cpus(topo, obj); /* see if we intersect */ if (hwloc_bitmap_intersects(avail, opal_hwloc_my_cpuset)) { n_bound++; @@ -276,6 +277,7 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl, } } } + opal_hwloc_base_free_topology(topo); } if (NULL == (res = calloc(1, sizeof(*res)))) { diff --git a/opal/mca/btl/smcuda/btl_smcuda.c b/opal/mca/btl/smcuda/btl_smcuda.c index 6208ea5399d..8c9761e70c9 100644 --- a/opal/mca/btl/smcuda/btl_smcuda.c +++ b/opal/mca/btl/smcuda/btl_smcuda.c @@ -18,7 +18,7 @@ * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -232,14 +232,15 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, int my_mem_node, num_mem_nodes, i, rc; mca_common_sm_mpool_resources_t *res = NULL; mca_btl_smcuda_component_t* m = &mca_btl_smcuda_component; + hwloc_topology_t topo = NULL; /* Assume we don't have hwloc support and fill in dummy info */ mca_btl_smcuda_component.mem_node = my_mem_node = 0; mca_btl_smcuda_component.num_mem_nodes = num_mem_nodes = 1; /* If we have hwloc support, then get accurate information */ - if (NULL != opal_hwloc_topology) { - i = opal_hwloc_base_get_nbobjs_by_type(opal_hwloc_topology, + if (NULL != (topo = opal_hwloc_base_get_topology())) { + i = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_NODE, 0, OPAL_HWLOC_AVAILABLE); @@ -262,13 +263,13 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, if (NULL != opal_process_info.cpuset) { /* count the number of NUMA nodes to which we are bound */ for (w=0; w < i; w++) { - if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, + if (NULL == (obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_NODE, 0, w, OPAL_HWLOC_AVAILABLE))) { continue; } /* get that NUMA node's available cpus */ - avail = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); + avail = opal_hwloc_base_get_available_cpus(topo, obj); /* see if we intersect */ if (hwloc_bitmap_intersects(avail, opal_hwloc_my_cpuset)) { n_bound++; @@ -285,6 +286,7 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, } } } + opal_hwloc_base_free_topology(topo); } if (NULL == (res = calloc(1, sizeof(*res)))) { @@ -431,7 +433,7 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, mca_btl_smcuda_component.sm_free_list_inc, mca_btl_smcuda_component.sm_mpool, 0, NULL, NULL, NULL); if ( OPAL_SUCCESS != i ) - return i; + return i; mca_btl_smcuda_component.num_outstanding_frags = 0; @@ -1120,8 +1122,8 @@ int 
mca_btl_smcuda_get_cuda (struct mca_btl_base_module_t *btl, mca_common_wait_stream_synchronize(&rget_reg); rc = mca_common_cuda_memcpy(local_address, remote_memory_address, size, - "mca_btl_smcuda_get", (mca_btl_base_descriptor_t *)frag, - &done); + "mca_btl_smcuda_get", (mca_btl_base_descriptor_t *)frag, + &done); if (OPAL_SUCCESS != rc) { /* Out of resources can be handled by upper layers. */ if (OPAL_ERR_OUT_OF_RESOURCE != rc) { diff --git a/opal/mca/btl/usnic/btl_usnic_hwloc.c b/opal/mca/btl/usnic/btl_usnic_hwloc.c index 78ef4c3abcb..938624cb96e 100644 --- a/opal/mca/btl/usnic/btl_usnic_hwloc.c +++ b/opal/mca/btl/usnic/btl_usnic_hwloc.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -9,7 +10,7 @@ #include "opal_config.h" -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/base/base.h" #include "opal/constants.h" #if BTL_IN_OPAL @@ -23,7 +24,8 @@ /* * Local variables */ -static hwloc_obj_t my_numa_node = NULL; +static int my_numa_node_index = -1; +static bool numa_node_index_found = false; static int num_numa_nodes = 0; static const struct hwloc_distances_s *matrix = NULL; @@ -35,9 +37,13 @@ static const struct hwloc_distances_s *matrix = NULL; */ static int get_distance_matrix(void) { + hwloc_topology_t topo; + if (NULL == matrix) { - matrix = hwloc_get_whole_distance_matrix_by_type(opal_hwloc_topology, - HWLOC_OBJ_NODE); + if (NULL == (topo = opal_hwloc_base_get_topology())) { + return OPAL_ERROR; + } + matrix = hwloc_get_whole_distance_matrix_by_type(topo, HWLOC_OBJ_NODE); } return (NULL == matrix) ? 
OPAL_ERROR : OPAL_SUCCESS; @@ -46,12 +52,13 @@ static int get_distance_matrix(void) /* * Find the NUMA node that covers a given cpuset */ -static hwloc_obj_t find_numa_node(hwloc_bitmap_t cpuset) +static hwloc_obj_t find_numa_node(hwloc_topology_t topo, + hwloc_bitmap_t cpuset) { hwloc_obj_t obj; obj = - hwloc_get_first_largest_obj_inside_cpuset(opal_hwloc_topology, cpuset); + hwloc_get_first_largest_obj_inside_cpuset(topo, cpuset); /* Go upwards until we hit the NUMA node or run out of parents */ while (obj->type > HWLOC_OBJ_NODE && @@ -68,8 +75,7 @@ static hwloc_obj_t find_numa_node(hwloc_bitmap_t cpuset) /* Finally, make sure that our cpuset doesn't span more than 1 NUMA node */ - if (hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology, - cpuset, HWLOC_OBJ_NODE) > 1) { + if (hwloc_get_nbobjs_inside_cpuset_by_type(topo, cpuset, HWLOC_OBJ_NODE) > 1) { opal_output_verbose(5, USNIC_OUT, "btl:usnic:filter_numa: this process is bound to more than 1 NUMA node; filtering by NUMA distance not possible"); return NULL; @@ -87,13 +93,18 @@ static hwloc_obj_t find_numa_node(hwloc_bitmap_t cpuset) * Note that the my_numa_node value we find is just a handle; we * aren't responsible for freeing it. 
*/ -static int find_my_numa_node(void) +static int find_my_numa_node_index(void) { hwloc_obj_t obj; hwloc_bitmap_t cpuset; + hwloc_topology_t topo; - if (NULL != my_numa_node) { - return OPAL_SUCCESS; + if (numa_node_index_found) { + return my_numa_node_index; + } + + if (NULL == (topo = opal_hwloc_base_get_topology())) { + return OPAL_ERROR; } /* Get this process' binding */ @@ -101,45 +112,48 @@ static int find_my_numa_node(void) if (NULL == cpuset) { return OPAL_ERR_OUT_OF_RESOURCE; } - if (0 != hwloc_get_cpubind(opal_hwloc_topology, cpuset, 0)) { + if (0 != hwloc_get_cpubind(topo, cpuset, 0)) { hwloc_bitmap_free(cpuset); + opal_hwloc_base_free_topology(topo); return OPAL_ERR_NOT_AVAILABLE; } /* Get the largest object type in the cpuset */ - obj = find_numa_node(cpuset); + obj = find_numa_node(topo, cpuset); hwloc_bitmap_free(cpuset); if (NULL == obj) { + opal_hwloc_base_free_topology(topo); return OPAL_ERR_NOT_AVAILABLE; } /* Happiness */ - my_numa_node = obj; - num_numa_nodes = hwloc_get_nbobjs_by_type(opal_hwloc_topology, - HWLOC_OBJ_NODE); - return OPAL_SUCCESS; + my_numa_node_index = obj->logical_index; + num_numa_nodes = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_NODE); + opal_hwloc_base_free_topology(topo); + numa_node_index_found = true; + return my_numa_node_index; } /* * Find a NUMA node covering the device associated with this module */ -static hwloc_obj_t find_device_numa(opal_btl_usnic_module_t *module) +static hwloc_obj_t find_device_numa(hwloc_topology_t topo, + opal_btl_usnic_module_t *module) { struct fi_usnic_info *uip; hwloc_obj_t obj; /* Bozo checks */ assert(NULL != matrix); - assert(NULL != my_numa_node); uip = &module->usnic_info; - /* Look for the IP device name in the hwloc topology (the usnic + /* Look for the IP device name in the hwloc topology (the usnic device is simply an alternate API to reach the same device, so if we find the IP device name, we've found the usNIC device) */ obj = NULL; - while (NULL != (obj = 
hwloc_get_next_osdev(opal_hwloc_topology, obj))) { + while (NULL != (obj = hwloc_get_next_osdev(topo, obj))) { assert(HWLOC_OBJ_OS_DEVICE == obj->type); if (0 == strcmp(obj->name, uip->ui.v1.ui_ifname)) { break; @@ -177,6 +191,7 @@ int opal_btl_usnic_hwloc_distance(opal_btl_usnic_module_t *module) { int ret; hwloc_obj_t dev_numa; + hwloc_topology_t topo; /* Bozo check */ assert(NULL != module); @@ -197,30 +212,28 @@ int opal_btl_usnic_hwloc_distance(opal_btl_usnic_module_t *module) } /* Find my NUMA node */ - if (OPAL_SUCCESS != (ret = find_my_numa_node())) { + if (0 > (ret = find_my_numa_node_index())) { return ret; } - /* If my_numa_node is still NULL, that means we span more than 1 - NUMA node. So... no sorting/pruning for you!
*/ - if (NULL == my_numa_node) { - return OPAL_SUCCESS; - } /* Find the NUMA node covering this module's device */ - dev_numa = find_device_numa(module); + if (NULL == (topo = opal_hwloc_base_get_topology())) { + return OPAL_ERROR; + } + + dev_numa = find_device_numa(topo, module); /* Lookup the distance between my NUMA node and the NUMA node of the device */ if (NULL != dev_numa) { module->numa_distance = - matrix->latency[dev_numa->logical_index * num_numa_nodes + - my_numa_node->logical_index]; + matrix->latency[dev_numa->logical_index * num_numa_nodes + my_numa_node_index]; opal_output_verbose(5, USNIC_OUT, "btl:usnic:filter_numa: %s is distance %d from me", module->linux_device_name, module->numa_distance); } - + opal_hwloc_base_free_topology(topo); return OPAL_SUCCESS; } diff --git a/opal/mca/hwloc/base/base.h b/opal/mca/hwloc/base/base.h index 826aeb81a84..00cfce03dee 100644 --- a/opal/mca/hwloc/base/base.h +++ b/opal/mca/hwloc/base/base.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -42,7 +42,6 @@ BEGIN_C_DECLS * Debugging output stream */ OPAL_DECLSPEC extern bool opal_hwloc_base_inited; -OPAL_DECLSPEC extern bool opal_hwloc_topology_inited; OPAL_DECLSPEC extern mca_base_framework_t opal_hwloc_base_framework; @@ -94,7 +93,7 @@ OPAL_DECLSPEC int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *poli * not bound, use the hwloc root object's (available and online) * cpuset. 
*/ -OPAL_DECLSPEC void opal_hwloc_base_get_local_cpuset(void); +OPAL_DECLSPEC void opal_hwloc_base_get_local_cpuset(hwloc_topology_t topo); struct opal_rmaps_numa_node_t { opal_list_item_t super; @@ -140,12 +139,12 @@ OPAL_DECLSPEC int opal_hwloc_base_filter_cpus(hwloc_topology_t topo); * Discover / load the hwloc topology (i.e., call hwloc_topology_init() and * hwloc_topology_load()). */ -OPAL_DECLSPEC int opal_hwloc_base_get_topology(void); +OPAL_DECLSPEC hwloc_topology_t opal_hwloc_base_get_topology(void); /** * Set the hwloc topology to that from the given topo file */ -OPAL_DECLSPEC int opal_hwloc_base_set_topology(char *topofile); +OPAL_DECLSPEC hwloc_topology_t opal_hwloc_base_set_topology(char *topofile); /** * Free the hwloc topology. @@ -219,14 +218,15 @@ OPAL_DECLSPEC int opal_hwloc_base_report_bind_failure(const char *file, * loaded by opal_hwloc_base_open(). Hence, an upper layer needs to * invoke this function after opal_hwloc_topology has been loaded. */ -OPAL_DECLSPEC int opal_hwloc_base_set_process_membind_policy(void); +OPAL_DECLSPEC int opal_hwloc_base_set_process_membind_policy(hwloc_topology_t topo); OPAL_DECLSPEC int opal_hwloc_base_membind(opal_hwloc_base_memory_segment_t *segs, size_t count, int node_id); OPAL_DECLSPEC int opal_hwloc_base_node_name_to_id(char *node_name, int *id); -OPAL_DECLSPEC int opal_hwloc_base_memory_set(opal_hwloc_base_memory_segment_t *segments, +OPAL_DECLSPEC int opal_hwloc_base_memory_set(hwloc_topology_t topo, + opal_hwloc_base_memory_segment_t *segments, size_t num_segments); /* datatype support */ diff --git a/opal/mca/hwloc/base/hwloc_base_frame.c b/opal/mca/hwloc/base/hwloc_base_frame.c index bb5e8d980f7..7b9d2780b9a 100644 --- a/opal/mca/hwloc/base/hwloc_base_frame.c +++ b/opal/mca/hwloc/base/hwloc_base_frame.c @@ -38,7 +38,6 @@ * Globals */ bool opal_hwloc_base_inited = false; -hwloc_topology_t opal_hwloc_topology=NULL; hwloc_cpuset_t opal_hwloc_my_cpuset=NULL; hwloc_cpuset_t opal_hwloc_base_given_cpus=NULL; 
opal_hwloc_base_map_t opal_hwloc_base_map = OPAL_HWLOC_BASE_MAP_NONE; @@ -286,14 +285,6 @@ static int opal_hwloc_base_close(void) hwloc_bitmap_free(opal_hwloc_my_cpuset); opal_hwloc_my_cpuset = NULL; } - - /* destroy the topology */ - if (NULL != opal_hwloc_topology) { - opal_hwloc_base_free_topology(opal_hwloc_topology); - opal_hwloc_topology = NULL; - } - - /* All done */ opal_hwloc_base_inited = false; return OPAL_SUCCESS; diff --git a/opal/mca/hwloc/base/hwloc_base_maffinity.c b/opal/mca/hwloc/base/hwloc_base_maffinity.c index 6a9c5939ddf..84dac302ff0 100644 --- a/opal/mca/hwloc/base/hwloc_base_maffinity.c +++ b/opal/mca/hwloc/base/hwloc_base_maffinity.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,15 +24,14 @@ * which has to do some extra steps to get error messages to be * displayed). */ -int opal_hwloc_base_set_process_membind_policy(void) +int opal_hwloc_base_set_process_membind_policy(hwloc_topology_t topo) { int rc = 0, flags; hwloc_membind_policy_t policy; hwloc_cpuset_t cpuset; - /* Make sure opal_hwloc_topology has been set by the time we've - been called */ - if (NULL == opal_hwloc_topology) { + /* bozo check */ + if (NULL == topo) { return OPAL_ERR_BAD_PARAM; } @@ -55,9 +55,8 @@ int opal_hwloc_base_set_process_membind_policy(void) rc = OPAL_ERR_OUT_OF_RESOURCE; } else { int e; - hwloc_get_cpubind(opal_hwloc_topology, cpuset, 0); - rc = hwloc_set_membind(opal_hwloc_topology, - cpuset, policy, flags); + hwloc_get_cpubind(topo, cpuset, 0); + rc = hwloc_set_membind(topo, cpuset, policy, flags); e = errno; hwloc_bitmap_free(cpuset); @@ -73,7 +72,8 @@ int opal_hwloc_base_set_process_membind_policy(void) return (0 == rc) ? 
OPAL_SUCCESS : OPAL_ERROR; } -int opal_hwloc_base_memory_set(opal_hwloc_base_memory_segment_t *segments, +int opal_hwloc_base_memory_set(hwloc_topology_t topo, + opal_hwloc_base_memory_segment_t *segments, size_t num_segments) { int rc = OPAL_SUCCESS; @@ -82,7 +82,7 @@ int opal_hwloc_base_memory_set(opal_hwloc_base_memory_segment_t *segments, hwloc_cpuset_t cpuset = NULL; /* bozo check */ - if (NULL == opal_hwloc_topology) { + if (NULL == topo) { msg = "hwloc_set_area_membind() failure - topology not available"; return opal_hwloc_base_report_bind_failure(__FILE__, __LINE__, msg, rc); @@ -97,9 +97,9 @@ int opal_hwloc_base_memory_set(opal_hwloc_base_memory_segment_t *segments, msg = "hwloc_bitmap_alloc() failure"; goto out; } - hwloc_get_cpubind(opal_hwloc_topology, cpuset, 0); + hwloc_get_cpubind(topo, cpuset, 0); for (i = 0; i < num_segments; ++i) { - if (0 != hwloc_set_area_membind(opal_hwloc_topology, + if (0 != hwloc_set_area_membind(topo, segments[i].mbs_start_addr, segments[i].mbs_len, cpuset, HWLOC_MEMBIND_BIND, @@ -135,9 +135,9 @@ int opal_hwloc_base_membind(opal_hwloc_base_memory_segment_t *segs, int rc = OPAL_SUCCESS; char *msg = NULL; hwloc_cpuset_t cpuset = NULL; + hwloc_topology_t topo = NULL; - /* bozo check */ - if (NULL == opal_hwloc_topology) { + if (NULL == (topo = opal_hwloc_base_get_topology())) { msg = "hwloc_set_area_membind() failure - topology not available"; return opal_hwloc_base_report_bind_failure(__FILE__, __LINE__, msg, rc); @@ -151,7 +151,7 @@ int opal_hwloc_base_membind(opal_hwloc_base_memory_segment_t *segs, } hwloc_bitmap_set(cpuset, node_id); for(i = 0; i < count; i++) { - if (0 != hwloc_set_area_membind(opal_hwloc_topology, + if (0 != hwloc_set_area_membind(topo, segs[i].mbs_start_addr, segs[i].mbs_len, cpuset, HWLOC_MEMBIND_BIND, @@ -166,6 +166,9 @@ int opal_hwloc_base_membind(opal_hwloc_base_memory_segment_t *segs, if (NULL != cpuset) { hwloc_bitmap_free(cpuset); } + if (NULL != topo) { + opal_hwloc_base_free_topology(topo); + } if
(OPAL_SUCCESS != rc) { return opal_hwloc_base_report_bind_failure(__FILE__, __LINE__, msg, rc); } diff --git a/opal/mca/hwloc/base/hwloc_base_proc_mempolicy.c b/opal/mca/hwloc/base/hwloc_base_proc_mempolicy.c index b3715f60928..145cb21eb67 100644 --- a/opal/mca/hwloc/base/hwloc_base_proc_mempolicy.c +++ b/opal/mca/hwloc/base/hwloc_base_proc_mempolicy.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,15 +24,14 @@ * which has to do some extra steps to get error messages to be * displayed). */ -int opal_hwloc_base_set_process_membind_policy(void) +int opal_hwloc_base_set_process_membind_policy(hwloc_topology_t topo) { int rc = 0, flags; hwloc_membind_policy_t policy; hwloc_cpuset_t cpuset; - /* Make sure opal_hwloc_topology has been set by the time we've - been called */ - if (NULL == opal_hwloc_topology) { + /* bozo check */ + if (NULL == topo) { return OPAL_ERR_BAD_PARAM; } @@ -55,9 +55,8 @@ int opal_hwloc_base_set_process_membind_policy(void) rc = OPAL_ERR_OUT_OF_RESOURCE; } else { int e; - hwloc_get_cpubind(opal_hwloc_topology, cpuset, 0); - rc = hwloc_set_membind(opal_hwloc_topology, - cpuset, policy, flags); + hwloc_get_cpubind(topo, cpuset, 0); + rc = hwloc_set_membind(topo, cpuset, policy, flags); e = errno; hwloc_bitmap_free(cpuset); diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index 040e531352f..361d487d48e 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -13,7 +13,7 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. 
* Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -38,8 +38,10 @@ #include "opal/util/argv.h" #include "opal/util/output.h" #include "opal/util/os_dirpath.h" +#include "opal/util/proc.h" #include "opal/util/show_help.h" #include "opal/threads/tsd.h" +#include "opal/mca/pmix/pmix.h" #include "opal/mca/hwloc/hwloc.h" #include "opal/mca/hwloc/base/base.h" @@ -200,7 +202,7 @@ int opal_hwloc_base_filter_cpus(hwloc_topology_t topo) return OPAL_SUCCESS; } -static void fill_cache_line_size(void) +static void fill_cache_line_size(hwloc_topology_t topo) { int i = 0, cache_level = 2; unsigned size; @@ -212,7 +214,7 @@ static void fill_cache_line_size(void) while (cache_level > 0 && !found) { i=0; while (1) { - obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, + obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_CACHE, cache_level, i, OPAL_HWLOC_LOGICAL); if (NULL == obj) { @@ -238,72 +240,139 @@ static void fill_cache_line_size(void) } } -int opal_hwloc_base_get_topology(void) +hwloc_topology_t opal_hwloc_base_get_topology(void) { int rc=OPAL_SUCCESS; + char *val; + opal_process_name_t wildcard_rank; + opal_value_t *kv; + int u32; + hwloc_topology_t topo = NULL; OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, "hwloc:base:get_topology")); if (NULL == opal_hwloc_base_topo_file) { - if (0 != hwloc_topology_init(&opal_hwloc_topology) || - 0 != hwloc_topology_set_flags(opal_hwloc_topology, - (HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | - HWLOC_TOPOLOGY_FLAG_IO_DEVICES)) || - 0 != hwloc_topology_load(opal_hwloc_topology)) { - return OPAL_ERR_NOT_SUPPORTED; - } - if (OPAL_SUCCESS != (rc = opal_hwloc_base_filter_cpus(opal_hwloc_topology))) { - return rc; + /* setup a name for retrieving data associated with the job */ + wildcard_rank.jobid = OPAL_PROC_MY_NAME.jobid; + wildcard_rank.vpid = OPAL_NAME_WILDCARD->vpid; + + /* retrieve our topology */ + val = NULL; +
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCAL_TOPO, + &wildcard_rank, &val, OPAL_STRING); + if (OPAL_SUCCESS == rc && NULL != val) { + /* load the topology */ + if (0 != hwloc_topology_init(&topo)) { + free(val); + return NULL; + } + if (0 != hwloc_topology_set_xmlbuffer(topo, val, strlen(val))) { + free(val); + hwloc_topology_destroy(topo); + return NULL; + } + /* since we are loading this from an external source, we have to + * explicitly set a flag so hwloc sets things up correctly + */ + if (0 != hwloc_topology_set_flags(topo, + (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | + HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | + HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { + hwloc_topology_destroy(topo); + free(val); + return NULL; + } + /* now load the topology */ + if (0 != hwloc_topology_load(topo)) { + hwloc_topology_destroy(topo); + free(val); + return NULL; + } + free(val); + /* filter the cpus thru any default cpu set */ + if (OPAL_SUCCESS != (rc = opal_hwloc_base_filter_cpus(topo))) { + OPAL_ERROR_LOG(rc); + hwloc_topology_destroy(topo); + return NULL; + } + } else { + /* it wasn't passed down to us, so go get it */ + if (0 != hwloc_topology_init(&topo) || + 0 != hwloc_topology_set_flags(topo, + (HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | + HWLOC_TOPOLOGY_FLAG_IO_DEVICES)) || + 0 != hwloc_topology_load(topo)) { + if (NULL != topo) { + hwloc_topology_destroy(topo); + } + return NULL; + } + if (OPAL_SUCCESS != (rc = opal_hwloc_base_filter_cpus(topo))) { + OPAL_ERROR_LOG(rc); + hwloc_topology_destroy(topo); + return NULL; + } + /* push it into the PMIx database in case someone + * tries to retrieve it so we avoid an attempt to + * get it again */ + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_LOCAL_TOPO); + kv->type = OPAL_STRING; + if (0 != (rc = hwloc_topology_export_xmlbuffer(topo, &kv->data.string, &u32))) { + OBJ_RELEASE(kv); + hwloc_topology_destroy(topo); + return NULL; + } + opal_pmix.store_local(&wildcard_rank, kv); // it is okay for this to fail + OBJ_RELEASE(kv); } } 
else { - if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(opal_hwloc_base_topo_file))) { - return rc; + if (NULL == (topo = opal_hwloc_base_set_topology(opal_hwloc_base_topo_file))) { + return NULL; } } /* fill opal_cache_line_size global with the smallest L1 cache line size */ - fill_cache_line_size(); + fill_cache_line_size(topo); - return rc; + return topo; } -int opal_hwloc_base_set_topology(char *topofile) +hwloc_topology_t opal_hwloc_base_set_topology(char *topofile) { struct hwloc_topology_support *support; int rc; + hwloc_topology_t topo; - OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, - "hwloc:base:set_topology %s", topofile)); + OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, + "hwloc:base:set_topology %s", topofile)); - if (NULL != opal_hwloc_topology) { - hwloc_topology_destroy(opal_hwloc_topology); - } - if (0 != hwloc_topology_init(&opal_hwloc_topology)) { - return OPAL_ERR_NOT_SUPPORTED; + if (0 != hwloc_topology_init(&topo)) { + return NULL; } - if (0 != hwloc_topology_set_xml(opal_hwloc_topology, topofile)) { - hwloc_topology_destroy(opal_hwloc_topology); + if (0 != hwloc_topology_set_xml(topo, topofile)) { + hwloc_topology_destroy(topo); OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, "hwloc:base:set_topology bad topo file")); - return OPAL_ERR_NOT_SUPPORTED; + return NULL; } /* since we are loading this from an external source, we have to * explicitly set a flag so hwloc sets things up correctly */ - if (0 != hwloc_topology_set_flags(opal_hwloc_topology, + if (0 != hwloc_topology_set_flags(topo, (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { - hwloc_topology_destroy(opal_hwloc_topology); - return OPAL_ERR_NOT_SUPPORTED; + hwloc_topology_destroy(topo); + return NULL; } - if (0 != hwloc_topology_load(opal_hwloc_topology)) { - hwloc_topology_destroy(opal_hwloc_topology); + if (0 != hwloc_topology_load(topo)) { + 
hwloc_topology_destroy(topo); OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, "hwloc:base:set_topology failed to load")); - return OPAL_ERR_NOT_SUPPORTED; + return NULL; } /* unfortunately, hwloc does not include support info in its @@ -311,22 +380,23 @@ int opal_hwloc_base_set_topology(char *topofile) * systems that use this option are likely to provide * binding support */ - support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology); + support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo); support->cpubind->set_thisproc_cpubind = true; support->membind->set_thisproc_membind = true; /* filter the cpus thru any default cpu set */ - rc = opal_hwloc_base_filter_cpus(opal_hwloc_topology); + rc = opal_hwloc_base_filter_cpus(topo); if (OPAL_SUCCESS != rc) { - return rc; + hwloc_topology_destroy(topo); + return NULL; } /* fill opal_cache_line_size global with the smallest L1 cache line size */ - fill_cache_line_size(); + fill_cache_line_size(topo); /* all done */ - return OPAL_SUCCESS; + return topo; } static void free_object(hwloc_obj_t obj) @@ -353,6 +423,10 @@ void opal_hwloc_base_free_topology(hwloc_topology_t topo) opal_hwloc_topo_data_t *rdata; unsigned k; + if (NULL == topo) { + return; + } + obj = hwloc_get_root_obj(topo); /* release the root-level userdata */ if (NULL != obj->userdata) { @@ -369,23 +443,23 @@ void opal_hwloc_base_free_topology(hwloc_topology_t topo) hwloc_topology_destroy(topo); } -void opal_hwloc_base_get_local_cpuset(void) +void opal_hwloc_base_get_local_cpuset(hwloc_topology_t topo) { hwloc_obj_t root; hwloc_cpuset_t base_cpus; - if (NULL != opal_hwloc_topology) { + if (NULL != topo) { if (NULL == opal_hwloc_my_cpuset) { opal_hwloc_my_cpuset = hwloc_bitmap_alloc(); } /* get the cpus we are bound to */ - if (hwloc_get_cpubind(opal_hwloc_topology, + if (hwloc_get_cpubind(topo, opal_hwloc_my_cpuset, HWLOC_CPUBIND_PROCESS) < 0) { /* we are not bound - use the root's available 
cpuset */ - root = hwloc_get_root_obj(opal_hwloc_topology); - base_cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, root); + root = hwloc_get_root_obj(topo); + base_cpus = opal_hwloc_base_get_available_cpus(topo, root); hwloc_bitmap_copy(opal_hwloc_my_cpuset, base_cpus); } } @@ -1205,7 +1279,7 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str, int core_id, lower_range, upper_range; /* bozo checks */ - if (NULL == opal_hwloc_topology) { + if (NULL == topo) { return OPAL_ERR_NOT_SUPPORTED; } if (NULL == slot_str || 0 == strlen(slot_str)) { @@ -1502,9 +1576,9 @@ static char *hwloc_getline(FILE *fp) ret = fgets(input, OPAL_HWLOC_MAX_ELOG_LINE, fp); if (NULL != ret) { - input[strlen(input)-1] = '\0'; /* remove newline */ - buff = strdup(input); - return buff; + input[strlen(input)-1] = '\0'; /* remove newline */ + buff = strdup(input); + return buff; } return NULL; diff --git a/opal/mca/hwloc/hwloc.h b/opal/mca/hwloc/hwloc.h index a074be86e03..ef914645430 100644 --- a/opal/mca/hwloc/hwloc.h +++ b/opal/mca/hwloc/hwloc.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* @@ -211,7 +211,6 @@ typedef uint16_t opal_binding_policy_t; (OPAL_BIND_ALLOW_OVERLOAD & (n)) /* some global values */ -OPAL_DECLSPEC extern hwloc_topology_t opal_hwloc_topology; OPAL_DECLSPEC extern opal_binding_policy_t opal_hwloc_binding_policy; OPAL_DECLSPEC extern hwloc_cpuset_t opal_hwloc_my_cpuset; OPAL_DECLSPEC extern bool opal_hwloc_report_bindings; diff --git a/opal/mca/pmix/cray/pmix_cray.c b/opal/mca/pmix/cray/pmix_cray.c index 31bf7a59b1e..344a56c5276 100644 --- a/opal/mca/pmix/cray/pmix_cray.c +++ b/opal/mca/pmix/cray/pmix_cray.c @@ -5,7 +5,7 @@ * Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All * rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -690,6 +690,7 @@ static void fencenb(int sd, short args, void *cbdata) opal_hwloc_locality_t locality; opal_list_t vals; char *cpuset = NULL; + hwloc_topology_t topo = NULL; opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s pmix:cray executing fence cache_global %p cache_local %p", @@ -827,6 +828,9 @@ static void fencenb(int sd, short args, void *cbdata) * we only need to set locality for each local rank as "not found" * equates to "non-local" */ + if (NULL == (topo = opal_hwloc_base_get_topology())) { + goto fn_exit; + } for (i=0; i < pmix_nlranks; i++) { id.vpid = pmix_lranks[i]; @@ -856,7 +860,7 @@ static void fencenb(int sd, short args, void *cbdata) locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; } else { /* determine relative location on our node */ - locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, + locality = opal_hwloc_base_get_relative_locality(topo, cpuset, kp->data.string); } @@ -877,6 +881,9 @@ static void fencenb(int sd, short 
args, void *cbdata) } fn_exit: + if (NULL != topo) { + opal_hwloc_base_free_topology(topo); + } if (NULL != cpuset) { free(cpuset); } diff --git a/opal/mca/pmix/ext2x/pmix2x_server_south.c b/opal/mca/pmix/ext2x/pmix2x_server_south.c index cdeecdaa2d3..b333d6b0616 100644 --- a/opal/mca/pmix/ext2x/pmix2x_server_south.c +++ b/opal/mca/pmix/ext2x/pmix2x_server_south.c @@ -147,9 +147,12 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, /* as we might want to use some client-side functions, be sure * to register our own nspace */ + PMIX_INFO_CREATE(pinfo, 1); + PMIX_INFO_LOAD(&pinfo[0], PMIX_REGISTER_NODATA, NULL, PMIX_BOOL); active = true; - PMIx_server_register_nspace(job->nspace, 1, NULL, 0, op2cbfunc, (void*)&active); + PMIx_server_register_nspace(job->nspace, 1, pinfo, 1, op2cbfunc, (void*)&active); PMIX_WAIT_FOR_COMPLETION(active); + PMIX_INFO_FREE(pinfo, 1); return OPAL_SUCCESS; } diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index 5e9c64e086f..edb7c94cb72 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=gitb9778a7 +repo_rev=git422b238 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Dec 19, 2016" +date="Dec 28, 2016" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix2x/pmix/config/Makefile.am b/opal/mca/pmix/pmix2x/pmix/config/Makefile.am index 2593f424840..0c0556379a4 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/config/Makefile.am @@ -12,7 +12,7 @@ # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2010 Oracle and/or its affiliates. All rights # reserved. # Copyright (c) 2016 Intel, Inc. All rights reserved. @@ -41,7 +41,7 @@ EXTRA_DIST = \ pmix.m4 \ pmix_search_libs.m4 \ pmix_setup_cc.m4 \ - pmix_setup_hwloc.m4 \ + pmix_setup_zlib.m4 \ pmix_setup_libevent.m4 \ pmix_mca_priority_sort.pl diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 index fa69a266fcb..781ef0b61c2 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 @@ -358,7 +358,6 @@ AC_DEFUN([PMIX_SETUP_CORE],[ #endif ]) - # Note that sometimes we have , but it doesn't work (e.g., # have both Portland and GNU installed; using pgcc will find GNU's # , which all it does -- by standard -- is define "bool" to @@ -641,11 +640,11 @@ AC_DEFUN([PMIX_SETUP_CORE],[ PMIX_LIBEVENT_CONFIG ################################## - # HWLOC + # ZLIB COMPRESSION ################################## - pmix_show_title "HWLOC" + pmix_show_title "ZLIB" - PMIX_HWLOC_CONFIG + PMIX_ZLIB_CONFIG ################################## # MCA diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix_setup_hwloc.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix_setup_hwloc.m4 deleted file mode 100644 index d67ecafa88d..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix_setup_hwloc.m4 +++ /dev/null @@ -1,123 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. 
All rights reserved -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_hwloc_CONFIG([action-if-found], [action-if-not-found]) -# -------------------------------------------------------------------- -AC_DEFUN([PMIX_HWLOC_CONFIG],[ - AC_ARG_WITH([hwloc-header], - [AC_HELP_STRING([--with-hwloc-header=HEADER], - [The value that should be included in C files to include hwloc.h])]) - - AC_ARG_ENABLE([embedded-hwloc], - [AC_HELP_STRING([--enable-embedded-hwloc], - [Enable use of locally embedded hwloc])]) - - pmix_hwloc_support=0 - AS_IF([test "$enable_embedded_hwloc" = "yes"], - [_PMIX_HWLOC_EMBEDDED_MODE], - [_PMIX_HWLOC_EXTERNAL]) - - AC_DEFINE_UNQUOTED(PMIX_HAVE_HWLOC, [$pmix_hwloc_support], - [Whether we have hwloc support or not]) - - AC_MSG_CHECKING([hwloc header]) - AC_DEFINE_UNQUOTED([PMIX_HWLOC_HEADER], [$PMIX_HWLOC_HEADER], - [Location of hwloc.h]) - AC_MSG_RESULT([$PMIX_HWLOC_HEADER]) - - CPPFLAGS="$CPPFLAGS $PMIX_HWLOC_CPPFLAGS" - LDFLAGS="$LDFLAGS $PMIX_HWLOC_LDFLAGS" - LIBS="$LIBS $PMIX_HWLOC_LIBS" -]) - -AC_DEFUN([_PMIX_HWLOC_EMBEDDED_MODE],[ - AC_MSG_CHECKING([for hwloc]) - AC_MSG_RESULT([assumed available (embedded mode)]) - - PMIX_HWLOC_HEADER="$with_hwloc_header" - PMIX_HWLOC_CPPFLAGS= - PMIX_HWLOC_LIB= - PMIX_HWLOC_LDFLAGS= - - pmix_hwloc_support=1 -]) - -AC_DEFUN([_PMIX_HWLOC_EXTERNAL],[ - PMIX_VAR_SCOPE_PUSH([pmix_hwloc_dir pmix_hwloc_libdir]) - - AC_ARG_WITH([hwloc], - [AC_HELP_STRING([--with-hwloc=DIR], - [Search for hwloc headers and libraries in DIR ])]) - - AC_ARG_WITH([hwloc-libdir], - [AC_HELP_STRING([--with-hwloc-libdir=DIR], - [Search for hwloc libraries in DIR ])]) - - pmix_hwloc_support=0 - if test "$with_hwloc" != "no"; then - AC_MSG_CHECKING([for hwloc in]) - if test ! 
-z "$with_hwloc" && test "$with_hwloc" != "yes"; then - pmix_hwloc_dir=$with_hwloc - if test -d $with_hwloc/lib; then - pmix_hwloc_libdir=$with_hwloc/lib - elif test -d $with_hwloc/lib64; then - pmix_hwloc_libdir=$with_hwloc/lib64 - else - AC_MSG_RESULT([Could not find $with_hwloc/lib or $with_hwloc/lib64]) - AC_MSG_ERROR([Can not continue]) - fi - AC_MSG_RESULT([$pmix_hwloc_dir and $pmix_hwloc_libdir]) - else - AC_MSG_RESULT([(default search paths)]) - fi - AS_IF([test ! -z "$with_hwloc_libdir" && test "$with_hwloc_libdir" != "yes"], - [pmix_hwloc_libdir="$with_hwloc_libdir"]) - - PMIX_CHECK_PACKAGE([pmix_hwloc], - [hwloc.h], - [hwloc], - [hwloc_topology_dup], - [-lhwloc], - [$pmix_hwloc_dir], - [$pmix_hwloc_libdir], - [pmix_hwloc_support=1], - [pmix_hwloc_support=0]) - if test $pmix_hwloc_support == "1"; then - CPPFLAGS="$pmix_hwloc_CPPFLAGS $CPPFLAGS" - LIBS="$LIBS -lhwloc" - LDFLAGS="$pmix_hwloc_LDFLAGS $LDFLAGS" - fi - fi - - if test ! -z "$with_hwloc" && test "$with_hwloc" != "no" && test "$pmix_hwloc_support" != "1"; then - AC_MSG_WARN([HWLOC SUPPORT REQUESTED AND NOT FOUND. PMIX HWLOC]) - AC_MSG_WARN([SUPPORT REQUIRES A MINIMUM OF VERSION 1.9.1]) - AC_MSG_ERROR([CANNOT CONTINUE]) - fi - - # Set output variables - PMIX_HWLOC_HEADER="" - PMIX_HWLOC_LIB=-lhwloc - AS_IF([test "$pmix_hwloc_dir" != ""], - [PMIX_HWLOC_CPPFLAGS="-I$pmix_hwloc_dir/include"]) - AS_IF([test "$pmix_hwloc_libdir" != ""], - [PMIX_HWLOC_LDFLAGS="-L$pmix_hwloc_libdir"]) - - AC_MSG_CHECKING([will hwloc support be built]) - if test "$pmix_hwloc_support" != "1"; then - AC_MSG_RESULT([no]) - else - AC_MSG_RESULT([yes]) - fi - - PMIX_VAR_SCOPE_POP -])dnl diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix_setup_zlib.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix_setup_zlib.m4 new file mode 100644 index 00000000000..0a887fe6fd4 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix_setup_zlib.m4 @@ -0,0 +1,85 @@ +# -*- shell-script -*- +# +# Copyright (c) 2009-2015 Cisco Systems, Inc. 
All rights reserved. +# Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. +# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_zlib_CONFIG([action-if-found], [action-if-not-found]) +# -------------------------------------------------------------------- +AC_DEFUN([PMIX_ZLIB_CONFIG],[ + PMIX_VAR_SCOPE_PUSH([pmix_zlib_dir pmix_zlib_libdir]) + + AC_ARG_WITH([zlib], + [AC_HELP_STRING([--with-zlib=DIR], + [Search for zlib headers and libraries in DIR ])]) + + AC_ARG_WITH([zlib-libdir], + [AC_HELP_STRING([--with-zlib-libdir=DIR], + [Search for zlib libraries in DIR ])]) + + pmix_zlib_support=0 + if test "$with_zlib" != "no"; then + AC_MSG_CHECKING([for zlib in]) + if test ! -z "$with_zlib" && test "$with_zlib" != "yes"; then + pmix_zlib_dir=$with_zlib + pmix_zlib_standard_header_location=no + if test -d $with_zlib/lib; then + pmix_zlib_libdir=$with_zlib/lib + elif test -d $with_zlib/lib64; then + pmix_zlib_libdir=$with_zlib/lib64 + else + AC_MSG_RESULT([Could not find $with_zlib/lib or $with_zlib/lib64]) + AC_MSG_ERROR([Can not continue]) + fi + AC_MSG_RESULT([$pmix_zlib_dir and $pmix_zlib_libdir]) + else + AC_MSG_RESULT([(default search paths)]) + pmix_zlib_standard_header_location=yes + fi + AS_IF([test ! -z "$with_zlib_libdir" && test "$with_zlib_libdir" != "yes"], + [pmix_zlib_libdir="$with_zlib_libdir" + pmix_zlib_standard_lib_location=no], + [pmix_zlib_standard_lib_location=yes]) + + PMIX_CHECK_PACKAGE([pmix_zlib], + [zlib.h], + [z], + [deflate], + [-lz], + [$pmix_zlib_dir], + [$pmix_zlib_libdir], + [pmix_zlib_support=1], + [pmix_zlib_support=0]) + if test $pmix_zlib_support == "1"; then + LIBS="$LIBS -lz" + if test "$pmix_zlib_standard_header_location" != "yes"; then + CPPFLAGS="$CPPFLAGS $pmix_zlib_CPPFLAGS" + fi + if test "$pmix_zlib_standard_lib_location" != "yes"; then + LDFLAGS="$LDFLAGS $pmix_zlib_LDFLAGS" + fi + fi + fi + + if test ! 
-z "$with_zlib" && test "$with_zlib" != "no" && test "$pmix_zlib_support" != "1"; then + AC_MSG_WARN([ZLIB SUPPORT REQUESTED AND NOT FOUND]) + AC_MSG_ERROR([CANNOT CONTINUE]) + fi + + AC_MSG_CHECKING([will zlib support be built]) + if test "$pmix_zlib_support" != "1"; then + AC_MSG_RESULT([no]) + else + AC_MSG_RESULT([yes]) + fi + + AC_DEFINE_UNQUOTED([PMIX_HAVE_ZLIB], [$pmix_zlib_support], + [Whether or not we have zlib support]) + PMIX_VAR_SCOPE_POP +])dnl diff --git a/opal/mca/pmix/pmix2x/pmix/contrib/perf_tools/README b/opal/mca/pmix/pmix2x/pmix/contrib/perf_tools/README index 31cbae7a548..a6677587823 100644 --- a/opal/mca/pmix/pmix2x/pmix/contrib/perf_tools/README +++ b/opal/mca/pmix/pmix2x/pmix/contrib/perf_tools/README @@ -24,4 +24,12 @@ make sure that both MPI and PMIx libraries are visible for the loader. For PMIx testing "convenience" there is a `run.sh` script that can be used to ensure that environment is set properly (not a production grade so may not work -for all environments) +for all environments). To use it open it and fix: +- OMPI_BASE to point to your MPI installation +- PMIX_LIB to point to your PMIx installation + +If you are running inside the supported batch system you should be fine to +just run fixed `run.sh` with the first argument defining how many processes +need to be launched and all other parameters will be passed to the performance +tool. 
For example: +$ ./run.sh 10 -d diff --git a/opal/mca/pmix/pmix2x/pmix/contrib/perf_tools/pmi_intra_perf.c b/opal/mca/pmix/pmix2x/pmix/contrib/perf_tools/pmi_intra_perf.c index a654579290b..680b82547ec 100644 --- a/opal/mca/pmix/pmix2x/pmix/contrib/perf_tools/pmi_intra_perf.c +++ b/opal/mca/pmix/pmix2x/pmix/contrib/perf_tools/pmi_intra_perf.c @@ -493,7 +493,7 @@ int main(int argc, char **argv) fprintf(stderr,"get: max loc %lf rem %lf (loc: %d, rem: %d)\n", max_get_loc_time, max_get_rem_time, max_get_loc_idx, max_get_rem_idx); fprintf(stderr,"total: max %lf min %lf\n", max_total_time, min_total_time); - fprintf(stderr,"mem: loc %0.2lf rem %0.2lf min %0.2lf max %0.2lf total %0.2lf Kb\n", + fprintf(stderr,"mem: loc %0.2lf avg %0.2lf min %0.2lf max %0.2lf total %0.2lf Kb\n", mem_pss, cum_mem_pss / nproc, min_mem_pss, max_mem_pss, cum_mem_pss); diff --git a/opal/mca/pmix/pmix2x/pmix/contrib/perf_tools/pmix.c b/opal/mca/pmix/pmix2x/pmix/contrib/perf_tools/pmix.c index c5e9425580d..9bac8fd4ac0 100644 --- a/opal/mca/pmix/pmix2x/pmix/contrib/perf_tools/pmix.c +++ b/opal/mca/pmix/pmix2x/pmix/contrib/perf_tools/pmix.c @@ -17,6 +17,7 @@ pmix_proc_t this_proc; void pmi_init(int *rank, int *size) { pmix_value_t value, *val = &value; + pmix_proc_t job_proc; int rc; /* init us */ @@ -30,8 +31,12 @@ void pmi_init(int *rank, int *size) abort(); } + job_proc = this_proc; +#if (PMIX_VERSION_MAJOR > 1 ) + job_proc.rank = PMIX_RANK_WILDCARD; +#endif /* get our job size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&this_proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + if (PMIX_SUCCESS != (rc = PMIx_Get(&job_proc, PMIX_JOB_SIZE, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d", this_proc.nspace, this_proc.rank, rc); abort(); } @@ -45,9 +50,13 @@ void pmi_get_local_ranks(int **local_ranks, int *local_cnt) pmix_value_t value, *val = &value; char *ptr; int i, rc; + pmix_proc_t job_proc = this_proc; +#if (PMIX_VERSION_MAJOR > 1 ) + job_proc.rank = PMIX_RANK_WILDCARD; 
+#endif /* get our job size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&this_proc, PMIX_LOCAL_SIZE, NULL, 0, &val))) { + if (PMIX_SUCCESS != (rc = PMIx_Get(&job_proc, PMIX_LOCAL_SIZE, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get PMIX_LOCAL_SIZE failed: %d", this_proc.nspace, this_proc.rank, rc); abort(); } @@ -56,7 +65,7 @@ void pmi_get_local_ranks(int **local_ranks, int *local_cnt) *local_ranks = calloc(*local_cnt, sizeof(int)); /* get our job size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&this_proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) { + if (PMIX_SUCCESS != (rc = PMIx_Get(&job_proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get PMIX_LOCAL_PEERS failed: %d", this_proc.nspace, this_proc.rank, rc); abort(); } diff --git a/opal/mca/pmix/pmix2x/pmix/contrib/perf_tools/run.sh b/opal/mca/pmix/pmix2x/pmix/contrib/perf_tools/run.sh index 474ffdb11d3..0e6181ef520 100755 Binary files a/opal/mca/pmix/pmix2x/pmix/contrib/perf_tools/run.sh and b/opal/mca/pmix/pmix2x/pmix/contrib/perf_tools/run.sh differ diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h index 38200308865..4f5884de1e1 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h @@ -3,6 +3,9 @@ * Copyright (c) 2013-2016 Intel, Inc. All rights reserved * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2016 Mellanox Technologies, Inc. + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -43,8 +46,6 @@ * * Additional copyrights may follow * - * Copyright (c) 2016 IBM Corporation. All rights reserved. 
- * * $HEADER$ */ @@ -121,6 +122,7 @@ typedef uint32_t pmix_rank_t; #define PMIX_CONNECT_TO_SYSTEM "pmix.cnct.sys" // (bool) The requestor requires that a connection be made only to // a local system-level PMIx server #define PMIX_CONNECT_SYSTEM_FIRST "pmix.cnct.sys.first" // (bool) Preferentially look for a system-level PMIx server first +#define PMIX_REGISTER_NODATA "pmix.reg.nodata" // (bool) Registration is for nspace only, do not copy job data /* identification attributes */ #define PMIX_USERID "pmix.euid" // (uint32_t) effective user id @@ -435,28 +437,28 @@ typedef uint16_t pmix_data_type_t; #define PMIX_STATUS 20 // needs to be tracked separately from integer for those times // when we are embedded and it needs to be converted to the // host error definitions -#define PMIX_HWLOC_TOPO 21 -#define PMIX_VALUE 22 -#define PMIX_PROC 23 -#define PMIX_APP 24 -#define PMIX_INFO 25 -#define PMIX_PDATA 26 -#define PMIX_BUFFER 27 -#define PMIX_BYTE_OBJECT 28 -#define PMIX_KVAL 29 -#define PMIX_MODEX 30 -#define PMIX_PERSIST 31 -#define PMIX_POINTER 32 -#define PMIX_SCOPE 33 -#define PMIX_DATA_RANGE 34 -#define PMIX_COMMAND 35 -#define PMIX_INFO_DIRECTIVES 36 -#define PMIX_DATA_TYPE 37 -#define PMIX_PROC_STATE 38 -#define PMIX_PROC_INFO 39 -#define PMIX_DATA_ARRAY 40 -#define PMIX_PROC_RANK 41 -#define PMIX_QUERY 42 +#define PMIX_VALUE 21 +#define PMIX_PROC 22 +#define PMIX_APP 23 +#define PMIX_INFO 24 +#define PMIX_PDATA 25 +#define PMIX_BUFFER 26 +#define PMIX_BYTE_OBJECT 27 +#define PMIX_KVAL 28 +#define PMIX_MODEX 29 +#define PMIX_PERSIST 30 +#define PMIX_POINTER 31 +#define PMIX_SCOPE 32 +#define PMIX_DATA_RANGE 33 +#define PMIX_COMMAND 34 +#define PMIX_INFO_DIRECTIVES 35 +#define PMIX_DATA_TYPE 36 +#define PMIX_PROC_STATE 37 +#define PMIX_PROC_INFO 38 +#define PMIX_DATA_ARRAY 39 +#define PMIX_PROC_RANK 40 +#define PMIX_QUERY 41 +#define PMIX_COMPRESSED_STRING 42 // string compressed with zlib /**** DEPRECATED ****/ #define PMIX_INFO_ARRAY 43 
/********************/ @@ -681,7 +683,8 @@ typedef struct pmix_value { if (NULL != (m)->data.string) { \ free((m)->data.string); \ } \ - } else if (PMIX_BYTE_OBJECT == (m)->type) { \ + } else if ((PMIX_BYTE_OBJECT == (m)->type) || \ + (PMIX_COMPRESSED_STRING == (m)->type)) { \ if (NULL != (m)->data.bo.bytes) { \ free((m)->data.bo.bytes); \ } \ @@ -765,7 +768,7 @@ typedef struct pmix_value { * includes internal functions - we don't * want to expose the entire header here */ -void pmix_value_load(pmix_value_t *v, void *data, pmix_data_type_t type); +void pmix_value_load(pmix_value_t *v, const void *data, pmix_data_type_t type); pmix_status_t pmix_value_xfer(pmix_value_t *kv, pmix_value_t *src); pmix_status_t pmix_argv_append_nosize(char ***argv, const char *arg); pmix_status_t pmix_setenv(const char *name, const char *value, @@ -1364,6 +1367,9 @@ pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, #define PMIX_VAL_CMP_byte PMIX_VAL_cmp_val #define PMIX_VAL_CMP_flag PMIX_VAL_cmp_val +#define PMIX_VAL_ASSIGN(_v, _field, _val) \ + PMIX_VAL_set_assign(_v, _field, _val) + #define PMIX_VAL_CMP(_field, _val1, _val2) \ PMIX_VAL_CMP_ ## _field(_val1, _val2) diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/buffer_ops.h b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/buffer_ops.h index dca688e5386..a02bfa77a5a 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/buffer_ops.h +++ b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/buffer_ops.h @@ -45,7 +45,7 @@ BEGIN_C_DECLS * another pmix_value_t structure */ pmix_status_t pmix_value_xfer(pmix_value_t *kv, pmix_value_t *src); -void pmix_value_load(pmix_value_t *v, void *data, +void pmix_value_load(pmix_value_t *v, const void *data, pmix_data_type_t type); pmix_status_t pmix_value_unload(pmix_value_t *kv, void **data, size_t *sz, pmix_data_type_t type); diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/copy.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/copy.c index 29b827dde42..f80ed83e30c 100644 --- 
a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/copy.c +++ b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/copy.c @@ -259,6 +259,12 @@ bool pmix_value_cmp(pmix_value_t *p, pmix_value_t *p1) case PMIX_STRING: rc = strcmp(p->data.string, p1->data.string); break; + case PMIX_COMPRESSED_STRING: + if (p->data.bo.size != p1->data.bo.size) { + return false; + } else { + return true; + } case PMIX_STATUS: rc = (p->data.status == p1->data.status); break; @@ -370,6 +376,7 @@ PMIX_EXPORT pmix_status_t pmix_value_xfer(pmix_value_t *p, pmix_value_t *src) memcpy(&p->data.proc, &src->data.rank, sizeof(pmix_rank_t)); break; case PMIX_BYTE_OBJECT: + case PMIX_COMPRESSED_STRING: memset(&p->data.bo, 0, sizeof(pmix_byte_object_t)); if (NULL != src->data.bo.bytes && 0 < src->data.bo.size) { p->data.bo.bytes = malloc(src->data.bo.size); @@ -612,6 +619,7 @@ PMIX_EXPORT pmix_status_t pmix_value_xfer(pmix_value_t *p, pmix_value_t *src) } break; case PMIX_BYTE_OBJECT: + case PMIX_COMPRESSED_STRING: p->data.darray->array = (pmix_byte_object_t*)malloc(src->data.darray->size * sizeof(pmix_byte_object_t)); if (NULL == p->data.darray->array) { return PMIX_ERR_NOMEM; @@ -891,16 +899,6 @@ pmix_status_t pmix_bfrop_copy_proc(pmix_proc_t **dest, pmix_proc_t *src, return PMIX_SUCCESS; } -#if PMIX_HAVE_HWLOC -pmix_status_t pmix_bfrop_copy_topo(hwloc_topology_t *dest, - hwloc_topology_t src, - pmix_data_type_t type) -{ - /* use the hwloc dup function */ - return hwloc_topology_dup(dest, src); -} -#endif - pmix_status_t pmix_bfrop_copy_modex(pmix_modex_data_t **dest, pmix_modex_data_t *src, pmix_data_type_t type) { @@ -1240,6 +1238,7 @@ pmix_status_t pmix_bfrop_copy_darray(pmix_data_array_t **dest, } break; case PMIX_BYTE_OBJECT: + case PMIX_COMPRESSED_STRING: p->array = (pmix_byte_object_t*)malloc(src->size * sizeof(pmix_byte_object_t)); if (NULL == p->array) { free(p); diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/internal.h b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/internal.h index 
e207bc0be3a..e3702549aa9 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/internal.h +++ b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/internal.h @@ -38,9 +38,6 @@ #ifdef HAVE_STRING_H #include -#endif -#if PMIX_HAVE_HWLOC -#include PMIX_HWLOC_HEADER #endif BEGIN_C_DECLS @@ -279,11 +276,6 @@ pmix_status_t pmix_bfrop_pack_time(pmix_buffer_t *buffer, const void *src, int32_t num_vals, pmix_data_type_t type); pmix_status_t pmix_bfrop_pack_status(pmix_buffer_t *buffer, const void *src, int32_t num_vals, pmix_data_type_t type); - -#if PMIX_HAVE_HWLOC -pmix_status_t pmix_bfrop_pack_topo(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -#endif pmix_status_t pmix_bfrop_pack_value(pmix_buffer_t *buffer, const void *src, int32_t num_vals, pmix_data_type_t type); pmix_status_t pmix_bfrop_pack_proc(pmix_buffer_t *buffer, const void *src, @@ -364,11 +356,6 @@ pmix_status_t pmix_bfrop_pack_array(pmix_buffer_t *buffer, const void *src, int32_t *num_vals, pmix_data_type_t type); pmix_status_t pmix_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest, int32_t *num_vals, pmix_data_type_t type); - -#if PMIX_HAVE_HWLOC - pmix_status_t pmix_bfrop_unpack_topo(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); -#endif pmix_status_t pmix_bfrop_unpack_value(pmix_buffer_t *buffer, void *dest, int32_t *num_vals, pmix_data_type_t type); pmix_status_t pmix_bfrop_unpack_proc(pmix_buffer_t *buffer, void *dest, @@ -418,15 +405,8 @@ pmix_status_t pmix_bfrop_unpack_array(pmix_buffer_t *buffer, void *dest, * Internal copy functions */ - pmix_status_t pmix_bfrop_std_copy(void **dest, void *src, pmix_data_type_t type); - - pmix_status_t pmix_bfrop_copy_string(char **dest, char *src, pmix_data_type_t type); - -#if PMIX_HAVE_HWLOC - pmix_status_t pmix_bfrop_copy_topo(hwloc_topology_t *dest, - hwloc_topology_t src, - pmix_data_type_t type); -#endif +pmix_status_t pmix_bfrop_std_copy(void **dest, void *src, pmix_data_type_t type); 
+pmix_status_t pmix_bfrop_copy_string(char **dest, char *src, pmix_data_type_t type); pmix_status_t pmix_bfrop_copy_value(pmix_value_t **dest, pmix_value_t *src, pmix_data_type_t type); pmix_status_t pmix_bfrop_copy_proc(pmix_proc_t **dest, pmix_proc_t *src, @@ -487,11 +467,6 @@ pmix_status_t pmix_bfrop_print_double(char **output, char *prefix, double *src, pmix_status_t pmix_bfrop_print_timeval(char **output, char *prefix, struct timeval *src, pmix_data_type_t type); pmix_status_t pmix_bfrop_print_time(char **output, char *prefix, time_t *src, pmix_data_type_t type); pmix_status_t pmix_bfrop_print_status(char **output, char *prefix, pmix_status_t *src, pmix_data_type_t type); - -#if PMIX_HAVE_HWLOC -pmix_status_t pmix_bfrop_print_topo(char **output, char *prefix, - hwloc_topology_t src, pmix_data_type_t type); -#endif pmix_status_t pmix_bfrop_print_value(char **output, char *prefix, pmix_value_t *src, pmix_data_type_t type); pmix_status_t pmix_bfrop_print_proc(char **output, char *prefix, pmix_proc_t *src, pmix_data_type_t type); diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/open_close.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/open_close.c index f6fbe00be77..eabe0da7f36 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/open_close.c +++ b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/open_close.c @@ -297,14 +297,6 @@ pmix_status_t pmix_bfrop_open(void) pmix_bfrop_std_copy, pmix_bfrop_print_status); -#if PMIX_HAVE_HWLOC - PMIX_REGISTER_TYPE("PMIX_HWLOC_TOPO", PMIX_HWLOC_TOPO, - pmix_bfrop_pack_topo, - pmix_bfrop_unpack_topo, - pmix_bfrop_copy_topo, - pmix_bfrop_print_topo); -#endif - PMIX_REGISTER_TYPE("PMIX_VALUE", PMIX_VALUE, pmix_bfrop_pack_value, pmix_bfrop_unpack_value, @@ -425,6 +417,13 @@ pmix_status_t pmix_bfrop_open(void) pmix_bfrop_copy_query, pmix_bfrop_print_query); + PMIX_REGISTER_TYPE("PMIX_COMPRESSED_STRING", + PMIX_COMPRESSED_STRING, + pmix_bfrop_pack_bo, + pmix_bfrop_unpack_bo, + pmix_bfrop_copy_bo, + pmix_bfrop_print_bo); + /**** 
DEPRECATED ****/ PMIX_REGISTER_TYPE("PMIX_INFO_ARRAY", PMIX_INFO_ARRAY, pmix_bfrop_pack_array, @@ -462,7 +461,8 @@ pmix_status_t pmix_bfrop_close(void) } /**** UTILITY SUPPORT ****/ -PMIX_EXPORT void pmix_value_load(pmix_value_t *v, void *data, +PMIX_EXPORT void pmix_value_load(pmix_value_t *v, + const void *data, pmix_data_type_t type) { pmix_byte_object_t *bo; diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/pack.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/pack.c index e71e13704e4..bac502693f4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/pack.c +++ b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/pack.c @@ -550,6 +550,7 @@ static pmix_status_t pack_val(pmix_buffer_t *buffer, } break; case PMIX_BYTE_OBJECT: + case PMIX_COMPRESSED_STRING: if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.bo, 1, PMIX_BYTE_OBJECT))) { return ret; } @@ -819,65 +820,6 @@ pmix_status_t pmix_bfrop_pack_kval(pmix_buffer_t *buffer, const void *src, return PMIX_SUCCESS; } -#if PMIX_HAVE_HWLOC -pmix_status_t pmix_bfrop_pack_topo(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - /* NOTE: hwloc defines topology_t as a pointer to a struct! */ - hwloc_topology_t t, *tarray = (hwloc_topology_t*)src; - pmix_status_t rc; - int i; - char *xmlbuffer=NULL; - int len; - struct hwloc_topology_support *support; - - for (i=0; i < num_vals; i++) { - t = tarray[i]; - - /* extract an xml-buffer representation of the tree */ - if (0 != hwloc_topology_export_xmlbuffer(t, &xmlbuffer, &len)) { - return PMIX_ERROR; - } - - /* add to buffer */ - if (PMIX_SUCCESS != (rc = pmix_bfrop_pack_string(buffer, &xmlbuffer, 1, PMIX_STRING))) { - free(xmlbuffer); - return rc; - } - - /* cleanup */ - if (NULL != xmlbuffer) { - free(xmlbuffer); - } - - /* get the available support - hwloc unfortunately does - * not include this info in its xml export! 
- */ - support = (struct hwloc_topology_support*)hwloc_topology_get_support(t); - /* pack the discovery support */ - if (PMIX_SUCCESS != (rc = pmix_bfrop_pack_byte(buffer, support->discovery, - sizeof(struct hwloc_topology_discovery_support), - PMIX_BYTE))) { - return rc; - } - /* pack the cpubind support */ - if (PMIX_SUCCESS != (rc = pmix_bfrop_pack_byte(buffer, support->cpubind, - sizeof(struct hwloc_topology_cpubind_support), - PMIX_BYTE))) { - return rc; - } - /* pack the membind support */ - if (PMIX_SUCCESS != (rc = pmix_bfrop_pack_byte(buffer, support->membind, - sizeof(struct hwloc_topology_membind_support), - PMIX_BYTE))) { - return rc; - } -} - -return PMIX_SUCCESS; -} -#endif - pmix_status_t pmix_bfrop_pack_modex(pmix_buffer_t *buffer, const void *src, int32_t num_vals, pmix_data_type_t type) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/print.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/print.c index 46d26da4f07..0d88bb8a1bd 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/print.c +++ b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/print.c @@ -1021,130 +1021,6 @@ pmix_status_t pmix_bfrop_print_kval(char **output, char *prefix, return PMIX_SUCCESS; } -#if PMIX_HAVE_HWLOC -#define PMIX_HWLOC_MAX_STRING 2048 - -static void print_hwloc_obj(char **output, char *prefix, - hwloc_topology_t topo, hwloc_obj_t obj) -{ - hwloc_obj_t obj2; - char string[1024], *tmp, *tmp2, *pfx; - unsigned i; - struct hwloc_topology_support *support; - - /* print the object type */ - hwloc_obj_type_snprintf(string, 1024, obj, 1); - if (0 > asprintf(&pfx, "\n%s\t", (NULL == prefix) ? "" : prefix)) { - return; - } - if (0 > asprintf(&tmp, "%sType: %s Number of child objects: %u%sName=%s", - (NULL == prefix) ? "" : prefix, string, obj->arity, - pfx, (NULL == obj->name) ? 
"NULL" : obj->name)) { - free(pfx); - return; -} -if (0 < hwloc_obj_attr_snprintf(string, 1024, obj, pfx, 1)) { - /* print the attributes */ - if (0 > asprintf(&tmp2, "%s%s%s", tmp, pfx, string)) { - free(tmp); - free(pfx); - return; - } - free(tmp); - tmp = tmp2; -} - /* print the cpusets - apparently, some new HWLOC types don't - * have cpusets, so protect ourselves here - */ - if (NULL != obj->cpuset) { - hwloc_bitmap_snprintf(string, PMIX_HWLOC_MAX_STRING, obj->cpuset); - if (0 > asprintf(&tmp2, "%s%sCpuset: %s", tmp, pfx, string)) { - free(tmp); - free(pfx); - return; - } - free(tmp); - tmp = tmp2; - } - if (NULL != obj->online_cpuset) { - hwloc_bitmap_snprintf(string, PMIX_HWLOC_MAX_STRING, obj->online_cpuset); - if (0 > asprintf(&tmp2, "%s%sOnline: %s", tmp, pfx, string)) { - free(tmp); - free(pfx); - return; - } - free(tmp); - tmp = tmp2; - } - if (NULL != obj->allowed_cpuset) { - hwloc_bitmap_snprintf(string, PMIX_HWLOC_MAX_STRING, obj->allowed_cpuset); - if (0 > asprintf(&tmp2, "%s%sAllowed: %s", tmp, pfx, string)) { - free(tmp); - free(pfx); - return; - } - free(tmp); - tmp = tmp2; - } - if (HWLOC_OBJ_MACHINE == obj->type) { - /* root level object - add support values */ - support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo); - if (0 > asprintf(&tmp2, "%s%sBind CPU proc: %s%sBind CPU thread: %s", tmp, pfx, - (support->cpubind->set_thisproc_cpubind) ? "TRUE" : "FALSE", pfx, - (support->cpubind->set_thisthread_cpubind) ? "TRUE" : "FALSE")) { - free(tmp); - free(pfx); - return; - } - free(tmp); - tmp = tmp2; - if (0 > asprintf(&tmp2, "%s%sBind MEM proc: %s%sBind MEM thread: %s", tmp, pfx, - (support->membind->set_thisproc_membind) ? "TRUE" : "FALSE", pfx, - (support->membind->set_thisthread_membind) ? "TRUE" : "FALSE")) { - free(tmp); - free(pfx); - return; -} -free(tmp); -tmp = tmp2; -} -if (0 > asprintf(&tmp2, "%s%s\n", (NULL == *output) ? 
"" : *output, tmp)) { - free(tmp); - return; -} -free(tmp); -free(pfx); -if (0 > asprintf(&pfx, "%s\t", (NULL == prefix) ? "" : prefix)) { - return; -} -for (i=0; i < obj->arity; i++) { - obj2 = obj->children[i]; - /* print the object */ - print_hwloc_obj(&tmp2, pfx, topo, obj2); -} -free(pfx); -if (NULL != *output) { - free(*output); -} -*output = tmp2; -} - -pmix_status_t pmix_bfrop_print_topo(char **output, char *prefix, - hwloc_topology_t src, pmix_data_type_t type) -{ - hwloc_obj_t obj; - char *tmp=NULL; - - /* get root object */ - obj = hwloc_get_root_obj(src); - /* print it */ - print_hwloc_obj(&tmp, prefix, src, obj); - *output = tmp; - return PMIX_SUCCESS; -} - -#endif - pmix_status_t pmix_bfrop_print_modex(char **output, char *prefix, pmix_modex_data_t *src, pmix_data_type_t type) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c index 915c9b0ce95..d7b16a1a62c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c +++ b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c @@ -646,6 +646,7 @@ pmix_status_t pmix_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest, } break; case PMIX_BYTE_OBJECT: + case PMIX_COMPRESSED_STRING: if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.bo, &m, PMIX_BYTE_OBJECT))) { return ret; } @@ -1031,89 +1032,6 @@ pmix_status_t pmix_bfrop_unpack_kval(pmix_buffer_t *buffer, void *dest, return PMIX_SUCCESS; } -#if PMIX_HAVE_HWLOC -pmix_status_t pmix_bfrop_unpack_topo(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, - pmix_data_type_t type) -{ - /* NOTE: hwloc defines topology_t as a pointer to a struct! 
*/ - hwloc_topology_t t, *tarray = (hwloc_topology_t*)dest; - pmix_status_t rc=PMIX_SUCCESS; - int32_t cnt, i, j; - char *xmlbuffer; - struct hwloc_topology_support *support; - - for (i=0, j=0; i < *num_vals; i++) { - /* unpack the xml string */ - cnt=1; - xmlbuffer = NULL; - if (PMIX_SUCCESS != (rc = pmix_bfrop_unpack_string(buffer, &xmlbuffer, &cnt, PMIX_STRING))) { - goto cleanup; - } - if (NULL == xmlbuffer) { - goto cleanup; - } - /* convert the xml */ - if (0 != hwloc_topology_init(&t)) { - rc = PMIX_ERROR; - goto cleanup; - } - if (0 != hwloc_topology_set_xmlbuffer(t, xmlbuffer, strlen(xmlbuffer))) { - rc = PMIX_ERROR; - free(xmlbuffer); - hwloc_topology_destroy(t); - goto cleanup; - } - /* since we are loading this from an external source, we have to - * explicitly set a flag so hwloc sets things up correctly - */ - if (0 != hwloc_topology_set_flags(t, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES)) { - free(xmlbuffer); - rc = PMIX_ERROR; - hwloc_topology_destroy(t); - goto cleanup; - } - /* now load the topology */ - if (0 != hwloc_topology_load(t)) { - free(xmlbuffer); - rc = PMIX_ERROR; - hwloc_topology_destroy(t); - goto cleanup; - } - if (NULL != xmlbuffer) { - free(xmlbuffer); - } - - /* get the available support - hwloc unfortunately does - * not include this info in its xml import! 
- */ - support = (struct hwloc_topology_support*)hwloc_topology_get_support(t); - cnt = sizeof(struct hwloc_topology_discovery_support); - if (PMIX_SUCCESS != (rc = pmix_bfrop_unpack_byte(buffer, support->discovery, &cnt, PMIX_BYTE))) { - goto cleanup; - } - cnt = sizeof(struct hwloc_topology_cpubind_support); - if (PMIX_SUCCESS != (rc = pmix_bfrop_unpack_byte(buffer, support->cpubind, &cnt, PMIX_BYTE))) { - goto cleanup; - } - cnt = sizeof(struct hwloc_topology_membind_support); - if (PMIX_SUCCESS != (rc = pmix_bfrop_unpack_byte(buffer, support->membind, &cnt, PMIX_BYTE))) { - goto cleanup; - } - - /* pass it back */ - tarray[i] = t; - - /* track the number added */ - j++; - } - - cleanup: - *num_vals = j; - return rc; -} -#endif - pmix_status_t pmix_bfrop_unpack_modex(pmix_buffer_t *buffer, void *dest, int32_t *num_vals, pmix_data_type_t type) { @@ -1145,7 +1063,6 @@ pmix_status_t pmix_bfrop_unpack_modex(pmix_buffer_t *buffer, void *dest, return PMIX_SUCCESS; } - pmix_status_t pmix_bfrop_unpack_persist(pmix_buffer_t *buffer, void *dest, int32_t *num_vals, pmix_data_type_t type) { @@ -1354,6 +1271,7 @@ pmix_status_t pmix_bfrop_unpack_darray(pmix_buffer_t *buffer, void *dest, nbytes = sizeof(pmix_proc_t); break; case PMIX_BYTE_OBJECT: + case PMIX_COMPRESSED_STRING: nbytes = sizeof(pmix_byte_object_t); break; case PMIX_PERSIST: diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_hash_table.h b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_hash_table.h index 6ac212a6346..c27d9878c02 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_hash_table.h +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_hash_table.h @@ -75,10 +75,10 @@ typedef struct pmix_hash_table_t pmix_hash_table_t; * */ -int pmix_hash_table_init(pmix_hash_table_t* ht, size_t table_size); +PMIX_EXPORT int pmix_hash_table_init(pmix_hash_table_t* ht, size_t table_size); /* this could be the new init if people wanted a more general API */ -int pmix_hash_table_init2(pmix_hash_table_t* ht, size_t 
estimated_max_size, +PMIX_EXPORT int pmix_hash_table_init2(pmix_hash_table_t* ht, size_t estimated_max_size, int density_numer, int density_denom, int growth_numer, int growth_denom); @@ -103,7 +103,7 @@ static inline size_t pmix_hash_table_get_size(pmix_hash_table_t *ht) * */ -int pmix_hash_table_remove_all(pmix_hash_table_t *ht); +PMIX_EXPORT int pmix_hash_table_remove_all(pmix_hash_table_t *ht); /** * Retrieve value via uint32_t key. @@ -118,7 +118,7 @@ int pmix_hash_table_remove_all(pmix_hash_table_t *ht); * */ -int pmix_hash_table_get_value_uint32(pmix_hash_table_t* table, uint32_t key, +PMIX_EXPORT int pmix_hash_table_get_value_uint32(pmix_hash_table_t* table, uint32_t key, void** ptr); /** @@ -131,7 +131,7 @@ int pmix_hash_table_get_value_uint32(pmix_hash_table_t* table, uint32_t key, * */ -int pmix_hash_table_set_value_uint32(pmix_hash_table_t* table, uint32_t key, void* value); +PMIX_EXPORT int pmix_hash_table_set_value_uint32(pmix_hash_table_t* table, uint32_t key, void* value); /** * Remove value based on uint32_t key. @@ -142,7 +142,7 @@ int pmix_hash_table_set_value_uint32(pmix_hash_table_t* table, uint32_t key, voi * */ -int pmix_hash_table_remove_value_uint32(pmix_hash_table_t* table, uint32_t key); +PMIX_EXPORT int pmix_hash_table_remove_value_uint32(pmix_hash_table_t* table, uint32_t key); /** * Retrieve value via uint64_t key. @@ -157,7 +157,7 @@ int pmix_hash_table_remove_value_uint32(pmix_hash_table_t* table, uint32_t key); * */ -int pmix_hash_table_get_value_uint64(pmix_hash_table_t *table, uint64_t key, +PMIX_EXPORT int pmix_hash_table_get_value_uint64(pmix_hash_table_t *table, uint64_t key, void **ptr); /** @@ -170,7 +170,7 @@ int pmix_hash_table_get_value_uint64(pmix_hash_table_t *table, uint64_t key, * */ -int pmix_hash_table_set_value_uint64(pmix_hash_table_t *table, uint64_t key, void* value); +PMIX_EXPORT int pmix_hash_table_set_value_uint64(pmix_hash_table_t *table, uint64_t key, void* value); /** * Remove value based on uint64_t key. 
@@ -181,7 +181,7 @@ int pmix_hash_table_set_value_uint64(pmix_hash_table_t *table, uint64_t key, voi * */ -int pmix_hash_table_remove_value_uint64(pmix_hash_table_t *table, uint64_t key); +PMIX_EXPORT int pmix_hash_table_remove_value_uint64(pmix_hash_table_t *table, uint64_t key); /** * Retrieve value via arbitrary length binary key. @@ -196,7 +196,7 @@ int pmix_hash_table_remove_value_uint64(pmix_hash_table_t *table, uint64_t key); * */ -int pmix_hash_table_get_value_ptr(pmix_hash_table_t *table, const void* key, +PMIX_EXPORT int pmix_hash_table_get_value_ptr(pmix_hash_table_t *table, const void* key, size_t keylen, void **ptr); /** @@ -209,7 +209,7 @@ int pmix_hash_table_get_value_ptr(pmix_hash_table_t *table, const void* key, * */ -int pmix_hash_table_set_value_ptr(pmix_hash_table_t *table, const void* key, size_t keylen, void* value); +PMIX_EXPORT int pmix_hash_table_set_value_ptr(pmix_hash_table_t *table, const void* key, size_t keylen, void* value); /** * Remove value based on arbitrary length binary key. 
@@ -220,7 +220,7 @@ int pmix_hash_table_set_value_ptr(pmix_hash_table_t *table, const void* key, siz * */ -int pmix_hash_table_remove_value_ptr(pmix_hash_table_t *table, const void* key, size_t keylen); +PMIX_EXPORT int pmix_hash_table_remove_value_ptr(pmix_hash_table_t *table, const void* key, size_t keylen); /** The following functions are only for allowing iterating through @@ -245,7 +245,7 @@ int pmix_hash_table_remove_value_ptr(pmix_hash_table_t *table, const void* key, * */ -int pmix_hash_table_get_first_key_uint32(pmix_hash_table_t *table, uint32_t *key, +PMIX_EXPORT int pmix_hash_table_get_first_key_uint32(pmix_hash_table_t *table, uint32_t *key, void **value, void **node); @@ -263,7 +263,7 @@ int pmix_hash_table_get_first_key_uint32(pmix_hash_table_t *table, uint32_t *key * */ -int pmix_hash_table_get_next_key_uint32(pmix_hash_table_t *table, uint32_t *key, +PMIX_EXPORT int pmix_hash_table_get_next_key_uint32(pmix_hash_table_t *table, uint32_t *key, void **value, void *in_node, void **out_node); @@ -281,7 +281,7 @@ int pmix_hash_table_get_next_key_uint32(pmix_hash_table_t *table, uint32_t *key, * */ -int pmix_hash_table_get_first_key_uint64(pmix_hash_table_t *table, uint64_t *key, +PMIX_EXPORT int pmix_hash_table_get_first_key_uint64(pmix_hash_table_t *table, uint64_t *key, void **value, void **node); @@ -299,7 +299,7 @@ int pmix_hash_table_get_first_key_uint64(pmix_hash_table_t *table, uint64_t *key * */ -int pmix_hash_table_get_next_key_uint64(pmix_hash_table_t *table, uint64_t *key, +PMIX_EXPORT int pmix_hash_table_get_next_key_uint64(pmix_hash_table_t *table, uint64_t *key, void **value, void *in_node, void **out_node); @@ -318,7 +318,7 @@ int pmix_hash_table_get_next_key_uint64(pmix_hash_table_t *table, uint64_t *key, * */ -int pmix_hash_table_get_first_key_ptr(pmix_hash_table_t *table, void* *key, +PMIX_EXPORT int pmix_hash_table_get_first_key_ptr(pmix_hash_table_t *table, void* *key, size_t *key_size, void **value, void **node); @@ -337,7 +337,7 @@ 
int pmix_hash_table_get_first_key_ptr(pmix_hash_table_t *table, void* *key, * */ -int pmix_hash_table_get_next_key_ptr(pmix_hash_table_t *table, void* *key, +PMIX_EXPORT int pmix_hash_table_get_next_key_ptr(pmix_hash_table_t *table, void* *key, size_t *key_size, void **value, void *in_node, void **out_node); diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_hotel.h b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_hotel.h index f7958578f05..4972e4a74a8 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_hotel.h +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_hotel.h @@ -157,7 +157,7 @@ PMIX_CLASS_DECLARATION(pmix_hotel_t); * @return PMIX_SUCCESS if all initializations were succesful. Otherwise, * the error indicate what went wrong in the function. */ -int pmix_hotel_init(pmix_hotel_t *hotel, int num_rooms, +PMIX_EXPORT int pmix_hotel_init(pmix_hotel_t *hotel, int num_rooms, pmix_event_base_t *evbase, uint32_t eviction_timeout, int eviction_event_priority, diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.h b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.h index d82e1a16919..740da76ca10 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.h +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.h @@ -165,7 +165,7 @@ struct pmix_class_t { size_t cls_sizeof; /**< size of an object instance */ }; -extern int pmix_class_init_epoch; +PMIX_EXPORT extern int pmix_class_init_epoch; /** * For static initializations of OBJects. @@ -394,7 +394,7 @@ PMIX_CLASS_DECLARATION(pmix_object_t); * * @param class Pointer to class descriptor */ -void pmix_class_initialize(pmix_class_t *); +PMIX_EXPORT void pmix_class_initialize(pmix_class_t *); /** * Shut down the class system and release all memory @@ -405,7 +405,7 @@ void pmix_class_initialize(pmix_class_t *); * tools like valgrind and purify don't report still-reachable memory * upon process termination. 
*/ -int pmix_class_finalize(void); +PMIX_EXPORT int pmix_class_finalize(void); /** * Run the hierarchy of class constructors for this object, in a diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.h b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.h index aca3578fc07..b3f647f89de 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.h +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.h @@ -79,7 +79,7 @@ PMIX_CLASS_DECLARATION(pmix_pointer_array_t); * @return PMIX_SUCCESS if all initializations were succesfull. Otherwise, * the error indicate what went wrong in the function. */ -pmix_status_t pmix_pointer_array_init(pmix_pointer_array_t* array, +PMIX_EXPORT pmix_status_t pmix_pointer_array_init(pmix_pointer_array_t* array, int initial_allocation, int max_size, int block_size ); @@ -92,7 +92,7 @@ pmix_status_t pmix_pointer_array_init(pmix_pointer_array_t* array, * @return Index of inserted array element. Return value of * (-1) indicates an error. */ -int pmix_pointer_array_add(pmix_pointer_array_t *array, void *ptr); +PMIX_EXPORT int pmix_pointer_array_add(pmix_pointer_array_t *array, void *ptr); /** * Set the value of an element in array @@ -104,7 +104,7 @@ int pmix_pointer_array_add(pmix_pointer_array_t *array, void *ptr); * @return PMIX_SUCCESS if item was inserted. Otherwise, * the error indicate what went wrong in the function. */ -pmix_status_t pmix_pointer_array_set_item(pmix_pointer_array_t *array, +PMIX_EXPORT pmix_status_t pmix_pointer_array_set_item(pmix_pointer_array_t *array, int index, void *value); /** @@ -157,7 +157,7 @@ static inline int pmix_pointer_array_get_size(pmix_pointer_array_t *array) * Simple function to set the size of the array in order to * hide the member field from external users. 
*/ -pmix_status_t pmix_pointer_array_set_size(pmix_pointer_array_t *array, int size); +PMIX_EXPORT pmix_status_t pmix_pointer_array_set_size(pmix_pointer_array_t *array, int size); /** * Test whether a certain element is already in use. If not yet @@ -173,7 +173,7 @@ pmix_status_t pmix_pointer_array_set_size(pmix_pointer_array_t *array, int size) * In contrary to array_set, this function does not allow to overwrite * a value, unless the previous value is NULL ( equiv. to free ). */ -bool pmix_pointer_array_test_and_set_item (pmix_pointer_array_t *table, +PMIX_EXPORT bool pmix_pointer_array_test_and_set_item (pmix_pointer_array_t *table, int index, void *value); diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_ring_buffer.h b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_ring_buffer.h index 27baae7c33b..8e8d236bd60 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_ring_buffer.h +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_ring_buffer.h @@ -64,7 +64,7 @@ PMIX_CLASS_DECLARATION(pmix_ring_buffer_t); * @return PMIX_SUCCESS if all initializations were succesful. Otherwise, * the error indicate what went wrong in the function. */ -int pmix_ring_buffer_init(pmix_ring_buffer_t* ring, int size); +PMIX_EXPORT int pmix_ring_buffer_init(pmix_ring_buffer_t* ring, int size); /** * Push an item onto the ring buffer, displacing the oldest @@ -76,7 +76,7 @@ int pmix_ring_buffer_init(pmix_ring_buffer_t* ring, int size); * @return Pointer to displaced item, NULL if ring * is not yet full */ -void* pmix_ring_buffer_push(pmix_ring_buffer_t *ring, void *ptr); +PMIX_EXPORT void* pmix_ring_buffer_push(pmix_ring_buffer_t *ring, void *ptr); /** @@ -88,14 +88,14 @@ void* pmix_ring_buffer_push(pmix_ring_buffer_t *ring, void *ptr); * @return Error code. NULL indicates an error. 
*/ -void* pmix_ring_buffer_pop(pmix_ring_buffer_t *ring); +PMIX_EXPORT void* pmix_ring_buffer_pop(pmix_ring_buffer_t *ring); /* * Access an element of the ring, without removing it, indexed * starting at the tail - a value of -1 will return the element * at the head of the ring */ -void* pmix_ring_buffer_poke(pmix_ring_buffer_t *ring, int i); +PMIX_EXPORT void* pmix_ring_buffer_poke(pmix_ring_buffer_t *ring, int i); END_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_value_array.h b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_value_array.h index 84692d42d97..1b10a5e79ab 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_value_array.h +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_value_array.h @@ -126,7 +126,7 @@ static inline size_t pmix_value_array_get_size(pmix_value_array_t* array) * return the new size. */ -int pmix_value_array_set_size(pmix_value_array_t* array, size_t size); +PMIX_EXPORT int pmix_value_array_set_size(pmix_value_array_t* array, size_t size); /** diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c index 34dd19569c2..68eb2963aa6 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c @@ -45,6 +45,10 @@ #ifdef HAVE_SYS_TYPES_H #include #endif + +#if PMIX_HAVE_ZLIB +#include +#endif #include PMIX_EVENT_HEADER #include PMIX_EVENT2_THREAD_HEADER @@ -70,6 +74,9 @@ #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) #include "src/dstore/pmix_dstore.h" #endif /* PMIX_ENABLE_DSTORE */ +#ifdef HAVE_ZLIB_H +#include +#endif #include "pmix_client_ops.h" #include "src/include/pmix_jobdata.h" @@ -541,18 +548,104 @@ PMIX_EXPORT pmix_status_t PMIx_Abort(int flag, const char msg[], return PMIX_SUCCESS; } +#if PMIX_HAVE_ZLIB +static bool compress_string(char *instring, + uint8_t **outbytes, + size_t *nbytes) +{ + z_stream strm; + size_t len, outlen; + uint8_t *tmp, *ptr; + uint32_t inlen; + + /* set default 
output */ + *outbytes = NULL; + + /* setup the stream */ + inlen = strlen(instring); + memset (&strm, 0, sizeof (strm)); + deflateInit (&strm, 9); + + /* get an upper bound on the required output storage */ + len = deflateBound(&strm, inlen); + if (NULL == (tmp = (uint8_t*)malloc(len))) { + *outbytes = NULL; + return false; + } + strm.next_in = (uint8_t*)instring; + strm.avail_in = strlen(instring); + + /* allocating the upper bound guarantees zlib will + * always successfully compress into the available space */ + strm.avail_out = len; + strm.next_out = tmp; + + deflate (&strm, Z_FINISH); + deflateEnd (&strm); + + /* allocate 4 bytes beyond the size reqd by zlib so we + * can pass the size of the uncompressed string to the + * decompress side */ + outlen = len - strm.avail_out + sizeof(uint32_t); + ptr = (uint8_t*)malloc(outlen); + if (NULL == ptr) { + free(tmp); + return false; + } + *outbytes = ptr; + *nbytes = outlen; + + /* fold the uncompressed length into the buffer */ + memcpy(ptr, &inlen, sizeof(uint32_t)); + ptr += sizeof(uint32_t); + /* bring over the compressed data */ + memcpy(ptr, tmp, outlen-sizeof(uint32_t)); + free(tmp); + pmix_output_verbose(10, pmix_globals.debug_output, + "JOBDATA COMPRESS INPUT STRING OF LEN %d OUTPUT SIZE %lu", + inlen, outlen-sizeof(uint32_t)); + return true; // we did the compression +} +#else +static bool compress_string(char *instring, + uint8_t **outbytes, + size_t *nbytes) +{ + return false; // we did not compress +} +#endif + static void _putfn(int sd, short args, void *cbdata) { pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_status_t rc; pmix_kval_t *kv; pmix_nspace_t *ns; + uint8_t *tmp; + size_t len; /* setup to xfer the data */ kv = PMIX_NEW(pmix_kval_t); kv->key = strdup(cb->key); // need to copy as the input belongs to the user kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); - rc = pmix_value_xfer(kv->value, cb->value); + if (PMIX_STRING == cb->value->type && + PMIX_STRING_LIMIT < 
strlen(cb->value->data.string)) { + /* compress large strings */ + if (compress_string(cb->value->data.string, &tmp, &len)) { + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + rc = PMIX_ERR_NOMEM; + } + kv->value->type = PMIX_COMPRESSED_STRING; + kv->value->data.bo.bytes = (char*)tmp; + kv->value->data.bo.size = len; + rc = PMIX_SUCCESS; + } else { + rc = pmix_value_xfer(kv->value, cb->value); + } + } else { + rc = pmix_value_xfer(kv->value, cb->value); + } if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto done; @@ -595,7 +688,7 @@ static void _putfn(int sd, short args, void *cbdata) } } - done: + done: PMIX_RELEASE(kv); // maintain accounting cb->pstatus = rc; cb->active = false; diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c index 7e5dce6ece9..a7bc6c04468 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c @@ -45,6 +45,10 @@ #ifdef HAVE_SYS_TYPES_H #include #endif + +#if PMIX_HAVE_ZLIB +#include +#endif #include PMIX_EVENT_HEADER #include "src/class/pmix_list.h" @@ -73,6 +77,63 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, static void _value_cbfunc(pmix_status_t status, pmix_value_t *kv, void *cbdata); +#if PMIX_HAVE_ZLIB +static void uncompress_string(char **outstring, uint8_t *inbytes, size_t len) +{ + uint8_t *dest; + int32_t len2; + z_stream strm; + int rc; + + /* set the default error answer */ + *outstring = NULL; + + /* the first 4 bytes contains the uncompressed size */ + memcpy(&len2, inbytes, sizeof(uint32_t)); + + pmix_output_verbose(10, pmix_globals.debug_output, + "DECOMPRESSING INPUT OF LEN %lu OUTPUT %d", len, len2); + + /* setting destination to the fully decompressed size, +1 to + * hold the NULL terminator */ + dest = (uint8_t*)malloc(len2+1); + if (NULL == dest) { + return; + } + memset(dest, 0, len2+1); + + memset (&strm, 0, sizeof (strm)); + if (Z_OK != inflateInit(&strm)) { 
+ free(dest); + return; + } + strm.avail_in = len; + strm.next_in = (uint8_t*)(inbytes + sizeof(uint32_t)); + strm.avail_out = len2; + strm.next_out = (uint8_t*)dest; + + rc = inflate (&strm, Z_FINISH); + inflateEnd (&strm); + /* ensure this is NULL terminated! */ + dest[len2] = '\0'; + *outstring = (char*)dest; + pmix_output_verbose(10, pmix_globals.debug_output, + "\tFINAL LEN: %lu CODE: %d", strlen(*outstring), rc); + return; +} +#else +/* this can never actually be used - there is no way we should + * receive a PMIX_COMPRESSED_STRING unless we compressed it, + * which means PMIX_HAVE_ZLIB must have been true. Still, we + * include the stub just to avoid requiring #if's in the rest + * of the code */ +static void uncompress_string(char **outstring, uint8_t *inbytes, size_t len) +{ + *outstring = NULL; +} +#endif + + PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[], const pmix_info_t info[], size_t ninfo, pmix_value_t **val) @@ -249,6 +310,7 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, #if (PMIX_ENABLE_DSTORE != 1) pmix_rank_t cur_rank; #endif + char *tmp; pmix_output_verbose(2, pmix_globals.debug_output, "pmix: get_nb callback recvd"); @@ -370,6 +432,20 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, if (NULL != cb && NULL != cb->value_cbfunc) { if (NULL == val) { rc = PMIX_ERR_NOT_FOUND; + } else { + /* if this is a compressed string, then uncompress it */ + if (PMIX_COMPRESSED_STRING == val->type) { + uncompress_string(&tmp, (uint8_t*)val->data.bo.bytes, val->data.bo.size); + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + rc = PMIX_ERR_NOMEM; + PMIX_VALUE_RELEASE(val); + val = NULL; + } else { + PMIX_VALUE_DESTRUCT(val); + PMIX_VAL_ASSIGN(val, string, tmp); + } + } } cb->value_cbfunc(rc, val, cb->cbdata); } @@ -462,6 +538,7 @@ static void _getnbfn(int fd, short flags, void *cbdata) pmix_status_t rc; pmix_nspace_t *ns, *nptr; size_t n, nvals; + char *tmp; pmix_output_verbose(2, pmix_globals.debug_output, "pmix: 
getnbfn value for proc %s:%d key %s", @@ -550,7 +627,18 @@ static void _getnbfn(int fd, short flags, void *cbdata) for (n=0; n < (size_t)results.size && n < nvals; n++) { if (NULL != (info = (pmix_info_t*)pmix_pointer_array_get_item(&results, n))) { (void)strncpy(iptr[n].key, info->key, PMIX_MAX_KEYLEN); - pmix_value_xfer(&iptr[n].value, &info->value); + /* if this is a compressed string, then uncompress it */ + if (PMIX_COMPRESSED_STRING == info->value.type) { + iptr[n].value.type = PMIX_STRING; + uncompress_string(&iptr[n].value.data.string, + (uint8_t*)info->value.data.bo.bytes, + info->value.data.bo.size); + if (NULL == iptr[n].value.data.string) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + } + } else { + pmix_value_xfer(&iptr[n].value, &info->value); + } PMIX_INFO_FREE(info, 1); } } @@ -565,6 +653,19 @@ static void _getnbfn(int fd, short flags, void *cbdata) #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, cb->rank, cb->key, &val))) { + /* if this is a compressed string, then uncompress it */ + if (PMIX_COMPRESSED_STRING == val->type) { + uncompress_string(&tmp, (uint8_t*)val->data.bo.bytes, val->data.bo.size); + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + rc = PMIX_ERR_NOMEM; + PMIX_VALUE_RELEASE(val); + val = NULL; + } else { + PMIX_VALUE_DESTRUCT(val); + PMIX_VAL_ASSIGN(val, string, tmp); + } + } /* found it - we are in an event, so we can * just execute the callback */ cb->value_cbfunc(rc, val, cb->cbdata); @@ -586,6 +687,19 @@ static void _getnbfn(int fd, short flags, void *cbdata) #else if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, cb->rank, cb->key, &val))) { #endif + /* if this is a compressed string, then uncompress it */ + if (PMIX_COMPRESSED_STRING == val->type) { + uncompress_string(&tmp, (uint8_t*)val->data.bo.bytes, val->data.bo.size); + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + rc = PMIX_ERR_NOMEM; + PMIX_VALUE_RELEASE(val); + val = NULL; + } 
else { + PMIX_VALUE_DESTRUCT(val); + PMIX_VAL_ASSIGN(val, string, tmp); + } + } /* found it - we are in an event, so we can * just execute the callback */ cb->value_cbfunc(rc, val, cb->cbdata); @@ -635,6 +749,19 @@ static void _getnbfn(int fd, short flags, void *cbdata) if ( PMIX_SUCCESS == rc ) { pmix_output_verbose(2, pmix_globals.debug_output, "pmix_get[%d]: value retrieved from dstore", __LINE__); + /* if this is a compressed string, then uncompress it */ + if (PMIX_COMPRESSED_STRING == val->type) { + uncompress_string(&tmp, (uint8_t*)val->data.bo.bytes, val->data.bo.size); + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + rc = PMIX_ERR_NOMEM; + PMIX_VALUE_RELEASE(val); + val = NULL; + } else { + PMIX_VALUE_DESTRUCT(val); + PMIX_VAL_ASSIGN(val, string, tmp); + } + } /* found it - we are in an event, so we can * just execute the callback */ cb->value_cbfunc(rc, val, cb->cbdata); diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h index bedb6fcaed0..0de1071595e 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h @@ -23,7 +23,7 @@ typedef struct { pmix_list_t pending_requests; // list of pmix_cb_t pending data requests } pmix_client_globals_t; -extern pmix_client_globals_t pmix_client_globals; +PMIX_EXPORT extern pmix_client_globals_t pmix_client_globals; END_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_jobdata.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_jobdata.c index b2a953e56df..6c305b4b0fc 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_jobdata.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_jobdata.c @@ -20,6 +20,9 @@ #include "src/util/argv.h" #include "src/util/hash.h" #include "src/include/pmix_jobdata.h" +#ifdef HAVE_ZLIB_H +#include +#endif #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) #include "src/dstore/pmix_dstore.h" @@ -28,6 +31,73 @@ static inline 
int _add_key_for_rank(pmix_rank_t rank, pmix_kval_t *kv, void *cbdata); static inline pmix_status_t _job_data_store(const char *nspace, void *cbdata); +#ifdef HAVE_ZLIB_H +static bool compress_string(char *instring, + uint8_t **outbytes, + size_t *nbytes) +{ + z_stream strm; + size_t len, outlen; + uint8_t *tmp, *ptr; + uint32_t inlen; + + /* set default output */ + *outbytes = NULL; + + /* setup the stream */ + inlen = strlen(instring); + memset (&strm, 0, sizeof (strm)); + deflateInit (&strm, 9); + + /* get an upper bound on the required output storage */ + len = deflateBound(&strm, inlen); + if (NULL == (tmp = (uint8_t*)malloc(len))) { + *outbytes = NULL; + return false; + } + strm.next_in = (uint8_t*)instring; + strm.avail_in = strlen(instring); + + /* allocating the upper bound guarantees zlib will + * always successfully compress into the available space */ + strm.avail_out = len; + strm.next_out = tmp; + + deflate (&strm, Z_FINISH); + deflateEnd (&strm); + + /* allocate 4 bytes beyond the size reqd by zlib so we + * can pass the size of the uncompressed string to the + * decompress side */ + outlen = len - strm.avail_out + sizeof(uint32_t); + ptr = (uint8_t*)malloc(outlen); + if (NULL == ptr) { + free(tmp); + return false; + } + *outbytes = ptr; + *nbytes = outlen; + + /* fold the uncompressed length into the buffer */ + memcpy(ptr, &inlen, sizeof(uint32_t)); + ptr += sizeof(uint32_t); + /* bring over the compressed data */ + memcpy(ptr, tmp, outlen-sizeof(uint32_t)); + free(tmp); + pmix_output_verbose(10, pmix_globals.debug_output, + "JOBDATA COMPRESS INPUT STRING OF LEN %d OUTPUT SIZE %lu", + inlen, outlen-sizeof(uint32_t)); + return true; // we did the compression +} +#else +static bool compress_string(char *instring, + uint8_t **outbytes, + size_t *nbytes) +{ + return false; // we did not compress +} +#endif + static inline int _add_key_for_rank(pmix_rank_t rank, pmix_kval_t *kv, void *cbdata) { pmix_job_data_caddy_t *cb = 
(pmix_job_data_caddy_t*)(cbdata); @@ -135,13 +205,14 @@ static inline pmix_status_t _job_data_store(const char *nspace, void *cbdata) pmix_nspace_t *nsptr = NULL, *nsptr2 = NULL; pmix_kval_t *kptr, *kp2, kv; int32_t cnt; - size_t nnodes; + size_t nnodes, len; uint32_t i; #if !(defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)) uint32_t j; #endif pmix_nrec_t *nrec, *nr2; char **procs = NULL; + uint8_t *tmp; pmix_byte_object_t *bo; pmix_buffer_t buf2; int rank; @@ -149,7 +220,9 @@ static inline pmix_status_t _job_data_store(const char *nspace, void *cbdata) "server" : "client"; pmix_output_verbose(10, pmix_globals.debug_output, - "pmix:%s pmix_jobdata_store %s", proc_type_str, nspace); + "[%s:%d] pmix:%s pmix_jobdata_store %s", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + proc_type_str, nspace); /* check buf data */ if ((NULL == job_data) && (0 != job_data->bytes_used)) { @@ -213,6 +286,22 @@ static inline pmix_status_t _job_data_store(const char *nspace, void *cbdata) cnt = 1; kp2 = PMIX_NEW(pmix_kval_t); while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(&buf2, kp2, &cnt, PMIX_KVAL))) { + /* if the value contains a string that is longer than the + * limit, then compress it */ + if (PMIX_STRING == kp2->value->type && + PMIX_STRING_LIMIT < strlen(kp2->value->data.string)) { + if (compress_string(kp2->value->data.string, &tmp, &len)) { + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + rc = PMIX_ERR_NOMEM; + goto exit; + } + kp2->value->type = PMIX_COMPRESSED_STRING; + free(kp2->value->data.string); + kp2->value->data.bo.bytes = (char*)tmp; + kp2->value->data.bo.size = len; + } + } /* this is data provided by a job-level exchange, so store it * in the job-level data hash_table */ if (PMIX_SUCCESS != (rc = _add_key_for_rank(rank, kp2, cb))) { @@ -249,6 +338,22 @@ static inline pmix_status_t _job_data_store(const char *nspace, void *cbdata) PMIX_DESTRUCT(&kv); goto exit; } + /* if the value contains a string that is longer than the + * limit, then 
compress it */ + if (PMIX_STRING == kv.value->type && + PMIX_STRING_LIMIT < strlen(kv.value->data.string)) { + if (compress_string(kv.value->data.string, &tmp, &len)) { + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + rc = PMIX_ERR_NOMEM; + goto exit; + } + kv.value->type = PMIX_COMPRESSED_STRING; + free(kv.value->data.string); + kv.value->data.bo.bytes = (char*)tmp; + kv.value->data.bo.size = len; + } + } /* the name of the node is in the key, and the value is * a comma-delimited list of procs on that node. See if we already * have this node */ @@ -310,6 +415,22 @@ static inline pmix_status_t _job_data_store(const char *nspace, void *cbdata) /* cleanup */ PMIX_DESTRUCT(&buf2); } else { + /* if the value contains a string that is longer than the + * limit, then compress it */ + if (PMIX_STRING == kptr->value->type && + PMIX_STRING_LIMIT < strlen(kptr->value->data.string)) { + if (compress_string(kptr->value->data.string, &tmp, &len)) { + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + rc = PMIX_ERR_NOMEM; + goto exit; + } + kptr->value->type = PMIX_COMPRESSED_STRING; + free(kptr->value->data.string); + kptr->value->data.bo.bytes = (char*)tmp; + kptr->value->data.bo.size = len; + } + } if (PMIX_SUCCESS != (rc = _add_key_for_rank(PMIX_RANK_WILDCARD, kptr, cb))) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(kptr); diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c index a6f9d6c85d2..339b374edd5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c @@ -64,9 +64,9 @@ static void cbdes(pmix_cb_t *p) { PMIX_DESTRUCT(&p->data); } -PMIX_CLASS_INSTANCE(pmix_cb_t, - pmix_list_item_t, - cbcon, cbdes); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_cb_t, + pmix_list_item_t, + cbcon, cbdes); static void pcon(pmix_peer_t *p) { @@ -106,9 +106,9 @@ static void pdes(pmix_peer_t *p) PMIX_RELEASE(p->recv_msg); } } -PMIX_CLASS_INSTANCE(pmix_peer_t, - 
pmix_object_t, - pcon, pdes); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_peer_t, + pmix_object_t, + pcon, pdes); static void nscon(pmix_nspace_t *p) { @@ -142,9 +142,9 @@ static void nsdes(pmix_nspace_t *p) PMIX_RELEASE(p->server); } } -PMIX_CLASS_INSTANCE(pmix_nspace_t, - pmix_list_item_t, - nscon, nsdes); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_nspace_t, + pmix_list_item_t, + nscon, nsdes); static void ncon(pmix_nrec_t *p) { @@ -160,9 +160,9 @@ static void ndes(pmix_nrec_t *p) free(p->procs); } } -PMIX_CLASS_INSTANCE(pmix_nrec_t, - pmix_list_item_t, - ncon, ndes); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_nrec_t, + pmix_list_item_t, + ncon, ndes); static void sncon(pmix_server_nspace_t *p) { @@ -185,9 +185,9 @@ static void sndes(pmix_server_nspace_t *p) PMIX_DESTRUCT(&p->myremote); PMIX_DESTRUCT(&p->remote); } -PMIX_CLASS_INSTANCE(pmix_server_nspace_t, - pmix_object_t, - sncon, sndes); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_server_nspace_t, + pmix_object_t, + sncon, sndes); static void info_con(pmix_rank_info_t *info) { @@ -204,9 +204,9 @@ static void info_des(pmix_rank_info_t *info) PMIX_RELEASE(info->nptr); } } -PMIX_CLASS_INSTANCE(pmix_rank_info_t, - pmix_list_item_t, - info_con, info_des); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_rank_info_t, + pmix_list_item_t, + info_con, info_des); static void scon(pmix_shift_caddy_t *p) { @@ -236,9 +236,9 @@ static void scdes(pmix_shift_caddy_t *p) PMIX_RELEASE(p->kv); } } -PMIX_CLASS_INSTANCE(pmix_shift_caddy_t, - pmix_object_t, - scon, scdes); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_shift_caddy_t, + pmix_object_t, + scon, scdes); PMIX_CLASS_INSTANCE(pmix_info_caddy_t, pmix_list_item_t, diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h index 9709f4a8801..1ba7c583e9c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h @@ -45,6 +45,7 @@ BEGIN_C_DECLS /* some limits */ #define PMIX_MAX_CRED_SIZE 
131072 // set max at 128kbytes #define PMIX_MAX_ERR_CONSTANT INT_MIN +#define PMIX_STRING_LIMIT 512 // max length of string before compressing /**** ENUM DEFINITIONS ****/ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var.h b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var.h index 8888a22b2de..1a26a4ba6be 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var.h @@ -400,7 +400,7 @@ int pmix_mca_base_var_init(void); * the caller may free the original string after this function returns * successfully. */ -int pmix_mca_base_var_register (const char *project_name, const char *framework_name, +PMIX_EXPORT int pmix_mca_base_var_register (const char *project_name, const char *framework_name, const char *component_name, const char *variable_name, const char *description, pmix_mca_base_var_type_t type, pmix_mca_base_var_enum_t *enumerator, int bind, pmix_mca_base_var_flag_t flags, @@ -416,7 +416,7 @@ int pmix_mca_base_var_register (const char *project_name, const char *framework_ * be unregistered / made unavailable when that component is closed by * its framework. */ -int pmix_mca_base_component_var_register (const pmix_mca_base_component_t *component, +PMIX_EXPORT int pmix_mca_base_component_var_register (const pmix_mca_base_component_t *component, const char *variable_name, const char *description, pmix_mca_base_var_type_t type, pmix_mca_base_var_enum_t *enumerator, int bind, pmix_mca_base_var_flag_t flags, @@ -428,7 +428,7 @@ int pmix_mca_base_component_var_register (const pmix_mca_base_component_t *compo * function is equivalent to pmix_mca_base_var_register with component_name = "base" and * with the MCA_BASE_VAR_FLAG_DWG set. See pmix_mca_base_var_register(). 
*/ -int pmix_mca_base_framework_var_register (const pmix_mca_base_framework_t *framework, +PMIX_EXPORT int pmix_mca_base_framework_var_register (const pmix_mca_base_framework_t *framework, const char *variable_name, const char *help_msg, pmix_mca_base_var_type_t type, pmix_mca_base_var_enum_t *enumerator, int bind, @@ -471,7 +471,7 @@ int pmix_mca_base_framework_var_register (const pmix_mca_base_framework_t *frame * variable names "B" and "C" (and does *not* set a value for * "A"), it is undefined as to which value will be used. */ -int pmix_mca_base_var_register_synonym (int synonym_for, const char *project_name, +PMIX_EXPORT int pmix_mca_base_var_register_synonym (int synonym_for, const char *project_name, const char *framework_name, const char *component_name, const char *synonym_name, diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/base.h b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/base.h index 30ee996e66b..e68c3cfe378 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/base.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/base.h @@ -33,9 +33,6 @@ #ifdef HAVE_STRING_H #include #endif -#if PMIX_HAVE_HWLOC -#include PMIX_HWLOC_HEADER -#endif #include "src/class/pmix_pointer_array.h" #include "src/mca/mca.h" diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/base.h b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/base.h index 6bf1eccb3ee..a99e277f5f0 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/base.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/base.h @@ -33,9 +33,6 @@ #ifdef HAVE_STRING_H #include #endif -#if PMIX_HAVE_HWLOC -#include PMIX_HWLOC_HEADER -#endif #include "src/class/pmix_pointer_array.h" #include "src/mca/mca.h" @@ -49,7 +46,7 @@ /* * MCA Framework */ -extern pmix_mca_base_framework_t pmix_ptl_base_framework; +PMIX_EXPORT extern pmix_mca_base_framework_t pmix_ptl_base_framework; /** * PTL select function * @@ -82,7 +79,7 @@ struct pmix_ptl_globals_t { }; typedef struct pmix_ptl_globals_t pmix_ptl_globals_t; 
-extern pmix_ptl_globals_t pmix_ptl_globals; +PMIX_EXPORT extern pmix_ptl_globals_t pmix_ptl_globals; /* API stubs */ PMIX_EXPORT pmix_status_t pmix_ptl_stub_set_notification_cbfunc(pmix_ptl_cbfunc_t cbfunc); diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c index 2b67cb4631a..9942bd5b34e 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c @@ -157,9 +157,9 @@ static void srcon(pmix_ptl_sr_t *p) p->cbfunc = NULL; p->cbdata = NULL; } -PMIX_CLASS_INSTANCE(pmix_ptl_sr_t, - pmix_object_t, - srcon, NULL); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_ptl_sr_t, + pmix_object_t, + srcon, NULL); static void pccon(pmix_pending_connection_t *p) { @@ -186,9 +186,9 @@ static void pcdes(pmix_pending_connection_t *p) free(p->cred); } } -PMIX_CLASS_INSTANCE(pmix_pending_connection_t, - pmix_object_t, - pccon, pcdes); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_pending_connection_t, + pmix_object_t, + pccon, pcdes); static void lcon(pmix_listener_t *p) { @@ -211,10 +211,10 @@ static void ldes(pmix_listener_t *p) free(p->uri); } } -PMIX_CLASS_INSTANCE(pmix_listener_t, - pmix_list_item_t, - lcon, ldes); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_listener_t, + pmix_list_item_t, + lcon, ldes); -PMIX_CLASS_INSTANCE(pmix_ptl_queue_t, - pmix_object_t, - NULL, NULL); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_ptl_queue_t, + pmix_object_t, + NULL, NULL); diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c index 097d71e6454..5a1d4067a5f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c @@ -73,7 +73,7 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, * Instantiate the public struct with all of our public information * and pointers to our public 
functions in it */ - pmix_ptl_tcp_component_t mca_ptl_tcp_component = { + PMIX_EXPORT pmix_ptl_tcp_component_t mca_ptl_tcp_component = { .super = { .base = { PMIX_PTL_BASE_VERSION_1_0_0, diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c index 6eba2a42d54..2bc7dc31afc 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c @@ -73,7 +73,7 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ - pmix_ptl_usock_component_t mca_ptl_usock_component = { +PMIX_EXPORT pmix_ptl_usock_component_t mca_ptl_usock_component = { .super = { .base = { PMIX_PTL_BASE_VERSION_1_0_0, diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c index 97291c8db3f..1db60f58996 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c @@ -286,6 +286,7 @@ static void _register_nspace(int sd, short args, void *cbdata) pmix_info_t *iptr; pmix_value_t val; char *msg; + bool nodata = false; #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) pmix_buffer_t *jobdata = PMIX_NEW(pmix_buffer_t); char *nspace = NULL; @@ -335,6 +336,14 @@ static void _register_nspace(int sd, short args, void *cbdata) "pmix:server _register_nspace recording %s", cd->info[i].key); + if (0 == strcmp(cd->info[i].key, PMIX_REGISTER_NODATA)) { + /* we don't want to save any job data for this nspace */ + nodata = true; + /* free anything that was previously stored */ + PMIX_DESTRUCT(&nptr->server->job_info); + PMIX_CONSTRUCT(&nptr->server->job_info, pmix_buffer_t); + break; + } if (0 == strcmp(cd->info[i].key, PMIX_NODE_MAP)) { /* parse the regex to get the argv array of node names 
*/ if (PMIX_SUCCESS != (rc = pmix_regex_parse_nodes(cd->info[i].value.data.string, &nodes))) { @@ -436,19 +445,21 @@ static void _register_nspace(int sd, short args, void *cbdata) PMIX_ERROR_LOG(rc); goto release; } - pmix_bfrop.copy_payload(jobdata, &nptr->server->job_info); - pmix_bfrop.copy_payload(jobdata, &pmix_server_globals.gdata); + if (!nodata) { + pmix_bfrop.copy_payload(jobdata, &nptr->server->job_info); + pmix_bfrop.copy_payload(jobdata, &pmix_server_globals.gdata); - /* unpack the nspace - we don't really need it, but have to - * unpack it to maintain sequence */ - cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(jobdata, &nspace, &cnt, PMIX_STRING))) { - PMIX_ERROR_LOG(rc); - goto release; - } - if (PMIX_SUCCESS != (rc = pmix_job_data_dstore_store(cd->proc.nspace, jobdata))) { - PMIX_ERROR_LOG(rc); - goto release; + /* unpack the nspace - we don't really need it, but have to + * unpack it to maintain sequence */ + cnt = 1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(jobdata, &nspace, &cnt, PMIX_STRING))) { + PMIX_ERROR_LOG(rc); + goto release; + } + if (PMIX_SUCCESS != (rc = pmix_job_data_dstore_store(cd->proc.nspace, jobdata))) { + PMIX_ERROR_LOG(rc); + goto release; + } } #endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c index 08ba88d895b..d6ca188fb6a 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c @@ -1558,9 +1558,9 @@ static void scadcon(pmix_setup_caddy_t *p) static void scaddes(pmix_setup_caddy_t *p) { } -PMIX_CLASS_INSTANCE(pmix_setup_caddy_t, - pmix_object_t, - scadcon, scaddes); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_setup_caddy_t, + pmix_object_t, + scadcon, scaddes); static void ncon(pmix_notify_caddy_t *p) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h index 2188cd0103a..2abb45f6f97 100644 --- 
a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h @@ -217,7 +217,7 @@ pmix_status_t pmix_server_event_recvd_from_client(pmix_peer_t *peer, void *cbdata); void pmix_server_execute_collective(int sd, short args, void *cbdata); -extern pmix_server_module_t pmix_host_server; -extern pmix_server_globals_t pmix_server_globals; +PMIX_EXPORT extern pmix_server_module_t pmix_host_server; +PMIX_EXPORT extern pmix_server_globals_t pmix_server_globals; #endif // PMIX_SERVER_OPS_H diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/argv.h b/opal/mca/pmix/pmix2x/pmix/src/util/argv.h index 08ba066c406..44d83e7562c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/argv.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/argv.h @@ -74,7 +74,7 @@ BEGIN_C_DECLS * value into the argv array; there is no need to keep the original * string (i.e., the arg parameter) after invoking this function. */ -pmix_status_t pmix_argv_append(int *argc, char ***argv, const char *arg) __pmix_attribute_nonnull__(1) __pmix_attribute_nonnull__(3); +PMIX_EXPORT pmix_status_t pmix_argv_append(int *argc, char ***argv, const char *arg) __pmix_attribute_nonnull__(1) __pmix_attribute_nonnull__(3); /** * Append to an argv-style array, but ignore the size of the array. @@ -91,7 +91,7 @@ pmix_status_t pmix_argv_append(int *argc, char ***argv, const char *arg) __pmix_ * argv-style arrays that do not have integers that are actively * maintaing their sizes. 
*/ -pmix_status_t pmix_argv_append_nosize(char ***argv, const char *arg); +PMIX_EXPORT pmix_status_t pmix_argv_append_nosize(char ***argv, const char *arg); /** * Insert the provided arg at the beginning of the array @@ -102,7 +102,7 @@ pmix_status_t pmix_argv_append_nosize(char ***argv, const char *arg); * @retval PMIX_SUCCESS On success * @retval PMIX_ERROR On failure */ -pmix_status_t pmix_argv_prepend_nosize(char ***argv, const char *arg); +PMIX_EXPORT pmix_status_t pmix_argv_prepend_nosize(char ***argv, const char *arg); /** * Append to an argv-style array, but only if the provided argument @@ -119,7 +119,7 @@ pmix_status_t pmix_argv_prepend_nosize(char ***argv, const char *arg); * except that it only appends the provided argument if it does not already * exist in the provided array, or overwrites it if it is. */ -pmix_status_t pmix_argv_append_unique_nosize(char ***argv, const char *arg, bool overwrite); +PMIX_EXPORT pmix_status_t pmix_argv_append_unique_nosize(char ***argv, const char *arg, bool overwrite); /** * Free a NULL-terminated argv array. @@ -134,7 +134,7 @@ pmix_status_t pmix_argv_append_unique_nosize(char ***argv, const char *arg, bool * not safe to invoke this function with a non-NULL-terminated argv * array. */ -void pmix_argv_free(char **argv); +PMIX_EXPORT void pmix_argv_free(char **argv); /** * Split a string into a NULL-terminated argv array. Do not include empty @@ -151,7 +151,7 @@ void pmix_argv_free(char **argv); * argument (i.e., it can be freed after calling this function * without invalidating the output argv). */ -char **pmix_argv_split(const char *src_string, int delimiter) __pmix_attribute_malloc__ __pmix_attribute_warn_unused_result__; +PMIX_EXPORT char **pmix_argv_split(const char *src_string, int delimiter) __pmix_attribute_malloc__ __pmix_attribute_warn_unused_result__; /** * Split a string into a NULL-terminated argv array. 
Include empty @@ -168,7 +168,7 @@ char **pmix_argv_split(const char *src_string, int delimiter) __pmix_attribute_m * argument (i.e., it can be freed after calling this function * without invalidating the output argv). */ -char **pmix_argv_split_with_empty(const char *src_string, int delimiter) __pmix_attribute_malloc__ __pmix_attribute_warn_unused_result__; +PMIX_EXPORT char **pmix_argv_split_with_empty(const char *src_string, int delimiter) __pmix_attribute_malloc__ __pmix_attribute_warn_unused_result__; /** * Return the length of a NULL-terminated argv array. @@ -180,7 +180,7 @@ char **pmix_argv_split_with_empty(const char *src_string, int delimiter) __pmix_ * * The argv array must be NULL-terminated. */ -int pmix_argv_count(char **argv); +PMIX_EXPORT int pmix_argv_count(char **argv); /** * Join all the elements of an argv array into a single @@ -198,9 +198,9 @@ int pmix_argv_count(char **argv); * * It is the callers responsibility to free the returned string. */ -char *pmix_argv_join(char **argv, int delimiter) __pmix_attribute_malloc__ __pmix_attribute_warn_unused_result__; +PMIX_EXPORT char *pmix_argv_join(char **argv, int delimiter) __pmix_attribute_malloc__ __pmix_attribute_warn_unused_result__; -char *pmix_argv_join_range(char **argv, size_t start, size_t end, int delimiter) __pmix_attribute_malloc__ __pmix_attribute_warn_unused_result__; +PMIX_EXPORT char *pmix_argv_join_range(char **argv, size_t start, size_t end, int delimiter) __pmix_attribute_malloc__ __pmix_attribute_warn_unused_result__; /** * Return the number of bytes consumed by an argv array. @@ -211,7 +211,7 @@ char *pmix_argv_join_range(char **argv, size_t start, size_t end, int delimiter) * array. This includes the number of bytes used by each of the * strings as well as the pointers used in the argv array. */ -size_t pmix_argv_len(char **argv); +PMIX_EXPORT size_t pmix_argv_len(char **argv); /** * Copy a NULL-terminated argv array. 
@@ -225,7 +225,7 @@ size_t pmix_argv_len(char **argv); * Specifically, the output argv will be an array of the same length * as the input argv, and strcmp(argv_in[i], argv_out[i]) will be 0. */ -char **pmix_argv_copy(char **argv) __pmix_attribute_malloc__ __pmix_attribute_warn_unused_result__; +PMIX_EXPORT char **pmix_argv_copy(char **argv) __pmix_attribute_malloc__ __pmix_attribute_warn_unused_result__; /** * Delete one or more tokens from the middle of an argv. @@ -252,7 +252,7 @@ char **pmix_argv_copy(char **argv) __pmix_attribute_malloc__ __pmix_attribute_wa * free()ed (it is assumed that the argv "owns" the memory that * the pointer points to). */ -pmix_status_t pmix_argv_delete(int *argc, char ***argv, +PMIX_EXPORT pmix_status_t pmix_argv_delete(int *argc, char ***argv, int start, int num_to_delete); /** @@ -276,7 +276,7 @@ pmix_status_t pmix_argv_delete(int *argc, char ***argv, * source points to are strdup'ed into the new locations in * target). */ -pmix_status_t pmix_argv_insert(char ***target, int start, char **source); +PMIX_EXPORT pmix_status_t pmix_argv_insert(char ***target, int start, char **source); /** * Insert one argv element in front of a specific position in an array @@ -299,7 +299,7 @@ pmix_status_t pmix_argv_insert(char ***target, int start, char **source); * source points to is strdup'ed into the new location in * target). */ -pmix_status_t pmix_argv_insert_element(char ***target, int location, char *source); +PMIX_EXPORT pmix_status_t pmix_argv_insert_element(char ***target, int location, char *source); END_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/fd.h b/opal/mca/pmix/pmix2x/pmix/src/util/fd.h index ffd52e3167d..d67fe248359 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/fd.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/fd.h @@ -36,7 +36,7 @@ BEGIN_C_DECLS * Loop over reading from the fd until len bytes are read or an error * occurs. EAGAIN and EINTR are transparently handled. 
*/ -pmix_status_t pmix_fd_read(int fd, int len, void *buffer); +PMIX_EXPORT pmix_status_t pmix_fd_read(int fd, int len, void *buffer); /** * Write a complete buffer to a file descriptor. @@ -51,7 +51,7 @@ pmix_status_t pmix_fd_read(int fd, int len, void *buffer); * Loop over writing to the fd until len bytes are written or an error * occurs. EAGAIN and EINTR are transparently handled. */ -pmix_status_t pmix_fd_write(int fd, int len, const void *buffer); +PMIX_EXPORT pmix_status_t pmix_fd_write(int fd, int len, const void *buffer); /** * Convenience function to set a file descriptor to be close-on-exec. @@ -65,7 +65,7 @@ pmix_status_t pmix_fd_write(int fd, int len, const void *buffer); * This is simply a convenience function because there's a few steps * to setting a file descriptor to be close-on-exec. */ -pmix_status_t pmix_fd_set_cloexec(int fd); +PMIX_EXPORT pmix_status_t pmix_fd_set_cloexec(int fd); END_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/os_path.h b/opal/mca/pmix/pmix2x/pmix/src/util/os_path.h index 7057c49524c..715a5c84359 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/os_path.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/os_path.h @@ -65,7 +65,7 @@ BEGIN_C_DECLS * appropriate to the local operating system. The path_name string has been malloc'd * and therefore the user is responsible for free'ing the field. */ -char *pmix_os_path(bool relative, ...) __pmix_attribute_malloc__ __pmix_attribute_sentinel__ __pmix_attribute_warn_unused_result__; +PMIX_EXPORT char *pmix_os_path(bool relative, ...) __pmix_attribute_malloc__ __pmix_attribute_sentinel__ __pmix_attribute_warn_unused_result__; /** * Convert the path to be OS friendly. 
On UNIX this function will diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/show_help.h b/opal/mca/pmix/pmix2x/pmix/src/util/show_help.h index 69c4047ddc3..b028c99ca0a 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/show_help.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/show_help.h @@ -131,7 +131,7 @@ int pmix_show_help_finalize(void); */ typedef int (*pmix_show_help_fn_t)(const char *filename, const char *topic, bool want_error_header, ...); -extern pmix_show_help_fn_t pmix_show_help; +PMIX_EXPORT extern pmix_show_help_fn_t pmix_show_help; /** * This function does the same thing as pmix_show_help(), but accepts @@ -139,23 +139,23 @@ extern pmix_show_help_fn_t pmix_show_help; */ typedef int (*pmix_show_vhelp_fn_t)(const char *filename, const char *topic, bool want_error_header, va_list ap); -extern pmix_show_vhelp_fn_t pmix_show_vhelp; +PMIX_EXPORT extern pmix_show_vhelp_fn_t pmix_show_vhelp; /** * This function does the same thing as pmix_show_help(), but returns * its output in a string (that must be freed by the caller). */ -char* pmix_show_help_string(const char *filename, - const char *topic, - bool want_error_header, ...); +PMIX_EXPORT char* pmix_show_help_string(const char *filename, + const char *topic, + bool want_error_header, ...); /** * This function does the same thing as pmix_show_help_string(), but * accepts a va_list form of varargs. 
*/ -char* pmix_show_help_vstring(const char *filename, - const char *topic, - bool want_error_header, va_list ap); +PMIX_EXPORT char* pmix_show_help_vstring(const char *filename, + const char *topic, + bool want_error_header, va_list ap); /** * This function adds another search location for the files that diff --git a/opal/mca/pmix/pmix2x/pmix2x_client.c b/opal/mca/pmix/pmix2x/pmix2x_client.c index fe0de9d59c3..c2728a68e49 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_client.c +++ b/opal/mca/pmix/pmix2x/pmix2x_client.c @@ -192,7 +192,6 @@ int pmix2x_store_local(const opal_process_name_t *proc, opal_value_t *val) } } if (NULL == job) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); return OPAL_ERR_NOT_FOUND; } (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_south.c b/opal/mca/pmix/pmix2x/pmix2x_server_south.c index f18dadb49fa..06c2f3cea13 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_south.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_south.c @@ -147,9 +147,12 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, /* as we might want to use some client-side functions, be sure * to register our own nspace */ + PMIX_INFO_CREATE(pinfo, 1); + PMIX_INFO_LOAD(&pinfo[0], PMIX_REGISTER_NODATA, NULL, PMIX_BOOL); active = true; - PMIx_server_register_nspace(job->nspace, 1, NULL, 0, op2cbfunc, (void*)&active); + PMIx_server_register_nspace(job->nspace, 1, pinfo, 1, op2cbfunc, (void*)&active); PMIX_WAIT_FOR_COMPLETION(active); + PMIX_INFO_FREE(pinfo, 1); return OPAL_SUCCESS; } diff --git a/opal/mca/pmix/s1/pmix_s1.c b/opal/mca/pmix/s1/pmix_s1.c index fdec5ec729e..888752c93e0 100644 --- a/opal/mca/pmix/s1/pmix_s1.c +++ b/opal/mca/pmix/s1/pmix_s1.c @@ -538,6 +538,7 @@ static void fencenb(int sd, short args, void *cbdata) int rc = OPAL_SUCCESS; int32_t i; opal_value_t *kp, kvn; + hwloc_topology_t topo = NULL; opal_hwloc_locality_t locality; opal_process_name_t s1_pname; @@ -564,6 +565,9 @@ static void fencenb(int sd, short args, void 
*cbdata) got_modex_data = true; /* we only need to set locality for each local rank as "not found" * equates to "non-local" */ + if (NULL == (topo = opal_hwloc_base_get_topology())) { + goto cleanup; + } for (i=0; i < nlranks; i++) { s1_pname.vpid = lranks[i]; rc = opal_pmix_base_cache_keys_locally(&s1_pname, OPAL_PMIX_CPUSET, @@ -579,7 +583,7 @@ static void fencenb(int sd, short args, void *cbdata) locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; } else { /* determine relative location on our node */ - locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, + locality = opal_hwloc_base_get_relative_locality(topo, opal_process_info.cpuset, kp->data.string); } @@ -602,6 +606,9 @@ static void fencenb(int sd, short args, void *cbdata) } cleanup: + if (NULL != topo) { + opal_hwloc_base_free_topology(topo); + } if (NULL != op->opcbfunc) { op->opcbfunc(rc, op->cbdata); } diff --git a/opal/mca/pmix/s2/pmix_s2.c b/opal/mca/pmix/s2/pmix_s2.c index c4e2e8005a2..72f546c28ba 100644 --- a/opal/mca/pmix/s2/pmix_s2.c +++ b/opal/mca/pmix/s2/pmix_s2.c @@ -555,6 +555,7 @@ static void fencenb(int sd, short args, void *cbdata) int rc = OPAL_SUCCESS; int32_t i; opal_value_t *kp, kvn; + hwloc_topology_t topo = NULL; opal_hwloc_locality_t locality; opal_process_name_t pname; @@ -581,6 +582,9 @@ static void fencenb(int sd, short args, void *cbdata) got_modex_data = true; /* we only need to set locality for each local rank as "not found" * equates to "non-local" */ + if (NULL == (topo = opal_hwloc_base_get_topology())) { + goto cleanup; + } for (i=0; i < s2_nlranks; i++) { pname.vpid = s2_lranks[i]; rc = opal_pmix_base_cache_keys_locally(&s2_pname, OPAL_PMIX_CPUSET, @@ -596,7 +600,7 @@ static void fencenb(int sd, short args, void *cbdata) locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; } else { /* determine relative location on our node */ - locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, + locality = 
opal_hwloc_base_get_relative_locality(topo, opal_process_info.cpuset, kp->data.string); } @@ -619,6 +623,9 @@ static void fencenb(int sd, short args, void *cbdata) } cleanup: + if (NULL != topo) { + opal_hwloc_base_free_topology(topo); + } if (NULL != op->opcbfunc) { op->opcbfunc(rc, op->cbdata); } diff --git a/orte/mca/ess/base/ess_base_fns.c b/orte/mca/ess/base/ess_base_fns.c index 1458ca56f84..79998cd24ff 100644 --- a/orte/mca/ess/base/ess_base_fns.c +++ b/orte/mca/ess/base/ess_base_fns.c @@ -12,7 +12,7 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -46,6 +46,7 @@ int orte_ess_base_proc_binding(void) { + hwloc_topology_t topo = NULL; hwloc_obj_t node, obj; hwloc_cpuset_t cpus, nodeset; hwloc_obj_type_t target; @@ -77,17 +78,17 @@ int orte_ess_base_proc_binding(void) "%s Not bound at launch", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* we were not bound at launch */ - if (NULL == opal_hwloc_topology) { + if (NULL == (topo = opal_hwloc_base_get_topology())) { /* there is nothing we can do, so just return */ return ORTE_SUCCESS; } - support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology); + support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo); /* get our node object */ - node = hwloc_get_root_obj(opal_hwloc_topology); - nodeset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, node); + node = hwloc_get_root_obj(topo); + nodeset = opal_hwloc_base_get_available_cpus(topo, node); /* get our bindings */ cpus = hwloc_bitmap_alloc(); - if (hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS) < 0) { + if (hwloc_get_cpubind(topo, cpus, 
HWLOC_CPUBIND_PROCESS) < 0) { /* we are NOT bound if get_cpubind fails, nor can we be bound - the * environment does not support it */ @@ -121,13 +122,13 @@ int orte_ess_base_proc_binding(void) hwloc_bitmap_zero(cpus); if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { if (OPAL_SUCCESS != (ret = opal_hwloc_base_slot_list_parse(opal_hwloc_base_slot_list, - opal_hwloc_topology, + topo, OPAL_HWLOC_LOGICAL, cpus))) { error = "Setting processor affinity failed"; hwloc_bitmap_free(cpus); goto error; } - if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { + if (0 > hwloc_set_cpubind(topo, cpus, 0)) { error = "Setting processor affinity failed"; hwloc_bitmap_free(cpus); goto error; @@ -156,14 +157,14 @@ int orte_ess_base_proc_binding(void) * hwthread on this node */ if (OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { - if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU, + if (NULL == (obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_PU, 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) { ret = ORTE_ERR_NOT_FOUND; error = "Getting hwthread object"; goto error; } - cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); - if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { + cpus = opal_hwloc_base_get_available_cpus(topo, obj); + if (0 > hwloc_set_cpubind(topo, cpus, 0)) { ret = ORTE_ERROR; error = "Setting processor affinity failed"; goto error; @@ -177,14 +178,14 @@ int orte_ess_base_proc_binding(void) /* if the binding policy is core, then we bind to the nrank-th * core on this node */ - if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE, + if (NULL == (obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_CORE, 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) { ret = ORTE_ERR_NOT_FOUND; error = "Getting core object"; goto error; } - cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, 
obj); - if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { + cpus = opal_hwloc_base_get_available_cpus(topo, obj); + if (0 > hwloc_set_cpubind(topo, cpus, 0)) { error = "Setting processor affinity failed"; ret = ORTE_ERROR; goto error; @@ -197,7 +198,7 @@ int orte_ess_base_proc_binding(void) /* for all higher binding policies, we bind to the specified * object that the nrank-th core belongs to */ - if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE, + if (NULL == (obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_CORE, 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) { ret = ORTE_ERR_NOT_FOUND; error = "Getting core object"; @@ -227,8 +228,8 @@ int orte_ess_base_proc_binding(void) continue; } /* this is the place! */ - cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); - if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { + cpus = opal_hwloc_base_get_available_cpus(topo, obj); + if (0 > hwloc_set_cpubind(topo, cpus, 0)) { ret = ORTE_ERROR; error = "Setting processor affinity failed"; goto error; @@ -260,49 +261,57 @@ int orte_ess_base_proc_binding(void) /* get or update our local cpuset - it will get used multiple * times, so it's more efficient to keep a global copy */ - opal_hwloc_base_get_local_cpuset(); + opal_hwloc_base_get_local_cpuset(topo); - /* get the cpus we are bound to */ - mycpus = hwloc_bitmap_alloc(); - if (hwloc_get_cpubind(opal_hwloc_topology, - mycpus, - HWLOC_CPUBIND_PROCESS) < 0) { - if (NULL != orte_process_info.cpuset) { - free(orte_process_info.cpuset); - orte_process_info.cpuset = NULL; - } - if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { - opal_output(0, "MCW rank %d is not bound", - ORTE_PROC_MY_NAME->vpid); - } - } else { - /* store/update the string representation of our local binding */ - if (NULL != orte_process_info.cpuset) { - free(orte_process_info.cpuset); - orte_process_info.cpuset = NULL; 
- } - hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, mycpus); - /* report the binding, if requested */ - if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { - char tmp1[1024], tmp2[1024]; - if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2str(tmp1, sizeof(tmp1), opal_hwloc_topology, mycpus)) { - opal_output(0, "MCW rank %d is not bound (or bound to all available processors)", ORTE_PROC_MY_NAME->vpid); - } else { - opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), opal_hwloc_topology, mycpus); - opal_output(0, "MCW rank %d bound to %s: %s", - ORTE_PROC_MY_NAME->vpid, tmp1, tmp2); + if (NULL != topo) { + /* get the cpus we are bound to */ + mycpus = hwloc_bitmap_alloc(); + if (hwloc_get_cpubind(topo, + mycpus, + HWLOC_CPUBIND_PROCESS) < 0) { + if (NULL != orte_process_info.cpuset) { + free(orte_process_info.cpuset); + orte_process_info.cpuset = NULL; + } + if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { + opal_output(0, "MCW rank %d is not bound", + ORTE_PROC_MY_NAME->vpid); + } + } else { + /* store/update the string representation of our local binding */ + if (NULL != orte_process_info.cpuset) { + free(orte_process_info.cpuset); + orte_process_info.cpuset = NULL; + } + hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, mycpus); + /* report the binding, if requested */ + if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { + char tmp1[1024], tmp2[1024]; + if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2str(tmp1, sizeof(tmp1), topo, mycpus)) { + opal_output(0, "MCW rank %d is not bound (or bound to all available processors)", ORTE_PROC_MY_NAME->vpid); + } else { + opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), topo, mycpus); + opal_output(0, "MCW rank %d bound to %s: %s", + ORTE_PROC_MY_NAME->vpid, tmp1, tmp2); + } } } + hwloc_bitmap_free(mycpus); } - hwloc_bitmap_free(mycpus); /* push our 
cpuset so others can calculate our locality */ if (NULL != orte_process_info.cpuset) { OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_CPUSET, orte_process_info.cpuset, OPAL_STRING); } + if (NULL != topo) { + opal_hwloc_base_free_topology(topo); + } return ORTE_SUCCESS; error: + if (NULL != topo) { + opal_hwloc_base_free_topology(topo); + } if (ORTE_ERR_SILENT != ret) { orte_show_help("help-orte-runtime", "orte_init:startup:internal-failure", diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 374d1dbceca..a254c91fc7d 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -138,21 +138,36 @@ int orte_ess_base_orted_setup(char **hosts) setup_sighandler(SIGUSR2, &sigusr2_handler, signal_callback); signals_set = true; - /* get the local topology */ - if (NULL == opal_hwloc_topology) { - if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { - error = "topology discovery"; - goto error; - } + /* setup the PMIx framework - ensure it skips all non-PMIx components, + * but do not override anything we were given */ + opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ); + if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { + ORTE_ERROR_LOG(ret); + error = "orte_pmix_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) { + ORTE_ERROR_LOG(ret); + error = "opal_pmix_base_select"; + goto error; + } + /* set the event base */ + opal_pmix_base_set_evbase(orte_event_base); + + /* ensure we have the local topology */ + if (NULL == (orte_server_topology = opal_hwloc_base_get_topology())) { + error = "topology discovery"; + goto error; } + /* generate the signature */ - orte_topo_signature = opal_hwloc_base_get_topo_signature(opal_hwloc_topology); + orte_topo_signature = opal_hwloc_base_get_topo_signature(orte_server_topology); /* remove the hostname from the topology. 
Unfortunately, hwloc * decided to add the source hostname to the "topology", thus * rendering it unusable as a pure topological description. So * we remove that information here. */ - obj = hwloc_get_root_obj(opal_hwloc_topology); + obj = hwloc_get_root_obj(orte_server_topology); for (i=0; i < obj->infos_count; i++) { if (NULL == obj->infos[i].name || NULL == obj->infos[i].value) { @@ -173,7 +188,7 @@ int orte_ess_base_orted_setup(char **hosts) } if (15 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); + opal_dss.dump(0, orte_server_topology, OPAL_HWLOC_TOPO); } /* open and setup the opal_pstat framework so we can provide @@ -335,7 +350,7 @@ int orte_ess_base_orted_setup(char **hosts) node->name = strdup(orte_process_info.nodename); node->index = opal_pointer_array_set_item(orte_node_pool, ORTE_PROC_MY_NAME->vpid, node); /* point our topology to the one detected locally */ - node->topology = opal_hwloc_topology; + node->topology = orte_server_topology; /* create and store a proc object for us */ proc = OBJ_NEW(orte_proc_t); @@ -363,21 +378,6 @@ int orte_ess_base_orted_setup(char **hosts) /* obviously, we have "reported" */ jdata->num_reported = 1; - /* setup the PMIx framework - ensure it skips all non-PMIx components, - * but do not override anything we were given */ - opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ); - if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_pmix_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) { - ORTE_ERROR_LOG(ret); - error = "opal_pmix_base_select"; - goto error; - } - /* set the event base */ - opal_pmix_base_set_evbase(orte_event_base); /* setup the PMIx server */ if (ORTE_SUCCESS != (ret = pmix_server_init())) { /* the server code already barked, so 
let's be quiet */ @@ -657,6 +657,10 @@ int orte_ess_base_orted_finalize(void) (void) mca_base_framework_close(&orte_rml_base_framework); (void) mca_base_framework_close(&orte_oob_base_framework); (void) mca_base_framework_close(&orte_state_base_framework); + /* cleanup topology */ + if (NULL != orte_server_topology) { + opal_hwloc_base_free_topology(orte_server_topology); + } /* remove our use of the session directory tree */ orte_session_dir_finalize(ORTE_PROC_MY_NAME); /* ensure we scrub the session directory tree */ diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 14699ae42da..2a68a8d2331 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -198,19 +198,34 @@ static int rte_init(void) setup_sighandler(SIGCONT, &sigcont_handler, signal_forward_callback); signals_set = true; - /* get the local topology */ - if (NULL == opal_hwloc_topology) { - if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { - error = "topology discovery"; - goto error; - } + /* setup the PMIx framework - ensure it skips all non-PMIx components, but + * do not override anything we were given */ + opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ); + if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { + ORTE_ERROR_LOG(ret); + error = "orte_pmix_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) { + ORTE_ERROR_LOG(ret); + error = "opal_pmix_base_select"; + goto error; + } + /* set the event base */ + opal_pmix_base_set_evbase(orte_event_base); + + /* ensure we have the local topology */ + if (NULL == (orte_server_topology = opal_hwloc_base_get_topology())) { + error = "topology discovery"; + goto error; } + /* generate the signature */ - orte_topo_signature = opal_hwloc_base_get_topo_signature(opal_hwloc_topology); + orte_topo_signature = opal_hwloc_base_get_topo_signature(orte_server_topology); if (15 < 
opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); + opal_dss.dump(0, orte_server_topology, OPAL_HWLOC_TOPO); } @@ -429,7 +444,7 @@ static int rte_init(void) /* add it to the array of known topologies */ t = OBJ_NEW(orte_topology_t); - t->topo = opal_hwloc_topology; + t->topo = orte_server_topology; t->sig = strdup(orte_topo_signature); opal_pointer_array_add(orte_node_topologies, t); @@ -511,7 +526,7 @@ static int rte_init(void) * will have reset our topology. Ensure we always get the right * one by setting our node topology afterwards */ - node->topology = opal_hwloc_topology; + node->topology = orte_server_topology; /* init the hash table, if necessary */ if (NULL == orte_coprocessors) { @@ -519,7 +534,7 @@ static int rte_init(void) opal_hash_table_init(orte_coprocessors, orte_process_info.num_procs); } /* detect and add any coprocessors */ - coprocessors = opal_hwloc_base_find_coprocessors(opal_hwloc_topology); + coprocessors = opal_hwloc_base_find_coprocessors(orte_server_topology); if (NULL != coprocessors) { /* separate the serial numbers of the coprocessors * on this host @@ -617,22 +632,6 @@ static int rte_init(void) free(contact_path); } - /* setup the PMIx framework - ensure it skips all non-PMIx components, but - * do not override anything we were given */ - opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ); - if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_pmix_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) { - ORTE_ERROR_LOG(ret); - error = "opal_pmix_base_select"; - goto error; - } - /* set the event base */ - opal_pmix_base_set_evbase(orte_event_base); - /* setup the PMIx server */ if (ORTE_SUCCESS != (ret = pmix_server_init())) { /* the server code already barked, so 
let's be quiet */ @@ -831,6 +830,9 @@ static int rte_finalize(void) (void) mca_base_framework_close(&orte_rml_base_framework); (void) mca_base_framework_close(&orte_oob_base_framework); + /* cleanup topology */ + opal_hwloc_base_free_topology(orte_server_topology); + /* remove our use of the session directory tree */ orte_session_dir_finalize(ORTE_PROC_MY_NAME); /* ensure we scrub the session directory tree */ diff --git a/orte/mca/ess/pmi/ess_pmi_module.c b/orte/mca/ess/pmi/ess_pmi_module.c index addb2b67526..2f2dda0662f 100644 --- a/orte/mca/ess/pmi/ess_pmi_module.c +++ b/orte/mca/ess/pmi/ess_pmi_module.c @@ -98,6 +98,7 @@ static int rte_init(void) opal_process_name_t wildcard_rank, pname; bool bool_val, *bool_ptr = &bool_val, tdir_mca_override = false; size_t i; + hwloc_topology_t topo; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -248,7 +249,7 @@ static int rte_init(void) /* retrieve temp directories info */ OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TMPDIR, &wildcard_rank, &val, OPAL_STRING); if (OPAL_SUCCESS == ret && NULL != val) { - /* We want to provide user with ability + /* We want to provide user with ability * to override RM settings at his own risk */ if( NULL == orte_process_info.top_session_dir ){ @@ -264,7 +265,7 @@ static int rte_init(void) if( !tdir_mca_override ){ OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_NSDIR, &wildcard_rank, &val, OPAL_STRING); if (OPAL_SUCCESS == ret && NULL != val) { - /* We want to provide user with ability + /* We want to provide user with ability * to override RM settings at his own risk */ if( NULL == orte_process_info.job_session_dir ){ @@ -281,7 +282,7 @@ static int rte_init(void) if( !tdir_mca_override ){ OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_PROCDIR, &wildcard_rank, &val, OPAL_STRING); if (OPAL_SUCCESS == ret && NULL != val) { - /* We want to provide user with ability + /* We want to provide user with ability * to override RM settings at his own risk */ if( NULL == 
orte_process_info.proc_session_dir ){ @@ -302,75 +303,6 @@ static int rte_init(void) } } - /* retrieve our topology */ - val = NULL; - OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_TOPO, - &wildcard_rank, &val, OPAL_STRING); - if (OPAL_SUCCESS == ret && NULL != val) { - /* load the topology */ - if (0 != hwloc_topology_init(&opal_hwloc_topology)) { - ret = OPAL_ERROR; - free(val); - error = "setting topology"; - goto error; - } - if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val))) { - ret = OPAL_ERROR; - free(val); - hwloc_topology_destroy(opal_hwloc_topology); - error = "setting topology"; - goto error; - } - /* since we are loading this from an external source, we have to - * explicitly set a flag so hwloc sets things up correctly - */ - if (0 != hwloc_topology_set_flags(opal_hwloc_topology, - (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | - HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | - HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { - ret = OPAL_ERROR; - hwloc_topology_destroy(opal_hwloc_topology); - free(val); - error = "setting topology"; - goto error; - } - /* now load the topology */ - if (0 != hwloc_topology_load(opal_hwloc_topology)) { - ret = OPAL_ERROR; - hwloc_topology_destroy(opal_hwloc_topology); - free(val); - error = "setting topology"; - goto error; - } - free(val); - /* filter the cpus thru any default cpu set */ - if (OPAL_SUCCESS != (ret = opal_hwloc_base_filter_cpus(opal_hwloc_topology))) { - error = "filtering topology"; - goto error; - } - } else { - /* it wasn't passed down to us, so go get it */ - if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { - error = "topology discovery"; - goto error; - } - /* push it into the PMIx database in case someone - * tries to retrieve it so we avoid an attempt to - * get it again */ - kv = OBJ_NEW(opal_value_t); - kv->key = strdup(OPAL_PMIX_LOCAL_TOPO); - kv->type = OPAL_STRING; - if (0 != (ret = hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &kv->data.string, &u32))) { - error = 
"topology export"; - goto error; - } - if (OPAL_SUCCESS != (ret = opal_pmix.store_local(&wildcard_rank, kv))) { - error = "topology store"; - goto error; - } - OBJ_RELEASE(kv); - } - /* get our local peers */ if (0 < orte_process_info.num_local_peers) { /* if my local rank if too high, then that's an error */ @@ -405,7 +337,12 @@ static int rte_init(void) /* set the locality */ if (NULL != peers) { - /* indentify our cpuset */ + /* retrieve our topology */ + if (NULL == (topo = opal_hwloc_base_get_topology())) { + error = "getting topology"; + goto error; + } + /* identify our cpuset */ if (NULL != cpusets) { mycpuset = cpusets[orte_process_info.my_local_rank]; } else { @@ -426,7 +363,7 @@ static int rte_init(void) u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; } else { /* we have it, so compute the locality */ - u16 = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, mycpuset, cpusets[i]); + u16 = opal_hwloc_base_get_relative_locality(topo, mycpuset, cpusets[i]); } OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "%s ess:pmi:locality: proc %s locality %x", @@ -438,10 +375,12 @@ static int rte_init(void) error = "local store of locality"; opal_argv_free(peers); opal_argv_free(cpusets); + opal_hwloc_base_free_topology(topo); goto error; } OBJ_RELEASE(kv); } + opal_hwloc_base_free_topology(topo); opal_argv_free(peers); opal_argv_free(cpusets); } diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c index 7e523219cd5..db1db2d73ae 100644 --- a/orte/mca/ess/singleton/ess_singleton_module.c +++ b/orte/mca/ess/singleton/ess_singleton_module.c @@ -84,8 +84,6 @@ static int rte_init(void) { int rc, ret; char *error = NULL; - opal_value_t *kv; - char *val = NULL; int u32, *u32ptr; uint16_t u16, *u16ptr; orte_process_name_t name; @@ -267,73 +265,10 @@ static int rte_init(void) */ assert (NULL != getenv(OPAL_MCA_PREFIX"orte_precondition_transports")); - /* retrieve our topology */ 
- OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_TOPO, - &name, &val, OPAL_STRING); - if (OPAL_SUCCESS == ret && NULL != val) { - /* load the topology */ - if (0 != hwloc_topology_init(&opal_hwloc_topology)) { - ret = OPAL_ERROR; - free(val); - error = "setting topology"; - goto error; - } - if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val))) { - ret = OPAL_ERROR; - free(val); - hwloc_topology_destroy(opal_hwloc_topology); - error = "setting topology"; - goto error; - } - /* since we are loading this from an external source, we have to - * explicitly set a flag so hwloc sets things up correctly - */ - if (0 != hwloc_topology_set_flags(opal_hwloc_topology, - (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | - HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | - HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { - ret = OPAL_ERROR; - hwloc_topology_destroy(opal_hwloc_topology); - free(val); - error = "setting topology"; - goto error; - } - /* now load the topology */ - if (0 != hwloc_topology_load(opal_hwloc_topology)) { - ret = OPAL_ERROR; - hwloc_topology_destroy(opal_hwloc_topology); - free(val); - error = "setting topology"; - goto error; - } - free(val); - } else { - /* it wasn't passed down to us, so go get it */ - if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { - error = "topology discovery"; - goto error; - } - /* push it into the PMIx database in case someone - * tries to retrieve it so we avoid an attempt to - * get it again */ - kv = OBJ_NEW(opal_value_t); - kv->key = strdup(OPAL_PMIX_LOCAL_TOPO); - kv->type = OPAL_STRING; - if (0 != (ret = hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &kv->data.string, &u32))) { - error = "topology export"; - goto error; - } - if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, kv))) { - error = "topology store"; - goto error; - } - OBJ_RELEASE(kv); - } - /* use the std app init to complete the procedure */ if (ORTE_SUCCESS != (rc = orte_ess_base_app_setup(true))) { - ORTE_ERROR_LOG(rc); - return rc; + 
error = "app setup"; + goto error; } /* push our hostname so others can find us, if they need to */ diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 5a228a80635..c2644c06e02 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -1076,7 +1076,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); found = true; node->topology = t->topo; - hwloc_topology_destroy(topo); + opal_hwloc_base_free_topology(topo); free(sig); break; } diff --git a/orte/mca/ras/simulator/ras_sim_module.c b/orte/mca/ras/simulator/ras_sim_module.c index dcf41e09ded..a058d7eaa57 100644 --- a/orte/mca/ras/simulator/ras_sim_module.c +++ b/orte/mca/ras/simulator/ras_sim_module.c @@ -3,7 +3,7 @@ * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. 
* * $COPYRIGHT$ * @@ -25,6 +25,7 @@ #include "orte/util/show_help.h" #include "orte/runtime/orte_globals.h" +#include "orte/orted/pmix/pmix_server.h" #include "ras_sim.h" @@ -118,7 +119,7 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) /* check for topology */ if (use_local_topology) { /* use our topology */ - topo = opal_hwloc_topology; + topo = orte_server_topology; } else if (NULL != files) { if (0 != hwloc_topology_init(&topo)) { orte_show_help("help-ras-simulator.txt", @@ -129,7 +130,7 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) if (0 != hwloc_topology_set_xml(topo, files[n])) { orte_show_help("help-ras-simulator.txt", "hwloc failed to load xml", true, files[n]); - hwloc_topology_destroy(topo); + opal_hwloc_base_free_topology(topo); goto error_silent; } /* since we are loading this from an external source, we have to @@ -139,14 +140,14 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) orte_show_help("help-ras-simulator.txt", "hwloc API fail", true, __FILE__, __LINE__, "hwloc_topology_set_flags"); - hwloc_topology_destroy(topo); + opal_hwloc_base_free_topology(topo); goto error_silent; } if (0 != hwloc_topology_load(topo)) { orte_show_help("help-ras-simulator.txt", "hwloc API fail", true, __FILE__, __LINE__, "hwloc_topology_load"); - hwloc_topology_destroy(topo); + opal_hwloc_base_free_topology(topo); goto error_silent; } /* remove the hostname from the topology. 
Unfortunately, hwloc @@ -195,21 +196,21 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) orte_show_help("help-ras-simulator.txt", "hwloc API fail", true, __FILE__, __LINE__, "hwloc_topology_set_synthetic"); - hwloc_topology_destroy(topo); + opal_hwloc_base_free_topology(topo); goto error_silent; } if (0 != hwloc_topology_load(topo)) { orte_show_help("help-ras-simulator.txt", "hwloc API fail", true, __FILE__, __LINE__, "hwloc_topology_load"); - hwloc_topology_destroy(topo); + opal_hwloc_base_free_topology(topo); goto error_silent; } if (OPAL_SUCCESS != opal_hwloc_base_filter_cpus(topo)) { orte_show_help("help-ras-simulator.txt", "hwloc API fail", true, __FILE__, __LINE__, "opal_hwloc_base_filter_cpus"); - hwloc_topology_destroy(topo); + opal_hwloc_base_free_topology(topo); goto error_silent; } /* remove the hostname from the topology. Unfortunately, hwloc diff --git a/orte/mca/rmaps/base/rmaps_base_frame.c b/orte/mca/rmaps/base/rmaps_base_frame.c index c1b03e88902..3d7f95b499b 100644 --- a/orte/mca/rmaps/base/rmaps_base_frame.c +++ b/orte/mca/rmaps/base/rmaps_base_frame.c @@ -12,7 +12,7 @@ * Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -33,6 +33,7 @@ #include "opal/mca/base/base.h" #include "orte/runtime/orte_globals.h" +#include "orte/orted/pmix/pmix_server.h" #include "orte/util/show_help.h" #include "orte/mca/errmgr/errmgr.h" @@ -261,13 +262,16 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags) * for mapping purposes */ if (NULL != rmaps_base_topo_file) { - if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(rmaps_base_topo_file))) { + if (NULL != orte_server_topology) { + opal_hwloc_base_free_topology(orte_server_topology); + } + if (NULL == (orte_server_topology = opal_hwloc_base_set_topology(rmaps_base_topo_file))) { orte_show_help("help-orte-rmaps-base.txt", "topo-file", true, rmaps_base_topo_file); return ORTE_ERR_SILENT; } } - /* check for violations that has to be detected before we parse the mapping option */ + /* check for violations that have to be detected before we parse the mapping option */ if (NULL != orte_rmaps_base.ppr) { orte_show_help("help-orte-rmaps-base.txt", "deprecated", true, "--ppr, -ppr", "--map-by ppr:", diff --git a/orte/mca/rmaps/base/rmaps_base_map_job.c b/orte/mca/rmaps/base/rmaps_base_map_job.c old mode 100755 new mode 100644 diff --git a/orte/mca/rmaps/lama/rmaps_lama_max_tree.c b/orte/mca/rmaps/lama/rmaps_lama_max_tree.c index a1183028b3b..3be06e6b196 100644 --- a/orte/mca/rmaps/lama/rmaps_lama_max_tree.c +++ b/orte/mca/rmaps/lama/rmaps_lama_max_tree.c @@ -4,6 +4,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * + * Copyright (c) 2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -262,7 +263,7 @@ static int rmaps_lama_convert_hwloc_tree_to_opal_tree(opal_tree_t *opal_tree, hw if( 15 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { opal_output_verbose(15, orte_rmaps_base_framework.framework_output, "mca:rmaps:lama: ----- Converting Topology:"); - /* opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); */ + /* opal_dss.dump(0, orte_server_topology, OPAL_HWLOC_TOPO); */ opal_dss.dump(0, *hwloc_topo, OPAL_HWLOC_TOPO); } diff --git a/orte/mca/rtc/hwloc/rtc_hwloc.c b/orte/mca/rtc/hwloc/rtc_hwloc.c index 91cb18328bb..3f95578b471 100644 --- a/orte/mca/rtc/hwloc/rtc_hwloc.c +++ b/orte/mca/rtc/hwloc/rtc_hwloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,6 +24,7 @@ #include "orte/util/show_help.h" #include "orte/util/error_strings.h" #include "orte/runtime/orte_globals.h" +#include "orte/orted/pmix/pmix_server.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rmaps/rmaps_types.h" @@ -91,7 +92,7 @@ static void set(orte_job_t *jobdat, NULL == cpu_bitmap || 0 == strlen(cpu_bitmap)) { /* if the daemon is bound, then we need to "free" this proc */ if (NULL != orte_daemon_cores) { - root = hwloc_get_root_obj(opal_hwloc_topology); + root = hwloc_get_root_obj(orte_server_topology); if (NULL == root->userdata) { orte_rtc_base_send_warn_show_help(write_fd, "help-orte-odls-default.txt", "incorrectly bound", @@ -100,7 +101,7 @@ static void set(orte_job_t *jobdat, } sum = (opal_hwloc_topo_data_t*)root->userdata; /* bind this proc to all available processors */ - rc = hwloc_set_cpubind(opal_hwloc_topology, sum->available, 0); + rc = hwloc_set_cpubind(orte_server_topology, sum->available, 0); /* if we got an error and this wasn't a default binding policy, then report it */ if (rc < 0 && 
OPAL_BINDING_POLICY_IS_SET(jobdat->map->binding)) { if (errno == ENOSYS) { @@ -168,7 +169,7 @@ static void set(orte_job_t *jobdat, } } /* bind as specified */ - rc = hwloc_set_cpubind(opal_hwloc_topology, cpuset, 0); + rc = hwloc_set_cpubind(orte_server_topology, cpuset, 0); /* if we got an error and this wasn't a default binding policy, then report it */ if (rc < 0 && OPAL_BINDING_POLICY_IS_SET(jobdat->map->binding)) { char *tmp = NULL; @@ -208,16 +209,16 @@ static void set(orte_job_t *jobdat, hwloc_cpuset_t mycpus; /* get the cpus we are bound to */ mycpus = hwloc_bitmap_alloc(); - if (hwloc_get_cpubind(opal_hwloc_topology, + if (hwloc_get_cpubind(orte_server_topology, mycpus, HWLOC_CPUBIND_PROCESS) < 0) { opal_output(0, "MCW rank %d is not bound", child->name.vpid); } else { - if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2str(tmp1, sizeof(tmp1), opal_hwloc_topology, mycpus)) { + if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2str(tmp1, sizeof(tmp1), orte_server_topology, mycpus)) { opal_output(0, "MCW rank %d is not bound (or bound to all available processors)", child->name.vpid); } else { - opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), opal_hwloc_topology, mycpus); + opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), orte_server_topology, mycpus); opal_output(0, "MCW rank %d bound to %s: %s", child->name.vpid, tmp1, tmp2); } @@ -231,7 +232,7 @@ static void set(orte_job_t *jobdat, /* set memory affinity policy - if we get an error, don't report * anything unless the user actually specified the binding policy */ - rc = opal_hwloc_base_set_process_membind_policy(); + rc = opal_hwloc_base_set_process_membind_policy(orte_server_topology); if (ORTE_SUCCESS != rc && OPAL_BINDING_POLICY_IS_SET(jobdat->map->binding)) { if (errno == ENOSYS) { msg = "hwloc indicates memory binding not supported"; diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c index b35a798ce79..2e3980146d7 100644 --- a/orte/mca/schizo/ompi/schizo_ompi.c +++ 
b/orte/mca/schizo/ompi/schizo_ompi.c @@ -47,6 +47,7 @@ #include "orte/util/session_dir.h" #include "orte/util/show_help.h" #include "orte/runtime/orte_globals.h" +#include "orte/orted/pmix/pmix_server.h" #include "orte/mca/schizo/base/base.h" @@ -871,8 +872,8 @@ static int setup_fork(orte_job_t *jdata, */ hwloc_obj_t obj; char *htmp; - if (NULL != opal_hwloc_topology) { - obj = hwloc_get_root_obj(opal_hwloc_topology); + if (NULL != orte_server_topology) { + obj = hwloc_get_root_obj(orte_server_topology); if (NULL != (htmp = (char*)hwloc_obj_get_info_by_name(obj, "CPUType")) || NULL != (htmp = orte_local_cpu_type)) { opal_setenv("OMPI_MCA_orte_cpu_type", htmp, true, &app->env); diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index 0c4d928c3b9..9714e598351 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -388,7 +388,7 @@ int orte_daemon(int argc, char *argv[]) res = hwloc_bitmap_alloc(); for (i=0; NULL != cores[i]; i++) { core = strtoul(cores[i], NULL, 10); - if (NULL == (pu = opal_hwloc_base_get_pu(opal_hwloc_topology, core, OPAL_HWLOC_LOGICAL))) { + if (NULL == (pu = opal_hwloc_base_get_pu(orte_server_topology, core, OPAL_HWLOC_LOGICAL))) { /* turn off the show help forwarding as we won't * be able to cycle the event library to send */ @@ -409,9 +409,9 @@ int orte_daemon(int argc, char *argv[]) } /* if the result is all zeros, then don't bind */ if (!hwloc_bitmap_iszero(ours)) { - (void)hwloc_set_cpubind(opal_hwloc_topology, ours, 0); + (void)hwloc_set_cpubind(orte_server_topology, ours, 0); if (opal_hwloc_report_bindings) { - opal_hwloc_base_cset2mapstr(tmp, sizeof(tmp), opal_hwloc_topology, ours); + opal_hwloc_base_cset2mapstr(tmp, sizeof(tmp), orte_server_topology, ours); opal_output(0, "Daemon %s is bound to cores %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp); } @@ -785,7 +785,7 @@ int orte_daemon(int argc, char *argv[]) if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &orte_topo_signature, 1, OPAL_STRING))) { 
ORTE_ERROR_LOG(ret); } - if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &orte_server_topology, 1, OPAL_HWLOC_TOPO))) { ORTE_ERROR_LOG(ret); } } else { @@ -795,7 +795,7 @@ int orte_daemon(int argc, char *argv[]) } } /* detect and add any coprocessors */ - coprocessors = opal_hwloc_base_find_coprocessors(opal_hwloc_topology); + coprocessors = opal_hwloc_base_find_coprocessors(orte_server_topology); if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &coprocessors, 1, OPAL_STRING))) { ORTE_ERROR_LOG(ret); } diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index c766ff584e7..8cba9fb8135 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -73,6 +73,9 @@ #include "pmix_server.h" #include "pmix_server_internal.h" +/* global variable */ +hwloc_topology_t orte_server_topology = NULL; + /* * Local utility functions */ @@ -239,12 +242,12 @@ int pmix_server_init(void) * topology themselves as this could overwhelm the local * system on large-scale SMPs */ OBJ_CONSTRUCT(&info, opal_list_t); - if (NULL != opal_hwloc_topology) { + if (NULL != orte_server_topology) { char *xmlbuffer=NULL; int len; kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_LOCAL_TOPO); - if (0 != hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len)) { + if (0 != hwloc_topology_export_xmlbuffer(orte_server_topology, &xmlbuffer, &len)) { OBJ_RELEASE(kv); OBJ_DESTRUCT(&info); return ORTE_ERROR; diff --git a/orte/orted/pmix/pmix_server.h b/orte/orted/pmix/pmix_server.h index 39a91c7f656..17b344ba2bd 100644 --- a/orte/orted/pmix/pmix_server.h +++ b/orte/orted/pmix/pmix_server.h @@ -12,7 +12,7 @@ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -36,7 +36,10 @@ ORTE_DECLSPEC void pmix_server_register_params(void); ORTE_DECLSPEC int orte_pmix_server_register_nspace(orte_job_t *jdata); +/* the daemons make relatively frequent use of the topology, + * so we cache it for them */ +ORTE_DECLSPEC hwloc_topology_t orte_server_topology; + END_C_DECLS #endif /* PMIX_SERVER_H_ */ - diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index 708110be63f..f1b47f6933a 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -907,7 +907,7 @@ static void tcon(orte_topology_t *t) static void tdes(orte_topology_t *t) { if (NULL != t->topo) { - hwloc_topology_destroy(t->topo); + opal_hwloc_base_free_topology(t->topo); } if (NULL != t->sig) { free(t->sig); diff --git a/orte/test/mpi/binding.c b/orte/test/mpi/binding.c index 622ea9a65e6..cfd8e62a961 100644 --- a/orte/test/mpi/binding.c +++ b/orte/test/mpi/binding.c @@ -10,7 +10,7 @@ #include #include #include -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/base/base.h" #include "mpi.h" #include "orte/util/proc_info.h" @@ -18,6 +18,7 @@ int main(int argc, char* argv[]) { int rank, size, rc; + hwloc_topology_t topo; hwloc_cpuset_t cpus; char *bindings; cpu_set_t *mask; @@ -31,8 +32,10 @@ int main(int argc, char* argv[]) gethostname(hostname, 1024); cpus = hwloc_bitmap_alloc(); - rc = hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS); + topo = opal_hwloc_base_get_topology(); + rc = hwloc_get_cpubind(topo, cpus, HWLOC_CPUBIND_PROCESS); hwloc_bitmap_list_asprintf(&bindings, cpus); + opal_hwloc_base_free_topology(topo); printf("[%s;%d] Hello, World, I am %d of %d [%d local peers]: get_cpubind: %d bitmap %s\n", hostname, (int)getpid(), rank, size, orte_process_info.num_local_peers, rc, diff --git a/orte/test/mpi/hello.c b/orte/test/mpi/hello.c index e8dbb2ac2c3..7ce80e71514 100644 --- 
a/orte/test/mpi/hello.c +++ b/orte/test/mpi/hello.c @@ -6,7 +6,7 @@ */ #include -#include "opal/mca/hwloc/hwloc.h" +#include "opal/mca/hwloc/base/base.h" #include "mpi.h" #include "orte/util/proc_info.h" @@ -14,6 +14,7 @@ int main(int argc, char* argv[]) { int rank, size, rc; + hwloc_topology_t topo; hwloc_cpuset_t cpus; char *bindings; @@ -21,9 +22,11 @@ int main(int argc, char* argv[]) MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); + topo = opal_hwloc_base_get_topology(); cpus = hwloc_bitmap_alloc(); - rc = hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS); + rc = hwloc_get_cpubind(topo, cpus, HWLOC_CPUBIND_PROCESS); hwloc_bitmap_list_asprintf(&bindings, cpus); + opal_hwloc_base_free_topology(topo); printf("Hello, World, I am %d of %d [%d local peers]: get_cpubind: %d bitmap %s\n", rank, size, orte_process_info.num_local_peers, rc, diff --git a/orte/test/system/opal_hwloc.c b/orte/test/system/opal_hwloc.c index da8180998af..3b16dba6d7f 100644 --- a/orte/test/system/opal_hwloc.c +++ b/orte/test/system/opal_hwloc.c @@ -22,7 +22,7 @@ static void fill_cache_line_size(void) /* Look for the smallest L2 cache size */ size = 4096; while (1) { - obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, + obj = opal_hwloc_base_get_obj_by_type(my_topology, HWLOC_OBJ_CACHE, 2, i, OPAL_HWLOC_LOGICAL); if (NULL == obj) { @@ -66,7 +66,7 @@ int main(int argc, char* argv[]) return OPAL_ERR_NOT_SUPPORTED; } if (0 != hwloc_topology_set_xml(my_topology, argv[1])) { - hwloc_topology_destroy(my_topology); + opal_hwloc_base_free_topology(my_topology); return OPAL_ERR_NOT_SUPPORTED; } /* since we are loading this from an external source, we have to @@ -76,11 +76,11 @@ int main(int argc, char* argv[]) (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { - hwloc_topology_destroy(my_topology); + opal_hwloc_base_free_topology(my_topology); return OPAL_ERR_NOT_SUPPORTED; } if (0 != 
hwloc_topology_load(my_topology)) { - hwloc_topology_destroy(my_topology); + opal_hwloc_base_free_topology(my_topology); return OPAL_ERR_NOT_SUPPORTED; } /* remove the hostname from the topology. Unfortunately, hwloc @@ -127,7 +127,7 @@ int main(int argc, char* argv[]) fprintf(stderr, "DIDN'T FIND A CORE\n"); } - hwloc_topology_destroy(my_topology); + opal_hwloc_base_free_topology(my_topology); opal_finalize();