From 869b60e256e52c07994a14e2f66b68b2ac3eca9e Mon Sep 17 00:00:00 2001 From: Clement Foyer Date: Wed, 1 Mar 2017 17:14:03 +0100 Subject: [PATCH] Adapt to the new definition of communicators Signed-off-by: Clement Foyer --- .../monitoring/coll_monitoring_component.c | 14 ++++++------ .../mca/common/monitoring/common_monitoring.c | 22 +++++-------------- .../monitoring/common_monitoring_coll.c | 19 ++++++++++++++++ test/monitoring/check_monitoring.c | 12 +++++----- 4 files changed, 37 insertions(+), 30 deletions(-) diff --git a/ompi/mca/coll/monitoring/coll_monitoring_component.c b/ompi/mca/coll/monitoring/coll_monitoring_component.c index 7ac99399dc8..461bcf855a9 100644 --- a/ompi/mca/coll/monitoring/coll_monitoring_component.c +++ b/ompi/mca/coll/monitoring/coll_monitoring_component.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 Inria. All rights reserved. + * Copyright (c) 2016-2017 Inria. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -16,9 +16,9 @@ #define MONITORING_SAVE_PREV_COLL_API(__module, __comm, __api) \ do { \ - if( NULL != __comm->c_coll.coll_ ## __api ## _module ) { \ - __module->real.coll_ ## __api = __comm->c_coll.coll_ ## __api; \ - __module->real.coll_ ## __api ## _module = __comm->c_coll.coll_ ## __api ## _module; \ + if( NULL != __comm->c_coll->coll_ ## __api ## _module ) { \ + __module->real.coll_ ## __api = __comm->c_coll->coll_ ## __api; \ + __module->real.coll_ ## __api ## _module = __comm->c_coll->coll_ ## __api ## _module; \ OBJ_RETAIN(__module->real.coll_ ## __api ## _module); \ } else { \ /* If no function previously provided, do not monitor */ \ @@ -26,9 +26,9 @@ OPAL_MONITORING_PRINT_WARN("COMM \"%s\": No monitoring available for " \ "coll_" # __api, __comm->c_name); \ } \ - if( NULL != __comm->c_coll.coll_i ## __api ## _module ) { \ - __module->real.coll_i ## __api = __comm->c_coll.coll_i ## __api; \ - __module->real.coll_i ## __api ## _module = __comm->c_coll.coll_i ## __api ## _module; \ + if( NULL != 
__comm->c_coll->coll_i ## __api ## _module ) { \ + __module->real.coll_i ## __api = __comm->c_coll->coll_i ## __api; \ + __module->real.coll_i ## __api ## _module = __comm->c_coll->coll_i ## __api ## _module; \ OBJ_RETAIN(__module->real.coll_i ## __api ## _module); \ } else { \ /* If no function previously provided, do not monitor */ \ diff --git a/ompi/mca/common/monitoring/common_monitoring.c b/ompi/mca/common/monitoring/common_monitoring.c index 6d7d36e8f69..5ad49244223 100644 --- a/ompi/mca/common/monitoring/common_monitoring.c +++ b/ompi/mca/common/monitoring/common_monitoring.c @@ -73,10 +73,10 @@ static int nprocs_world = 0; opal_hash_table_t *common_monitoring_translation_ht = NULL; /* Reset all the monitoring arrays */ -static void mca_common_monitoring_reset( void ); +static void mca_common_monitoring_reset ( void ); /* Flushes the monitored data and reset the values */ -static int mca_common_monitoring_flush(int fd, char* filename); +static int mca_common_monitoring_flush (int fd, char* filename); /* Retreive the PML recorded count of messages sent */ static int mca_common_monitoring_get_pml_count (const struct mca_base_pvar_t *pvar, @@ -691,9 +691,6 @@ static void mca_common_monitoring_output( FILE *pf, int my_rank, int nbprocs ) fprintf(pf, "%" PRIu64 "%s", size_histogram[i * max_size_histogram + j], j < max_size_histogram - 1 ? "," : "\n"); } - /* reset phase array */ - pml_data[i] = 0; - pml_count[i] = 0; } /* Dump outgoing synchronization/collective messages */ @@ -715,9 +712,6 @@ static void mca_common_monitoring_output( FILE *pf, int my_rank, int nbprocs ) j < max_size_histogram - 1 ? 
"," : "\n"); } } - /* reset phase array */ - filtered_pml_data[i] = 0; - filtered_pml_count[i] = 0; } } @@ -732,23 +726,15 @@ static void mca_common_monitoring_output( FILE *pf, int my_rank, int nbprocs ) fprintf(pf, "R\t%" PRId32 "\t%" PRId32 "\t%" PRIu64 " bytes\t%" PRIu64 " msgs sent\n", my_rank, i, osc_data_r[i], osc_count_r[i]); } - /* reset phase array */ - osc_data_s[i] = 0; - osc_count_s[i] = 0; - osc_data_r[i] = 0; - osc_count_r[i] = 0; } /* Dump collectives */ fprintf(pf, "# COLLECTIVES\n"); for (int i = 0 ; i < nbprocs ; i++) { if(coll_count[i] > 0) { - fprintf(pf, "C\t%" PRId32 "\t%" PRId32 "\t%" PRIu64 " bytes\t%" PRIu64 " msgs sent\t", + fprintf(pf, "C\t%" PRId32 "\t%" PRId32 "\t%" PRIu64 " bytes\t%" PRIu64 " msgs sent\n", my_rank, i, coll_data[i], coll_count[i]); } - /* reset phase array */ - coll_data[i] = 0; - coll_count[i] = 0; } mca_common_monitoring_coll_flush_all(pf); } @@ -795,5 +781,7 @@ static int mca_common_monitoring_flush(int fd, char* filename) fclose(pf); } + /* Reset to 0 all monitored data */ + mca_common_monitoring_reset(); return OMPI_SUCCESS; } diff --git a/ompi/mca/common/monitoring/common_monitoring_coll.c b/ompi/mca/common/monitoring/common_monitoring_coll.c index b3783d4356a..0049eb00100 100644 --- a/ompi/mca/common/monitoring/common_monitoring_coll.c +++ b/ompi/mca/common/monitoring/common_monitoring_coll.c @@ -42,11 +42,27 @@ struct mca_monitoring_coll_data_t { /* Collectives operation monitoring */ static opal_hash_table_t *comm_data = NULL; +/* Check whether the communicator's name has been changed. Update the + * data->comm_name field if so. 
+ */ +static inline void mca_common_monitoring_coll_check_name(mca_monitoring_coll_data_t*data) +{ + if( data->comm_name && data->p_comm && (data->p_comm->c_flags & OMPI_COMM_NAMEISSET) + && data->p_comm->c_name && 0 < strlen(data->p_comm->c_name) + && 0 != strncmp(data->p_comm->c_name, data->comm_name, OPAL_MAX_OBJECT_NAME - 1) ) + { + free(data->comm_name); + data->comm_name = strdup(data->p_comm->c_name); + } +} + static inline void mca_common_monitoring_coll_cache(mca_monitoring_coll_data_t*data) { int world_rank; if( NULL == data->comm_name && 0 < strlen(data->p_comm->c_name) ) { data->comm_name = strdup(data->p_comm->c_name); + } else { + mca_common_monitoring_coll_check_name(data); } if( -1 == data->world_rank ) { /* Get current process world_rank */ @@ -150,6 +166,9 @@ void mca_common_monitoring_coll_finalize( void ) void mca_common_monitoring_coll_flush(FILE *pf, mca_monitoring_coll_data_t*data) { + /* Check for any change in the communicator's name */ + mca_common_monitoring_coll_check_name(data); + /* Flush data */ fprintf(pf, "D\t%s\tprocs: %s\n" diff --git a/test/monitoring/check_monitoring.c b/test/monitoring/check_monitoring.c index 9e4369a8a3e..684aa1a059d 100644 --- a/test/monitoring/check_monitoring.c +++ b/test/monitoring/check_monitoring.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2016 Inria. All rights reserved. - * Copyright (c) 2017 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. + * Copyright (c) 2016-2017 Inria. All rights reserved. + * Copyright (c) 2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -305,9 +305,9 @@ static inline int pvar_coll_check(MPI_T_pvar_session session, int world_size, in fprintf(stderr, "Error in %s: size_values[%d]=%" PRIu64 ", and should be equal to %" PRIu64 ".\n", __func__, i, svalues[i], (uint64_t) 0); ret = -1; - } else if ( i != world_rank && svalues[i] < (uint64_t) ((world_size * (world_size - 1) * 2 * 13 * sizeof(char)) + 3 * 13 * sizeof(char) + sizeof(int)) ) { + } else if ( i != world_rank && svalues[i] < (uint64_t) (world_size * (2 * 13 * sizeof(char) + sizeof(int)) + 13 * 3 * sizeof(char) + sizeof(int)) ) { fprintf(stderr, "Error in %s: size_values[%d]=%" PRIu64 ", and should be >= %" PRIu64 ".\n", - __func__, i, svalues[i], (uint64_t) ((world_size * (world_size - 1) * 2 * 13 * sizeof(char)) + 3 * 13 * sizeof(char) + sizeof(int))); + __func__, i, svalues[i], (uint64_t) (world_size * (2 * 13 * sizeof(char) + sizeof(int)) + 13 * 3 * sizeof(char) + sizeof(int))); ret = -1; } }