diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 4b30c27d90..acd21bfc58 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -178,6 +178,14 @@ LBManager::makeLB(MsgSharedPtr msg) { lb, node, "LBManager: finished migrations\n" ); + + // Statistics output when LB is enabled and appropriate flag is enabled +#if vt_check_enabled(lblite) + if (theConfig()->vt_lb_stats) { + theNodeStats()->outputStatsForPhase(phase); + } +#endif + theNodeStats()->startIterCleanup(phase, model_->getNumPastPhasesNeeded()); this->finishedRunningLB(phase); }); diff --git a/src/vt/vrt/collection/balance/node_stats.cc b/src/vt/vrt/collection/balance/node_stats.cc index 924c33b08e..6a054ae38c 100644 --- a/src/vt/vrt/collection/balance/node_stats.cc +++ b/src/vt/vrt/collection/balance/node_stats.cc @@ -198,6 +198,21 @@ void NodeStats::createStatsFile() { stats_file_ = fopen(file_name.c_str(), "w+"); } +void NodeStats::openStatsFile() { + auto const node = theContext()->getNode(); + auto const base_file = std::string(theConfig()->vt_lb_stats_file); + auto const dir = std::string(theConfig()->vt_lb_stats_dir); + auto const file = fmt::format("{}.{}.out", base_file, node); + auto const file_name = fmt::format("{}/{}", dir, file); + + vt_debug_print( + lb, node, + "NodeStats: openStatsFile file={}\n", file_name + ); + + stats_file_ = fopen(file_name.c_str(), "a+"); +} + void NodeStats::closeStatsFile() { if (stats_file_) { fclose(stats_file_); @@ -205,71 +220,77 @@ void NodeStats::closeStatsFile() { } } -void NodeStats::outputStatsFile() { +void NodeStats::outputStatsForPhase(PhaseType phase) { + // TODO (STRZ) - directory cannot be created here. So where? + // if (stats_file_ == nullptr) { + // phase == 0 + // ? 
createStatsFile() + // : openStatsFile(); + // } + if (stats_file_ == nullptr) { - createStatsFile(); + openStatsFile(); } vtAssertExpr(stats_file_ != nullptr); - auto const num_iters = node_data_.size(); + vt_print(lb, "NodeStats::outputStatsFile: file={}, phase={}\n", print_ptr(stats_file_), phase); + + auto i = phase; + for (auto&& elm : node_data_.at(i)) { + ElementIDType id = elm.first; + TimeType time = elm.second; + const auto& subphase_times = node_subphase_data_.at(i)[id]; + size_t subphases = subphase_times.size(); + + auto obj_str = fmt::format("{},{},{},{},[", i, id, time, subphases); + for (size_t s = 0; s < subphases; s++) { + obj_str += std::to_string(subphase_times[s]); + if (s != subphases - 1) + obj_str += ","; + } - vt_print(lb, "NodeStats::outputStatsFile: file={}, iter={}\n", print_ptr(stats_file_), num_iters); + obj_str += "]\n"; - for (size_t i = 0; i < num_iters; i++) { - for (auto&& elm : node_data_.at(i)) { - ElementIDType id = elm.first; - TimeType time = elm.second; - const auto& subphase_times = node_subphase_data_.at(i)[id]; - size_t subphases = subphase_times.size(); + fprintf(stats_file_, "%s", obj_str.c_str()); + } - auto obj_str = fmt::format("{},{},{},{},[", i, id, time, subphases); - for (size_t s = 0; s < subphases; s++) { - obj_str += std::to_string(subphase_times[s]); - if (s != subphases - 1) - obj_str += ","; - } + for (auto&& elm : node_comm_.at(i)) { + using E = typename std::underlying_type::type; - obj_str += "]\n"; + auto const& key = elm.first; + auto const& val = elm.second; + auto const cat = static_cast(key.cat_); + if ( + key.cat_ == CommCategory::SendRecv or + key.cat_ == CommCategory::Broadcast + ) { + auto const to = key.toObj(); + auto const from = key.fromObj(); + auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat); fprintf(stats_file_, "%s", obj_str.c_str()); - } - for (auto&& elm : node_comm_.at(i)) { - using E = typename std::underlying_type::type; - - auto const& key = elm.first; - 
auto const& val = elm.second; - auto const cat = static_cast(key.cat_); - - if ( - key.cat_ == CommCategory::SendRecv or - key.cat_ == CommCategory::Broadcast - ) { - auto const to = key.toObj(); - auto const from = key.fromObj(); - auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat); - fprintf(stats_file_, "%s", obj_str.c_str()); - } else if ( - key.cat_ == CommCategory::NodeToCollection or - key.cat_ == CommCategory::NodeToCollectionBcast - ) { - auto const to = key.toObj(); - auto const from = key.fromNode(); - auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat); - fprintf(stats_file_, "%s", obj_str.c_str()); - } else if ( - key.cat_ == CommCategory::CollectionToNode or - key.cat_ == CommCategory::CollectionToNodeBcast - ) { - auto const to = key.toNode(); - auto const from = key.fromObj(); - auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat); - fprintf(stats_file_, "%s", obj_str.c_str()); - } else { - vtAssert(false, "Invalid balance::CommCategory enum value"); - } + } else if ( + key.cat_ == CommCategory::NodeToCollection or + key.cat_ == CommCategory::NodeToCollectionBcast + ) { + auto const to = key.toObj(); + auto const from = key.fromNode(); + auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat); + fprintf(stats_file_, "%s", obj_str.c_str()); + } else if ( + key.cat_ == CommCategory::CollectionToNode or + key.cat_ == CommCategory::CollectionToNodeBcast + ) { + auto const to = key.toNode(); + auto const from = key.fromObj(); + auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat); + fprintf(stats_file_, "%s", obj_str.c_str()); + } else { + vtAssert(false, "Invalid balance::CommCategory enum value"); } } + fflush(stats_file_); closeStatsFile(); diff --git a/src/vt/vrt/collection/balance/node_stats.h b/src/vt/vrt/collection/balance/node_stats.h index a4a697871d..09c49900aa 100644 --- a/src/vt/vrt/collection/balance/node_stats.h +++ 
b/src/vt/vrt/collection/balance/node_stats.h @@ -133,7 +133,7 @@ struct NodeStats : runtime::component::Component { void releaseLB(); /** - * \internal \brief Output stats file based on instrumented data + * \internal \brief Output stats file for given phase based on instrumented data * * The contents of the file consist of a series of records separated * by newlines. Each record consists of comma separated fields. The @@ -161,7 +161,7 @@ struct NodeStats : runtime::component::Component { * recipient and distinguishing point-to-point messages from * broadcasts, as a decimal integer. */ - void outputStatsFile(); + void outputStatsForPhase(PhaseType phase); /** * \internal \brief Generate the next object element ID for LB @@ -243,6 +243,11 @@ struct NodeStats : runtime::component::Component { */ void createStatsFile(); + /** + * \internal \brief Open the stats file + */ + void openStatsFile(); + /** * \internal \brief Close the stats file */ diff --git a/src/vt/vrt/collection/manager.cc b/src/vt/vrt/collection/manager.cc index 9590d35724..2b6e816cd2 100644 --- a/src/vt/vrt/collection/manager.cc +++ b/src/vt/vrt/collection/manager.cc @@ -57,10 +57,9 @@ CollectionManager::CollectionManager() { } void CollectionManager::finalize() { cleanupAll<>(); - // Statistics output when LB is enabled and appropriate flag is enabled + // If statistics are enabled, clear them while finalizing #if vt_check_enabled(lblite) if (theConfig()->vt_lb_stats) { - theNodeStats()->outputStatsFile(); theNodeStats()->clearStats(); } #endif