From 1703aec5be604670197c0d897a219f37a2776046 Mon Sep 17 00:00:00 2001 From: Jakub Strzebonski Date: Tue, 8 Sep 2020 14:32:16 +0200 Subject: [PATCH] #1014 dump node stats before trimming --- .../balance/lb_invoke/lb_manager.cc | 8 ++ src/vt/vrt/collection/balance/node_stats.cc | 122 +++++++----------- src/vt/vrt/collection/balance/node_stats.h | 5 +- src/vt/vrt/collection/manager.cc | 14 +- src/vt/vrt/collection/manager.h | 1 + 5 files changed, 73 insertions(+), 77 deletions(-) diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 4b30c27d90..acd21bfc58 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -178,6 +178,14 @@ LBManager::makeLB(MsgSharedPtr msg) { lb, node, "LBManager: finished migrations\n" ); + + // Statistics output when LB is enabled and appropriate flag is enabled +#if vt_check_enabled(lblite) + if (theConfig()->vt_lb_stats) { + theNodeStats()->outputStatsForPhase(phase); + } +#endif + theNodeStats()->startIterCleanup(phase, model_->getNumPastPhasesNeeded()); this->finishedRunningLB(phase); }); diff --git a/src/vt/vrt/collection/balance/node_stats.cc b/src/vt/vrt/collection/balance/node_stats.cc index 9884fd8a3d..db34eee295 100644 --- a/src/vt/vrt/collection/balance/node_stats.cc +++ b/src/vt/vrt/collection/balance/node_stats.cc @@ -182,21 +182,6 @@ void NodeStats::createStatsFile() { "NodeStats: createStatsFile file={}\n", file_name ); - // Node 0 creates the directory - if (not created_dir_ and node == 0) { - mkdir(dir.c_str(), S_IRWXU); - created_dir_ = true; - } - - // Barrier: wait for node 0 to create directory before trying to put a file in - // the stats destination directory - if (curRT) { - curRT->systemSync(); - } else { - // Something is wrong - vtAssert(false, "Trying to dump stats when VT runtime is deallocated?"); - } - stats_file_ = fopen(file_name.c_str(), "w+"); } @@ -207,74 +192,67 @@ void NodeStats::closeStatsFile() { } } -void NodeStats::outputStatsFile() { - if (stats_file_ == nullptr) { - createStatsFile(); - } - +void NodeStats::outputStatsForPhase(PhaseType phase) { vtAssertExpr(stats_file_ != nullptr); - auto const num_iters = node_data_.size(); + vt_print(lb, "NodeStats::outputStatsFile: file={}, phase={}\n", print_ptr(stats_file_), phase); - vt_print(lb, "NodeStats::outputStatsFile: file={}, iter={}\n", print_ptr(stats_file_), num_iters); + auto i = phase; + for (auto&& elm : node_data_.at(i)) { + ElementIDType id = elm.first; + TimeType time = elm.second; + const auto& subphase_times = node_subphase_data_.at(i)[id]; + size_t subphases = subphase_times.size(); - for (size_t i = 0; i < num_iters; i++) { - for (auto&& elm : node_data_.at(i)) { - ElementIDType id = elm.first; - TimeType time = elm.second; - const auto& subphase_times = node_subphase_data_.at(i)[id]; - size_t subphases = subphase_times.size(); + auto obj_str = fmt::format("{},{},{},{},[", i, id, time, subphases); + for (size_t s = 0; s < subphases; s++) { + obj_str += std::to_string(subphase_times[s]); + if (s != subphases - 1) + obj_str += ","; + } - auto obj_str = fmt::format("{},{},{},{},[", i, id, time, subphases); - for (size_t s = 0; s < subphases; s++) { - obj_str += std::to_string(subphase_times[s]); - if (s != subphases - 1) - obj_str += ","; - } + obj_str += "]\n"; - obj_str += "]\n"; + fprintf(stats_file_, "%s", obj_str.c_str()); + } + for (auto&& elm : node_comm_.at(i)) { + using E = typename std::underlying_type::type; + + auto const& key = elm.first; + auto const& val = elm.second; + auto const cat = static_cast(key.cat_); + + if ( + key.cat_ == CommCategory::SendRecv or + key.cat_ == CommCategory::Broadcast + ) { + auto const to = key.toObj(); + auto const from = key.fromObj(); + auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat); fprintf(stats_file_, "%s", obj_str.c_str()); - } - for (auto&& elm : node_comm_.at(i)) { - using E = typename std::underlying_type::type; - - auto const& key = elm.first; - auto const& val = elm.second; - auto const cat = static_cast(key.cat_); - - if ( - key.cat_ == CommCategory::SendRecv or - key.cat_ == CommCategory::Broadcast - ) { - auto const to = key.toObj(); - auto const from = key.fromObj(); - auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat); - fprintf(stats_file_, "%s", obj_str.c_str()); - } else if ( - key.cat_ == CommCategory::NodeToCollection or - key.cat_ == CommCategory::NodeToCollectionBcast - ) { - auto const to = key.toObj(); - auto const from = key.fromNode(); - auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat); - fprintf(stats_file_, "%s", obj_str.c_str()); - } else if ( - key.cat_ == CommCategory::CollectionToNode or - key.cat_ == CommCategory::CollectionToNodeBcast - ) { - auto const to = key.toNode(); - auto const from = key.fromObj(); - auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat); - fprintf(stats_file_, "%s", obj_str.c_str()); - } else { - vtAssert(false, "Invalid balance::CommCategory enum value"); - } + } else if ( + key.cat_ == CommCategory::NodeToCollection or + key.cat_ == CommCategory::NodeToCollectionBcast + ) { + auto const to = key.toObj(); + auto const from = key.fromNode(); + auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat); + fprintf(stats_file_, "%s", obj_str.c_str()); + } else if ( + key.cat_ == CommCategory::CollectionToNode or + key.cat_ == CommCategory::CollectionToNodeBcast + ) { + auto const to = key.toNode(); + auto const from = key.fromObj(); + auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat); + fprintf(stats_file_, "%s", obj_str.c_str()); + } else { + vtAssert(false, "Invalid balance::CommCategory enum value"); } } - fflush(stats_file_); - closeStatsFile(); + fflush(stats_file_); } ElementIDType NodeStats::addNodeStats( diff --git a/src/vt/vrt/collection/balance/node_stats.h b/src/vt/vrt/collection/balance/node_stats.h index a4a697871d..355a1de26e 100644 --- a/src/vt/vrt/collection/balance/node_stats.h +++ b/src/vt/vrt/collection/balance/node_stats.h @@ -133,7 +133,7 @@ struct NodeStats : runtime::component::Component { void releaseLB(); /** - * \internal \brief Output stats file based on instrumented data + * \internal \brief Output stats file for given phase based on instrumented data * * The contents of the file consist of a series of records separated * by newlines. Each record consists of comma separated fields. The @@ -161,7 +161,7 @@ struct NodeStats : runtime::component::Component { * recipient and distinguishing point-to-point messages from * broadcasts, as a decimal integer. */ - void outputStatsFile(); + void outputStatsForPhase(PhaseType phase); /** * \internal \brief Generate the next object element ID for LB @@ -237,7 +237,6 @@ struct NodeStats : runtime::component::Component { */ VirtualProxyType getCollectionProxyForElement(ElementIDType temp_id) const; -private: /** * \internal \brief Create the stats file */ diff --git a/src/vt/vrt/collection/manager.cc b/src/vt/vrt/collection/manager.cc index e23fb6836e..4bc2b1b7ec 100644 --- a/src/vt/vrt/collection/manager.cc +++ b/src/vt/vrt/collection/manager.cc @@ -54,13 +54,23 @@ namespace vt { namespace vrt { namespace collection { CollectionManager::CollectionManager() { } +void CollectionManager::initialize() { + // If statistics are enabled create output directory and file +#if vt_check_enabled(lblite) + if (theConfig()->vt_lb_stats) { + mkdir(theConfig()->vt_lb_stats_dir.c_str(), S_IRWXU); + theNodeStats()->createStatsFile(); + } +#endif +} + void CollectionManager::finalize() { cleanupAll<>(); - // Statistics output when LB is enabled and appropriate flag is enabled + // If statistics are enabled, close output file and clear stats #if vt_check_enabled(lblite) if (theConfig()->vt_lb_stats) { - theNodeStats()->outputStatsFile(); + theNodeStats()->closeStatsFile(); theNodeStats()->clearStats(); } #endif diff --git a/src/vt/vrt/collection/manager.h b/src/vt/vrt/collection/manager.h index 2872ba411d..430bb00ed9 100644 --- a/src/vt/vrt/collection/manager.h +++ b/src/vt/vrt/collection/manager.h @@ -157,6 +157,7 @@ struct CollectionManager virtual ~CollectionManager(); + void initialize() override; void finalize() override; std::string name() override { return "CollectionManager"; }