From 9d676d9f21fe23d6a25cce258e761026ea9a633d Mon Sep 17 00:00:00 2001
From: Jakub Strzebonski
Date: Tue, 8 Sep 2020 14:32:16 +0200
Subject: [PATCH] #1014 dump node stats before trimming

---
 Review notes (text in this area is not part of the commit):

 * NodeStats::startIterCleanup() now calls outputStatsForPhase(phase) when
   theConfig()->vt_lb_stats is set, immediately before the entries for
   phase - look_back are erased, so a phase is written while its data is
   still held.
 * NodeStats::initialize() creates the stats file; NodeStats::finalize()
   closes it and clears the stats. Both act only in lblite builds with
   vt_lb_stats set. The equivalent block is removed from
   CollectionManager::finalize().
 * outputStatsForPhase() writes the records of a single phase and ends with
   fflush() instead of closing the file. getRecvSend() returns the
   (to, from) pair for a CommKeyType according to its CommCategory.
 * NOTE(review): line breaks, indentation, blank context lines and the
   template arguments (Component<NodeStats>, Proxy<NodeStats>,
   std::underlying_type<CommCategory>, static_cast<E>,
   std::pair<ElementIDType, ElementIDType>) were reconstructed from a
   whitespace-collapsed copy. The author e-mail is missing, and the fourth
   node_stats.cc hunk header (-207,74 +231,70) implies two more context
   lines than could be recovered. Confirm the context against the base
   revision before applying.

 src/vt/vrt/collection/balance/node_stats.cc | 134 +++++++++++---------
 src/vt/vrt/collection/balance/node_stats.h  |   9 +-
 src/vt/vrt/collection/manager.cc            |   8 --
 3 files changed, 84 insertions(+), 67 deletions(-)

diff --git a/src/vt/vrt/collection/balance/node_stats.cc b/src/vt/vrt/collection/balance/node_stats.cc
index 2d007d452f..5e1700f6a4 100644
--- a/src/vt/vrt/collection/balance/node_stats.cc
+++ b/src/vt/vrt/collection/balance/node_stats.cc
@@ -142,6 +142,11 @@ void NodeStats::startIterCleanup(PhaseType phase, int look_back) {
   }
   node_data_[phase] = std::move(new_data);
 
+  // Statistics output when LB is enabled and appropriate flag is enabled
+  if (theConfig()->vt_lb_stats) {
+    outputStatsForPhase(phase);
+  }
+
   if (phase - look_back >= 0) {
     node_data_.erase(phase - look_back);
     node_subphase_data_.erase(phase - look_back);
@@ -170,6 +175,14 @@ void NodeStats::releaseLB() {
   CollectionManager::releaseLBPhase(msg_hold.get());
 }
 
+void NodeStats::initialize() {
+#if vt_check_enabled(lblite)
+  if (theConfig()->vt_lb_stats) {
+    theNodeStats()->createStatsFile();
+  }
+#endif
+}
+
 void NodeStats::createStatsFile() {
   auto const node = theContext()->getNode();
   auto const base_file = std::string(theConfig()->vt_lb_stats_file);
@@ -198,6 +211,17 @@ void NodeStats::createStatsFile() {
   }
 
   stats_file_ = fopen(file_name.c_str(), "w+");
+  vtAssertExpr(stats_file_ != nullptr);
+}
+
+void NodeStats::finalize() {
+  // If statistics are enabled, close output file and clear stats
+#if vt_check_enabled(lblite)
+  if (theConfig()->vt_lb_stats) {
+    closeStatsFile();
+    clearStats();
+  }
+#endif
 }
 
 void NodeStats::closeStatsFile() {
@@ -207,74 +231,70 @@ void NodeStats::closeStatsFile() {
   }
 }
 
-void NodeStats::outputStatsFile() {
-  if (stats_file_ == nullptr) {
-    createStatsFile();
-  }
-
+void NodeStats::outputStatsForPhase(PhaseType phase) {
   vtAssertExpr(stats_file_ != nullptr);
 
-  auto const num_iters = node_data_.size();
+  vt_print(lb, "NodeStats::outputStatsForPhase: file={}, phase={}\n", print_ptr(stats_file_), phase);
 
-  vt_print(lb, "NodeStats::outputStatsFile: file={}, iter={}\n", print_ptr(stats_file_), num_iters);
+  auto i = phase;
+  for (auto&& elm : node_data_.at(i)) {
+    ElementIDType id = elm.first;
+    TimeType time = elm.second;
+    const auto& subphase_times = node_subphase_data_.at(i)[id];
+    size_t subphases = subphase_times.size();
 
-  for (size_t i = 0; i < num_iters; i++) {
-    for (auto&& elm : node_data_.at(i)) {
-      ElementIDType id = elm.first;
-      TimeType time = elm.second;
-      const auto& subphase_times = node_subphase_data_.at(i)[id];
-      size_t subphases = subphase_times.size();
+    auto obj_str = fmt::format("{},{},{},{},[", i, id, time, subphases);
 
-      auto obj_str = fmt::format("{},{},{},{},[", i, id, time, subphases);
-      for (size_t s = 0; s < subphases; s++) {
-        obj_str += std::to_string(subphase_times[s]);
-        if (s != subphases - 1)
-          obj_str += ",";
+    for (size_t s = 0; s < subphases; s++) {
+      if (s > 0) {
+        obj_str += ",";
       }
-      obj_str += "]\n";
-
-      fprintf(stats_file_, "%s", obj_str.c_str());
-    }
-    for (auto&& elm : node_comm_.at(i)) {
-      using E = typename std::underlying_type<CommCategory>::type;
-
-      auto const& key = elm.first;
-      auto const& val = elm.second;
-      auto const cat = static_cast<E>(key.cat_);
-
-      if (
-        key.cat_ == CommCategory::SendRecv or
-        key.cat_ == CommCategory::Broadcast
-      ) {
-        auto const to = key.toObj();
-        auto const from = key.fromObj();
-        auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat);
-        fprintf(stats_file_, "%s", obj_str.c_str());
-      } else if (
-        key.cat_ == CommCategory::NodeToCollection or
-        key.cat_ == CommCategory::NodeToCollectionBcast
-      ) {
-        auto const to = key.toObj();
-        auto const from = key.fromNode();
-        auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat);
-        fprintf(stats_file_, "%s", obj_str.c_str());
-      } else if (
-        key.cat_ == CommCategory::CollectionToNode or
-        key.cat_ == CommCategory::CollectionToNodeBcast
-      ) {
-        auto const to = key.toNode();
-        auto const from = key.fromObj();
-        auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat);
-        fprintf(stats_file_, "%s", obj_str.c_str());
-      } else {
-        vtAssert(false, "Invalid balance::CommCategory enum value");
-      }
+      obj_str += std::to_string(subphase_times[s]);
     }
+
+    obj_str += "]\n";
+
+    fprintf(stats_file_, "%s", obj_str.c_str());
   }
+
+  for (auto&& elm : node_comm_.at(i)) {
+    using E = typename std::underlying_type<CommCategory>::type;
+
+    auto const& comm = elm.first;
+    auto const recvSend = getRecvSend(comm);
+    auto const cat = static_cast<E>(comm.cat_);
+    auto obj_str = fmt::format("{},{},{},{},{}\n", i, recvSend.first, recvSend.second, elm.second.bytes, cat);
+    fprintf(stats_file_, "%s", obj_str.c_str());
+  }
+  fflush(stats_file_);
+}
+
+std::pair<ElementIDType, ElementIDType> NodeStats::getRecvSend(CommKeyType const& comm) {
+  if (
+    comm.cat_ == CommCategory::SendRecv or
+    comm.cat_ == CommCategory::Broadcast
+  ) {
+    return std::make_pair(comm.toObj(), comm.fromObj());
+  }
+
+  if (
+    comm.cat_ == CommCategory::NodeToCollection or
+    comm.cat_ == CommCategory::NodeToCollectionBcast
+  ) {
+    return std::make_pair(comm.toObj(), comm.fromNode());
+  }
+
+  if (
+    comm.cat_ == CommCategory::CollectionToNode or
+    comm.cat_ == CommCategory::CollectionToNodeBcast
+  ) {
+    return std::make_pair(comm.toNode(), comm.fromObj());
+  }
 
-  closeStatsFile();
+  vtAssert(false, "Invalid balance::CommCategory enum value");
+  return std::make_pair(ElementIDType{}, ElementIDType{});
 }
 
 ElementIDType NodeStats::addNodeStats(
diff --git a/src/vt/vrt/collection/balance/node_stats.h b/src/vt/vrt/collection/balance/node_stats.h
index a4a697871d..4020ebf294 100644
--- a/src/vt/vrt/collection/balance/node_stats.h
+++ b/src/vt/vrt/collection/balance/node_stats.h
@@ -133,7 +133,7 @@ struct NodeStats : runtime::component::Component<NodeStats> {
   void releaseLB();
 
   /**
-   * \internal \brief Output stats file based on instrumented data
+   * \internal \brief Output stats file for given phase based on instrumented data
    *
    * The contents of the file consist of a series of records separated
    * by newlines. Each record consists of comma separated fields. The
@@ -161,7 +161,7 @@ struct NodeStats : runtime::component::Component<NodeStats> {
    * recipient and distinguishing point-to-point messages from
    * broadcasts, as a decimal integer.
    */
-  void outputStatsFile();
+  void outputStatsForPhase(PhaseType phase);
 
   /**
    * \internal \brief Generate the next object element ID for LB
@@ -237,6 +237,9 @@ struct NodeStats : runtime::component::Component<NodeStats> {
    */
   VirtualProxyType getCollectionProxyForElement(ElementIDType temp_id) const;
 
+  void initialize() override;
+  void finalize() override;
+
 private:
   /**
    * \internal \brief Create the stats file
@@ -248,6 +251,8 @@ struct NodeStats : runtime::component::Component<NodeStats> {
    */
   void closeStatsFile();
 
+  static std::pair<ElementIDType, ElementIDType> getRecvSend(CommKeyType const& comm);
+
 private:
   /// Local proxy to objgroup
   objgroup::proxy::Proxy<NodeStats> proxy_;
diff --git a/src/vt/vrt/collection/manager.cc b/src/vt/vrt/collection/manager.cc
index e23fb6836e..0d15168387 100644
--- a/src/vt/vrt/collection/manager.cc
+++ b/src/vt/vrt/collection/manager.cc
@@ -56,14 +56,6 @@ CollectionManager::CollectionManager() { }
 
 void CollectionManager::finalize() {
   cleanupAll<>();
-
-  // Statistics output when LB is enabled and appropriate flag is enabled
-#if vt_check_enabled(lblite)
-  if (theConfig()->vt_lb_stats) {
-    theNodeStats()->outputStatsFile();
-    theNodeStats()->clearStats();
-  }
-#endif
 }
 
 /*virtual*/ CollectionManager::~CollectionManager() { }