diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index a68f3666ad..a7aa822070 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -48,7 +48,6 @@ #include "vt/config.h" #include "vt/vrt/collection/balance/baselb/baselb.h" #include "vt/vrt/collection/balance/lb_comm.h" -#include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" #include "vt/vrt/collection/balance/read_lb.h" #include "vt/vrt/collection/balance/lb_invoke/lb_manager.h" #include "vt/vrt/collection/balance/node_stats.h" diff --git a/src/vt/vrt/collection/balance/baselb/baselb.h b/src/vt/vrt/collection/balance/baselb/baselb.h index 2e4f7ad107..cf3e546ff9 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.h +++ b/src/vt/vrt/collection/balance/baselb/baselb.h @@ -47,7 +47,6 @@ #include "vt/config.h" #include "vt/vrt/collection/balance/lb_common.h" -#include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" #include "vt/vrt/collection/balance/baselb/baselb_msgs.h" #include "vt/vrt/collection/balance/stats_msg.h" #include "vt/vrt/collection/balance/lb_comm.h" diff --git a/src/vt/vrt/collection/balance/elm_stats.impl.h b/src/vt/vrt/collection/balance/elm_stats.impl.h index a51fad944a..48b8aafc0c 100644 --- a/src/vt/vrt/collection/balance/elm_stats.impl.h +++ b/src/vt/vrt/collection/balance/elm_stats.impl.h @@ -106,8 +106,6 @@ template before_ready, after_ready, ready ); - using MsgType = InvokeReduceMsg; - auto lb_man = theLBManager()->getProxy(); auto const single_node = theContext()->getNumNodes() == 1; @@ -115,10 +113,10 @@ template bool const must_run_lb = lb != LBType::NoLB and not single_node; auto const num_collections = theCollection()->numCollections<>(); auto const do_sync = msg->doSync(); - auto nmsg = makeMessage(cur_phase,lb,msg->manual(),num_collections); + auto nmsg = makeMessage(cur_phase,lb,msg->manual(),num_collections); if (must_run_lb) { - auto cb = theCB()->makeBcast>(lb_man); + auto cb = theCB()->makeBcast(lb_man); proxy.reduce(nmsg.get(),cb); } else { @@ -129,7 +127,7 @@ template theCollection()->elmFinishedLB(elm_proxy,cur_phase); } - auto cb = theCB()->makeBcast>(lb_man); + auto cb = theCB()->makeBcast(lb_man); proxy.reduce(nmsg.get(),cb); } } diff --git a/src/vt/vrt/collection/balance/greedylb/greedylb.cc b/src/vt/vrt/collection/balance/greedylb/greedylb.cc index 07c0cfbe22..dff057e621 100644 --- a/src/vt/vrt/collection/balance/greedylb/greedylb.cc +++ b/src/vt/vrt/collection/balance/greedylb/greedylb.cc @@ -299,12 +299,6 @@ void GreedyLB::loadOverBin(ObjBinType bin, ObjBinListType& bin_list) { auto const threshold = this_threshold * avg_load; auto const obj_id = bin_list.back(); - if (load_over.find(bin) == load_over.end()) { - load_over_size += sizeof(std::size_t) * 4; - load_over_size += sizeof(ObjBinType); - } - load_over_size += sizeof(ObjIDType); - load_over[bin].push_back(obj_id); bin_list.pop_back(); diff --git a/src/vt/vrt/collection/balance/greedylb/greedylb.h b/src/vt/vrt/collection/balance/greedylb/greedylb.h index 6715ca01cd..f40cd43ef9 100644 --- a/src/vt/vrt/collection/balance/greedylb/greedylb.h +++ b/src/vt/vrt/collection/balance/greedylb/greedylb.h @@ -50,7 +50,6 @@ #include "vt/vrt/collection/balance/greedylb/greedylb_types.h" #include "vt/vrt/collection/balance/greedylb/greedylb_constants.h" #include "vt/vrt/collection/balance/greedylb/greedylb_msgs.h" -#include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" #include "vt/vrt/collection/balance/baselb/baselb.h" #include "vt/timing/timing.h" @@ -98,14 +97,12 @@ struct GreedyLB : BaseLB { static objgroup::proxy::Proxy scatter_proxy; private: - double greedy_max_threshold = 0.0f; - double greedy_threshold = 0.0f; - bool greedy_auto_threshold = true; double this_threshold = 0.0f; LoadType this_load_begin = 0.0f; ObjSampleType load_over; - std::size_t load_over_size = 0; objgroup::proxy::Proxy proxy = {}; + + // Parameters read from LB spec file double max_threshold = 0.0f; double min_threshold = 0.0f; bool auto_threshold = true; diff --git a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc index 24d6335039..0ef7eb30ea 100644 --- a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc +++ b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc @@ -219,15 +219,13 @@ void HierarchicalLB::loadStats() { calcLoadOver(extract_strategy); lbTreeUpSend( - bottom_parent, this_load, this_node, load_over, 1, load_over_size + bottom_parent, this_load, this_node, load_over, 1 ); if (children.size() == 0) { - auto const& total_size = sizeof(std::size_t) * 4; ObjSampleType empty_obj{}; lbTreeUpSend( - parent, hierlb_no_load_sentinel, this_node, empty_obj, agg_node_size, - total_size + parent, hierlb_no_load_sentinel, this_node, empty_obj, agg_node_size ); } } @@ -237,12 +235,6 @@ void HierarchicalLB::loadOverBin(ObjBinType bin, ObjBinListType& bin_list) { auto const threshold = this_threshold * getAvgLoad(); auto const obj_id = bin_list.back(); - if (load_over.find(bin) == load_over.end()) { - load_over_size += sizeof(std::size_t) * 4; - load_over_size += sizeof(ObjBinType); - } - load_over_size += sizeof(ObjIDType); - load_over[bin].push_back(obj_id); bin_list.pop_back(); @@ -391,8 +383,7 @@ std::size_t HierarchicalLB::getSize(ObjSampleType const& sample) { void HierarchicalLB::lbTreeUpSend( NodeType const node, LoadType const child_load, NodeType const child, - ObjSampleType const& load, NodeType const child_size, - std::size_t const& load_size_approx + ObjSampleType const& load, NodeType const child_size ) { auto msg = makeMessage(child_load,child,load,child_size); proxy[node].template send(msg); @@ -683,31 +674,26 @@ void HierarchicalLB::distributeAmoungChildren() { } } - auto const& data_size = clearObj(given_objs); + clearObj(given_objs); lbTreeUpSend( - parent, total_child_load, this_node, given_objs, total_size, data_size + parent, total_child_load, this_node, given_objs, total_size ); given_objs.clear(); } -std::size_t HierarchicalLB::clearObj(ObjSampleType& objs) { - std::size_t total_size = 0; +void HierarchicalLB::clearObj(ObjSampleType& objs) { std::vector to_remove{}; for (auto&& bin : objs) { if (bin.second.size() == 0) { to_remove.push_back(bin.first); } - total_size += bin.second.size() * sizeof(ObjIDType); - total_size += sizeof(ObjBinType); - total_size += sizeof(std::size_t) * 4; } for (auto&& r : to_remove) { auto giter = objs.find(r); vtAssert(giter != objs.end(), "Must exist"); objs.erase(giter); } - return total_size; } void HierarchicalLB::runLB() { diff --git a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.h b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.h index 9975324283..d7c8c20f3f 100644 --- a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.h +++ b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.h @@ -52,7 +52,6 @@ #include "vt/vrt/collection/balance/hierarchicallb/hierlb_msgs.h" #include "vt/vrt/collection/balance/hierarchicallb/hierlb_strat.h" #include "vt/vrt/collection/balance/baselb/baselb.h" -#include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" #include "vt/timing/timing.h" #include "vt/objgroup/headers.h" @@ -98,8 +97,7 @@ struct HierarchicalLB : BaseLB { ); void lbTreeUpSend( NodeType const node, LoadType const child_load, NodeType const child, - ObjSampleType const& load, NodeType const child_size, - std::size_t const& load_size_approx + ObjSampleType const& load, NodeType const child_size ); void downTree( NodeType const from, ObjSampleType excess, bool const final_child @@ -111,7 +109,7 @@ struct HierarchicalLB : BaseLB { void sendDownTree(); void distributeAmoungChildren(); - std::size_t clearObj(ObjSampleType& objs); + void clearObj(ObjSampleType& objs); HierLBChild* findMinChild(); void startMigrations(); @@ -130,7 +128,6 @@ struct HierarchicalLB : BaseLB { ChildMapType children; LoadType this_load_begin = 0.0f; ObjSampleType load_over, given_objs, taken_objs; - std::size_t load_over_size = 0; int64_t migrates_expected = 0, transfer_count = 0; TransferType transfers; objgroup::proxy::Proxy proxy = {}; diff --git a/src/vt/vrt/collection/balance/lb_invoke/invoke_msg.h b/src/vt/vrt/collection/balance/lb_invoke/invoke_msg.h index 080c3ec8a4..d7e4904b5a 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/invoke_msg.h +++ b/src/vt/vrt/collection/balance/lb_invoke/invoke_msg.h @@ -52,10 +52,9 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -template -struct InvokeBaseMsg : MsgT { - InvokeBaseMsg() = default; - InvokeBaseMsg( +struct InvokeMsg : collective::ReduceNoneMsg { + InvokeMsg() = default; + InvokeMsg( PhaseType in_phase, LBType in_lb, bool manual, std::size_t in_num_colls = 1 ) : phase_(in_phase), lb_(in_lb), manual_(manual), num_collections_(in_num_colls) @@ -67,9 +66,6 @@ struct InvokeBaseMsg : MsgT { std::size_t num_collections_ = 0; }; -using InvokeMsg = InvokeBaseMsg; -using InvokeReduceMsg = InvokeBaseMsg; - }}}} /* end namespace vt::vrt::collection::balance */ #endif /*INCLUDED_VT_VRT_COLLECTION_BALANCE_LB_INVOKE_INVOKE_MSG_H*/ diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 440a839f33..890759ad88 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -46,7 +46,6 @@ #include "vt/configs/arguments/app_config.h" #include "vt/context/context.h" #include "vt/vrt/collection/balance/lb_invoke/lb_manager.h" -#include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" #include "vt/vrt/collection/balance/read_lb.h" #include "vt/vrt/collection/balance/lb_type.h" #include "vt/vrt/collection/balance/node_stats.h" @@ -144,16 +143,22 @@ void LBManager::setLoadModel(std::shared_ptr model) { } template -void -LBManager::makeLB(MsgSharedPtr msg) { +LBManager::LBProxyType +LBManager::makeLB() { auto proxy = theObjGroup()->makeCollective(); auto strat = proxy.get(); strat->init(proxy); auto base_proxy = proxy.template registerBaseCollective(); - auto phase = msg->getPhase(); destroy_lb_ = [proxy]{ proxy.destroyCollective(); }; + return base_proxy; +} + +void +LBManager::runLB(LBProxyType base_proxy, PhaseType phase) { + lb::BaseLB* strat = base_proxy.get(); + runInEpochCollective([=] { model_->updateLoads(phase); }); @@ -209,16 +214,15 @@ void LBManager::collectiveImpl( ); } - auto msg = makeMessage(phase); switch (lb) { - case LBType::HierarchicalLB: makeLB(msg); break; - case LBType::GreedyLB: makeLB(msg); break; - case LBType::RotateLB: makeLB(msg); break; - case LBType::GossipLB: makeLB(msg); break; - case LBType::StatsMapLB: makeLB(msg); break; - case LBType::RandomLB: makeLB(msg); break; + case LBType::HierarchicalLB: lb_instances_["chosen"] = makeLB(); break; + case LBType::GreedyLB: lb_instances_["chosen"] = makeLB(); break; + case LBType::RotateLB: lb_instances_["chosen"] = makeLB(); break; + case LBType::GossipLB: lb_instances_["chosen"] = makeLB(); break; + case LBType::StatsMapLB: lb_instances_["chosen"] = makeLB(); break; + case LBType::RandomLB: lb_instances_["chosen"] = makeLB(); break; # if vt_check_enabled(zoltan) - case LBType::ZoltanLB: makeLB(msg); break; + case LBType::ZoltanLB: lb_instances_["chosen"] = makeLB(); break; # endif case LBType::NoLB: vtAssert(false, "LBType::NoLB is not a valid LB for collectiveImpl"); @@ -227,6 +231,10 @@ void LBManager::collectiveImpl( vtAssert(false, "A valid LB must be passed to collectiveImpl"); break; } + + LBProxyType base_proxy = lb_instances_["chosen"]; + + runLB(base_proxy, phase); } } @@ -302,6 +310,22 @@ void LBManager::releaseNow(PhaseType phase) { num_invocations_ = num_release_ = 0; } +void LBManager::sysLB(InvokeMsg* msg) { + vt_debug_print(lb, node, "sysLB\n"); + printMemoryUsage(msg->phase_); + flushTraceNextPhase(); + setTraceEnabledNextPhase(msg->phase_); + return collectiveImpl(msg->phase_, msg->lb_, msg->manual_, msg->num_collections_); +} + +void LBManager::sysReleaseLB(InvokeMsg* msg) { + vt_debug_print(lb, node, "sysReleaseLB\n"); + printMemoryUsage(msg->phase_); + flushTraceNextPhase(); + setTraceEnabledNextPhase(msg->phase_); + return releaseImpl(msg->phase_, msg->num_collections_); +} + void LBManager::setTraceEnabledNextPhase(PhaseType phase) { // Set if tracing is enabled for this next phase. Do this immediately before // LB runs so LB is always instrumented as the beginning of the next phase diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h index 2d080e8b95..bb8838909d 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h @@ -48,9 +48,10 @@ #include "vt/config.h" #include "vt/vrt/collection/balance/lb_type.h" #include "vt/vrt/collection/balance/lb_invoke/invoke_msg.h" -#include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" +#include "vt/configs/arguments/args.h" #include "vt/runtime/component/component_pack.h" #include "vt/objgroup/proxy/proxy_objgroup.h" +#include "vt/vrt/collection/balance/baselb/baselb.h" #include @@ -69,6 +70,7 @@ class LoadModel; */ struct LBManager : runtime::component::Component { using ListenerFnType = std::function; + using LBProxyType = objgroup::proxy::Proxy; /** * \internal \brief System call to construct a \c LBManager @@ -184,14 +186,7 @@ struct LBManager : runtime::component::Component { * * \param[in] msg the LB message */ - template - void sysLB(MsgT* msg) { - vt_debug_print(lb, node, "sysLB\n"); - printMemoryUsage(msg->phase_); - flushTraceNextPhase(); - setTraceEnabledNextPhase(msg->phase_); - return collectiveImpl(msg->phase_, msg->lb_, msg->manual_, msg->num_collections_); - } + void sysLB(InvokeMsg* msg); /** * \internal \brief Tell the manager that a collection has hit \c nextPhase, @@ -199,14 +194,7 @@ struct LBManager : runtime::component::Component { * * \param[in] msg the LB message */ - template - void sysReleaseLB(MsgT* msg) { - vt_debug_print(lb, node, "sysReleaseLB\n"); - printMemoryUsage(msg->phase_); - flushTraceNextPhase(); - setTraceEnabledNextPhase(msg->phase_); - return releaseImpl(msg->phase_, msg->num_collections_); - } + void sysReleaseLB(InvokeMsg* msg); public: /** @@ -256,12 +244,14 @@ struct LBManager : runtime::component::Component { /** * \internal \brief Collectively construct a new load balancer * - * \param[in] msg the start LB message + * \param[in] LB the type of strategy to instantiate * * \return objgroup proxy to the new load balancer */ template - void makeLB(MsgSharedPtr msg); + LBProxyType makeLB(); + + void runLB(LBProxyType base_proxy, PhaseType phase); private: std::size_t num_invocations_ = 0; @@ -274,6 +264,7 @@ struct LBManager : runtime::component::Component { objgroup::proxy::Proxy proxy_; std::shared_ptr base_model_; std::shared_ptr model_; + std::unordered_map lb_instances_; }; }}}} /* end namespace vt::vrt::collection::balance */ diff --git a/src/vt/vrt/collection/balance/lb_invoke/start_lb_msg.h b/src/vt/vrt/collection/balance/lb_invoke/start_lb_msg.h deleted file mode 100644 index b233f189ba..0000000000 --- a/src/vt/vrt/collection/balance/lb_invoke/start_lb_msg.h +++ /dev/null @@ -1,67 +0,0 @@ -/* -//@HEADER -// ***************************************************************************** -// -// start_lb_msg.h -// DARMA Toolkit v. 1.0.0 -// DARMA/vt => Virtual Transport -// -// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC -// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * Neither the name of the copyright holder nor the names of its -// contributors may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact darma@sandia.gov -// -// ***************************************************************************** -//@HEADER -*/ - -#if !defined INCLUDED_VT_VRT_COLLECTION_BALANCE_LB_INVOKE_START_LB_MSG_H -#define INCLUDED_VT_VRT_COLLECTION_BALANCE_LB_INVOKE_START_LB_MSG_H - -#include "vt/config.h" -#include "vt/messaging/message.h" - -namespace vt { namespace vrt { namespace collection { namespace balance { - -struct StartLBMsg : vt::Message { - StartLBMsg() = default; - explicit StartLBMsg(PhaseType const& phase) - : cur_phase_(phase) - {} - - PhaseType getPhase() const { return cur_phase_; } - -private: - PhaseType cur_phase_ = fst_lb_phase; -}; - -}}}} /* end namespace vt::vrt::collection::balance */ - -#endif /*INCLUDED_VT_VRT_COLLECTION_BALANCE_LB_INVOKE_START_LB_MSG_H*/ diff --git a/src/vt/vrt/collection/balance/rotatelb/rotatelb.h b/src/vt/vrt/collection/balance/rotatelb/rotatelb.h index a6b96f6b54..87bde3c74d 100644 --- a/src/vt/vrt/collection/balance/rotatelb/rotatelb.h +++ b/src/vt/vrt/collection/balance/rotatelb/rotatelb.h @@ -48,7 +48,6 @@ #include "vt/config.h" #include "vt/messaging/message.h" #include "vt/vrt/collection/balance/lb_common.h" -#include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" #include "vt/vrt/collection/balance/baselb/baselb.h" #include "vt/timing/timing.h" diff --git a/src/vt/vrt/collection/balance/stats_msg.h b/src/vt/vrt/collection/balance/stats_msg.h index 88d5637a4e..268a1cbbf0 100644 --- a/src/vt/vrt/collection/balance/stats_msg.h +++ b/src/vt/vrt/collection/balance/stats_msg.h @@ -47,7 +47,6 @@ #include "vt/config.h" #include "vt/vrt/collection/balance/lb_common.h" -#include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" #include "vt/vrt/collection/messages/user.h" #include "vt/collective/reduce/reduce.h" #include "vt/messaging/message.h" diff --git a/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.h b/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.h index 224aec083a..f7ae7a3eec 100644 --- a/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.h +++ b/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.h @@ -47,7 +47,6 @@ #include "vt/config.h" #include "vt/vrt/collection/balance/lb_common.h" -#include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" #include "vt/vrt/collection/balance/baselb/baselb.h" #include "vt/collective/collective_scope.h"