From 2b13fb37efa49312f1768db777341e60098199bc Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Fri, 3 Jan 2020 22:20:22 -0700 Subject: [PATCH 01/63] #582: Introduce load model type and simplest naive persistence implementation --- src/CMakeLists.txt | 1 + .../vrt/collection/balance/model/load_model.h | 73 +++++++++++++++++++ .../balance/model/naive_persistence.cc | 60 +++++++++++++++ .../balance/model/naive_persistence.h | 69 ++++++++++++++++++ 4 files changed, 203 insertions(+) create mode 100644 src/vt/vrt/collection/balance/model/load_model.h create mode 100644 src/vt/vrt/collection/balance/model/naive_persistence.cc create mode 100644 src/vt/vrt/collection/balance/model/naive_persistence.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ec7758ef5d..e3a1236431 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -83,6 +83,7 @@ set( vrt/collection/balance/zoltanlb vrt/collection/balance/randomlb vrt/collection/balance/lb_invoke + vrt/collection/balance/model vrt/collection/balance/proxy lb/instrumentation lb/instrumentation/centralized diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h new file mode 100644 index 0000000000..4cabef22c2 --- /dev/null +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -0,0 +1,73 @@ +/* +//@HEADER +// ***************************************************************************** +// +// load_model.h +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#if !defined INCLUDED_VRT_COLLECTION_BALANCE_LOAD_MODEL_H +#define INCLUDED_VRT_COLLECTION_BALANCE_LOAD_MODEL_H + +#include "vt/config.h" +#include "vt/vrt/collection/balance/lb_common.h" +#include "vt/timing/timing_type.h" + +namespace vt { namespace vrt { namespace collection { namespace balance { + +struct PhaseOffset { + unsigned int phases; + static constexpr unsigned int NEXT_PHASE = 0; + + unsigned int subphase; + static constexpr unsigned int WHOLE_PHASE = 0; +}; + +class LoadModel +{ +public: + LoadModel() {} + + virtual TimeType getWork(ElementIDType object, PhaseOffset when) = 0; + +}; // class LoadModel + +}}}} // namespaces + +#endif diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.cc b/src/vt/vrt/collection/balance/model/naive_persistence.cc new file mode 100644 index 0000000000..406f00ed1f --- /dev/null +++ b/src/vt/vrt/collection/balance/model/naive_persistence.cc @@ -0,0 +1,60 @@ +/* +//@HEADER +// ***************************************************************************** +// +// naive_persistence.cpp +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + + +#include "vt/vrt/collection/balance/model/naive_persistence.h" + +namespace vt { namespace vrt { namespace collection { namespace balance { + +NaivePersistence::NaivePersistence(const ElementLoadType &loads) + : loads_(loads) +{ } + +TimeType NaivePersistence::getWork(ElementIDType object, PhaseOffset /*ignored*/) +{ + return loads_.at(object); +} + + +}}}} diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.h b/src/vt/vrt/collection/balance/model/naive_persistence.h new file mode 100644 index 0000000000..177a18fd13 --- /dev/null +++ b/src/vt/vrt/collection/balance/model/naive_persistence.h @@ -0,0 +1,69 @@ +/* +//@HEADER +// ***************************************************************************** +// +// naive_persistence.h +// DARMA Toolkit v. 
1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#if !defined INCLUDED_VRT_COLLECTION_BALANCE_NAIVE_PERSISTENCE_H +#define INCLUDED_VRT_COLLECTION_BALANCE_NAIVE_PERSISTENCE_H + +#include "vt/vrt/collection/balance/model/load_model.h" +#include + +namespace vt { namespace vrt { namespace collection { namespace balance { + +class NaivePersistence : public LoadModel { + using LoadType = double; + using ObjIDType = balance::ElementIDType; + using ElementLoadType = std::unordered_map; + +public: + NaivePersistence(const ElementLoadType &loads); + TimeType getWork(ElementIDType object, PhaseOffset when) override; + +private: + ElementLoadType loads_; + +}; // class NaivePersistence + +}}}} // end namespace + +#endif From 3a10c40b7c94afa9266690b6dfe7d3e644d0e1c7 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Sat, 4 Jan 2020 00:00:55 -0700 Subject: [PATCH 02/63] #582: LB: Remove some unused code and includes --- src/vt/vrt/collection/balance/baselb/baselb.h | 2 +- .../vrt/collection/balance/elm_stats.impl.h | 1 - .../collection/balance/greedylb/greedylb.h | 1 - .../balance/greedylb/greedylb_msgs.h | 2 +- .../balance/greedylb/greedylb_types.h | 1 - .../balance/hierarchicallb/hierlb.h | 2 - .../balance/hierarchicallb/hierlb_msgs.h | 2 +- .../balance/hierarchicallb/hierlb_types.h | 1 - .../collection/balance/rotatelb/rotatelb.h | 1 - src/vt/vrt/collection/balance/stats_msg.h | 39 ------------------- 10 files changed, 3 insertions(+), 49 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.h b/src/vt/vrt/collection/balance/baselb/baselb.h index 573b3400d6..1f7104307a 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.h +++ b/src/vt/vrt/collection/balance/baselb/baselb.h @@ -49,7 +49,7 @@ #include "vt/vrt/collection/balance/lb_common.h" #include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" #include "vt/vrt/collection/balance/baselb/baselb_msgs.h" -#include 
"vt/vrt/collection/balance/proc_stats.h" +#include "vt/vrt/collection/balance/stats_msg.h" #include "vt/vrt/collection/balance/lb_comm.h" #include "vt/vrt/collection/balance/read_lb.h" #include "vt/objgroup/headers.h" diff --git a/src/vt/vrt/collection/balance/elm_stats.impl.h b/src/vt/vrt/collection/balance/elm_stats.impl.h index c13f451872..1635f279ab 100644 --- a/src/vt/vrt/collection/balance/elm_stats.impl.h +++ b/src/vt/vrt/collection/balance/elm_stats.impl.h @@ -49,7 +49,6 @@ #include "vt/vrt/collection/balance/elm_stats.h" #include "vt/vrt/collection/balance/phase_msg.h" #include "vt/vrt/collection/balance/stats_msg.h" -#include "vt/vrt/collection/balance/proc_stats.h" #include "vt/vrt/collection/balance/lb_type.h" #include "vt/vrt/collection/manager.h" #include "vt/vrt/collection/balance/lb_invoke/lb_manager.h" diff --git a/src/vt/vrt/collection/balance/greedylb/greedylb.h b/src/vt/vrt/collection/balance/greedylb/greedylb.h index 11a3d61eb8..6715ca01cd 100644 --- a/src/vt/vrt/collection/balance/greedylb/greedylb.h +++ b/src/vt/vrt/collection/balance/greedylb/greedylb.h @@ -52,7 +52,6 @@ #include "vt/vrt/collection/balance/greedylb/greedylb_msgs.h" #include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" #include "vt/vrt/collection/balance/baselb/baselb.h" -#include "vt/vrt/collection/balance/proc_stats.h" #include "vt/timing/timing.h" #include diff --git a/src/vt/vrt/collection/balance/greedylb/greedylb_msgs.h b/src/vt/vrt/collection/balance/greedylb/greedylb_msgs.h index af2a334518..db9675c780 100644 --- a/src/vt/vrt/collection/balance/greedylb/greedylb_msgs.h +++ b/src/vt/vrt/collection/balance/greedylb/greedylb_msgs.h @@ -47,8 +47,8 @@ #include "vt/config.h" #include "vt/vrt/collection/balance/greedylb/greedylb_types.h" -#include "vt/vrt/collection/balance/proc_stats.h" #include "vt/messaging/message.h" +#include "vt/collective/reduce/operators/default_msg.h" #include #include diff --git a/src/vt/vrt/collection/balance/greedylb/greedylb_types.h 
b/src/vt/vrt/collection/balance/greedylb/greedylb_types.h index 17a85726b5..2b5252100f 100644 --- a/src/vt/vrt/collection/balance/greedylb/greedylb_types.h +++ b/src/vt/vrt/collection/balance/greedylb/greedylb_types.h @@ -47,7 +47,6 @@ #include "vt/config.h" #include "vt/vrt/collection/balance/lb_common.h" -#include "vt/vrt/collection/balance/proc_stats.h" #include #include diff --git a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.h b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.h index 2d12c034b1..9975324283 100644 --- a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.h +++ b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.h @@ -52,7 +52,6 @@ #include "vt/vrt/collection/balance/hierarchicallb/hierlb_msgs.h" #include "vt/vrt/collection/balance/hierarchicallb/hierlb_strat.h" #include "vt/vrt/collection/balance/baselb/baselb.h" -#include "vt/vrt/collection/balance/proc_stats.h" #include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" #include "vt/timing/timing.h" #include "vt/objgroup/headers.h" @@ -69,7 +68,6 @@ struct HierarchicalLB : BaseLB { using ChildPtrType = std::unique_ptr; using ChildMapType = std::unordered_map; using ElementLoadType = std::unordered_map; - using ProcStatsMsgType = balance::ProcStatsMsg; using TransferType = std::map>; using LoadType = double; diff --git a/src/vt/vrt/collection/balance/hierarchicallb/hierlb_msgs.h b/src/vt/vrt/collection/balance/hierarchicallb/hierlb_msgs.h index 251c049432..4c3651882c 100644 --- a/src/vt/vrt/collection/balance/hierarchicallb/hierlb_msgs.h +++ b/src/vt/vrt/collection/balance/hierarchicallb/hierlb_msgs.h @@ -48,8 +48,8 @@ #include "vt/config.h" #include "vt/vrt/collection/balance/hierarchicallb/hierlb_constants.h" #include "vt/vrt/collection/balance/hierarchicallb/hierlb_types.h" -#include "vt/vrt/collection/balance/proc_stats.h" #include "vt/messaging/message.h" +#include "vt/collective/reduce/operators/default_msg.h" namespace vt { namespace vrt { namespace collection { 
namespace lb { diff --git a/src/vt/vrt/collection/balance/hierarchicallb/hierlb_types.h b/src/vt/vrt/collection/balance/hierarchicallb/hierlb_types.h index 56f220eadd..98c9efeb12 100644 --- a/src/vt/vrt/collection/balance/hierarchicallb/hierlb_types.h +++ b/src/vt/vrt/collection/balance/hierarchicallb/hierlb_types.h @@ -47,7 +47,6 @@ #include "vt/config.h" #include "vt/vrt/collection/balance/lb_common.h" -#include "vt/vrt/collection/balance/proc_stats.h" #include #include diff --git a/src/vt/vrt/collection/balance/rotatelb/rotatelb.h b/src/vt/vrt/collection/balance/rotatelb/rotatelb.h index 229257817b..a6b96f6b54 100644 --- a/src/vt/vrt/collection/balance/rotatelb/rotatelb.h +++ b/src/vt/vrt/collection/balance/rotatelb/rotatelb.h @@ -49,7 +49,6 @@ #include "vt/messaging/message.h" #include "vt/vrt/collection/balance/lb_common.h" #include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" -#include "vt/vrt/collection/balance/proc_stats.h" #include "vt/vrt/collection/balance/baselb/baselb.h" #include "vt/timing/timing.h" diff --git a/src/vt/vrt/collection/balance/stats_msg.h b/src/vt/vrt/collection/balance/stats_msg.h index 792d1cef83..ed53903285 100644 --- a/src/vt/vrt/collection/balance/stats_msg.h +++ b/src/vt/vrt/collection/balance/stats_msg.h @@ -159,23 +159,6 @@ static_assert( "Must be trivially copyable to avoid serialization." 
); -template -struct LoadStatsMsg : NonSerialized< - CollectionMessage, - LoadStatsMsg ->, LoadData -{ - LoadStatsMsg() = default; - LoadStatsMsg(LoadData const& in_load_data, PhaseType const& phase) - : LoadData(in_load_data), cur_phase_(phase) - {} - - PhaseType getPhase() const { return cur_phase_; } - -private: - PhaseType cur_phase_ = fst_lb_phase; -}; - struct ProcStatsMsg : NonSerialized< collective::ReduceTMsg, ProcStatsMsg @@ -199,28 +182,6 @@ struct ProcStatsMsg : NonSerialized< lb::Statistic stat_ = lb::Statistic::P_l; }; -template -struct StatsMsg : collective::ReduceTMsg { - using MessageParentType = collective::ReduceTMsg; - vt_msg_serialize_prohibited(); - - using ProxyType = typename ColT::CollectionProxyType; - - StatsMsg() = default; - StatsMsg( - PhaseType const& in_cur_phase, TimeType const& in_total_load, - ProxyType const& in_proxy - ) : ReduceTMsg({in_total_load}), - proxy_(in_proxy), cur_phase_(in_cur_phase) - { } - - ProxyType getProxy() const { return proxy_; } - PhaseType getPhase() const { return cur_phase_; } -private: - ProxyType proxy_ = {}; - PhaseType cur_phase_ = fst_lb_phase; -}; - }}}} /* end namespace vt::vrt::collection::balance */ #endif /*INCLUDED_VRT_COLLECTION_BALANCE_STATS_MSG_H*/ From 208a1f8146e00429a290157246cd89b804c13e01 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Sat, 4 Jan 2020 00:17:07 -0700 Subject: [PATCH 03/63] #582: Introduce model indirection for process stats, greedylb, and hierarchicallb --- src/vt/vrt/collection/balance/baselb/baselb.cc | 10 +++++++--- src/vt/vrt/collection/balance/baselb/baselb.h | 4 +++- src/vt/vrt/collection/balance/gossiplb/gossiplb.cc | 2 +- src/vt/vrt/collection/balance/greedylb/greedylb.cc | 4 +--- src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc | 4 +--- .../vrt/collection/balance/model/naive_persistence.cc | 4 ++-- .../vrt/collection/balance/model/naive_persistence.h | 4 ++-- src/vt/vrt/collection/balance/rotatelb/rotatelb.cc | 2 +- 8 files changed, 18 insertions(+), 16 
deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index bc480fd108..52dc3e813f 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -55,6 +55,7 @@ #include "vt/collective/reduce/reduce.h" #include "vt/collective/collective_alg.h" #include "vt/vrt/collection/balance/lb_common.h" +#include "vt/vrt/collection/balance/model/naive_persistence.h" #include @@ -116,7 +117,7 @@ void BaseLB::importProcessorData( ); } - load_data = &load_in; + load_data_ = &load_in; comm_data = &comm_in; } @@ -124,6 +125,9 @@ void BaseLB::getArgs(PhaseType phase) { using ArgType = vt::arguments::ArgConfig; using namespace balance; + if (load_model_ == nullptr) + load_model_.reset(new balance::NaivePersistence(load_data_)); + bool has_spec = ReadLBSpec::hasSpec(); if (has_spec) { auto spec = ReadLBSpec::entry(phase); @@ -354,8 +358,8 @@ void BaseLB::computeStatisticsOver(Statistic stat) { case Statistic::O_l: { // Perform the reduction for O_l -> object load only std::vector lds; - for (auto&& elm : *load_data) { - lds.emplace_back(balance::LoadData(elm.second)); + for (auto&& elm : *load_data_) { + lds.emplace_back(load_model_->getWork(elm.second, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); } auto msg = makeMessage(Statistic::O_l, reduceVec(std::move(lds))); proxy_.template reduce(msg,cb); diff --git a/src/vt/vrt/collection/balance/baselb/baselb.h b/src/vt/vrt/collection/balance/baselb/baselb.h index 1f7104307a..3221b61802 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.h +++ b/src/vt/vrt/collection/balance/baselb/baselb.h @@ -53,6 +53,7 @@ #include "vt/vrt/collection/balance/lb_comm.h" #include "vt/vrt/collection/balance/read_lb.h" #include "vt/objgroup/headers.h" +#include "vt/vrt/collection/balance/model/load_model.h" #include #include @@ -140,7 +141,7 @@ struct BaseLB { int32_t bin_size_ = 10; ObjSampleType obj_sample 
= {}; LoadType this_load = 0.0f; - ElementLoadType const* load_data = nullptr; + ElementLoadType const* load_data_ = nullptr; ElementCommType const* comm_data = nullptr; StatisticMapType stats = {}; objgroup::proxy::Proxy proxy_ = {}; @@ -148,6 +149,7 @@ struct BaseLB { bool comm_aware_ = false; bool comm_collectives_ = false; std::unique_ptr spec_entry_ = nullptr; + std::unique_ptr load_model_ = nullptr; private: TransferVecType transfers_ = {}; diff --git a/src/vt/vrt/collection/balance/gossiplb/gossiplb.cc b/src/vt/vrt/collection/balance/gossiplb/gossiplb.cc index 0461b1e9f7..3ac7495bd1 100644 --- a/src/vt/vrt/collection/balance/gossiplb/gossiplb.cc +++ b/src/vt/vrt/collection/balance/gossiplb/gossiplb.cc @@ -122,7 +122,7 @@ void GossipLB::doLBStages() { if (first_iter) { // Copy this node's object assignments to a local, mutable copy - cur_objs_ = *load_data; + cur_objs_ = *load_data_; this_new_load_ = this_load; } else { // Clear out data structures from previous iteration diff --git a/src/vt/vrt/collection/balance/greedylb/greedylb.cc b/src/vt/vrt/collection/balance/greedylb/greedylb.cc index 5eb632ee94..07c0cfbe22 100644 --- a/src/vt/vrt/collection/balance/greedylb/greedylb.cc +++ b/src/vt/vrt/collection/balance/greedylb/greedylb.cc @@ -308,9 +308,7 @@ void GreedyLB::loadOverBin(ObjBinType bin, ObjBinListType& bin_list) { load_over[bin].push_back(obj_id); bin_list.pop_back(); - auto obj_iter = load_data->find(obj_id); - vtAssert(obj_iter != load_data->end(), "Obj must exist in stats"); - auto const& obj_time_milli = loadMilli(obj_iter->second); + auto const& obj_time_milli = loadMilli(load_model_->getWork(obj_id, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); this_load -= obj_time_milli; diff --git a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc index ee76ae2a6f..24d6335039 100644 --- a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc +++ 
b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc @@ -246,9 +246,7 @@ void HierarchicalLB::loadOverBin(ObjBinType bin, ObjBinListType& bin_list) { load_over[bin].push_back(obj_id); bin_list.pop_back(); - auto obj_iter = load_data->find(obj_id); - vtAssert(obj_iter != load_data->end(), "Obj must exist in stats"); - auto const& obj_time_milli = loadMilli(obj_iter->second); + auto const& obj_time_milli = loadMilli(load_model_->getWork(obj_id, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); this_load -= obj_time_milli; diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.cc b/src/vt/vrt/collection/balance/model/naive_persistence.cc index 406f00ed1f..9a2787a480 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.cc +++ b/src/vt/vrt/collection/balance/model/naive_persistence.cc @@ -47,13 +47,13 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -NaivePersistence::NaivePersistence(const ElementLoadType &loads) +NaivePersistence::NaivePersistence(const ElementLoadType *loads) : loads_(loads) { } TimeType NaivePersistence::getWork(ElementIDType object, PhaseOffset /*ignored*/) { - return loads_.at(object); + return loads_->at(object); } diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.h b/src/vt/vrt/collection/balance/model/naive_persistence.h index 177a18fd13..fd018189ba 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.h +++ b/src/vt/vrt/collection/balance/model/naive_persistence.h @@ -56,11 +56,11 @@ class NaivePersistence : public LoadModel { using ElementLoadType = std::unordered_map; public: - NaivePersistence(const ElementLoadType &loads); + NaivePersistence(const ElementLoadType *loads); TimeType getWork(ElementIDType object, PhaseOffset when) override; private: - ElementLoadType loads_; + const ElementLoadType* loads_; }; // class NaivePersistence diff --git a/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc 
b/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc index 8c85040e6c..de8b2a55d4 100644 --- a/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc +++ b/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc @@ -70,7 +70,7 @@ void RotateLB::runLB() { fflush(stdout); } - for (auto&& stat : *load_data) { + for (auto&& stat : *load_data_) { auto const& obj = stat.first; auto const& load = stat.second; vt_debug_print( From 7166b25d9cb4b53e89ccf0e449aec82278248a05 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Sat, 4 Jan 2020 09:28:22 -0700 Subject: [PATCH 04/63] #582: Give the model access to the comm_data --- src/vt/vrt/collection/balance/baselb/baselb.cc | 2 +- src/vt/vrt/collection/balance/model/naive_persistence.cc | 4 ++-- src/vt/vrt/collection/balance/model/naive_persistence.h | 6 ++++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index 52dc3e813f..e3bef6bdbe 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -126,7 +126,7 @@ void BaseLB::getArgs(PhaseType phase) { using namespace balance; if (load_model_ == nullptr) - load_model_.reset(new balance::NaivePersistence(load_data_)); + load_model_.reset(new balance::NaivePersistence(load_data_, comm_data)); bool has_spec = ReadLBSpec::hasSpec(); if (has_spec) { diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.cc b/src/vt/vrt/collection/balance/model/naive_persistence.cc index 9a2787a480..95f6e1221f 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.cc +++ b/src/vt/vrt/collection/balance/model/naive_persistence.cc @@ -47,8 +47,8 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -NaivePersistence::NaivePersistence(const ElementLoadType *loads) - : loads_(loads) +NaivePersistence::NaivePersistence(const ElementLoadType *loads, const ElementCommType *comms) + : loads_(loads), 
comms_(comms) { } TimeType NaivePersistence::getWork(ElementIDType object, PhaseOffset /*ignored*/) diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.h b/src/vt/vrt/collection/balance/model/naive_persistence.h index fd018189ba..3be1da6811 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.h +++ b/src/vt/vrt/collection/balance/model/naive_persistence.h @@ -46,6 +46,7 @@ #define INCLUDED_VRT_COLLECTION_BALANCE_NAIVE_PERSISTENCE_H #include "vt/vrt/collection/balance/model/load_model.h" +#include "vt/vrt/collection/balance/lb_comm.h" #include namespace vt { namespace vrt { namespace collection { namespace balance { @@ -54,14 +55,15 @@ class NaivePersistence : public LoadModel { using LoadType = double; using ObjIDType = balance::ElementIDType; using ElementLoadType = std::unordered_map; + using ElementCommType = balance::CommMapType; public: - NaivePersistence(const ElementLoadType *loads); + NaivePersistence(const ElementLoadType *loads, const ElementCommType *comms); TimeType getWork(ElementIDType object, PhaseOffset when) override; private: const ElementLoadType* loads_; - + const ElementCommType* comms_; }; // class NaivePersistence }}}} // end namespace From f62c3c0f6d89546f4c93d61f6ead940143c42e81 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Sat, 4 Jan 2020 09:44:06 -0700 Subject: [PATCH 05/63] #582: Record message counts along with total bytes in LB communication data --- .../vrt/collection/balance/baselb/baselb.cc | 6 ++--- src/vt/vrt/collection/balance/elm_stats.cc | 6 ++--- src/vt/vrt/collection/balance/lb_comm.h | 23 ++++++++++++++++++- src/vt/vrt/collection/balance/proc_stats.cc | 6 ++--- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index e3bef6bdbe..689952c6bc 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -335,8 +335,8 @@ void 
BaseLB::computeStatisticsOver(Statistic stat) { if (elm.first.onNode() or elm.first.selfEdge()) { continue; } - //vt_print(lb, "comm_load={}, elm={}\n", comm_load, elm.second); - comm_load += elm.second; + //vt_print(lb, "comm_load={}, elm={}\n", comm_load, elm.second.bytes); + comm_load += elm.second.bytes; } auto msg = makeMessage(Statistic::P_c, comm_load); proxy_.template reduce(msg,cb); @@ -348,7 +348,7 @@ void BaseLB::computeStatisticsOver(Statistic stat) { for (auto&& elm : *comm_data) { // Only count object-to-object direct edges in the O_c statistics if (elm.first.cat_ == balance::CommCategory::SendRecv and not elm.first.selfEdge()) { - lds.emplace_back(balance::LoadData(elm.second)); + lds.emplace_back(balance::LoadData(elm.second.bytes)); } } auto msg = makeMessage(Statistic::O_c, reduceVec(std::move(lds))); diff --git a/src/vt/vrt/collection/balance/elm_stats.cc b/src/vt/vrt/collection/balance/elm_stats.cc index e8ace93655..686e8afb27 100644 --- a/src/vt/vrt/collection/balance/elm_stats.cc +++ b/src/vt/vrt/collection/balance/elm_stats.cc @@ -85,7 +85,7 @@ void ElementStats::recvObjData( ) { comm_.resize(cur_phase_ + 1); LBCommKey key(LBCommKey::CollectionTag{}, pfrom, tfrom, pto, tto, bcast); - comm_.at(cur_phase_)[key] += bytes; + comm_.at(cur_phase_)[key].receiveMsg(bytes); } void ElementStats::recvFromNode( @@ -94,7 +94,7 @@ void ElementStats::recvFromNode( ) { comm_.resize(cur_phase_ + 1); LBCommKey key(LBCommKey::NodeToCollectionTag{}, from, pto, tto, bcast); - comm_.at(cur_phase_)[key] += bytes; + comm_.at(cur_phase_)[key].receiveMsg(bytes); } void ElementStats::recvToNode( @@ -103,7 +103,7 @@ void ElementStats::recvToNode( ) { comm_.resize(cur_phase_ + 1); LBCommKey key(LBCommKey::CollectionToNodeTag{}, pfrom, tfrom, to, bcast); - comm_.at(cur_phase_)[key] += bytes; + comm_.at(cur_phase_)[key].receiveMsg(bytes); } void ElementStats::setModelWeight(TimeType const& time) { diff --git a/src/vt/vrt/collection/balance/lb_comm.h 
b/src/vt/vrt/collection/balance/lb_comm.h index 255822bb44..9ca795a7d9 100644 --- a/src/vt/vrt/collection/balance/lb_comm.h +++ b/src/vt/vrt/collection/balance/lb_comm.h @@ -146,7 +146,28 @@ struct LBCommKey { // Set the types for the communication graph using CommKeyType = LBCommKey; using CommBytesType = double; -using CommMapType = std::unordered_map; + +struct CommVolume { + CommBytesType bytes = 0.0; + uint64_t messages = 0; + + void receiveMsg(double b) { + messages++; + bytes += b; + } + + void operator+=(const CommVolume &rhs) { + bytes += rhs.bytes; + messages += rhs.messages; + } + + template + void serialize(SerializerT &s) { + s | bytes | messages; + } +}; + +using CommMapType = std::unordered_map; }}}} /* end namespace vt::vrt::collection::balance */ diff --git a/src/vt/vrt/collection/balance/proc_stats.cc b/src/vt/vrt/collection/balance/proc_stats.cc index 58896551b2..56d6ef545d 100644 --- a/src/vt/vrt/collection/balance/proc_stats.cc +++ b/src/vt/vrt/collection/balance/proc_stats.cc @@ -243,7 +243,7 @@ void ProcStats::outputStatsFile() { ) { auto const to = key.toObj(); auto const from = key.fromObj(); - auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val, cat); + auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat); fprintf(stats_file_, "%s", obj_str.c_str()); } else if ( key.cat_ == CommCategory::NodeToCollection or @@ -251,7 +251,7 @@ void ProcStats::outputStatsFile() { ) { auto const to = key.toObj(); auto const from = key.fromNode(); - auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val, cat); + auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat); fprintf(stats_file_, "%s", obj_str.c_str()); } else if ( key.cat_ == CommCategory::CollectionToNode or @@ -259,7 +259,7 @@ void ProcStats::outputStatsFile() { ) { auto const to = key.toNode(); auto const from = key.fromObj(); - auto obj_str = fmt::format("{},{},{},{},{}\n", i, to, from, val, cat); + auto obj_str = 
fmt::format("{},{},{},{},{}\n", i, to, from, val.bytes, cat); fprintf(stats_file_, "%s", obj_str.c_str()); } else { vtAssert(false, "Invalid balance::CommCategory enum value"); From a074ad35a1d260c4a7967755725beb1f45fe8009 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Sat, 4 Jan 2020 09:56:34 -0700 Subject: [PATCH 06/63] #582: Hack the NaivePersistence model to account some overhead for received messages --- .../collection/balance/model/naive_persistence.cc | 13 ++++++++++--- .../collection/balance/model/naive_persistence.h | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.cc b/src/vt/vrt/collection/balance/model/naive_persistence.cc index 95f6e1221f..361b4a0b98 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.cc +++ b/src/vt/vrt/collection/balance/model/naive_persistence.cc @@ -48,12 +48,19 @@ namespace vt { namespace vrt { namespace collection { namespace balance { NaivePersistence::NaivePersistence(const ElementLoadType *loads, const ElementCommType *comms) - : loads_(loads), comms_(comms) -{ } + : loads_(*loads), comms_(comms) +{ + // Add a bit of overhead for each off-node received message per object + for (auto &&comm : *comms_) { + auto obj = loads_.find(comm.first.toObj()); + if (obj != loads_.end()) + obj->second += 0.001 * comm.second.messages; + } +} TimeType NaivePersistence::getWork(ElementIDType object, PhaseOffset /*ignored*/) { - return loads_->at(object); + return loads_.at(object); } diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.h b/src/vt/vrt/collection/balance/model/naive_persistence.h index 3be1da6811..12faff98eb 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.h +++ b/src/vt/vrt/collection/balance/model/naive_persistence.h @@ -62,7 +62,7 @@ class NaivePersistence : public LoadModel { TimeType getWork(ElementIDType object, PhaseOffset when) override; private: - const ElementLoadType* loads_; + ElementLoadType 
loads_; const ElementCommType* comms_; }; // class NaivePersistence From 4c681ef8b4b6f61a438852b2a65999919d58983b Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Sat, 4 Jan 2020 12:02:19 -0700 Subject: [PATCH 07/63] #582: LB Model: Fix some bugs, now passes EMPIRE LB tests --- src/vt/vrt/collection/balance/baselb/baselb.cc | 8 ++++---- src/vt/vrt/collection/balance/model/naive_persistence.cc | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index 689952c6bc..3e4775faa7 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -119,15 +119,15 @@ void BaseLB::importProcessorData( load_data_ = &load_in; comm_data = &comm_in; + + if (load_model_ == nullptr) + load_model_.reset(new balance::NaivePersistence(load_data_, comm_data)); } void BaseLB::getArgs(PhaseType phase) { using ArgType = vt::arguments::ArgConfig; using namespace balance; - if (load_model_ == nullptr) - load_model_.reset(new balance::NaivePersistence(load_data_, comm_data)); - bool has_spec = ReadLBSpec::hasSpec(); if (has_spec) { auto spec = ReadLBSpec::entry(phase); @@ -359,7 +359,7 @@ void BaseLB::computeStatisticsOver(Statistic stat) { // Perform the reduction for O_l -> object load only std::vector lds; for (auto&& elm : *load_data_) { - lds.emplace_back(load_model_->getWork(elm.second, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); + lds.emplace_back(load_model_->getWork(elm.first, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); } auto msg = makeMessage(Statistic::O_l, reduceVec(std::move(lds))); proxy_.template reduce(msg,cb); diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.cc b/src/vt/vrt/collection/balance/model/naive_persistence.cc index 361b4a0b98..edc99a6feb 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.cc +++ 
b/src/vt/vrt/collection/balance/model/naive_persistence.cc @@ -50,6 +50,9 @@ namespace vt { namespace vrt { namespace collection { namespace balance { NaivePersistence::NaivePersistence(const ElementLoadType *loads, const ElementCommType *comms) : loads_(*loads), comms_(comms) { + if (!comms) vtAbort("Need comms"); + if (!loads) vtAbort("Need loads"); + // Add a bit of overhead for each off-node received message per object for (auto &&comm : *comms_) { auto obj = loads_.find(comm.first.toObj()); From 10001bdd7486f5cd81c016b3538762f159aa2070 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Fri, 26 Jun 2020 15:32:07 -0400 Subject: [PATCH 08/63] #582: Fix build issues post-rebase --- src/vt/vrt/collection/balance/baselb/baselb.h | 1 + src/vt/vrt/collection/balance/randomlb/randomlb.cc | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.h b/src/vt/vrt/collection/balance/baselb/baselb.h index 3221b61802..f139a9f23a 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.h +++ b/src/vt/vrt/collection/balance/baselb/baselb.h @@ -49,6 +49,7 @@ #include "vt/vrt/collection/balance/lb_common.h" #include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" #include "vt/vrt/collection/balance/baselb/baselb_msgs.h" +#include "vt/vrt/collection/balance/proc_stats.h" #include "vt/vrt/collection/balance/stats_msg.h" #include "vt/vrt/collection/balance/lb_comm.h" #include "vt/vrt/collection/balance/read_lb.h" diff --git a/src/vt/vrt/collection/balance/randomlb/randomlb.cc b/src/vt/vrt/collection/balance/randomlb/randomlb.cc index 509d47a52e..b2ec8c32b5 100644 --- a/src/vt/vrt/collection/balance/randomlb/randomlb.cc +++ b/src/vt/vrt/collection/balance/randomlb/randomlb.cc @@ -85,7 +85,7 @@ void RandomLB::runLB() { // Sort the objects so we have a deterministic order over them std::set objs; - for (auto&& stat : *load_data) { + for (auto&& stat : *load_data_) { objs.insert(stat.first); } From 
4f420307b487855c1e6a3fdec639dfad56a3848f Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Fri, 26 Jun 2020 16:34:59 -0400 Subject: [PATCH 09/63] #582: Give LBManager ownership of the load model selection --- src/vt/vrt/collection/balance/baselb/baselb.cc | 6 ++---- src/vt/vrt/collection/balance/baselb/baselb.h | 5 +++-- src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc | 6 +++++- src/vt/vrt/collection/balance/lb_invoke/lb_manager.h | 9 +++++++++ 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index 3e4775faa7..564425f9e8 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -55,7 +55,6 @@ #include "vt/collective/reduce/reduce.h" #include "vt/collective/collective_alg.h" #include "vt/vrt/collection/balance/lb_common.h" -#include "vt/vrt/collection/balance/model/naive_persistence.h" #include @@ -64,12 +63,14 @@ namespace vt { namespace vrt { namespace collection { namespace lb { void BaseLB::startLB( PhaseType phase, objgroup::proxy::Proxy proxy, + balance::LoadModel* model, balance::ProcStats::LoadMapType const& in_load_stats, ElementCommType const& in_comm_stats ) { start_time_ = timing::Timing::getCurrentTime(); phase_ = phase; proxy_ = proxy; + load_model_ = model; importProcessorData(in_load_stats, in_comm_stats); @@ -119,9 +120,6 @@ void BaseLB::importProcessorData( load_data_ = &load_in; comm_data = &comm_in; - - if (load_model_ == nullptr) - load_model_.reset(new balance::NaivePersistence(load_data_, comm_data)); } void BaseLB::getArgs(PhaseType phase) { diff --git a/src/vt/vrt/collection/balance/baselb/baselb.h b/src/vt/vrt/collection/balance/baselb/baselb.h index f139a9f23a..1a8f7ea5c8 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.h +++ b/src/vt/vrt/collection/balance/baselb/baselb.h @@ -108,7 +108,7 @@ struct BaseLB { * through calls to `migrateObjectTo`. 
Callers can then access that * set using `getTransfers` and apply it using `applyMigrations`. */ - void startLB(PhaseType phase, objgroup::proxy::Proxy proxy, balance::ProcStats::LoadMapType const& in_load_stats, ElementCommType const& in_comm_stats); + void startLB(PhaseType phase, objgroup::proxy::Proxy proxy, balance::LoadModel *model, balance::ProcStats::LoadMapType const& in_load_stats, ElementCommType const& in_comm_stats); void computeStatistics(); void importProcessorData(ElementLoadType const& ld, ElementCommType const& cm); void statsHandler(StatsMsgType* msg); @@ -150,7 +150,8 @@ struct BaseLB { bool comm_aware_ = false; bool comm_collectives_ = false; std::unique_ptr spec_entry_ = nullptr; - std::unique_ptr load_model_ = nullptr; + // Observer only - LBManager owns the instance + balance::LoadModel* load_model_ = nullptr; private: TransferVecType transfers_ = {}; diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 117ad4e5c1..219d4ed7a6 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -59,6 +59,7 @@ #include "vt/vrt/collection/messages/system_create.h" #include "vt/vrt/collection/manager.fwd.h" #include "vt/utils/memory/memory_usage.h" +#include "vt/vrt/collection/balance/model/naive_persistence.h" namespace vt { namespace vrt { namespace collection { namespace balance { @@ -131,9 +132,12 @@ LBManager::makeLB(MsgSharedPtr msg) { auto base_proxy = proxy.template registerBaseCollective(); auto phase = msg->getPhase(); + if (model_ == nullptr) + model_.reset(new balance::NaivePersistence(&theProcStats()->getProcLoad(phase), &theProcStats()->getProcComm(phase))); + EpochType balance_epoch = theTerm()->makeEpochCollective("LBManager::balance_epoch"); theMsg()->pushEpoch(balance_epoch); - strat->startLB(phase, base_proxy, theProcStats()->getProcLoad(phase), theProcStats()->getProcComm(phase)); + 
strat->startLB(phase, base_proxy, model_.get(), theProcStats()->getProcLoad(phase), theProcStats()->getProcComm(phase)); theMsg()->popEpoch(balance_epoch); theTerm()->finishedEpoch(balance_epoch); diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h index 88aa7a876d..1c323afcea 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h @@ -49,6 +49,7 @@ #include "vt/vrt/collection/balance/lb_type.h" #include "vt/vrt/collection/balance/lb_invoke/invoke_msg.h" #include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" +#include "vt/vrt/collection/balance/model/load_model.h" #include "vt/configs/arguments/args.h" #include "vt/runtime/component/component_pack.h" #include "vt/objgroup/proxy/proxy_objgroup.h" @@ -232,6 +233,13 @@ struct LBManager : runtime::component::Component { */ void triggerListeners(PhaseType phase); + /** + * \brief Set a model of expected object loads to use in place of + * naive persistence + * + */ + void setLoadModel(std::unique_ptr model) { model_ = std::move(model); } + protected: /** * \internal \brief Collectively construct a new load balancer @@ -252,6 +260,7 @@ struct LBManager : runtime::component::Component { bool synced_in_lb_ = true; std::vector listeners_ = {}; objgroup::proxy::Proxy proxy_; + std::unique_ptr model_ = nullptr; }; }}}} /* end namespace vt::vrt::collection::balance */ From eb70a6b555e68acb13833bce7b5612d029246625 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Fri, 26 Jun 2020 16:58:37 -0400 Subject: [PATCH 10/63] #582: Add subphase l-norm load model --- src/vt/vrt/collection/balance/model/norm.cc | 81 +++++++++++++++++++++ src/vt/vrt/collection/balance/model/norm.h | 81 +++++++++++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 src/vt/vrt/collection/balance/model/norm.cc create mode 100644 src/vt/vrt/collection/balance/model/norm.h diff --git 
a/src/vt/vrt/collection/balance/model/norm.cc b/src/vt/vrt/collection/balance/model/norm.cc new file mode 100644 index 0000000000..c713b41456 --- /dev/null +++ b/src/vt/vrt/collection/balance/model/norm.cc @@ -0,0 +1,81 @@ +/* +//@HEADER +// ***************************************************************************** +// +// norm.cc +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + + +#include "vt/vrt/collection/balance/model/norm.h" +#include + +namespace vt { namespace vrt { namespace collection { namespace balance { + +Norm::Norm(const LoadMapType *loads, double power) + : loads_(*loads), power_(power) +{ + if (!loads) vtAbort("Need loads"); + + vtAssert(not std::isnan(power), "Power must have a real value"); + vtAssert(power >= 0.0, "Reciprocal loads make no sense"); +} + +TimeType Norm::getWork(ElementIDType object, PhaseOffset /*ignored*/) +{ + auto const& subphase_loads = loads_.at(object); + + if (std::isfinite(power_)) { + double sum = 0.0; + for (auto t : subphase_loads) + sum += std::pow(t, power_); + + return std::pow(sum, 1.0/power_); + } else { + // l-infinity implies a max norm + double max = 0.0; + for (auto t : subphase_loads) + max = std::max(max, t); + + return max; + } +} + + +}}}} diff --git a/src/vt/vrt/collection/balance/model/norm.h b/src/vt/vrt/collection/balance/model/norm.h new file mode 100644 index 0000000000..ccb7ef2a32 --- /dev/null +++ b/src/vt/vrt/collection/balance/model/norm.h @@ -0,0 +1,81 @@ +/* +//@HEADER +// ***************************************************************************** +// +// norm.h +// DARMA Toolkit v. 
1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#if !defined INCLUDED_VRT_COLLECTION_BALANCE_NAIVE_PERSISTENCE_H +#define INCLUDED_VRT_COLLECTION_BALANCE_NAIVE_PERSISTENCE_H + +#include "vt/vrt/collection/balance/model/load_model.h" +#include "vt/vrt/collection/balance/lb_comm.h" +#include "vt/vrt/collection/balance/proc_stats.h" +#include + +namespace vt { namespace vrt { namespace collection { namespace balance { + +/** + * \brief A load model that computes an l-norm of a given power across + * subphases + */ +class Norm : public LoadModel { + using LoadType = double; + using ObjIDType = balance::ElementIDType; + using LoadMapType = ProcStats::SubphaseLoadMapType; + +public: + /** + * \brief Constructor + * + * \param[in] loads The collection of subphase load data from ProcStats + * \param[in] power The power to use in computing the norms + */ + Norm(const LoadMapType *loads, double power); + TimeType getWork(ElementIDType object, PhaseOffset when) override; + +private: + const LoadMapType loads_; + const double power_; +}; // class Norm + +}}}} // end namespace + +#endif From 3821d73740764154617e44e5df1803fa973cdebd Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 6 Jul 2020 12:22:54 -0400 Subject: [PATCH 11/63] #582: Overhaul API between LBManager, ProcStats, and load models --- .../vrt/collection/balance/baselb/baselb.cc | 2 +- src/vt/vrt/collection/balance/baselb/baselb.h | 2 +- src/vt/vrt/collection/balance/lb_common.h | 3 ++ .../balance/lb_invoke/lb_manager.cc | 13 +++++- .../collection/balance/lb_invoke/lb_manager.h | 2 +- .../vrt/collection/balance/model/load_model.h | 40 ++++++++++++++++++- .../balance/model/naive_persistence.cc | 12 +++--- .../balance/model/naive_persistence.h | 14 +------ src/vt/vrt/collection/balance/model/norm.cc | 8 ++-- src/vt/vrt/collection/balance/model/norm.h | 7 +--- src/vt/vrt/collection/balance/proc_stats.cc | 20 ++++------ 
src/vt/vrt/collection/balance/proc_stats.h | 26 +++++------- 12 files changed, 84 insertions(+), 65 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index 564425f9e8..ddfafd010a 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -64,7 +64,7 @@ void BaseLB::startLB( PhaseType phase, objgroup::proxy::Proxy proxy, balance::LoadModel* model, - balance::ProcStats::LoadMapType const& in_load_stats, + balance::LoadMapType const& in_load_stats, ElementCommType const& in_comm_stats ) { start_time_ = timing::Timing::getCurrentTime(); diff --git a/src/vt/vrt/collection/balance/baselb/baselb.h b/src/vt/vrt/collection/balance/baselb/baselb.h index 1a8f7ea5c8..863e49ef20 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.h +++ b/src/vt/vrt/collection/balance/baselb/baselb.h @@ -108,7 +108,7 @@ struct BaseLB { * through calls to `migrateObjectTo`. Callers can then access that * set using `getTransfers` and apply it using `applyMigrations`. 
*/ - void startLB(PhaseType phase, objgroup::proxy::Proxy proxy, balance::LoadModel *model, balance::ProcStats::LoadMapType const& in_load_stats, ElementCommType const& in_comm_stats); + void startLB(PhaseType phase, objgroup::proxy::Proxy proxy, balance::LoadModel *model, balance::LoadMapType const& in_load_stats, ElementCommType const& in_comm_stats); void computeStatistics(); void importProcessorData(ElementLoadType const& ld, ElementCommType const& cm); void statsHandler(StatsMsgType* msg); diff --git a/src/vt/vrt/collection/balance/lb_common.h b/src/vt/vrt/collection/balance/lb_common.h index 0adbbfd3de..8455b7a8f0 100644 --- a/src/vt/vrt/collection/balance/lb_common.h +++ b/src/vt/vrt/collection/balance/lb_common.h @@ -46,6 +46,7 @@ #define INCLUDED_VT_VRT_COLLECTION_BALANCE_LB_COMMON_H #include "vt/config.h" +#include "vt/timing/timing_type.h" #include #include @@ -57,6 +58,8 @@ using ElementIDType = uint64_t; static constexpr ElementIDType const no_element_id = 0; +using LoadMapType = std::unordered_map; +using SubphaseLoadMapType = std::unordered_map>; } /* end namespace balance */ namespace lb { diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 219d4ed7a6..072d2fefce 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -123,6 +123,14 @@ LBType LBManager::decideLBToRun(PhaseType phase, bool try_file) { return the_lb; } +void LBManager::setLoadModel(std::unique_ptr model) { + model_ = std::move(model); + auto stats = theProcStats(); + model_->setLoads(stats->getProcLoad(), + stats->getProcSubphaseLoad(), + stats->getProcComm()); +} + template objgroup::proxy::Proxy LBManager::makeLB(MsgSharedPtr msg) { @@ -133,11 +141,12 @@ LBManager::makeLB(MsgSharedPtr msg) { auto phase = msg->getPhase(); if (model_ == nullptr) - model_.reset(new balance::NaivePersistence(&theProcStats()->getProcLoad(phase), 
&theProcStats()->getProcComm(phase))); + setLoadModel(std::make_unique()); + model_->updateLoads(phase); EpochType balance_epoch = theTerm()->makeEpochCollective("LBManager::balance_epoch"); theMsg()->pushEpoch(balance_epoch); - strat->startLB(phase, base_proxy, model_.get(), theProcStats()->getProcLoad(phase), theProcStats()->getProcComm(phase)); + strat->startLB(phase, base_proxy, model_.get(), theProcStats()->getProcLoad()->back(), theProcStats()->getProcComm()->back()); theMsg()->popEpoch(balance_epoch); theTerm()->finishedEpoch(balance_epoch); diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h index 1c323afcea..668be25867 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h @@ -238,7 +238,7 @@ struct LBManager : runtime::component::Component { * naive persistence * */ - void setLoadModel(std::unique_ptr model) { model_ = std::move(model); } + void setLoadModel(std::unique_ptr model); protected: /** diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index 4cabef22c2..b8ab9c4598 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -47,7 +47,7 @@ #include "vt/config.h" #include "vt/vrt/collection/balance/lb_common.h" -#include "vt/timing/timing_type.h" +#include "vt/vrt/collection/balance/lb_comm.h" namespace vt { namespace vrt { namespace collection { namespace balance { @@ -64,8 +64,46 @@ class LoadModel public: LoadModel() {} + /** + * \internal \brief Initialize the model instance with pointers to the measured load data + * + * This would typically be called by LBManager when the user has + * passed a new model instance for a collection + */ + void setLoads(std::vector const* proc_load, + std::vector const* proc_subphase_load, + std::vector const* proc_comm) + { + proc_load_ = proc_load; + 
proc_subphase_load_ = proc_subphase_load; + proc_comm_ = proc_comm; + } + + /** + * \brief Signals that load data for a new phase is available + * + * For models that want to do pre-computation based on measured + * loads before being asked to provide predictions from them + * + * This would typically be called by LBManager + */ + virtual void updateLoads(PhaseType last_completed_phase) { } + + /** + * \brief Provide a prediction of the given object's load during a future interval + * + * \param[in] object The object whose load is desired + * \param[in] when The future interval in which the predicted load is desired + * + * \return How much computation time the object is expected to require + */ virtual TimeType getWork(ElementIDType object, PhaseOffset when) = 0; +protected: + // Observer pointers to the underlying data. In operation, these would be owned by ProcStats + std::vector const* proc_load_; + std::vector const* proc_subphase_load_; + std::vector const* proc_comm_; }; // class LoadModel }}}} // namespaces diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.cc b/src/vt/vrt/collection/balance/model/naive_persistence.cc index edc99a6feb..e2362c67ea 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.cc +++ b/src/vt/vrt/collection/balance/model/naive_persistence.cc @@ -47,23 +47,21 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -NaivePersistence::NaivePersistence(const ElementLoadType *loads, const ElementCommType *comms) - : loads_(*loads), comms_(comms) +NaivePersistence::NaivePersistence() { - if (!comms) vtAbort("Need comms"); - if (!loads) vtAbort("Need loads"); - - // Add a bit of overhead for each off-node received message per object + /* +// Add a bit of overhead for each off-node received message per object for (auto &&comm : *comms_) { auto obj = loads_.find(comm.first.toObj()); if (obj != loads_.end()) obj->second += 0.001 * comm.second.messages; } + */ } TimeType 
NaivePersistence::getWork(ElementIDType object, PhaseOffset /*ignored*/) { - return loads_.at(object); + return proc_load_->back().at(object); } diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.h b/src/vt/vrt/collection/balance/model/naive_persistence.h index 12faff98eb..1cba0db43b 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.h +++ b/src/vt/vrt/collection/balance/model/naive_persistence.h @@ -51,19 +51,9 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -class NaivePersistence : public LoadModel { - using LoadType = double; - using ObjIDType = balance::ElementIDType; - using ElementLoadType = std::unordered_map; - using ElementCommType = balance::CommMapType; - -public: - NaivePersistence(const ElementLoadType *loads, const ElementCommType *comms); +struct NaivePersistence : public LoadModel { + NaivePersistence(); TimeType getWork(ElementIDType object, PhaseOffset when) override; - -private: - ElementLoadType loads_; - const ElementCommType* comms_; }; // class NaivePersistence }}}} // end namespace diff --git a/src/vt/vrt/collection/balance/model/norm.cc b/src/vt/vrt/collection/balance/model/norm.cc index c713b41456..8d3362aeee 100644 --- a/src/vt/vrt/collection/balance/model/norm.cc +++ b/src/vt/vrt/collection/balance/model/norm.cc @@ -48,18 +48,16 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -Norm::Norm(const LoadMapType *loads, double power) - : loads_(*loads), power_(power) +Norm::Norm(double power) + : power_(power) { - if (!loads) vtAbort("Need loads"); - vtAssert(not std::isnan(power), "Power must have a real value"); vtAssert(power >= 0.0, "Reciprocal loads make no sense"); } TimeType Norm::getWork(ElementIDType object, PhaseOffset /*ignored*/) { - auto const& subphase_loads = loads_.at(object); + auto const& subphase_loads = proc_subphase_load_->back().at(object); if (std::isfinite(power_)) { double sum = 0.0; diff --git 
a/src/vt/vrt/collection/balance/model/norm.h b/src/vt/vrt/collection/balance/model/norm.h index ccb7ef2a32..b84bfdeb79 100644 --- a/src/vt/vrt/collection/balance/model/norm.h +++ b/src/vt/vrt/collection/balance/model/norm.h @@ -57,22 +57,17 @@ namespace vt { namespace vrt { namespace collection { namespace balance { * subphases */ class Norm : public LoadModel { - using LoadType = double; - using ObjIDType = balance::ElementIDType; - using LoadMapType = ProcStats::SubphaseLoadMapType; public: /** * \brief Constructor * - * \param[in] loads The collection of subphase load data from ProcStats * \param[in] power The power to use in computing the norms */ - Norm(const LoadMapType *loads, double power); + Norm(double power); TimeType getWork(ElementIDType object, PhaseOffset when) override; private: - const LoadMapType loads_; const double power_; }; // class Norm diff --git a/src/vt/vrt/collection/balance/proc_stats.cc b/src/vt/vrt/collection/balance/proc_stats.cc index 56d6ef545d..19bc1e8fc7 100644 --- a/src/vt/vrt/collection/balance/proc_stats.cc +++ b/src/vt/vrt/collection/balance/proc_stats.cc @@ -103,22 +103,18 @@ bool ProcStats::migrateObjTo(ElementIDType obj_id, NodeType to_node) { return true; } -ProcStats::LoadMapType const& -ProcStats::getProcLoad(PhaseType phase) const { - vtAssert(proc_data_.size() > phase, "Phase must exist in load data"); - return proc_data_.at(phase); +std::vector const* +ProcStats::getProcLoad() const { + return &proc_data_; } -ProcStats::SubphaseLoadMapType const& -ProcStats::getProcSubphaseLoad(PhaseType phase) const { - vtAssert(proc_subphase_data_.size() > phase, "Phase must exist in load data"); - return proc_subphase_data_.at(phase); +std::vector const* +ProcStats::getProcSubphaseLoad() const { + return &proc_subphase_data_; } -CommMapType const& ProcStats::getProcComm(PhaseType phase) const { - vtAssert(proc_comm_.size() > phase, "Phase must exist in comm data"); - return proc_comm_.at(phase); - +std::vector const* 
ProcStats::getProcComm() const { + return &proc_comm_; } void ProcStats::clearStats() { diff --git a/src/vt/vrt/collection/balance/proc_stats.h b/src/vt/vrt/collection/balance/proc_stats.h index 41c837bc0c..5a5f499102 100644 --- a/src/vt/vrt/collection/balance/proc_stats.h +++ b/src/vt/vrt/collection/balance/proc_stats.h @@ -77,8 +77,6 @@ namespace vt { namespace vrt { namespace collection { namespace balance { */ struct ProcStats : runtime::component::Component { using MigrateFnType = std::function; - using LoadMapType = std::unordered_map; - using SubphaseLoadMapType = std::unordered_map>; /** * \internal \brief System call to construct \c ProcStats @@ -171,31 +169,25 @@ struct ProcStats : runtime::component::Component { ElementIDType getNextElm(); /** - * \internal \brief Get object loads for a given phase + * \internal \brief Get stored object loads * - * \param[in] phase the phase - * - * \return the load map + * \return an observer pointer to the load map */ - LoadMapType const& getProcLoad(PhaseType phase) const; + std::vector const* getProcLoad() const; /** - * \internal \brief Get object loads for the subphases of a given phase - * - * \param[in] phase the phase + * \internal \brief Get stored object loads for individual subphases * - * \return the subphase load map + * \return an observer pointer to the subphase load map */ - SubphaseLoadMapType const& getProcSubphaseLoad(PhaseType phase) const; + std::vector const* getProcSubphaseLoad() const; /** - * \internal \brief Get object comm graph for a given phase - * - * \param[in] phase the phase + * \internal \brief Get stored object comm graph * - * \return the load map + * \return an observer pointer to the comm graph */ - CommMapType const& getProcComm(PhaseType phase) const; + std::vector const* getProcComm() const; /** * \internal \brief Test if this processor has an object to migrate From 23661d7e7d19e04508326fb824cba7bd954c43ef Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 6 Jul 2020 12:33:30 
-0400 Subject: [PATCH 12/63] #582: Run load model update in a collective epoch before the strategy, in case it needs to communicate --- .../balance/lb_invoke/lb_manager.cc | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 072d2fefce..022b2417d4 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -140,17 +140,28 @@ LBManager::makeLB(MsgSharedPtr msg) { auto base_proxy = proxy.template registerBaseCollective(); auto phase = msg->getPhase(); + EpochType model_epoch = theTerm()->makeEpochCollective("LBManager::model_epoch"); + EpochType balance_epoch = theTerm()->makeEpochCollective("LBManager::balance_epoch"); + EpochType migrate_epoch = theTerm()->makeEpochCollective("LBManager::migrate_epoch"); + if (model_ == nullptr) setLoadModel(std::make_unique()); - model_->updateLoads(phase); - EpochType balance_epoch = theTerm()->makeEpochCollective("LBManager::balance_epoch"); - theMsg()->pushEpoch(balance_epoch); - strat->startLB(phase, base_proxy, model_.get(), theProcStats()->getProcLoad()->back(), theProcStats()->getProcComm()->back()); - theMsg()->popEpoch(balance_epoch); - theTerm()->finishedEpoch(balance_epoch); + theMsg()->pushEpoch(model_epoch); + model_->updateLoads(phase); + theMsg()->popEpoch(model_epoch); + theTerm()->finishedEpoch(model_epoch); - EpochType migrate_epoch = theTerm()->makeEpochCollective("LBManager::migrate_epoch"); + theTerm()->addAction(model_epoch, [=] { + vt_debug_print( + lb, node, + "LBManager: running strategy\n" + ); + theMsg()->pushEpoch(balance_epoch); + strat->startLB(phase, base_proxy, model_.get(), theProcStats()->getProcLoad()->back(), theProcStats()->getProcComm()->back()); + theMsg()->popEpoch(balance_epoch); + theTerm()->finishedEpoch(balance_epoch); + }); theTerm()->addAction(balance_epoch, [=] { 
vt_debug_print( From f472f9a5bf2e19ee94f87e9624795e0629fe0f54 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 6 Jul 2020 12:45:50 -0400 Subject: [PATCH 13/63] #582: Update documentation of load model calling conventions --- src/vt/vrt/collection/balance/lb_invoke/lb_manager.h | 4 ++++ src/vt/vrt/collection/balance/model/load_model.h | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h index 668be25867..f7990d766a 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h @@ -237,6 +237,10 @@ struct LBManager : runtime::component::Component { * \brief Set a model of expected object loads to use in place of * naive persistence * + * \param[in] model the model to apply + * + * This should be called with a similarly-constructed model instance + * on every node */ void setLoadModel(std::unique_ptr model); diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index b8ab9c4598..2fa972c049 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -85,7 +85,9 @@ class LoadModel * For models that want to do pre-computation based on measured * loads before being asked to provide predictions from them * - * This would typically be called by LBManager + * This would typically be called by LBManager collectively inside + * an epoch that can be used for global communication in advance of + * any calls to getWork() */ virtual void updateLoads(PhaseType last_completed_phase) { } From 97376dea028bbb1f4e3d194bcb21ed9f5e92a21b Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 6 Jul 2020 13:11:02 -0400 Subject: [PATCH 14/63] #582: Narrow dependence on definition of LoadModel --- src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc | 3 +++ 
src/vt/vrt/collection/balance/lb_invoke/lb_manager.h | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 022b2417d4..6d8663f52b 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -59,6 +59,7 @@ #include "vt/vrt/collection/messages/system_create.h" #include "vt/vrt/collection/manager.fwd.h" #include "vt/utils/memory/memory_usage.h" +#include "vt/vrt/collection/balance/model/load_model.h" #include "vt/vrt/collection/balance/model/naive_persistence.h" namespace vt { namespace vrt { namespace collection { namespace balance { @@ -70,6 +71,8 @@ namespace vt { namespace vrt { namespace collection { namespace balance { return ptr; } +LBManager::~LBManager() = default; + LBType LBManager::decideLBToRun(PhaseType phase, bool try_file) { vt_debug_print( lb, node, diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h index f7990d766a..fb8a5acbb7 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h @@ -49,7 +49,6 @@ #include "vt/vrt/collection/balance/lb_type.h" #include "vt/vrt/collection/balance/lb_invoke/invoke_msg.h" #include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" -#include "vt/vrt/collection/balance/model/load_model.h" #include "vt/configs/arguments/args.h" #include "vt/runtime/component/component_pack.h" #include "vt/objgroup/proxy/proxy_objgroup.h" @@ -58,6 +57,8 @@ namespace vt { namespace vrt { namespace collection { namespace balance { +class LoadModel; + /** * \struct LBManager * @@ -77,7 +78,7 @@ struct LBManager : runtime::component::Component { LBManager() = default; LBManager(LBManager const&) = delete; LBManager(LBManager&&) = default; - virtual ~LBManager() {} + virtual ~LBManager(); std::string 
name() override { return "LBManager"; } @@ -264,7 +265,7 @@ struct LBManager : runtime::component::Component { bool synced_in_lb_ = true; std::vector listeners_ = {}; objgroup::proxy::Proxy proxy_; - std::unique_ptr model_ = nullptr; + std::unique_ptr model_; }; }}}} /* end namespace vt::vrt::collection::balance */ From f6fdd0559d279791c1ab44b691def6d8eb5a50ad Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 6 Jul 2020 13:13:18 -0400 Subject: [PATCH 15/63] #582: LoadModel: Add object enumeration API --- src/vt/vrt/collection/balance/baselb/baselb.cc | 9 ++++----- src/vt/vrt/collection/balance/model/load_model.h | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index ddfafd010a..c551623d2f 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -102,9 +102,8 @@ void BaseLB::importProcessorData( "{}: importProcessorData: load stats size={}, load comm size={}\n", this_node, load_in.size(), comm_in.size() ); - for (auto&& stat : load_in) { - auto const& obj = stat.first; - auto const& load = stat.second; + for (auto obj : *load_model_) { + auto load = load_model_->getWork(obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}); auto const& load_milli = loadMilli(load); auto const& bin = histogramSample(load_milli); this_load += load_milli; @@ -356,8 +355,8 @@ void BaseLB::computeStatisticsOver(Statistic stat) { case Statistic::O_l: { // Perform the reduction for O_l -> object load only std::vector lds; - for (auto&& elm : *load_data_) { - lds.emplace_back(load_model_->getWork(elm.first, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); + for (auto elm : *load_model_) { + lds.emplace_back(load_model_->getWork(elm, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE})); } auto msg = makeMessage(Statistic::O_l, 
reduceVec(std::move(lds))); proxy_.template reduce(msg,cb); diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index 2fa972c049..6b5a392a1d 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -59,6 +59,18 @@ struct PhaseOffset { static constexpr unsigned int WHOLE_PHASE = 0; }; +class ObjectIterator { + using iterator_type = LoadMapType::const_iterator; + using value_type = LoadMapType::key_type; + iterator_type i; + +public: + ObjectIterator(iterator_type in) : i(in) { } + void operator++() { ++i; } + value_type operator*() { return i->first; } + bool operator!=(ObjectIterator rhs) { return i != rhs.i; } +}; + class LoadModel { public: @@ -101,6 +113,10 @@ class LoadModel */ virtual TimeType getWork(ElementIDType object, PhaseOffset when) = 0; + // Object enumeration, to abstract away access to the underlying structures from ProcStats + ObjectIterator begin() { return ObjectIterator(proc_load_->back().begin()); } + ObjectIterator end() { return ObjectIterator(proc_load_->back().end()); } + protected: // Observer pointers to the underlying data. 
In operation, these would be owned by ProcStats std::vector const* proc_load_; From c604cddbaa5abe1cf3ef111ba750ebc1fc69a855 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 6 Jul 2020 13:26:04 -0400 Subject: [PATCH 16/63] #582: Convert Random and Rotate LB strategies to reference model --- src/vt/vrt/collection/balance/randomlb/randomlb.cc | 4 ++-- src/vt/vrt/collection/balance/rotatelb/rotatelb.cc | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/vt/vrt/collection/balance/randomlb/randomlb.cc b/src/vt/vrt/collection/balance/randomlb/randomlb.cc index b2ec8c32b5..0df1adc7bc 100644 --- a/src/vt/vrt/collection/balance/randomlb/randomlb.cc +++ b/src/vt/vrt/collection/balance/randomlb/randomlb.cc @@ -85,8 +85,8 @@ void RandomLB::runLB() { // Sort the objects so we have a deterministic order over them std::set objs; - for (auto&& stat : *load_data_) { - objs.insert(stat.first); + for (auto obj : *load_model_) { + objs.insert(obj); } for (auto&& obj : objs) { diff --git a/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc b/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc index de8b2a55d4..851c6bebff 100644 --- a/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc +++ b/src/vt/vrt/collection/balance/rotatelb/rotatelb.cc @@ -70,9 +70,8 @@ void RotateLB::runLB() { fflush(stdout); } - for (auto&& stat : *load_data_) { - auto const& obj = stat.first; - auto const& load = stat.second; + for (auto obj : *load_model_) { + auto load = load_model_->getWork(obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}); vt_debug_print( lb, node, "\t RotateLB::migrating object to: obj={}, load={}, to_node={}\n", From 7122f021e8c562fa9fcf823ee92cd9c779fc54a3 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 6 Jul 2020 13:38:40 -0400 Subject: [PATCH 17/63] #582: ZoltanLB: Convert to using load model --- src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc b/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc index b6075d1bb8..6a0e8d3e90 100644 --- a/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc +++ b/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc @@ -427,7 +427,7 @@ std::unique_ptr ZoltanLB::makeGraph() { auto graph = std::make_unique(); // Number of local vertices (overdecomposed blocks) on this node - graph->num_vertices = static_cast(load_data->size()); + graph->num_vertices = static_cast(std::distance(load_model_->begin(), load_model_->end())); // Allocate space for each vertex to describe it graph->vertex_gid = std::make_unique(graph->num_vertices); @@ -445,8 +445,8 @@ std::unique_ptr ZoltanLB::makeGraph() { // Insert local load objs into a std::set to get a deterministic order to // traverse them for building the graph consistenly std::set load_objs; - for (auto&& elm : *load_data) { - load_objs.insert(elm.first); + for (auto obj : *load_model_) { + load_objs.insert(obj); } // Initialize all the local vertices with global id @@ -468,8 +468,8 @@ std::unique_ptr ZoltanLB::makeGraph() { { int idx = 0; for (auto&& obj : load_objs) { - auto iter = load_data->find(obj); - auto time = static_cast(loadMilli(iter->second)); + auto load = load_model_->getWork(obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}); + auto time = static_cast(loadMilli(load)); graph->vertex_weight[idx++] = time; } } From a09e65b97cd3777748ffb803e995e58e01e47e6e Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 6 Jul 2020 14:54:27 -0400 Subject: [PATCH 18/63] #582: GossipLB: Convert to using load model --- src/vt/vrt/collection/balance/gossiplb/gossiplb.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/gossiplb/gossiplb.cc b/src/vt/vrt/collection/balance/gossiplb/gossiplb.cc index 3ac7495bd1..6e89bc3d7c 100644 --- a/src/vt/vrt/collection/balance/gossiplb/gossiplb.cc +++ 
b/src/vt/vrt/collection/balance/gossiplb/gossiplb.cc @@ -122,7 +122,9 @@ void GossipLB::doLBStages() { if (first_iter) { // Copy this node's object assignments to a local, mutable copy - cur_objs_ = *load_data_; + cur_objs_.clear(); + for (auto obj : *load_model_) + cur_objs_[obj] = load_model_->getWork(obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}); this_new_load_ = this_load; } else { // Clear out data structures from previous iteration From 89167d47f2bc323687695a556dc7d338fcff2d14 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 6 Jul 2020 14:56:03 -0400 Subject: [PATCH 19/63] #582: Drop raw load data from BaseLB --- src/vt/vrt/collection/balance/baselb/baselb.cc | 8 +++----- src/vt/vrt/collection/balance/baselb/baselb.h | 5 ++--- .../vrt/collection/balance/lb_invoke/lb_manager.cc | 2 +- src/vt/vrt/collection/balance/model/load_model.h | 14 +++++++++++--- src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc | 2 +- 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index c551623d2f..11f2012390 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -64,7 +64,6 @@ void BaseLB::startLB( PhaseType phase, objgroup::proxy::Proxy proxy, balance::LoadModel* model, - balance::LoadMapType const& in_load_stats, ElementCommType const& in_comm_stats ) { start_time_ = timing::Timing::getCurrentTime(); @@ -72,7 +71,7 @@ void BaseLB::startLB( proxy_ = proxy; load_model_ = model; - importProcessorData(in_load_stats, in_comm_stats); + importProcessorData(in_comm_stats); term::TerminationDetector::Scoped::collective( [this] { computeStatistics(); }, @@ -94,13 +93,13 @@ BaseLB::ObjBinType BaseLB::histogramSample(LoadType const& load) const { } void BaseLB::importProcessorData( - ElementLoadType const& load_in, ElementCommType const& comm_in + ElementCommType const& comm_in ) { auto 
const& this_node = theContext()->getNode(); vt_debug_print( lb, node, "{}: importProcessorData: load stats size={}, load comm size={}\n", - this_node, load_in.size(), comm_in.size() + this_node, load_model_->getNumObjects(), comm_in.size() ); for (auto obj : *load_model_) { auto load = load_model_->getWork(obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}); @@ -117,7 +116,6 @@ void BaseLB::importProcessorData( ); } - load_data_ = &load_in; comm_data = &comm_in; } diff --git a/src/vt/vrt/collection/balance/baselb/baselb.h b/src/vt/vrt/collection/balance/baselb/baselb.h index 863e49ef20..9e8b0df0fd 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.h +++ b/src/vt/vrt/collection/balance/baselb/baselb.h @@ -108,9 +108,9 @@ struct BaseLB { * through calls to `migrateObjectTo`. Callers can then access that * set using `getTransfers` and apply it using `applyMigrations`. */ - void startLB(PhaseType phase, objgroup::proxy::Proxy proxy, balance::LoadModel *model, balance::LoadMapType const& in_load_stats, ElementCommType const& in_comm_stats); + void startLB(PhaseType phase, objgroup::proxy::Proxy proxy, balance::LoadModel *model, ElementCommType const& in_comm_stats); void computeStatistics(); - void importProcessorData(ElementLoadType const& ld, ElementCommType const& cm); + void importProcessorData(ElementCommType const& cm); void statsHandler(StatsMsgType* msg); void finishedStats(); @@ -142,7 +142,6 @@ struct BaseLB { int32_t bin_size_ = 10; ObjSampleType obj_sample = {}; LoadType this_load = 0.0f; - ElementLoadType const* load_data_ = nullptr; ElementCommType const* comm_data = nullptr; StatisticMapType stats = {}; objgroup::proxy::Proxy proxy_ = {}; diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 6d8663f52b..c264334df8 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -161,7 
+161,7 @@ LBManager::makeLB(MsgSharedPtr msg) { "LBManager: running strategy\n" ); theMsg()->pushEpoch(balance_epoch); - strat->startLB(phase, base_proxy, model_.get(), theProcStats()->getProcLoad()->back(), theProcStats()->getProcComm()->back()); + strat->startLB(phase, base_proxy, model_.get(), theProcStats()->getProcComm()->back()); theMsg()->popEpoch(balance_epoch); theTerm()->finishedEpoch(balance_epoch); }); diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index 6b5a392a1d..2dce1dc544 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -60,15 +60,21 @@ struct PhaseOffset { }; class ObjectIterator { - using iterator_type = LoadMapType::const_iterator; + using difference_type = std::ptrdiff_t; using value_type = LoadMapType::key_type; - iterator_type i; + using pointer = value_type*; + using reference = value_type&; + + using map_iterator_type = LoadMapType::const_iterator; + using iterator_category = std::iterator_traits::iterator_category; + map_iterator_type i; public: - ObjectIterator(iterator_type in) : i(in) { } + ObjectIterator(map_iterator_type in) : i(in) { } void operator++() { ++i; } value_type operator*() { return i->first; } bool operator!=(ObjectIterator rhs) { return i != rhs.i; } + difference_type operator-(ObjectIterator rhs) { return std::distance(rhs.i, i); } }; class LoadModel @@ -117,6 +123,8 @@ class LoadModel ObjectIterator begin() { return ObjectIterator(proc_load_->back().begin()); } ObjectIterator end() { return ObjectIterator(proc_load_->back().end()); } + int getNumObjects() { return end() - begin(); } + protected: // Observer pointers to the underlying data. 
In operation, these would be owned by ProcStats std::vector const* proc_load_; diff --git a/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc b/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc index 6a0e8d3e90..b803c4fc83 100644 --- a/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc +++ b/src/vt/vrt/collection/balance/zoltanlb/zoltanlb.cc @@ -427,7 +427,7 @@ std::unique_ptr ZoltanLB::makeGraph() { auto graph = std::make_unique(); // Number of local vertices (overdecomposed blocks) on this node - graph->num_vertices = static_cast(std::distance(load_model_->begin(), load_model_->end())); + graph->num_vertices = load_model_->getNumObjects(); // Allocate space for each vertex to describe it graph->vertex_gid = std::make_unique(graph->num_vertices); From 84aae4be24ab19e5b5f2d973fe315411c13194e6 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 6 Jul 2020 15:10:03 -0400 Subject: [PATCH 20/63] #582: Fix formatting with tabs --- src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index c264334df8..520a7db3b6 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -130,8 +130,8 @@ void LBManager::setLoadModel(std::unique_ptr model) { model_ = std::move(model); auto stats = theProcStats(); model_->setLoads(stats->getProcLoad(), - stats->getProcSubphaseLoad(), - stats->getProcComm()); + stats->getProcSubphaseLoad(), + stats->getProcComm()); } template From 7206808e492d322276b1071ef133de27eca4075a Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 20 Jul 2020 11:18:22 -0400 Subject: [PATCH 21/63] #582: Adjust PhaseOffset to allow historic queries, in support of composition --- .../vrt/collection/balance/model/load_model.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git 
a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index 2dce1dc544..f3c32cd150 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -51,12 +51,20 @@ namespace vt { namespace vrt { namespace collection { namespace balance { +/** + * \brief A description of the interval of interest for a modeled load query + * + * The value of `phases` can be in the past or future. Negative values + * represent a distance into the past, in which -1 is most recent. A + * value of 0 represents the immediate upcoming phase. Positive values + * represent more distant future phases. + */ struct PhaseOffset { - unsigned int phases; + int phases; static constexpr unsigned int NEXT_PHASE = 0; unsigned int subphase; - static constexpr unsigned int WHOLE_PHASE = 0; + static constexpr unsigned int WHOLE_PHASE = ~0u; }; class ObjectIterator { @@ -110,12 +118,12 @@ class LoadModel virtual void updateLoads(PhaseType last_completed_phase) { } /** - * \brief Provide a prediction of the given object's load during a future interval + * \brief Provide an estimate of the given object's load during a specified interval * * \param[in] object The object whose load is desired - * \param[in] when The future interval in which the predicted load is desired + * \param[in] when The interval in which the estimated load is desired * - * \return How much computation time the object is expected to require + * \return How much computation time the object is estimated to require */ virtual TimeType getWork(ElementIDType object, PhaseOffset when) = 0; From 54f1c25c127b6a54be3aa89b1286b3dec83b65e8 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 20 Jul 2020 13:33:17 -0400 Subject: [PATCH 22/63] #582: Implement a compositional model of object load prediction --- .../balance/lb_invoke/lb_manager.cc | 3 +- .../collection/balance/model/comm_overhead.cc | 80 +++++++++++++++++++ 
.../collection/balance/model/comm_overhead.h | 65 +++++++++++++++ .../balance/model/composed_model.cc | 75 +++++++++++++++++ .../collection/balance/model/composed_model.h | 78 ++++++++++++++++++ .../vrt/collection/balance/model/load_model.h | 28 +++---- .../balance/model/naive_persistence.cc | 24 +++--- .../balance/model/naive_persistence.h | 7 +- src/vt/vrt/collection/balance/model/norm.cc | 31 +++++-- src/vt/vrt/collection/balance/model/norm.h | 12 +-- .../vrt/collection/balance/model/raw_data.cc | 75 +++++++++++++++++ .../vrt/collection/balance/model/raw_data.h | 76 ++++++++++++++++++ 12 files changed, 504 insertions(+), 50 deletions(-) create mode 100644 src/vt/vrt/collection/balance/model/comm_overhead.cc create mode 100644 src/vt/vrt/collection/balance/model/comm_overhead.h create mode 100644 src/vt/vrt/collection/balance/model/composed_model.cc create mode 100644 src/vt/vrt/collection/balance/model/composed_model.h create mode 100644 src/vt/vrt/collection/balance/model/raw_data.cc create mode 100644 src/vt/vrt/collection/balance/model/raw_data.h diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 520a7db3b6..213acd7c26 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -61,6 +61,7 @@ #include "vt/utils/memory/memory_usage.h" #include "vt/vrt/collection/balance/model/load_model.h" #include "vt/vrt/collection/balance/model/naive_persistence.h" +#include "vt/vrt/collection/balance/model/raw_data.h" namespace vt { namespace vrt { namespace collection { namespace balance { @@ -148,7 +149,7 @@ LBManager::makeLB(MsgSharedPtr msg) { EpochType migrate_epoch = theTerm()->makeEpochCollective("LBManager::migrate_epoch"); if (model_ == nullptr) - setLoadModel(std::make_unique()); + setLoadModel(std::make_unique(new balance::RawData)); theMsg()->pushEpoch(model_epoch); model_->updateLoads(phase); diff --git 
a/src/vt/vrt/collection/balance/model/comm_overhead.cc b/src/vt/vrt/collection/balance/model/comm_overhead.cc new file mode 100644 index 0000000000..a6cb4e47ab --- /dev/null +++ b/src/vt/vrt/collection/balance/model/comm_overhead.cc @@ -0,0 +1,80 @@ +/* +//@HEADER +// ***************************************************************************** +// +// comm_overhead.cc +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + + +#include "vt/vrt/collection/balance/model/comm_overhead.h" + +namespace vt { namespace vrt { namespace collection { namespace balance { + +CommOverhead::CommOverhead(balance::LoadModel *base) + : ComposedModel(base) +{ +} + +void CommOverhead::setLoads(std::vector const* proc_load, + std::vector const* proc_subphase_load, + std::vector const* proc_comm) { + proc_comm_ = proc_comm; + ComposedModel::setLoads(proc_load, proc_subphase_load, proc_comm); +} + +TimeType CommOverhead::getWork(ElementIDType object, PhaseOffset offset) +{ + auto work = ComposedModel::getWork(object, offset); + + vtAbort("Not fully implemented yet"); +#if 0 + // Add a bit of overhead for each off-node received message per object + for (auto &&comm : *comms_) { + auto obj = loads_.find(comm.first.toObj()); + if (obj != loads_.end()) + work += 0.001 * comm.second.messages; + } +#endif + + return work; +} + + +}}}} diff --git a/src/vt/vrt/collection/balance/model/comm_overhead.h b/src/vt/vrt/collection/balance/model/comm_overhead.h new file mode 100644 index 0000000000..4fbadec78c --- /dev/null +++ b/src/vt/vrt/collection/balance/model/comm_overhead.h @@ -0,0 +1,65 @@ +/* +//@HEADER +// ***************************************************************************** +// +// comm_overhead.h +// DARMA Toolkit v. 
1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#if !defined INCLUDED_VRT_COLLECTION_BALANCE_COMM_OVERHEAD_H +#define INCLUDED_VRT_COLLECTION_BALANCE_COMM_OVERHEAD_H + +#include "vt/vrt/collection/balance/model/composed_model.h" +#include + +namespace vt { namespace vrt { namespace collection { namespace balance { + +struct CommOverhead : public ComposedModel { + CommOverhead(balance::LoadModel *base); + void setLoads(std::vector const* proc_load, + std::vector const* proc_subphase_load, + std::vector const* proc_comm) override; + TimeType getWork(ElementIDType object, PhaseOffset when) override; + + std::vector const* proc_comm_; +}; // class CommOverhead + +}}}} // end namespace + +#endif diff --git a/src/vt/vrt/collection/balance/model/composed_model.cc b/src/vt/vrt/collection/balance/model/composed_model.cc new file mode 100644 index 0000000000..99e101f255 --- /dev/null +++ b/src/vt/vrt/collection/balance/model/composed_model.cc @@ -0,0 +1,75 @@ +/* +//@HEADER +// ***************************************************************************** +// +// composed_model.cc +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#include "vt/vrt/collection/balance/model/composed_model.h" + +namespace vt { namespace vrt { namespace collection { namespace balance { + +void ComposedModel::setLoads(std::vector const* proc_load, + std::vector const* proc_subphase_load, + std::vector const* proc_comm) { + base_.setLoads(proc_load, proc_subphase_load, proc_comm); +} + +void ComposedModel::updateLoads(PhaseType last_completed_phase) { + base_.updateLoads(last_completed_phase); +} + +TimeType ComposedModel::getWork(ElementIDType object, PhaseOffset when) { + return base_.getWork(object, when); +} + +ObjectIterator ComposedModel::begin() { + return base_.begin(); +} + +ObjectIterator ComposedModel::end() { + return base_.end(); +} + +int ComposedModel::getNumObjects() { + return base_.getNumObjects(); +} + +}}}} diff --git a/src/vt/vrt/collection/balance/model/composed_model.h b/src/vt/vrt/collection/balance/model/composed_model.h new file mode 100644 index 0000000000..06723a6383 --- /dev/null +++ b/src/vt/vrt/collection/balance/model/composed_model.h @@ -0,0 +1,78 @@ +/* +//@HEADER +// ***************************************************************************** +// +// composed_model.h +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#if !defined INCLUDED_VRT_COLLECTION_BALANCE_COMPOSED_MODEL_H +#define INCLUDED_VRT_COLLECTION_BALANCE_COMPOSED_MODEL_H + +#include "vt/config.h" +#include "vt/vrt/collection/balance/model/load_model.h" + +namespace vt { namespace vrt { namespace collection { namespace balance { + +class ComposedModel : public LoadModel +{ +public: + // \param[in] base must not be null + ComposedModel(LoadModel *base) : base_(*base) {} + + void setLoads(std::vector const* proc_load, + std::vector const* proc_subphase_load, + std::vector const* proc_comm) override; + + void updateLoads(PhaseType last_completed_phase) override; + + TimeType getWork(ElementIDType object, PhaseOffset when) override; + + ObjectIterator begin() override; + ObjectIterator end() override; + + int getNumObjects() override; + +private: + LoadModel &base_; +}; // class ComposedModel + +}}}} // namespaces + +#endif diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index f3c32cd150..023d0a3089 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -88,7 +88,8 @@ class ObjectIterator { class LoadModel { public: - LoadModel() {} + LoadModel() = default; + virtual ~LoadModel() = default; /** * \internal \brief Initialize the model instance with pointers to the measured load data @@ -96,14 +97,9 @@ class LoadModel * This would typically be called by LBManager when the user has * passed a new model instance for a collection */ - void setLoads(std::vector const* proc_load, - std::vector const* proc_subphase_load, - std::vector const* proc_comm) - { - proc_load_ = proc_load; - proc_subphase_load_ = proc_subphase_load; - proc_comm_ = proc_comm; - } + virtual void setLoads(std::vector const* proc_load, + std::vector const* proc_subphase_load, + std::vector const* 
proc_comm) = 0; /** * \brief Signals that load data for a new phase is available @@ -115,7 +111,7 @@ class LoadModel * an epoch that can be used for global communication in advance of * any calls to getWork() */ - virtual void updateLoads(PhaseType last_completed_phase) { } + virtual void updateLoads(PhaseType last_completed_phase) = 0; /** * \brief Provide an estimate of the given object's load during a specified interval @@ -128,16 +124,10 @@ class LoadModel virtual TimeType getWork(ElementIDType object, PhaseOffset when) = 0; // Object enumeration, to abstract away access to the underlying structures from ProcStats - ObjectIterator begin() { return ObjectIterator(proc_load_->back().begin()); } - ObjectIterator end() { return ObjectIterator(proc_load_->back().end()); } + virtual ObjectIterator begin() = 0; + virtual ObjectIterator end() = 0; - int getNumObjects() { return end() - begin(); } - -protected: - // Observer pointers to the underlying data. In operation, these would be owned by ProcStats - std::vector const* proc_load_; - std::vector const* proc_subphase_load_; - std::vector const* proc_comm_; + virtual int getNumObjects() = 0; }; // class LoadModel }}}} // namespaces diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.cc b/src/vt/vrt/collection/balance/model/naive_persistence.cc index e2362c67ea..31853bad59 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.cc +++ b/src/vt/vrt/collection/balance/model/naive_persistence.cc @@ -2,7 +2,7 @@ //@HEADER // ***************************************************************************** // -// naive_persistence.cpp +// naive_persistence.cc // DARMA Toolkit v. 
1.0.0 // DARMA/vt => Virtual Transport // @@ -47,22 +47,16 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -NaivePersistence::NaivePersistence() -{ - /* -// Add a bit of overhead for each off-node received message per object - for (auto &&comm : *comms_) { - auto obj = loads_.find(comm.first.toObj()); - if (obj != loads_.end()) - obj->second += 0.001 * comm.second.messages; - } - */ -} +NaivePersistence::NaivePersistence(balance::LoadModel *base) + : ComposedModel(base) +{ } -TimeType NaivePersistence::getWork(ElementIDType object, PhaseOffset /*ignored*/) +TimeType NaivePersistence::getWork(ElementIDType object, PhaseOffset offset) { - return proc_load_->back().at(object); -} + if (offset.phases >= 0) + offset.phases = -1; + return ComposedModel::getWork(object, offset); +} }}}} diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.h b/src/vt/vrt/collection/balance/model/naive_persistence.h index 1cba0db43b..42ae3ff9fa 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.h +++ b/src/vt/vrt/collection/balance/model/naive_persistence.h @@ -45,14 +45,13 @@ #if !defined INCLUDED_VRT_COLLECTION_BALANCE_NAIVE_PERSISTENCE_H #define INCLUDED_VRT_COLLECTION_BALANCE_NAIVE_PERSISTENCE_H -#include "vt/vrt/collection/balance/model/load_model.h" -#include "vt/vrt/collection/balance/lb_comm.h" +#include "vt/vrt/collection/balance/model/composed_model.h" #include namespace vt { namespace vrt { namespace collection { namespace balance { -struct NaivePersistence : public LoadModel { - NaivePersistence(); +struct NaivePersistence : public ComposedModel { + NaivePersistence(balance::LoadModel *base); TimeType getWork(ElementIDType object, PhaseOffset when) override; }; // class NaivePersistence diff --git a/src/vt/vrt/collection/balance/model/norm.cc b/src/vt/vrt/collection/balance/model/norm.cc index 8d3362aeee..242eed4e0f 100644 --- a/src/vt/vrt/collection/balance/model/norm.cc +++ b/src/vt/vrt/collection/balance/model/norm.cc 
@@ -48,28 +48,47 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -Norm::Norm(double power) - : power_(power) +Norm::Norm(balance::LoadModel *base, double power) + : ComposedModel(base) + , power_(power) { vtAssert(not std::isnan(power), "Power must have a real value"); vtAssert(power >= 0.0, "Reciprocal loads make no sense"); } -TimeType Norm::getWork(ElementIDType object, PhaseOffset /*ignored*/) +void Norm::setLoads(std::vector const* proc_load, + std::vector const* proc_subphase_load, + std::vector const* proc_comm) { + const auto& last_phase = proc_subphase_load->back(); + const auto& an_object = *last_phase.begin(); + const auto& subphases = an_object.second; + num_subphases_ = subphases.size(); + + ComposedModel::setLoads(proc_load, proc_subphase_load, proc_comm); +} + +TimeType Norm::getWork(ElementIDType object, PhaseOffset offset) { - auto const& subphase_loads = proc_subphase_load_->back().at(object); + if (offset.subphase != PhaseOffset::WHOLE_PHASE) + return ComposedModel::getWork(object, offset); if (std::isfinite(power_)) { double sum = 0.0; - for (auto t : subphase_loads) + + for (int i = 0; i < num_subphases_; ++i) { + auto t = ComposedModel::getWork(object, offset); sum += std::pow(t, power_); + } return std::pow(sum, 1.0/power_); } else { // l-infinity implies a max norm double max = 0.0; - for (auto t : subphase_loads) + + for (int i = 0; i < num_subphases_; ++i) { + auto t = ComposedModel::getWork(object, offset); max = std::max(max, t); + } return max; } diff --git a/src/vt/vrt/collection/balance/model/norm.h b/src/vt/vrt/collection/balance/model/norm.h index b84bfdeb79..a26fe0451d 100644 --- a/src/vt/vrt/collection/balance/model/norm.h +++ b/src/vt/vrt/collection/balance/model/norm.h @@ -45,9 +45,7 @@ #if !defined INCLUDED_VRT_COLLECTION_BALANCE_NAIVE_PERSISTENCE_H #define INCLUDED_VRT_COLLECTION_BALANCE_NAIVE_PERSISTENCE_H -#include "vt/vrt/collection/balance/model/load_model.h" -#include 
"vt/vrt/collection/balance/lb_comm.h" -#include "vt/vrt/collection/balance/proc_stats.h" +#include "vt/vrt/collection/balance/model/composed_model.h" #include namespace vt { namespace vrt { namespace collection { namespace balance { @@ -56,7 +54,7 @@ namespace vt { namespace vrt { namespace collection { namespace balance { * \brief A load model that computes an l-norm of a given power across * subphases */ -class Norm : public LoadModel { +class Norm : public ComposedModel { public: /** @@ -64,10 +62,14 @@ class Norm : public LoadModel { * * \param[in] power The power to use in computing the norms */ - Norm(double power); + Norm(balance::LoadModel *base, double power); + void setLoads(std::vector const* proc_load, + std::vector const* proc_subphase_load, + std::vector const* proc_comm) override; TimeType getWork(ElementIDType object, PhaseOffset when) override; private: + int num_subphases_; const double power_; }; // class Norm diff --git a/src/vt/vrt/collection/balance/model/raw_data.cc b/src/vt/vrt/collection/balance/model/raw_data.cc new file mode 100644 index 0000000000..5f64e64046 --- /dev/null +++ b/src/vt/vrt/collection/balance/model/raw_data.cc @@ -0,0 +1,75 @@ +/* +//@HEADER +// ***************************************************************************** +// +// raw_data.cc +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + + +#include "vt/vrt/collection/balance/model/raw_data.h" + +namespace vt { namespace vrt { namespace collection { namespace balance { + +RawData::RawData() +{ +} + +void RawData::setLoads(std::vector const* proc_load, + std::vector const* proc_subphase_load, + std::vector const* proc_comm) +{ + proc_load_ = proc_load; + proc_subphase_load_ = proc_subphase_load; + proc_comm_ = proc_comm; +} + +TimeType RawData::getWork(ElementIDType object, PhaseOffset offset) +{ + vtAssert(offset.phases < 0, + "RawData makes no predictions. 
Compose with NaivePersistence or some longer-range forecasting model as needed"); + + auto phase = proc_load_->size() - offset.phases; + if (offset.subphase == PhaseOffset::WHOLE_PHASE) + return proc_load_->at(phase).at(object); + else + return proc_subphase_load_->at(phase).at(object).at(offset.subphase); +} + +}}}} diff --git a/src/vt/vrt/collection/balance/model/raw_data.h b/src/vt/vrt/collection/balance/model/raw_data.h new file mode 100644 index 0000000000..2c7c8f38f1 --- /dev/null +++ b/src/vt/vrt/collection/balance/model/raw_data.h @@ -0,0 +1,76 @@ +/* +//@HEADER +// ***************************************************************************** +// +// raw_data.h +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#if !defined INCLUDED_VRT_COLLECTION_BALANCE_RAW_DATA_H +#define INCLUDED_VRT_COLLECTION_BALANCE_RAW_DATA_H + +#include "vt/vrt/collection/balance/model/load_model.h" +#include "vt/vrt/collection/balance/lb_comm.h" +#include + +namespace vt { namespace vrt { namespace collection { namespace balance { + +struct RawData : public LoadModel { + RawData(); + void updateLoads(PhaseType last_completed_phase) override { } + TimeType getWork(ElementIDType object, PhaseOffset when) override; + + void setLoads(std::vector const* proc_load, + std::vector const* proc_subphase_load, + std::vector const* proc_comm) override; + + ObjectIterator begin() override { return ObjectIterator(proc_load_->back().begin()); } + ObjectIterator end() override { return ObjectIterator(proc_load_->back().end()); } + + int getNumObjects() override { return end() - begin(); } + + // Observer pointers to the underlying data. 
In operation, these would be owned by ProcStats + std::vector const* proc_load_; + std::vector const* proc_subphase_load_; + std::vector const* proc_comm_; +}; // class RawData + +}}}} // end namespace + +#endif From 89ad6952b14936065c5d6dae2dfa5f4dd61bbbbd Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 20 Jul 2020 13:58:30 -0400 Subject: [PATCH 23/63] #582: Shift load model composition to shared_ptr, add API to expose base and current model --- .../collection/balance/lb_invoke/lb_manager.cc | 11 ++++++----- .../collection/balance/lb_invoke/lb_manager.h | 16 +++++++++++++--- .../collection/balance/model/comm_overhead.cc | 2 +- .../vrt/collection/balance/model/comm_overhead.h | 2 +- .../collection/balance/model/composed_model.cc | 12 ++++++------ .../collection/balance/model/composed_model.h | 4 ++-- .../balance/model/naive_persistence.cc | 2 +- .../collection/balance/model/naive_persistence.h | 2 +- src/vt/vrt/collection/balance/model/norm.cc | 2 +- src/vt/vrt/collection/balance/model/norm.h | 2 +- 10 files changed, 33 insertions(+), 22 deletions(-) diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 213acd7c26..e97e131a76 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -69,6 +69,10 @@ namespace vt { namespace vrt { namespace collection { namespace balance { auto ptr = std::make_unique(); auto proxy = theObjGroup()->makeCollective(ptr.get()); proxy.get()->setProxy(proxy); + + ptr->base_model_ = std::make_shared(std::make_shared()); + ptr->setLoadModel(ptr->base_model_); + return ptr; } @@ -127,8 +131,8 @@ LBType LBManager::decideLBToRun(PhaseType phase, bool try_file) { return the_lb; } -void LBManager::setLoadModel(std::unique_ptr model) { - model_ = std::move(model); +void LBManager::setLoadModel(std::shared_ptr model) { + model_ = model; auto stats = theProcStats(); 
model_->setLoads(stats->getProcLoad(), stats->getProcSubphaseLoad(), @@ -148,9 +152,6 @@ LBManager::makeLB(MsgSharedPtr msg) { EpochType balance_epoch = theTerm()->makeEpochCollective("LBManager::balance_epoch"); EpochType migrate_epoch = theTerm()->makeEpochCollective("LBManager::migrate_epoch"); - if (model_ == nullptr) - setLoadModel(std::make_unique(new balance::RawData)); - theMsg()->pushEpoch(model_epoch); model_->updateLoads(phase); theMsg()->popEpoch(model_epoch); diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h index fb8a5acbb7..d4842e657c 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h @@ -236,14 +236,23 @@ struct LBManager : runtime::component::Component { /** * \brief Set a model of expected object loads to use in place of - * naive persistence + * the current installed model * * \param[in] model the model to apply * * This should be called with a similarly-constructed model instance * on every node */ - void setLoadModel(std::unique_ptr model); + void setLoadModel(std::shared_ptr model); + + /** + * \brief Get the system-set basic model of object load + */ + std::shared_ptr getBaseLoadModel() { return base_model_; } + /** + * \brief Get the currently installed model of object load + */ + std::shared_ptr getLoadModel() { return model_; } protected: /** @@ -265,7 +274,8 @@ struct LBManager : runtime::component::Component { bool synced_in_lb_ = true; std::vector listeners_ = {}; objgroup::proxy::Proxy proxy_; - std::unique_ptr model_; + std::shared_ptr base_model_; + std::shared_ptr model_; }; }}}} /* end namespace vt::vrt::collection::balance */ diff --git a/src/vt/vrt/collection/balance/model/comm_overhead.cc b/src/vt/vrt/collection/balance/model/comm_overhead.cc index a6cb4e47ab..79782eb106 100644 --- a/src/vt/vrt/collection/balance/model/comm_overhead.cc +++ 
b/src/vt/vrt/collection/balance/model/comm_overhead.cc @@ -47,7 +47,7 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -CommOverhead::CommOverhead(balance::LoadModel *base) +CommOverhead::CommOverhead(std::shared_ptr base) : ComposedModel(base) { } diff --git a/src/vt/vrt/collection/balance/model/comm_overhead.h b/src/vt/vrt/collection/balance/model/comm_overhead.h index 4fbadec78c..9eae4f0cfd 100644 --- a/src/vt/vrt/collection/balance/model/comm_overhead.h +++ b/src/vt/vrt/collection/balance/model/comm_overhead.h @@ -51,7 +51,7 @@ namespace vt { namespace vrt { namespace collection { namespace balance { struct CommOverhead : public ComposedModel { - CommOverhead(balance::LoadModel *base); + CommOverhead(std::shared_ptr base); void setLoads(std::vector const* proc_load, std::vector const* proc_subphase_load, std::vector const* proc_comm) override; diff --git a/src/vt/vrt/collection/balance/model/composed_model.cc b/src/vt/vrt/collection/balance/model/composed_model.cc index 99e101f255..f60234a967 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.cc +++ b/src/vt/vrt/collection/balance/model/composed_model.cc @@ -49,27 +49,27 @@ namespace vt { namespace vrt { namespace collection { namespace balance { void ComposedModel::setLoads(std::vector const* proc_load, std::vector const* proc_subphase_load, std::vector const* proc_comm) { - base_.setLoads(proc_load, proc_subphase_load, proc_comm); + base_->setLoads(proc_load, proc_subphase_load, proc_comm); } void ComposedModel::updateLoads(PhaseType last_completed_phase) { - base_.updateLoads(last_completed_phase); + base_->updateLoads(last_completed_phase); } TimeType ComposedModel::getWork(ElementIDType object, PhaseOffset when) { - return base_.getWork(object, when); + return base_->getWork(object, when); } ObjectIterator ComposedModel::begin() { - return base_.begin(); + return base_->begin(); } ObjectIterator ComposedModel::end() { - return base_.end(); + return base_->end(); } 
int ComposedModel::getNumObjects() { - return base_.getNumObjects(); + return base_->getNumObjects(); } }}}} diff --git a/src/vt/vrt/collection/balance/model/composed_model.h b/src/vt/vrt/collection/balance/model/composed_model.h index 06723a6383..c647bf9e8e 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.h +++ b/src/vt/vrt/collection/balance/model/composed_model.h @@ -54,7 +54,7 @@ class ComposedModel : public LoadModel { public: // \param[in] base must not be null - ComposedModel(LoadModel *base) : base_(*base) {} + ComposedModel(std::shared_ptr base) : base_(base) {} void setLoads(std::vector const* proc_load, std::vector const* proc_subphase_load, @@ -70,7 +70,7 @@ class ComposedModel : public LoadModel int getNumObjects() override; private: - LoadModel &base_; + std::shared_ptr base_; }; // class ComposedModel }}}} // namespaces diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.cc b/src/vt/vrt/collection/balance/model/naive_persistence.cc index 31853bad59..0b0dc005eb 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.cc +++ b/src/vt/vrt/collection/balance/model/naive_persistence.cc @@ -47,7 +47,7 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -NaivePersistence::NaivePersistence(balance::LoadModel *base) +NaivePersistence::NaivePersistence(std::shared_ptr base) : ComposedModel(base) { } diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.h b/src/vt/vrt/collection/balance/model/naive_persistence.h index 42ae3ff9fa..fe281c67b3 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.h +++ b/src/vt/vrt/collection/balance/model/naive_persistence.h @@ -51,7 +51,7 @@ namespace vt { namespace vrt { namespace collection { namespace balance { struct NaivePersistence : public ComposedModel { - NaivePersistence(balance::LoadModel *base); + NaivePersistence(std::shared_ptr base); TimeType getWork(ElementIDType object, PhaseOffset when) override; }; // class 
NaivePersistence diff --git a/src/vt/vrt/collection/balance/model/norm.cc b/src/vt/vrt/collection/balance/model/norm.cc index 242eed4e0f..11dcf9e5f8 100644 --- a/src/vt/vrt/collection/balance/model/norm.cc +++ b/src/vt/vrt/collection/balance/model/norm.cc @@ -48,7 +48,7 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -Norm::Norm(balance::LoadModel *base, double power) +Norm::Norm(std::shared_ptr base, double power) : ComposedModel(base) , power_(power) { diff --git a/src/vt/vrt/collection/balance/model/norm.h b/src/vt/vrt/collection/balance/model/norm.h index a26fe0451d..d64e4dd0bb 100644 --- a/src/vt/vrt/collection/balance/model/norm.h +++ b/src/vt/vrt/collection/balance/model/norm.h @@ -62,7 +62,7 @@ class Norm : public ComposedModel { * * \param[in] power The power to use in computing the norms */ - Norm(balance::LoadModel *base, double power); + Norm(std::shared_ptr base, double power); void setLoads(std::vector const* proc_load, std::vector const* proc_subphase_load, std::vector const* proc_comm) override; From 4b83aa5885d529244e3b38a8d0f50507370c8b34 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 20 Jul 2020 14:13:22 -0400 Subject: [PATCH 24/63] #582: Factor out subphase enumeration --- src/vt/vrt/collection/balance/model/composed_model.cc | 4 ++++ src/vt/vrt/collection/balance/model/composed_model.h | 1 + src/vt/vrt/collection/balance/model/load_model.h | 1 + src/vt/vrt/collection/balance/model/norm.cc | 9 ++------- src/vt/vrt/collection/balance/model/raw_data.cc | 7 +++++++ src/vt/vrt/collection/balance/model/raw_data.h | 1 + 6 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/vt/vrt/collection/balance/model/composed_model.cc b/src/vt/vrt/collection/balance/model/composed_model.cc index f60234a967..9d879bfe24 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.cc +++ b/src/vt/vrt/collection/balance/model/composed_model.cc @@ -72,4 +72,8 @@ int ComposedModel::getNumObjects() { return 
base_->getNumObjects(); } +int ComposedModel::getNumSubphases() { + return base_->getNumSubphases(); +} + }}}} diff --git a/src/vt/vrt/collection/balance/model/composed_model.h b/src/vt/vrt/collection/balance/model/composed_model.h index c647bf9e8e..a1662ef44a 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.h +++ b/src/vt/vrt/collection/balance/model/composed_model.h @@ -68,6 +68,7 @@ class ComposedModel : public LoadModel ObjectIterator end() override; int getNumObjects() override; + int getNumSubphases() override; private: std::shared_ptr base_; diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index 023d0a3089..8675063eef 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -128,6 +128,7 @@ class LoadModel virtual ObjectIterator end() = 0; virtual int getNumObjects() = 0; + virtual int getNumSubphases() = 0; }; // class LoadModel }}}} // namespaces diff --git a/src/vt/vrt/collection/balance/model/norm.cc b/src/vt/vrt/collection/balance/model/norm.cc index 11dcf9e5f8..8d5d39163c 100644 --- a/src/vt/vrt/collection/balance/model/norm.cc +++ b/src/vt/vrt/collection/balance/model/norm.cc @@ -59,11 +59,6 @@ Norm::Norm(std::shared_ptr base, double power) void Norm::setLoads(std::vector const* proc_load, std::vector const* proc_subphase_load, std::vector const* proc_comm) { - const auto& last_phase = proc_subphase_load->back(); - const auto& an_object = *last_phase.begin(); - const auto& subphases = an_object.second; - num_subphases_ = subphases.size(); - ComposedModel::setLoads(proc_load, proc_subphase_load, proc_comm); } @@ -75,7 +70,7 @@ TimeType Norm::getWork(ElementIDType object, PhaseOffset offset) if (std::isfinite(power_)) { double sum = 0.0; - for (int i = 0; i < num_subphases_; ++i) { + for (int i = 0; i < getNumSubphases(); ++i) { auto t = ComposedModel::getWork(object, offset); sum += std::pow(t, power_); } @@ 
-85,7 +80,7 @@ TimeType Norm::getWork(ElementIDType object, PhaseOffset offset) // l-infinity implies a max norm double max = 0.0; - for (int i = 0; i < num_subphases_; ++i) { + for (int i = 0; i < getNumSubphases(); ++i) { auto t = ComposedModel::getWork(object, offset); max = std::max(max, t); } diff --git a/src/vt/vrt/collection/balance/model/raw_data.cc b/src/vt/vrt/collection/balance/model/raw_data.cc index 5f64e64046..77cd46231a 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.cc +++ b/src/vt/vrt/collection/balance/model/raw_data.cc @@ -60,6 +60,13 @@ void RawData::setLoads(std::vector const* proc_load, proc_comm_ = proc_comm; } +int RawData::getNumSubphases() { + const auto& last_phase = proc_subphase_load_->back(); + const auto& an_object = *last_phase.begin(); + const auto& subphases = an_object.second; + return subphases.size(); +} + TimeType RawData::getWork(ElementIDType object, PhaseOffset offset) { vtAssert(offset.phases < 0, diff --git a/src/vt/vrt/collection/balance/model/raw_data.h b/src/vt/vrt/collection/balance/model/raw_data.h index 2c7c8f38f1..cfb1c54e5e 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.h +++ b/src/vt/vrt/collection/balance/model/raw_data.h @@ -64,6 +64,7 @@ struct RawData : public LoadModel { ObjectIterator end() override { return ObjectIterator(proc_load_->back().end()); } int getNumObjects() override { return end() - begin(); } + int getNumSubphases() override; // Observer pointers to the underlying data. 
In operation, these would be owned by ProcStats std::vector const* proc_load_; From 719365126bc6e6c6995cc5ab62b2aef0e3725f3e Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Mon, 20 Jul 2020 14:50:40 -0400 Subject: [PATCH 25/63] #582: Add load model for a select subset of subphases --- .../balance/model/select_subphases.cc | 82 +++++++++++++++++++ .../balance/model/select_subphases.h | 67 +++++++++++++++ 2 files changed, 149 insertions(+) create mode 100644 src/vt/vrt/collection/balance/model/select_subphases.cc create mode 100644 src/vt/vrt/collection/balance/model/select_subphases.h diff --git a/src/vt/vrt/collection/balance/model/select_subphases.cc b/src/vt/vrt/collection/balance/model/select_subphases.cc new file mode 100644 index 0000000000..131562edf8 --- /dev/null +++ b/src/vt/vrt/collection/balance/model/select_subphases.cc @@ -0,0 +1,82 @@ +/* +//@HEADER +// ***************************************************************************** +// +// select_subphases.cc +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#include "vt/vrt/collection/balance/model/select_subphases.h" + +namespace vt { namespace vrt { namespace collection { namespace balance { + +SelectSubphases::SelectSubphases(std::shared_ptr base, std::vector subphases) + : ComposedModel(base) + , subphases_(subphases) +{ + unsigned int base_subphases = ComposedModel::getNumSubphases(); + for (auto s : subphases) { + vtAssert(s != PhaseOffset::WHOLE_PHASE, "Selecting WHOLE_PHASE as a subphase makes no sense"); + vtAssert(s < base_subphases, "Selected subphase must be within range of base's known subphases"); + } + // Don't check that the 'select' subphases are a smaller set than + // the base - allow multiple counting or other potential cleverness + //vtAssert(subphases_.size() < base_subphases, "..."); +} + +TimeType SelectSubphases::getWork(ElementIDType object, PhaseOffset when) { + if (when.subphase == PhaseOffset::WHOLE_PHASE) { + // sum up the selected subphases + TimeType sum = 0.0; + for (auto s : subphases_) { + PhaseOffset p{when.phases, s}; + sum += 
ComposedModel::getWork(object, p); + } + return sum; + } else { + when.subphase = subphases_.at(when.subphase); + return ComposedModel::getWork(object, when); + } +} + +int SelectSubphases::getNumSubphases() { + return subphases_.size(); +} + +}}}} diff --git a/src/vt/vrt/collection/balance/model/select_subphases.h b/src/vt/vrt/collection/balance/model/select_subphases.h new file mode 100644 index 0000000000..fef32f892d --- /dev/null +++ b/src/vt/vrt/collection/balance/model/select_subphases.h @@ -0,0 +1,67 @@ +/* +//@HEADER +// ***************************************************************************** +// +// select_subphases.h +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#if !defined INCLUDED_VRT_COLLECTION_BALANCE_SELECT_SUBPHASES_H +#define INCLUDED_VRT_COLLECTION_BALANCE_SELECT_SUBPHASES_H + +#include "vt/config.h" +#include "vt/vrt/collection/balance/model/composed_model.h" + +namespace vt { namespace vrt { namespace collection { namespace balance { + +class SelectSubphases : public ComposedModel +{ +public: + // \param[in] base must not be null + SelectSubphases(std::shared_ptr base, std::vector subphases); + + TimeType getWork(ElementIDType object, PhaseOffset when) override; + int getNumSubphases() override; + + std::vector subphases_; +}; // class SelectSubphases + +}}}} // namespaces + +#endif From b19c33ae7a610e8f7d57aca93fea3f079612dd4c Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 21 Jul 2020 09:27:09 -0400 Subject: [PATCH 26/63] #582: Document SelectSubphases --- .../collection/balance/model/select_subphases.cc | 2 +- .../collection/balance/model/select_subphases.h | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/model/select_subphases.cc b/src/vt/vrt/collection/balance/model/select_subphases.cc index 131562edf8..c09cf2db65 100644 --- a/src/vt/vrt/collection/balance/model/select_subphases.cc +++ b/src/vt/vrt/collection/balance/model/select_subphases.cc @@ -62,7 +62,7 @@ 
SelectSubphases::SelectSubphases(std::shared_ptr base, std::vector base, std::vector subphases); TimeType getWork(ElementIDType object, PhaseOffset when) override; From 077136952c5c9ae3f68133655bd52182cd8f3707 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 21 Jul 2020 09:37:58 -0400 Subject: [PATCH 27/63] #582: Document NaivePersistence --- src/vt/vrt/collection/balance/model/naive_persistence.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.h b/src/vt/vrt/collection/balance/model/naive_persistence.h index fe281c67b3..3df1cdb91b 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.h +++ b/src/vt/vrt/collection/balance/model/naive_persistence.h @@ -50,7 +50,16 @@ namespace vt { namespace vrt { namespace collection { namespace balance { +/** + * \brief Predicts future object loads as exactly matching their load + * in the last phase + */ struct NaivePersistence : public ComposedModel { + /** + * \brief Constructor + * + * \param[in] base: The source of underlying load numbers to return; must not be null + */ NaivePersistence(std::shared_ptr base); TimeType getWork(ElementIDType object, PhaseOffset when) override; }; // class NaivePersistence From e0cfcd078a4c7b83c9da48500d252bc8117c9e78 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 21 Jul 2020 09:42:28 -0400 Subject: [PATCH 28/63] #582: Document RawData model --- src/vt/vrt/collection/balance/model/raw_data.cc | 4 ---- src/vt/vrt/collection/balance/model/raw_data.h | 8 +++++++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/vt/vrt/collection/balance/model/raw_data.cc b/src/vt/vrt/collection/balance/model/raw_data.cc index 77cd46231a..f05a16a966 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.cc +++ b/src/vt/vrt/collection/balance/model/raw_data.cc @@ -47,10 +47,6 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -RawData::RawData() -{ -} - void 
RawData::setLoads(std::vector const* proc_load, std::vector const* proc_subphase_load, std::vector const* proc_comm) diff --git a/src/vt/vrt/collection/balance/model/raw_data.h b/src/vt/vrt/collection/balance/model/raw_data.h index cfb1c54e5e..21579536a7 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.h +++ b/src/vt/vrt/collection/balance/model/raw_data.h @@ -51,8 +51,14 @@ namespace vt { namespace vrt { namespace collection { namespace balance { +/** + * \brief A strictly retrospective view of the recorded object timings + * + * For use with load balancers, this must typically be stacked beneath + * a model that predicts future loads, such as NaivePersistence + */ struct RawData : public LoadModel { - RawData(); + RawData() = default; void updateLoads(PhaseType last_completed_phase) override { } TimeType getWork(ElementIDType object, PhaseOffset when) override; From 81b95c4c8f20e9d4ff3637f8ba345f04475f879f Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 21 Jul 2020 09:52:14 -0400 Subject: [PATCH 29/63] #582: Document CommOverhead model --- src/vt/vrt/collection/balance/model/comm_overhead.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/vt/vrt/collection/balance/model/comm_overhead.h b/src/vt/vrt/collection/balance/model/comm_overhead.h index 9eae4f0cfd..ad54d23a80 100644 --- a/src/vt/vrt/collection/balance/model/comm_overhead.h +++ b/src/vt/vrt/collection/balance/model/comm_overhead.h @@ -50,7 +50,17 @@ namespace vt { namespace vrt { namespace collection { namespace balance { +/** + * \brief Add some implied, unaccounted work time for communication activity + * + * Not yet implemented - will abort at runtime + */ struct CommOverhead : public ComposedModel { + /** + * \brief Constructor + * + * \param[in] base: the underlying source of object work loads + */ CommOverhead(std::shared_ptr base); void setLoads(std::vector const* proc_load, std::vector const* proc_subphase_load, From c163bf1511e7eac848a0c5b84a8bc20b16fe02ca Mon Sep 
17 00:00:00 2001 From: Phil Miller Date: Tue, 21 Jul 2020 09:56:45 -0400 Subject: [PATCH 30/63] #582: Document LoadModel interface --- src/vt/vrt/collection/balance/model/load_model.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index 8675063eef..05fd348a1f 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -85,6 +85,11 @@ class ObjectIterator { difference_type operator-(ObjectIterator rhs) { return std::distance(rhs.i, i); } }; +/** + * \brief Interface for transforming measurements of past object loads + * into predictions of future object load for load balancing + * strategies + */ class LoadModel { public: @@ -92,7 +97,7 @@ class LoadModel virtual ~LoadModel() = default; /** - * \internal \brief Initialize the model instance with pointers to the measured load data + * \brief Initialize the model instance with pointers to the measured load data * * This would typically be called by LBManager when the user has * passed a new model instance for a collection From 9f9ef2246cb929c80456560f21ad61f035f07955 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 21 Jul 2020 09:59:56 -0400 Subject: [PATCH 31/63] #582: Document ComposedModel --- src/vt/vrt/collection/balance/model/composed_model.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/vt/vrt/collection/balance/model/composed_model.h b/src/vt/vrt/collection/balance/model/composed_model.h index a1662ef44a..f306d08cdf 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.h +++ b/src/vt/vrt/collection/balance/model/composed_model.h @@ -50,6 +50,15 @@ namespace vt { namespace vrt { namespace collection { namespace balance { +/** + * \brief Utility class to support implementation of composable load + * modeling components + * + * All model implementations meant to compose with arbitrary other + * 
models should inherit from this class. It implements all methods by + * calling the same method on the underlying model passed at + * construction. + */ class ComposedModel : public LoadModel { public: From 4c1181cfa78fbd3d656c2ec4a9a8057e6c4165da Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 21 Jul 2020 11:02:01 -0400 Subject: [PATCH 32/63] #582: Fix alignment --- src/vt/vrt/collection/balance/model/composed_model.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/model/composed_model.h b/src/vt/vrt/collection/balance/model/composed_model.h index f306d08cdf..e323d7818b 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.h +++ b/src/vt/vrt/collection/balance/model/composed_model.h @@ -66,8 +66,8 @@ class ComposedModel : public LoadModel ComposedModel(std::shared_ptr base) : base_(base) {} void setLoads(std::vector const* proc_load, - std::vector const* proc_subphase_load, - std::vector const* proc_comm) override; + std::vector const* proc_subphase_load, + std::vector const* proc_comm) override; void updateLoads(PhaseType last_completed_phase) override; From 67948cdbdae5243d5877fd2deea626abe1882545 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 21 Jul 2020 11:03:35 -0400 Subject: [PATCH 33/63] #582: Fix alignment --- src/vt/vrt/collection/balance/model/load_model.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index 05fd348a1f..220a991689 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -103,8 +103,8 @@ class LoadModel * passed a new model instance for a collection */ virtual void setLoads(std::vector const* proc_load, - std::vector const* proc_subphase_load, - std::vector const* proc_comm) = 0; + std::vector const* proc_subphase_load, + std::vector const* proc_comm) = 0; /** * \brief Signals that 
load data for a new phase is available From f418fe5eeabbd0f24c5d6d26b2a7871169414a7e Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 21 Jul 2020 11:33:02 -0400 Subject: [PATCH 34/63] #582: Add PerCollection model --- .../balance/model/per_collection.cc | 84 ++++++++++++++++++ .../collection/balance/model/per_collection.h | 88 +++++++++++++++++++ 2 files changed, 172 insertions(+) create mode 100644 src/vt/vrt/collection/balance/model/per_collection.cc create mode 100644 src/vt/vrt/collection/balance/model/per_collection.h diff --git a/src/vt/vrt/collection/balance/model/per_collection.cc b/src/vt/vrt/collection/balance/model/per_collection.cc new file mode 100644 index 0000000000..046aea16d8 --- /dev/null +++ b/src/vt/vrt/collection/balance/model/per_collection.cc @@ -0,0 +1,84 @@ +/* +//@HEADER +// ***************************************************************************** +// +// per_collection.cc +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#include "vt/vrt/collection/balance/model/per_collection.h" + +namespace vt { namespace vrt { namespace collection { namespace balance { + +PerCollection::PerCollection(std::shared_ptr base) + : ComposedModel(base) +{ } + +void PerCollection::addModel(CollectionID collection, std::shared_ptr model) +{ + models_[collection] = model; +} + +void PerCollection::setLoads(std::vector const* proc_load, + std::vector const* proc_subphase_load, + std::vector const* proc_comm) { + for (auto& m : models_) + m.second->setLoads(proc_load, proc_subphase_load, proc_comm); + ComposedModel::setLoads(proc_load, proc_subphase_load, proc_comm); +} + +void PerCollection::updateLoads(PhaseType last_completed_phase) { + for (auto& m : models_) + m.second->updateLoads(last_completed_phase); + ComposedModel::updateLoads(last_completed_phase); +} + +TimeType PerCollection::getWork(ElementIDType object, PhaseOffset when) { +#if 0 + // See if some specific model has been given for the object in question + auto mi = models_.find(getCollectionID(object)); + if 
(mi != models_.end()) + return mi->second->getWork(object, when); +#endif + + // Otherwise, default to the given base model + return ComposedModel::getWork(object, when); +} + +}}}} diff --git a/src/vt/vrt/collection/balance/model/per_collection.h b/src/vt/vrt/collection/balance/model/per_collection.h new file mode 100644 index 0000000000..f2b8839590 --- /dev/null +++ b/src/vt/vrt/collection/balance/model/per_collection.h @@ -0,0 +1,88 @@ +/* +//@HEADER +// ***************************************************************************** +// +// per_collection.h +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#if !defined INCLUDED_VRT_COLLECTION_BALANCE_PER_COLLECTION_H +#define INCLUDED_VRT_COLLECTION_BALANCE_PER_COLLECTION_H + +#include "vt/config.h" +#include "vt/vrt/collection/balance/model/composed_model.h" +#include + +namespace vt { namespace vrt { namespace collection { namespace balance { + +/** + * \brief Selects an underlying model to call corresponding to the + * collection containing the queried object + */ +struct PerCollection : public ComposedModel +{ + using CollectionID = int; + + /** + * \param[in] base The underlying default model. 
Used to give loads + * for objects in unspecified collections, and for object and + * subphase enumeration + */ + PerCollection(std::shared_ptr base); + + /** + * \brief Add a model for objects in a specific collection + */ + void addModel(CollectionID collection, std::shared_ptr model); + + void setLoads(std::vector const* proc_load, + std::vector const* proc_subphase_load, + std::vector const* proc_comm) override; + + void updateLoads(PhaseType last_completed_phase) override; + + TimeType getWork(ElementIDType object, PhaseOffset when) override; + +private: + std::unordered_map> models_; +}; // class PerCollection + +}}}} // namespaces + +#endif From 22bf002a4fefe2db1795c824d63ee7cd19aafff1 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 21 Jul 2020 12:31:06 -0400 Subject: [PATCH 35/63] #582: Add a forecast model giving the median over the last N phases --- .../model/persistence_median_last_n.cc | 80 ++++++++++++++++++ .../balance/model/persistence_median_last_n.h | 81 +++++++++++++++++++ 2 files changed, 161 insertions(+) create mode 100644 src/vt/vrt/collection/balance/model/persistence_median_last_n.cc create mode 100644 src/vt/vrt/collection/balance/model/persistence_median_last_n.h diff --git a/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc b/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc new file mode 100644 index 0000000000..ae44d5d11d --- /dev/null +++ b/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc @@ -0,0 +1,80 @@ +/* +//@HEADER +// ***************************************************************************** +// +// persistence_median_last_n.cc +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// ***************************************************************************** +//@HEADER +*/ + +#include "vt/config.h" +#include "vt/vrt/collection/balance/model/persistence_median_last_n.h" +#include + +namespace vt { namespace vrt { namespace collection { namespace balance { + +PersistenceMedianLastN::PersistenceMedianLastN(std::shared_ptr base, int n) + : ComposedModel(base) + , n_(n) +{ + vtAssert(n > 0, "Cannot take a median over no phases"); +} + +TimeType PersistenceMedianLastN::getWork(ElementIDType object, PhaseOffset when) +{ + // Retrospective queries don't call for a prospective calculation + if (when.phases < 0) + return ComposedModel::getWork(object, when); + + int phases = std::min(n_, completed_phases_); + std::vector times(phases); + for (int i = 1; i <= phases; ++i) { + PhaseOffset p{-1*i, when.subphase}; + TimeType t = ComposedModel::getWork(object, p); + times[i-1] = t; + } + + std::sort(times.begin(), times.end()); + // Median of the sorted samples: the middle element, or the mean of the two middle elements + if (phases % 2 == 1) + return times[phases / 2]; + else + return (times[phases / 2 - 1] + times[phases / 2]) / 2; +} + +}}}} diff --git a/src/vt/vrt/collection/balance/model/persistence_median_last_n.h b/src/vt/vrt/collection/balance/model/persistence_median_last_n.h new file mode 100644 index 0000000000..947a20f85f --- /dev/null +++ b/src/vt/vrt/collection/balance/model/persistence_median_last_n.h @@ -0,0 +1,81 @@ +/* +//@HEADER +// ***************************************************************************** +// +// persistence_median_last_n.h +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// ***************************************************************************** +//@HEADER +*/ + +#if !defined INCLUDED_VRT_COLLECTION_BALANCE_PERSISTENCE_MEDIAN_LAST_N_H +#define INCLUDED_VRT_COLLECTION_BALANCE_PERSISTENCE_MEDIAN_LAST_N_H + +#include "vt/config.h" +#include "vt/vrt/collection/balance/model/composed_model.h" +#include + +namespace vt { namespace vrt { namespace collection { namespace balance { + +/** + * \brief Predicts loads as the median of the last N phases + * + * This may be useful as a rudimentary form of noise filtering, or in + * general where a single phase worth of timings might not be ideally + * representative + */ +struct PersistenceMedianLastN : public ComposedModel +{ + /** + * \param[in] base The underlying model + * \param[in] n the number of preceding phases to use in making a prediction + */ + PersistenceMedianLastN(std::shared_ptr base, int n); + + void updateLoads(PhaseType last_completed_phase) override + { completed_phases_ = last_completed_phase; ComposedModel::updateLoads(last_completed_phase); } + + TimeType getWork(ElementIDType object, PhaseOffset when) override; + +private: + int n_; + int completed_phases_ = 0; +}; // class PersistenceMedianLastN + +}}}} // namespaces + +#endif From db1dd9ee2255704993f5f49403fa5fe5856cc95f Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 21 Jul 2020 19:47:58 -0400 Subject: [PATCH 36/63] #582: Norm: Improve comments --- src/vt/vrt/collection/balance/model/norm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/model/norm.h b/src/vt/vrt/collection/balance/model/norm.h index d64e4dd0bb..0d27228b5f 100644 --- a/src/vt/vrt/collection/balance/model/norm.h +++ b/src/vt/vrt/collection/balance/model/norm.h @@ -60,7 +60,8 @@ class Norm : public ComposedModel { /** * \brief Constructor * - * \param[in] power The power to use in computing the norms + * \param[in] power The power to use in computing the norms. Must be + * >0. 
Pass +infinity for a `max` over subphases */ Norm(std::shared_ptr base, double power); void setLoads(std::vector const* proc_load, From 9a8b5c8a5001249ede1924554ec3c83759e79773 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 21 Jul 2020 19:48:20 -0400 Subject: [PATCH 37/63] #582: Docs: Fix spelling errors --- docs/md/lb-manager.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/md/lb-manager.md b/docs/md/lb-manager.md index f097f8268e..112ea14846 100644 --- a/docs/md/lb-manager.md +++ b/docs/md/lb-manager.md @@ -19,13 +19,13 @@ To run a load balancer at runtime: The LB specification file allows users to specify which load balancer along with which LB-specific configuration parameters are passed to the load balancer -instance for any given phase. The order of the LB phase specficiation lines in +instance for any given phase. The order of the LB phase specification lines in the file disambiguates lines---higher precedence for earlier lines. The format of the LB specification file is: \code -[%] <$phase> <$lbname> [$LB-specific-arg-1] ... [$LB-specfic-arg-N] +[%] <$phase> <$lbname> [$LB-specific-arg-1] ... 
[$LB-specific-arg-N] \endcode If a `%` is present, the line matches phases where: From ea8e005dedf54af9180450d5a986a50b59cebc9f Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 21 Jul 2020 19:49:32 -0400 Subject: [PATCH 38/63] #582: Add high-level documentation of load models --- docs/md/lb-manager.md | 51 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/docs/md/lb-manager.md b/docs/md/lb-manager.md index 112ea14846..cd14a01268 100644 --- a/docs/md/lb-manager.md +++ b/docs/md/lb-manager.md @@ -57,3 +57,54 @@ The following is an example LB specification: | HierarchicalLB | Hierarchical | Build tree to move objects nodes | `vt::vrt::collection::lb::HierarchicalLB` | | ZotltanLB | Hyper-graph Partitioner | Run Zoltan in hyper-graph mode to LB | `vt::vrt::collection::lb::ZoltanLB` | | StatsMapLB | User-specified | Read file to determine mapping | `vt::vrt::collection::lb::StatsMapLB` | + +\section load-models Object Load Models + +The performance-oriented load balancers described in the preceding +section require a prediction of the loads each object will represent +during the phases between one load balancing invocation and the +next. These predictions are provided by load models, which are +implementations of the `vt::vrt:collection::balance::LoadModel` +interface. There are a number of general-purpose load model +implementations provided by vt. + +By default, vt uses a load model that predicts each object's work load +for all future phases will match its workload in the most recent past +phase. The system also provides an interface for applications and +users to arrange use of a non-default load model where that may be +desirable for reasons such as performance experimentation, +specialization to application details, or execution environment +considerations. + +Most provided load models are designed as composable filters inherited +from the `vt::vrt:collection::balance::ComposedModel` class. 
This +allows them to form a 'model stack' in which each class makes a +particular adjustment to the predictions generated, and relies on +others above and below to refine them further. One exception is the +`vt::vrt:collection::balance::RawData` model, which directly returns +past values recorded in the instrumented statistics structures that +`LBManager` provides. + +To illustrate the design concept, the default model is implemented as +a stack of two other components. At the base of the stack is a +`RawData` model that will refer to the instrumented object load +timings recorded by the system during preceding execution. Layered on +that is a `vt::vrt:collection::balance::NaivePersistence` model that +queries the underlying `RawData` model for the times taken in the most +recent phase, and returns those same times as its prediction of the +times those objects will take in all future phases. + +The full set of load model classes provided with vt is as follows + +| Load Model | Description | Reference | +| -------------------|-----------------------------------------------------|---------- | +| LoadModel | Pure virtual interface class, which the following implement | `vt::vrt:collection::balance::LoadModel` | +| RawData | Returns historical data only, from the measured times | `vt::vrt:collection::balance::RawData` | +| NaivePersistence | Passes through historical queries, and maps all future queries to the most recent past phase | `vt::vrt:collection::balance::NaivePersistence` | +| PersistenceMedianLastN | Similar to NaivePersistence, except that it predicts based on a median over the N most recent phases | `vt::vrt:collection::balance::PersistenceMedianLastN` | +| Norm | When asked for a `WHOLE_PHASE` value, computes a specified l-norm over all subphases | `vt::vrt:collection::balance::Norm` | +| SelectSubphases | Filters and remaps the subphases with data present in the underlying model | `vt::vrt:collection::balance::SelectSubphases` | +| CommOverhead + | Adds 
a specified amount of imputed 'system overhead' time to each object's work based on the number of messages receives | `vt::vrt:collection::balance::CommOverhead` | +| PerCollection + | Maintains a set of load models associated with different collection instances, and passes queries for an object through to the model corresponding to its collection | `vt::vrt:collection::balance::PerCollection` | + ++: This model is not yet implemented From f5fc311447bf993705862fa0c66c5327048e8905 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 21 Jul 2020 20:04:12 -0400 Subject: [PATCH 39/63] #582: Add `explicit` annotations on single-argument constructors not meant for conversions --- src/vt/vrt/collection/balance/model/comm_overhead.h | 2 +- src/vt/vrt/collection/balance/model/composed_model.h | 2 +- src/vt/vrt/collection/balance/model/load_model.h | 2 +- src/vt/vrt/collection/balance/model/naive_persistence.h | 2 +- src/vt/vrt/collection/balance/model/per_collection.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/vt/vrt/collection/balance/model/comm_overhead.h b/src/vt/vrt/collection/balance/model/comm_overhead.h index ad54d23a80..1ec5921797 100644 --- a/src/vt/vrt/collection/balance/model/comm_overhead.h +++ b/src/vt/vrt/collection/balance/model/comm_overhead.h @@ -61,7 +61,7 @@ struct CommOverhead : public ComposedModel { * * \param[in] base: the underlying source of object work loads */ - CommOverhead(std::shared_ptr base); + explicit CommOverhead(std::shared_ptr base); void setLoads(std::vector const* proc_load, std::vector const* proc_subphase_load, std::vector const* proc_comm) override; diff --git a/src/vt/vrt/collection/balance/model/composed_model.h b/src/vt/vrt/collection/balance/model/composed_model.h index e323d7818b..25f19659cf 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.h +++ b/src/vt/vrt/collection/balance/model/composed_model.h @@ -63,7 +63,7 @@ class ComposedModel : public LoadModel { public: // \param[in] 
base must not be null - ComposedModel(std::shared_ptr base) : base_(base) {} + explicit ComposedModel(std::shared_ptr base) : base_(base) {} void setLoads(std::vector const* proc_load, std::vector const* proc_subphase_load, diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index 220a991689..8d9871a553 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -78,7 +78,7 @@ class ObjectIterator { map_iterator_type i; public: - ObjectIterator(map_iterator_type in) : i(in) { } + explicit ObjectIterator(map_iterator_type in) : i(in) { } void operator++() { ++i; } value_type operator*() { return i->first; } bool operator!=(ObjectIterator rhs) { return i != rhs.i; } diff --git a/src/vt/vrt/collection/balance/model/naive_persistence.h b/src/vt/vrt/collection/balance/model/naive_persistence.h index 3df1cdb91b..ed53b46d1b 100644 --- a/src/vt/vrt/collection/balance/model/naive_persistence.h +++ b/src/vt/vrt/collection/balance/model/naive_persistence.h @@ -60,7 +60,7 @@ struct NaivePersistence : public ComposedModel { * * \param[in] base: The source of underlying load numbers to return; must not be null */ - NaivePersistence(std::shared_ptr base); + explicit NaivePersistence(std::shared_ptr base); TimeType getWork(ElementIDType object, PhaseOffset when) override; }; // class NaivePersistence diff --git a/src/vt/vrt/collection/balance/model/per_collection.h b/src/vt/vrt/collection/balance/model/per_collection.h index f2b8839590..0b0ea0bdb6 100644 --- a/src/vt/vrt/collection/balance/model/per_collection.h +++ b/src/vt/vrt/collection/balance/model/per_collection.h @@ -64,7 +64,7 @@ struct PerCollection : public ComposedModel * for objects in unspecified collections, and for object and * subphase enumeration */ - PerCollection(std::shared_ptr base); + explicit PerCollection(std::shared_ptr base); /** * \brief Add a model for objects in a specific collection 
From 0fb3b02e7d894d3afb774482066a84d6a1ffd610 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 21 Jul 2020 20:04:52 -0400 Subject: [PATCH 40/63] #582: Format docs a bit better --- docs/md/lb-manager.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/md/lb-manager.md b/docs/md/lb-manager.md index cd14a01268..f9b4e831a1 100644 --- a/docs/md/lb-manager.md +++ b/docs/md/lb-manager.md @@ -66,9 +66,9 @@ during the phases between one load balancing invocation and the next. These predictions are provided by load models, which are implementations of the `vt::vrt:collection::balance::LoadModel` interface. There are a number of general-purpose load model -implementations provided by vt. +implementations provided by \vt. -By default, vt uses a load model that predicts each object's work load +By default, \vt uses a load model that predicts each object's work load for all future phases will match its workload in the most recent past phase. The system also provides an interface for applications and users to arrange use of a non-default load model where that may be @@ -94,7 +94,7 @@ queries the underlying `RawData` model for the times taken in the most recent phase, and returns those same times as its prediction of the times those objects will take in all future phases. 
-The full set of load model classes provided with vt is as follows +The full set of load model classes provided with \vt is as follows | Load Model | Description | Reference | | -------------------|-----------------------------------------------------|---------- | From fb2922b518520884b81eae0aafda89c796dab592 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 21 Jul 2020 20:05:51 -0400 Subject: [PATCH 41/63] #582: Fix typo in docs --- docs/md/lb-manager.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/md/lb-manager.md b/docs/md/lb-manager.md index f9b4e831a1..cf0beeda6e 100644 --- a/docs/md/lb-manager.md +++ b/docs/md/lb-manager.md @@ -104,7 +104,7 @@ The full set of load model classes provided with \vt is as follows | PersistenceMedianLastN | Similar to NaivePersistence, except that it predicts based on a median over the N most recent phases | `vt::vrt:collection::balance::PersistenceMedianLastN` | | Norm | When asked for a `WHOLE_PHASE` value, computes a specified l-norm over all subphases | `vt::vrt:collection::balance::Norm` | | SelectSubphases | Filters and remaps the subphases with data present in the underlying model | `vt::vrt:collection::balance::SelectSubphases` | -| CommOverhead + | Adds a specified amount of imputed 'system overhead' time to each object's work based on the number of messages receives | `vt::vrt:collection::balance::CommOverhead` | +| CommOverhead + | Adds a specified amount of imputed 'system overhead' time to each object's work based on the number of messages received | `vt::vrt:collection::balance::CommOverhead` | | PerCollection + | Maintains a set of load models associated with different collection instances, and passes queries for an object through to the model corresponding to its collection | `vt::vrt:collection::balance::PerCollection` | +: This model is not yet implemented From aafbc34da7bbae97807d307f6b0171fea995c015 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 21 Jul 2020 17:13:29 
-0700 Subject: [PATCH 42/63] #582: Implement collection proxy lookup for element IDs during LB --- src/vt/vrt/collection/balance/proc_stats.cc | 15 +++++++++++++++ src/vt/vrt/collection/balance/proc_stats.h | 12 ++++++++++++ 2 files changed, 27 insertions(+) diff --git a/src/vt/vrt/collection/balance/proc_stats.cc b/src/vt/vrt/collection/balance/proc_stats.cc index 19bc1e8fc7..e906eb5152 100644 --- a/src/vt/vrt/collection/balance/proc_stats.cc +++ b/src/vt/vrt/collection/balance/proc_stats.cc @@ -144,6 +144,7 @@ void ProcStats::startIterCleanup() { ProcStats::proc_migrate_.clear(); ProcStats::proc_temp_to_perm_.clear(); ProcStats::proc_perm_to_temp_.clear(); + proc_collection_lookup_.clear(); } ElementIDType ProcStats::getNextElm() { @@ -319,7 +320,21 @@ ElementIDType ProcStats::addProcStats( }) ); } + + auto const col_proxy = col_elm->getProxy(); + proc_collection_lookup_[temp_id] = col_proxy; + return temp_id; } +VirtualProxyType ProcStats::getCollectionProxyForElement( + ElementIDType temp_id +) const { + auto iter = proc_collection_lookup_.find(temp_id); + if (iter == proc_collection_lookup_.end()) { + return no_vrt_proxy; + } + return iter->second; +} + }}}} /* end namespace vt::vrt::collection::balance */ diff --git a/src/vt/vrt/collection/balance/proc_stats.h b/src/vt/vrt/collection/balance/proc_stats.h index 5a5f499102..a6b4310319 100644 --- a/src/vt/vrt/collection/balance/proc_stats.h +++ b/src/vt/vrt/collection/balance/proc_stats.h @@ -227,6 +227,16 @@ struct ProcStats : runtime::component::Component { */ ElementIDType permToTemp(ElementIDType perm_id) const; + /** + * \internal \brief Get the collection proxy for a given element ID + * + * \param[in] temp_id the temporary ID for the element for a given phase + * + * \return the virtual proxy if the element is part of the collection; + * otherwise \c no_vrt_proxy + */ + VirtualProxyType getCollectionProxyForElement(ElementIDType temp_id) const; + private: /** * \internal \brief Create the stats file @@ 
-251,6 +261,8 @@ struct ProcStats : runtime::component::Component { std::unordered_map proc_temp_to_perm_; /// Map of permanent ID to temporary ID std::unordered_map proc_perm_to_temp_; + /// Map from element ID to the collection's virtual proxy (untyped) + std::unordered_map proc_collection_lookup_; /// Processor communication graph for each local object std::vector proc_comm_; /// The current element ID From 77157587e6014214c4eb037cdf37427d12ab3ca1 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 21 Jul 2020 17:19:23 -0700 Subject: [PATCH 43/63] #582: Add the lookup for collection-based models in PerCollection --- src/vt/vrt/collection/balance/model/per_collection.cc | 9 ++++----- src/vt/vrt/collection/balance/model/per_collection.h | 7 +++++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/vt/vrt/collection/balance/model/per_collection.cc b/src/vt/vrt/collection/balance/model/per_collection.cc index 046aea16d8..f5b9237c8e 100644 --- a/src/vt/vrt/collection/balance/model/per_collection.cc +++ b/src/vt/vrt/collection/balance/model/per_collection.cc @@ -43,6 +43,7 @@ */ #include "vt/vrt/collection/balance/model/per_collection.h" +#include "vt/vrt/collection/balance/proc_stats.h" namespace vt { namespace vrt { namespace collection { namespace balance { @@ -50,9 +51,9 @@ PerCollection::PerCollection(std::shared_ptr base) : ComposedModel(base) { } -void PerCollection::addModel(CollectionID collection, std::shared_ptr model) +void PerCollection::addModel(CollectionID proxy, std::shared_ptr model) { - models_[collection] = model; + models_[proxy] = model; } void PerCollection::setLoads(std::vector const* proc_load, @@ -70,12 +71,10 @@ void PerCollection::updateLoads(PhaseType last_completed_phase) { } TimeType PerCollection::getWork(ElementIDType object, PhaseOffset when) { -#if 0 // See if some specific model has been given for the object in question - auto mi = models_.find(getCollectionID(object)); + auto mi = 
models_.find(theProcStats()->getCollectionProxyForElement(object)); if (mi != models_.end()) return mi->second->getWork(object, when); -#endif // Otherwise, default to the given base model return ComposedModel::getWork(object, when); diff --git a/src/vt/vrt/collection/balance/model/per_collection.h b/src/vt/vrt/collection/balance/model/per_collection.h index 0b0ea0bdb6..d7ff37d4fd 100644 --- a/src/vt/vrt/collection/balance/model/per_collection.h +++ b/src/vt/vrt/collection/balance/model/per_collection.h @@ -57,7 +57,7 @@ namespace vt { namespace vrt { namespace collection { namespace balance { */ struct PerCollection : public ComposedModel { - using CollectionID = int; + using CollectionID = VirtualProxyType; /** * \param[in] base The underlying default model. Used to give loads @@ -68,8 +68,11 @@ struct PerCollection : public ComposedModel /** * \brief Add a model for objects in a specific collection + * + * \param[in] proxy the virtual proxy of the collection + * \param[in] model the associated model for the particular collection */ - void addModel(CollectionID collection, std::shared_ptr model); + void addModel(CollectionID proxy, std::shared_ptr model); void setLoads(std::vector const* proc_load, std::vector const* proc_subphase_load, From 7d2f8c68889a458009dc13fc38da937ba8e155ed Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 21 Jul 2020 19:18:23 -0700 Subject: [PATCH 44/63] #582: Write a test for PerCollection models --- .../test_model_per_collection.extended.cc | 168 ++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 tests/unit/collection/test_model_per_collection.extended.cc diff --git a/tests/unit/collection/test_model_per_collection.extended.cc b/tests/unit/collection/test_model_per_collection.extended.cc new file mode 100644 index 0000000000..d87af5996b --- /dev/null +++ b/tests/unit/collection/test_model_per_collection.extended.cc @@ -0,0 +1,168 @@ +/* +//@HEADER +// 
***************************************************************************** +// +// test_model_per_collection.extended.cc +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#include +#include +#include +#include + +#include + +#include "test_parallel_harness.h" + +#include + +namespace vt { namespace tests { namespace unit { + +static constexpr std::size_t data1_len = 1024; +static constexpr std::size_t data2_len = 64; + +struct TestCol1 : vt::Collection { }; +struct TestCol2 : vt::Collection { }; + +using TestModelPerCollection = TestParallelHarness; + +static constexpr int32_t const num_elms = 64; + +using vt::vrt::collection::balance::ComposedModel; +using vt::vrt::collection::balance::LoadModel; +using vt::vrt::collection::balance::ElementIDType; +using vt::vrt::collection::balance::PhaseOffset; +using vt::vrt::collection::balance::PerCollection; + +struct ConstantTestModel : ComposedModel { + + ConstantTestModel(std::shared_ptr in_base, VirtualProxyType in_proxy) + : vt::vrt::collection::balance::ComposedModel(in_base), + proxy_(in_proxy) + { } + + TimeType getWork(ElementIDType, PhaseOffset) override { + return static_cast(proxy_); + } + +private: + VirtualProxyType proxy_ = 0; +}; + +template +struct MyMsg : vt::CollectionMessage { }; + +std::unordered_map id_proxy_map; + +template +void colHandler(MyMsg*, ColT* col) { + // do nothing, except setting up our map using the temp ID, which will hit + // every node + id_proxy_map[col->getTempID()] = col->getProxy(); +} + +TEST_F(TestModelPerCollection, test_model_per_collection_1) { + id_proxy_map = {}; + + // We must have more or equal number of elements than nodes for this test to + // work properly + EXPECT_GE(num_elms, vt::theContext()->getNumNodes()); + + auto range = vt::Index1D(num_elms); + + vt::vrt::collection::CollectionProxy proxy1; + vt::vrt::collection::CollectionProxy proxy2; + + // Construct two collections + runInEpochCollective([&]{ + proxy1 = vt::theCollection()->constructCollective( + range, [](vt::Index1D){ return std::make_unique(); } 
+ ); + proxy2 = vt::theCollection()->constructCollective( + range, [](vt::Index1D){ return std::make_unique(); } + ); + }); + + // Get the base model, assert it's valid + auto base = theLBManager()->getBaseLoadModel(); + EXPECT_NE(base, nullptr); + + // Create a new PerCollection model + auto per_col = std::make_shared(base); + + // Add two distinct models for each collection that return the proxy for the + // amount of work + auto proxy1_untyped = proxy1.getProxy(); + auto proxy2_untyped = proxy2.getProxy(); + per_col->addModel( + proxy1_untyped, std::make_shared(base, proxy1_untyped) + ); + per_col->addModel( + proxy2_untyped, std::make_shared(base, proxy2_untyped) + ); + + // Set the new model + theLBManager()->setLoadModel(per_col); + + // Do some work. + runInEpochCollective([&]{ + auto this_node = vt::theContext()->getNode(); + if (this_node == 0) { + proxy1.broadcast, colHandler>(); + proxy2.broadcast, colHandler>(); + } + }); + + // Go to the next phase. + runInEpochCollective([&]{ + vt::theCollection()->startPhaseCollective(nullptr); + }); + + // Test the model, which should be per-collection and return the proxy. 
+ auto model = theLBManager()->getLoadModel(); + for (auto&& obj : *model) { + auto work_val = model->getWork(obj, PhaseOffset{}); + EXPECT_EQ(work_val, static_cast(id_proxy_map[obj])); + //fmt::print("{:x} {}\n", obj, work_val); + } + +} + +}}} // end namespace vt::tests::unit From 63f53e1cf9189f4470a01661d049e783775ee012 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 21 Jul 2020 19:29:48 -0700 Subject: [PATCH 45/63] #582: Update docs that PerCollection is implemented --- docs/md/lb-manager.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/md/lb-manager.md b/docs/md/lb-manager.md index cf0beeda6e..ef7f4f6f1f 100644 --- a/docs/md/lb-manager.md +++ b/docs/md/lb-manager.md @@ -105,6 +105,6 @@ The full set of load model classes provided with \vt is as follows | Norm | When asked for a `WHOLE_PHASE` value, computes a specified l-norm over all subphases | `vt::vrt:collection::balance::Norm` | | SelectSubphases | Filters and remaps the subphases with data present in the underlying model | `vt::vrt:collection::balance::SelectSubphases` | | CommOverhead + | Adds a specified amount of imputed 'system overhead' time to each object's work based on the number of messages received | `vt::vrt:collection::balance::CommOverhead` | -| PerCollection + | Maintains a set of load models associated with different collection instances, and passes queries for an object through to the model corresponding to its collection | `vt::vrt:collection::balance::PerCollection` | +| PerCollection | Maintains a set of load models associated with different collection instances, and passes queries for an object through to the model corresponding to its collection | `vt::vrt:collection::balance::PerCollection` | +: This model is not yet implemented From d40d9fd44e8c86f9cb2c5f7819353d826e3d7d4d Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 21 Jul 2020 19:30:00 -0700 Subject: [PATCH 46/63] #582: Remove some dead code from test (copy-paste error) --- 
tests/unit/collection/test_model_per_collection.extended.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/unit/collection/test_model_per_collection.extended.cc b/tests/unit/collection/test_model_per_collection.extended.cc index d87af5996b..5642fa66b5 100644 --- a/tests/unit/collection/test_model_per_collection.extended.cc +++ b/tests/unit/collection/test_model_per_collection.extended.cc @@ -55,9 +55,6 @@ namespace vt { namespace tests { namespace unit { -static constexpr std::size_t data1_len = 1024; -static constexpr std::size_t data2_len = 64; - struct TestCol1 : vt::Collection { }; struct TestCol2 : vt::Collection { }; From 39a419629c961f3a370ac4e518f21ad52f72ba00 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 21 Jul 2020 20:03:38 -0700 Subject: [PATCH 47/63] #582: Implement a basic comm overhead model --- .../collection/balance/model/comm_overhead.cc | 39 ++++++++++++------- .../collection/balance/model/comm_overhead.h | 16 ++++++-- 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/src/vt/vrt/collection/balance/model/comm_overhead.cc b/src/vt/vrt/collection/balance/model/comm_overhead.cc index 79782eb106..c34e98c6d1 100644 --- a/src/vt/vrt/collection/balance/model/comm_overhead.cc +++ b/src/vt/vrt/collection/balance/model/comm_overhead.cc @@ -47,10 +47,13 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -CommOverhead::CommOverhead(std::shared_ptr base) - : ComposedModel(base) -{ -} +CommOverhead::CommOverhead( + std::shared_ptr base, TimeType in_per_msg_weight, + TimeType in_per_byte_weight +) : ComposedModel(base), + per_msg_weight_(in_per_msg_weight), + per_byte_weight_(in_per_byte_weight) +{ } void CommOverhead::setLoads(std::vector const* proc_load, std::vector const* proc_subphase_load, @@ -59,21 +62,27 @@ void CommOverhead::setLoads(std::vector const* proc_load, ComposedModel::setLoads(proc_load, proc_subphase_load, proc_comm); } -TimeType CommOverhead::getWork(ElementIDType object, 
PhaseOffset offset) -{ +TimeType CommOverhead::getWork(ElementIDType object, PhaseOffset offset) { auto work = ComposedModel::getWork(object, offset); - vtAbort("Not fully implemented yet"); -#if 0 - // Add a bit of overhead for each off-node received message per object - for (auto &&comm : *comms_) { - auto obj = loads_.find(comm.first.toObj()); - if (obj != loads_.end()) - work += 0.001 * comm.second.messages; + auto phase = proc_comm_->size() - offset.phases; + auto& comm = proc_comm_->at(phase); + + TimeType overhead = 0.; + for (auto&& c : comm) { + // find messages that go off-node and are sent to this object + if (c.first.offNode() and c.first.toObjTemp() == object) { + overhead += per_msg_weight_ * c.second.messages; + overhead += per_byte_weight_ * c.second.bytes; + } } -#endif - return work; + if (offset.subphase == PhaseOffset::WHOLE_PHASE) { + return work + overhead; + } else { + // @todo: we don't record comm costs for each subphase---split it evenly + return work + overhead / getNumSubphases(); + } } diff --git a/src/vt/vrt/collection/balance/model/comm_overhead.h b/src/vt/vrt/collection/balance/model/comm_overhead.h index 1ec5921797..09666e8258 100644 --- a/src/vt/vrt/collection/balance/model/comm_overhead.h +++ b/src/vt/vrt/collection/balance/model/comm_overhead.h @@ -52,22 +52,30 @@ namespace vt { namespace vrt { namespace collection { namespace balance { /** * \brief Add some implied, unaccounted work time for communication activity - * - * Not yet implemented - will abort at runtime */ struct CommOverhead : public ComposedModel { /** * \brief Constructor * * \param[in] base: the underlying source of object work loads + * \param[in] in_per_msg_weight weight to add per message received + * \param[in] in_per_byte_weight weight to add per byte received */ - explicit CommOverhead(std::shared_ptr base); + explicit CommOverhead( + std::shared_ptr base, TimeType in_per_msg_weight, + TimeType in_per_byte_weight + ); + void setLoads(std::vector const* 
proc_load, std::vector const* proc_subphase_load, std::vector const* proc_comm) override; + TimeType getWork(ElementIDType object, PhaseOffset when) override; - std::vector const* proc_comm_; +private: + std::vector const* proc_comm_; /**< Underlying comm data */ + TimeType per_msg_weight_ = 0.001; /**< Cost per message */ + TimeType per_byte_weight_ = 0.000001; /**< Cost per bytes */ }; // class CommOverhead }}}} // end namespace From 07a73026ee66ad97f874b0e75524c5883e7e8f6f Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 21 Jul 2020 21:11:22 -0700 Subject: [PATCH 48/63] #582: Implement a utility for linear regression --- src/vt/utils/stats/linear_regression.h | 135 +++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 src/vt/utils/stats/linear_regression.h diff --git a/src/vt/utils/stats/linear_regression.h b/src/vt/utils/stats/linear_regression.h new file mode 100644 index 0000000000..5f4f1fc5d9 --- /dev/null +++ b/src/vt/utils/stats/linear_regression.h @@ -0,0 +1,135 @@ +/* +//@HEADER +// ***************************************************************************** +// +// linear_regression.h +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#if !defined INCLUDED_VT_UTILS_STATS_LINEAR_REGRESSION_H +#define INCLUDED_VT_UTILS_STATS_LINEAR_REGRESSION_H + +#include "vt/config.h" + +#include +#include + +namespace vt { namespace util { namespace stats { + +/** + * \struct LinearRegression + * + * \brief Perform a simple linear regression to predict values with a linear + * model + */ +struct LinearRegression { + + /** + * \brief Construct a linear regression + * + * \param[in] in_x the x-values + * \param[in] in_y the y-values + */ + LinearRegression(std::vector const& in_x, std::vector const& in_y) + : x_(in_x), + y_(in_y) + { } + + /** + * \brief Perform the regression + */ + void compute() { + vtAssert(x_.size() == y_.size(), "Sizes must be the same"); + vtAssert(x_.size() != 0, "Sizes must not be zero"); + auto const n = x_.size(); + auto const sum_x = std::accumulate(x_.begin(), x_.end(), 0.0); + auto const sum_y = std::accumulate(y_.begin(), y_.end(), 0.0); + auto const p_xx = std::inner_product(x_.begin(), x_.end(), x_.begin(), 0.0); + auto const p_xy = std::inner_product(x_.begin(), x_.end(), y_.begin(), 0.0); + // numerator: sum over i of (x[i] - X_mean) * (y[i] - Y_mean) + // denominator: sum over i of (x[i] - X_mean) * (x[i] - X_mean) + auto const numerator = p_xy * n - sum_x * sum_y; + auto const denominator = p_xx * n - sum_x * sum_x; + + vtAssert(denominator != 0, "Denominator must not be zero"); + + slope_ = numerator / denominator; + intercept_ = (sum_y - slope_ * sum_x) / n; + computed_ = true; + } + + /** + * \brief Get the slope + * + * \return the slope of the line + */ + double getSlope() const { return slope_; } + + /** + * \brief Get the y-intercept + * + * \return the y-intercept + */ + double getIntercept() const { return intercept_; } + + /** + * \brief Predict a value using the linear model + * + * \param[in] in_x the x value to predict + * + * \return 
the y value + */ + double predict(double in_x) { + if (not computed_) { + compute(); + } + return intercept_ + slope_ * in_x; + } + +private: + std::vector const& x_; + std::vector const& y_; + double slope_ = 0.; + double intercept_ = 0.; + bool computed_ = false; +}; + +}}} /* end namespace vt::util::stats */ + +#endif /*INCLUDED_VT_UTILS_STATS_LINEAR_REGRESSION_H*/ From 623bf33066f33461345d41a727ab1dc6f931b799 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 21 Jul 2020 21:11:40 -0700 Subject: [PATCH 49/63] #582: Implement a linear model based on past window --- .../collection/balance/model/linear_model.cc | 84 ++++++++++++++++++ .../collection/balance/model/linear_model.h | 87 +++++++++++++++++++ 2 files changed, 171 insertions(+) create mode 100644 src/vt/vrt/collection/balance/model/linear_model.cc create mode 100644 src/vt/vrt/collection/balance/model/linear_model.h diff --git a/src/vt/vrt/collection/balance/model/linear_model.cc b/src/vt/vrt/collection/balance/model/linear_model.cc new file mode 100644 index 0000000000..066bd7fa7e --- /dev/null +++ b/src/vt/vrt/collection/balance/model/linear_model.cc @@ -0,0 +1,84 @@ +/* +//@HEADER +// ***************************************************************************** +// +// linear_model.cc +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#include "vt/vrt/collection/balance/model/linear_model.h" +#include "vt/utils/stats/linear_regression.h" + +#include + +namespace vt { namespace vrt { namespace collection { namespace balance { + +void LinearModel::setLoads( + std::vector const* proc_load, + std::vector const* proc_subphase_load, + std::vector const* proc_comm +) { + ComposedModel::setLoads(proc_load, proc_subphase_load, proc_comm); + + // Make sure the past length isn't too large for the number of phases we + // actually have + past_len_ = std::min(past_len_, static_cast(proc_load->size())); +} + +TimeType LinearModel::getWork(ElementIDType object, PhaseOffset when) { + using util::stats::LinearRegression; + + std::vector x; + std::vector y; + + PhaseOffset past_phase{when}; + + // Number values on X-axis based on a PhaseOffset + for (int i = -past_len_; i < 0; i++) { + x.emplace_back(i); + past_phase.phases = i; + y.emplace_back(ComposedModel::getWork(object, past_phase)); + } + + // should we re-create this every time? + LinearRegression regression{x, y}; + return regression.predict(when.phases); +} + +}}}} /* end namespace vt::vrt::collection::balance */ diff --git a/src/vt/vrt/collection/balance/model/linear_model.h b/src/vt/vrt/collection/balance/model/linear_model.h new file mode 100644 index 0000000000..e00a5586b7 --- /dev/null +++ b/src/vt/vrt/collection/balance/model/linear_model.h @@ -0,0 +1,87 @@ +/* +//@HEADER +// ***************************************************************************** +// +// linear_model.h +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#if !defined INCLUDED_VT_VRT_COLLECTION_BALANCE_MODEL_LINEAR_MODEL_H +#define INCLUDED_VT_VRT_COLLECTION_BALANCE_MODEL_LINEAR_MODEL_H + +#include "vt/vrt/collection/balance/model/composed_model.h" + +namespace vt { namespace vrt { namespace collection { namespace balance { + +/** + * \struct LinearModel + * + * \brief Predict an object's load with a linear regression model + */ +struct LinearModel : ComposedModel { + + static constexpr int default_past_len = 5; + + /** + * \brief Construct a linear model predictor + * + * \param[in] base the base model + * \param[in] in_past_len (optional) the past number of phases for prediction + */ + explicit LinearModel( + std::shared_ptr base, int in_past_len = default_past_len + ) : ComposedModel(base), + past_len_(in_past_len) + { } + + void setLoads( + std::vector const* proc_load, + std::vector const* proc_subphase_load, + std::vector const* proc_comm + ) override; + + TimeType getWork(ElementIDType object, PhaseOffset when) override; + +private: + int past_len_ = 0; +}; + +}}}} /* end namespace vt::vrt::collection::balance */ + +#endif /*INCLUDED_VT_VRT_COLLECTION_BALANCE_MODEL_LINEAR_MODEL_H*/ From ea18691e95edcb3e9606d2e48b78c6e27e90e718 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 21 Jul 2020 21:14:47 -0700 Subject: [PATCH 50/63] #582: Fix some tabs copied from other code --- src/vt/vrt/collection/balance/model/linear_model.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/model/linear_model.h b/src/vt/vrt/collection/balance/model/linear_model.h index e00a5586b7..484bd135c7 100644 --- a/src/vt/vrt/collection/balance/model/linear_model.h +++ b/src/vt/vrt/collection/balance/model/linear_model.h @@ -72,8 +72,8 @@ struct LinearModel : ComposedModel { void setLoads( std::vector const* proc_load, - std::vector const* 
proc_subphase_load, - std::vector const* proc_comm + std::vector const* proc_subphase_load, + std::vector const* proc_comm ) override; TimeType getWork(ElementIDType object, PhaseOffset when) override; From bf84807d4c5d892570e31da6f0684473cd53f339 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 22 Jul 2020 12:40:27 -0400 Subject: [PATCH 51/63] #582: Remove unimplemented note from doc --- docs/md/lb-manager.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/md/lb-manager.md b/docs/md/lb-manager.md index ef7f4f6f1f..0b174946bb 100644 --- a/docs/md/lb-manager.md +++ b/docs/md/lb-manager.md @@ -104,7 +104,5 @@ The full set of load model classes provided with \vt is as follows | PersistenceMedianLastN | Similar to NaivePersistence, except that it predicts based on a median over the N most recent phases | `vt::vrt:collection::balance::PersistenceMedianLastN` | | Norm | When asked for a `WHOLE_PHASE` value, computes a specified l-norm over all subphases | `vt::vrt:collection::balance::Norm` | | SelectSubphases | Filters and remaps the subphases with data present in the underlying model | `vt::vrt:collection::balance::SelectSubphases` | -| CommOverhead + | Adds a specified amount of imputed 'system overhead' time to each object's work based on the number of messages received | `vt::vrt:collection::balance::CommOverhead` | +| CommOverhead | Adds a specified amount of imputed 'system overhead' time to each object's work based on the number of messages received | `vt::vrt:collection::balance::CommOverhead` | | PerCollection | Maintains a set of load models associated with different collection instances, and passes queries for an object through to the model corresponding to its collection | `vt::vrt:collection::balance::PerCollection` | - -+: This model is not yet implemented From b7594fed96e200ac47b92a95bdb6b700b1b5fe20 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 22 Jul 2020 12:45:58 -0400 Subject: [PATCH 52/63] #582: Make 
CommOverhead subphase attribution proportional to work --- src/vt/vrt/collection/balance/model/comm_overhead.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/model/comm_overhead.cc b/src/vt/vrt/collection/balance/model/comm_overhead.cc index c34e98c6d1..bd1b10bf69 100644 --- a/src/vt/vrt/collection/balance/model/comm_overhead.cc +++ b/src/vt/vrt/collection/balance/model/comm_overhead.cc @@ -80,8 +80,9 @@ TimeType CommOverhead::getWork(ElementIDType object, PhaseOffset offset) { if (offset.subphase == PhaseOffset::WHOLE_PHASE) { return work + overhead; } else { - // @todo: we don't record comm costs for each subphase---split it evenly - return work + overhead / getNumSubphases(); + // @todo: we don't record comm costs for each subphase---split it proportionally + auto whole_phase_work = ComposedModel::getWork(object, PhaseOffset{offset.phases, PhaseOffset::WHOLE_PHASE}); + return work + overhead * ( static_cast(work)/whole_phase_work ); } } From ea448e6f768b752d26d775a2628cd6c4a8d26eec Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 22 Jul 2020 13:17:32 -0400 Subject: [PATCH 53/63] #582: Factor out completed phase enumeration --- .../collection/balance/model/composed_model.cc | 4 ++++ .../collection/balance/model/composed_model.h | 1 + .../collection/balance/model/linear_model.cc | 18 +++++------------- .../collection/balance/model/linear_model.h | 6 ------ .../vrt/collection/balance/model/load_model.h | 1 + .../balance/model/persistence_median_last_n.cc | 2 +- .../balance/model/persistence_median_last_n.h | 6 +----- src/vt/vrt/collection/balance/model/raw_data.h | 1 + 8 files changed, 14 insertions(+), 25 deletions(-) diff --git a/src/vt/vrt/collection/balance/model/composed_model.cc b/src/vt/vrt/collection/balance/model/composed_model.cc index 9d879bfe24..166c50c646 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.cc +++ b/src/vt/vrt/collection/balance/model/composed_model.cc @@ -72,6 
+72,10 @@ int ComposedModel::getNumObjects() { return base_->getNumObjects(); } +int ComposedModel::getNumCompletedPhases() { + return base_->getNumSubphases(); +} + int ComposedModel::getNumSubphases() { return base_->getNumSubphases(); } diff --git a/src/vt/vrt/collection/balance/model/composed_model.h b/src/vt/vrt/collection/balance/model/composed_model.h index 25f19659cf..c793b7ba47 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.h +++ b/src/vt/vrt/collection/balance/model/composed_model.h @@ -77,6 +77,7 @@ class ComposedModel : public LoadModel ObjectIterator end() override; int getNumObjects() override; + int getNumCompletedPhases() override; int getNumSubphases() override; private: diff --git a/src/vt/vrt/collection/balance/model/linear_model.cc b/src/vt/vrt/collection/balance/model/linear_model.cc index 066bd7fa7e..39b88114aa 100644 --- a/src/vt/vrt/collection/balance/model/linear_model.cc +++ b/src/vt/vrt/collection/balance/model/linear_model.cc @@ -49,28 +49,20 @@ namespace vt { namespace vrt { namespace collection { namespace balance { -void LinearModel::setLoads( - std::vector const* proc_load, - std::vector const* proc_subphase_load, - std::vector const* proc_comm -) { - ComposedModel::setLoads(proc_load, proc_subphase_load, proc_comm); - - // Make sure the past length isn't too large for the number of phases we - // actually have - past_len_ = std::min(past_len_, static_cast(proc_load->size())); -} - TimeType LinearModel::getWork(ElementIDType object, PhaseOffset when) { using util::stats::LinearRegression; + // Retrospective queries don't call for a prediction + if (when.phases < 0) + return ComposedModel::getWork(object, when); + std::vector x; std::vector y; PhaseOffset past_phase{when}; // Number values on X-axis based on a PhaseOffset - for (int i = -past_len_; i < 0; i++) { + for (int i = -1 * getNumCompletedPhases(); i < 0; i++) { x.emplace_back(i); past_phase.phases = i; y.emplace_back(ComposedModel::getWork(object, 
past_phase)); diff --git a/src/vt/vrt/collection/balance/model/linear_model.h b/src/vt/vrt/collection/balance/model/linear_model.h index 484bd135c7..1699b07f3c 100644 --- a/src/vt/vrt/collection/balance/model/linear_model.h +++ b/src/vt/vrt/collection/balance/model/linear_model.h @@ -70,12 +70,6 @@ struct LinearModel : ComposedModel { past_len_(in_past_len) { } - void setLoads( - std::vector const* proc_load, - std::vector const* proc_subphase_load, - std::vector const* proc_comm - ) override; - TimeType getWork(ElementIDType object, PhaseOffset when) override; private: diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index 8d9871a553..35aeb25e14 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -133,6 +133,7 @@ class LoadModel virtual ObjectIterator end() = 0; virtual int getNumObjects() = 0; + virtual int getNumCompletedPhases() = 0; virtual int getNumSubphases() = 0; }; // class LoadModel diff --git a/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc b/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc index ae44d5d11d..7bda8026fc 100644 --- a/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc +++ b/src/vt/vrt/collection/balance/model/persistence_median_last_n.cc @@ -61,7 +61,7 @@ TimeType PersistenceMedianLastN::getWork(ElementIDType object, PhaseOffset when) if (when.phases < 0) return ComposedModel::getWork(object, when); - int phases = std::min(n_, completed_phases_); + int phases = std::min(n_, getNumCompletedPhases()); std::vector times(phases); for (int i = 1; i <= phases; ++i) { PhaseOffset p{-1*i, when.subphase}; diff --git a/src/vt/vrt/collection/balance/model/persistence_median_last_n.h b/src/vt/vrt/collection/balance/model/persistence_median_last_n.h index 947a20f85f..eb309377f8 100644 --- a/src/vt/vrt/collection/balance/model/persistence_median_last_n.h +++ 
b/src/vt/vrt/collection/balance/model/persistence_median_last_n.h @@ -66,14 +66,10 @@ struct PersistenceMedianLastN : public ComposedModel */ PersistenceMedianLastN(std::shared_ptr base, int n); - void updateLoads(PhaseType last_completed_phase) override - { completed_phases_ = last_completed_phase; } - TimeType getWork(ElementIDType object, PhaseOffset when) override; private: - int n_; - int completed_phases_; + const int n_; }; // class PersistenceMedianLastN }}}} // namespaces diff --git a/src/vt/vrt/collection/balance/model/raw_data.h b/src/vt/vrt/collection/balance/model/raw_data.h index 21579536a7..79b052b49b 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.h +++ b/src/vt/vrt/collection/balance/model/raw_data.h @@ -70,6 +70,7 @@ struct RawData : public LoadModel { ObjectIterator end() override { return ObjectIterator(proc_load_->back().end()); } int getNumObjects() override { return end() - begin(); } + int getNumCompletedPhases() override { return proc_load_->size(); } int getNumSubphases() override; // Observer pointers to the underlying data. 
In operation, these would be owned by ProcStats From 26a7d2860410224d6e5b9e0c782d2954fa542267 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 22 Jul 2020 13:19:07 -0400 Subject: [PATCH 54/63] #582: Clear out cruft from before subphase enumeration was factored out --- src/vt/vrt/collection/balance/model/norm.cc | 6 ------ src/vt/vrt/collection/balance/model/norm.h | 4 +--- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/src/vt/vrt/collection/balance/model/norm.cc b/src/vt/vrt/collection/balance/model/norm.cc index 8d5d39163c..a6e37f31bd 100644 --- a/src/vt/vrt/collection/balance/model/norm.cc +++ b/src/vt/vrt/collection/balance/model/norm.cc @@ -56,12 +56,6 @@ Norm::Norm(std::shared_ptr base, double power) vtAssert(power >= 0.0, "Reciprocal loads make no sense"); } -void Norm::setLoads(std::vector const* proc_load, - std::vector const* proc_subphase_load, - std::vector const* proc_comm) { - ComposedModel::setLoads(proc_load, proc_subphase_load, proc_comm); -} - TimeType Norm::getWork(ElementIDType object, PhaseOffset offset) { if (offset.subphase != PhaseOffset::WHOLE_PHASE) diff --git a/src/vt/vrt/collection/balance/model/norm.h b/src/vt/vrt/collection/balance/model/norm.h index 0d27228b5f..0fccf51854 100644 --- a/src/vt/vrt/collection/balance/model/norm.h +++ b/src/vt/vrt/collection/balance/model/norm.h @@ -64,9 +64,7 @@ class Norm : public ComposedModel { * >0. 
Pass +infinity for a `max` over subphases */ Norm(std::shared_ptr base, double power); - void setLoads(std::vector const* proc_load, - std::vector const* proc_subphase_load, - std::vector const* proc_comm) override; + TimeType getWork(ElementIDType object, PhaseOffset when) override; private: From d367a962ef7bc9931f4b6e982597a4ce68c8d689 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 22 Jul 2020 13:46:53 -0400 Subject: [PATCH 55/63] #582: Clarify comment --- src/vt/vrt/collection/balance/proc_stats.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/proc_stats.h b/src/vt/vrt/collection/balance/proc_stats.h index a6b4310319..e57cac2f6d 100644 --- a/src/vt/vrt/collection/balance/proc_stats.h +++ b/src/vt/vrt/collection/balance/proc_stats.h @@ -261,7 +261,7 @@ struct ProcStats : runtime::component::Component { std::unordered_map proc_temp_to_perm_; /// Map of permanent ID to temporary ID std::unordered_map proc_perm_to_temp_; - /// Map from element ID to the collection's virtual proxy (untyped) + /// Map from element temporary ID to the collection's virtual proxy (untyped) std::unordered_map proc_collection_lookup_; /// Processor communication graph for each local object std::vector proc_comm_; From eda40b361fcb92841e7dc1253af687ca3c0f2d57 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 22 Jul 2020 15:13:37 -0400 Subject: [PATCH 56/63] #582: Clean up some header dependencies --- src/vt/vrt/collection/balance/baselb/baselb.cc | 1 + src/vt/vrt/collection/balance/baselb/baselb.h | 1 - src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc | 1 + src/vt/vrt/collection/manager.h | 1 - src/vt/vrt/collection/manager.impl.h | 1 + 5 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index 11f2012390..13e6c8c4bd 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ 
b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -51,6 +51,7 @@ #include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" #include "vt/vrt/collection/balance/read_lb.h" #include "vt/vrt/collection/balance/lb_invoke/lb_manager.h" +#include "vt/vrt/collection/balance/proc_stats.h" #include "vt/timing/timing.h" #include "vt/collective/reduce/reduce.h" #include "vt/collective/collective_alg.h" diff --git a/src/vt/vrt/collection/balance/baselb/baselb.h b/src/vt/vrt/collection/balance/baselb/baselb.h index 9e8b0df0fd..02c2cdc056 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.h +++ b/src/vt/vrt/collection/balance/baselb/baselb.h @@ -49,7 +49,6 @@ #include "vt/vrt/collection/balance/lb_common.h" #include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" #include "vt/vrt/collection/balance/baselb/baselb_msgs.h" -#include "vt/vrt/collection/balance/proc_stats.h" #include "vt/vrt/collection/balance/stats_msg.h" #include "vt/vrt/collection/balance/lb_comm.h" #include "vt/vrt/collection/balance/read_lb.h" diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index e97e131a76..1d3a48e3d6 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -48,6 +48,7 @@ #include "vt/vrt/collection/balance/lb_invoke/start_lb_msg.h" #include "vt/vrt/collection/balance/read_lb.h" #include "vt/vrt/collection/balance/lb_type.h" +#include "vt/vrt/collection/balance/proc_stats.h" #include "vt/vrt/collection/balance/hierarchicallb/hierlb.h" #include "vt/vrt/collection/balance/greedylb/greedylb.h" #include "vt/vrt/collection/balance/rotatelb/rotatelb.h" diff --git a/src/vt/vrt/collection/manager.h b/src/vt/vrt/collection/manager.h index f58f8a6743..ba16afc61a 100644 --- a/src/vt/vrt/collection/manager.h +++ b/src/vt/vrt/collection/manager.h @@ -74,7 +74,6 @@ #include "vt/collective/reduce/reduce_msg.h" #include 
"vt/collective/reduce/reduce_hash.h" #include "vt/configs/arguments/args.h" -#include "vt/vrt/collection/balance/proc_stats.h" #include "vt/vrt/collection/balance/lb_common.h" #include "vt/runtime/component/component_pack.h" #include "vt/vrt/collection/op_buffer.h" diff --git a/src/vt/vrt/collection/manager.impl.h b/src/vt/vrt/collection/manager.impl.h index 8c00c9440b..35dec2c876 100644 --- a/src/vt/vrt/collection/manager.impl.h +++ b/src/vt/vrt/collection/manager.impl.h @@ -69,6 +69,7 @@ #include "vt/vrt/collection/dispatch/registry.h" #include "vt/vrt/collection/holders/insert_context_holder.h" #include "vt/vrt/collection/collection_directory.h" +#include "vt/vrt/collection/balance/proc_stats.h" #include "vt/vrt/proxy/collection_proxy.h" #include "vt/registry/auto/map/auto_registry_map.h" #include "vt/registry/auto/collection/auto_registry_collection.h" From 3a780548c360a40f14089406e74bd7ed0e86698a Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 22 Jul 2020 17:33:26 -0400 Subject: [PATCH 57/63] #582: Disable part of test that's sensitive to LB being compile-time disabled --- tests/unit/collection/test_model_per_collection.extended.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/unit/collection/test_model_per_collection.extended.cc b/tests/unit/collection/test_model_per_collection.extended.cc index 5642fa66b5..8300dfb9a9 100644 --- a/tests/unit/collection/test_model_per_collection.extended.cc +++ b/tests/unit/collection/test_model_per_collection.extended.cc @@ -152,6 +152,9 @@ TEST_F(TestModelPerCollection, test_model_per_collection_1) { vt::theCollection()->startPhaseCollective(nullptr); }); + // LB control flow means that there will be no recorded phase for + // this to even look up objects in, causing failure +#if vt_check_enabled(lblite) // Test the model, which should be per-collection and return the proxy. 
auto model = theLBManager()->getLoadModel(); for (auto&& obj : *model) { @@ -159,7 +162,7 @@ TEST_F(TestModelPerCollection, test_model_per_collection_1) { EXPECT_EQ(work_val, static_cast(id_proxy_map[obj])); //fmt::print("{:x} {}\n", obj, work_val); } - +#endif } }}} // end namespace vt::tests::unit From f8cc0048af2b3c7b2808c6d52989dbad4592ec04 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Wed, 22 Jul 2020 14:52:27 -0700 Subject: [PATCH 58/63] #582: tests: Implement test for linear regression --- tests/unit/utils/test_linear_regression.cc | 75 ++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 tests/unit/utils/test_linear_regression.cc diff --git a/tests/unit/utils/test_linear_regression.cc b/tests/unit/utils/test_linear_regression.cc new file mode 100644 index 0000000000..212056a333 --- /dev/null +++ b/tests/unit/utils/test_linear_regression.cc @@ -0,0 +1,75 @@ +/* +//@HEADER +// ***************************************************************************** +// +// test_linear_regression.cc +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#include + +#include +#include "test_harness.h" + +#include + +namespace vt { namespace tests { namespace unit { + +using TestLinearRegression = TestHarness; + +TEST_F(TestLinearRegression, test_linear_regression_1) { + std::vector x, y; + + double slope = 0.5; + double intercept = 2; + + for (int i = 1; i < 5; i++) { + x.emplace_back(i); + y.emplace_back(intercept + slope * i); + } + + vt::util::stats::LinearRegression lin{x, y}; + lin.compute(); + + EXPECT_EQ(lin.getSlope(), slope); + EXPECT_EQ(lin.getIntercept(), intercept); + EXPECT_EQ(lin.predict(5), intercept + slope * 5); +} + +}}} /* end namespace vt::tests::unit */ From a0eb5b4816ef12bb7d0d2f07ccc930a7142fcce1 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 22 Jul 2020 18:57:01 -0400 Subject: [PATCH 59/63] #582: Docs: Add missing classes --- docs/md/lb-manager.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/md/lb-manager.md b/docs/md/lb-manager.md index 0b174946bb..c58b77d19b 100644 --- a/docs/md/lb-manager.md +++ b/docs/md/lb-manager.md @@ -99,10 +99,12 @@ The full set of load model classes provided with \vt is as follows | Load Model | Description | Reference | | -------------------|-----------------------------------------------------|---------- | | LoadModel | Pure virtual interface class, which the following implement | `vt::vrt:collection::balance::LoadModel` | +| ComposedModel | A convenience class for most implementations to inherit from, that passes unmodified calls through to an underlying model instance | `vt::vrt:collection::balance::ComposedModel` | | RawData | Returns historical data only, from the measured times | `vt::vrt:collection::balance::RawData` | | NaivePersistence | Passes through historical queries, and maps all future queries to the most recent past phase | `vt::vrt:collection::balance::NaivePersistence` | | PersistenceMedianLastN | 
Similar to NaivePersistence, except that it predicts based on a median over the N most recent phases | `vt::vrt:collection::balance::PersistenceMedianLastN` | | Norm | When asked for a `WHOLE_PHASE` value, computes a specified l-norm over all subphases | `vt::vrt:collection::balance::Norm` | | SelectSubphases | Filters and remaps the subphases with data present in the underlying model | `vt::vrt:collection::balance::SelectSubphases` | | CommOverhead | Adds a specified amount of imputed 'system overhead' time to each object's work based on the number of messages received | `vt::vrt:collection::balance::CommOverhead` | +| LinearModel | Computes a linear regression over on object's loads from a number of recent phases | `vt::vrt:collection::balance::LinearModel` | | PerCollection | Maintains a set of load models associated with different collection instances, and passes queries for an object through to the model corresponding to its collection | `vt::vrt:collection::balance::PerCollection` | From af3b28b4e7ed6e5f08625d933366d155d61bec67 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 22 Jul 2020 19:13:14 -0400 Subject: [PATCH 60/63] #582: Docs: Organize and annotate classes --- docs/md/lb-manager.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/md/lb-manager.md b/docs/md/lb-manager.md index c58b77d19b..14b1955067 100644 --- a/docs/md/lb-manager.md +++ b/docs/md/lb-manager.md @@ -98,13 +98,16 @@ The full set of load model classes provided with \vt is as follows | Load Model | Description | Reference | | -------------------|-----------------------------------------------------|---------- | +| **Utilities** | | LoadModel | Pure virtual interface class, which the following implement | `vt::vrt:collection::balance::LoadModel` | | ComposedModel | A convenience class for most implementations to inherit from, that passes unmodified calls through to an underlying model instance | `vt::vrt:collection::balance::ComposedModel` | | RawData | 
Returns historical data only, from the measured times | `vt::vrt:collection::balance::RawData` | -| NaivePersistence | Passes through historical queries, and maps all future queries to the most recent past phase | `vt::vrt:collection::balance::NaivePersistence` | -| PersistenceMedianLastN | Similar to NaivePersistence, except that it predicts based on a median over the N most recent phases | `vt::vrt:collection::balance::PersistenceMedianLastN` | +| **Transformers** | | Norm | When asked for a `WHOLE_PHASE` value, computes a specified l-norm over all subphases | `vt::vrt:collection::balance::Norm` | | SelectSubphases | Filters and remaps the subphases with data present in the underlying model | `vt::vrt:collection::balance::SelectSubphases` | | CommOverhead | Adds a specified amount of imputed 'system overhead' time to each object's work based on the number of messages received | `vt::vrt:collection::balance::CommOverhead` | -| LinearModel | Computes a linear regression over on object's loads from a number of recent phases | `vt::vrt:collection::balance::LinearModel` | | PerCollection | Maintains a set of load models associated with different collection instances, and passes queries for an object through to the model corresponding to its collection | `vt::vrt:collection::balance::PerCollection` | +| **Predictors** | +| NaivePersistence | Passes through historical queries, and maps all future queries to the most recent past phase | `vt::vrt:collection::balance::NaivePersistence` | +| PersistenceMedianLastN | Similar to NaivePersistence, except that it predicts based on a median over the N most recent phases | `vt::vrt:collection::balance::PersistenceMedianLastN` | +| LinearModel | Computes a linear regression over on object's loads from a number of recent phases | `vt::vrt:collection::balance::LinearModel` | From 8608e26f436d662bfdb6d1958a55cceefbc1476e Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 22 Jul 2020 19:23:04 -0400 Subject: [PATCH 61/63] #582: 
Docs: Describe the categories --- docs/md/lb-manager.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/md/lb-manager.md b/docs/md/lb-manager.md index 14b1955067..f8f6a1b8c2 100644 --- a/docs/md/lb-manager.md +++ b/docs/md/lb-manager.md @@ -98,16 +98,16 @@ The full set of load model classes provided with \vt is as follows | Load Model | Description | Reference | | -------------------|-----------------------------------------------------|---------- | -| **Utilities** | +| **Utilities** | | LoadModel | Pure virtual interface class, which the following implement | `vt::vrt:collection::balance::LoadModel` | | ComposedModel | A convenience class for most implementations to inherit from, that passes unmodified calls through to an underlying model instance | `vt::vrt:collection::balance::ComposedModel` | | RawData | Returns historical data only, from the measured times | `vt::vrt:collection::balance::RawData` | -| **Transformers** | +| **Transformers** | Transforms the values computed by the composed model(s), agnostic to whether a query refers to a past or future phase | | Norm | When asked for a `WHOLE_PHASE` value, computes a specified l-norm over all subphases | `vt::vrt:collection::balance::Norm` | | SelectSubphases | Filters and remaps the subphases with data present in the underlying model | `vt::vrt:collection::balance::SelectSubphases` | | CommOverhead | Adds a specified amount of imputed 'system overhead' time to each object's work based on the number of messages received | `vt::vrt:collection::balance::CommOverhead` | | PerCollection | Maintains a set of load models associated with different collection instances, and passes queries for an object through to the model corresponding to its collection | `vt::vrt:collection::balance::PerCollection` | -| **Predictors** | +| **Predictors** | Computes values for future phase queries, and passes through past phase queries | | NaivePersistence | Passes through historical queries, and maps all 
future queries to the most recent past phase | `vt::vrt:collection::balance::NaivePersistence` | | PersistenceMedianLastN | Similar to NaivePersistence, except that it predicts based on a median over the N most recent phases | `vt::vrt:collection::balance::PersistenceMedianLastN` | | LinearModel | Computes a linear regression over on object's loads from a number of recent phases | `vt::vrt:collection::balance::LinearModel` | From 76f814ec4a198cd0914c30845985f99f73a5b9f6 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 22 Jul 2020 20:27:22 -0400 Subject: [PATCH 62/63] #582: Docs: Finish elaborating on missing bits --- docs/md/lb-manager.md | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/docs/md/lb-manager.md b/docs/md/lb-manager.md index f8f6a1b8c2..6edc5a4a5a 100644 --- a/docs/md/lb-manager.md +++ b/docs/md/lb-manager.md @@ -68,13 +68,18 @@ implementations of the `vt::vrt:collection::balance::LoadModel` interface. There are a number of general-purpose load model implementations provided by \vt. -By default, \vt uses a load model that predicts each object's work load -for all future phases will match its workload in the most recent past -phase. The system also provides an interface for applications and +By default, \vt uses a load model that predicts each object's work +load for all future phases will match its workload in the most recent +past phase. The system also provides an interface for applications and users to arrange use of a non-default load model where that may be desirable for reasons such as performance experimentation, specialization to application details, or execution environment -considerations. +considerations. To install a custom load model, application code +should call `vt::theLBManager()->setLoadModel(user_model)`. 
To +simplify implementation of custom load models, and allow them to +benefit from future system-level improvements, we recommend that +custom load models be composed atop the default model, which can be +obtained by calling `vt::theLBManager()->getBaseLoadModel()`. Most provided load models are designed as composable filters inherited from the `vt::vrt:collection::balance::ComposedModel` class. This @@ -111,3 +116,8 @@ The full set of load model classes provided with \vt is as follows | NaivePersistence | Passes through historical queries, and maps all future queries to the most recent past phase | `vt::vrt:collection::balance::NaivePersistence` | | PersistenceMedianLastN | Similar to NaivePersistence, except that it predicts based on a median over the N most recent phases | `vt::vrt:collection::balance::PersistenceMedianLastN` | | LinearModel | Computes a linear regression over on object's loads from a number of recent phases | `vt::vrt:collection::balance::LinearModel` | + +All of the provided load balancers described in the previous section +require that the installed load model provide responses to future +phase queries for at least `PhaseOffset::NEXT_PHASE` (i.e. `0`), as +the **Predictors** described above do. 
From 24f93c746a7d7868b9b9708715a936f940f87716 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 22 Jul 2020 20:49:41 -0400 Subject: [PATCH 63/63] #582: Add MultiplePhases blocking predictor --- docs/md/lb-manager.md | 1 + .../balance/model/multiple_phases.cc | 64 +++++++++++++ .../balance/model/multiple_phases.h | 91 +++++++++++++++++++ 3 files changed, 156 insertions(+) create mode 100644 src/vt/vrt/collection/balance/model/multiple_phases.cc create mode 100644 src/vt/vrt/collection/balance/model/multiple_phases.h diff --git a/docs/md/lb-manager.md b/docs/md/lb-manager.md index 6edc5a4a5a..ab3951dc5a 100644 --- a/docs/md/lb-manager.md +++ b/docs/md/lb-manager.md @@ -116,6 +116,7 @@ The full set of load model classes provided with \vt is as follows | NaivePersistence | Passes through historical queries, and maps all future queries to the most recent past phase | `vt::vrt:collection::balance::NaivePersistence` | | PersistenceMedianLastN | Similar to NaivePersistence, except that it predicts based on a median over the N most recent phases | `vt::vrt:collection::balance::PersistenceMedianLastN` | | LinearModel | Computes a linear regression over on object's loads from a number of recent phases | `vt::vrt:collection::balance::LinearModel` | +| MultiplePhases | Computes values for future phases based on sums of the underlying model's predictions for N corresponding future phases | `vt::vrt::collection::balance::MultiplePhases` | All of the provided load balancers described in the previous section require that the installed load model provide responses to future diff --git a/src/vt/vrt/collection/balance/model/multiple_phases.cc b/src/vt/vrt/collection/balance/model/multiple_phases.cc new file mode 100644 index 0000000000..e4c94f242c --- /dev/null +++ b/src/vt/vrt/collection/balance/model/multiple_phases.cc @@ -0,0 +1,64 @@ +/* +//@HEADER +// ***************************************************************************** +// +// multiple_phases.cc +// DARMA Toolkit v.
1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#include "vt/vrt/collection/balance/model/multiple_phases.h" + +namespace vt { namespace vrt { namespace collection { namespace balance { + +TimeType MultiplePhases::getWork(ElementIDType object, PhaseOffset when) { + // Retrospective queries don't call for a prediction + if (when.phases < 0) + return ComposedModel::getWork(object, when); + + TimeType sum = 0.0; + for (int i = 0; i < future_phase_block_size_; ++i) { + PhaseOffset p{future_phase_block_size_*when.phases + i, + when.subphase}; + sum += ComposedModel::getWork(object, p); + } + + return sum; +} + +}}}} /* end namespace vt::vrt::collection::balance */ diff --git a/src/vt/vrt/collection/balance/model/multiple_phases.h b/src/vt/vrt/collection/balance/model/multiple_phases.h new file mode 100644 index 0000000000..6b1d6ed5d9 --- /dev/null +++ b/src/vt/vrt/collection/balance/model/multiple_phases.h @@ -0,0 +1,91 @@ +/* +//@HEADER +// ***************************************************************************** +// +// multiple_phases.h +// DARMA Toolkit v. 1.0.0 +// DARMA/vt => Virtual Transport +// +// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#if !defined INCLUDED_VT_VRT_COLLECTION_BALANCE_MODEL_MULTIPLE_PHASES_H +#define INCLUDED_VT_VRT_COLLECTION_BALANCE_MODEL_MULTIPLE_PHASES_H + +#include "vt/vrt/collection/balance/model/composed_model.h" + +namespace vt { namespace vrt { namespace collection { namespace balance { + +/** + * \struct MultiplePhases + * + * \brief Predict an object's load as a sum over blocks of N future phases + * + * Expected to be most useful either when queried by an explicitly + * subphase-aware vector-optimizing load balancer, or when queried by + * a whole-phase scalar-optimizing load balancer with a Norm model + * composed on top of this. + * + * Multiple phase blocked predictions will only be meaningfully + * different from single phase predictions when composed on top of a + * Predictor model that is not constant across future + * phases. 
I.e. `LinearModel` rather than `NaivePersistence` or + * `PersistenceMedianLastN`. + */ +struct MultiplePhases : ComposedModel { + /** + * \brief Constructor + * + * \param[in] base the base model + * + * \param[in] in_future_phase_block_size how many phases to predict + * as each single queried phase + */ + explicit MultiplePhases( + std::shared_ptr base, int in_future_phase_block_size) + : ComposedModel(base) + , future_phase_block_size_(in_future_phase_block_size) + { } + + TimeType getWork(ElementIDType object, PhaseOffset when) override; + +private: + int future_phase_block_size_ = 0; +}; + +}}}} /* end namespace vt::vrt::collection::balance */ + +#endif /*INCLUDED_VT_VRT_COLLECTION_BALANCE_MODEL_MULTIPLE_PHASES_H*/