Skip to content

Commit

Permalink
#1672: lb: add initialization for TemperedWMin
Browse files Browse the repository at this point in the history
- use single proxy underneath
- use proxy bits to create proxy
  • Loading branch information
cz4rs committed May 30, 2022
1 parent 01916fd commit b953108
Show file tree
Hide file tree
Showing 24 changed files with 67 additions and 36 deletions.
3 changes: 2 additions & 1 deletion src/vt/elm/elm_lb_data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,8 @@ TimeType ElementLBData::getLoadMetric(PhaseType const& phase) const {
}
}

TimeType ElementLBData::getLoadMetric(PhaseType phase, SubphaseType subphase) const {
TimeType
ElementLBData::getLoadMetric(PhaseType phase, SubphaseType subphase) const {
if (subphase == no_subphase)
return getLoadMetric(phase);

Expand Down
6 changes: 4 additions & 2 deletions src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,8 @@ void HierarchicalLB::loadOverBin(ObjBinType bin, ObjBinListType& bin_list) {
load_over[bin].push_back(obj_id);
bin_list.pop_back();

auto const& obj_time_milli = loadMilli(load_model_->getLoadMetric(obj_id, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}));
auto const& obj_time_milli = loadMilli(load_model_->getLoadMetric(obj_id,
{balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE}));

this_load -= obj_time_milli;

Expand Down Expand Up @@ -452,7 +453,8 @@ void HierarchicalLB::downTree(

void HierarchicalLB::lbTreeUpHandler(LBTreeUpMsg* msg) {
lbTreeUp(
msg->getChildLoad(), msg->getChild(), msg->getLoadMetric(), msg->getChildSize()
msg->getChildLoad(), msg->getChild(), msg->getLoadMetric(),
msg->getChildSize()
);
}

Expand Down
7 changes: 5 additions & 2 deletions src/vt/vrt/collection/balance/lb_common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,14 @@ LoadSummary getObjectLoads(
LoadModel* model, ElementIDStruct object, PhaseOffset when
) {
LoadSummary ret;
ret.whole_phase_load = model->getLoadMetric(object, {when.phases, PhaseOffset::WHOLE_PHASE});
ret.whole_phase_load =
model->getLoadMetric(object, {when.phases, PhaseOffset::WHOLE_PHASE});

unsigned int subphases = model->getNumSubphases();
for (unsigned int i = 0; i < subphases; ++i)
ret.subphase_loads.push_back(model->getLoadMetric(object, {when.phases, i}));
ret.subphase_loads.push_back(
model->getLoadMetric(object, {when.phases, i})
);

return ret;
}
Expand Down
2 changes: 1 addition & 1 deletion src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ void LBManager::startLB(
case LBType::ZoltanLB: lb_instances_["chosen"] = makeLB<lb::ZoltanLB>(); break;
# endif
case LBType::TestSerializationLB: lb_instances_["chosen"] = makeLB<lb::TestSerializationLB>(); break;
// case LBType::TemperedWMin: lb_instances_["chosen"] = makeLB<lb::TemperedWMin>(); break;
case LBType::TemperedWMin: lb_instances_["chosen"] = makeLB<lb::TemperedWMin>(); break;
case LBType::NoLB:
vtAssert(false, "LBType::NoLB is not a valid LB for collectiveImpl");
break;
Expand Down
1 change: 1 addition & 0 deletions src/vt/vrt/collection/balance/lb_type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ static std::unordered_map<LBType,std::string> lb_names_ = {
{LBType::OfflineLB, std::string{"OfflineLB" }},
{LBType::RandomLB, std::string{"RandomLB" }},
{LBType::TestSerializationLB, std::string{"TestSerializationLB"}},
{LBType::TemperedWMin, std::string{"TemperedWMin" }},
};

std::unordered_map<LBType, std::string>& get_lb_names() {
Expand Down
2 changes: 1 addition & 1 deletion src/vt/vrt/collection/balance/lb_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
#if !defined INCLUDED_VT_VRT_COLLECTION_BALANCE_LB_TYPE_H
#define INCLUDED_VT_VRT_COLLECTION_BALANCE_LB_TYPE_H

#include "vt/config.h"
#include "vt/configs/features/features_defines.h"

#include <unordered_map>
#include <string>
Expand Down
8 changes: 5 additions & 3 deletions src/vt/vrt/collection/balance/model/comm_overhead.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ void CommOverhead::setLoads(std::unordered_map<PhaseType, LoadMapType> const* pr
ComposedModel::setLoads(proc_load, proc_comm);
}

TimeType CommOverhead::getLoadMetric(ElementIDStruct object, PhaseOffset offset) {
TimeType
CommOverhead::getLoadMetric(ElementIDStruct object, PhaseOffset offset) {
auto work = ComposedModel::getLoadMetric(object, offset);

auto phase = getNumCompletedPhases() + offset.phases;
Expand All @@ -79,10 +80,11 @@ TimeType CommOverhead::getLoadMetric(ElementIDStruct object, PhaseOffset offset)
return work + overhead;
} else {
// @todo: we don't record comm costs for each subphase---split it proportionally
auto whole_phase_work = ComposedModel::getLoadMetric(object, PhaseOffset{offset.phases, PhaseOffset::WHOLE_PHASE});
auto whole_phase_work = ComposedModel::getLoadMetric(
object, PhaseOffset{offset.phases, PhaseOffset::WHOLE_PHASE}
);
return work + overhead * ( static_cast<double>(work)/whole_phase_work );
}
}


}}}}
3 changes: 2 additions & 1 deletion src/vt/vrt/collection/balance/model/composed_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ void ComposedModel::updateLoads(PhaseType last_completed_phase) {
base_->updateLoads(last_completed_phase);
}

TimeType ComposedModel::getLoadMetric(ElementIDStruct object, PhaseOffset when) {
TimeType
ComposedModel::getLoadMetric(ElementIDStruct object, PhaseOffset when) {
return base_->getLoadMetric(object, when);
}

Expand Down
3 changes: 2 additions & 1 deletion src/vt/vrt/collection/balance/model/multiple_phases.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@

namespace vt { namespace vrt { namespace collection { namespace balance {

TimeType MultiplePhases::getLoadMetric(ElementIDStruct object, PhaseOffset when) {
TimeType
MultiplePhases::getLoadMetric(ElementIDStruct object, PhaseOffset when) {
// Retrospective queries don't call for a prediction
if (when.phases < 0)
return ComposedModel::getLoadMetric(object, when);
Expand Down
4 changes: 2 additions & 2 deletions src/vt/vrt/collection/balance/model/naive_persistence.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ NaivePersistence::NaivePersistence(std::shared_ptr<balance::LoadModel> base)
: ComposedModel(base)
{ }

TimeType NaivePersistence::getLoadMetric(ElementIDStruct object, PhaseOffset offset)
{
TimeType
NaivePersistence::getLoadMetric(ElementIDStruct object, PhaseOffset offset) {
if (offset.phases >= 0)
offset.phases = -1;

Expand Down
4 changes: 1 addition & 3 deletions src/vt/vrt/collection/balance/model/norm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,7 @@ Norm::Norm(std::shared_ptr<balance::LoadModel> base, double power)
vtAssert(power >= 0.0, "Reciprocal loads make no sense");
}

TimeType Norm::getLoadMetric(ElementIDStruct object, PhaseOffset offset)
{
TimeType Norm::getLoadMetric(ElementIDStruct object, PhaseOffset offset) {
if (offset.subphase != PhaseOffset::WHOLE_PHASE)
return ComposedModel::getLoadMetric(object, offset);

Expand Down Expand Up @@ -84,5 +83,4 @@ TimeType Norm::getLoadMetric(ElementIDStruct object, PhaseOffset offset)
}
}


}}}}
3 changes: 2 additions & 1 deletion src/vt/vrt/collection/balance/model/per_collection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ void PerCollection::updateLoads(PhaseType last_completed_phase) {
ComposedModel::updateLoads(last_completed_phase);
}

TimeType PerCollection::getLoadMetric(ElementIDStruct object, PhaseOffset when) {
TimeType
PerCollection::getLoadMetric(ElementIDStruct object, PhaseOffset when) {
// See if some specific model has been given for the object in question
auto mi = models_.find(theNodeLBData()->getCollectionProxyForElement(object));
if (mi != models_.end())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,9 @@ PersistenceMedianLastN::PersistenceMedianLastN(std::shared_ptr<LoadModel> base,
vtAssert(n > 0, "Cannot take a median over no phases");
}

TimeType PersistenceMedianLastN::getLoadMetric(ElementIDStruct object, PhaseOffset when)
{
TimeType PersistenceMedianLastN::getLoadMetric(
ElementIDStruct object, PhaseOffset when
) {
// Retrospective queries don't call for a prospective calculation
if (when.phases < 0)
return ComposedModel::getLoadMetric(object, when);
Expand Down
4 changes: 2 additions & 2 deletions src/vt/vrt/collection/balance/model/proposed_reassignment.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ int ProposedReassignment::getNumObjects()
return base - departing + arriving;
}

TimeType ProposedReassignment::getLoadMetric(ElementIDStruct object, PhaseOffset when)
{
TimeType
ProposedReassignment::getLoadMetric(ElementIDStruct object, PhaseOffset when) {
auto a = reassignment_->arrive_.find(object);
if (a != reassignment_->arrive_.end()) {
return std::get<0>(a->second).get(when);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ struct ProposedReassignment : public ComposedModel {
TimeType getLoadMetric(ElementIDStruct object, PhaseOffset when) override;
TimeType getRawLoad(ElementIDStruct object, PhaseOffset when) override;

private:
private:
std::shared_ptr<const Reassignment> reassignment_;
};

Expand Down
3 changes: 2 additions & 1 deletion src/vt/vrt/collection/balance/model/select_subphases.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ SelectSubphases::SelectSubphases(std::shared_ptr<LoadModel> base, std::vector<un
//vtAssert(subphases_.size() < base_subphases, "...");
}

TimeType SelectSubphases::getLoadMetric(ElementIDStruct object, PhaseOffset when) {
TimeType
SelectSubphases::getLoadMetric(ElementIDStruct object, PhaseOffset when) {
if (when.subphase == PhaseOffset::WHOLE_PHASE) {
// Sum up the selected subphases as if they represent the entire phase
TimeType sum = 0.0;
Expand Down
5 changes: 3 additions & 2 deletions src/vt/vrt/collection/balance/temperedlb/temperedlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ struct TemperedLB : BaseLB {

void setupDone(ReduceMsgType* msg);

std::mt19937 gen_propagate_;
std::mt19937 gen_sample_;

private:
uint16_t f_ = 0;
uint8_t k_max_ = 0;
Expand Down Expand Up @@ -179,8 +182,6 @@ struct TemperedLB : BaseLB {
bool setup_done_ = false;
bool propagate_next_round_ = false;
std::vector<bool> propagated_k_;
std::mt19937 gen_propagate_;
std::mt19937 gen_sample_;
StatisticMapType stats;
LoadType this_load = 0.0f;
};
Expand Down
11 changes: 9 additions & 2 deletions src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@

namespace vt { namespace vrt { namespace collection { namespace lb {

/**
 * \brief Initialize this load balancer from its own objgroup proxy
 *
 * Re-types the incoming \c TemperedWMin proxy as a \c TemperedLB proxy that
 * carries the same proxy bits, so a single proxy is used underneath, and then
 * forwards initialization to \c init on the object that proxy resolves to.
 *
 * \param[in] in_proxy the objgroup proxy created for \c TemperedWMin
 */
void TemperedWMin::init(objgroup::proxy::Proxy<TemperedWMin> in_proxy) {
  // Same underlying bits, viewed through the base strategy's proxy type
  objgroup::proxy::Proxy<TemperedLB> base_proxy(in_proxy.getProxy());

  // NOTE(review): presumably get() yields the local instance (i.e. this
  // object seen as TemperedLB) -- confirm against the objgroup proxy API
  base_proxy.get()->init(base_proxy);
}

/*static*/ std::unordered_map<std::string, std::string>
TemperedWMin::getInputKeysWithHelp() {
auto map = TemperedLB::getInputKeysWithHelp();
Expand Down Expand Up @@ -87,8 +94,8 @@ TimeType TemperedWMin::getTotalWork(const elm::ElementIDStruct& obj) {
balance::PhaseOffset when =
{balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE};

return alpha_ * load_model_->getLoadMetric(obj, when)
+ beta_ * load_model_->getComm(obj, when) + gamma_;
return alpha_ * load_model_->getLoadMetric(obj, when) +
beta_ * load_model_->getComm(obj, when) + gamma_;
}

}}}} // namespace vt::vrt::collection::lb
7 changes: 4 additions & 3 deletions src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ struct TemperedWMin : TemperedLB {
virtual ~TemperedWMin() { }

public:
void init(objgroup::proxy::Proxy<TemperedWMin> in_proxy);
static std::unordered_map<std::string, std::string> getInputKeysWithHelp();

void inputParams(balance::SpecEntry* spec) override;
Expand All @@ -63,9 +64,9 @@ struct TemperedWMin : TemperedLB {
TimeType getTotalWork(const elm::ElementIDStruct& obj) override;

private:
double alpha_ = 1.0;
double beta_ = 0.0;
double gamma_ = 0.0;
double alpha_ = 1.0;
double beta_ = 0.0;
double gamma_ = 0.0;
};

}}}} /* end namespace vt::vrt::collection::lb */
Expand Down
4 changes: 3 additions & 1 deletion tests/unit/collection/test_model_linear_model.nompi.cc
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,9 @@ TEST_F(TestLinearModel, test_model_linear_model_1) {
++num_phases;

for (auto&& obj : *test_model) {
auto work_val = test_model->getLoadMetric(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE});
auto work_val = test_model->getLoadMetric(
obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}
);
EXPECT_EQ(
work_val,
obj.id == 1 ? expected_data[iter].first : expected_data[iter].second)
Expand Down
4 changes: 3 additions & 1 deletion tests/unit/collection/test_model_multiple_phases.nompi.cc
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,9 @@ TEST_F(TestModelMultiplePhases, test_model_multiple_phases_1) {
test_model->updateLoads(3);

for (auto&& obj : *test_model) {
auto work_val = test_model->getLoadMetric(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE});
auto work_val = test_model->getLoadMetric(
obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}
);
EXPECT_EQ(work_val, obj.id == 1 ? TimeType{100} : TimeType{85});
}
}
Expand Down
4 changes: 3 additions & 1 deletion tests/unit/collection/test_model_per_collection.extended.cc
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,9 @@ TEST_F(TestModelPerCollection, test_model_per_collection_1) {
// model to function
model->updateLoads(0);
for (auto&& obj : *model) {
auto work_val = model->getLoadMetric(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE});
auto work_val = model->getLoadMetric(
obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}
);
if (id_proxy_map.find(obj) != id_proxy_map.end()) {
EXPECT_DOUBLE_EQ(work_val, static_cast<TimeType>(id_proxy_map[obj]));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,9 @@ TEST_F(TestModelPersistenceMedianLastN, test_model_persistence_median_last_n_1)
++num_phases;

for (auto&& obj : *test_model) {
auto work_val = test_model->getLoadMetric(obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE});
auto work_val = test_model->getLoadMetric(
obj, {PhaseOffset::NEXT_PHASE, PhaseOffset::WHOLE_PHASE}
);
EXPECT_EQ(
work_val,
obj.id == 1 ? expected_medians[iter].first : expected_medians[iter].second)
Expand Down
4 changes: 3 additions & 1 deletion tests/unit/collection/test_model_raw_data.nompi.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ TEST_F(TestRawData, test_model_raw_data_scalar) {
EXPECT_TRUE(obj.id == 1 || obj.id == 2);
objects_seen++;

auto work_val = test_model->getLoadMetric(obj, PhaseOffset{-1, PhaseOffset::WHOLE_PHASE});
auto work_val = test_model->getLoadMetric(
obj, PhaseOffset{-1, PhaseOffset::WHOLE_PHASE}
);
EXPECT_EQ(work_val, load_holder[iter][obj].whole_phase_load);

auto sub_work_val = test_model->getLoadMetric(obj, PhaseOffset{-1, 0});
Expand Down

0 comments on commit b953108

Please sign in to comment.