Skip to content

Commit

Permalink
#1830: TemperedWMin: allow all nodes to be potential recipients of work
Browse files Browse the repository at this point in the history
  • Loading branch information
cz4rs committed Dec 21, 2022
1 parent 7fc9b42 commit 1194a01
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 18 deletions.
30 changes: 14 additions & 16 deletions src/vt/vrt/collection/balance/temperedlb/temperedlb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@
*/

#include "vt/config.h"
#include "vt/configs/types/types_sentinels.h"
#include "vt/configs/types/types_type.h"
#include "vt/timing/timing.h"
#include "vt/vrt/collection/balance/baselb/baselb.h"
#include "vt/vrt/collection/balance/model/load_model.h"
Expand Down Expand Up @@ -995,7 +993,7 @@ NodeType TemperedLB::sampleFromCMF(
return selected_node;
}

std::vector<NodeType> TemperedLB::makeUnderloaded() const {
std::vector<NodeType> TemperedLB::getPotentialRecipients() const {
std::vector<NodeType> under = {};
for (auto&& elm : load_info_) {
if (isUnderloaded(elm.second)) {
Expand Down Expand Up @@ -1203,10 +1201,10 @@ void TemperedLB::decide() {
int n_transfers = 0, n_rejected = 0;

if (canMigrate()) {
std::vector<NodeType> under = makeUnderloaded();
auto potential_recipients = getPotentialRecipients();
std::unordered_map<NodeType, ObjsType> migrate_objs;

if (under.size() > 0) {
if (potential_recipients.size() > 0) {
std::vector<ObjIDType> ordered_obj_ids = orderObjects(
obj_ordering_, cur_objs_, this_new_load_, target_max_load_
);
Expand All @@ -1218,24 +1216,24 @@ void TemperedLB::decide() {

if (cmf_type_ == CMFTypeEnum::Original) {
// Rebuild the set of potential recipients based on the updated load of this node
under = makeUnderloaded();
if (under.size() == 0) {
potential_recipients = getPotentialRecipients();
if (potential_recipients.size() == 0) {
break;
}
} else if (cmf_type_ == CMFTypeEnum::NormByMaxExcludeIneligible) {
// Rebuild the underloaded set and eliminate processors that will
// fail the Criterion for this object
under = makeSufficientlyUnderloaded(obj_load);
if (under.size() == 0) {
potential_recipients = makeSufficientlyUnderloaded(obj_load);
if (potential_recipients.size() == 0) {
++n_rejected;
iter++;
continue;
}
}
// Rebuild the CMF with the new loads taken into account
auto cmf = createCMF(under);
auto cmf = createCMF(potential_recipients);
// Select a node using the CMF
auto const selected_node = sampleFromCMF(under, cmf);
auto const selected_node = sampleFromCMF(potential_recipients, cmf);

vt_debug_print(
verbose, temperedlb,
Expand All @@ -1255,13 +1253,13 @@ void TemperedLB::decide() {

vt_debug_print(
verbose, temperedlb,
"TemperedLB::decide: trial={}, iter={}, under.size()={}, "
"selected_node={}, selected_load={:e}, obj_id={:x}, home={}, "
"obj_load={}, target_max_load={}, this_new_load_={}, "
"criterion={}\n",
"TemperedLB::decide: trial={}, iter={}, "
"potential_recipients.size()={}, selected_node={}, "
"selected_load={:e}, obj_id={:x}, home={}, obj_load={}, "
"target_max_load={}, this_new_load_={}, criterion={}\n",
trial_,
iter_,
under.size(),
potential_recipients.size(),
selected_node,
selected_load,
obj_id.id,
Expand Down
6 changes: 4 additions & 2 deletions src/vt/vrt/collection/balance/temperedlb/temperedlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ struct TemperedLB : BaseLB {
* TemperedLB restricts this to underloaded ranks
*/
virtual bool canPropagate() const {
  // Eligibility to propagate mirrors the underloaded flag of this rank
  return is_underloaded_;
}
// Read-only accessor for the deterministic_ flag
bool isDeterministic() const {
  return deterministic_;
}

void propagateRound(uint8_t k_cur_async, bool sync, EpochType epoch = no_epoch);
void propagateIncomingAsync(LoadMsgAsync* msg);
Expand All @@ -113,7 +114,7 @@ struct TemperedLB : BaseLB {

std::vector<double> createCMF(NodeSetType const& under);
NodeType sampleFromCMF(NodeSetType const& under, std::vector<double> const& cmf);
std::vector<NodeType> makeUnderloaded() const;
virtual std::vector<NodeType> getPotentialRecipients() const;
std::vector<NodeType> makeSufficientlyUnderloaded(
LoadType load_to_accommodate
) const;
Expand All @@ -130,6 +131,8 @@ struct TemperedLB : BaseLB {

void setupDone(ReduceMsgType* msg);

std::unordered_map<NodeType, LoadType> load_info_ = {};

private:
uint16_t f_ = 0;
uint8_t k_max_ = 0;
Expand Down Expand Up @@ -168,7 +171,6 @@ struct TemperedLB : BaseLB {
*/
bool target_pole_ = false;
std::random_device seed_;
std::unordered_map<NodeType, LoadType> load_info_ = {};
std::unordered_map<NodeType, LoadType> new_load_info_ = {};
objgroup::proxy::Proxy<TemperedLB> proxy_ = {};
bool is_overloaded_ = false;
Expand Down
11 changes: 11 additions & 0 deletions src/vt/vrt/collection/balance/temperedwmin/temperedwmin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,17 @@ void TemperedWMin::inputParams(balance::ConfigEntry* config) {
load_model_ptr = theLBManager()->getLoadModel().get();
}

std::vector<NodeType> TemperedWMin::getPotentialRecipients() const {
std::vector<NodeType> nodes = {};
for (auto&& elm : load_info_) {
nodes.push_back(elm.first);
}
if (isDeterministic()) {
std::sort(nodes.begin(), nodes.end());
}
return nodes;
}

TimeType TemperedWMin::getModeledValue(const elm::ElementIDStruct& obj) {
vtAssert(
theLBManager()->getLoadModel().get() == load_model_ptr,
Expand Down
2 changes: 2 additions & 0 deletions src/vt/vrt/collection/balance/temperedwmin/temperedwmin.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ struct TemperedWMin : TemperedLB {
*/
bool canPropagate() const override {
  // Unconditional: no load check is applied in this override
  return true;
}

std::vector<NodeType> getPotentialRecipients() const override;

private:
std::shared_ptr<balance::LoadModel> total_work_model_ = nullptr;
balance::LoadModel* load_model_ptr = nullptr;
Expand Down

0 comments on commit 1194a01

Please sign in to comment.