Skip to content

Commit

Permalink
Merge pull request #2333 from DARMA-tasking/2299-dont-deploy-lb-with-…
Browse files Browse the repository at this point in the history
…to-small-average-load

#2299: Do not deploy LB when average load is smaller than estimated load balancing cost
  • Loading branch information
nlslatt authored Sep 6, 2024
2 parents bffea2a + 2777ffb commit 7f294c3
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 3 deletions.
27 changes: 27 additions & 0 deletions src/vt/vrt/collection/balance/baselb/baselb.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include <map>
#include <unordered_map>
#include <tuple>
#include <chrono>

namespace vt { namespace vrt { namespace collection {

Expand Down Expand Up @@ -188,6 +189,32 @@ struct BaseLB {
std::shared_ptr<balance::LoadModel> model
);

/**
* \brief Get the estimated time needed for load-balancing
*
* \return the estimated time
*/
auto getCollectiveEpochCost() const {
return std::chrono::nanoseconds(100);
}

/**
 * \brief Check if load-balancing should be done
 *
 * Reads the maximum value of the modeled rank load from the base statistics
 * and compares it against the estimate returned by
 * \c getCollectiveEpochCost(). The load value is interpreted as a
 * floating-point number of seconds.
 *
 * \return true when the maximum load exceeds the cost of load balancing; false otherwise
 */
bool maxLoadExceedsLBCost() const {
  auto const max = base_stats_->at(lb::Statistic::Rank_load_modeled).at(
    lb::StatisticQuantity::max
  );

  // Compare the maximum rank load to the estimated load-balancing cost.
  // The comparison is done in floating point (chrono promotes both sides to
  // a common double-based duration) rather than casting the load to integer
  // nanoseconds first: that cast is undefined for NaN, infinite or very large
  // loads, and it truncates sub-nanosecond fractions so that a load slightly
  // above the cost would be reported as not exceeding it.
  return std::chrono::duration<double>(max) > getCollectiveEpochCost();
}

private:
TransferVecType transfers_ = {};
TransferType off_node_migrate_ = {};
Expand Down
2 changes: 1 addition & 1 deletion src/vt/vrt/collection/balance/greedylb/greedylb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ void GreedyLB::loadStats() {
bool should_lb = false;
this_load_begin = this_load;

if (avg_load > 0.0000000001) {
if (maxLoadExceedsLBCost()) {
should_lb = I > greedy_tolerance;
}

Expand Down
2 changes: 1 addition & 1 deletion src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ void HierarchicalLB::loadStats() {
bool should_lb = false;
this_load_begin = this_load;

if (avg_load > 0.0000000001) {
if (maxLoadExceedsLBCost()) {
should_lb = I > hierlb_tolerance;
}

Expand Down
2 changes: 1 addition & 1 deletion src/vt/vrt/collection/balance/temperedlb/temperedlb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ void TemperedLB::runLB(LoadType total_load) {
target_max_load_ = avg;
}

if (avg > 0.0000000001) {
if (maxLoadExceedsLBCost()) {
should_lb = max > (run_temperedlb_tolerance + 1.0) * target_max_load_;
}

Expand Down

0 comments on commit 7f294c3

Please sign in to comment.