Skip to content

Commit

Permalink
#2299: Move logic for checking max load to BaseLB
Browse files Browse the repository at this point in the history
  • Loading branch information
thearusable authored and cz4rs committed Sep 20, 2024
1 parent 664a3bc commit 6aa4287
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 16 deletions.
19 changes: 18 additions & 1 deletion src/vt/vrt/collection/balance/baselb/baselb.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,14 +190,31 @@ struct BaseLB {
);

/**
 * \brief Estimated cost of running load-balancing
 *
 * The estimate is a fixed constant of 100 nanoseconds; it does not depend on
 * any state of the load balancer.
 *
 * \return the estimated cost as a \c std::chrono::nanoseconds duration
 */
auto getCollectiveEpochCost() const {
  constexpr std::chrono::nanoseconds estimated_cost{100};
  return estimated_cost;
}

/**
 * \brief Decide whether load-balancing is worth running
 *
 * Looks up the maximum of the modeled rank load in the base statistics,
 * interprets that value as a number of seconds, converts it to nanoseconds
 * and compares it against \c getCollectiveEpochCost().
 *
 * \return true when the maximum rank load is strictly greater than the
 * estimated load-balancing cost; false otherwise
 */
bool maxLoadExceedsLBCost() const {
  using Seconds = std::chrono::duration<double>;
  using std::chrono::nanoseconds;

  // Maximum modeled load over all ranks, taken from the collected statistics
  auto const& rank_load_stats = base_stats_->at(
    lb::Statistic::Rank_load_modeled
  );
  auto const peak_load = rank_load_stats.at(lb::StatisticQuantity::max);

  // Bring the load into the same unit as the estimated cost before comparing
  auto const peak_load_ns = std::chrono::duration_cast<nanoseconds>(
    Seconds(peak_load)
  );

  return peak_load_ns > getCollectiveEpochCost();
}

private:
TransferVecType transfers_ = {};
TransferType off_node_migrate_ = {};
Expand Down
6 changes: 1 addition & 5 deletions src/vt/vrt/collection/balance/greedylb/greedylb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -160,11 +160,7 @@ void GreedyLB::loadStats() {
bool should_lb = false;
this_load_begin = this_load;

// Use an estimated load-balancing cost on average rank load to load-balance
auto avg_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::duration<double>(avg_load)
);
if (avg_ns > getCollectiveEpochCost()) {
if (maxLoadExceedsLBCost()) {
should_lb = I > greedy_tolerance;
}

Expand Down
6 changes: 1 addition & 5 deletions src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -263,11 +263,7 @@ void HierarchicalLB::loadStats() {
bool should_lb = false;
this_load_begin = this_load;

// Use an estimated load-balancing cost on average rank load to load-balance
auto avg_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::duration<double>(avg_load)
);
if (avg_ns > getCollectiveEpochCost()) {
if (maxLoadExceedsLBCost()) {
should_lb = I > hierlb_tolerance;
}

Expand Down
6 changes: 1 addition & 5 deletions src/vt/vrt/collection/balance/temperedlb/temperedlb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -449,11 +449,7 @@ void TemperedLB::runLB(LoadType total_load) {
target_max_load_ = avg;
}

// Use an estimated load-balancing cost on average rank load to load-balance
auto avg_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::duration<double>(avg)
);
if (avg_ns > getCollectiveEpochCost()) {
if (maxLoadExceedsLBCost()) {
should_lb = max > (run_temperedlb_tolerance + 1.0) * target_max_load_;
}

Expand Down

0 comments on commit 6aa4287

Please sign in to comment.