Skip to content

Commit

Permalink
#2074: Avoid adding data for identical phases to history
Browse files Browse the repository at this point in the history
  • Loading branch information
thearusable committed Jun 5, 2023
1 parent a046a86 commit 045326a
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 39 deletions.
42 changes: 17 additions & 25 deletions src/vt/vrt/collection/balance/lb_data_restart_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,20 +87,17 @@ void LBDataRestartReader::readHistory(LBDataHolder const& lbdh) {
last_found_phase = phase;
for (auto const& obj : iter->second) {
if (obj.first.isMigratable()) {
history_[phase].insert(obj.first);
if (history_[phase] == nullptr) {
history_[phase] = std::make_shared<std::set<ElementIDStruct>>();
}
history_[phase]->insert(obj.first);
}
}
} else if(lbdh.identical_phases_.find(phase) != lbdh.identical_phases_.end()) {
// Phase is identical to previous one, fill with data from previous phase
auto last_iter = lbdh.node_data_.find(last_found_phase);
for (auto const& obj : last_iter->second) {
if (obj.first.isMigratable()) {
history_[phase].insert(obj.first);
}
}
// Phase is identical to previous one, use the shared pointer to data from previous phase
addIdenticalPhase(phase, last_found_phase);
} else if(lbdh.skipped_phases_.find(phase) == lbdh.skipped_phases_.end()) {
// Phases which are not present must be specified in metadata of the file
vtAbort("Could not find data: Unspecified phases needs to be present in skipped section of the file metadata");
vtAbort("Could not find data: Skipped phases needs to be listed in file metadata.");
}
}
}
Expand Down Expand Up @@ -143,12 +140,12 @@ void LBDataRestartReader::arriving(ArriveMsg* msg) {
}

void LBDataRestartReader::update(UpdateMsg* msg) {
auto iter = history_[msg->phase].find(msg->elm);
vtAssert(iter != history_[msg->phase].end(), "Must exist");
auto iter = history_[msg->phase]->find(msg->elm);
vtAssert(iter != history_[msg->phase]->end(), "Must exist");
auto elm = *iter;
elm.curr_node = msg->curr_node;
history_[msg->phase].erase(iter);
history_[msg->phase].insert(elm);
history_[msg->phase]->erase(iter);
history_[msg->phase]->insert(elm);
}

void LBDataRestartReader::checkBothEnds(Coord& coord) {
Expand All @@ -167,26 +164,21 @@ void LBDataRestartReader::determinePhasesToMigrate() {
runInEpochCollective("LBDataRestartReader::updateLocations", [&]{
PhaseType curr = 0, next;
for (;curr < num_phases_ - 1;) {
// find number of next Phase
for(next = curr + 1; next < num_phases_; ++next) {
if(history_.find(next) != history_.end()) {
break;
}
}
next = findNextPhase(curr);

local_changed_distro[curr] = history_[curr] != history_[next];
local_changed_distro[curr] = *history_[curr] != *history_[next];
if (local_changed_distro[curr]) {
std::set<ElementIDStruct> departing, arriving;

std::set_difference(
history_[next].begin(), history_[next].end(),
history_[curr].begin(), history_[curr].end(),
history_[next]->begin(), history_[next]->end(),
history_[curr]->begin(), history_[curr]->end(),
std::inserter(arriving, arriving.begin())
);

std::set_difference(
history_[curr].begin(), history_[curr].end(),
history_[next].begin(), history_[next].end(),
history_[curr]->begin(), history_[curr]->end(),
history_[next]->begin(), history_[next]->end(),
std::inserter(departing, departing.begin())
);

Expand Down
43 changes: 31 additions & 12 deletions src/vt/vrt/collection/balance/lb_data_restart_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,26 +131,21 @@ struct LBDataRestartReader : runtime::component::Component<LBDataRestartReader>
* \return the next phase
*/
PhaseType findNextPhase(PhaseType phase) const {
auto next = phase + 1;
for(; next < num_phases_; ++next) {
if(history_.find(next) != history_.end()) {
return next;
}
}
vtAssert(history_.find(next) != history_.end(), "Must have a valid phase");
return next;
auto iter = history_.upper_bound(phase);
vtAssert(iter != history_.end(), "Must have a valid phase");
return iter->first;
}

/**
* \brief Get the elements assigned for a given phase
*
* \param[in] phase the phase
*
* \return element assigned to this node
* \return pointer to elements assigned to this node, guaranted to be not null
*/
std::set<ElementIDStruct> const& getDistro(PhaseType phase) {
std::shared_ptr<const std::set<ElementIDStruct>> getDistro(PhaseType phase) const {
auto iter = history_.find(phase);
vtAssert(iter != history_.end(), "Must have a valid phase");
vtAssert(iter != history_.end() && iter->second != nullptr, "Must have a valid phase");
return iter->second;
}

Expand All @@ -166,6 +161,30 @@ struct LBDataRestartReader : runtime::component::Component<LBDataRestartReader>
}
}

/**
* \brief Add history for a given phase
*
* \param[in] phase the phase to be added
* \param[in] distro the distribution to be added
*/
void addDistro(PhaseType phase, const std::set<ElementIDStruct>& distro) {
if (history_[phase] == nullptr) {
history_[phase] = std::make_shared<std::set<ElementIDStruct>>();
}
history_[phase]->insert(distro.begin(), distro.end());
}

/**
* \brief Add identical phase to one already present
*
* \param[in] phase the phase to be added
* \param[in] identical the identical phase to be used
*/
void addIdenticalPhase(PhaseType phase, PhaseType identical) {
vtAssert(history_.find(identical) != history_.end(), "Identical phase was not added to history map.");
history_[phase] = history_[identical];
}

private:
/**
* \brief Reduce distribution changes globally to find where migrations need
Expand All @@ -188,7 +207,7 @@ struct LBDataRestartReader : runtime::component::Component<LBDataRestartReader>
std::vector<bool> changed_distro_;

/// History of mapping that was read in from the data files
std::unordered_map<PhaseType, std::set<ElementIDStruct>> history_;
std::map<PhaseType, std::shared_ptr<std::set<ElementIDStruct>>> history_;

struct DepartMsg : vt::Message {
DepartMsg(NodeType in_depart_node, PhaseType in_phase, ElementIDStruct in_elm)
Expand Down
4 changes: 2 additions & 2 deletions src/vt/vrt/collection/balance/offlinelb/offlinelb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ void OfflineLB::init(objgroup::proxy::Proxy<OfflineLB> in_proxy) {

void OfflineLB::runLB(TimeType) {
auto nextPhase = theLBDataReader()->findNextPhase(phase_);
auto const& distro = theLBDataReader()->getDistro(nextPhase);
for (auto&& elm : distro) {
auto const distro = theLBDataReader()->getDistro(nextPhase);
for (auto&& elm : *distro) {
migrateObjectTo(elm, theContext()->getNode());
}
theLBDataReader()->clearDistro(nextPhase);
Expand Down

0 comments on commit 045326a

Please sign in to comment.