Skip to content

Commit

Permalink
Report error position.
Browse files (browse the repository at this point in the history)
  • Loading branch information
trivialfis committed Oct 19, 2020
1 parent 3708601 commit b12174d
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 12 deletions.
2 changes: 1 addition & 1 deletion rabit/src/allreduce_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ void AllreduceBase::SetParam(const char *name, const char *val) {
rabit_enable_tcp_no_delay = true;
} else {
rabit_enable_tcp_no_delay = false;
}
}
}
}
/*!
Expand Down
30 changes: 19 additions & 11 deletions rabit/src/allreduce_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,11 @@ class AllreduceBase : public IEngine {
if (world_size == 1 || world_size == -1) {
return;
}
utils::Assert(TryAllgatherRing(sendrecvbuf_, total_size, slice_begin,
slice_end, size_prev_slice) == kSuccess,
"AllgatherRing failed");
auto ret = TryAllgatherRing(sendrecvbuf_, total_size, slice_begin,
slice_end, size_prev_slice);
if (ret != kSuccess) {
utils::Error("AllgatherRing failed: %d\n", ret.line);
}
}
/*!
* \brief perform in-place allreduce, on sendrecvbuf
Expand All @@ -135,9 +137,10 @@ class AllreduceBase : public IEngine {
const char *_caller = _CALLER) override {
if (prepare_fun != nullptr) prepare_fun(prepare_arg);
if (world_size == 1 || world_size == -1) return;
utils::Assert(TryAllreduce(sendrecvbuf_, type_nbytes, count, reducer) ==
kSuccess,
"Allreduce failed");
auto ret = TryAllreduce(sendrecvbuf_, type_nbytes, count, reducer);
if (ret != kSuccess) {
utils::Error("Allreduce failed: %d\n", ret.line);
}
}
/*!
* \brief broadcast data from root to all nodes
Expand All @@ -152,8 +155,10 @@ class AllreduceBase : public IEngine {
const char *_file = _FILE, const int _line = _LINE,
const char *_caller = _CALLER) override {
if (world_size == 1 || world_size == -1) return;
utils::Assert(TryBroadcast(sendrecvbuf_, total_size, root) == kSuccess,
"Broadcast failed");
auto ret = TryBroadcast(sendrecvbuf_, total_size, root);
if (ret != kSuccess) {
utils::Error("Broadcast failed: %d\n", ret.line);
}
}
/*!
* \brief load latest check point
Expand Down Expand Up @@ -272,9 +277,11 @@ class AllreduceBase : public IEngine {
struct ReturnType {
/*! \brief internal return type */
ReturnTypeEnum value;
int32_t line { -1 };
// constructor
ReturnType() = default;
ReturnType(ReturnTypeEnum value) : value(value) {} // NOLINT(*)
explicit ReturnType(int l = __builtin_LINE()) : line{l} {}
ReturnType(ReturnTypeEnum value, int32_t l = __builtin_LINE()) : value(value), line{l} {} // NOLINT(*)

inline bool operator==(const ReturnTypeEnum &v) const {
return value == v;
}
Expand Down Expand Up @@ -518,7 +525,8 @@ class AllreduceBase : public IEngine {
* \param err the error type
*/
inline ReturnType ReportError(LinkRecord *link, ReturnType err) {
// NOTE(review): this span appears to come from a rendered diff hunk, so the
// pre-change one-line body and the post-change two-line body both show up
// here; confirm against the actual header before reading it as one function.
// Pre-change form: store `link` in err_link, then return `err` unchanged.
err_link = link; return err;
// Post-change form: the same two statements, split one per line.
err_link = link;
return err;
}
//---- data structure related to model ----
// call sequence counter, records how many calls we made so far
Expand Down

0 comments on commit b12174d

Please sign in to comment.