Skip to content

Commit

Permalink
Make core worker exit with system error when receiving IOError from w…
Browse files Browse the repository at this point in the history
…riting to local object store

Signed-off-by: Mengjin Yan <mengjinyan3@gmail.com>
  • Loading branch information
MengjinYan committed Nov 7, 2024
1 parent a49978f commit 9d57b29
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
5 changes: 3 additions & 2 deletions src/ray/raylet_client/raylet_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,10 @@ Status raylet::RayletConnection::AtomicRequestReply(MessageType request_type,
}

void raylet::RayletConnection::ShutdownIfLocalRayletDisconnected(const Status &status) {
if (!status.ok() && IsRayletFailed(RayConfig::instance().RAYLET_PID())) {
if ((!status.ok() && IsRayletFailed(RayConfig::instance().RAYLET_PID())) ||
status.IsIOError()) {
RAY_LOG(WARNING) << "The connection is failed because the local raylet has been "
"dead. Terminate the process. Status: "
"dead or is unreachable. Terminate the process. Status: "
<< status;
QuickExit();
RAY_LOG(FATAL) << "Unreachable.";
Expand Down
3 changes: 2 additions & 1 deletion src/ray/raylet_client/raylet_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,8 @@ class RayletConnection {
flatbuffers::FlatBufferBuilder *fbb = nullptr);

private:
/// Shutdown the raylet if the local connection is disconnected.
/// Shut down this process if the connection to the local raylet is lost (the raylet
/// is either terminated or unreachable).
void ShutdownIfLocalRayletDisconnected(const Status &status);
/// The connection to raylet.
std::shared_ptr<ServerConnection> conn_;
Expand Down

0 comments on commit 9d57b29

Please sign in to comment.