Skip to content

Commit

Permalink
Fix can't auto resume after no space error when the no space error is…
Browse files Browse the repository at this point in the history
… only triggered by db_->Write without any other background action (compact/flush)
  • Loading branch information
liumiuyong committed Apr 22, 2021
1 parent e505c74 commit 77a7b77
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/event_listener.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ void EventListener::OnBackgroundError(rocksdb::BackgroundErrorReason reason, roc
// Should not arrive here
break;
}
if (status->IsNoSpace() && status->severity() < rocksdb::Status::kFatalError) {
storage_->SetDBInRetryableIOError(true);
}
LOG(ERROR) << "[event_listener/background_error] reason: " << reason_str
<< ", status: " << status->ToString();
}
Expand Down
9 changes: 9 additions & 0 deletions src/server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,15 @@ void Server::cron() {
}
}
}
// Check once a minute whether the DB needs to be resumed.
// RocksDB has an auto-resume feature for retryable IO errors, but the current implementation cannot trigger it
// when the no-space error is raised only by db_->Write, without any other background action (compaction/flush).
// To work around this, we manually trigger a resume every minute after a no-space error.
// Attempt a manual resume only when the server is not loading, on every 600th cron tick
// (never on tick 0), and only while the retryable-IO-error flag is still raised.
if (!is_loading_ && counter != 0 && counter % 600 == 0 && storage_->IsDBInRetryableIOError()) {
  // Resume() returns a status telling us whether recovery actually succeeded. Keep the flag
  // raised on failure so the next scheduled check retries, instead of clearing it while the
  // DB is still in the error state.
  auto s = storage_->GetDB()->Resume();
  if (s.ok()) {
    LOG(INFO) << "[server] Successfully resumed DB after no space error";
    storage_->SetDBInRetryableIOError(false);
  } else {
    LOG(ERROR) << "[server] Failed to resume DB after no space error: " << s.ToString();
  }
}
cleanupExitedSlaves();
counter++;
stats_.TrackInstantaneousMetric(STATS_METRIC_COMMAND, stats_.total_calls);
Expand Down
3 changes: 3 additions & 0 deletions src/storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ class Storage {
// Returns the create_time field stored in checkpoint_info_.
time_t GetCheckpointCreateTime() {
  return checkpoint_info_.create_time;
}
// Stores `t` into the access_time field of checkpoint_info_.
void SetCheckpointAccessTime(time_t t) {
  checkpoint_info_.access_time = t;
}
// Returns the access_time field stored in checkpoint_info_.
time_t GetCheckpointAccessTime() {
  return checkpoint_info_.access_time;
}
// Raises (true) or clears (false) the flag recording that the DB hit a retryable IO error.
// NOTE(review): the flag is a plain bool; if the setter and getter are called from different
// threads (e.g. a RocksDB listener callback vs. the server cron), it may need to be
// std::atomic<bool> -- confirm against the callers' threading.
void SetDBInRetryableIOError(bool yes_or_no) {
  db_in_retryable_io_error_ = yes_or_no;
}
// Reports whether the retryable-IO-error flag is currently raised.
// NOTE(review): reads a plain bool without synchronization; confirm that readers and writers
// run on the same thread, or make the flag atomic.
bool IsDBInRetryableIOError() {
  return db_in_retryable_io_error_;
}

private:
rocksdb::DB *db_ = nullptr;
Expand All @@ -141,6 +143,7 @@ class Storage {
std::mutex db_mu_;
int db_refs_ = 0;
bool db_closing_ = true;
bool db_in_retryable_io_error_ = false;
};

} // namespace Engine

0 comments on commit 77a7b77

Please sign in to comment.