-
Notifications
You must be signed in to change notification settings - Fork 6.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix for RecoverFromRetryableBGIOError starting with recovery_in_prog_ false #11991
Changes from all commits
759a726
3bcb8e1
ac43c6a
4865315
eea6910
7f9302c
ce5ba88
80c127b
84df441
9f43e8f
3ec955e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -556,6 +556,7 @@ Status ErrorHandler::ClearBGError() { | |
// old_bg_error is only for notifying listeners, so may not be checked | ||
old_bg_error.PermitUncheckedError(); | ||
// Clear and check the recovery IO and BG error | ||
is_db_stopped_.store(false, std::memory_order_release); | ||
bg_error_ = Status::OK(); | ||
recovery_error_ = IOStatus::OK(); | ||
bg_error_.PermitUncheckedError(); | ||
|
@@ -671,11 +672,14 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError( | |
// wait the previous recover thread to finish and create a new thread | ||
// to recover from the bg error. | ||
db_mutex_->Unlock(); | ||
TEST_SYNC_POINT( | ||
"StartRecoverFromRetryableBGIOError:BeforeWaitingForOtherThread"); | ||
old_recovery_thread->join(); | ||
TEST_SYNC_POINT( | ||
"StartRecoverFromRetryableBGIOError:AfterWaitingForOtherThread"); | ||
db_mutex_->Lock(); | ||
} | ||
|
||
TEST_SYNC_POINT("StartRecoverFromRetryableBGIOError::in_progress"); | ||
recovery_thread_.reset( | ||
new port::Thread(&ErrorHandler::RecoverFromRetryableBGIOError, this)); | ||
|
||
|
@@ -689,6 +693,7 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError( | |
// Automatic recover from Retryable BG IO error. Must be called after db | ||
// mutex is released. | ||
void ErrorHandler::RecoverFromRetryableBGIOError() { | ||
assert(recovery_in_prog_); | ||
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeStart"); | ||
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeStart2"); | ||
InstrumentedMutexLock l(db_mutex_); | ||
|
@@ -754,22 +759,12 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { | |
// recover from the retryable IO error and no other BG errors. Clean | ||
// the bg_error and notify user. | ||
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:RecoverSuccess"); | ||
Status old_bg_error = bg_error_; | ||
is_db_stopped_.store(false, std::memory_order_release); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. May need to move There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I guess this wasn't supposed to be missing in |
||
bg_error_ = Status::OK(); | ||
bg_error_.PermitUncheckedError(); | ||
EventHelpers::NotifyOnErrorRecoveryEnd( | ||
db_options_.listeners, old_bg_error, bg_error_, db_mutex_); | ||
if (bg_error_stats_ != nullptr) { | ||
RecordTick(bg_error_stats_.get(), | ||
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT); | ||
RecordInHistogram(bg_error_stats_.get(), | ||
ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count); | ||
} | ||
recovery_in_prog_ = false; | ||
if (soft_error_no_bg_work_) { | ||
soft_error_no_bg_work_ = false; | ||
} | ||
return; | ||
} else { | ||
// In this case: 1) recovery_error_ is more serious or not retryable | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@cbi42 do you think we want to assert other fields here as well?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure, it's hard to figure out all the invariants :)