-
Notifications
You must be signed in to change notification settings - Fork 3.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[fix] fix bug that replica can not be repaired duo to DECOMMISSION state #9424
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -402,12 +402,12 @@ private void schedulePendingTablets() { | |
if (tabletCtx.getType() == Type.BALANCE) { | ||
// if balance is disabled, remove this tablet | ||
if (Config.disable_balance) { | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), | ||
"disable balance and " + e.getMessage()); | ||
} else { | ||
// remove the balance task if it fails to be scheduled many times | ||
if (tabletCtx.getFailedSchedCounter() > 10) { | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), | ||
"schedule failed too many times and " + e.getMessage()); | ||
} else { | ||
// we must release resource it current hold, and be scheduled again | ||
|
@@ -427,19 +427,19 @@ private void schedulePendingTablets() { | |
} else if (e.getStatus() == Status.FINISHED) { | ||
// schedule redundant tablet or scheduler disabled will throw this exception | ||
stat.counterTabletScheduledSucceeded.incrementAndGet(); | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, e.getMessage()); | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, e.getStatus(), e.getMessage()); | ||
} else { | ||
Preconditions.checkState(e.getStatus() == Status.UNRECOVERABLE, e.getStatus()); | ||
// discard | ||
stat.counterTabletScheduledDiscard.incrementAndGet(); | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getMessage()); | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage()); | ||
} | ||
continue; | ||
} catch (Exception e) { | ||
LOG.warn("got unexpected exception, discard this schedule. tablet: {}", | ||
tabletCtx.getTabletId(), e); | ||
stat.counterTabletScheduledFailed.incrementAndGet(); | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, e.getMessage()); | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, Status.UNRECOVERABLE, e.getMessage()); | ||
continue; | ||
} | ||
|
||
|
@@ -548,7 +548,8 @@ private void scheduleTablet(TabletSchedCtx tabletCtx, AgentBatchTask batchTask) | |
for (TransactionState transactionState : dbTransactionMgr.getPreCommittedTxnList()) { | ||
if(transactionState.getTableIdList().contains(tbl.getId())) { | ||
// If table releate to transaction with precommitted status, do not allow to do balance. | ||
throw new SchedException(Status.UNRECOVERABLE, "There exists PRECOMMITTED transaction releated to table"); | ||
throw new SchedException(Status.UNRECOVERABLE, | ||
"There exists PRECOMMITTED transaction related to table"); | ||
} | ||
} | ||
} catch (AnalysisException e) { | ||
|
@@ -577,7 +578,7 @@ private void scheduleTablet(TabletSchedCtx tabletCtx, AgentBatchTask batchTask) | |
throw new SchedException(Status.UNRECOVERABLE, "tablet is unhealthy when doing balance"); | ||
} | ||
|
||
// for disk balance more accutely, we only schedule tablet when has lastly stat info about disk | ||
// for disk balance more accurately, we only schedule tablet when has lastly stat info about disk | ||
if (tabletCtx.getType() == TabletSchedCtx.Type.BALANCE && | ||
tabletCtx.getBalanceType() == TabletSchedCtx.BalanceType.DISK_BALANCE) { | ||
checkDiskBalanceLastSuccTime(tabletCtx.getTempSrcBackendId(), tabletCtx.getTempSrcPathHash()); | ||
|
@@ -1093,7 +1094,6 @@ private void handleReplicaTooSlow(TabletSchedCtx tabletCtx) throws SchedExceptio | |
} | ||
|
||
private void deleteReplicaInternal(TabletSchedCtx tabletCtx, Replica replica, String reason, boolean force) throws SchedException { | ||
|
||
/* | ||
* Before deleting a replica, we should make sure that there is no running txn on it and no more txns will be on it. | ||
* So we do followings: | ||
|
@@ -1109,6 +1109,8 @@ private void deleteReplicaInternal(TabletSchedCtx tabletCtx, Replica replica, St | |
replica.setState(ReplicaState.DECOMMISSION); | ||
// set priority to normal because it may wait for a long time. Remain it as VERY_HIGH may block other task. | ||
tabletCtx.setOrigPriority(Priority.NORMAL); | ||
LOG.debug("set replica {} on backend {} of tablet {} state to DECOMMISSION", | ||
replica.getId(), replica.getBackendId(), tabletCtx.getTabletId()); | ||
throw new SchedException(Status.SCHEDULE_FAILED, "set watermark txn " + nextTxnId); | ||
} else if (replica.getState() == ReplicaState.DECOMMISSION && replica.getWatermarkTxnId() != -1) { | ||
long watermarkTxnId = replica.getWatermarkTxnId(); | ||
|
@@ -1389,17 +1391,20 @@ private void dynamicAdjustPrioAndAddBackToPendingTablets(TabletSchedCtx tabletCt | |
addTablet(tabletCtx, true /* force */); | ||
} | ||
|
||
private void finalizeTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, String reason) { | ||
private void finalizeTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, Status status, String reason) { | ||
// use 2 steps to avoid nested database lock and synchronized.(releaseTabletCtx() may hold db lock) | ||
// remove the tablet ctx, so that no other process can see it | ||
removeTabletCtx(tabletCtx, reason); | ||
// release resources taken by tablet ctx | ||
releaseTabletCtx(tabletCtx, state); | ||
releaseTabletCtx(tabletCtx, state, status == Status.UNRECOVERABLE); | ||
} | ||
|
||
private void releaseTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state) { | ||
private void releaseTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, boolean resetReplicaState) { | ||
tabletCtx.setState(state); | ||
tabletCtx.releaseResource(this); | ||
if (resetReplicaState) { | ||
tabletCtx.resetReplicaState(); | ||
} | ||
tabletCtx.setFinishedTime(System.currentTimeMillis()); | ||
} | ||
|
||
|
@@ -1454,7 +1459,7 @@ public boolean finishStorageMediaMigrationTask(StorageMediaMigrationTask migrati | |
updateDiskBalanceLastSuccTime(tabletCtx.getDestBackendId(), tabletCtx.getDestPathHash()); | ||
} | ||
// we need this function to free slot for this migration task | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, "finished"); | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, Status.FINISHED, "finished"); | ||
return true; | ||
} | ||
/** | ||
|
@@ -1487,25 +1492,25 @@ public boolean finishCloneTask(CloneTask cloneTask, TFinishTaskRequest request) | |
} else if (e.getStatus() == Status.UNRECOVERABLE) { | ||
// unrecoverable | ||
stat.counterTabletScheduledDiscard.incrementAndGet(); | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getMessage()); | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage()); | ||
return true; | ||
} else if (e.getStatus() == Status.FINISHED) { | ||
// tablet is already healthy, just remove | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getMessage()); | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage()); | ||
return true; | ||
} | ||
} catch (Exception e) { | ||
LOG.warn("got unexpected exception when finish clone task. tablet: {}", | ||
tabletCtx.getTabletId(), e); | ||
stat.counterTabletScheduledDiscard.incrementAndGet(); | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, e.getMessage()); | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, Status.UNRECOVERABLE, e.getMessage()); | ||
return true; | ||
} | ||
|
||
Preconditions.checkState(tabletCtx.getState() == TabletSchedCtx.State.FINISHED); | ||
stat.counterCloneTaskSucceeded.incrementAndGet(); | ||
gatherStatistics(tabletCtx); | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, "finished"); | ||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, Status.FINISHED, "finished"); | ||
return true; | ||
} | ||
|
||
|
@@ -1569,7 +1574,10 @@ public void handleRunningTablets() { | |
|
||
// 2. release ctx | ||
timeoutTablets.stream().forEach(t -> { | ||
releaseTabletCtx(t, TabletSchedCtx.State.CANCELLED); | ||
// Set "resetReplicaState" to true because | ||
// the timeout task should also be considered as UNRECOVERABLE, | ||
// so need to reset replica state. | ||
releaseTabletCtx(t, TabletSchedCtx.State.CANCELLED, true); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is better to add a log here, it is useful for us to find why resetReplicaState is called. This log will be not too many. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need, this is an origin logic, and we don't expect this log in the past. |
||
stat.counterCloneTaskTimeout.incrementAndGet(); | ||
}); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LOG.warn() is better,resetReplicaState will not be called frequently, so this log will not be too many, but we have to known which tablet is reset, to find out why it is be like this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It may print a lot log. And actually, this is not an error. It is a common case.