-
Notifications
You must be signed in to change notification settings - Fork 25k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Replica recovery could go into an endless flushing loop #28350
Changes from 6 commits
ba2ec6a
ad530f3
fe6901a
9789bd7
18d8fe6
15e4edf
f53045e
ef7f713
36cc8bc
9bc74b3
bcf3704
5b735f1
6a4e2f5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1462,6 +1462,25 @@ final boolean tryRenewSyncCommit() { | |
return renewed; | ||
} | ||
|
||
@Override | ||
public boolean shouldFlush() { | ||
if (translog.shouldFlush() == false) { | ||
return false; | ||
} | ||
/* | ||
* We should only flush if the shouldFlush condition can become false after flushing. This condition will change if: | ||
* 1. The min translog gen of the next commit points to a different translog gen than the last commit | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this deserves a comment why we don't take the IW#hasUncommittedChanges() into account. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should we call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
* 2. If the local checkpoint equals max_seqno, the min translog gen of the next commit will point to the newly rolled generation | ||
*/ | ||
final long localCheckpoint = localCheckpointTracker.getCheckpoint(); | ||
if (localCheckpoint == localCheckpointTracker.getMaxSeqNo()) { | ||
return true; | ||
} | ||
final long translogGenFromLastCommit = Long.parseLong(lastCommittedSegmentInfos.userData.get(Translog.TRANSLOG_GENERATION_KEY)); | ||
final long translogGenForNewCommit = translog.getMinGenerationForSeqNo(localCheckpoint + 1).translogFileGeneration; | ||
return translogGenForNewCommit > translogGenFromLastCommit; | ||
} | ||
|
||
@Override | ||
public CommitId flush() throws EngineException { | ||
return flush(false, false); | ||
|
@@ -1492,7 +1511,7 @@ public CommitId flush(boolean force, boolean waitIfOngoing) throws EngineExcepti | |
logger.trace("acquired flush lock immediately"); | ||
} | ||
try { | ||
if (indexWriter.hasUncommittedChanges() || force) { | ||
if (indexWriter.hasUncommittedChanges() || force || shouldFlush()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we add a comment explaining why we have 3 things? Basically something like - we check if:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
ensureCanFlush(); | ||
try { | ||
translog.rollGeneration(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
|
||
package org.elasticsearch.indices.recovery; | ||
|
||
import com.carrotsearch.randomizedtesting.generators.RandomNumbers; | ||
import org.apache.lucene.index.DirectoryReader; | ||
import org.apache.lucene.index.IndexCommit; | ||
import org.apache.lucene.index.IndexWriter; | ||
|
@@ -306,4 +307,26 @@ public void testSequenceBasedRecoveryKeepsTranslog() throws Exception { | |
} | ||
} | ||
|
||
public void testShouldFlushAfterPeerRecovery() throws Exception { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add Javadoc to this method to explain what the goal of this test is? |
||
try (ReplicationGroup shards = createGroup(0)) { | ||
shards.startAll(); | ||
long translogSizeOnPrimary = 0; | ||
int numDocs = shards.indexDocs(between(10, 100)); | ||
translogSizeOnPrimary += shards.getPrimary().getTranslog().uncommittedSizeInBytes(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just define translogSizeOnPrimary here (no need to initialize) |
||
shards.flush(); | ||
|
||
final IndexShard replica = shards.addReplica(); | ||
IndexMetaData.Builder builder = IndexMetaData.builder(replica.indexSettings().getIndexMetaData()); | ||
long flushThreshold = RandomNumbers.randomLongBetween(random(), 100, translogSizeOnPrimary); | ||
builder.settings(Settings.builder().put(replica.indexSettings().getSettings()) | ||
.put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), flushThreshold + "b") | ||
); | ||
replica.indexSettings().updateIndexMetaData(builder.build()); | ||
replica.onSettingsChanged(); | ||
shards.recoverReplica(replica); | ||
assertBusy(() -> assertThat(getEngine(replica).shouldFlush(), equalTo(false))); | ||
assertThat(replica.getTranslog().totalOperations(), equalTo(numDocs)); | ||
shards.assertAllEqual(numDocs); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you explain that this can return false even if there are uncommitted changes? It's more of a maintenance function. Maybe we should call it something different, like
shouldFlushForMaintenance
or maintenanceFlushPending()
Just suggestions to make it clearer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yannick and I came up with
shouldFlushToFreeTranslog