Remove global checkpoint assertion in index shard

jasontedor · jasontedor · commit 50b617f73a75 · 2017-05-04T10:33:42.000-04:00
Due to races, this assertion in index shard can be wrong. This commit
removes that assertion and adjusts the explanatory comment.
diff --git a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java
@@ -1523,20 +1523,20 @@ public void updateGlobalCheckpointOnReplica(final long globalCheckpoint) {
         verifyReplicationTarget();
         final SequenceNumbersService seqNoService = getEngine().seqNoService();
         final long localCheckpoint = seqNoService.getLocalCheckpoint();
-        if (globalCheckpoint <= localCheckpoint) {
-            seqNoService.updateGlobalCheckpointOnReplica(globalCheckpoint);
-        } else {
+        if (globalCheckpoint > localCheckpoint) {
             /*
              * This can happen during recovery when the shard has started its engine but recovery is not finalized and is receiving global
-             * checkpoint updates from in-flight operations. However, since this shard is not yet contributing to calculating the global
-             * checkpoint, it can be the case that the global checkpoint update from the primary is ahead of the local checkpoint on this
-             * shard. In this case, we ignore the global checkpoint update. This should only happen if we are in the translog stage of
-             * recovery. Prior to this, the engine is not opened and this shard will not receive global checkpoint updates, and after this
-             * the shard will be contributing to calculations of the the global checkpoint.
+             * checkpoint updates. However, since this shard is not yet contributing to calculating the global checkpoint, it can be the
+             * case that the global checkpoint update from the primary is ahead of the local checkpoint on this shard. In this case, we
+             * ignore the global checkpoint update. This can happen if we are in the translog stage of recovery. Prior to this, the engine
+             * is not opened and this shard will not receive global checkpoint updates, and after this the shard will be contributing to
+             * calculations of the global checkpoint. However, we cannot assert that we are in the translog stage of recovery here as
+             * while the global checkpoint update may have emanated from the primary when we were in that state, we could subsequently move
+             * to recovery finalization, or even have finished recovery, before the update arrives here.
              */
-            assert recoveryState().getStage() == RecoveryState.Stage.TRANSLOG
-                    : "expected recovery stage [" + RecoveryState.Stage.TRANSLOG + "] but was [" + recoveryState().getStage() + "]";
+            return;
         }
+        seqNoService.updateGlobalCheckpointOnReplica(globalCheckpoint);
     }
 
     /**