Skip to content

Commit

Permalink
Promote shadow replica to primary when initializing primary fails (#22021)
Browse files Browse the repository at this point in the history

Failing an initializing primary when shadow replicas are enabled for the index can leave the primary unassigned with replicas being active. Instead, a replica should be promoted to primary, which is fixed by this commit.
  • Loading branch information
ywelsch committed Dec 7, 2016
1 parent 9d4f500 commit 4789134
Showing 1 changed file with 30 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -537,8 +537,22 @@ assert getByAllocationId(failedShard.shardId(), failedShard.allocationId().getId
// fail actual shard
if (failedShard.initializing()) {
if (failedShard.relocatingNodeId() == null) {
// initializing shard that is not relocation target, just move to unassigned
moveToUnassigned(failedShard, unassignedInfo);
if (failedShard.primary()) {
// promote active replica to primary if active replica exists (only the case for shadow replicas)
ShardRouting activeReplica = activeReplica(failedShard.shardId());
assert activeReplica == null || IndexMetaData.isIndexUsingShadowReplicas(indexMetaData.getSettings()) :
"initializing primary [" + failedShard + "] with active replicas [" + activeReplica + "] only expected when " +
"using shadow replicas";
if (activeReplica == null) {
moveToUnassigned(failedShard, unassignedInfo);
} else {
movePrimaryToUnassignedAndDemoteToReplica(failedShard, unassignedInfo);
promoteReplicaToPrimary(activeReplica, indexMetaData, routingChangesObserver);
}
} else {
// initializing shard that is not relocation target, just move to unassigned
moveToUnassigned(failedShard, unassignedInfo);
}
} else {
// The shard is a target of a relocating shard. In that case we only need to remove the target shard and cancel the source
// relocation. No shard is left unassigned
Expand All @@ -561,16 +575,8 @@ assert getByAllocationId(failedShard.shardId(), failedShard.allocationId().getId
if (activeReplica == null) {
moveToUnassigned(failedShard, unassignedInfo);
} else {
// if the activeReplica was relocating before this call to failShard, its relocation was cancelled above when we
// failed initializing replica shards (and moved replica relocation source back to started)
assert activeReplica.started() : "replica relocation should have been cancelled: " + activeReplica;
movePrimaryToUnassignedAndDemoteToReplica(failedShard, unassignedInfo);
ShardRouting primarySwappedCandidate = promoteActiveReplicaShardToPrimary(activeReplica);
routingChangesObserver.replicaPromoted(activeReplica);
if (IndexMetaData.isIndexUsingShadowReplicas(indexMetaData.getSettings())) {
ShardRouting initializedShard = reinitShadowPrimary(primarySwappedCandidate);
routingChangesObserver.startedPrimaryReinitialized(primarySwappedCandidate, initializedShard);
}
promoteReplicaToPrimary(activeReplica, indexMetaData, routingChangesObserver);
}
} else {
assert failedShard.primary() == false;
Expand All @@ -586,6 +592,19 @@ assert node(failedShard.currentNodeId()).getByShardId(failedShard.shardId()) ==
" was matched but wasn't removed";
}

/**
 * Promotes the given active replica to primary, notifying the observer of the change.
 * For indices using shadow replicas, the freshly promoted primary is additionally
 * reinitialized via {@code reinitShadowPrimary}, and that reinitialization is reported
 * to the observer as well.
 */
private void promoteReplicaToPrimary(ShardRouting activeReplica, IndexMetaData indexMetaData,
                                     RoutingChangesObserver routingChangesObserver) {
    // By the time we get here any relocation of this replica has already been cancelled
    // (initializing replica shards were failed earlier, moving relocation sources back to
    // started), so the replica must be in the started state.
    assert activeReplica.started() : "replica relocation should have been cancelled: " + activeReplica;

    final ShardRouting promotedPrimary = promoteActiveReplicaShardToPrimary(activeReplica);
    routingChangesObserver.replicaPromoted(activeReplica);

    final boolean usesShadowReplicas = IndexMetaData.isIndexUsingShadowReplicas(indexMetaData.getSettings());
    if (usesShadowReplicas) {
        // NOTE(review): shadow-replica primaries appear to need reinitialization after
        // promotion — confirmed only by this call pattern, see reinitShadowPrimary.
        final ShardRouting reinitializedPrimary = reinitShadowPrimary(promotedPrimary);
        routingChangesObserver.startedPrimaryReinitialized(promotedPrimary, reinitializedPrimary);
    }
}

/**
* Mark a shard as started and adjusts internal statistics.
*
Expand Down

0 comments on commit 4789134

Please sign in to comment.