Skip to content

Commit 4675968

Browse files
committed
Omit writing index metadata for non-replicated closed indices on data-only node (#47285)
Fixes a bug related to how "closed replicated indices" (introduced in 7.2) interact with the index metadata storage mechanism, which has special handling for closed indices (but incorrectly handles replicated closed indices). On non-master-eligible data nodes, it's possible for the node's manifest file (which tracks the relevant metadata state that the node should persist) to become out of sync with what's actually stored on disk, leading to an inconsistency that is then detected at startup, refusing for the node to start up. Closes #47276
1 parent d9a7bce commit 4675968

File tree

3 files changed

+68
-79
lines changed

3 files changed

+68
-79
lines changed

server/src/main/java/org/elasticsearch/gateway/IncrementalClusterStateWriter.java

Lines changed: 8 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ private void writeManifest(AtomicClusterStateWriter writer, Manifest manifest) t
139139
private Map<Index, Long> writeIndicesMetadata(AtomicClusterStateWriter writer, ClusterState newState, ClusterState previousState)
140140
throws WriteStateException {
141141
Map<Index, Long> previouslyWrittenIndices = previousManifest.getIndexGenerations();
142-
Set<Index> relevantIndices = getRelevantIndices(newState, previousState, previouslyWrittenIndices.keySet());
142+
Set<Index> relevantIndices = getRelevantIndices(newState);
143143

144144
Map<Index, Long> newIndices = new HashMap<>();
145145

@@ -207,8 +207,7 @@ static List<IndexMetaDataAction> resolveIndexMetaDataActions(Map<Index, Long> pr
207207
return actions;
208208
}
209209

210-
private static Set<Index> getRelevantIndicesOnDataOnlyNode(ClusterState state, ClusterState previousState, Set<Index>
211-
previouslyWrittenIndices) {
210+
private static Set<Index> getRelevantIndicesOnDataOnlyNode(ClusterState state) {
212211
RoutingNode newRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId());
213212
if (newRoutingNode == null) {
214213
throw new IllegalStateException("cluster state does not contain this node - cannot write index meta state");
@@ -217,20 +216,6 @@ private static Set<Index> getRelevantIndicesOnDataOnlyNode(ClusterState state, C
217216
for (ShardRouting routing : newRoutingNode) {
218217
indices.add(routing.index());
219218
}
220-
// we have to check the meta data also: closed indices will not appear in the routing table, but we must still write the state if
221-
// we have it written on disk previously
222-
for (IndexMetaData indexMetaData : state.metaData()) {
223-
boolean isOrWasClosed = indexMetaData.getState().equals(IndexMetaData.State.CLOSE);
224-
// if the index is open we might still have to write the state if it just transitioned from closed to open
225-
// so we have to check for that as well.
226-
IndexMetaData previousMetaData = previousState.metaData().index(indexMetaData.getIndex());
227-
if (previousMetaData != null) {
228-
isOrWasClosed = isOrWasClosed || previousMetaData.getState().equals(IndexMetaData.State.CLOSE);
229-
}
230-
if (previouslyWrittenIndices.contains(indexMetaData.getIndex()) && isOrWasClosed) {
231-
indices.add(indexMetaData.getIndex());
232-
}
233-
}
234219
return indices;
235220
}
236221

@@ -244,20 +229,14 @@ private static Set<Index> getRelevantIndicesForMasterEligibleNode(ClusterState s
244229
}
245230

246231
// exposed for tests
247-
static Set<Index> getRelevantIndices(ClusterState state, ClusterState previousState, Set<Index> previouslyWrittenIndices) {
248-
Set<Index> relevantIndices;
249-
if (isDataOnlyNode(state)) {
250-
relevantIndices = getRelevantIndicesOnDataOnlyNode(state, previousState, previouslyWrittenIndices);
251-
} else if (state.nodes().getLocalNode().isMasterNode()) {
252-
relevantIndices = getRelevantIndicesForMasterEligibleNode(state);
232+
static Set<Index> getRelevantIndices(ClusterState state) {
233+
if (state.nodes().getLocalNode().isMasterNode()) {
234+
return getRelevantIndicesForMasterEligibleNode(state);
235+
} else if (state.nodes().getLocalNode().isDataNode()) {
236+
return getRelevantIndicesOnDataOnlyNode(state);
253237
} else {
254-
relevantIndices = Collections.emptySet();
238+
return Collections.emptySet();
255239
}
256-
return relevantIndices;
257-
}
258-
259-
private static boolean isDataOnlyNode(ClusterState state) {
260-
return state.nodes().getLocalNode().isMasterNode() == false && state.nodes().getLocalNode().isDataNode();
261240
}
262241

263242
/**

server/src/test/java/org/elasticsearch/gateway/IncrementalClusterStateWriterTests.java

Lines changed: 35 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.elasticsearch.cluster.metadata.IndexMetaData;
3131
import org.elasticsearch.cluster.metadata.Manifest;
3232
import org.elasticsearch.cluster.metadata.MetaData;
33+
import org.elasticsearch.cluster.metadata.MetaDataIndexStateService;
3334
import org.elasticsearch.cluster.node.DiscoveryNode;
3435
import org.elasticsearch.cluster.node.DiscoveryNodeRole;
3536
import org.elasticsearch.cluster.node.DiscoveryNodes;
@@ -73,17 +74,6 @@
7374

7475
public class IncrementalClusterStateWriterTests extends ESAllocationTestCase {
7576

76-
private ClusterState noIndexClusterState(boolean masterEligible) {
77-
MetaData metaData = MetaData.builder().build();
78-
RoutingTable routingTable = RoutingTable.builder().build();
79-
80-
return ClusterState.builder(org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
81-
.metaData(metaData)
82-
.routingTable(routingTable)
83-
.nodes(generateDiscoveryNodes(masterEligible))
84-
.build();
85-
}
86-
8777
private ClusterState clusterStateWithUnassignedIndex(IndexMetaData indexMetaData, boolean masterEligible) {
8878
MetaData metaData = MetaData.builder()
8979
.put(indexMetaData, false)
@@ -119,7 +109,7 @@ private ClusterState clusterStateWithAssignedIndex(IndexMetaData indexMetaData,
119109
.metaData(metaDataNewClusterState).version(oldClusterState.getVersion() + 1).build();
120110
}
121111

122-
private ClusterState clusterStateWithClosedIndex(IndexMetaData indexMetaData, boolean masterEligible) {
112+
private ClusterState clusterStateWithNonReplicatedClosedIndex(IndexMetaData indexMetaData, boolean masterEligible) {
123113
ClusterState oldClusterState = clusterStateWithAssignedIndex(indexMetaData, masterEligible);
124114

125115
MetaData metaDataNewClusterState = MetaData.builder()
@@ -128,23 +118,41 @@ private ClusterState clusterStateWithClosedIndex(IndexMetaData indexMetaData, bo
128118
.version(oldClusterState.metaData().version() + 1)
129119
.build();
130120
RoutingTable routingTable = RoutingTable.builder()
131-
.addAsNew(metaDataNewClusterState.index("test"))
121+
.addAsRecovery(metaDataNewClusterState.index("test"))
132122
.build();
133123

134124
return ClusterState.builder(oldClusterState).routingTable(routingTable)
135125
.metaData(metaDataNewClusterState).version(oldClusterState.getVersion() + 1).build();
136126
}
137127

138-
private ClusterState clusterStateWithJustOpenedIndex(IndexMetaData indexMetaData, boolean masterEligible) {
139-
ClusterState oldClusterState = clusterStateWithClosedIndex(indexMetaData, masterEligible);
128+
private ClusterState clusterStateWithReplicatedClosedIndex(IndexMetaData indexMetaData, boolean masterEligible, boolean assigned) {
129+
ClusterState oldClusterState = clusterStateWithAssignedIndex(indexMetaData, masterEligible);
140130

141131
MetaData metaDataNewClusterState = MetaData.builder()
142-
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).state(IndexMetaData.State.OPEN)
132+
.put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)
133+
.put(MetaDataIndexStateService.VERIFIED_BEFORE_CLOSE_SETTING.getKey(), true))
134+
.state(IndexMetaData.State.CLOSE)
143135
.numberOfShards(5).numberOfReplicas(2))
144136
.version(oldClusterState.metaData().version() + 1)
145137
.build();
138+
RoutingTable routingTable = RoutingTable.builder()
139+
.addAsRecovery(metaDataNewClusterState.index("test"))
140+
.build();
141+
142+
oldClusterState = ClusterState.builder(oldClusterState).routingTable(routingTable)
143+
.metaData(metaDataNewClusterState).build();
144+
if (assigned) {
145+
AllocationService strategy = createAllocationService(Settings.builder()
146+
.put("cluster.routing.allocation.node_concurrent_recoveries", 100)
147+
.put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always")
148+
.put("cluster.routing.allocation.cluster_concurrent_rebalance", 100)
149+
.put("cluster.routing.allocation.node_initial_primaries_recoveries", 100)
150+
.build());
151+
152+
routingTable = strategy.reroute(oldClusterState, "reroute").routingTable();
153+
}
146154

147-
return ClusterState.builder(oldClusterState)
155+
return ClusterState.builder(oldClusterState).routingTable(routingTable)
148156
.metaData(metaDataNewClusterState).version(oldClusterState.getVersion() + 1).build();
149157
}
150158

@@ -154,14 +162,6 @@ private DiscoveryNodes.Builder generateDiscoveryNodes(boolean masterEligible) {
154162
.add(newNode("master_node", MASTER_DATA_ROLES)).localNodeId("node1").masterNodeId(masterEligible ? "node1" : "master_node");
155163
}
156164

157-
private Set<Index> randomPrevWrittenIndices(IndexMetaData indexMetaData) {
158-
if (randomBoolean()) {
159-
return Collections.singleton(indexMetaData.getIndex());
160-
} else {
161-
return Collections.emptySet();
162-
}
163-
}
164-
165165
private IndexMetaData createIndexMetaData(String name) {
166166
return IndexMetaData.builder(name).
167167
settings(settings(Version.CURRENT)).
@@ -172,56 +172,41 @@ private IndexMetaData createIndexMetaData(String name) {
172172

173173
public void testGetRelevantIndicesWithUnassignedShardsOnMasterEligibleNode() {
174174
IndexMetaData indexMetaData = createIndexMetaData("test");
175-
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(
176-
clusterStateWithUnassignedIndex(indexMetaData, true),
177-
noIndexClusterState(true),
178-
randomPrevWrittenIndices(indexMetaData));
175+
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(clusterStateWithUnassignedIndex(indexMetaData, true));
179176
assertThat(indices.size(), equalTo(1));
180177
}
181178

182179
public void testGetRelevantIndicesWithUnassignedShardsOnDataOnlyNode() {
183180
IndexMetaData indexMetaData = createIndexMetaData("test");
184-
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(
185-
clusterStateWithUnassignedIndex(indexMetaData, false),
186-
noIndexClusterState(false),
187-
randomPrevWrittenIndices(indexMetaData));
181+
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(clusterStateWithUnassignedIndex(indexMetaData, false));
188182
assertThat(indices.size(), equalTo(0));
189183
}
190184

191185
public void testGetRelevantIndicesWithAssignedShards() {
192186
IndexMetaData indexMetaData = createIndexMetaData("test");
193187
boolean masterEligible = randomBoolean();
194-
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(
195-
clusterStateWithAssignedIndex(indexMetaData, masterEligible),
196-
clusterStateWithUnassignedIndex(indexMetaData, masterEligible),
197-
randomPrevWrittenIndices(indexMetaData));
188+
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(clusterStateWithAssignedIndex(indexMetaData, masterEligible));
198189
assertThat(indices.size(), equalTo(1));
199190
}
200191

201-
public void testGetRelevantIndicesForClosedPrevWrittenIndexOnDataOnlyNode() {
192+
public void testGetRelevantIndicesForNonReplicatedClosedIndexOnDataOnlyNode() {
202193
IndexMetaData indexMetaData = createIndexMetaData("test");
203194
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(
204-
clusterStateWithClosedIndex(indexMetaData, false),
205-
clusterStateWithAssignedIndex(indexMetaData, false),
206-
Collections.singleton(indexMetaData.getIndex()));
207-
assertThat(indices.size(), equalTo(1));
195+
clusterStateWithNonReplicatedClosedIndex(indexMetaData, false));
196+
assertThat(indices.size(), equalTo(0));
208197
}
209198

210-
public void testGetRelevantIndicesForClosedPrevNotWrittenIndexOnDataOnlyNode() {
199+
public void testGetRelevantIndicesForReplicatedClosedButUnassignedIndexOnDataOnlyNode() {
211200
IndexMetaData indexMetaData = createIndexMetaData("test");
212201
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(
213-
clusterStateWithJustOpenedIndex(indexMetaData, false),
214-
clusterStateWithClosedIndex(indexMetaData, false),
215-
Collections.emptySet());
202+
clusterStateWithReplicatedClosedIndex(indexMetaData, false, false));
216203
assertThat(indices.size(), equalTo(0));
217204
}
218205

219-
public void testGetRelevantIndicesForWasClosedPrevWrittenIndexOnDataOnlyNode() {
206+
public void testGetRelevantIndicesForReplicatedClosedAndAssignedIndexOnDataOnlyNode() {
220207
IndexMetaData indexMetaData = createIndexMetaData("test");
221208
Set<Index> indices = IncrementalClusterStateWriter.getRelevantIndices(
222-
clusterStateWithJustOpenedIndex(indexMetaData, false),
223-
clusterStateWithClosedIndex(indexMetaData, false),
224-
Collections.singleton(indexMetaData.getIndex()));
209+
clusterStateWithReplicatedClosedIndex(indexMetaData, false, true));
225210
assertThat(indices.size(), equalTo(1));
226211
}
227212

server/src/test/java/org/elasticsearch/indices/state/CloseIndexIT.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,31 @@ public Settings onNodeStopped(String nodeName) throws Exception {
435435
}
436436
}
437437

438+
/**
439+
* Test for https://github.com/elastic/elasticsearch/issues/47276 which checks that the persisted metadata on a data node does not
440+
* become inconsistent when using replicated closed indices.
441+
*/
442+
public void testRelocatedClosedIndexIssue() throws Exception {
443+
final String indexName = "closed-index";
444+
final List<String> dataNodes = internalCluster().startDataOnlyNodes(2);
445+
// allocate shard to first data node
446+
createIndex(indexName, Settings.builder()
447+
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
448+
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
449+
.put("index.routing.allocation.include._name", dataNodes.get(0))
450+
.build());
451+
indexRandom(randomBoolean(), randomBoolean(), randomBoolean(), IntStream.range(0, randomIntBetween(0, 50))
452+
.mapToObj(n -> client().prepareIndex(indexName, "_doc").setSource("num", n)).collect(toList()));
453+
assertAcked(client().admin().indices().prepareClose(indexName).setWaitForActiveShards(ActiveShardCount.ALL));
454+
// move single shard to second node
455+
client().admin().indices().prepareUpdateSettings(indexName).setSettings(Settings.builder()
456+
.put("index.routing.allocation.include._name", dataNodes.get(1))).get();
457+
ensureGreen(indexName);
458+
internalCluster().fullRestart();
459+
assertIndexIsClosed(indexName);
460+
ensureGreen(indexName);
461+
}
462+
438463
public void testResyncPropagatePrimaryTerm() throws Exception {
439464
internalCluster().ensureAtLeastNumDataNodes(3);
440465
final String indexName = "closed_indices_promotion";

0 commit comments

Comments
 (0)