Skip to content

Commit 8e40a2b

Browse files
authored
[Zen2] Introduce vote withdrawal (#35446)
If shutting down half or more of the master-eligible nodes, their votes must first be explicitly withdrawn to ensure that the cluster doesn't lose its quorum. This works via _voting tombstones_, stored in the cluster state, which tell the reconfigurator to remove nodes from the voting configuration. This change introduces voting tombstones to the cluster state, together with transport APIs for adding and removing them, and makes use of these APIs in `InternalTestCluster` to support tests which remove at least half of the master-eligible nodes at once (e.g. shrinking from two master-eligible nodes to one).
1 parent 0e1a121 commit 8e40a2b

24 files changed

+1742
-32
lines changed

server/src/main/java/org/elasticsearch/action/ActionModule.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@
2727
import org.elasticsearch.action.admin.cluster.bootstrap.GetDiscoveredNodesAction;
2828
import org.elasticsearch.action.admin.cluster.bootstrap.TransportBootstrapClusterAction;
2929
import org.elasticsearch.action.admin.cluster.bootstrap.TransportGetDiscoveredNodesAction;
30+
import org.elasticsearch.action.admin.cluster.configuration.AddVotingTombstonesAction;
31+
import org.elasticsearch.action.admin.cluster.configuration.ClearVotingTombstonesAction;
32+
import org.elasticsearch.action.admin.cluster.configuration.TransportAddVotingTombstonesAction;
33+
import org.elasticsearch.action.admin.cluster.configuration.TransportClearVotingTombstonesAction;
3034
import org.elasticsearch.action.admin.cluster.health.ClusterHealthAction;
3135
import org.elasticsearch.action.admin.cluster.health.TransportClusterHealthAction;
3236
import org.elasticsearch.action.admin.cluster.node.hotthreads.NodesHotThreadsAction;
@@ -428,6 +432,8 @@ public <Request extends ActionRequest, Response extends ActionResponse> void reg
428432

429433
actions.register(GetDiscoveredNodesAction.INSTANCE, TransportGetDiscoveredNodesAction.class);
430434
actions.register(BootstrapClusterAction.INSTANCE, TransportBootstrapClusterAction.class);
435+
actions.register(AddVotingTombstonesAction.INSTANCE, TransportAddVotingTombstonesAction.class);
436+
actions.register(ClearVotingTombstonesAction.INSTANCE, TransportClearVotingTombstonesAction.class);
431437
actions.register(ClusterAllocationExplainAction.INSTANCE, TransportClusterAllocationExplainAction.class);
432438
actions.register(ClusterStatsAction.INSTANCE, TransportClusterStatsAction.class);
433439
actions.register(ClusterStateAction.INSTANCE, TransportClusterStateAction.class);
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.action.admin.cluster.configuration;
20+
21+
import org.elasticsearch.action.Action;
22+
import org.elasticsearch.common.io.stream.Writeable.Reader;
23+
24+
public class AddVotingTombstonesAction extends Action<AddVotingTombstonesResponse> {
25+
public static final AddVotingTombstonesAction INSTANCE = new AddVotingTombstonesAction();
26+
public static final String NAME = "cluster:admin/voting/add_tombstones";
27+
28+
private AddVotingTombstonesAction() {
29+
super(NAME);
30+
}
31+
32+
@Override
33+
public AddVotingTombstonesResponse newResponse() {
34+
throw new UnsupportedOperationException("usage of Streamable is to be replaced by Writeable");
35+
}
36+
37+
@Override
38+
public Reader<AddVotingTombstonesResponse> getResponseReader() {
39+
return AddVotingTombstonesResponse::new;
40+
}
41+
}
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.action.admin.cluster.configuration;
20+
21+
import org.elasticsearch.action.ActionRequestValidationException;
22+
import org.elasticsearch.action.support.master.MasterNodeRequest;
23+
import org.elasticsearch.cluster.ClusterState;
24+
import org.elasticsearch.cluster.node.DiscoveryNode;
25+
import org.elasticsearch.cluster.node.DiscoveryNodes;
26+
import org.elasticsearch.common.io.stream.StreamInput;
27+
import org.elasticsearch.common.io.stream.StreamOutput;
28+
import org.elasticsearch.common.unit.TimeValue;
29+
30+
import java.io.IOException;
31+
import java.util.Arrays;
32+
import java.util.Set;
33+
import java.util.stream.Collectors;
34+
35+
/**
36+
* A request to add voting tombstones for certain master-eligible nodes, and wait for these nodes to be removed from the voting
37+
* configuration.
38+
*/
39+
public class AddVotingTombstonesRequest extends MasterNodeRequest<AddVotingTombstonesRequest> {
40+
private final String[] nodeDescriptions;
41+
private final TimeValue timeout;
42+
43+
/**
44+
* Construct a request to add voting tombstones for master-eligible nodes matching the given descriptions, and wait for a default 30
45+
* seconds for these nodes to be removed from the voting configuration.
46+
* @param nodeDescriptions Descriptions of the nodes to add - see {@link DiscoveryNodes#resolveNodes(String...)}
47+
*/
48+
public AddVotingTombstonesRequest(String[] nodeDescriptions) {
49+
this(nodeDescriptions, TimeValue.timeValueSeconds(30));
50+
}
51+
52+
/**
53+
* Construct a request to add voting tombstones for master-eligible nodes matching the given descriptions, and wait for these nodes to
54+
* be removed from the voting configuration.
55+
* @param nodeDescriptions Descriptions of the nodes whose tombstones to add - see {@link DiscoveryNodes#resolveNodes(String...)}.
56+
* @param timeout How long to wait for the nodes to be removed from the voting configuration.
57+
*/
58+
public AddVotingTombstonesRequest(String[] nodeDescriptions, TimeValue timeout) {
59+
if (timeout.compareTo(TimeValue.ZERO) < 0) {
60+
throw new IllegalArgumentException("timeout [" + timeout + "] must be non-negative");
61+
}
62+
this.nodeDescriptions = nodeDescriptions;
63+
this.timeout = timeout;
64+
}
65+
66+
public AddVotingTombstonesRequest(StreamInput in) throws IOException {
67+
super(in);
68+
nodeDescriptions = in.readStringArray();
69+
timeout = in.readTimeValue();
70+
}
71+
72+
Set<DiscoveryNode> resolveNodes(ClusterState currentState) {
73+
final DiscoveryNodes allNodes = currentState.nodes();
74+
final Set<DiscoveryNode> resolvedNodes = Arrays.stream(allNodes.resolveNodes(nodeDescriptions))
75+
.map(allNodes::get).filter(DiscoveryNode::isMasterNode).collect(Collectors.toSet());
76+
77+
if (resolvedNodes.isEmpty()) {
78+
throw new IllegalArgumentException("add voting tombstones request for " + Arrays.asList(nodeDescriptions)
79+
+ " matched no master-eligible nodes");
80+
}
81+
82+
resolvedNodes.removeIf(n -> currentState.getVotingTombstones().contains(n));
83+
return resolvedNodes;
84+
}
85+
86+
Set<DiscoveryNode> resolveNodesAndCheckMaximum(ClusterState currentState, int maxTombstoneCount, String maximumSettingKey) {
87+
final Set<DiscoveryNode> resolvedNodes = resolveNodes(currentState);
88+
89+
final int oldTombstoneCount = currentState.getVotingTombstones().size();
90+
final int newTombstoneCount = resolvedNodes.size();
91+
if (oldTombstoneCount + newTombstoneCount > maxTombstoneCount) {
92+
throw new IllegalArgumentException("add voting tombstones request for " + Arrays.asList(nodeDescriptions)
93+
+ " would add [" + newTombstoneCount + "] voting tombstones to the existing [" + oldTombstoneCount
94+
+ "] which would exceed the maximum of [" + maxTombstoneCount + "] set by ["
95+
+ maximumSettingKey + "]");
96+
}
97+
return resolvedNodes;
98+
}
99+
100+
/**
101+
* @return descriptions of the nodes for whom to add tombstones.
102+
*/
103+
public String[] getNodeDescriptions() {
104+
return nodeDescriptions;
105+
}
106+
107+
/**
108+
* @return how long to wait after adding the tombstones for the nodes to be removed from the voting configuration.
109+
*/
110+
public TimeValue getTimeout() {
111+
return timeout;
112+
}
113+
114+
@Override
115+
public ActionRequestValidationException validate() {
116+
return null;
117+
}
118+
119+
@Override
120+
public void readFrom(StreamInput in) throws IOException {
121+
throw new UnsupportedOperationException("usage of Streamable is to be replaced by Writeable");
122+
}
123+
124+
@Override
125+
public void writeTo(StreamOutput out) throws IOException {
126+
super.writeTo(out);
127+
out.writeStringArray(nodeDescriptions);
128+
out.writeTimeValue(timeout);
129+
}
130+
131+
@Override
132+
public String toString() {
133+
return "AddVotingTombstonesRequest{" +
134+
"nodeDescriptions=" + Arrays.asList(nodeDescriptions) +
135+
", timeout=" + timeout +
136+
'}';
137+
}
138+
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.action.admin.cluster.configuration;
20+
21+
import org.elasticsearch.action.ActionResponse;
22+
import org.elasticsearch.common.io.stream.StreamInput;
23+
import org.elasticsearch.common.io.stream.StreamOutput;
24+
25+
import java.io.IOException;
26+
27+
/**
28+
* A response to {@link AddVotingTombstonesRequest} indicating that voting tombstones have been added for the requested nodes and these
29+
* nodes have been removed from the voting configuration.
30+
*/
31+
public class AddVotingTombstonesResponse extends ActionResponse {
32+
33+
public AddVotingTombstonesResponse() {
34+
}
35+
36+
public AddVotingTombstonesResponse(StreamInput in) throws IOException {
37+
super(in);
38+
}
39+
40+
@Override
41+
public void readFrom(StreamInput in) throws IOException {
42+
throw new UnsupportedOperationException("usage of Streamable is to be replaced by Writeable");
43+
}
44+
45+
@Override
46+
public void writeTo(StreamOutput out) throws IOException {
47+
super.writeTo(out);
48+
}
49+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.action.admin.cluster.configuration;
20+
21+
import org.elasticsearch.action.Action;
22+
import org.elasticsearch.common.io.stream.Writeable.Reader;
23+
24+
public class ClearVotingTombstonesAction extends Action<ClearVotingTombstonesResponse> {
25+
public static final ClearVotingTombstonesAction INSTANCE = new ClearVotingTombstonesAction();
26+
public static final String NAME = "cluster:admin/voting/clear_tombstones";
27+
28+
private ClearVotingTombstonesAction() {
29+
super(NAME);
30+
}
31+
32+
@Override
33+
public ClearVotingTombstonesResponse newResponse() {
34+
throw new UnsupportedOperationException("usage of Streamable is to be replaced by Writeable");
35+
}
36+
37+
@Override
38+
public Reader<ClearVotingTombstonesResponse> getResponseReader() {
39+
return ClearVotingTombstonesResponse::new;
40+
}
41+
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.action.admin.cluster.configuration;
20+
21+
import org.elasticsearch.action.ActionRequestValidationException;
22+
import org.elasticsearch.action.support.master.MasterNodeRequest;
23+
import org.elasticsearch.common.io.stream.StreamInput;
24+
import org.elasticsearch.common.io.stream.StreamOutput;
25+
import org.elasticsearch.common.unit.TimeValue;
26+
27+
import java.io.IOException;
28+
29+
/**
30+
* A request to clear the voting tombstones from the cluster state, optionally waiting for these nodes to be removed from the cluster first.
31+
*/
32+
public class ClearVotingTombstonesRequest extends MasterNodeRequest<ClearVotingTombstonesRequest> {
33+
private boolean waitForRemoval = true;
34+
private TimeValue timeout = TimeValue.timeValueSeconds(30);
35+
36+
/**
37+
* Construct a request to remove all the voting tombstones from the cluster state.
38+
*/
39+
public ClearVotingTombstonesRequest() {
40+
}
41+
42+
public ClearVotingTombstonesRequest(StreamInput in) throws IOException {
43+
super(in);
44+
waitForRemoval = in.readBoolean();
45+
timeout = in.readTimeValue();
46+
}
47+
48+
/**
49+
* @return whether to wait for the tombstoned nodes to be removed from the cluster before removing their tombstones. True by default.
50+
*/
51+
public boolean getWaitForRemoval() {
52+
return waitForRemoval;
53+
}
54+
55+
/**
56+
* @param waitForRemoval whether to wait for the tombstoned nodes to be removed from the cluster before removing their tombstones. True
57+
* by default.
58+
*/
59+
public void setWaitForRemoval(boolean waitForRemoval) {
60+
this.waitForRemoval = waitForRemoval;
61+
}
62+
63+
/**
64+
* @param timeout how long to wait for the tombstoned nodes to be removed if {@link ClearVotingTombstonesRequest#waitForRemoval} is
65+
* true. Defaults to 30 seconds.
66+
*/
67+
public void setTimeout(TimeValue timeout) {
68+
this.timeout = timeout;
69+
}
70+
71+
/**
72+
* @return how long to wait for the tombstoned nodes to be removed if {@link ClearVotingTombstonesRequest#waitForRemoval} is
73+
* true. Defaults to 30 seconds.
74+
*/
75+
public TimeValue getTimeout() {
76+
return timeout;
77+
}
78+
79+
@Override
80+
public ActionRequestValidationException validate() {
81+
return null;
82+
}
83+
84+
@Override
85+
public void readFrom(StreamInput in) throws IOException {
86+
throw new UnsupportedOperationException("usage of Streamable is to be replaced by Writeable");
87+
}
88+
89+
@Override
90+
public void writeTo(StreamOutput out) throws IOException {
91+
super.writeTo(out);
92+
out.writeBoolean(waitForRemoval);
93+
out.writeTimeValue(timeout);
94+
}
95+
96+
@Override
97+
public String toString() {
98+
return "ClearVotingTombstonesRequest{" +
99+
", waitForRemoval=" + waitForRemoval +
100+
", timeout=" + timeout +
101+
'}';
102+
}
103+
}

0 commit comments

Comments
 (0)