From 9b864e2a208deeeb3814352ef1665826659d2dbd Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 2 Aug 2022 11:45:51 +0530 Subject: [PATCH 01/87] Add Executor to decommission node attribute Signed-off-by: Rishab Nahata --- ...NodeAttributeClusterStateTaskExecutor.java | 141 ++++++++++++++ .../decommission/DecommissionAttribute.java | 108 +++++++++++ ...ttributeClusterStateTaskExecutorTests.java | 178 ++++++++++++++++++ 3 files changed, 427 insertions(+) create mode 100644 server/src/main/java/org/opensearch/cluster/coordination/DecommissionNodeAttributeClusterStateTaskExecutor.java create mode 100644 server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java create mode 100644 server/src/test/java/org/opensearch/cluster/coordination/DecommissionNodeAttributeClusterStateTaskExecutorTests.java diff --git a/server/src/main/java/org/opensearch/cluster/coordination/DecommissionNodeAttributeClusterStateTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/DecommissionNodeAttributeClusterStateTaskExecutor.java new file mode 100644 index 0000000000000..d71cd98d5f25e --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/coordination/DecommissionNodeAttributeClusterStateTaskExecutor.java @@ -0,0 +1,141 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.coordination; + +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateTaskExecutor; +import org.opensearch.cluster.ClusterStateTaskListener; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.persistent.PersistentTasksCustomMetadata; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.function.Predicate; + +/** + * Decommissions and shuts down nodes having a given attribute and updates the cluster state + * + * @opensearch.internal + */ +public class DecommissionNodeAttributeClusterStateTaskExecutor + implements + ClusterStateTaskExecutor, + ClusterStateTaskListener { + + private final AllocationService allocationService; + private final Logger logger; + + /** + * Task for the executor. 
+ * + * @opensearch.internal + */ + public static class Task { + + private final DecommissionAttribute decommissionAttribute; + private final String reason; + + public Task(final DecommissionAttribute decommissionAttribute, final String reason) { + this.decommissionAttribute = decommissionAttribute; + this.reason = reason; + } + + public DecommissionAttribute decommissionAttribute() { + return decommissionAttribute; + } + + public String reason() { + return reason; + } + + @Override + public String toString() { + return "Decommission Node Attribute Task{" + + "decommissionAttribute=" + + decommissionAttribute + + ", reason='" + + reason + + '\'' + + '}'; + } + } + + public DecommissionNodeAttributeClusterStateTaskExecutor(final AllocationService allocationService, final Logger logger) { + this.allocationService = allocationService; + this.logger = logger; + } + + @Override + public ClusterTasksResult execute(ClusterState currentState, List tasks) throws Exception { + final DiscoveryNodes.Builder remainingNodesBuilder = DiscoveryNodes.builder(currentState.nodes()); + List nodesToBeRemoved = new ArrayList(); + for (final Task task : tasks) { + final Predicate shouldRemoveNodePredicate = discoveryNode -> nodeHasDecommissionedAttribute(discoveryNode, task); + Iterator nodesIter = currentState.nodes().getNodes().valuesIt(); + while (nodesIter.hasNext()) { + final DiscoveryNode node = nodesIter.next(); + if (shouldRemoveNodePredicate.test(node) && currentState.nodes().nodeExists(node)) { + nodesToBeRemoved.add(node); + } + } + } + if (nodesToBeRemoved.size() <= 0) { + // no nodes to remove, will keep the current cluster state + return ClusterTasksResult.builder() + .successes(tasks) + .build(currentState); + } + for (DiscoveryNode nodeToBeRemoved : nodesToBeRemoved) { + remainingNodesBuilder.remove(nodeToBeRemoved); + } + + final ClusterState remainingNodesClusterState = remainingNodesClusterState(currentState, remainingNodesBuilder); + + return 
getTaskClusterTasksResult(currentState, tasks, remainingNodesClusterState); + } + + private boolean nodeHasDecommissionedAttribute(DiscoveryNode discoveryNode, Task task) { + String discoveryNodeAttributeValue = discoveryNode.getAttributes().get(task.decommissionAttribute().attributeName()); + return discoveryNodeAttributeValue != null && task.decommissionAttribute().attributeValues().contains(discoveryNodeAttributeValue); + } + + // visible for testing + // hook is used in testing to ensure that correct cluster state is used to test whether a + // rejoin or reroute is needed + protected ClusterState remainingNodesClusterState(final ClusterState currentState, DiscoveryNodes.Builder remainingNodesBuilder) { + return ClusterState.builder(currentState).nodes(remainingNodesBuilder).build(); + } + + protected ClusterTasksResult getTaskClusterTasksResult( + ClusterState currentState, + List tasks, + ClusterState remainingNodesClusterState + ) { + ClusterState ptasksDisassociatedState = PersistentTasksCustomMetadata.disassociateDeadNodes(remainingNodesClusterState); + final ClusterTasksResult.Builder resultBuilder = ClusterTasksResult.< + DecommissionNodeAttributeClusterStateTaskExecutor.Task>builder().successes(tasks); + return resultBuilder.build(allocationService.disassociateDeadNodes(ptasksDisassociatedState, true, describeTasks(tasks))); + } + + @Override + public void onFailure(final String source, final Exception e) { + logger.error(() -> new ParameterizedMessage("unexpected failure during [{}]", source), e); + } + + @Override + public void onNoLongerClusterManager(String source) { + logger.debug("no longer cluster-manager while decommissioning node attribute [{}]", source); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java new file mode 100644 index 0000000000000..6260af2823687 --- /dev/null +++ 
b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java @@ -0,0 +1,108 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.io.stream.Writeable; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +public final class DecommissionAttribute implements Writeable { + private final String attributeName; + private final List attributeValues; + + /** + * Update the attribute values for a given attribute name to decommission + * + * @param decommissionAttribute current decommissioned attribute object + * @param attributeValues values to be updated with + */ + public DecommissionAttribute(DecommissionAttribute decommissionAttribute, List attributeValues) { + this(decommissionAttribute.attributeName, attributeValues); + } + + /** + * Constructs new decommission attribute name values pair + * + * @param attributeName attribute name + * @param attributeValues attribute values + */ + public DecommissionAttribute(String attributeName, List attributeValues) { + this.attributeName = attributeName; + this.attributeValues = attributeValues; + } + + /** + * Returns attribute name + * + * @return attributeName + */ + public String attributeName() { + return this.attributeName; + } + + /** + * Returns attribute values + * + * @return attributeValues + */ + public List attributeValues() { + return this.attributeValues; + } + + public DecommissionAttribute(StreamInput in) throws IOException { + attributeName = in.readString(); + attributeValues = in.readStringList(); + } + + /** + * Writes decommission attribute name values to stream output + * + * @param out stream output + */ + 
@Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(attributeName); + out.writeStringCollection(attributeValues); + } + + /** + * Checks if this instance is equal to the other instance in attributeName other than {@link #attributeValues}. + * + * @param other other decommission attribute name values + * @return {@code true} if both instances equal in attributeName fields but the attributeValues fields + */ + public boolean equalsIgnoreValues(DecommissionAttribute other) { + return attributeName.equals(other.attributeName); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + DecommissionAttribute that = (DecommissionAttribute) o; + + if (!attributeName.equals(that.attributeName)) return false; + return attributeValues.equals(that.attributeValues); + } + + @Override + public int hashCode() { + return Objects.hash(attributeName, attributeValues); + } + + @Override + public String toString() { + return "DecommissionAttribute{" + attributeName + "}{" + attributeValues().toString() + "}"; + } +} diff --git a/server/src/test/java/org/opensearch/cluster/coordination/DecommissionNodeAttributeClusterStateTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/DecommissionNodeAttributeClusterStateTaskExecutorTests.java new file mode 100644 index 0000000000000..204d31f18e2cf --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/coordination/DecommissionNodeAttributeClusterStateTaskExecutorTests.java @@ -0,0 +1,178 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.coordination; + +import org.opensearch.Version; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateTaskExecutor; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicReference; + +import static java.util.Collections.singletonMap; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class DecommissionNodeAttributeClusterStateTaskExecutorTests extends OpenSearchTestCase { + + public void testRemoveNodesForDecommissionedAttribute() throws Exception { + final AllocationService allocationService = mock(AllocationService.class); + when(allocationService.disassociateDeadNodes(any(ClusterState.class), eq(true), any(String.class))).thenAnswer( + im -> im.getArguments()[0] + ); + final AtomicReference remainingNodesClusterState = new AtomicReference<>(); + ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); + + logger.info("--> adding five nodes on same zone_1"); + clusterState = addNodes(clusterState, "zone_1", "node1", "node2", "node3", "node4", "node5"); + + logger.info("--> adding five nodes on same zone_2"); + clusterState = addNodes(clusterState, "zone_2", "node6", 
"node7", "node8", "node9", "node10"); + + logger.info("--> adding five nodes on same zone_3"); + clusterState = addNodes(clusterState, "zone_3", "node11", "node12", "node13", "node14", "node15"); + + final DecommissionNodeAttributeClusterStateTaskExecutor executor = new DecommissionNodeAttributeClusterStateTaskExecutor( + allocationService, + logger + ) { + @Override + protected ClusterState remainingNodesClusterState(ClusterState currentState, DiscoveryNodes.Builder remainingNodesBuilder) { + remainingNodesClusterState.set(super.remainingNodesClusterState(currentState, remainingNodesBuilder)); + return remainingNodesClusterState.get(); + } + }; + + final List tasks = new ArrayList<>(); + tasks.add( + new DecommissionNodeAttributeClusterStateTaskExecutor.Task( + new DecommissionAttribute("zone", Collections.singletonList("zone_3")), + "unit test zone decommission executor" + ) + ); + + final ClusterStateTaskExecutor.ClusterTasksResult result = executor.execute( + clusterState, + tasks + ); + + ClusterState expectedClusterState = remainingNodesClusterState.get(); + ClusterState actualClusterState = result.resultingState; + + // Assert cluster state is updated and is successful + verify(allocationService).disassociateDeadNodes(eq(expectedClusterState), eq(true), any(String.class)); + assertEquals(actualClusterState, expectedClusterState); + assertTrue(result.executionResults.get(tasks.get(0)).isSuccess()); + + // Verify only 10 nodes present in the cluster after decommissioning + assertEquals(actualClusterState.nodes().getNodes().size(), 10); + + // Verify no nodes has attribute (zone, zone_3) + Iterator currDiscoveryNodeIterator = actualClusterState.nodes().getNodes().valuesIt(); + while (currDiscoveryNodeIterator.hasNext()) { + final DiscoveryNode node = currDiscoveryNodeIterator.next(); + assertNotEquals(node.getAttributes().get("zone"), "zone_3"); + } + } + + public void testSameClusterStateAfterExecutionForUnknownAttributeNameAndValue() throws Exception { + 
final AllocationService allocationService = mock(AllocationService.class); + when(allocationService.disassociateDeadNodes(any(ClusterState.class), eq(true), any(String.class))).thenAnswer( + im -> im.getArguments()[0] + ); + final AtomicReference remainingNodesClusterState = new AtomicReference<>(); + ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); + + logger.info("--> adding five nodes on same zone_1"); + clusterState = addNodes(clusterState, "zone_1", "node1", "node2", "node3", "node4", "node5"); + + logger.info("--> adding five nodes on same zone_2"); + clusterState = addNodes(clusterState, "zone_2", "node6", "node7", "node8", "node9", "node10"); + + logger.info("--> adding five nodes on same zone_3"); + clusterState = addNodes(clusterState, "zone_3", "node11", "node12", "node13", "node14", "node15"); + + final DecommissionNodeAttributeClusterStateTaskExecutor executor = new DecommissionNodeAttributeClusterStateTaskExecutor( + allocationService, + logger + ) { + @Override + protected ClusterState remainingNodesClusterState(ClusterState currentState, DiscoveryNodes.Builder remainingNodesBuilder) { + remainingNodesClusterState.set(super.remainingNodesClusterState(currentState, remainingNodesBuilder)); + return remainingNodesClusterState.get(); + } + }; + + final List tasks = new ArrayList<>(); + // Task 1 with unknown attribute name + tasks.add( + new DecommissionNodeAttributeClusterStateTaskExecutor.Task( + new DecommissionAttribute("unknown_zone_name", Collections.singletonList("unknown_zone_value")), + "unit test zone decommission executor" + ) + ); + // Task 2 with unknown attribute value + tasks.add( + new DecommissionNodeAttributeClusterStateTaskExecutor.Task( + new DecommissionAttribute("zone", Collections.singletonList("unknown_zone_value")), + "unit test zone decommission executor" + ) + ); + + final ClusterStateTaskExecutor.ClusterTasksResult result = executor.execute( + clusterState, + tasks + ); + + ClusterState 
expectedClusterState = remainingNodesClusterState.get(); + ClusterState actualClusterState = result.resultingState; + + // assert that disassociate dead node tasks is never executed + verify(allocationService, never()).disassociateDeadNodes(eq(expectedClusterState), eq(true), any(String.class)); + + // assert that cluster state remains same + assertEquals(clusterState, actualClusterState); + + // Verify all 15 nodes present in the cluster after decommissioning unknown attribute name + assertEquals(actualClusterState.nodes().getNodes().size(), 15); + } + + private ClusterState addNodes(ClusterState clusterState, String zone, String... nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + org.opensearch.common.collect.List.of(nodeIds).forEach(nodeId -> nodeBuilder.add(newNode(nodeId, singletonMap("zone", zone)))); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private DiscoveryNode newNode(String nodeId, Map attributes) { + return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_DATA_ROLES, Version.CURRENT); + } + + final private static Set CLUSTER_MANAGER_DATA_ROLES = Collections.unmodifiableSet( + new HashSet<>(Arrays.asList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)) + ); +} From 6f188e80cb114fb030b983f1867842a423b5bb4b Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Mon, 8 Aug 2022 20:05:20 +0530 Subject: [PATCH 02/87] Add DecommissionHelper Signed-off-by: Rishab Nahata --- .../decommission/DecommissionHelper.java | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java new file mode 100644 index 
0000000000000..6b9e480abcef7 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateTaskConfig; +import org.opensearch.cluster.ClusterStateTaskListener; +import org.opensearch.cluster.coordination.NodeRemovalClusterStateTaskExecutor; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.service.ClusterManagerService; +import org.opensearch.common.Priority; +import org.opensearch.common.inject.Inject; + +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +public class DecommissionHelper { + + private static final Logger logger = LogManager.getLogger(DecommissionHelper.class); + + private final NodeRemovalClusterStateTaskExecutor nodeRemovalExecutor; + private final ClusterManagerService clusterManagerService; + + DecommissionHelper( + ClusterManagerService clusterManagerService, + NodeRemovalClusterStateTaskExecutor nodeRemovalClusterStateTaskExecutor + ) { + this.nodeRemovalExecutor = nodeRemovalClusterStateTaskExecutor; + this.clusterManagerService = clusterManagerService; + } + + private void handleNodesDecommissionRequest(List nodesToBeDecommissioned, String reason) { + final Map nodesDecommissionTasks = new LinkedHashMap<>(); + nodesToBeDecommissioned.forEach(discoveryNode -> { + final NodeRemovalClusterStateTaskExecutor.Task task = new NodeRemovalClusterStateTaskExecutor.Task( + discoveryNode, reason + ); + nodesDecommissionTasks.put(task, nodeRemovalExecutor); + 
}); + final String source = "node-decommissioned"; + clusterManagerService.submitStateUpdateTasks( + source, + nodesDecommissionTasks, + ClusterStateTaskConfig.build(Priority.IMMEDIATE), + nodeRemovalExecutor + ); + } +} From c57bc7773e183197313047ee759227e974592461 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 17 Aug 2022 15:46:09 +0530 Subject: [PATCH 03/87] Decommission service implementation with metadata Signed-off-by: Rishab Nahata --- .../org/opensearch/OpenSearchException.java | 7 + .../org/opensearch/cluster/ClusterModule.java | 9 + ...NodeAttributeClusterStateTaskExecutor.java | 141 ---------- .../decommission/DecommissionAttribute.java | 47 ++-- .../DecommissionFailedException.java | 49 ++++ .../decommission/DecommissionHelper.java | 15 +- .../decommission/DecommissionService.java | 223 +++++++++++++++ .../decommission/DecommissionStatus.java | 94 +++++++ .../DecommissionAttributeMetadata.java | 254 ++++++++++++++++++ ...ttributeClusterStateTaskExecutorTests.java | 178 ------------ 10 files changed, 664 insertions(+), 353 deletions(-) delete mode 100644 server/src/main/java/org/opensearch/cluster/coordination/DecommissionNodeAttributeClusterStateTaskExecutor.java create mode 100644 server/src/main/java/org/opensearch/cluster/decommission/DecommissionFailedException.java create mode 100644 server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java create mode 100644 server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java create mode 100644 server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java delete mode 100644 server/src/test/java/org/opensearch/cluster/coordination/DecommissionNodeAttributeClusterStateTaskExecutorTests.java diff --git a/server/src/main/java/org/opensearch/OpenSearchException.java b/server/src/main/java/org/opensearch/OpenSearchException.java index 87efc03734d26..d3e1bef9b6dbb 100644 --- 
a/server/src/main/java/org/opensearch/OpenSearchException.java +++ b/server/src/main/java/org/opensearch/OpenSearchException.java @@ -34,6 +34,7 @@ import org.opensearch.action.support.replication.ReplicationOperation; import org.opensearch.cluster.action.shard.ShardStateAction; +import org.opensearch.cluster.decommission.DecommissionFailedException; import org.opensearch.common.CheckedFunction; import org.opensearch.common.Nullable; import org.opensearch.common.ParseField; @@ -1608,6 +1609,12 @@ private enum OpenSearchExceptionHandle { org.opensearch.index.shard.PrimaryShardClosedException::new, 162, V_3_0_0 + ), + DECOMMISSION_FAILED_EXCEPTION( + org.opensearch.cluster.decommission.DecommissionFailedException.class, + org.opensearch.cluster.decommission.DecommissionFailedException::new, + 163, + V_2_1_0 ); final Class exceptionClass; diff --git a/server/src/main/java/org/opensearch/cluster/ClusterModule.java b/server/src/main/java/org/opensearch/cluster/ClusterModule.java index f8ba520e465e2..de63369dafc89 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterModule.java @@ -38,6 +38,7 @@ import org.opensearch.cluster.metadata.ComponentTemplateMetadata; import org.opensearch.cluster.metadata.ComposableIndexTemplateMetadata; import org.opensearch.cluster.metadata.DataStreamMetadata; +import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.IndexGraveyard; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.metadata.Metadata; @@ -191,6 +192,7 @@ public static List getNamedWriteables() { ComposableIndexTemplateMetadata::readDiffFrom ); registerMetadataCustom(entries, DataStreamMetadata.TYPE, DataStreamMetadata::new, DataStreamMetadata::readDiffFrom); + registerMetadataCustom(entries, DecommissionAttributeMetadata.TYPE, DecommissionAttributeMetadata::new, 
DecommissionAttributeMetadata::readDiffFrom); // Task Status (not Diffable) entries.add(new Entry(Task.Status.class, PersistentTasksNodeService.Status.NAME, PersistentTasksNodeService.Status::new)); return entries; @@ -274,6 +276,13 @@ public static List getNamedXWriteables() { DataStreamMetadata::fromXContent ) ); + entries.add( + new NamedXContentRegistry.Entry( + Metadata.Custom.class, + new ParseField(DecommissionAttributeMetadata.TYPE), + DecommissionAttributeMetadata::fromXContent + ) + ); return entries; } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/DecommissionNodeAttributeClusterStateTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/DecommissionNodeAttributeClusterStateTaskExecutor.java deleted file mode 100644 index d71cd98d5f25e..0000000000000 --- a/server/src/main/java/org/opensearch/cluster/coordination/DecommissionNodeAttributeClusterStateTaskExecutor.java +++ /dev/null @@ -1,141 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.cluster.coordination; - -import org.apache.logging.log4j.Logger; -import org.apache.logging.log4j.message.ParameterizedMessage; -import org.opensearch.cluster.ClusterState; -import org.opensearch.cluster.ClusterStateTaskExecutor; -import org.opensearch.cluster.ClusterStateTaskListener; -import org.opensearch.cluster.decommission.DecommissionAttribute; -import org.opensearch.cluster.node.DiscoveryNode; -import org.opensearch.cluster.node.DiscoveryNodes; -import org.opensearch.cluster.routing.allocation.AllocationService; -import org.opensearch.persistent.PersistentTasksCustomMetadata; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.function.Predicate; - -/** - * Decommissions and shuts down nodes having a given attribute and updates the cluster state - * - * @opensearch.internal - */ -public class DecommissionNodeAttributeClusterStateTaskExecutor - implements - ClusterStateTaskExecutor, - ClusterStateTaskListener { - - private final AllocationService allocationService; - private final Logger logger; - - /** - * Task for the executor. 
- * - * @opensearch.internal - */ - public static class Task { - - private final DecommissionAttribute decommissionAttribute; - private final String reason; - - public Task(final DecommissionAttribute decommissionAttribute, final String reason) { - this.decommissionAttribute = decommissionAttribute; - this.reason = reason; - } - - public DecommissionAttribute decommissionAttribute() { - return decommissionAttribute; - } - - public String reason() { - return reason; - } - - @Override - public String toString() { - return "Decommission Node Attribute Task{" - + "decommissionAttribute=" - + decommissionAttribute - + ", reason='" - + reason - + '\'' - + '}'; - } - } - - public DecommissionNodeAttributeClusterStateTaskExecutor(final AllocationService allocationService, final Logger logger) { - this.allocationService = allocationService; - this.logger = logger; - } - - @Override - public ClusterTasksResult execute(ClusterState currentState, List tasks) throws Exception { - final DiscoveryNodes.Builder remainingNodesBuilder = DiscoveryNodes.builder(currentState.nodes()); - List nodesToBeRemoved = new ArrayList(); - for (final Task task : tasks) { - final Predicate shouldRemoveNodePredicate = discoveryNode -> nodeHasDecommissionedAttribute(discoveryNode, task); - Iterator nodesIter = currentState.nodes().getNodes().valuesIt(); - while (nodesIter.hasNext()) { - final DiscoveryNode node = nodesIter.next(); - if (shouldRemoveNodePredicate.test(node) && currentState.nodes().nodeExists(node)) { - nodesToBeRemoved.add(node); - } - } - } - if (nodesToBeRemoved.size() <= 0) { - // no nodes to remove, will keep the current cluster state - return ClusterTasksResult.builder() - .successes(tasks) - .build(currentState); - } - for (DiscoveryNode nodeToBeRemoved : nodesToBeRemoved) { - remainingNodesBuilder.remove(nodeToBeRemoved); - } - - final ClusterState remainingNodesClusterState = remainingNodesClusterState(currentState, remainingNodesBuilder); - - return 
getTaskClusterTasksResult(currentState, tasks, remainingNodesClusterState); - } - - private boolean nodeHasDecommissionedAttribute(DiscoveryNode discoveryNode, Task task) { - String discoveryNodeAttributeValue = discoveryNode.getAttributes().get(task.decommissionAttribute().attributeName()); - return discoveryNodeAttributeValue != null && task.decommissionAttribute().attributeValues().contains(discoveryNodeAttributeValue); - } - - // visible for testing - // hook is used in testing to ensure that correct cluster state is used to test whether a - // rejoin or reroute is needed - protected ClusterState remainingNodesClusterState(final ClusterState currentState, DiscoveryNodes.Builder remainingNodesBuilder) { - return ClusterState.builder(currentState).nodes(remainingNodesBuilder).build(); - } - - protected ClusterTasksResult getTaskClusterTasksResult( - ClusterState currentState, - List tasks, - ClusterState remainingNodesClusterState - ) { - ClusterState ptasksDisassociatedState = PersistentTasksCustomMetadata.disassociateDeadNodes(remainingNodesClusterState); - final ClusterTasksResult.Builder resultBuilder = ClusterTasksResult.< - DecommissionNodeAttributeClusterStateTaskExecutor.Task>builder().successes(tasks); - return resultBuilder.build(allocationService.disassociateDeadNodes(ptasksDisassociatedState, true, describeTasks(tasks))); - } - - @Override - public void onFailure(final String source, final Exception e) { - logger.error(() -> new ParameterizedMessage("unexpected failure during [{}]", source), e); - } - - @Override - public void onNoLongerClusterManager(String source) { - logger.debug("no longer cluster-manager while decommissioning node attribute [{}]", source); - } -} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java index 6260af2823687..db4e06e854518 100644 --- 
a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java @@ -18,27 +18,27 @@ public final class DecommissionAttribute implements Writeable { private final String attributeName; - private final List attributeValues; + private final String attributeValue; /** - * Update the attribute values for a given attribute name to decommission + * Update the attribute value for a given attribute name to decommission * * @param decommissionAttribute current decommissioned attribute object - * @param attributeValues values to be updated with + * @param attributeValue attribute value to be updated with */ - public DecommissionAttribute(DecommissionAttribute decommissionAttribute, List attributeValues) { - this(decommissionAttribute.attributeName, attributeValues); + public DecommissionAttribute(DecommissionAttribute decommissionAttribute, String attributeValue) { + this(decommissionAttribute.attributeName, attributeValue); } /** - * Constructs new decommission attribute name values pair + * Constructs new decommission attribute name value pair * * @param attributeName attribute name - * @param attributeValues attribute values + * @param attributeValue attribute value */ - public DecommissionAttribute(String attributeName, List attributeValues) { + public DecommissionAttribute(String attributeName, String attributeValue) { this.attributeName = attributeName; - this.attributeValues = attributeValues; + this.attributeValue = attributeValue; } /** @@ -51,35 +51,35 @@ public String attributeName() { } /** - * Returns attribute values + * Returns attribute value * - * @return attributeValues + * @return attributeValue */ - public List attributeValues() { - return this.attributeValues; + public String attributeValue() { + return this.attributeValue; } public DecommissionAttribute(StreamInput in) throws IOException { attributeName = in.readString(); - attributeValues = 
in.readStringList(); + attributeValue = in.readString(); } /** - * Writes decommission attribute name values to stream output + * Writes decommission attribute name value to stream output * * @param out stream output */ @Override public void writeTo(StreamOutput out) throws IOException { out.writeString(attributeName); - out.writeStringCollection(attributeValues); + out.writeString(attributeValue); } /** - * Checks if this instance is equal to the other instance in attributeName other than {@link #attributeValues}. + * Checks if this instance is equal to the other instance in attributeName but differ in attribute value {@link #attributeValue}. * - * @param other other decommission attribute name values - * @return {@code true} if both instances equal in attributeName fields but the attributeValues fields + * @param other other decommission attribute name value + * @return {@code true} if both instances equal in attributeName fields but the attributeValue field */ public boolean equalsIgnoreValues(DecommissionAttribute other) { return attributeName.equals(other.attributeName); @@ -93,16 +93,13 @@ public boolean equals(Object o) { DecommissionAttribute that = (DecommissionAttribute) o; if (!attributeName.equals(that.attributeName)) return false; - return attributeValues.equals(that.attributeValues); + return attributeValue.equals(that.attributeValue); } @Override public int hashCode() { - return Objects.hash(attributeName, attributeValues); + return Objects.hash(attributeName, attributeValue); } - @Override - public String toString() { - return "DecommissionAttribute{" + attributeName + "}{" + attributeValues().toString() + "}"; - } + } diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionFailedException.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionFailedException.java new file mode 100644 index 0000000000000..3a611c2488779 --- /dev/null +++ 
b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionFailedException.java @@ -0,0 +1,49 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.opensearch.OpenSearchException; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; + +import java.io.IOException; + +public class DecommissionFailedException extends OpenSearchException { + + private final DecommissionAttribute decommissionAttribute; + + public DecommissionFailedException(DecommissionAttribute decommissionAttribute, String msg) { + this(decommissionAttribute, msg, null); + } + + public DecommissionFailedException(DecommissionAttribute decommissionAttribute, String msg, Throwable cause) { + super("[" + (decommissionAttribute == null ? "_na" : decommissionAttribute.toString()) + "] " + msg, cause); + this.decommissionAttribute = decommissionAttribute; + } + + public DecommissionFailedException(StreamInput in) throws IOException { + super(in); + decommissionAttribute = new DecommissionAttribute(in); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + decommissionAttribute.writeTo(out); + } + + /** + * Returns decommission attribute + * + * @return decommission attribute + */ + public DecommissionAttribute decommissionAttribute() { + return decommissionAttribute; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java index 6b9e480abcef7..d1eb17adc9747 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java @@ -10,15 +10,12 
@@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateTaskConfig; import org.opensearch.cluster.ClusterStateTaskListener; import org.opensearch.cluster.coordination.NodeRemovalClusterStateTaskExecutor; import org.opensearch.cluster.node.DiscoveryNode; -import org.opensearch.cluster.node.DiscoveryNodes; -import org.opensearch.cluster.service.ClusterManagerService; +import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.Priority; -import org.opensearch.common.inject.Inject; import java.util.LinkedHashMap; import java.util.List; @@ -29,17 +26,17 @@ public class DecommissionHelper { private static final Logger logger = LogManager.getLogger(DecommissionHelper.class); private final NodeRemovalClusterStateTaskExecutor nodeRemovalExecutor; - private final ClusterManagerService clusterManagerService; + private final ClusterService clusterService; DecommissionHelper( - ClusterManagerService clusterManagerService, + ClusterService clusterService, NodeRemovalClusterStateTaskExecutor nodeRemovalClusterStateTaskExecutor ) { this.nodeRemovalExecutor = nodeRemovalClusterStateTaskExecutor; - this.clusterManagerService = clusterManagerService; + this.clusterService = clusterService; } - private void handleNodesDecommissionRequest(List nodesToBeDecommissioned, String reason) { + public void handleNodesDecommissionRequest(List nodesToBeDecommissioned, String reason) { final Map nodesDecommissionTasks = new LinkedHashMap<>(); nodesToBeDecommissioned.forEach(discoveryNode -> { final NodeRemovalClusterStateTaskExecutor.Task task = new NodeRemovalClusterStateTaskExecutor.Task( @@ -48,7 +45,7 @@ private void handleNodesDecommissionRequest(List nodesToBeDecommi nodesDecommissionTasks.put(task, nodeRemovalExecutor); }); final String source = "node-decommissioned"; - clusterManagerService.submitStateUpdateTasks( + 
clusterService.submitStateUpdateTasks( source, nodesDecommissionTasks, ClusterStateTaskConfig.build(Priority.IMMEDIATE), diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java new file mode 100644 index 0000000000000..160b729b14b3a --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -0,0 +1,223 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.action.ActionListener; +import org.opensearch.cluster.ClusterChangedEvent; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateApplier; +import org.opensearch.cluster.ClusterStateUpdateTask; +import org.opensearch.cluster.ack.ClusterStateUpdateResponse; +import org.opensearch.cluster.coordination.NodeRemovalClusterStateTaskExecutor; +import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.Priority; +import org.opensearch.common.inject.Inject; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.util.ArrayList; +import 
java.util.Iterator; +import java.util.List; +import java.util.function.Predicate; + +// do we need to implement ClusterStateApplier -> will a change in cluster state impact this service?? +public class DecommissionService implements ClusterStateApplier { + + private static final Logger logger = LogManager.getLogger(DecommissionService.class); + + private final ClusterService clusterService; + private final TransportService transportService; + private final NodeRemovalClusterStateTaskExecutor nodeRemovalExecutor; + private final ThreadPool threadPool; + private final DecommissionHelper decommissionHelper; + private ClusterState clusterState; + private volatile List awarenessAttributes; + + @Inject + public DecommissionService( + Settings settings, + ClusterSettings clusterSettings, + ClusterService clusterService, + TransportService transportService, + ThreadPool threadPool, + AllocationService allocationService + ) { + this.clusterService = clusterService; + this.transportService = transportService; + this.threadPool = threadPool; + this.clusterState = clusterService.state(); // TODO - check if this is the right way + this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, logger); + this.decommissionHelper = new DecommissionHelper( + clusterService, + nodeRemovalExecutor + ); + this.awarenessAttributes = AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.get(settings); + clusterSettings.addSettingsUpdateConsumer( + AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING, + this::setAwarenessAttributes + ); + } + + List getAwarenessAttributes() { + return awarenessAttributes; + } + + private void setAwarenessAttributes(List awarenessAttributes) { + this.awarenessAttributes = awarenessAttributes; + } + + public void initiateAttributeDecommissioning( + final DecommissionAttribute decommissionAttribute, + final ActionListener listener + ) { + /** + * 1. Abdicate master + * 2. 
Register attribute -> status should be set to INIT + * 3. Trigger weigh away for graceful decommission -> status should be set to DECOMMISSIONING + * 4. Once zone is weighed away -> trigger zone decommission using executor -> status should be set to DECOMMISSIONED on successful response + * 5. Clear voting config + */ + registerDecommissionAttribute(decommissionAttribute, listener); + } + + /** + * Registers new decommissioned attribute metadata in the cluster state + *

+ * This method can be only called on the cluster-manager node. It tries to create a new decommissioned attribute on the master + * and if it was successful it adds new decommissioned attribute to cluster metadata. + *

+ * This method should only be called once the eligible cluster manager node having decommissioned attribute is abdicated + * + * @param decommissionAttribute register decommission attribute in the metadata request + * @param listener register decommission listener + */ + private void registerDecommissionAttribute( + final DecommissionAttribute decommissionAttribute, + final ActionListener listener + ) { + validateAwarenessAttribute(decommissionAttribute, getAwarenessAttributes()); + clusterService.submitStateUpdateTask( + "put_decommission [" + decommissionAttribute + "]", + new ClusterStateUpdateTask(Priority.URGENT) { + @Override + public ClusterState execute(ClusterState currentState) throws Exception { + logger.info("decommission request for attribute [{}] received", decommissionAttribute.toString()); + Metadata metadata = currentState.metadata(); + Metadata.Builder mdBuilder = Metadata.builder(metadata); + DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); + ensureNoAwarenessAttributeDecommissioned(decommissionAttributeMetadata, decommissionAttribute); + decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); + mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); + return ClusterState.builder(currentState).metadata(mdBuilder).build(); + } + + @Override + public void onFailure(String source, Exception e) { + // TODO - should we put the weights back to zone, since we weighed away the zone before we started registering the metadata + // TODO - should we modify logic of logging for ease of debugging? 
+ if (e instanceof DecommissionFailedException) { + logger.error(() -> new ParameterizedMessage("failed to decommission attribute [{}]", decommissionAttribute.toString()), e); + } else { + clusterService.submitStateUpdateTask( + "decommission_failed", + new ClusterStateUpdateTask(Priority.URGENT) { + @Override + public ClusterState execute(ClusterState currentState) throws Exception { + Metadata metadata = currentState.metadata(); + Metadata.Builder mdBuilder = Metadata.builder(metadata); + logger.info("decommission request for attribute [{}] failed", decommissionAttribute.toString()); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + DecommissionStatus.DECOMMISSION_FAILED + ); + mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); + return ClusterState.builder(currentState).metadata(mdBuilder).build(); + } + + @Override + public void onFailure(String source, Exception e) { + logger.error(() -> new ParameterizedMessage( + "failed to mark status as DECOMMISSION_FAILED for decommission attribute [{}]", + decommissionAttribute.toString()), e); +// listener.onFailure(e); + } + } + ); + } + listener.onFailure(e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + if (!newState.equals(oldState)) { + // TODO - drain the nodes before decommissioning + failDecommissionedNodes(newState); + listener.onResponse(new ClusterStateUpdateResponse(true)); + } + listener.onResponse(new ClusterStateUpdateResponse(false)); + } + } + ); + } + + private static void validateAwarenessAttribute(final DecommissionAttribute decommissionAttribute, List awarenessAttributes) { + if (!awarenessAttributes.contains(decommissionAttribute.attributeName())) { + throw new DecommissionFailedException(decommissionAttribute, "invalid awareness attribute requested for decommissioning"); + } + // TODO - should attribute value be 
part of force zone values? If yes, read setting and throw exception if not found + } + + private static void ensureNoAwarenessAttributeDecommissioned( + DecommissionAttributeMetadata decommissionAttributeMetadata, + DecommissionAttribute decommissionAttribute + ) { + // If the previous decommission request failed, we will allow the request to pass this check + if (decommissionAttributeMetadata != null && !decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_FAILED)) { + throw new DecommissionFailedException(decommissionAttribute, "one awareness attribute already decommissioned, " + + "recommission before triggering another decommission"); + } + } + + private void failDecommissionedNodes(ClusterState state) { + DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); + assert decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSIONING) : "unexpected status encountered while decommissioning nodes"; + DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); + List nodesToBeDecommissioned = new ArrayList<>(); + final Predicate shouldRemoveNodePredicate = discoveryNode -> nodeHasDecommissionedAttribute(discoveryNode, decommissionAttribute); + Iterator nodesIter = state.nodes().getNodes().valuesIt(); + while (nodesIter.hasNext()) { + final DiscoveryNode node = nodesIter.next(); + if (shouldRemoveNodePredicate.test(node)) { + nodesToBeDecommissioned.add(node); + } + } + decommissionHelper.handleNodesDecommissionRequest(nodesToBeDecommissioned, "nodes-decommissioned"); + } + + private static boolean nodeHasDecommissionedAttribute(DiscoveryNode discoveryNode, DecommissionAttribute decommissionAttribute) { + return discoveryNode.getAttributes().get( + decommissionAttribute.attributeName() + ).equals(decommissionAttribute.attributeValue()); + } + + @Override + public void applyClusterState(ClusterChangedEvent event) { + 
clusterState = event.state(); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java new file mode 100644 index 0000000000000..8f4ca3a6f578a --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java @@ -0,0 +1,94 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +public enum DecommissionStatus { + /** + * Decommission process is initiated + */ + INIT((byte) 0), + /** + * Decommission process has started, decommissioned nodes should be weighed away + */ + DECOMMISSIONING((byte) 1), + /** + * Decommissioning awareness attribute completed + */ + DECOMMISSIONED((byte) 2), + /** + * Decommission request failed + */ + DECOMMISSION_FAILED((byte) 3), + /** + * Recommission request received, recommissioning process has started + */ + RECOMMISSIONING((byte) 4), + /** + * Recommission request failed. 
No nodes should fail to join the cluster with decommission exception + */ + RECOMMISSION_FAILED((byte) 5); + + private final byte value; + + DecommissionStatus(byte value) { + this.value = value; + } + + /** + * Returns code that represents the decommission state + * + * @return code for the state + */ + public byte value() { + return value; + } + + /** + * Generate decommission state from code + * + * @param value the state code + * @return state + */ + public static DecommissionStatus fromValue(byte value) { + switch (value) { + case 0: + return INIT; + case 1: + return DECOMMISSIONING; + case 2: + return DECOMMISSIONED; + case 3: + return DECOMMISSION_FAILED; + case 4: + return RECOMMISSIONING; + case 5: + return RECOMMISSION_FAILED; + default: + throw new IllegalArgumentException("No decommission state for value [" + value + "]"); + } + } + + public static DecommissionStatus fromString(String status) { + if ("init".equals(status)) { + return INIT; + } else if ("decommissioning".equals(status)) { + return DECOMMISSIONING; + } else if ("decommissioned".equals(status)) { + return DECOMMISSIONED; + } else if ("decommission_failed".equals(status)) { + return DECOMMISSION_FAILED; + } else if ("recommissioning".equals(status)) { + return RECOMMISSIONING; + } else if ("recommission_failed".equals(status)) { + return RECOMMISSION_FAILED; + } + throw new IllegalStateException("No status match for [" + status + "]"); + } +} + diff --git a/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java new file mode 100644 index 0000000000000..dd0c78d7519b1 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java @@ -0,0 +1,254 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * 
compatible open source license. + */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.OpenSearchParseException; +import org.opensearch.Version; +import org.opensearch.cluster.AbstractNamedDiffable; +import org.opensearch.cluster.NamedDiff; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.cluster.metadata.Metadata.Custom; +import org.opensearch.common.Nullable; +import org.opensearch.common.Strings; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.xcontent.ToXContent; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.EnumSet; +import java.util.Objects; + +/** + * Contains metadata about decommission attribute + * + * @opensearch.internal + */ +public class DecommissionAttributeMetadata extends AbstractNamedDiffable implements Custom { + + public static final String TYPE = "decommissionedAttribute"; + + private final DecommissionAttribute decommissionAttribute; + private final DecommissionStatus status; + public static final String attributeType = "awareness"; + + /** + * Constructs new decommission attribute metadata with given status + * + * @param decommissionAttribute attribute details + * @param status current status of the attribute decommission + */ + public DecommissionAttributeMetadata(DecommissionAttribute decommissionAttribute, DecommissionStatus status) { + this.decommissionAttribute = decommissionAttribute; + this.status = status; + } + + /** + * Constructs new decommission attribute metadata with status as {@link DecommissionStatus#INIT} + * + * @param decommissionAttribute attribute details + */ + public DecommissionAttributeMetadata(DecommissionAttribute decommissionAttribute) { + this.decommissionAttribute = decommissionAttribute; + 
this.status = DecommissionStatus.INIT; + } + + /** + * Returns the current decommissioned attribute + * + * @return decommissioned attributes + */ + public DecommissionAttribute decommissionAttribute() { + return this.decommissionAttribute; + } + + /** + * Returns the current status of the attribute decommission + * + * @return attribute type + */ + public DecommissionStatus status() { + return this.status; + } + + public DecommissionAttributeMetadata withUpdatedStatus( + DecommissionAttributeMetadata metadata, + DecommissionStatus status) { + return new DecommissionAttributeMetadata( + metadata.decommissionAttribute(), + status + ); + } + + /** + * Creates a new instance with a updated attribute value. + * + * @param metadata current metadata + * @param attributeValue new attribute value + * @return new instance with updated attribute value and status as DecommissionStatus.INIT + */ + public DecommissionAttributeMetadata withUpdatedAttributeValue( + DecommissionAttributeMetadata metadata, + String attributeValue + ) { + return new DecommissionAttributeMetadata( + new DecommissionAttribute(metadata.decommissionAttribute, attributeValue), + DecommissionStatus.INIT + ); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + DecommissionAttributeMetadata that = (DecommissionAttributeMetadata) o; + + if (!status.equals(that.status)) return false; + return decommissionAttribute.equals(that.decommissionAttribute); + } + + /** + * Checks if this instance and the given instance share the same decommissioned attributeName + * and only differ in the attributeValue {@link DecommissionAttribute#attributeValue()} + * + * @param other other decommission attribute metadata + * @return {@code true} iff both instances contain the same attributeName + */ + public boolean equalsIgnoreValue(@Nullable DecommissionAttributeMetadata other) { + if (other == null) { + return false; + } + if 
(!status.equals(other.status)) return false; + return decommissionAttribute.equalsIgnoreValues(other.decommissionAttribute); + } + + @Override + public int hashCode() { + return Objects.hash(attributeType, decommissionAttribute, status); + } + + /** + * {@inheritDoc} + */ + @Override + public String getWriteableName() { + return TYPE; + } + + @Override + public Version getMinimalSupportedVersion() { + return Version.CURRENT.minimumCompatibilityVersion(); + } + + public DecommissionAttributeMetadata(StreamInput in) throws IOException { + this.status = DecommissionStatus.fromValue(in.readByte()); + this.decommissionAttribute = new DecommissionAttribute(in); + } + + public static NamedDiff readDiffFrom(StreamInput in) throws IOException { + return readDiffFrom(Custom.class, TYPE, in); + } + + /** + * {@inheritDoc} + */ + @Override + public void writeTo(StreamOutput out) throws IOException { + decommissionAttribute.writeTo(out); + out.writeByte(status.value()); + out.writeString(attributeType); + } + + public static DecommissionAttributeMetadata fromXContent(XContentParser parser) throws IOException { + XContentParser.Token token; + DecommissionAttribute decommissionAttribute = null; + DecommissionStatus status = null; + if ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + String currentFieldName = parser.currentName(); + if (attributeType.equals(currentFieldName)) { + if (parser.nextToken() != XContentParser.Token.START_OBJECT) { + throw new OpenSearchParseException("failed to parse decommission attribute type [{}], expected object", attributeType); + } + token = parser.nextToken(); + if (token != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + String fieldName = parser.currentName(); + String value; + token = parser.nextToken(); + if (token != XContentParser.Token.VALUE_STRING) { + value = parser.text(); + } else { + throw new OpenSearchParseException("failed 
to parse attribute [{}], expected string for attribute value", fieldName); + } + decommissionAttribute = new DecommissionAttribute(fieldName, value); + } else { + throw new OpenSearchParseException("failed to parse attribute type [{}], unexpected type", attributeType); + } + } else { + throw new OpenSearchParseException("failed to parse attribute type [{}]", attributeType); + } + } else if ("status".equals(currentFieldName)) { + if (parser.nextToken() != XContentParser.Token.VALUE_STRING) { + throw new OpenSearchParseException("failed to parse status of decommissioning, expected string but found unknown type"); + } + status = DecommissionStatus.fromString(parser.text()); + } else { + throw new OpenSearchParseException( + "unknown field found [{}], failed to parse the decommission attribute", + currentFieldName + ); + } + } + } + return new DecommissionAttributeMetadata(decommissionAttribute, status); + } + + /** + * {@inheritDoc} + */ + @Override + public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { + toXContent(decommissionAttribute, status, attributeType, builder, params); + return builder; + } + + @Override + public EnumSet context() { + return Metadata.API_AND_GATEWAY; + } + + /** + * @param decommissionAttribute decommission attribute + * @param status decommission status + * @param attributeType attribute type + * @param builder XContent builder + * @param params serialization parameters + */ + public static void toXContent( + DecommissionAttribute decommissionAttribute, + DecommissionStatus status, + String attributeType, + XContentBuilder builder, + ToXContent.Params params + ) throws IOException { + builder.startObject(attributeType); + builder.field(decommissionAttribute.attributeName(), decommissionAttribute.attributeValue()); + builder.endObject(); + builder.field("status", status); + } + + @Override + public String toString() { + return Strings.toString(this); + } +} diff --git 
a/server/src/test/java/org/opensearch/cluster/coordination/DecommissionNodeAttributeClusterStateTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/DecommissionNodeAttributeClusterStateTaskExecutorTests.java deleted file mode 100644 index 204d31f18e2cf..0000000000000 --- a/server/src/test/java/org/opensearch/cluster/coordination/DecommissionNodeAttributeClusterStateTaskExecutorTests.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.cluster.coordination; - -import org.opensearch.Version; -import org.opensearch.cluster.ClusterName; -import org.opensearch.cluster.ClusterState; -import org.opensearch.cluster.ClusterStateTaskExecutor; -import org.opensearch.cluster.decommission.DecommissionAttribute; -import org.opensearch.cluster.node.DiscoveryNode; -import org.opensearch.cluster.node.DiscoveryNodeRole; -import org.opensearch.cluster.node.DiscoveryNodes; -import org.opensearch.cluster.routing.allocation.AllocationService; -import org.opensearch.test.OpenSearchTestCase; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.atomic.AtomicReference; - -import static java.util.Collections.singletonMap; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -public class DecommissionNodeAttributeClusterStateTaskExecutorTests extends OpenSearchTestCase { - - public void testRemoveNodesForDecommissionedAttribute() throws Exception { - 
final AllocationService allocationService = mock(AllocationService.class); - when(allocationService.disassociateDeadNodes(any(ClusterState.class), eq(true), any(String.class))).thenAnswer( - im -> im.getArguments()[0] - ); - final AtomicReference remainingNodesClusterState = new AtomicReference<>(); - ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); - - logger.info("--> adding five nodes on same zone_1"); - clusterState = addNodes(clusterState, "zone_1", "node1", "node2", "node3", "node4", "node5"); - - logger.info("--> adding five nodes on same zone_2"); - clusterState = addNodes(clusterState, "zone_2", "node6", "node7", "node8", "node9", "node10"); - - logger.info("--> adding five nodes on same zone_3"); - clusterState = addNodes(clusterState, "zone_3", "node11", "node12", "node13", "node14", "node15"); - - final DecommissionNodeAttributeClusterStateTaskExecutor executor = new DecommissionNodeAttributeClusterStateTaskExecutor( - allocationService, - logger - ) { - @Override - protected ClusterState remainingNodesClusterState(ClusterState currentState, DiscoveryNodes.Builder remainingNodesBuilder) { - remainingNodesClusterState.set(super.remainingNodesClusterState(currentState, remainingNodesBuilder)); - return remainingNodesClusterState.get(); - } - }; - - final List tasks = new ArrayList<>(); - tasks.add( - new DecommissionNodeAttributeClusterStateTaskExecutor.Task( - new DecommissionAttribute("zone", Collections.singletonList("zone_3")), - "unit test zone decommission executor" - ) - ); - - final ClusterStateTaskExecutor.ClusterTasksResult result = executor.execute( - clusterState, - tasks - ); - - ClusterState expectedClusterState = remainingNodesClusterState.get(); - ClusterState actualClusterState = result.resultingState; - - // Assert cluster state is updated and is successful - verify(allocationService).disassociateDeadNodes(eq(expectedClusterState), eq(true), any(String.class)); - assertEquals(actualClusterState, 
expectedClusterState); - assertTrue(result.executionResults.get(tasks.get(0)).isSuccess()); - - // Verify only 10 nodes present in the cluster after decommissioning - assertEquals(actualClusterState.nodes().getNodes().size(), 10); - - // Verify no nodes has attribute (zone, zone_3) - Iterator currDiscoveryNodeIterator = actualClusterState.nodes().getNodes().valuesIt(); - while (currDiscoveryNodeIterator.hasNext()) { - final DiscoveryNode node = currDiscoveryNodeIterator.next(); - assertNotEquals(node.getAttributes().get("zone"), "zone_3"); - } - } - - public void testSameClusterStateAfterExecutionForUnknownAttributeNameAndValue() throws Exception { - final AllocationService allocationService = mock(AllocationService.class); - when(allocationService.disassociateDeadNodes(any(ClusterState.class), eq(true), any(String.class))).thenAnswer( - im -> im.getArguments()[0] - ); - final AtomicReference remainingNodesClusterState = new AtomicReference<>(); - ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); - - logger.info("--> adding five nodes on same zone_1"); - clusterState = addNodes(clusterState, "zone_1", "node1", "node2", "node3", "node4", "node5"); - - logger.info("--> adding five nodes on same zone_2"); - clusterState = addNodes(clusterState, "zone_2", "node6", "node7", "node8", "node9", "node10"); - - logger.info("--> adding five nodes on same zone_3"); - clusterState = addNodes(clusterState, "zone_3", "node11", "node12", "node13", "node14", "node15"); - - final DecommissionNodeAttributeClusterStateTaskExecutor executor = new DecommissionNodeAttributeClusterStateTaskExecutor( - allocationService, - logger - ) { - @Override - protected ClusterState remainingNodesClusterState(ClusterState currentState, DiscoveryNodes.Builder remainingNodesBuilder) { - remainingNodesClusterState.set(super.remainingNodesClusterState(currentState, remainingNodesBuilder)); - return remainingNodesClusterState.get(); - } - }; - - final List tasks = new 
ArrayList<>(); - // Task 1 with unknown attribute name - tasks.add( - new DecommissionNodeAttributeClusterStateTaskExecutor.Task( - new DecommissionAttribute("unknown_zone_name", Collections.singletonList("unknown_zone_value")), - "unit test zone decommission executor" - ) - ); - // Task 2 with unknown attribute value - tasks.add( - new DecommissionNodeAttributeClusterStateTaskExecutor.Task( - new DecommissionAttribute("zone", Collections.singletonList("unknown_zone_value")), - "unit test zone decommission executor" - ) - ); - - final ClusterStateTaskExecutor.ClusterTasksResult result = executor.execute( - clusterState, - tasks - ); - - ClusterState expectedClusterState = remainingNodesClusterState.get(); - ClusterState actualClusterState = result.resultingState; - - // assert that disassociate dead node tasks is never executed - verify(allocationService, never()).disassociateDeadNodes(eq(expectedClusterState), eq(true), any(String.class)); - - // assert that cluster state remains same - assertEquals(clusterState, actualClusterState); - - // Verify all 15 nodes present in the cluster after decommissioning unknown attribute name - assertEquals(actualClusterState.nodes().getNodes().size(), 15); - } - - private ClusterState addNodes(ClusterState clusterState, String zone, String... 
nodeIds) { - DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); - org.opensearch.common.collect.List.of(nodeIds).forEach(nodeId -> nodeBuilder.add(newNode(nodeId, singletonMap("zone", zone)))); - clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); - return clusterState; - } - - private DiscoveryNode newNode(String nodeId, Map attributes) { - return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_DATA_ROLES, Version.CURRENT); - } - - final private static Set CLUSTER_MANAGER_DATA_ROLES = Collections.unmodifiableSet( - new HashSet<>(Arrays.asList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)) - ); -} From d348085c1da18bf446189c68dcab4fdd9ff1061c Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 17 Aug 2022 17:38:25 +0530 Subject: [PATCH 04/87] Fixes Signed-off-by: Rishab Nahata --- .../cluster/decommission/DecommissionAttribute.java | 8 +++++++- .../cluster/decommission/DecommissionService.java | 6 ++++-- .../cluster/metadata/DecommissionAttributeMetadata.java | 5 ++--- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java index db4e06e854518..15c17ae4b7ae1 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java @@ -13,7 +13,6 @@ import org.opensearch.common.io.stream.Writeable; import java.io.IOException; -import java.util.List; import java.util.Objects; public final class DecommissionAttribute implements Writeable { @@ -102,4 +101,11 @@ public int hashCode() { } + @Override + public String toString() { + return "DecommissionAttribute{" + + "attributeName='" + attributeName + '\'' + + ", attributeValue='" + attributeValue + '\'' + + '}'; + 
} } diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 160b729b14b3a..d932a45401530 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -169,8 +169,9 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS // TODO - drain the nodes before decommissioning failDecommissionedNodes(newState); listener.onResponse(new ClusterStateUpdateResponse(true)); + } else { + listener.onResponse(new ClusterStateUpdateResponse(false)); } - listener.onResponse(new ClusterStateUpdateResponse(false)); } } ); @@ -196,7 +197,8 @@ private static void ensureNoAwarenessAttributeDecommissioned( private void failDecommissionedNodes(ClusterState state) { DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); - assert decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSIONING) : "unexpected status encountered while decommissioning nodes"; + // TODO update the status check to DECOMMISSIONING once graceful decommission is implemented + assert decommissionAttributeMetadata.status().equals(DecommissionStatus.INIT) : "unexpected status encountered while decommissioning nodes"; DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); List nodesToBeDecommissioned = new ArrayList<>(); final Predicate shouldRemoveNodePredicate = discoveryNode -> nodeHasDecommissionedAttribute(discoveryNode, decommissionAttribute); diff --git a/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java index dd0c78d7519b1..869576f0ea070 100644 --- 
a/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java @@ -150,8 +150,8 @@ public Version getMinimalSupportedVersion() { } public DecommissionAttributeMetadata(StreamInput in) throws IOException { - this.status = DecommissionStatus.fromValue(in.readByte()); this.decommissionAttribute = new DecommissionAttribute(in); + this.status = DecommissionStatus.fromValue(in.readByte()); } public static NamedDiff readDiffFrom(StreamInput in) throws IOException { @@ -165,7 +165,6 @@ public static NamedDiff readDiffFrom(StreamInput in) throws IOException public void writeTo(StreamOutput out) throws IOException { decommissionAttribute.writeTo(out); out.writeByte(status.value()); - out.writeString(attributeType); } public static DecommissionAttributeMetadata fromXContent(XContentParser parser) throws IOException { @@ -185,7 +184,7 @@ public static DecommissionAttributeMetadata fromXContent(XContentParser parser) String fieldName = parser.currentName(); String value; token = parser.nextToken(); - if (token != XContentParser.Token.VALUE_STRING) { + if (token == XContentParser.Token.VALUE_STRING) { value = parser.text(); } else { throw new OpenSearchParseException("failed to parse attribute [{}], expected string for attribute value", fieldName); From 9cccc44de6d32371804ec8580f8007d7c0ca7913 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 17 Aug 2022 19:11:09 +0530 Subject: [PATCH 05/87] Master abdication Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 60 +++++++++++++++++-- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index d932a45401530..40d848f81483c 100644 --- 
a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -12,6 +12,9 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.action.ActionListener; +import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction; +import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest; +import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsResponse; import org.opensearch.cluster.ClusterChangedEvent; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateApplier; @@ -26,11 +29,15 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.Priority; import org.opensearch.common.inject.Inject; +import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportException; +import org.opensearch.transport.TransportResponseHandler; import org.opensearch.transport.TransportService; +import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -61,7 +68,6 @@ public DecommissionService( this.clusterService = clusterService; this.transportService = transportService; this.threadPool = threadPool; - this.clusterState = clusterService.state(); // TODO - check if this is the right way this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, logger); this.decommissionHelper = new DecommissionHelper( clusterService, @@ -84,16 +90,62 @@ private void setAwarenessAttributes(List awarenessAttributes) { public void initiateAttributeDecommissioning( final DecommissionAttribute decommissionAttribute, - final ActionListener 
listener + final ActionListener listener, + ClusterState state ) { - /** + /* * 1. Abdicate master * 2. Register attribute -> status should be set to INIT * 3. Trigger weigh away for graceful decommission -> status should be set to DECOMMISSIONING * 4. Once zone is weighed away -> trigger zone decommission using executor -> status should be set to DECOMMISSIONED on successful response * 5. Clear voting config */ - registerDecommissionAttribute(decommissionAttribute, listener); + this.clusterState = state; + abdicateDecommissionedClusterManagerNodes(decommissionAttribute, listener); + } + + private void abdicateDecommissionedClusterManagerNodes( + DecommissionAttribute decommissionAttribute, + final ActionListener listener + ) { + final Predicate shouldAbdicatePredicate = discoveryNode -> nodeHasDecommissionedAttribute(discoveryNode, decommissionAttribute); + List clusterManagerNodesToBeDecommissioned = new ArrayList<>(); + Iterator clusterManagerNodesIter = clusterState.nodes().getClusterManagerNodes().valuesIt(); + while (clusterManagerNodesIter.hasNext()) { + final DiscoveryNode node = clusterManagerNodesIter.next(); + if (shouldAbdicatePredicate.test(node)) { + clusterManagerNodesToBeDecommissioned.add(node.getName()); + } + } + transportService.sendRequest( + transportService.getLocalNode(), + AddVotingConfigExclusionsAction.NAME, + new AddVotingConfigExclusionsRequest(clusterManagerNodesToBeDecommissioned.toArray(String[]::new)), + new TransportResponseHandler() { + @Override + public void handleResponse(AddVotingConfigExclusionsResponse response) { + logger.info("successfully removed decommissioned cluster manager eligible nodes from voting config [{}], " + + "proceeding to drain the decommissioned nodes", response.toString()); + registerDecommissionAttribute(decommissionAttribute, listener); + } + + @Override + public void handleException(TransportException exp) { + logger.debug(new ParameterizedMessage( + "failure in removing decommissioned cluster manager 
eligible nodes from voting config"), exp); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + @Override + public AddVotingConfigExclusionsResponse read(StreamInput in) throws IOException { + return new AddVotingConfigExclusionsResponse(in); + } + } + ); } /** From 4923f0a7ba6a429290eca8f8d517b6b36c376337 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 17 Aug 2022 19:56:25 +0530 Subject: [PATCH 06/87] Fixes Signed-off-by: Rishab Nahata --- .../cluster/decommission/DecommissionService.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 40d848f81483c..48db0e06edb6a 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -101,6 +101,7 @@ public void initiateAttributeDecommissioning( * 5. 
Clear voting config */ this.clusterState = state; + logger.info("initiating awareness attribute [{}] decommissioning", decommissionAttribute.toString()); abdicateDecommissionedClusterManagerNodes(decommissionAttribute, listener); } @@ -124,8 +125,8 @@ private void abdicateDecommissionedClusterManagerNodes( new TransportResponseHandler() { @Override public void handleResponse(AddVotingConfigExclusionsResponse response) { - logger.info("successfully removed decommissioned cluster manager eligible nodes from voting config [{}], " + - "proceeding to drain the decommissioned nodes", response.toString()); + logger.info("successfully removed decommissioned cluster manager eligible nodes [{}] from voting config, " + + "proceeding to drain the decommissioned nodes", clusterManagerNodesToBeDecommissioned.toString()); registerDecommissionAttribute(decommissionAttribute, listener); } @@ -186,6 +187,7 @@ public void onFailure(String source, Exception e) { if (e instanceof DecommissionFailedException) { logger.error(() -> new ParameterizedMessage("failed to decommission attribute [{}]", decommissionAttribute.toString()), e); } else { + // could be due to on longer cluster manager clusterService.submitStateUpdateTask( "decommission_failed", new ClusterStateUpdateTask(Priority.URGENT) { From 38c59309fe23fce1b55f5e8ff68b0a4b7b43592c Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 17 Aug 2022 20:28:28 +0530 Subject: [PATCH 07/87] Update join validator to validate decommissioned node join request Signed-off-by: Rishab Nahata --- .../org/opensearch/OpenSearchException.java | 6 ++++ .../coordination/JoinTaskExecutor.java | 36 ++++++++++++++++--- .../NodeDecommissionedException.java | 32 +++++++++++++++++ 3 files changed, 70 insertions(+), 4 deletions(-) create mode 100644 server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java diff --git a/server/src/main/java/org/opensearch/OpenSearchException.java 
b/server/src/main/java/org/opensearch/OpenSearchException.java index d3e1bef9b6dbb..44d17cbca5652 100644 --- a/server/src/main/java/org/opensearch/OpenSearchException.java +++ b/server/src/main/java/org/opensearch/OpenSearchException.java @@ -1615,6 +1615,12 @@ private enum OpenSearchExceptionHandle { org.opensearch.cluster.decommission.DecommissionFailedException::new, 163, V_2_1_0 + ), + NODE_DECOMMISSIONED_EXCEPTION( + org.opensearch.cluster.decommission.NodeDecommissionedException.class, + org.opensearch.cluster.decommission.NodeDecommissionedException::new, + 163, + V_2_1_0 ); final Class exceptionClass; diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index 5afdb5b12db23..dca706c250a20 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -39,6 +39,9 @@ import org.opensearch.cluster.ClusterStateTaskExecutor; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.block.ClusterBlocks; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.NodeDecommissionedException; +import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; @@ -107,7 +110,9 @@ public boolean isBecomeClusterManagerTask() { return reason.equals(BECOME_MASTER_TASK_REASON) || reason.equals(BECOME_CLUSTER_MANAGER_TASK_REASON); } - /** @deprecated As of 2.2, because supporting inclusive language, replaced by {@link #isBecomeClusterManagerTask()} */ + /** + * @deprecated As of 2.2, because supporting inclusive language, replaced by {@link #isBecomeClusterManagerTask()} + */ @Deprecated public boolean 
isBecomeMasterTask() { return isBecomeClusterManagerTask(); @@ -358,6 +363,7 @@ public boolean runOnlyOnClusterManager() { /** * a task indicates that the current node should become master + * * @deprecated As of 2.0, because supporting inclusive language, replaced by {@link #newBecomeClusterManagerTask()} */ @Deprecated @@ -384,8 +390,9 @@ public static Task newFinishElectionTask() { * Ensures that all indices are compatible with the given node version. This will ensure that all indices in the given metadata * will not be created with a newer version of opensearch as well as that all indices are newer or equal to the minimum index * compatibility version. - * @see Version#minimumIndexCompatibilityVersion() + * * @throws IllegalStateException if any index is incompatible with the given version + * @see Version#minimumIndexCompatibilityVersion() */ public static void ensureIndexCompatibility(final Version nodeVersion, Metadata metadata) { Version supportedIndexVersion = nodeVersion.minimumIndexCompatibilityVersion(); @@ -415,14 +422,18 @@ public static void ensureIndexCompatibility(final Version nodeVersion, Metadata } } - /** ensures that the joining node has a version that's compatible with all current nodes*/ + /** + * ensures that the joining node has a version that's compatible with all current nodes + */ public static void ensureNodesCompatibility(final Version joiningNodeVersion, DiscoveryNodes currentNodes) { final Version minNodeVersion = currentNodes.getMinNodeVersion(); final Version maxNodeVersion = currentNodes.getMaxNodeVersion(); ensureNodesCompatibility(joiningNodeVersion, minNodeVersion, maxNodeVersion); } - /** ensures that the joining node has a version that's compatible with a given version range */ + /** + * ensures that the joining node has a version that's compatible with a given version range + */ public static void ensureNodesCompatibility(Version joiningNodeVersion, Version minClusterNodeVersion, Version maxClusterNodeVersion) { assert 
minClusterNodeVersion.onOrBefore(maxClusterNodeVersion) : minClusterNodeVersion + " > " + maxClusterNodeVersion; if (joiningNodeVersion.isCompatible(maxClusterNodeVersion) == false) { @@ -466,6 +477,22 @@ public static void ensureMajorVersionBarrier(Version joiningNodeVersion, Version } } + public static void ensureNodeNotDecommissioned(DiscoveryNode node, Metadata metadata) { + DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); + if (decommissionAttributeMetadata != null) { + DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); + if (decommissionAttribute != null) { + if (node.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue())) { + throw new NodeDecommissionedException( + "node has decommissioned attribute [" + + decommissionAttribute.toString() + + "]." + ); + } + } + } + } + public static Collection> addBuiltInJoinValidators( Collection> onJoinValidators ) { @@ -473,6 +500,7 @@ public static Collection> addBuiltInJoin validators.add((node, state) -> { ensureNodesCompatibility(node.getVersion(), state.getNodes()); ensureIndexCompatibility(node.getVersion(), state.getMetadata()); + ensureNodeNotDecommissioned(node, state.getMetadata()); }); validators.addAll(onJoinValidators); return Collections.unmodifiableCollection(validators); diff --git a/server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java b/server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java new file mode 100644 index 0000000000000..d4ca4679a0872 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source 
license. + */ + +package org.opensearch.cluster.decommission; + +import org.opensearch.OpenSearchException; +import org.opensearch.common.io.stream.StreamInput; + +import java.io.IOException; + +/** + * This exception is thrown if the node is decommissioned by @{@link DecommissionService} + * and this nodes needs to be removed from the cluster + * + * @opensearch.internal + */ + +public class NodeDecommissionedException extends OpenSearchException { + + public NodeDecommissionedException(String msg, Object... args) { + super(msg, args); + } + + public NodeDecommissionedException(StreamInput in) throws IOException { + super(in); + } +} From 42050fab331674368605061f6be2b5ecf1bdb6e2 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Thu, 18 Aug 2022 10:57:37 +0530 Subject: [PATCH 08/87] Clear voting config after decommissioning Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 48 +++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 48db0e06edb6a..3971931180656 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -15,10 +15,14 @@ import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction; import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest; import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsResponse; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsAction; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsRequest; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsResponse; import 
org.opensearch.cluster.ClusterChangedEvent; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateApplier; import org.opensearch.cluster.ClusterStateUpdateTask; +import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; import org.opensearch.cluster.coordination.NodeRemovalClusterStateTaskExecutor; import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; @@ -34,6 +38,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportException; +import org.opensearch.transport.TransportResponse; import org.opensearch.transport.TransportResponseHandler; import org.opensearch.transport.TransportService; @@ -43,7 +48,6 @@ import java.util.List; import java.util.function.Predicate; -// do we need to implement ClusterStateApplier -> will a change in cluster state impact this service?? public class DecommissionService implements ClusterStateApplier { private static final Logger logger = LogManager.getLogger(DecommissionService.class); @@ -102,10 +106,10 @@ public void initiateAttributeDecommissioning( */ this.clusterState = state; logger.info("initiating awareness attribute [{}] decommissioning", decommissionAttribute.toString()); - abdicateDecommissionedClusterManagerNodes(decommissionAttribute, listener); + excludeDecommissionedClusterManagerNodesFromVotingConfig(decommissionAttribute, listener); } - private void abdicateDecommissionedClusterManagerNodes( + private void excludeDecommissionedClusterManagerNodesFromVotingConfig( DecommissionAttribute decommissionAttribute, final ActionListener listener ) { @@ -149,6 +153,38 @@ public AddVotingConfigExclusionsResponse read(StreamInput in) throws IOException ); } + private void clearVotingConfigAfterSuccessfulDecommission() { + final ClearVotingConfigExclusionsRequest clearVotingConfigExclusionsRequest = new ClearVotingConfigExclusionsRequest(); + 
clearVotingConfigExclusionsRequest.setWaitForRemoval(true); + transportService.sendRequest( + transportService.getLocalNode(), + ClearVotingConfigExclusionsAction.NAME, + clearVotingConfigExclusionsRequest, + new TransportResponseHandler() { + @Override + public void handleResponse(ClearVotingConfigExclusionsResponse response) { + logger.info("successfully cleared voting config after decommissioning"); + } + + @Override + public void handleException(TransportException exp) { + logger.debug(new ParameterizedMessage( + "failure in clearing voting config exclusion after decommissioning"), exp); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + @Override + public ClearVotingConfigExclusionsResponse read(StreamInput in) throws IOException { + return new ClearVotingConfigExclusionsResponse(in); + } + } + ); + } + /** * Registers new decommissioned attribute metadata in the cluster state *

@@ -186,6 +222,10 @@ public void onFailure(String source, Exception e) { // TODO - should we modify logic of logging for ease of debugging? if (e instanceof DecommissionFailedException) { logger.error(() -> new ParameterizedMessage("failed to decommission attribute [{}]", decommissionAttribute.toString()), e); + } else if (e instanceof NotClusterManagerException) { + logger.info(() -> new ParameterizedMessage( + "cluster-manager updated while executing request for decommission attribute [{}]", + decommissionAttribute.toString()), e); + } else { // could be due to no longer cluster manager clusterService.submitStateUpdateTask( @@ -263,7 +303,9 @@ private void failDecommissionedNodes(ClusterState state) { nodesToBeDecommissioned.add(node); } } + // TODO - check for response from decommission request and then clear voting config? decommissionHelper.handleNodesDecommissionRequest(nodesToBeDecommissioned, "nodes-decommissioned"); + clearVotingConfigAfterSuccessfulDecommission(); } private static boolean nodeHasDecommissionedAttribute(DiscoveryNode discoveryNode, DecommissionAttribute decommissionAttribute) { From 30dbfdc1676f7e2b91cf617c2b53fb9479fb4078 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 23 Aug 2022 14:35:57 +0530 Subject: [PATCH 09/87] Resolving comments Signed-off-by: Rishab Nahata --- .../org/opensearch/OpenSearchException.java | 8 +- .../org/opensearch/cluster/ClusterModule.java | 7 +- .../coordination/JoinTaskExecutor.java | 8 +- .../decommission/DecommissionAttribute.java | 14 +- .../DecommissionFailedException.java | 6 + .../decommission/DecommissionHelper.java | 18 +-- .../decommission/DecommissionService.java | 123 +++++++++++------- .../decommission/DecommissionStatus.java | 78 +++++------ .../NodeDecommissionedException.java | 1 - .../DecommissionAttributeMetadata.java | 54 +++----- 10 files changed, 161 insertions(+), 156 deletions(-) diff --git a/server/src/main/java/org/opensearch/OpenSearchException.java
b/server/src/main/java/org/opensearch/OpenSearchException.java index 44d17cbca5652..5ae83c9df70d3 100644 --- a/server/src/main/java/org/opensearch/OpenSearchException.java +++ b/server/src/main/java/org/opensearch/OpenSearchException.java @@ -34,7 +34,6 @@ import org.opensearch.action.support.replication.ReplicationOperation; import org.opensearch.cluster.action.shard.ShardStateAction; -import org.opensearch.cluster.decommission.DecommissionFailedException; import org.opensearch.common.CheckedFunction; import org.opensearch.common.Nullable; import org.opensearch.common.ParseField; @@ -69,6 +68,7 @@ import static java.util.Collections.singletonMap; import static java.util.Collections.unmodifiableMap; import static org.opensearch.Version.V_2_1_0; +import static org.opensearch.Version.V_2_3_0; import static org.opensearch.Version.V_3_0_0; import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_UUID_NA_VALUE; import static org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken; @@ -1614,13 +1614,13 @@ private enum OpenSearchExceptionHandle { org.opensearch.cluster.decommission.DecommissionFailedException.class, org.opensearch.cluster.decommission.DecommissionFailedException::new, 163, - V_2_1_0 + V_2_3_0 ), NODE_DECOMMISSIONED_EXCEPTION( org.opensearch.cluster.decommission.NodeDecommissionedException.class, org.opensearch.cluster.decommission.NodeDecommissionedException::new, - 163, - V_2_1_0 + 164, + V_2_3_0 ); final Class exceptionClass; diff --git a/server/src/main/java/org/opensearch/cluster/ClusterModule.java b/server/src/main/java/org/opensearch/cluster/ClusterModule.java index de63369dafc89..115b9bdf3d8d6 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterModule.java @@ -192,7 +192,12 @@ public static List getNamedWriteables() { ComposableIndexTemplateMetadata::readDiffFrom ); registerMetadataCustom(entries, DataStreamMetadata.TYPE, 
DataStreamMetadata::new, DataStreamMetadata::readDiffFrom); - registerMetadataCustom(entries, DecommissionAttributeMetadata.TYPE, DecommissionAttributeMetadata::new, DecommissionAttributeMetadata::readDiffFrom); + registerMetadataCustom( + entries, + DecommissionAttributeMetadata.TYPE, + DecommissionAttributeMetadata::new, + DecommissionAttributeMetadata::readDiffFrom + ); // Task Status (not Diffable) entries.add(new Entry(Task.Status.class, PersistentTasksNodeService.Status.NAME, PersistentTasksNodeService.Status::new)); return entries; diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index dca706c250a20..c309d2a3e06cc 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -110,9 +110,7 @@ public boolean isBecomeClusterManagerTask() { return reason.equals(BECOME_MASTER_TASK_REASON) || reason.equals(BECOME_CLUSTER_MANAGER_TASK_REASON); } - /** - * @deprecated As of 2.2, because supporting inclusive language, replaced by {@link #isBecomeClusterManagerTask()} - */ + /** @deprecated As of 2.2, because supporting inclusive language, replaced by {@link #isBecomeClusterManagerTask()} */ @Deprecated public boolean isBecomeMasterTask() { return isBecomeClusterManagerTask(); @@ -484,9 +482,7 @@ public static void ensureNodeNotDecommissioned(DiscoveryNode node, Metadata meta if (decommissionAttribute != null) { if (node.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue())) { throw new NodeDecommissionedException( - "node has decommissioned attribute [" - + decommissionAttribute.toString() - + "]." 
+ "node [{}] has decommissioned attribute [{}].", node.getId(), decommissionAttribute.toString() ); } } diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java index 15c17ae4b7ae1..1eb6b488447a1 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java @@ -8,6 +8,7 @@ package org.opensearch.cluster.decommission; +import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.io.stream.Writeable; @@ -15,12 +16,17 @@ import java.io.IOException; import java.util.Objects; +/** + * {@link DecommissionAttribute} encapsulates information about decommissioned node attribute like attribute name, attribute value. + * + * @opensearch.internal + */ public final class DecommissionAttribute implements Writeable { private final String attributeName; private final String attributeValue; /** - * Update the attribute value for a given attribute name to decommission + * Construct new decommission attribute with updated value from a given decommission attribute * * @param decommissionAttribute current decommissioned attribute object * @param attributeValue attribute value to be updated with @@ -100,12 +106,8 @@ public int hashCode() { return Objects.hash(attributeName, attributeValue); } - @Override public String toString() { - return "DecommissionAttribute{" + - "attributeName='" + attributeName + '\'' + - ", attributeValue='" + attributeValue + '\'' + - '}'; + return "DecommissionAttribute{" + "attributeName='" + attributeName + '\'' + ", attributeValue='" + attributeValue + '\'' + '}'; } } diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionFailedException.java 
b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionFailedException.java index 3a611c2488779..3ba121dd90cee 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionFailedException.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionFailedException.java @@ -14,6 +14,12 @@ import java.io.IOException; +/** + * This exception is thrown whenever a failure occurs in decommission request @{@link DecommissionService} + * + * @opensearch.internal + */ + public class DecommissionFailedException extends OpenSearchException { private final DecommissionAttribute decommissionAttribute; diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java index d1eb17adc9747..ce7befacaef98 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java @@ -14,6 +14,7 @@ import org.opensearch.cluster.ClusterStateTaskListener; import org.opensearch.cluster.coordination.NodeRemovalClusterStateTaskExecutor; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.Priority; @@ -21,6 +22,12 @@ import java.util.List; import java.util.Map; +/** + * Helper executor class to remove list of nodes from the cluster + * + * @opensearch.internal + */ + public class DecommissionHelper { private static final Logger logger = LogManager.getLogger(DecommissionHelper.class); @@ -28,20 +35,15 @@ public class DecommissionHelper { private final NodeRemovalClusterStateTaskExecutor nodeRemovalExecutor; private final ClusterService clusterService; - DecommissionHelper( - ClusterService clusterService, - NodeRemovalClusterStateTaskExecutor 
nodeRemovalClusterStateTaskExecutor - ) { - this.nodeRemovalExecutor = nodeRemovalClusterStateTaskExecutor; + DecommissionHelper(ClusterService clusterService, AllocationService allocationService) { this.clusterService = clusterService; + this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, logger); } public void handleNodesDecommissionRequest(List nodesToBeDecommissioned, String reason) { final Map nodesDecommissionTasks = new LinkedHashMap<>(); nodesToBeDecommissioned.forEach(discoveryNode -> { - final NodeRemovalClusterStateTaskExecutor.Task task = new NodeRemovalClusterStateTaskExecutor.Task( - discoveryNode, reason - ); + final NodeRemovalClusterStateTaskExecutor.Task task = new NodeRemovalClusterStateTaskExecutor.Task(discoveryNode, reason); nodesDecommissionTasks.put(task, nodeRemovalExecutor); }); final String source = "node-decommissioned"; diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 3971931180656..ee5e657a500ce 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -38,7 +38,6 @@ import org.opensearch.common.settings.Settings; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportException; -import org.opensearch.transport.TransportResponse; import org.opensearch.transport.TransportResponseHandler; import org.opensearch.transport.TransportService; @@ -48,13 +47,17 @@ import java.util.List; import java.util.function.Predicate; +/** + * Service responsible for entire lifecycle of decommissioning and recommissioning an awareness attribute. 
+ * + * @opensearch.internal + */ public class DecommissionService implements ClusterStateApplier { private static final Logger logger = LogManager.getLogger(DecommissionService.class); private final ClusterService clusterService; private final TransportService transportService; - private final NodeRemovalClusterStateTaskExecutor nodeRemovalExecutor; private final ThreadPool threadPool; private final DecommissionHelper decommissionHelper; private ClusterState clusterState; @@ -72,11 +75,7 @@ public DecommissionService( this.clusterService = clusterService; this.transportService = transportService; this.threadPool = threadPool; - this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, logger); - this.decommissionHelper = new DecommissionHelper( - clusterService, - nodeRemovalExecutor - ); + this.decommissionHelper = new DecommissionHelper(clusterService, allocationService); this.awarenessAttributes = AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.get(settings); clusterSettings.addSettingsUpdateConsumer( AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING, @@ -113,7 +112,10 @@ private void excludeDecommissionedClusterManagerNodesFromVotingConfig( DecommissionAttribute decommissionAttribute, final ActionListener listener ) { - final Predicate shouldAbdicatePredicate = discoveryNode -> nodeHasDecommissionedAttribute(discoveryNode, decommissionAttribute); + final Predicate shouldAbdicatePredicate = discoveryNode -> nodeHasDecommissionedAttribute( + discoveryNode, + decommissionAttribute + ); List clusterManagerNodesToBeDecommissioned = new ArrayList<>(); Iterator clusterManagerNodesIter = clusterState.nodes().getClusterManagerNodes().valuesIt(); while (clusterManagerNodesIter.hasNext()) { @@ -129,15 +131,20 @@ private void excludeDecommissionedClusterManagerNodesFromVotingConfig( new TransportResponseHandler() { @Override public void 
handleResponse(AddVotingConfigExclusionsResponse response) { - logger.info("successfully removed decommissioned cluster manager eligible nodes [{}] from voting config, " + - "proceeding to drain the decommissioned nodes", clusterManagerNodesToBeDecommissioned.toString()); + logger.info( + "successfully removed decommissioned cluster manager eligible nodes [{}] from voting config, " + + "proceeding to drain the decommissioned nodes", + clusterManagerNodesToBeDecommissioned.toString() + ); registerDecommissionAttribute(decommissionAttribute, listener); } @Override public void handleException(TransportException exp) { - logger.debug(new ParameterizedMessage( - "failure in removing decommissioned cluster manager eligible nodes from voting config"), exp); + logger.debug( + new ParameterizedMessage("failure in removing decommissioned cluster manager eligible nodes from voting config"), + exp + ); } @Override @@ -168,8 +175,7 @@ public void handleResponse(ClearVotingConfigExclusionsResponse response) { @Override public void handleException(TransportException exp) { - logger.debug(new ParameterizedMessage( - "failure in clearing voting config exclusion after decommissioning"), exp); + logger.debug(new ParameterizedMessage("failure in clearing voting config exclusion after decommissioning"), exp); } @Override @@ -200,6 +206,10 @@ private void registerDecommissionAttribute( final DecommissionAttribute decommissionAttribute, final ActionListener listener ) { + // check the local node is master and not in decommission attribute + assert transportService.getLocalNode().isClusterManagerNode() + && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute) + : "cannot register decommission attribute, as local node is not master or is going to be decommissioned"; validateAwarenessAttribute(decommissionAttribute, getAwarenessAttributes()); clusterService.submitStateUpdateTask( "put_decommission [" + decommissionAttribute + "]", @@ -218,47 +228,57 @@ 
public ClusterState execute(ClusterState currentState) throws Exception { @Override public void onFailure(String source, Exception e) { - // TODO - should we put the weights back to zone, since we weighed away the zone before we started registering the metadata + // TODO - should we put the weights back to zone, since we weighed away the zone before we started registering the + // metadata // TODO - should we modify logic of logging for ease of debugging? if (e instanceof DecommissionFailedException) { - logger.error(() -> new ParameterizedMessage("failed to decommission attribute [{}]", decommissionAttribute.toString()), e); + logger.error( + () -> new ParameterizedMessage("failed to decommission attribute [{}]", decommissionAttribute.toString()), + e + ); } else if (e instanceof NotClusterManagerException) { - logger.info(() -> new ParameterizedMessage( - "cluster-manager updated while executing request for decommission attribute [{}]", - decommissionAttribute.toString()), e); + logger.info( + () -> new ParameterizedMessage( + "cluster-manager updated while executing request for decommission attribute [{}]", + decommissionAttribute.toString() + ), + e + ); } else { // could be due to no longer cluster manager - clusterService.submitStateUpdateTask( - "decommission_failed", - new ClusterStateUpdateTask(Priority.URGENT) { - @Override - public ClusterState execute(ClusterState currentState) throws Exception { - Metadata metadata = currentState.metadata(); - Metadata.Builder mdBuilder = Metadata.builder(metadata); - logger.info("decommission request for attribute [{}] failed", decommissionAttribute.toString()); - DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( - decommissionAttribute, - DecommissionStatus.DECOMMISSION_FAILED - ); - mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); - return ClusterState.builder(currentState).metadata(mdBuilder).build(); - } +
clusterService.submitStateUpdateTask("decommission_failed", new ClusterStateUpdateTask(Priority.URGENT) { + @Override + public ClusterState execute(ClusterState currentState) throws Exception { + Metadata metadata = currentState.metadata(); + Metadata.Builder mdBuilder = Metadata.builder(metadata); + logger.info("decommission request for attribute [{}] failed", decommissionAttribute.toString()); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + DecommissionStatus.DECOMMISSION_FAILED + ); + mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); + return ClusterState.builder(currentState).metadata(mdBuilder).build(); + } - @Override - public void onFailure(String source, Exception e) { - logger.error(() -> new ParameterizedMessage( + @Override + public void onFailure(String source, Exception e) { + logger.error( + () -> new ParameterizedMessage( "failed to mark status as DECOMMISSION_FAILED for decommission attribute [{}]", - decommissionAttribute.toString()), e); -// listener.onFailure(e); - } + decommissionAttribute.toString() + ), + e + ); + // listener.onFailure(e); } - ); + }); } listener.onFailure(e); } @Override public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + assert !newState.equals(oldState) : "no update in cluster state after initiating decommission request."; if (!newState.equals(oldState)) { // TODO - drain the nodes before decommissioning failDecommissionedNodes(newState); @@ -283,19 +303,26 @@ private static void ensureNoAwarenessAttributeDecommissioned( DecommissionAttribute decommissionAttribute ) { // If the previous decommission request failed, we will allow the request to pass this check - if (decommissionAttributeMetadata != null && !decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_FAILED)) { - throw new DecommissionFailedException(decommissionAttribute, "one 
awareness attribute already decommissioned, " + - "recommission before triggering another decommission"); + if (decommissionAttributeMetadata != null + && !decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_FAILED)) { + throw new DecommissionFailedException( + decommissionAttribute, + "one awareness attribute already decommissioned, " + "recommission before triggering another decommission" + ); } } private void failDecommissionedNodes(ClusterState state) { DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); // TODO update the status check to DECOMMISSIONING once graceful decommission is implemented - assert decommissionAttributeMetadata.status().equals(DecommissionStatus.INIT) : "unexpected status encountered while decommissioning nodes"; + assert decommissionAttributeMetadata.status().equals(DecommissionStatus.INIT) + : "unexpected status encountered while decommissioning nodes"; DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); List nodesToBeDecommissioned = new ArrayList<>(); - final Predicate shouldRemoveNodePredicate = discoveryNode -> nodeHasDecommissionedAttribute(discoveryNode, decommissionAttribute); + final Predicate shouldRemoveNodePredicate = discoveryNode -> nodeHasDecommissionedAttribute( + discoveryNode, + decommissionAttribute + ); Iterator nodesIter = state.nodes().getNodes().valuesIt(); while (nodesIter.hasNext()) { final DiscoveryNode node = nodesIter.next(); @@ -309,9 +336,7 @@ private void failDecommissionedNodes(ClusterState state) { } private static boolean nodeHasDecommissionedAttribute(DiscoveryNode discoveryNode, DecommissionAttribute decommissionAttribute) { - return discoveryNode.getAttributes().get( - decommissionAttribute.attributeName() - ).equals(decommissionAttribute.attributeValue()); + return 
discoveryNode.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue()); } @Override diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java index 8f4ca3a6f578a..d091b8ab44e30 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java @@ -8,87 +8,73 @@ package org.opensearch.cluster.decommission; +/** + * An enumeration of the states during decommissioning and recommissioning. + */ public enum DecommissionStatus { /** * Decommission process is initiated */ - INIT((byte) 0), + INIT("init"), /** * Decommission process has started, decommissioned nodes should be weighed away */ - DECOMMISSIONING((byte) 1), + DECOMMISSION_IN_PROGRESS("decommission_in_progress"), /** * Decommissioning awareness attribute completed */ - DECOMMISSIONED((byte) 2), + DECOMMISSION_SUCCESSFUL("decommission_successful"), /** * Decommission request failed */ - DECOMMISSION_FAILED((byte) 3), + DECOMMISSION_FAILED("decommission_failed"), /** * Recommission request received, recommissioning process has started */ - RECOMMISSIONING((byte) 4), + RECOMMISSION_IN_PROGRESS("recommission_in_progress"), /** * Recommission request failed. 
No nodes should fail to join the cluster with decommission exception */ - RECOMMISSION_FAILED((byte) 5); + RECOMMISSION_FAILED("recommission_failed"); - private final byte value; + private final String status; - DecommissionStatus(byte value) { - this.value = value; + DecommissionStatus(String status) { + this.status = status; } /** - * Returns code that represents the decommission state + * Returns status that represents the decommission state * - * @return code for the state + * @return status */ - public byte value() { - return value; + public String status() { + return status; } /** - * Generate decommission state from code + * Generate decommission status from given string * - * @param value the state code - * @return state + * @param status status in string + * @return status */ - public static DecommissionStatus fromValue(byte value) { - switch (value) { - case 0: - return INIT; - case 1: - return DECOMMISSIONING; - case 2: - return DECOMMISSIONED; - case 3: - return DECOMMISSION_FAILED; - case 4: - return RECOMMISSIONING; - case 5: - return RECOMMISSION_FAILED; - default: - throw new IllegalArgumentException("No decommission state for value [" + value + "]"); - } - } - public static DecommissionStatus fromString(String status) { - if ("init".equals(status)) { + if (status == null) { + throw new IllegalArgumentException("decommission status cannot be null"); + } + if (status.equals(INIT.status())) { return INIT; - } else if ("decommissioning".equals(status)) { - return DECOMMISSIONING; - } else if ("decommissioned".equals(status)) { - return DECOMMISSIONED; - } else if ("decommission_failed".equals(status)) { + } else if (status.equals(DECOMMISSION_IN_PROGRESS.status())) { + return DECOMMISSION_IN_PROGRESS; + } else if (status.equals(DECOMMISSION_SUCCESSFUL.status())) { + return DECOMMISSION_SUCCESSFUL; + } else if (status.equals(DECOMMISSION_FAILED.status())) { return DECOMMISSION_FAILED; - } else if ("recommissioning".equals(status)) { - return 
RECOMMISSIONING; - } else if ("recommission_failed".equals(status)) { + } else if (status.equals(RECOMMISSION_IN_PROGRESS.status())) { + return RECOMMISSION_IN_PROGRESS; + } else if (status.equals(RECOMMISSION_FAILED.status())) { return RECOMMISSION_FAILED; } - throw new IllegalStateException("No status match for [" + status + "]"); + throw new IllegalStateException("Decommission status [" + status + "] not recognized."); } } - diff --git a/server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java b/server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java index d4ca4679a0872..847d5a527b017 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java @@ -19,7 +19,6 @@ * * @opensearch.internal */ - public class NodeDecommissionedException extends OpenSearchException { public NodeDecommissionedException(String msg, Object... 
args) { diff --git a/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java index 869576f0ea070..258f2a4e1b2a8 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java @@ -57,8 +57,7 @@ public DecommissionAttributeMetadata(DecommissionAttribute decommissionAttribute * @param decommissionAttribute attribute details */ public DecommissionAttributeMetadata(DecommissionAttribute decommissionAttribute) { - this.decommissionAttribute = decommissionAttribute; - this.status = DecommissionStatus.INIT; + this(decommissionAttribute, DecommissionStatus.INIT); } /** @@ -79,13 +78,8 @@ public DecommissionStatus status() { return this.status; } - public DecommissionAttributeMetadata withUpdatedStatus( - DecommissionAttributeMetadata metadata, - DecommissionStatus status) { - return new DecommissionAttributeMetadata( - metadata.decommissionAttribute(), - status - ); + public DecommissionAttributeMetadata withUpdatedStatus(DecommissionAttributeMetadata metadata, DecommissionStatus status) { + return new DecommissionAttributeMetadata(metadata.decommissionAttribute(), status); } /** @@ -95,10 +89,7 @@ public DecommissionAttributeMetadata withUpdatedStatus( * @param attributeValue new attribute value * @return new instance with updated attribute value and status as DecommissionStatus.INIT */ - public DecommissionAttributeMetadata withUpdatedAttributeValue( - DecommissionAttributeMetadata metadata, - String attributeValue - ) { + public DecommissionAttributeMetadata withUpdatedAttributeValue(DecommissionAttributeMetadata metadata, String attributeValue) { return new DecommissionAttributeMetadata( new DecommissionAttribute(metadata.decommissionAttribute, attributeValue), DecommissionStatus.INIT @@ -116,21 +107,6 @@ public boolean 
equals(Object o) { return decommissionAttribute.equals(that.decommissionAttribute); } - /** - * Checks if this instance and the given instance share the same decommissioned attributeName - * and only differ in the attributeValue {@link DecommissionAttribute#attributeValue()} - * - * @param other other decommission attribute metadata - * @return {@code true} iff both instances contain the same attributeName - */ - public boolean equalsIgnoreValue(@Nullable DecommissionAttributeMetadata other) { - if (other == null) { - return false; - } - if (!status.equals(other.status)) return false; - return decommissionAttribute.equalsIgnoreValues(other.decommissionAttribute); - } - @Override public int hashCode() { return Objects.hash(attributeType, decommissionAttribute, status); @@ -146,12 +122,12 @@ public String getWriteableName() { @Override public Version getMinimalSupportedVersion() { - return Version.CURRENT.minimumCompatibilityVersion(); + return Version.V_2_3_0; } public DecommissionAttributeMetadata(StreamInput in) throws IOException { this.decommissionAttribute = new DecommissionAttribute(in); - this.status = DecommissionStatus.fromValue(in.readByte()); + this.status = DecommissionStatus.fromString(in.readString()); } public static NamedDiff readDiffFrom(StreamInput in) throws IOException { @@ -164,7 +140,7 @@ public static NamedDiff readDiffFrom(StreamInput in) throws IOException @Override public void writeTo(StreamOutput out) throws IOException { decommissionAttribute.writeTo(out); - out.writeByte(status.value()); + out.writeString(status.status()); } public static DecommissionAttributeMetadata fromXContent(XContentParser parser) throws IOException { @@ -176,7 +152,10 @@ public static DecommissionAttributeMetadata fromXContent(XContentParser parser) String currentFieldName = parser.currentName(); if (attributeType.equals(currentFieldName)) { if (parser.nextToken() != XContentParser.Token.START_OBJECT) { - throw new OpenSearchParseException("failed to parse 
decommission attribute type [{}], expected object", attributeType); + throw new OpenSearchParseException( + "failed to parse decommission attribute type [{}], expected object", + attributeType + ); } token = parser.nextToken(); if (token != XContentParser.Token.END_OBJECT) { @@ -187,7 +166,10 @@ public static DecommissionAttributeMetadata fromXContent(XContentParser parser) if (token == XContentParser.Token.VALUE_STRING) { value = parser.text(); } else { - throw new OpenSearchParseException("failed to parse attribute [{}], expected string for attribute value", fieldName); + throw new OpenSearchParseException( + "failed to parse attribute [{}], expected string for attribute value", + fieldName + ); } decommissionAttribute = new DecommissionAttribute(fieldName, value); } else { @@ -198,7 +180,9 @@ public static DecommissionAttributeMetadata fromXContent(XContentParser parser) } } else if ("status".equals(currentFieldName)) { if (parser.nextToken() != XContentParser.Token.VALUE_STRING) { - throw new OpenSearchParseException("failed to parse status of decommissioning, expected string but found unknown type"); + throw new OpenSearchParseException( + "failed to parse status of decommissioning, expected string but found unknown type" + ); } status = DecommissionStatus.fromString(parser.text()); } else { @@ -243,7 +227,7 @@ public static void toXContent( builder.startObject(attributeType); builder.field(decommissionAttribute.attributeName(), decommissionAttribute.attributeValue()); builder.endObject(); - builder.field("status", status); + builder.field("status", status.status()); } @Override From 52a8e6b510e09063d01ff037a6c369c7e7dbc555 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 23 Aug 2022 15:24:43 +0530 Subject: [PATCH 10/87] Fixes Signed-off-by: Rishab Nahata --- .../decommission/DecommissionAttribute.java | 20 ------------------- .../decommission/DecommissionService.java | 7 ++++--- .../DecommissionAttributeMetadata.java | 14 ------------- 3 files changed, 
4 insertions(+), 37 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java index 1eb6b488447a1..1a30b59d5a60e 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java @@ -25,16 +25,6 @@ public final class DecommissionAttribute implements Writeable { private final String attributeName; private final String attributeValue; - /** - * Construct new decommission attribute with updated value from a given decommission attribute - * - * @param decommissionAttribute current decommissioned attribute object - * @param attributeValue attribute value to be updated with - */ - public DecommissionAttribute(DecommissionAttribute decommissionAttribute, String attributeValue) { - this(decommissionAttribute.attributeName, attributeValue); - } - /** * Constructs new decommission attribute name value pair * @@ -80,16 +70,6 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(attributeValue); } - /** - * Checks if this instance is equal to the other instance in attributeName but differ in attribute value {@link #attributeValue}. 
- * - * @param other other decommission attribute name value - * @return {@code true} if both instances equal in attributeName fields but the attributeValue field - */ - public boolean equalsIgnoreValues(DecommissionAttribute other) { - return attributeName.equals(other.attributeName); - } - @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index ee5e657a500ce..53813ead9fcad 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -103,6 +103,7 @@ public void initiateAttributeDecommissioning( * 4. Once zone is weighed away -> trigger zone decommission using executor -> status should be set to DECOMMISSIONED on successful response * 5. Clear voting config */ + validateAwarenessAttribute(decommissionAttribute, getAwarenessAttributes()); this.clusterState = state; logger.info("initiating awareness attribute [{}] decommissioning", decommissionAttribute.toString()); excludeDecommissionedClusterManagerNodesFromVotingConfig(decommissionAttribute, listener); @@ -112,7 +113,7 @@ private void excludeDecommissionedClusterManagerNodesFromVotingConfig( DecommissionAttribute decommissionAttribute, final ActionListener listener ) { - final Predicate shouldAbdicatePredicate = discoveryNode -> nodeHasDecommissionedAttribute( + final Predicate shouldDecommissionPredicate = discoveryNode -> nodeHasDecommissionedAttribute( discoveryNode, decommissionAttribute ); @@ -120,7 +121,7 @@ private void excludeDecommissionedClusterManagerNodesFromVotingConfig( Iterator clusterManagerNodesIter = clusterState.nodes().getClusterManagerNodes().valuesIt(); while (clusterManagerNodesIter.hasNext()) { final DiscoveryNode node = clusterManagerNodesIter.next(); - if 
(shouldAbdicatePredicate.test(node)) { + if (shouldDecommissionPredicate.test(node)) { clusterManagerNodesToBeDecommissioned.add(node.getName()); } } @@ -210,7 +211,7 @@ private void registerDecommissionAttribute( assert transportService.getLocalNode().isClusterManagerNode() && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute) : "cannot register decommission attribute, as local node is not master or is going to be decommissioned"; - validateAwarenessAttribute(decommissionAttribute, getAwarenessAttributes()); + clusterService.submitStateUpdateTask( "put_decommission [" + decommissionAttribute + "]", new ClusterStateUpdateTask(Priority.URGENT) { diff --git a/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java index 258f2a4e1b2a8..6cba47d229534 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java @@ -82,20 +82,6 @@ public DecommissionAttributeMetadata withUpdatedStatus(DecommissionAttributeMeta return new DecommissionAttributeMetadata(metadata.decommissionAttribute(), status); } - /** - * Creates a new instance with a updated attribute value. 
- * - * @param metadata current metadata - * @param attributeValue new attribute value - * @return new instance with updated attribute value and status as DecommissionStatus.INIT - */ - public DecommissionAttributeMetadata withUpdatedAttributeValue(DecommissionAttributeMetadata metadata, String attributeValue) { - return new DecommissionAttributeMetadata( - new DecommissionAttribute(metadata.decommissionAttribute, attributeValue), - DecommissionStatus.INIT - ); - } - @Override public boolean equals(Object o) { if (this == o) return true; From 1561024a33da75764952d8cc51f47a0dd897599f Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 23 Aug 2022 16:46:33 +0530 Subject: [PATCH 11/87] Fixes Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 55 ++++++++++++++++--- .../decommission/DecommissionStatus.java | 6 +- .../DecommissionAttributeMetadata.java | 14 +++-- 3 files changed, 58 insertions(+), 17 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 53813ead9fcad..fe67723187eab 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -24,7 +24,6 @@ import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; -import org.opensearch.cluster.coordination.NodeRemovalClusterStateTaskExecutor; import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; @@ -50,6 +49,17 @@ /** * Service responsible for entire lifecycle of decommissioning and recommissioning an awareness attribute. 
* + * Whenever a cluster manager initiates operation to decommission an awareness attribute, + * the service makes the best attempt to perform the following task - + *

+ * 1. Removes cluster-manager eligible nodes from the voting config [TODO - checks to avoid quorum loss scenarios] + * 2. Initiates nodes decommissioning by adding custom metadata with the attribute and state as {@link DecommissionStatus#DECOMMISSION_INIT} + * 3. Triggers weigh away for nodes having the given awareness attribute to drain. This marks the decommission status as {@link DecommissionStatus#DECOMMISSION_IN_PROGRESS} + * 4. Once weighed away, the service triggers nodes decommission + * 5. Once the decommission is successful, the service clears the voting config and marks the status as {@link DecommissionStatus#DECOMMISSION_SUCCESSFUL} + * 6. If the service fails at any step, it marks the status as {@link DecommissionStatus#DECOMMISSION_FAILED} + *

+ * * @opensearch.internal */ public class DecommissionService implements ClusterStateApplier { @@ -96,13 +106,6 @@ public void initiateAttributeDecommissioning( final ActionListener listener, ClusterState state ) { - /* - * 1. Abdicate master - * 2. Register attribute -> status should be set to INIT - * 3. Trigger weigh away for graceful decommission -> status should be set to DECOMMISSIONING - * 4. Once zone is weighed away -> trigger zone decommission using executor -> status should be set to DECOMMISSIONED on successful response - * 5. Clear voting config - */ validateAwarenessAttribute(decommissionAttribute, getAwarenessAttributes()); this.clusterState = state; logger.info("initiating awareness attribute [{}] decommissioning", decommissionAttribute.toString()); @@ -292,6 +295,40 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS ); } + // To Do - Can we add a consumer here such that whenever this succeeds we call the next method in on cluster state processed + private void updateMetadataWithDecommissionStatus( + DecommissionStatus decommissionStatus + ) { + clusterService.submitStateUpdateTask( + decommissionStatus.status(), + new ClusterStateUpdateTask(Priority.URGENT) { + @Override + public ClusterState execute(ClusterState currentState) throws Exception { + Metadata metadata = currentState.metadata(); + DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); + assert decommissionAttributeMetadata != null + && decommissionAttributeMetadata.decommissionAttribute() != null + : "failed to update status for decommission. 
metadata doesn't exist or invalid"; + Metadata.Builder mdBuilder = Metadata.builder(metadata); + DecommissionAttributeMetadata newMetadata = decommissionAttributeMetadata.withUpdatedStatus(decommissionStatus); + mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, newMetadata); + return ClusterState.builder(currentState).metadata(mdBuilder).build(); + } + + @Override + public void onFailure(String source, Exception e) { + logger.error( + () -> new ParameterizedMessage( + "failed to mark status as [{}]", + decommissionStatus.status() + ), + e + ); + } + } + ); + } + private static void validateAwarenessAttribute(final DecommissionAttribute decommissionAttribute, List awarenessAttributes) { if (!awarenessAttributes.contains(decommissionAttribute.attributeName())) { throw new DecommissionFailedException(decommissionAttribute, "invalid awareness attribute requested for decommissioning"); @@ -316,7 +353,7 @@ private static void ensureNoAwarenessAttributeDecommissioned( private void failDecommissionedNodes(ClusterState state) { DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); // TODO update the status check to DECOMMISSIONING once graceful decommission is implemented - assert decommissionAttributeMetadata.status().equals(DecommissionStatus.INIT) + assert decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_INIT) : "unexpected status encountered while decommissioning nodes"; DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); List nodesToBeDecommissioned = new ArrayList<>(); diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java index d091b8ab44e30..41f9acfbc35d7 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java +++ 
b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java @@ -15,7 +15,7 @@ public enum DecommissionStatus { /** * Decommission process is initiated */ - INIT("init"), + DECOMMISSION_INIT("decommission_init"), /** * Decommission process has started, decommissioned nodes should be weighed away */ @@ -62,8 +62,8 @@ public static DecommissionStatus fromString(String status) { if (status == null) { throw new IllegalArgumentException("decommission status cannot be null"); } - if (status.equals(INIT.status())) { - return INIT; + if (status.equals(DECOMMISSION_INIT.status())) { + return DECOMMISSION_INIT; } else if (status.equals(DECOMMISSION_IN_PROGRESS.status())) { return DECOMMISSION_IN_PROGRESS; } else if (status.equals(DECOMMISSION_SUCCESSFUL.status())) { diff --git a/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java index 6cba47d229534..c6252e9981810 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java @@ -15,7 +15,6 @@ import org.opensearch.cluster.decommission.DecommissionAttribute; import org.opensearch.cluster.decommission.DecommissionStatus; import org.opensearch.cluster.metadata.Metadata.Custom; -import org.opensearch.common.Nullable; import org.opensearch.common.Strings; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; @@ -52,12 +51,12 @@ public DecommissionAttributeMetadata(DecommissionAttribute decommissionAttribute } /** - * Constructs new decommission attribute metadata with status as {@link DecommissionStatus#INIT} + * Constructs new decommission attribute metadata with status as {@link DecommissionStatus#DECOMMISSION_INIT} * * @param decommissionAttribute attribute details */ public 
DecommissionAttributeMetadata(DecommissionAttribute decommissionAttribute) { - this(decommissionAttribute, DecommissionStatus.INIT); + this(decommissionAttribute, DecommissionStatus.DECOMMISSION_INIT); } /** @@ -78,8 +77,13 @@ public DecommissionStatus status() { return this.status; } - public DecommissionAttributeMetadata withUpdatedStatus(DecommissionAttributeMetadata metadata, DecommissionStatus status) { - return new DecommissionAttributeMetadata(metadata.decommissionAttribute(), status); + /** + * Creates a new instance that has the given decommission attribute moved to the given @{@link DecommissionStatus} + * @param status status to be updated with + * @return new instance with updated status + */ + public DecommissionAttributeMetadata withUpdatedStatus(DecommissionStatus status) { + return new DecommissionAttributeMetadata(decommissionAttribute(), status); } @Override From 53057ac1549c82d9a9980e9f7277b371917916f3 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 23 Aug 2022 18:07:09 +0530 Subject: [PATCH 12/87] Some refactpring Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 76 ++++++++----------- 1 file changed, 31 insertions(+), 45 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index fe67723187eab..912fcda7baa62 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -21,6 +21,7 @@ import org.opensearch.cluster.ClusterChangedEvent; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateApplier; +import org.opensearch.cluster.ClusterStateTaskListener; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; 
@@ -210,7 +211,6 @@ private void registerDecommissionAttribute( final DecommissionAttribute decommissionAttribute, final ActionListener listener ) { - // check the local node is master and not in decommission attribute assert transportService.getLocalNode().isClusterManagerNode() && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute) : "cannot register decommission attribute, as local node is not master or is going to be decommissioned"; @@ -219,8 +219,12 @@ private void registerDecommissionAttribute( "put_decommission [" + decommissionAttribute + "]", new ClusterStateUpdateTask(Priority.URGENT) { @Override - public ClusterState execute(ClusterState currentState) throws Exception { - logger.info("decommission request for attribute [{}] received", decommissionAttribute.toString()); + public ClusterState execute(ClusterState currentState) { + logger.info( + "registering decommission metadata for attribute [{}] with status as [{}]", + decommissionAttribute.toString(), + DecommissionStatus.DECOMMISSION_INIT + ); Metadata metadata = currentState.metadata(); Metadata.Builder mdBuilder = Metadata.builder(metadata); DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); @@ -232,69 +236,51 @@ public ClusterState execute(ClusterState currentState) throws Exception { @Override public void onFailure(String source, Exception e) { - // TODO - should we put the weights back to zone, since we weighed away the zone before we started registering the - // metadata - // TODO - should we modify logic of logging for ease of debugging? 
if (e instanceof DecommissionFailedException) { logger.error( - () -> new ParameterizedMessage("failed to decommission attribute [{}]", decommissionAttribute.toString()), + () -> new ParameterizedMessage( + "failed to decommission attribute [{}]", + decommissionAttribute.toString()), e ); + listener.onFailure(e); } else if (e instanceof NotClusterManagerException) { - logger.info( + logger.debug( () -> new ParameterizedMessage( "cluster-manager updated while executing request for decommission attribute [{}]", decommissionAttribute.toString() ), e ); + // Do we need a listener here as the transport request will be retried? } else { - // could be due to on longer cluster manager - clusterService.submitStateUpdateTask("decommission_failed", new ClusterStateUpdateTask(Priority.URGENT) { - @Override - public ClusterState execute(ClusterState currentState) throws Exception { - Metadata metadata = currentState.metadata(); - Metadata.Builder mdBuilder = Metadata.builder(metadata); - logger.info("decommission request for attribute [{}] failed", decommissionAttribute.toString()); - DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( - decommissionAttribute, - DecommissionStatus.DECOMMISSION_FAILED - ); - mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); - return ClusterState.builder(currentState).metadata(mdBuilder).build(); - } - - @Override - public void onFailure(String source, Exception e) { - logger.error( - () -> new ParameterizedMessage( - "failed to mark status as DECOMMISSION_FAILED for decommission attribute [{}]", - decommissionAttribute.toString() - ), - e - ); - // listener.onFailure(e); - } - }); + logger.error( + () -> new ParameterizedMessage( + "failed to initiate decommissioning for attribute [{}]", + decommissionAttribute.toString() + ), + e + ); + updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_FAILED); + listener.onFailure(e); } - 
listener.onFailure(e); } @Override public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { assert !newState.equals(oldState) : "no update in cluster state after initiating decommission request."; - if (!newState.equals(oldState)) { - // TODO - drain the nodes before decommissioning - failDecommissionedNodes(newState); - listener.onResponse(new ClusterStateUpdateResponse(true)); - } else { - listener.onResponse(new ClusterStateUpdateResponse(false)); - } + // Do we attach a listener here with failed acknowledgement to the request? + listener.onResponse(new ClusterStateUpdateResponse(true)); + initiateGracefulDecommission(newState, listener); } } ); } + private void initiateGracefulDecommission(ClusterState clusterState, ActionListener listener) { + failDecommissionedNodes(clusterState, listener); + } + // To Do - Can we add a consumer here such that whenever this succeeds we call the next method in on cluster state processed private void updateMetadataWithDecommissionStatus( DecommissionStatus decommissionStatus @@ -345,12 +331,12 @@ private static void ensureNoAwarenessAttributeDecommissioned( && !decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_FAILED)) { throw new DecommissionFailedException( decommissionAttribute, - "one awareness attribute already decommissioned, " + "recommission before triggering another decommission" + "one awareness attribute already decommissioned, recommission before triggering another decommission" ); } } - private void failDecommissionedNodes(ClusterState state) { + private void failDecommissionedNodes(ClusterState state, ActionListener listener) { DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); // TODO update the status check to DECOMMISSIONING once graceful decommission is implemented assert decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_INIT) From 
5c2b91c11f893a63c52b9287d484b89a4229a417 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 23 Aug 2022 19:35:36 +0530 Subject: [PATCH 13/87] Updates Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 912fcda7baa62..53e73f98e8590 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -21,7 +21,6 @@ import org.opensearch.cluster.ClusterChangedEvent; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateApplier; -import org.opensearch.cluster.ClusterStateTaskListener; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; @@ -49,7 +48,7 @@ /** * Service responsible for entire lifecycle of decommissioning and recommissioning an awareness attribute. - * + *

* Whenever a cluster manager initiates operation to decommission an awareness attribute, * the service makes the best attempt to perform the following task - *

@@ -271,14 +270,14 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS assert !newState.equals(oldState) : "no update in cluster state after initiating decommission request."; // Do we attach a listener here with failed acknowledgement to the request? listener.onResponse(new ClusterStateUpdateResponse(true)); - initiateGracefulDecommission(newState, listener); + initiateGracefulDecommission(newState); } } ); } - private void initiateGracefulDecommission(ClusterState clusterState, ActionListener listener) { - failDecommissionedNodes(clusterState, listener); + private void initiateGracefulDecommission(ClusterState clusterState) { + failDecommissionedNodes(clusterState); } // To Do - Can we add a consumer here such that whenever this succeeds we call the next method in on cluster state processed @@ -295,6 +294,7 @@ public ClusterState execute(ClusterState currentState) throws Exception { assert decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute() != null : "failed to update status for decommission. 
metadata doesn't exist or invalid"; + assert assertIncrementalStatusOrFailed(decommissionAttributeMetadata.status(), decommissionStatus); Metadata.Builder mdBuilder = Metadata.builder(metadata); DecommissionAttributeMetadata newMetadata = decommissionAttributeMetadata.withUpdatedStatus(decommissionStatus); mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, newMetadata); @@ -336,7 +336,7 @@ private static void ensureNoAwarenessAttributeDecommissioned( } } - private void failDecommissionedNodes(ClusterState state, ActionListener listener) { + private void failDecommissionedNodes(ClusterState state) { DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); // TODO update the status check to DECOMMISSIONING once graceful decommission is implemented assert decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_INIT) @@ -359,6 +359,16 @@ private void failDecommissionedNodes(ClusterState state, ActionListener Date: Wed, 24 Aug 2022 00:50:27 +0530 Subject: [PATCH 14/87] Fix to abdication Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 53e73f98e8590..246012d9339bf 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -109,12 +109,12 @@ public void initiateAttributeDecommissioning( validateAwarenessAttribute(decommissionAttribute, getAwarenessAttributes()); this.clusterState = state; logger.info("initiating awareness attribute [{}] decommissioning", decommissionAttribute.toString()); - excludeDecommissionedClusterManagerNodesFromVotingConfig(decommissionAttribute, 
listener); + excludeDecommissionedClusterManagerNodesFromVotingConfig(decommissionAttribute); + registerDecommissionAttribute(decommissionAttribute, listener); } private void excludeDecommissionedClusterManagerNodesFromVotingConfig( - DecommissionAttribute decommissionAttribute, - final ActionListener listener + DecommissionAttribute decommissionAttribute ) { final Predicate shouldDecommissionPredicate = discoveryNode -> nodeHasDecommissionedAttribute( discoveryNode, @@ -140,7 +140,6 @@ public void handleResponse(AddVotingConfigExclusionsResponse response) { + "proceeding to drain the decommissioned nodes", clusterManagerNodesToBeDecommissioned.toString() ); - registerDecommissionAttribute(decommissionAttribute, listener); } @Override @@ -210,10 +209,14 @@ private void registerDecommissionAttribute( final DecommissionAttribute decommissionAttribute, final ActionListener listener ) { - assert transportService.getLocalNode().isClusterManagerNode() - && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute) - : "cannot register decommission attribute, as local node is not master or is going to be decommissioned"; - + logger.info("Node is - " + transportService.getLocalNode()); + if (!transportService.getLocalNode().isClusterManagerNode() + || nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) + { + throw new NotClusterManagerException( + "Node [" + transportService.getLocalNode() + "] not eligible to execute decommission request" + ); + } clusterService.submitStateUpdateTask( "put_decommission [" + decommissionAttribute + "]", new ClusterStateUpdateTask(Priority.URGENT) { From ac9c3727b409f3589ade80303d2b541d28dbc149 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 24 Aug 2022 01:17:37 +0530 Subject: [PATCH 15/87] Remove cluster state variable from service Signed-off-by: Rishab Nahata --- .../cluster/decommission/DecommissionService.java | 12 ++---------- 1 file changed, 2 insertions(+), 10 
deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 246012d9339bf..b3fcd9c04a15c 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -62,7 +62,7 @@ * * @opensearch.internal */ -public class DecommissionService implements ClusterStateApplier { +public class DecommissionService { private static final Logger logger = LogManager.getLogger(DecommissionService.class); @@ -70,7 +70,6 @@ public class DecommissionService implements ClusterStateApplier { private final TransportService transportService; private final ThreadPool threadPool; private final DecommissionHelper decommissionHelper; - private ClusterState clusterState; private volatile List awarenessAttributes; @Inject @@ -107,7 +106,6 @@ public void initiateAttributeDecommissioning( ClusterState state ) { validateAwarenessAttribute(decommissionAttribute, getAwarenessAttributes()); - this.clusterState = state; logger.info("initiating awareness attribute [{}] decommissioning", decommissionAttribute.toString()); excludeDecommissionedClusterManagerNodesFromVotingConfig(decommissionAttribute); registerDecommissionAttribute(decommissionAttribute, listener); @@ -121,7 +119,7 @@ private void excludeDecommissionedClusterManagerNodesFromVotingConfig( decommissionAttribute ); List clusterManagerNodesToBeDecommissioned = new ArrayList<>(); - Iterator clusterManagerNodesIter = clusterState.nodes().getClusterManagerNodes().valuesIt(); + Iterator clusterManagerNodesIter = clusterService.state().nodes().getClusterManagerNodes().valuesIt(); while (clusterManagerNodesIter.hasNext()) { final DiscoveryNode node = clusterManagerNodesIter.next(); if (shouldDecommissionPredicate.test(node)) { @@ -209,7 +207,6 @@ private void registerDecommissionAttribute( final 
DecommissionAttribute decommissionAttribute, final ActionListener listener ) { - logger.info("Node is - " + transportService.getLocalNode()); if (!transportService.getLocalNode().isClusterManagerNode() || nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) { @@ -375,9 +372,4 @@ else if (newStatus.equals(DecommissionStatus.DECOMMISSION_SUCCESSFUL)) { private static boolean nodeHasDecommissionedAttribute(DiscoveryNode discoveryNode, DecommissionAttribute decommissionAttribute) { return discoveryNode.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue()); } - - @Override - public void applyClusterState(ClusterChangedEvent event) { - clusterState = event.state(); - } } From 825092ae82e0f9945f461c05c442ea81d0046d1d Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 24 Aug 2022 01:26:01 +0530 Subject: [PATCH 16/87] Log node string Signed-off-by: Rishab Nahata --- .../org/opensearch/cluster/coordination/JoinTaskExecutor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index c309d2a3e06cc..997ddc3da9ea6 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -482,7 +482,7 @@ public static void ensureNodeNotDecommissioned(DiscoveryNode node, Metadata meta if (decommissionAttribute != null) { if (node.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue())) { throw new NodeDecommissionedException( - "node [{}] has decommissioned attribute [{}].", node.getId(), decommissionAttribute.toString() + "node [{}] has decommissioned attribute [{}].", node.toString(), decommissionAttribute.toString() ); } } From 
6a3157c64b583f404fb8eefc42f3e1fd5edacab9 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 24 Aug 2022 12:11:32 +0530 Subject: [PATCH 17/87] Fix conflict Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 100 +++++++++++++++--- 1 file changed, 85 insertions(+), 15 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index b3fcd9c04a15c..87081d2ee38b1 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.OpenSearchException; import org.opensearch.action.ActionListener; import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction; import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest; @@ -21,6 +22,7 @@ import org.opensearch.cluster.ClusterChangedEvent; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateApplier; +import org.opensearch.cluster.ClusterStateObserver; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; @@ -35,6 +37,7 @@ import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportException; import org.opensearch.transport.TransportResponseHandler; @@ -211,7 +214,7 @@ private void registerDecommissionAttribute( || 
nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) { throw new NotClusterManagerException( - "Node [" + transportService.getLocalNode() + "] not eligible to execute decommission request" + "node [" + transportService.getLocalNode().toString() + "] not eligible to execute decommission request" ); } clusterService.submitStateUpdateTask( @@ -251,7 +254,6 @@ public void onFailure(String source, Exception e) { ), e ); - // Do we need a listener here as the transport request will be retried? } else { logger.error( () -> new ParameterizedMessage( @@ -260,7 +262,7 @@ public void onFailure(String source, Exception e) { ), e ); - updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_FAILED); +// updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_FAILED); listener.onFailure(e); } } @@ -270,19 +272,32 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS assert !newState.equals(oldState) : "no update in cluster state after initiating decommission request."; // Do we attach a listener here with failed acknowledgement to the request? 
listener.onResponse(new ClusterStateUpdateResponse(true)); - initiateGracefulDecommission(newState); + initiateGracefulDecommission(); } } ); } - private void initiateGracefulDecommission(ClusterState clusterState) { - failDecommissionedNodes(clusterState); + private void initiateGracefulDecommission() { + ActionListener listener = new ActionListener() { + @Override + public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { + failDecommissionedNodes(clusterService.state()); + } + + @Override + public void onFailure(Exception e) { + + } + }; + updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_IN_PROGRESS, listener); + //TODO - code for graceful decommission } // To Do - Can we add a consumer here such that whenever this succeeds we call the next method in on cluster state processed private void updateMetadataWithDecommissionStatus( - DecommissionStatus decommissionStatus + DecommissionStatus decommissionStatus, + ActionListener listener ) { clusterService.submitStateUpdateTask( decommissionStatus.status(), @@ -311,6 +326,13 @@ public void onFailure(String source, Exception e) { e ); } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + listener.onResponse(new ClusterStateUpdateResponse(true)); + } + + } ); } @@ -338,25 +360,73 @@ private static void ensureNoAwarenessAttributeDecommissioned( private void failDecommissionedNodes(ClusterState state) { DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); - // TODO update the status check to DECOMMISSIONING once graceful decommission is implemented - assert decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_INIT) + assert decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS) : "unexpected status encountered while decommissioning nodes"; DecommissionAttribute decommissionAttribute = 
decommissionAttributeMetadata.decommissionAttribute(); - List nodesToBeDecommissioned = new ArrayList<>(); + + decommissionHelper.handleNodesDecommissionRequest( + nodesWithDecommissionAttribute(state, decommissionAttribute), + "nodes-decommissioned" + ); + + final ClusterStateObserver observer = new ClusterStateObserver( + clusterService, + TimeValue.timeValueSeconds(30L), + logger, + threadPool.getThreadContext() + ); + + final Predicate allDecommissionedNodesRemoved = clusterState -> { + List nodesWithDecommissionAttribute = nodesWithDecommissionAttribute(clusterState, decommissionAttribute); + return nodesWithDecommissionAttribute.size() == 0; + }; + ActionListener statusUpdateListener = new ActionListener() { + @Override + public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { + logger.info( + "successfully updated decommission status" + ); + } + + @Override + public void onFailure(Exception e) { + + } + }; + + observer.waitForNextChange(new ClusterStateObserver.Listener() { + @Override + public void onNewClusterState(ClusterState state) { + clearVotingConfigAfterSuccessfulDecommission(); + updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_SUCCESSFUL, statusUpdateListener); + } + + @Override + public void onClusterServiceClose() { + } + + @Override + public void onTimeout(TimeValue timeout) { + clearVotingConfigAfterSuccessfulDecommission(); + updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_FAILED, statusUpdateListener); + } + }, allDecommissionedNodesRemoved, TimeValue.timeValueSeconds(30L)); + } + + private List nodesWithDecommissionAttribute(ClusterState clusterState, DecommissionAttribute decommissionAttribute) { + List nodesWithDecommissionAttribute = new ArrayList<>(); final Predicate shouldRemoveNodePredicate = discoveryNode -> nodeHasDecommissionedAttribute( discoveryNode, decommissionAttribute ); - Iterator nodesIter = state.nodes().getNodes().valuesIt(); + Iterator nodesIter = 
clusterState.nodes().getNodes().valuesIt(); while (nodesIter.hasNext()) { final DiscoveryNode node = nodesIter.next(); if (shouldRemoveNodePredicate.test(node)) { - nodesToBeDecommissioned.add(node); + nodesWithDecommissionAttribute.add(node); } } - // TODO - check for response from decommission request and then clear voting config? - decommissionHelper.handleNodesDecommissionRequest(nodesToBeDecommissioned, "nodes-decommissioned"); - clearVotingConfigAfterSuccessfulDecommission(); + return nodesWithDecommissionAttribute; } private static boolean assertIncrementalStatusOrFailed(DecommissionStatus oldStatus, DecommissionStatus newStatus) { From 109459b24f88fe15bf9cd05b737d0eb186bae7ff Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 24 Aug 2022 15:50:23 +0530 Subject: [PATCH 18/87] Changes in Service Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 207 +++++++++--------- 1 file changed, 106 insertions(+), 101 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 87081d2ee38b1..b97cca12c5006 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -11,7 +11,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; -import org.opensearch.OpenSearchException; import org.opensearch.action.ActionListener; import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction; import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest; @@ -19,9 +18,7 @@ import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsAction; import 
org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsRequest; import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsResponse; -import org.opensearch.cluster.ClusterChangedEvent; import org.opensearch.cluster.ClusterState; -import org.opensearch.cluster.ClusterStateApplier; import org.opensearch.cluster.ClusterStateObserver; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.NotClusterManagerException; @@ -54,14 +51,14 @@ *

* Whenever a cluster manager initiates operation to decommission an awareness attribute, * the service makes the best attempt to perform the following task - - *

- * 1. Remove cluster-manager eligible nodes from voting config [TODO - checks to avoid quorum loss scenarios] - * 2. Initiates nodes decommissioning by adding custom metadata with the attribute and state as {@link DecommissionStatus#DECOMMISSION_INIT} - * 3. Triggers weigh away for nodes having given awareness attribute to drain. This marks the decommission status as {@link DecommissionStatus#DECOMMISSION_IN_PROGRESS} - * 4. Once weighed away, the service triggers nodes decommission - * 5. Once the decommission is successful, the service clears the voting config and marks the status as {@link DecommissionStatus#DECOMMISSION_SUCCESSFUL} - * 6. If service fails at any step, it would mark the status as {@link DecommissionStatus#DECOMMISSION_FAILED} - *

+ *
    + *
  • Remove cluster-manager eligible nodes from voting config [TODO - checks to avoid quorum loss scenarios]
  • + *
  • Initiates nodes decommissioning by adding custom metadata with the attribute and state as {@link DecommissionStatus#DECOMMISSION_INIT}
  • + *
  • Triggers weigh away for nodes having given awareness attribute to drain. This marks the decommission status as {@link DecommissionStatus#DECOMMISSION_IN_PROGRESS}
  • + *
  • Once weighed away, the service triggers nodes decommission
  • + *
  • Once the decommission is successful, the service clears the voting config and marks the status as {@link DecommissionStatus#DECOMMISSION_SUCCESSFUL}
  • + *
  • If service fails at any step, it would mark the status as {@link DecommissionStatus#DECOMMISSION_FAILED}
  • + *
* * @opensearch.internal */ @@ -114,9 +111,7 @@ public void initiateAttributeDecommissioning( registerDecommissionAttribute(decommissionAttribute, listener); } - private void excludeDecommissionedClusterManagerNodesFromVotingConfig( - DecommissionAttribute decommissionAttribute - ) { + private void excludeDecommissionedClusterManagerNodesFromVotingConfig(DecommissionAttribute decommissionAttribute) { final Predicate shouldDecommissionPredicate = discoveryNode -> nodeHasDecommissionedAttribute( discoveryNode, decommissionAttribute @@ -196,12 +191,12 @@ public ClearVotingConfigExclusionsResponse read(StreamInput in) throws IOExcepti } /** - * Registers new decommissioned attribute metadata in the cluster state + * Registers new decommissioned attribute metadata in the cluster state with {@link DecommissionStatus#DECOMMISSION_INIT} *

* This method can be only called on the cluster-manager node. It tries to create a new decommissioned attribute on the master * and if it was successful it adds new decommissioned attribute to cluster metadata. *

- * This method should only be called once the eligible cluster manager node having decommissioned attribute is abdicated + * This method ensures that request is performed only on eligible cluster manager node * * @param decommissionAttribute register decommission attribute in the metadata request * @param listener register decommission listener @@ -210,11 +205,10 @@ private void registerDecommissionAttribute( final DecommissionAttribute decommissionAttribute, final ActionListener listener ) { - if (!transportService.getLocalNode().isClusterManagerNode() - || nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) + if (!transportService.getLocalNode().isClusterManagerNode() || nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) { throw new NotClusterManagerException( - "node [" + transportService.getLocalNode().toString() + "] not eligible to execute decommission request" + "node [" + transportService.getLocalNode().toString() + "] not eligible to execute decommission request. Will retry until timeout." ); } clusterService.submitStateUpdateTask( @@ -262,15 +256,15 @@ public void onFailure(String source, Exception e) { ), e ); -// updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_FAILED); listener.onFailure(e); } } @Override public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { - assert !newState.equals(oldState) : "no update in cluster state after initiating decommission request."; - // Do we attach a listener here with failed acknowledgement to the request? 
+ DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().custom(DecommissionAttributeMetadata.TYPE); + assert decommissionAttribute.equals(decommissionAttributeMetadata.decommissionAttribute()); + assert DecommissionStatus.DECOMMISSION_INIT.equals(decommissionAttributeMetadata.status()); listener.onResponse(new ClusterStateUpdateResponse(true)); initiateGracefulDecommission(); } @@ -279,22 +273,88 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS } private void initiateGracefulDecommission() { - ActionListener listener = new ActionListener() { + // maybe create a supplier for status update listener? + ActionListener listener = new ActionListener<>() { @Override public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { + logger.info("updated decommission status to [{}], weighing away awareness attribute for graceful shutdown", + DecommissionStatus.DECOMMISSION_IN_PROGRESS); failDecommissionedNodes(clusterService.state()); } @Override public void onFailure(Exception e) { - + logger.error( + () -> new ParameterizedMessage( "failed to update decommission status to [{}], will not proceed with decommission" + , DecommissionStatus.DECOMMISSION_IN_PROGRESS), + e + ); } }; updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_IN_PROGRESS, listener); //TODO - code for graceful decommission } - // To Do - Can we add a consumer here such that whenever this succeeds we call the next method in on cluster state processed + private void failDecommissionedNodes(ClusterState state) { + DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); + assert decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS) + : "unexpected status encountered while decommissioning nodes"; + DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); + + 
ActionListener statusUpdateListener = new ActionListener<>() { + @Override + public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { + logger.info( + "successfully updated decommission status" + ); + } + + @Override + public void onFailure(Exception e) { + logger.error("failed to update the decommission status"); + } + }; + + // execute decommissioning + decommissionHelper.handleNodesDecommissionRequest( + nodesWithDecommissionAttribute(state, decommissionAttribute), + "nodes-decommissioned" + ); + + final ClusterStateObserver observer = new ClusterStateObserver( + clusterService, + TimeValue.timeValueSeconds(30L), // should this be a setting? + logger, + threadPool.getThreadContext() + ); + + final Predicate allDecommissionedNodesRemoved = clusterState -> { + List nodesWithDecommissionAttribute = nodesWithDecommissionAttribute(clusterState, decommissionAttribute); + return nodesWithDecommissionAttribute.size() == 0; + }; + + observer.waitForNextChange(new ClusterStateObserver.Listener() { + @Override + public void onNewClusterState(ClusterState state) { + logger.info("successfully removed all decommissioned nodes from the cluster"); + clearVotingConfigAfterSuccessfulDecommission(); + updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_SUCCESSFUL, statusUpdateListener); + } + + @Override + public void onClusterServiceClose() { + logger.debug("cluster service closed while waiting for removal of decommissioned nodes."); + } + + @Override + public void onTimeout(TimeValue timeout) { + logger.info("timed out while waiting for removal of decommissioned nodes"); + clearVotingConfigAfterSuccessfulDecommission(); + updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_FAILED, statusUpdateListener); + } + }, allDecommissionedNodesRemoved); + } + private void updateMetadataWithDecommissionStatus( DecommissionStatus decommissionStatus, ActionListener listener @@ -337,82 +397,6 @@ public void clusterStateProcessed(String 
source, ClusterState oldState, ClusterS ); } - private static void validateAwarenessAttribute(final DecommissionAttribute decommissionAttribute, List awarenessAttributes) { - if (!awarenessAttributes.contains(decommissionAttribute.attributeName())) { - throw new DecommissionFailedException(decommissionAttribute, "invalid awareness attribute requested for decommissioning"); - } - // TODO - should attribute value be part of force zone values? If yes, read setting and throw exception if not found - } - - private static void ensureNoAwarenessAttributeDecommissioned( - DecommissionAttributeMetadata decommissionAttributeMetadata, - DecommissionAttribute decommissionAttribute - ) { - // If the previous decommission request failed, we will allow the request to pass this check - if (decommissionAttributeMetadata != null - && !decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_FAILED)) { - throw new DecommissionFailedException( - decommissionAttribute, - "one awareness attribute already decommissioned, recommission before triggering another decommission" - ); - } - } - - private void failDecommissionedNodes(ClusterState state) { - DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); - assert decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS) - : "unexpected status encountered while decommissioning nodes"; - DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); - - decommissionHelper.handleNodesDecommissionRequest( - nodesWithDecommissionAttribute(state, decommissionAttribute), - "nodes-decommissioned" - ); - - final ClusterStateObserver observer = new ClusterStateObserver( - clusterService, - TimeValue.timeValueSeconds(30L), - logger, - threadPool.getThreadContext() - ); - - final Predicate allDecommissionedNodesRemoved = clusterState -> { - List nodesWithDecommissionAttribute = 
nodesWithDecommissionAttribute(clusterState, decommissionAttribute); - return nodesWithDecommissionAttribute.size() == 0; - }; - ActionListener statusUpdateListener = new ActionListener() { - @Override - public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { - logger.info( - "successfully updated decommission status" - ); - } - - @Override - public void onFailure(Exception e) { - - } - }; - - observer.waitForNextChange(new ClusterStateObserver.Listener() { - @Override - public void onNewClusterState(ClusterState state) { - clearVotingConfigAfterSuccessfulDecommission(); - updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_SUCCESSFUL, statusUpdateListener); - } - - @Override - public void onClusterServiceClose() { - } - - @Override - public void onTimeout(TimeValue timeout) { - clearVotingConfigAfterSuccessfulDecommission(); - updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_FAILED, statusUpdateListener); - } - }, allDecommissionedNodesRemoved, TimeValue.timeValueSeconds(30L)); - } - private List nodesWithDecommissionAttribute(ClusterState clusterState, DecommissionAttribute decommissionAttribute) { List nodesWithDecommissionAttribute = new ArrayList<>(); final Predicate shouldRemoveNodePredicate = discoveryNode -> nodeHasDecommissionedAttribute( @@ -442,4 +426,25 @@ else if (newStatus.equals(DecommissionStatus.DECOMMISSION_SUCCESSFUL)) { private static boolean nodeHasDecommissionedAttribute(DiscoveryNode discoveryNode, DecommissionAttribute decommissionAttribute) { return discoveryNode.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue()); } + + private static void validateAwarenessAttribute(final DecommissionAttribute decommissionAttribute, List awarenessAttributes) { + if (!awarenessAttributes.contains(decommissionAttribute.attributeName())) { + throw new DecommissionFailedException(decommissionAttribute, "invalid awareness attribute requested for 
decommissioning"); + } + // TODO - should attribute value be part of force zone values? If yes, read setting and throw exception if not found + } + + private static void ensureNoAwarenessAttributeDecommissioned( + DecommissionAttributeMetadata decommissionAttributeMetadata, + DecommissionAttribute decommissionAttribute + ) { + // If the previous decommission request failed, we will allow the request to pass this check + if (decommissionAttributeMetadata != null + && !decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_FAILED)) { + throw new DecommissionFailedException( + decommissionAttribute, + "one awareness attribute already decommissioned, recommission before triggering another decommission" + ); + } + } } From 4cd06da3aaea92ee7037de59de63a617667a30ae Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 24 Aug 2022 15:55:34 +0530 Subject: [PATCH 19/87] Fix spotless check Signed-off-by: Rishab Nahata --- .../org/opensearch/OpenSearchException.java | 1 - .../coordination/JoinTaskExecutor.java | 4 +- .../decommission/DecommissionAttribute.java | 1 - .../decommission/DecommissionService.java | 90 +++++++++---------- 4 files changed, 44 insertions(+), 52 deletions(-) diff --git a/server/src/main/java/org/opensearch/OpenSearchException.java b/server/src/main/java/org/opensearch/OpenSearchException.java index 5ae83c9df70d3..b1e9a27b98416 100644 --- a/server/src/main/java/org/opensearch/OpenSearchException.java +++ b/server/src/main/java/org/opensearch/OpenSearchException.java @@ -69,7 +69,6 @@ import static java.util.Collections.unmodifiableMap; import static org.opensearch.Version.V_2_1_0; import static org.opensearch.Version.V_2_3_0; -import static org.opensearch.Version.V_3_0_0; import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_UUID_NA_VALUE; import static org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken; import static org.opensearch.common.xcontent.XContentParserUtils.ensureFieldName; diff --git 
a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index 997ddc3da9ea6..9825bebb63dd6 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -482,7 +482,9 @@ public static void ensureNodeNotDecommissioned(DiscoveryNode node, Metadata meta if (decommissionAttribute != null) { if (node.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue())) { throw new NodeDecommissionedException( - "node [{}] has decommissioned attribute [{}].", node.toString(), decommissionAttribute.toString() + "node [{}] has decommissioned attribute [{}].", + node.toString(), + decommissionAttribute.toString() ); } } diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java index 1a30b59d5a60e..bf2487a1a0e18 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java @@ -8,7 +8,6 @@ package org.opensearch.cluster.decommission; -import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.io.stream.Writeable; diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index b97cca12c5006..139b241ea00a4 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -205,10 +205,12 @@ private void 
registerDecommissionAttribute( final DecommissionAttribute decommissionAttribute, final ActionListener listener ) { - if (!transportService.getLocalNode().isClusterManagerNode() || nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) - { + if (!transportService.getLocalNode().isClusterManagerNode() + || nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) { throw new NotClusterManagerException( - "node [" + transportService.getLocalNode().toString() + "] not eligible to execute decommission request. Will retry until timeout." + "node [" + + transportService.getLocalNode().toString() + + "] not eligible to execute decommission request. Will retry until timeout." ); } clusterService.submitStateUpdateTask( @@ -234,9 +236,7 @@ public ClusterState execute(ClusterState currentState) { public void onFailure(String source, Exception e) { if (e instanceof DecommissionFailedException) { logger.error( - () -> new ParameterizedMessage( - "failed to decommission attribute [{}]", - decommissionAttribute.toString()), + () -> new ParameterizedMessage("failed to decommission attribute [{}]", decommissionAttribute.toString()), e ); listener.onFailure(e); @@ -262,7 +262,8 @@ public void onFailure(String source, Exception e) { @Override public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { - DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().custom(DecommissionAttributeMetadata.TYPE); + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata() + .custom(DecommissionAttributeMetadata.TYPE); assert decommissionAttribute.equals(decommissionAttributeMetadata.decommissionAttribute()); assert DecommissionStatus.DECOMMISSION_INIT.equals(decommissionAttributeMetadata.status()); listener.onResponse(new ClusterStateUpdateResponse(true)); @@ -277,22 +278,26 @@ private void initiateGracefulDecommission() { ActionListener listener = 
new ActionListener<>() { @Override public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { - logger.info("updated decommission status to [{}], weighing away awareness attribute for graceful shutdown", - DecommissionStatus.DECOMMISSION_IN_PROGRESS); + logger.info( + "updated decommission status to [{}], weighing away awareness attribute for graceful shutdown", + DecommissionStatus.DECOMMISSION_IN_PROGRESS + ); failDecommissionedNodes(clusterService.state()); } @Override public void onFailure(Exception e) { logger.error( - () -> new ParameterizedMessage( "failed to update decommission status to [{}], will not proceed with decommission" - , DecommissionStatus.DECOMMISSION_IN_PROGRESS), + () -> new ParameterizedMessage( + "failed to update decommission status to [{}], will not proceed with decommission", + DecommissionStatus.DECOMMISSION_IN_PROGRESS + ), e ); } }; updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_IN_PROGRESS, listener); - //TODO - code for graceful decommission + // TODO - code for graceful decommission } private void failDecommissionedNodes(ClusterState state) { @@ -304,9 +309,7 @@ private void failDecommissionedNodes(ClusterState state) { ActionListener statusUpdateListener = new ActionListener<>() { @Override public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { - logger.info( - "successfully updated decommission status" - ); + logger.info("successfully updated decommission status"); } @Override @@ -359,42 +362,31 @@ private void updateMetadataWithDecommissionStatus( DecommissionStatus decommissionStatus, ActionListener listener ) { - clusterService.submitStateUpdateTask( - decommissionStatus.status(), - new ClusterStateUpdateTask(Priority.URGENT) { - @Override - public ClusterState execute(ClusterState currentState) throws Exception { - Metadata metadata = currentState.metadata(); - DecommissionAttributeMetadata decommissionAttributeMetadata = 
metadata.custom(DecommissionAttributeMetadata.TYPE); - assert decommissionAttributeMetadata != null - && decommissionAttributeMetadata.decommissionAttribute() != null - : "failed to update status for decommission. metadata doesn't exist or invalid"; - assert assertIncrementalStatusOrFailed(decommissionAttributeMetadata.status(), decommissionStatus); - Metadata.Builder mdBuilder = Metadata.builder(metadata); - DecommissionAttributeMetadata newMetadata = decommissionAttributeMetadata.withUpdatedStatus(decommissionStatus); - mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, newMetadata); - return ClusterState.builder(currentState).metadata(mdBuilder).build(); - } - - @Override - public void onFailure(String source, Exception e) { - logger.error( - () -> new ParameterizedMessage( - "failed to mark status as [{}]", - decommissionStatus.status() - ), - e - ); - } - - @Override - public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { - listener.onResponse(new ClusterStateUpdateResponse(true)); - } + clusterService.submitStateUpdateTask(decommissionStatus.status(), new ClusterStateUpdateTask(Priority.URGENT) { + @Override + public ClusterState execute(ClusterState currentState) throws Exception { + Metadata metadata = currentState.metadata(); + DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); + assert decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute() != null + : "failed to update status for decommission. 
metadata doesn't exist or invalid"; + assert assertIncrementalStatusOrFailed(decommissionAttributeMetadata.status(), decommissionStatus); + Metadata.Builder mdBuilder = Metadata.builder(metadata); + DecommissionAttributeMetadata newMetadata = decommissionAttributeMetadata.withUpdatedStatus(decommissionStatus); + mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, newMetadata); + return ClusterState.builder(currentState).metadata(mdBuilder).build(); + } + @Override + public void onFailure(String source, Exception e) { + logger.error(() -> new ParameterizedMessage("failed to mark status as [{}]", decommissionStatus.status()), e); + } + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + listener.onResponse(new ClusterStateUpdateResponse(true)); } - ); + + }); } private List nodesWithDecommissionAttribute(ClusterState clusterState, DecommissionAttribute decommissionAttribute) { @@ -410,7 +402,7 @@ private List nodesWithDecommissionAttribute(ClusterState clusterS nodesWithDecommissionAttribute.add(node); } } - return nodesWithDecommissionAttribute; + return nodesWithDecommissionAttribute; } private static boolean assertIncrementalStatusOrFailed(DecommissionStatus oldStatus, DecommissionStatus newStatus) { From a28e6fd92110448feef167fa8d8ff4d851609f52 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 24 Aug 2022 16:14:49 +0530 Subject: [PATCH 20/87] Update the join validator for decommissioned attribute Signed-off-by: Rishab Nahata --- .../org/opensearch/cluster/coordination/JoinTaskExecutor.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index 9825bebb63dd6..2eba89c72ae2b 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ 
b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -40,6 +40,7 @@ import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.block.ClusterBlocks; import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionStatus; import org.opensearch.cluster.decommission.NodeDecommissionedException; import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.IndexMetadata; @@ -479,7 +480,8 @@ public static void ensureNodeNotDecommissioned(DiscoveryNode node, Metadata meta DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); if (decommissionAttributeMetadata != null) { DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); - if (decommissionAttribute != null) { + if (decommissionAttribute != null && decommissionAttributeMetadata.status() != null) { + if(decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_FAILED)) return; if (node.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue())) { throw new NodeDecommissionedException( "node [{}] has decommissioned attribute [{}].", From 763f241ca132dc7afafdbb1a5758cca87248f559 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 24 Aug 2022 20:22:38 +0530 Subject: [PATCH 21/87] Add UTs for metadata Signed-off-by: Rishab Nahata --- .../org/opensearch/OpenSearchException.java | 1 + .../DecommissionAttributeMetadata.java | 3 +- .../ExceptionSerializationTests.java | 4 + ...onAttributeMetadataSerializationTests.java | 85 +++++++++++++++++++ .../DecommissionAttributeMetadataTests.java | 51 +++++++++++ ...missionAttributeMetadataXContentTests.java | 37 ++++++++ 6 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 
server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java create mode 100644 server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java create mode 100644 server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java diff --git a/server/src/main/java/org/opensearch/OpenSearchException.java b/server/src/main/java/org/opensearch/OpenSearchException.java index b1e9a27b98416..5ae83c9df70d3 100644 --- a/server/src/main/java/org/opensearch/OpenSearchException.java +++ b/server/src/main/java/org/opensearch/OpenSearchException.java @@ -69,6 +69,7 @@ import static java.util.Collections.unmodifiableMap; import static org.opensearch.Version.V_2_1_0; import static org.opensearch.Version.V_2_3_0; +import static org.opensearch.Version.V_3_0_0; import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_UUID_NA_VALUE; import static org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken; import static org.opensearch.common.xcontent.XContentParserUtils.ensureFieldName; diff --git a/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java index c6252e9981810..2034ab34e25c3 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java @@ -137,7 +137,7 @@ public static DecommissionAttributeMetadata fromXContent(XContentParser parser) XContentParser.Token token; DecommissionAttribute decommissionAttribute = null; DecommissionStatus status = null; - if ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { String currentFieldName = parser.currentName(); if 
(attributeType.equals(currentFieldName)) { @@ -162,6 +162,7 @@ public static DecommissionAttributeMetadata fromXContent(XContentParser parser) ); } decommissionAttribute = new DecommissionAttribute(fieldName, value); + token = parser.nextToken(); } else { throw new OpenSearchParseException("failed to parse attribute type [{}], unexpected type", attributeType); } diff --git a/server/src/test/java/org/opensearch/ExceptionSerializationTests.java b/server/src/test/java/org/opensearch/ExceptionSerializationTests.java index 26b0ce7e9e20c..6516cc80c7929 100644 --- a/server/src/test/java/org/opensearch/ExceptionSerializationTests.java +++ b/server/src/test/java/org/opensearch/ExceptionSerializationTests.java @@ -49,6 +49,8 @@ import org.opensearch.cluster.block.ClusterBlockException; import org.opensearch.cluster.coordination.CoordinationStateRejectedException; import org.opensearch.cluster.coordination.NoClusterManagerBlockService; +import org.opensearch.cluster.decommission.DecommissionFailedException; +import org.opensearch.cluster.decommission.NodeDecommissionedException; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.IllegalShardRoutingStateException; import org.opensearch.cluster.routing.ShardRouting; @@ -860,6 +862,8 @@ public void testIds() { ids.put(160, NoSeedNodeLeftException.class); ids.put(161, ReplicationFailedException.class); ids.put(162, PrimaryShardClosedException.class); + ids.put(163, DecommissionFailedException.class); + ids.put(164, NodeDecommissionedException.class); Map, Integer> reverse = new HashMap<>(); for (Map.Entry> entry : ids.entrySet()) { diff --git a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java new file mode 100644 index 0000000000000..d81c05b8e8da0 --- /dev/null +++ 
b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java @@ -0,0 +1,85 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.cluster.ClusterModule; +import org.opensearch.cluster.Diff; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.common.io.stream.NamedWriteableRegistry; +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.common.xcontent.XContentParser; +import org.opensearch.test.AbstractDiffableSerializationTestCase; + +import java.io.IOException; + +public class DecommissionAttributeMetadataSerializationTests extends AbstractDiffableSerializationTestCase { + + @Override + protected Writeable.Reader instanceReader() { + return DecommissionAttributeMetadata::new; + } + + @Override + protected Metadata.Custom createTestInstance() { + String attributeName = randomAlphaOfLength(6); + String attributeValue = randomAlphaOfLength(6); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.values()); + return new DecommissionAttributeMetadata(decommissionAttribute, decommissionStatus); + } + + @Override + protected Metadata.Custom mutateInstance(Metadata.Custom instance) { + return randomValueOtherThan(instance, this::createTestInstance); + } + + @Override + protected Metadata.Custom makeTestChanges(Metadata.Custom testInstance) { + DecommissionAttributeMetadata decommissionAttributeMetadata = (DecommissionAttributeMetadata) testInstance; + DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); + String 
attributeName = decommissionAttribute.attributeName(); + String attributeValue = decommissionAttribute.attributeValue(); + DecommissionStatus decommissionStatus = decommissionAttributeMetadata.status(); + if (randomBoolean()) { + decommissionStatus = randomFrom(DecommissionStatus.values()); + } + if (randomBoolean()) { + attributeName = randomAlphaOfLength(6); + } + if(randomBoolean()) { + attributeValue = randomAlphaOfLength(6); + } + return new DecommissionAttributeMetadata( + new DecommissionAttribute(attributeName, attributeValue), + decommissionStatus + ); + } + + @Override + protected Writeable.Reader> diffReader() { + return DecommissionAttributeMetadata::readDiffFrom; + } + + @Override + protected NamedWriteableRegistry getNamedWriteableRegistry() { + return new NamedWriteableRegistry(ClusterModule.getNamedWriteables()); + } + + @Override + protected Metadata.Custom doParseInstance(XContentParser parser) throws IOException { + assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); + DecommissionAttributeMetadata decommissionAttributeMetadata = DecommissionAttributeMetadata.fromXContent(parser); + assertEquals(XContentParser.Token.END_OBJECT, parser.currentToken()); + return new DecommissionAttributeMetadata( + decommissionAttributeMetadata.decommissionAttribute(), + decommissionAttributeMetadata.status() + ); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java new file mode 100644 index 0000000000000..bff57daef6109 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.common.io.stream.NamedWriteableRegistry; +import org.opensearch.test.AbstractNamedWriteableTestCase; + +import java.io.IOException; +import java.util.Collections; + +public class DecommissionAttributeMetadataTests extends AbstractNamedWriteableTestCase { + @Override + protected DecommissionAttributeMetadata createTestInstance() { + String attributeName = randomAlphaOfLength(6); + String attributeValue = randomAlphaOfLength(6); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.values()); + return new DecommissionAttributeMetadata(decommissionAttribute, decommissionStatus); + } + + @Override + protected DecommissionAttributeMetadata mutateInstance(DecommissionAttributeMetadata instance) throws IOException { + return randomValueOtherThan(instance, this::createTestInstance); + } + + @Override + protected NamedWriteableRegistry getNamedWriteableRegistry() { + return new NamedWriteableRegistry( + Collections.singletonList( + new NamedWriteableRegistry.Entry( + DecommissionAttributeMetadata.class, + DecommissionAttributeMetadata.TYPE, + DecommissionAttributeMetadata::new + ) + ) + ); + } + + @Override + protected Class categoryClass() { + return DecommissionAttributeMetadata.class; + } +} diff --git a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java new file mode 100644 index 0000000000000..c632839acd4ca --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * 
The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.common.xcontent.XContentParser; +import org.opensearch.test.AbstractXContentTestCase; + +import java.io.IOException; + +public class DecommissionAttributeMetadataXContentTests extends AbstractXContentTestCase { + @Override + protected DecommissionAttributeMetadata createTestInstance() { + String attributeName = randomAlphaOfLength(6); + String attributeValue = randomAlphaOfLength(6); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.values()); + return new DecommissionAttributeMetadata(decommissionAttribute, decommissionStatus); + } + + @Override + protected DecommissionAttributeMetadata doParseInstance(XContentParser parser) throws IOException { + return DecommissionAttributeMetadata.fromXContent(parser); + } + + @Override + protected boolean supportsUnknownFields() { + return false; + } +} From 1464bbbc133499836cea5e78943feb75be5e2eaa Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Thu, 25 Aug 2022 00:45:09 +0530 Subject: [PATCH 22/87] Add UTs for JoinTaskExecutor changes Signed-off-by: Rishab Nahata --- .../coordination/JoinTaskExecutor.java | 13 ++- .../coordination/JoinTaskExecutorTests.java | 86 +++++++++++++++++++ 2 files changed, 96 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index 2eba89c72ae2b..469c0b786056f 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ 
b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -480,9 +480,16 @@ public static void ensureNodeNotDecommissioned(DiscoveryNode node, Metadata meta DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); if (decommissionAttributeMetadata != null) { DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); - if (decommissionAttribute != null && decommissionAttributeMetadata.status() != null) { - if(decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_FAILED)) return; - if (node.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue())) { + DecommissionStatus status = decommissionAttributeMetadata.status(); + if (decommissionAttribute != null && status != null) { + // We will let the node join the cluster if the current status is INIT or FAILED + if(status.equals(DecommissionStatus.DECOMMISSION_FAILED) || status.equals(DecommissionStatus.DECOMMISSION_INIT)) return; + if (node.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue()) + && ( + status.equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS) + || status.equals(DecommissionStatus.DECOMMISSION_SUCCESSFUL + ) + )) { throw new NodeDecommissionedException( "node [{}] has decommissioned attribute [{}].", node.toString(), diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index 02e502e762561..a3c3b9a9b9c7b 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -36,9 +36,15 @@ import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import 
org.opensearch.cluster.ClusterStateTaskExecutor; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionFailedException; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.cluster.decommission.NodeDecommissionedException; +import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.RerouteService; import org.opensearch.cluster.routing.allocation.AllocationService; @@ -48,8 +54,11 @@ import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; +import java.util.Collections; import java.util.HashSet; +import java.util.Map; +import static java.util.Collections.emptyMap; import static org.hamcrest.Matchers.is; import static org.opensearch.test.VersionUtils.allVersions; import static org.opensearch.test.VersionUtils.maxCompatibleVersion; @@ -216,4 +225,81 @@ public void testIsBecomeClusterManagerTask() { JoinTaskExecutor.Task joinTaskOfClusterManager = JoinTaskExecutor.newBecomeClusterManagerTask(); assertThat(joinTaskOfClusterManager.isBecomeClusterManagerTask(), is(true)); } + + public void testJoinClusterWithNoDecommission() { + Settings.builder().build(); + Metadata.Builder metaBuilder = Metadata.builder(); + Metadata metadata = metaBuilder.build(); + DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-2")); + JoinTaskExecutor.ensureNodeNotDecommissioned(discoveryNode, metadata); + } + + public void testPreventJoinClusterWithDecommission() { + Settings.builder().build(); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); + DecommissionStatus decommissionStatus = 
randomFrom( + DecommissionStatus.DECOMMISSION_IN_PROGRESS, + DecommissionStatus.DECOMMISSION_SUCCESSFUL + ); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + decommissionStatus + ); + Metadata.Builder metaBuilder = Metadata.builder(); + metaBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); + Metadata metadata = metaBuilder.build(); + + DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-1")); + expectThrows( + NodeDecommissionedException.class, + () -> JoinTaskExecutor.ensureNodeNotDecommissioned(discoveryNode, metadata) + ); + } + + public void testJoinClusterWithDifferentDecommission() { + Settings.builder().build(); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.values()); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + decommissionStatus + ); + Metadata.Builder metaBuilder = Metadata.builder(); + metaBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); + Metadata metadata = metaBuilder.build(); + + DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-2")); + JoinTaskExecutor.ensureNodeNotDecommissioned(discoveryNode, metadata); + } + + public void testJoinClusterWithDecommissionFailedOrInitOrRecommission() { + Settings.builder().build(); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); + DecommissionStatus decommissionStatus = randomFrom( + DecommissionStatus.DECOMMISSION_INIT, + DecommissionStatus.DECOMMISSION_FAILED, + DecommissionStatus.RECOMMISSION_IN_PROGRESS + ); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + decommissionStatus + ); 
+ Metadata.Builder metaBuilder = Metadata.builder(); + metaBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); + Metadata metadata = metaBuilder.build(); + + DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-1")); + JoinTaskExecutor.ensureNodeNotDecommissioned(discoveryNode, metadata); + } + + private DiscoveryNode newDiscoveryNode(Map attributes) { + return new DiscoveryNode( + randomAlphaOfLength(10), + randomAlphaOfLength(10), + buildNewFakeTransportAddress(), + attributes, + Collections.singleton(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE), + Version.CURRENT + ); + } } From f3d49a34297f34e7d5c83665f5f019efcd5ac670 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Thu, 25 Aug 2022 00:47:20 +0530 Subject: [PATCH 23/87] Fix Signed-off-by: Rishab Nahata --- .../org/opensearch/cluster/coordination/JoinTaskExecutor.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index 469c0b786056f..7008474222aef 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -482,8 +482,7 @@ public static void ensureNodeNotDecommissioned(DiscoveryNode node, Metadata meta DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); DecommissionStatus status = decommissionAttributeMetadata.status(); if (decommissionAttribute != null && status != null) { - // We will let the node join the cluster if the current status is INIT or FAILED - if(status.equals(DecommissionStatus.DECOMMISSION_FAILED) || status.equals(DecommissionStatus.DECOMMISSION_INIT)) return; + // We will let the node join the cluster if the current status is not IN_PROGRESS or SUCCESSFUL if 
(node.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue()) && ( status.equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS) From fd0bbe8b057346aa5c0dcfbc658d126d65ec0ae7 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Thu, 25 Aug 2022 16:17:26 +0530 Subject: [PATCH 24/87] Test files Signed-off-by: Rishab Nahata --- .../decommission/DecommissionHelperTests.java | 14 ++++++++++++++ .../decommission/DecommissionServiceTests.java | 14 ++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 server/src/test/java/org/opensearch/cluster/decommission/DecommissionHelperTests.java create mode 100644 server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionHelperTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionHelperTests.java new file mode 100644 index 0000000000000..289d5147512e1 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionHelperTests.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.decommission; + +import org.opensearch.test.OpenSearchTestCase; + +public class DecommissionHelperTests extends OpenSearchTestCase { +} diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java new file mode 100644 index 0000000000000..ea4aee984df98 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.opensearch.test.OpenSearchTestCase; + +public class DecommissionServiceTests extends OpenSearchTestCase { +} From efdad784cf619bde628cc21d0e82146c942060fe Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Thu, 25 Aug 2022 17:14:49 +0530 Subject: [PATCH 25/87] Move observer logic to helper Signed-off-by: Rishab Nahata --- .../decommission/DecommissionHelper.java | 52 +++++++++++++-- .../decommission/DecommissionService.java | 58 ++++++++--------- .../decommission/DecommissionHelperTests.java | 65 +++++++++++++++++++ 3 files changed, 139 insertions(+), 36 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java index ce7befacaef98..7b2ae71288d71 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java @@ -10,17 +10,24 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.opensearch.action.ActionListener; +import org.opensearch.cluster.ClusterState; +import 
org.opensearch.cluster.ClusterStateObserver; import org.opensearch.cluster.ClusterStateTaskConfig; import org.opensearch.cluster.ClusterStateTaskListener; +import org.opensearch.cluster.ack.ClusterStateUpdateResponse; import org.opensearch.cluster.coordination.NodeRemovalClusterStateTaskExecutor; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.Priority; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.threadpool.ThreadPool; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.function.Predicate; /** * Helper executor class to remove list of nodes from the cluster @@ -34,24 +41,61 @@ public class DecommissionHelper { private final NodeRemovalClusterStateTaskExecutor nodeRemovalExecutor; private final ClusterService clusterService; + private final ThreadPool threadPool; - DecommissionHelper(ClusterService clusterService, AllocationService allocationService) { + DecommissionHelper( + ClusterService clusterService, + AllocationService allocationService, + ThreadPool threadPool + ) { this.clusterService = clusterService; this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, logger); + this.threadPool = threadPool; } - public void handleNodesDecommissionRequest(List nodesToBeDecommissioned, String reason) { + public void handleNodesDecommissionRequest( + List nodesToBeDecommissioned, + String reason, + TimeValue timeout, + Predicate allDecommissionedNodesRemoved, + ActionListener nodesRemovedListener + ) { final Map nodesDecommissionTasks = new LinkedHashMap<>(); nodesToBeDecommissioned.forEach(discoveryNode -> { final NodeRemovalClusterStateTaskExecutor.Task task = new NodeRemovalClusterStateTaskExecutor.Task(discoveryNode, reason); nodesDecommissionTasks.put(task, nodeRemovalExecutor); }); - final String source = 
"node-decommissioned"; clusterService.submitStateUpdateTasks( - source, + "node-decommissioned", nodesDecommissionTasks, ClusterStateTaskConfig.build(Priority.IMMEDIATE), nodeRemovalExecutor ); + + final ClusterStateObserver observer = new ClusterStateObserver( + clusterService, + timeout, + logger, + threadPool.getThreadContext() + ); + + observer.waitForNextChange(new ClusterStateObserver.Listener() { + @Override + public void onNewClusterState(ClusterState state) { + logger.info("successfully removed all decommissioned nodes [{}] from the cluster", nodesToBeDecommissioned.toString()); + nodesRemovedListener.onResponse(new ClusterStateUpdateResponse(true)); + } + + @Override + public void onClusterServiceClose() { + logger.debug("cluster service closed while waiting for removal of decommissioned nodes."); + } + + @Override + public void onTimeout(TimeValue timeout) { + logger.info("timed out while waiting for removal of decommissioned nodes"); + nodesRemovedListener.onResponse(new ClusterStateUpdateResponse(false)); + } + }, allDecommissionedNodesRemoved); } } diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 139b241ea00a4..c1e0045000ee7 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -84,7 +84,11 @@ public DecommissionService( this.clusterService = clusterService; this.transportService = transportService; this.threadPool = threadPool; - this.decommissionHelper = new DecommissionHelper(clusterService, allocationService); + this.decommissionHelper = new DecommissionHelper( + clusterService, + allocationService, + threadPool + ); this.awarenessAttributes = AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.get(settings); clusterSettings.addSettingsUpdateConsumer( 
AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING, @@ -318,44 +322,34 @@ public void onFailure(Exception e) { } }; - // execute decommissioning - decommissionHelper.handleNodesDecommissionRequest( - nodesWithDecommissionAttribute(state, decommissionAttribute), - "nodes-decommissioned" - ); + ActionListener nodesRemovalListener = new ActionListener<>() { + @Override + public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { + DecommissionStatus updateStatusTo = clusterStateUpdateResponse.isAcknowledged() ? + DecommissionStatus.DECOMMISSION_SUCCESSFUL : DecommissionStatus.DECOMMISSION_FAILED; + updateMetadataWithDecommissionStatus(updateStatusTo, statusUpdateListener); + } - final ClusterStateObserver observer = new ClusterStateObserver( - clusterService, - TimeValue.timeValueSeconds(30L), // should this be a setting? - logger, - threadPool.getThreadContext() - ); + @Override + public void onFailure(Exception e) { + logger.error("failed to update the decommission status"); + } + }; final Predicate allDecommissionedNodesRemoved = clusterState -> { List nodesWithDecommissionAttribute = nodesWithDecommissionAttribute(clusterState, decommissionAttribute); return nodesWithDecommissionAttribute.size() == 0; }; - observer.waitForNextChange(new ClusterStateObserver.Listener() { - @Override - public void onNewClusterState(ClusterState state) { - logger.info("successfully removed all decommissioned nodes from the cluster"); - clearVotingConfigAfterSuccessfulDecommission(); - updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_SUCCESSFUL, statusUpdateListener); - } - - @Override - public void onClusterServiceClose() { - logger.debug("cluster service closed while waiting for removal of decommissioned nodes."); - } - - @Override - public void onTimeout(TimeValue timeout) { - logger.info("timed out while waiting for removal of decommissioned nodes"); - clearVotingConfigAfterSuccessfulDecommission(); - 
updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_FAILED, statusUpdateListener); - } - }, allDecommissionedNodesRemoved); + // execute decommissioning + decommissionHelper.handleNodesDecommissionRequest( + nodesWithDecommissionAttribute(state, decommissionAttribute), + "nodes-decommissioned", + TimeValue.timeValueSeconds(30L), + allDecommissionedNodesRemoved, + nodesRemovalListener + ); + clearVotingConfigAfterSuccessfulDecommission(); } private void updateMetadataWithDecommissionStatus( diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionHelperTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionHelperTests.java index 289d5147512e1..722fad288e6bb 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionHelperTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionHelperTests.java @@ -8,7 +8,72 @@ package org.opensearch.cluster.decommission; +import org.opensearch.Version; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.service.ClusterService; import org.opensearch.test.OpenSearchTestCase; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static java.util.Collections.singletonMap; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + public class DecommissionHelperTests extends OpenSearchTestCase { + +// public void testRemoveNodesForDecommissionRequest() { +// final AllocationService allocationService = mock(AllocationService.class); +// final ClusterService 
clusterService = mock(ClusterService.class); +// +// ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); +// +// logger.info("--> adding five nodes on same zone_1"); +// clusterState = addNodes(clusterState, "zone_1", "node1", "node2", "node3", "node4", "node5"); +// +// logger.info("--> adding five nodes on same zone_2"); +// clusterState = addNodes(clusterState, "zone_2", "node6", "node7", "node8", "node9", "node10"); +// +// logger.info("--> adding five nodes on same zone_3"); +// clusterState = addNodes(clusterState, "zone_3", "node11", "node12", "node13", "node14", "node15"); +// +// when(clusterService.state()).thenReturn(clusterState); +// +// final DecommissionHelper decommissionHelper = new DecommissionHelper(clusterService, allocationService); +// +// List nodesToBeRemoved = new ArrayList<>(); +// nodesToBeRemoved.add(clusterState.nodes().get("node11")); +// nodesToBeRemoved.add(clusterState.nodes().get("node12")); +// nodesToBeRemoved.add(clusterState.nodes().get("node13")); +// nodesToBeRemoved.add(clusterState.nodes().get("node14")); +// nodesToBeRemoved.add(clusterState.nodes().get("node15")); +// +// decommissionHelper.handleNodesDecommissionRequest(nodesToBeRemoved, "unit-test"); +// assertEquals((clusterService.state().nodes().getSize()), 10); +// } + + private ClusterState addNodes(ClusterState clusterState, String zone, String... 
nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + org.opensearch.common.collect.List.of(nodeIds).forEach(nodeId -> nodeBuilder.add(newNode(nodeId, singletonMap("zone", zone)))); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private DiscoveryNode newNode(String nodeId, Map attributes) { + return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_DATA_ROLES, Version.CURRENT); + } + + final private static Set CLUSTER_MANAGER_DATA_ROLES = Collections.unmodifiableSet( + new HashSet<>(Arrays.asList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)) + ); } From 48def4bb943c104063faabc36899ea01c0ca1ac4 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Thu, 25 Aug 2022 17:23:56 +0530 Subject: [PATCH 26/87] fix msg Signed-off-by: Rishab Nahata --- .../opensearch/cluster/decommission/DecommissionService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index c1e0045000ee7..b034daa567361 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -332,7 +332,7 @@ public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { @Override public void onFailure(Exception e) { - logger.error("failed to update the decommission status"); + logger.error("error while waiting for decommissioned nodes to be removed", e); } }; @@ -341,7 +341,7 @@ public void onFailure(Exception e) { return nodesWithDecommissionAttribute.size() == 0; }; - // execute decommissioning + // execute nodes decommissioning and wait for it to complete decommissionHelper.handleNodesDecommissionRequest( 
nodesWithDecommissionAttribute(state, decommissionAttribute), "nodes-decommissioned", From 23f59b130954937443886c1fc61700af3eedd7ef Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 26 Aug 2022 14:25:47 +0530 Subject: [PATCH 27/87] Move predicate to helper Signed-off-by: Rishab Nahata --- .../decommission/DecommissionHelper.java | 20 ++++++++++--- .../decommission/DecommissionService.java | 12 +++----- .../decommission/DecommissionHelperTests.java | 29 ++++++++++++++++++- 3 files changed, 48 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java index 7b2ae71288d71..935fe42873bc9 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java @@ -24,9 +24,10 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.threadpool.ThreadPool; +import java.util.Iterator; import java.util.LinkedHashMap; -import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Predicate; /** @@ -54,10 +55,9 @@ public class DecommissionHelper { } public void handleNodesDecommissionRequest( - List nodesToBeDecommissioned, + Set nodesToBeDecommissioned, String reason, TimeValue timeout, - Predicate allDecommissionedNodesRemoved, ActionListener nodesRemovedListener ) { final Map nodesDecommissionTasks = new LinkedHashMap<>(); @@ -72,6 +72,18 @@ public void handleNodesDecommissionRequest( nodeRemovalExecutor ); + Predicate allDecommissionedNodesRemovedPredicate = clusterState -> { + Iterator nodesIter = clusterState.nodes().getNodes().valuesIt(); + while (nodesIter.hasNext()) { + final DiscoveryNode node = nodesIter.next(); + // check if the node is part of node decommissioned list + if (nodesToBeDecommissioned.contains(node)) { + return false; + } + } + return true; + }; + 
final ClusterStateObserver observer = new ClusterStateObserver( clusterService, timeout, @@ -96,6 +108,6 @@ public void onTimeout(TimeValue timeout) { logger.info("timed out while waiting for removal of decommissioned nodes"); nodesRemovedListener.onResponse(new ClusterStateUpdateResponse(false)); } - }, allDecommissionedNodesRemoved); + }, allDecommissionedNodesRemovedPredicate); } } diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index b034daa567361..b0e2e2f1f7a46 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -42,8 +42,10 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Set; import java.util.function.Predicate; /** @@ -336,17 +338,11 @@ public void onFailure(Exception e) { } }; - final Predicate allDecommissionedNodesRemoved = clusterState -> { - List nodesWithDecommissionAttribute = nodesWithDecommissionAttribute(clusterState, decommissionAttribute); - return nodesWithDecommissionAttribute.size() == 0; - }; - // execute nodes decommissioning and wait for it to complete decommissionHelper.handleNodesDecommissionRequest( nodesWithDecommissionAttribute(state, decommissionAttribute), "nodes-decommissioned", TimeValue.timeValueSeconds(30L), - allDecommissionedNodesRemoved, nodesRemovalListener ); clearVotingConfigAfterSuccessfulDecommission(); @@ -383,8 +379,8 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS }); } - private List nodesWithDecommissionAttribute(ClusterState clusterState, DecommissionAttribute decommissionAttribute) { - List nodesWithDecommissionAttribute = new ArrayList<>(); + private Set nodesWithDecommissionAttribute(ClusterState clusterState, 
DecommissionAttribute decommissionAttribute) { + Set nodesWithDecommissionAttribute = new HashSet<>(); final Predicate shouldRemoveNodePredicate = discoveryNode -> nodeHasDecommissionedAttribute( discoveryNode, decommissionAttribute diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionHelperTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionHelperTests.java index 722fad288e6bb..9452c0e94c262 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionHelperTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionHelperTests.java @@ -8,15 +8,24 @@ package org.opensearch.cluster.decommission; +import org.junit.AfterClass; +import org.junit.BeforeClass; import org.opensearch.Version; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateObserver; +import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodeRole; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; import java.util.ArrayList; import java.util.Arrays; @@ -26,12 +35,30 @@ import java.util.Map; import java.util.Set; +import static java.util.Collections.emptyMap; +import static java.util.Collections.emptySet; import static java.util.Collections.singletonMap; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; +import static 
org.opensearch.test.ClusterServiceUtils.createClusterService; public class DecommissionHelperTests extends OpenSearchTestCase { + private static ThreadPool threadPool; + private static ClusterService clusterService; + + @BeforeClass + public static void createThreadPoolAndClusterService() { + threadPool = new TestThreadPool("test", Settings.EMPTY); + clusterService = createClusterService(threadPool); + } + + @AfterClass + public static void shutdownThreadPoolAndClusterService() { + clusterService.stop(); + threadPool.shutdown(); + } + // public void testRemoveNodesForDecommissionRequest() { // final AllocationService allocationService = mock(AllocationService.class); // final ClusterService clusterService = mock(ClusterService.class); @@ -69,7 +96,7 @@ private ClusterState addNodes(ClusterState clusterState, String zone, String... return clusterState; } - private DiscoveryNode newNode(String nodeId, Map attributes) { + private static DiscoveryNode newNode(String nodeId, Map attributes) { return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_DATA_ROLES, Version.CURRENT); } From d99dcace083c9d116a3312585097ab88f35349cd Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Mon, 29 Aug 2022 14:45:40 +0530 Subject: [PATCH 28/87] test Signed-off-by: Rishab Nahata --- ...elper.java => DecommissionController.java} | 51 ++++- .../decommission/DecommissionService.java | 50 +---- .../DecommissionControllerTests.java | 179 ++++++++++++++++++ .../decommission/DecommissionHelperTests.java | 106 ----------- 4 files changed, 232 insertions(+), 154 deletions(-) rename server/src/main/java/org/opensearch/cluster/decommission/{DecommissionHelper.java => DecommissionController.java} (61%) create mode 100644 server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java delete mode 100644 server/src/test/java/org/opensearch/cluster/decommission/DecommissionHelperTests.java diff --git 
a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java similarity index 61% rename from server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java rename to server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 935fe42873bc9..87fe15f35be38 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionHelper.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -10,13 +10,17 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.action.ActionListener; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateObserver; import org.opensearch.cluster.ClusterStateTaskConfig; import org.opensearch.cluster.ClusterStateTaskListener; +import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; import org.opensearch.cluster.coordination.NodeRemovalClusterStateTaskExecutor; +import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.service.ClusterService; @@ -36,15 +40,15 @@ * @opensearch.internal */ -public class DecommissionHelper { +public class DecommissionController { - private static final Logger logger = LogManager.getLogger(DecommissionHelper.class); + private static final Logger logger = LogManager.getLogger(DecommissionController.class); private final NodeRemovalClusterStateTaskExecutor nodeRemovalExecutor; private final ClusterService clusterService; private final ThreadPool threadPool; - DecommissionHelper( + 
DecommissionController( ClusterService clusterService, AllocationService allocationService, ThreadPool threadPool @@ -110,4 +114,45 @@ public void onTimeout(TimeValue timeout) { } }, allDecommissionedNodesRemovedPredicate); } + + public void updateMetadataWithDecommissionStatus( + DecommissionStatus decommissionStatus, + ActionListener listener + ) { + clusterService.submitStateUpdateTask(decommissionStatus.status(), new ClusterStateUpdateTask(Priority.URGENT) { + @Override + public ClusterState execute(ClusterState currentState) throws Exception { + Metadata metadata = currentState.metadata(); + DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); + assert decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute() != null + : "failed to update status for decommission. metadata doesn't exist or invalid"; + assert assertIncrementalStatusOrFailed(decommissionAttributeMetadata.status(), decommissionStatus); + Metadata.Builder mdBuilder = Metadata.builder(metadata); + DecommissionAttributeMetadata newMetadata = decommissionAttributeMetadata.withUpdatedStatus(decommissionStatus); + mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, newMetadata); + return ClusterState.builder(currentState).metadata(mdBuilder).build(); + } + + @Override + public void onFailure(String source, Exception e) { + logger.error(() -> new ParameterizedMessage("failed to mark status as [{}]", decommissionStatus.status()), e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + listener.onResponse(new ClusterStateUpdateResponse(true)); + } + + }); + } + + private static boolean assertIncrementalStatusOrFailed(DecommissionStatus oldStatus, DecommissionStatus newStatus) { + if (oldStatus == null || newStatus.equals(DecommissionStatus.DECOMMISSION_FAILED)) return true; + else if 
(newStatus.equals(DecommissionStatus.DECOMMISSION_SUCCESSFUL)) { + return oldStatus.equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS); + } else if (newStatus.equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS)) { + return oldStatus.equals(DecommissionStatus.DECOMMISSION_INIT); + } + return true; + } } diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index b0e2e2f1f7a46..971da583eabe5 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -19,7 +19,6 @@ import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsRequest; import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsResponse; import org.opensearch.cluster.ClusterState; -import org.opensearch.cluster.ClusterStateObserver; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; @@ -71,7 +70,7 @@ public class DecommissionService { private final ClusterService clusterService; private final TransportService transportService; private final ThreadPool threadPool; - private final DecommissionHelper decommissionHelper; + private final DecommissionController decommissionController; private volatile List awarenessAttributes; @Inject @@ -86,7 +85,7 @@ public DecommissionService( this.clusterService = clusterService; this.transportService = transportService; this.threadPool = threadPool; - this.decommissionHelper = new DecommissionHelper( + this.decommissionController = new DecommissionController( clusterService, allocationService, threadPool @@ -302,7 +301,7 @@ public void onFailure(Exception e) { ); } }; - updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_IN_PROGRESS, 
listener); + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_IN_PROGRESS, listener); // TODO - code for graceful decommission } @@ -329,7 +328,7 @@ public void onFailure(Exception e) { public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { DecommissionStatus updateStatusTo = clusterStateUpdateResponse.isAcknowledged() ? DecommissionStatus.DECOMMISSION_SUCCESSFUL : DecommissionStatus.DECOMMISSION_FAILED; - updateMetadataWithDecommissionStatus(updateStatusTo, statusUpdateListener); + decommissionController.updateMetadataWithDecommissionStatus(updateStatusTo, statusUpdateListener); } @Override @@ -339,7 +338,7 @@ public void onFailure(Exception e) { }; // execute nodes decommissioning and wait for it to complete - decommissionHelper.handleNodesDecommissionRequest( + decommissionController.handleNodesDecommissionRequest( nodesWithDecommissionAttribute(state, decommissionAttribute), "nodes-decommissioned", TimeValue.timeValueSeconds(30L), @@ -348,36 +347,7 @@ public void onFailure(Exception e) { clearVotingConfigAfterSuccessfulDecommission(); } - private void updateMetadataWithDecommissionStatus( - DecommissionStatus decommissionStatus, - ActionListener listener - ) { - clusterService.submitStateUpdateTask(decommissionStatus.status(), new ClusterStateUpdateTask(Priority.URGENT) { - @Override - public ClusterState execute(ClusterState currentState) throws Exception { - Metadata metadata = currentState.metadata(); - DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); - assert decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute() != null - : "failed to update status for decommission. 
metadata doesn't exist or invalid"; - assert assertIncrementalStatusOrFailed(decommissionAttributeMetadata.status(), decommissionStatus); - Metadata.Builder mdBuilder = Metadata.builder(metadata); - DecommissionAttributeMetadata newMetadata = decommissionAttributeMetadata.withUpdatedStatus(decommissionStatus); - mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, newMetadata); - return ClusterState.builder(currentState).metadata(mdBuilder).build(); - } - @Override - public void onFailure(String source, Exception e) { - logger.error(() -> new ParameterizedMessage("failed to mark status as [{}]", decommissionStatus.status()), e); - } - - @Override - public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { - listener.onResponse(new ClusterStateUpdateResponse(true)); - } - - }); - } private Set nodesWithDecommissionAttribute(ClusterState clusterState, DecommissionAttribute decommissionAttribute) { Set nodesWithDecommissionAttribute = new HashSet<>(); @@ -395,16 +365,6 @@ private Set nodesWithDecommissionAttribute(ClusterState clusterSt return nodesWithDecommissionAttribute; } - private static boolean assertIncrementalStatusOrFailed(DecommissionStatus oldStatus, DecommissionStatus newStatus) { - if (oldStatus == null || newStatus.equals(DecommissionStatus.DECOMMISSION_FAILED)) return true; - else if (newStatus.equals(DecommissionStatus.DECOMMISSION_SUCCESSFUL)) { - return oldStatus.equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS); - } else if (newStatus.equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS)) { - return oldStatus.equals(DecommissionStatus.DECOMMISSION_INIT); - } - return true; - } - private static boolean nodeHasDecommissionedAttribute(DiscoveryNode discoveryNode, DecommissionAttribute decommissionAttribute) { return discoveryNode.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue()); } diff --git 
a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java new file mode 100644 index 0000000000000..33861b09c2596 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -0,0 +1,179 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.opensearch.Version; +import org.opensearch.action.ActionListener; +import org.opensearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsActionTests; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateObserver; +import org.opensearch.cluster.ClusterStateUpdateTask; +import org.opensearch.cluster.ack.ClusterStateUpdateResponse; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import static java.util.Collections.singletonMap; +import static org.hamcrest.Matchers.contains; 
+import static org.hamcrest.Matchers.sameInstance; +import static org.mockito.Mockito.mock; +import static org.opensearch.cluster.ClusterState.builder; +import static org.opensearch.test.ClusterServiceUtils.createClusterService; +import static org.opensearch.test.ClusterServiceUtils.setState; + +public class DecommissionControllerTests extends OpenSearchTestCase { + + private static ThreadPool threadPool; + private static ClusterService clusterService; + private DecommissionController decommissionController; + private ClusterStateObserver clusterStateObserver; + + @BeforeClass + public static void createThreadPoolAndClusterService() { + threadPool = new TestThreadPool("test", Settings.EMPTY); + clusterService = createClusterService(threadPool); + } + + @AfterClass + public static void shutdownThreadPoolAndClusterService() { + clusterService.stop(); + threadPool.shutdown(); + } + + @Before + public void setupForTests() { + ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); + logger.info("--> adding five nodes on same zone_1"); + clusterState = addNodes(clusterState, "zone_1", "node1", "node2", "node3", "node4", "node5"); + logger.info("--> adding five nodes on same zone_2"); + clusterState = addNodes(clusterState, "zone_2", "node6", "node7", "node8", "node9", "node10"); + logger.info("--> adding five nodes on same zone_3"); + clusterState = addNodes(clusterState, "zone_3", "node11", "node12", "node13", "node14", "node15"); + + final ClusterState.Builder builder = builder(clusterState); + setState(clusterService, builder); + + final AllocationService allocationService = mock(AllocationService.class); + decommissionController = new DecommissionController(clusterService, allocationService, threadPool); + clusterStateObserver = new ClusterStateObserver(clusterService, null, logger, threadPool.getThreadContext()); + } + + public void testRemoveNodesForDecommissionRequest() throws InterruptedException{ + final CountDownLatch countDownLatch 
= new CountDownLatch(2); + + Set nodesToBeRemoved = new HashSet<>(); + nodesToBeRemoved.add(clusterService.state().nodes().get("node11")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node12")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node13")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node14")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node15")); + + ActionListener actionListener = new ActionListener() { + @Override + public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { + logger.info("test"); + } + + @Override + public void onFailure(Exception e) { + } + }; + + clusterStateObserver.waitForNextChange(new UpdateClusterStateForDecommission(countDownLatch, nodesToBeRemoved)); + decommissionController.handleNodesDecommissionRequest( + nodesToBeRemoved, + "unit-test", + TimeValue.timeValueSeconds(30L), + actionListener + ); + + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + assertEquals(clusterService.getClusterApplierService().state().nodes().getDataNodes().size(), 10); + } + + private ClusterState addNodes(ClusterState clusterState, String zone, String... 
nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + org.opensearch.common.collect.List.of(nodeIds).forEach(nodeId -> nodeBuilder.add(newNode(nodeId, singletonMap("zone", zone)))); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private static DiscoveryNode newNode(String nodeId, Map attributes) { + return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_DATA_ROLES, Version.CURRENT); + } + + final private static Set CLUSTER_MANAGER_DATA_ROLES = Collections.unmodifiableSet( + new HashSet<>(Arrays.asList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)) + ); + + private static class UpdateClusterStateForDecommission implements ClusterStateObserver.Listener { + + final CountDownLatch doneLatch; + final Set discoveryNodes; + + UpdateClusterStateForDecommission(CountDownLatch latch, Set discoveryNodes) { + this.doneLatch = latch; + this.discoveryNodes = discoveryNodes; + } + + @Override + public void onNewClusterState(ClusterState state) { + clusterService.getClusterManagerService().submitStateUpdateTask("decommission", new ClusterStateUpdateTask() { + @Override + public ClusterState execute(ClusterState currentState) throws Exception { + assertThat(currentState, sameInstance(state)); + final DiscoveryNodes.Builder remainingNodesBuilder = DiscoveryNodes.builder(currentState.nodes()); + for (DiscoveryNode nodeToBeRemoved : discoveryNodes) { + remainingNodesBuilder.remove(nodeToBeRemoved); + } + return ClusterState.builder(currentState).nodes(remainingNodesBuilder).build(); + } + + @Override + public void onFailure(String source, Exception e) { + throw new AssertionError("unexpected failure", e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + doneLatch.countDown(); + } + }); + } + + @Override + public void onClusterServiceClose() { + throw 
new AssertionError("unexpected close"); + } + + @Override + public void onTimeout(TimeValue timeout) { + throw new AssertionError("unexpected timeout"); + } + } +} diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionHelperTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionHelperTests.java deleted file mode 100644 index 9452c0e94c262..0000000000000 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionHelperTests.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.cluster.decommission; - -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.opensearch.Version; -import org.opensearch.cluster.ClusterName; -import org.opensearch.cluster.ClusterState; -import org.opensearch.cluster.ClusterStateObserver; -import org.opensearch.cluster.coordination.CoordinationMetadata; -import org.opensearch.cluster.node.DiscoveryNode; -import org.opensearch.cluster.node.DiscoveryNodeRole; -import org.opensearch.cluster.node.DiscoveryNodes; -import org.opensearch.cluster.routing.allocation.AllocationService; -import org.opensearch.cluster.service.ClusterService; -import org.opensearch.common.settings.ClusterSettings; -import org.opensearch.common.settings.Settings; -import org.opensearch.test.OpenSearchTestCase; -import org.opensearch.threadpool.TestThreadPool; -import org.opensearch.threadpool.ThreadPool; -import org.opensearch.transport.TransportService; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import static java.util.Collections.emptyMap; -import static java.util.Collections.emptySet; -import static 
java.util.Collections.singletonMap; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; -import static org.opensearch.test.ClusterServiceUtils.createClusterService; - -public class DecommissionHelperTests extends OpenSearchTestCase { - - private static ThreadPool threadPool; - private static ClusterService clusterService; - - @BeforeClass - public static void createThreadPoolAndClusterService() { - threadPool = new TestThreadPool("test", Settings.EMPTY); - clusterService = createClusterService(threadPool); - } - - @AfterClass - public static void shutdownThreadPoolAndClusterService() { - clusterService.stop(); - threadPool.shutdown(); - } - -// public void testRemoveNodesForDecommissionRequest() { -// final AllocationService allocationService = mock(AllocationService.class); -// final ClusterService clusterService = mock(ClusterService.class); -// -// ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); -// -// logger.info("--> adding five nodes on same zone_1"); -// clusterState = addNodes(clusterState, "zone_1", "node1", "node2", "node3", "node4", "node5"); -// -// logger.info("--> adding five nodes on same zone_2"); -// clusterState = addNodes(clusterState, "zone_2", "node6", "node7", "node8", "node9", "node10"); -// -// logger.info("--> adding five nodes on same zone_3"); -// clusterState = addNodes(clusterState, "zone_3", "node11", "node12", "node13", "node14", "node15"); -// -// when(clusterService.state()).thenReturn(clusterState); -// -// final DecommissionHelper decommissionHelper = new DecommissionHelper(clusterService, allocationService); -// -// List nodesToBeRemoved = new ArrayList<>(); -// nodesToBeRemoved.add(clusterState.nodes().get("node11")); -// nodesToBeRemoved.add(clusterState.nodes().get("node12")); -// nodesToBeRemoved.add(clusterState.nodes().get("node13")); -// nodesToBeRemoved.add(clusterState.nodes().get("node14")); -// nodesToBeRemoved.add(clusterState.nodes().get("node15")); 
-// -// decommissionHelper.handleNodesDecommissionRequest(nodesToBeRemoved, "unit-test"); -// assertEquals((clusterService.state().nodes().getSize()), 10); -// } - - private ClusterState addNodes(ClusterState clusterState, String zone, String... nodeIds) { - DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); - org.opensearch.common.collect.List.of(nodeIds).forEach(nodeId -> nodeBuilder.add(newNode(nodeId, singletonMap("zone", zone)))); - clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); - return clusterState; - } - - private static DiscoveryNode newNode(String nodeId, Map attributes) { - return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_DATA_ROLES, Version.CURRENT); - } - - final private static Set CLUSTER_MANAGER_DATA_ROLES = Collections.unmodifiableSet( - new HashSet<>(Arrays.asList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)) - ); -} From 5ee6f449bbb2bd9cf50326a4510876847e134b9d Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Mon, 29 Aug 2022 17:54:59 +0530 Subject: [PATCH 29/87] Add UT Signed-off-by: Rishab Nahata --- .../DecommissionControllerTests.java | 71 ++++++++++++------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java index 33861b09c2596..262186d0dca2c 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -38,35 +38,34 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import static java.util.Collections.emptyMap; +import static java.util.Collections.emptySet; import static java.util.Collections.singletonMap; import static 
org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.sameInstance; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import static org.opensearch.cluster.ClusterState.builder; +import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService; import static org.opensearch.test.ClusterServiceUtils.createClusterService; import static org.opensearch.test.ClusterServiceUtils.setState; public class DecommissionControllerTests extends OpenSearchTestCase { - private static ThreadPool threadPool; - private static ClusterService clusterService; + private ThreadPool threadPool; + private ClusterService clusterService; + private AllocationService allocationService; private DecommissionController decommissionController; private ClusterStateObserver clusterStateObserver; - @BeforeClass - public static void createThreadPoolAndClusterService() { + @Override + public void setUp() throws Exception { + super.setUp(); threadPool = new TestThreadPool("test", Settings.EMPTY); clusterService = createClusterService(threadPool); - } - - @AfterClass - public static void shutdownThreadPoolAndClusterService() { - clusterService.stop(); - threadPool.shutdown(); - } - - @Before - public void setupForTests() { + allocationService = createAllocationService(); ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); logger.info("--> adding five nodes on same zone_1"); clusterState = addNodes(clusterState, "zone_1", "node1", "node2", "node3", "node4", "node5"); @@ -74,17 +73,15 @@ public void setupForTests() { clusterState = addNodes(clusterState, "zone_2", "node6", "node7", "node8", "node9", "node10"); logger.info("--> adding five nodes on same zone_3"); clusterState = addNodes(clusterState, "zone_3", "node11", "node12", "node13", "node14", "node15"); - + clusterState = 
setLocalNodeAsClusterManagerNode(clusterState, "node1"); final ClusterState.Builder builder = builder(clusterState); setState(clusterService, builder); - - final AllocationService allocationService = mock(AllocationService.class); decommissionController = new DecommissionController(clusterService, allocationService, threadPool); clusterStateObserver = new ClusterStateObserver(clusterService, null, logger, threadPool.getThreadContext()); } public void testRemoveNodesForDecommissionRequest() throws InterruptedException{ - final CountDownLatch countDownLatch = new CountDownLatch(2); + final CountDownLatch countDownLatch = new CountDownLatch(1); Set nodesToBeRemoved = new HashSet<>(); nodesToBeRemoved.add(clusterService.state().nodes().get("node11")); @@ -96,6 +93,7 @@ public void testRemoveNodesForDecommissionRequest() throws InterruptedException{ ActionListener actionListener = new ActionListener() { @Override public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { + countDownLatch.countDown(); logger.info("test"); } @@ -103,16 +101,16 @@ public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { public void onFailure(Exception e) { } }; - - clusterStateObserver.waitForNextChange(new UpdateClusterStateForDecommission(countDownLatch, nodesToBeRemoved)); decommissionController.handleNodesDecommissionRequest( nodesToBeRemoved, "unit-test", - TimeValue.timeValueSeconds(30L), + TimeValue.timeValueSeconds(29L), actionListener ); + clusterStateObserver.waitForNextChange(new UpdateClusterStateForDecommission(countDownLatch, nodesToBeRemoved)); assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + ClusterState state = clusterService.getClusterApplierService().state(); assertEquals(clusterService.getClusterApplierService().state().nodes().getDataNodes().size(), 10); } @@ -123,15 +121,38 @@ private ClusterState addNodes(ClusterState clusterState, String zone, String... 
return clusterState; } + private ClusterState addClusterManagerNode(ClusterState clusterState, String zone, String nodeId) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + nodeBuilder.add(newClusterManagerNode(nodeId, singletonMap("zone", zone))); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState setLocalNodeAsClusterManagerNode(ClusterState clusterState, String nodeId) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + nodeBuilder.localNodeId(nodeId); + nodeBuilder.clusterManagerNodeId(nodeId); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + private static DiscoveryNode newNode(String nodeId, Map attributes) { - return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_DATA_ROLES, Version.CURRENT); + return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, DATA_ROLE, Version.CURRENT); + } + + private static DiscoveryNode newClusterManagerNode(String nodeId, Map attributes) { + return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_ROLE, Version.CURRENT); } - final private static Set CLUSTER_MANAGER_DATA_ROLES = Collections.unmodifiableSet( - new HashSet<>(Arrays.asList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)) + final private static Set CLUSTER_MANAGER_ROLE = Collections.unmodifiableSet( + new HashSet<>(Arrays.asList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + ); + + final private static Set DATA_ROLE = Collections.unmodifiableSet( + new HashSet<>(Arrays.asList(DiscoveryNodeRole.DATA_ROLE)) ); - private static class UpdateClusterStateForDecommission implements ClusterStateObserver.Listener { + private class UpdateClusterStateForDecommission implements ClusterStateObserver.Listener { final CountDownLatch doneLatch; final Set 
discoveryNodes; From 20b465ed786975f49ed5f9787860f496b20bf323 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Mon, 29 Aug 2022 21:46:05 +0530 Subject: [PATCH 30/87] Add UT for DecommissionController Signed-off-by: Rishab Nahata --- .../decommission/DecommissionController.java | 28 ++- .../decommission/DecommissionService.java | 48 +++-- .../DecommissionControllerTests.java | 193 ++++++++++-------- 3 files changed, 153 insertions(+), 116 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 87fe15f35be38..003fe329da2da 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.OpenSearchTimeoutException; import org.opensearch.action.ActionListener; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateObserver; @@ -62,7 +63,7 @@ public void handleNodesDecommissionRequest( Set nodesToBeDecommissioned, String reason, TimeValue timeout, - ActionListener nodesRemovedListener + ActionListener nodesRemovedListener ) { final Map nodesDecommissionTasks = new LinkedHashMap<>(); nodesToBeDecommissioned.forEach(discoveryNode -> { @@ -99,7 +100,7 @@ public void handleNodesDecommissionRequest( @Override public void onNewClusterState(ClusterState state) { logger.info("successfully removed all decommissioned nodes [{}] from the cluster", nodesToBeDecommissioned.toString()); - nodesRemovedListener.onResponse(new ClusterStateUpdateResponse(true)); + nodesRemovedListener.onResponse(null); } @Override @@ -110,22 +111,26 @@ public void onClusterServiceClose() { @Override public void 
onTimeout(TimeValue timeout) { logger.info("timed out while waiting for removal of decommissioned nodes"); - nodesRemovedListener.onResponse(new ClusterStateUpdateResponse(false)); + nodesRemovedListener.onFailure( + new OpenSearchTimeoutException( + "timed out waiting for removal of decommissioned nodes [{}] to take effect", + nodesToBeDecommissioned.toString() + ) + ); } }, allDecommissionedNodesRemovedPredicate); } public void updateMetadataWithDecommissionStatus( DecommissionStatus decommissionStatus, - ActionListener listener + ActionListener listener ) { clusterService.submitStateUpdateTask(decommissionStatus.status(), new ClusterStateUpdateTask(Priority.URGENT) { @Override - public ClusterState execute(ClusterState currentState) throws Exception { + public ClusterState execute(ClusterState currentState) { Metadata metadata = currentState.metadata(); DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); - assert decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute() != null - : "failed to update status for decommission. 
metadata doesn't exist or invalid"; + assert decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute() != null; assert assertIncrementalStatusOrFailed(decommissionAttributeMetadata.status(), decommissionStatus); Metadata.Builder mdBuilder = Metadata.builder(metadata); DecommissionAttributeMetadata newMetadata = decommissionAttributeMetadata.withUpdatedStatus(decommissionStatus); @@ -135,19 +140,20 @@ public ClusterState execute(ClusterState currentState) throws Exception { @Override public void onFailure(String source, Exception e) { - logger.error(() -> new ParameterizedMessage("failed to mark status as [{}]", decommissionStatus.status()), e); + listener.onFailure(e); } @Override public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { - listener.onResponse(new ClusterStateUpdateResponse(true)); + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().custom(DecommissionAttributeMetadata.TYPE); + assert decommissionAttributeMetadata.status().equals(decommissionStatus); + listener.onResponse(null); } - }); } private static boolean assertIncrementalStatusOrFailed(DecommissionStatus oldStatus, DecommissionStatus newStatus) { - if (oldStatus == null || newStatus.equals(DecommissionStatus.DECOMMISSION_FAILED)) return true; + if (newStatus.equals(DecommissionStatus.DECOMMISSION_FAILED)) return true; else if (newStatus.equals(DecommissionStatus.DECOMMISSION_SUCCESSFUL)) { return oldStatus.equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS); } else if (newStatus.equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS)) { diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 971da583eabe5..d935bc02b71d4 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ 
b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -280,14 +280,14 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS private void initiateGracefulDecommission() { // maybe create a supplier for status update listener? - ActionListener listener = new ActionListener<>() { + ActionListener listener = new ActionListener<>() { @Override - public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { + public void onResponse(Void unused) { logger.info( "updated decommission status to [{}], weighing away awareness attribute for graceful shutdown", DecommissionStatus.DECOMMISSION_IN_PROGRESS ); - failDecommissionedNodes(clusterService.state()); + failDecommissionedNodes(clusterService.getClusterApplierService().state()); } @Override @@ -311,10 +311,13 @@ private void failDecommissionedNodes(ClusterState state) { : "unexpected status encountered while decommissioning nodes"; DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); - ActionListener statusUpdateListener = new ActionListener<>() { + ActionListener statusUpdateListener = new ActionListener<>() { @Override - public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { - logger.info("successfully updated decommission status"); + public void onResponse(Void unused) { + logger.info( + "updated decommission status to [{}], decommissioning completed.", + DecommissionStatus.DECOMMISSION_SUCCESSFUL + ); } @Override @@ -323,32 +326,33 @@ public void onFailure(Exception e) { } }; - ActionListener nodesRemovalListener = new ActionListener<>() { - @Override - public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { - DecommissionStatus updateStatusTo = clusterStateUpdateResponse.isAcknowledged() ? 
- DecommissionStatus.DECOMMISSION_SUCCESSFUL : DecommissionStatus.DECOMMISSION_FAILED; - decommissionController.updateMetadataWithDecommissionStatus(updateStatusTo, statusUpdateListener); - } - - @Override - public void onFailure(Exception e) { - logger.error("error while waiting for decommissioned nodes to be removed", e); - } - }; - // execute nodes decommissioning and wait for it to complete decommissionController.handleNodesDecommissionRequest( nodesWithDecommissionAttribute(state, decommissionAttribute), "nodes-decommissioned", TimeValue.timeValueSeconds(30L), - nodesRemovalListener + new ActionListener() { + @Override + public void onResponse(Void unused) { + decommissionController.updateMetadataWithDecommissionStatus( + DecommissionStatus.DECOMMISSION_SUCCESSFUL, + statusUpdateListener + ); + } + + @Override + public void onFailure(Exception e) { + decommissionController.updateMetadataWithDecommissionStatus( + DecommissionStatus.DECOMMISSION_FAILED, + statusUpdateListener + ); + } + } ); clearVotingConfigAfterSuccessfulDecommission(); } - private Set nodesWithDecommissionAttribute(ClusterState clusterState, DecommissionAttribute decommissionAttribute) { Set nodesWithDecommissionAttribute = new HashSet<>(); final Predicate shouldRemoveNodePredicate = discoveryNode -> nodeHasDecommissionedAttribute( diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java index 262186d0dca2c..809a392d060e2 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -8,9 +8,11 @@ package org.opensearch.cluster.decommission; +import org.junit.After; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; +import org.opensearch.OpenSearchTimeoutException; import 
org.opensearch.Version; import org.opensearch.action.ActionListener; import org.opensearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsActionTests; @@ -19,11 +21,14 @@ import org.opensearch.cluster.ClusterStateObserver; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; +import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodeRole; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.collect.ImmutableOpenMap; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.test.OpenSearchTestCase; @@ -37,15 +42,21 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; import static java.util.Collections.emptyMap; import static java.util.Collections.emptySet; import static java.util.Collections.singletonMap; import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.sameInstance; +import static org.hamcrest.Matchers.startsWith; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import static org.opensearch.cluster.ClusterState.builder; import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService; @@ -58,7 +69,6 @@ public class DecommissionControllerTests extends OpenSearchTestCase { private 
ClusterService clusterService; private AllocationService allocationService; private DecommissionController decommissionController; - private ClusterStateObserver clusterStateObserver; @Override public void setUp() throws Exception { @@ -66,6 +76,10 @@ public void setUp() throws Exception { threadPool = new TestThreadPool("test", Settings.EMPTY); clusterService = createClusterService(threadPool); allocationService = createAllocationService(); + } + + @Before + public void setDefaultClusterState() { ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); logger.info("--> adding five nodes on same zone_1"); clusterState = addNodes(clusterState, "zone_1", "node1", "node2", "node3", "node4", "node5"); @@ -75,14 +89,21 @@ public void setUp() throws Exception { clusterState = addNodes(clusterState, "zone_3", "node11", "node12", "node13", "node14", "node15"); clusterState = setLocalNodeAsClusterManagerNode(clusterState, "node1"); final ClusterState.Builder builder = builder(clusterState); - setState(clusterService, builder); + setState( + clusterService, + builder + ); decommissionController = new DecommissionController(clusterService, allocationService, threadPool); - clusterStateObserver = new ClusterStateObserver(clusterService, null, logger, threadPool.getThreadContext()); } - public void testRemoveNodesForDecommissionRequest() throws InterruptedException{ - final CountDownLatch countDownLatch = new CountDownLatch(1); + @After + public void shutdownThreadPoolAndClusterService() { + clusterService.stop(); + threadPool.shutdown(); + } + public void testNodesRemovedForDecommissionRequestSuccessfulResponse() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); Set nodesToBeRemoved = new HashSet<>(); nodesToBeRemoved.add(clusterService.state().nodes().get("node11")); nodesToBeRemoved.add(clusterService.state().nodes().get("node12")); @@ -90,28 +111,95 @@ public void testRemoveNodesForDecommissionRequest() 
throws InterruptedException{ nodesToBeRemoved.add(clusterService.state().nodes().get("node14")); nodesToBeRemoved.add(clusterService.state().nodes().get("node15")); - ActionListener actionListener = new ActionListener() { - @Override - public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { - countDownLatch.countDown(); - logger.info("test"); - } + decommissionController.handleNodesDecommissionRequest( + nodesToBeRemoved, + "unit-test", + TimeValue.timeValueSeconds(30L), + new ActionListener() { + @Override + public void onResponse(Void unused) { + countDownLatch.countDown(); + } - @Override - public void onFailure(Exception e) { + @Override + public void onFailure(Exception e) { + fail("there shouldn't have been any failure"); + } } - }; + ); + + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + // test all 5 nodes removed and cluster has 10 nodes + Set nodes = StreamSupport.stream( + clusterService.getClusterApplierService().state().nodes().spliterator(), false + ).collect(Collectors.toSet()); + assertEquals(nodes.size(), 10); + // test no nodes part of zone_3 (the decommissioned zone; attribute values use underscores) + for (DiscoveryNode node : nodes) { + assertNotEquals(node.getAttributes().get("zone"), "zone_3"); + } + } + + public void testTimesOut() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + Set nodesToBeRemoved = new HashSet<>(); + nodesToBeRemoved.add(clusterService.state().nodes().get("node11")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node12")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node13")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node14")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node15")); decommissionController.handleNodesDecommissionRequest( nodesToBeRemoved, "unit-test", - TimeValue.timeValueSeconds(29L), - actionListener + TimeValue.timeValueMillis(2), + new ActionListener() { + @Override + public void onResponse(Void unused) { + fail("response 
shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertThat(e, instanceOf(OpenSearchTimeoutException.class)); + assertThat(e.getMessage(), startsWith("timed out waiting for removal of decommissioned nodes")); + countDownLatch.countDown(); + } + } + ); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + public void testSuccessfulDecommissionStatusMetadataUpdate() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionAttributeMetadata oldMetadata = new DecommissionAttributeMetadata( + new DecommissionAttribute("zone", "zone-1"), + DecommissionStatus.DECOMMISSION_IN_PROGRESS ); - clusterStateObserver.waitForNextChange(new UpdateClusterStateForDecommission(countDownLatch, nodesToBeRemoved)); + ClusterState state = clusterService.state(); + Metadata metadata = state.metadata(); + Metadata.Builder mdBuilder = Metadata.builder(metadata); + mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, oldMetadata); + state = ClusterState.builder(state).metadata(mdBuilder).build(); + setState(clusterService, state); + + decommissionController.updateMetadataWithDecommissionStatus( + DecommissionStatus.DECOMMISSION_SUCCESSFUL, + new ActionListener() { + @Override + public void onResponse(Void unused) { + countDownLatch.countDown(); + } + @Override + public void onFailure(Exception e) { + fail("decommission status update failed"); + } + } + ); assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); - ClusterState state = clusterService.getClusterApplierService().state(); - assertEquals(clusterService.getClusterApplierService().state().nodes().getDataNodes().size(), 10); + ClusterState newState = clusterService.getClusterApplierService().state(); + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().custom(DecommissionAttributeMetadata.TYPE); + assertEquals(decommissionAttributeMetadata.status(), DecommissionStatus.DECOMMISSION_SUCCESSFUL); } 
private ClusterState addNodes(ClusterState clusterState, String zone, String... nodeIds) { @@ -121,13 +209,6 @@ private ClusterState addNodes(ClusterState clusterState, String zone, String... return clusterState; } - private ClusterState addClusterManagerNode(ClusterState clusterState, String zone, String nodeId) { - DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); - nodeBuilder.add(newClusterManagerNode(nodeId, singletonMap("zone", zone))); - clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); - return clusterState; - } - private ClusterState setLocalNodeAsClusterManagerNode(ClusterState clusterState, String nodeId) { DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); nodeBuilder.localNodeId(nodeId); @@ -137,64 +218,10 @@ private ClusterState setLocalNodeAsClusterManagerNode(ClusterState clusterState, } private static DiscoveryNode newNode(String nodeId, Map attributes) { - return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, DATA_ROLE, Version.CURRENT); + return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_DATA_ROLE, Version.CURRENT); } - private static DiscoveryNode newClusterManagerNode(String nodeId, Map attributes) { - return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_ROLE, Version.CURRENT); - } - - final private static Set CLUSTER_MANAGER_ROLE = Collections.unmodifiableSet( - new HashSet<>(Arrays.asList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) - ); - - final private static Set DATA_ROLE = Collections.unmodifiableSet( - new HashSet<>(Arrays.asList(DiscoveryNodeRole.DATA_ROLE)) + final private static Set CLUSTER_MANAGER_DATA_ROLE = Collections.unmodifiableSet( + new HashSet<>(Arrays.asList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)) ); - - private class UpdateClusterStateForDecommission implements ClusterStateObserver.Listener { - - final 
CountDownLatch doneLatch; - final Set discoveryNodes; - - UpdateClusterStateForDecommission(CountDownLatch latch, Set discoveryNodes) { - this.doneLatch = latch; - this.discoveryNodes = discoveryNodes; - } - - @Override - public void onNewClusterState(ClusterState state) { - clusterService.getClusterManagerService().submitStateUpdateTask("decommission", new ClusterStateUpdateTask() { - @Override - public ClusterState execute(ClusterState currentState) throws Exception { - assertThat(currentState, sameInstance(state)); - final DiscoveryNodes.Builder remainingNodesBuilder = DiscoveryNodes.builder(currentState.nodes()); - for (DiscoveryNode nodeToBeRemoved : discoveryNodes) { - remainingNodesBuilder.remove(nodeToBeRemoved); - } - return ClusterState.builder(currentState).nodes(remainingNodesBuilder).build(); - } - - @Override - public void onFailure(String source, Exception e) { - throw new AssertionError("unexpected failure", e); - } - - @Override - public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { - doneLatch.countDown(); - } - }); - } - - @Override - public void onClusterServiceClose() { - throw new AssertionError("unexpected close"); - } - - @Override - public void onTimeout(TimeValue timeout) { - throw new AssertionError("unexpected timeout"); - } - } } From 3b5dd143a85f69fa18c212b449dadb4422c84813 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 30 Aug 2022 12:05:10 +0530 Subject: [PATCH 31/87] Improvements and UTs Signed-off-by: Rishab Nahata --- .../AddVotingConfigExclusionsRequest.java | 2 +- .../decommission/DecommissionController.java | 80 +++++++- .../decommission/DecommissionService.java | 186 +++++++++--------- .../DecommissionControllerTests.java | 112 ++++++++++- .../DecommissionServiceTests.java | 172 ++++++++++++++++ 5 files changed, 455 insertions(+), 97 deletions(-) diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/configuration/AddVotingConfigExclusionsRequest.java 
b/server/src/main/java/org/opensearch/action/admin/cluster/configuration/AddVotingConfigExclusionsRequest.java index a2a77a1316898..739bfaf2a3fb1 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/configuration/AddVotingConfigExclusionsRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/configuration/AddVotingConfigExclusionsRequest.java @@ -157,7 +157,7 @@ Set resolveVotingConfigExclusions(ClusterState currentSta } else { assert nodeNames.length >= 1; Map existingNodes = StreamSupport.stream(allNodes.spliterator(), false) - .collect(Collectors.toMap(DiscoveryNode::getName, Function.identity())); + .collect(Collectors.toMap(DiscoveryNode::getName, Function.identity(), (r1, r2) -> r1)); for (String nodeName : nodeNames) { if (existingNodes.containsKey(nodeName)) { diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 003fe329da2da..d699fc51e819a 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -13,6 +13,12 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.OpenSearchTimeoutException; import org.opensearch.action.ActionListener; +import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction; +import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest; +import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsResponse; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsAction; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsRequest; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsResponse; import 
org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateObserver; import org.opensearch.cluster.ClusterStateTaskConfig; @@ -26,17 +32,24 @@ import org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.Priority; +import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.unit.TimeValue; import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.Transport; +import org.opensearch.transport.TransportException; +import org.opensearch.transport.TransportResponseHandler; +import org.opensearch.transport.TransportService; +import java.io.IOException; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.Predicate; /** - * Helper executor class to remove list of nodes from the cluster + * Helper controller class to remove list of nodes from the cluster and update status * * @opensearch.internal */ @@ -47,18 +60,83 @@ public class DecommissionController { private final NodeRemovalClusterStateTaskExecutor nodeRemovalExecutor; private final ClusterService clusterService; + private final TransportService transportService; private final ThreadPool threadPool; DecommissionController( ClusterService clusterService, + TransportService transportService, AllocationService allocationService, ThreadPool threadPool ) { this.clusterService = clusterService; + this.transportService = transportService; this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, logger); this.threadPool = threadPool; } + public void excludeDecommissionedNodesFromVotingConfig(Set nodes, ActionListener listener) { + transportService.sendRequest( + transportService.getLocalNode(), + AddVotingConfigExclusionsAction.NAME, + new AddVotingConfigExclusionsRequest(nodes.stream().toArray(String[] :: new)), + new TransportResponseHandler() { + 
@Override + public void handleResponse(AddVotingConfigExclusionsResponse response) { + listener.onResponse(null); + } + + @Override + public void handleException(TransportException exp) { + listener.onFailure(exp); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + @Override + public AddVotingConfigExclusionsResponse read(StreamInput in) throws IOException { + return new AddVotingConfigExclusionsResponse(in); + } + } + ); + } + + public void clearVotingConfigExclusion(ActionListener listener) { + final ClearVotingConfigExclusionsRequest clearVotingConfigExclusionsRequest = new ClearVotingConfigExclusionsRequest(); + clearVotingConfigExclusionsRequest.setWaitForRemoval(true); + transportService.sendRequest( + transportService.getLocalNode(), + ClearVotingConfigExclusionsAction.NAME, + clearVotingConfigExclusionsRequest, + new TransportResponseHandler() { + @Override + public void handleResponse(ClearVotingConfigExclusionsResponse response) { + logger.info("successfully cleared voting config after decommissioning"); + listener.onResponse(null); + } + + @Override + public void handleException(TransportException exp) { + logger.debug(new ParameterizedMessage("failure in clearing voting config exclusion after decommissioning"), exp); + listener.onFailure(exp); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + @Override + public ClearVotingConfigExclusionsResponse read(StreamInput in) throws IOException { + return new ClearVotingConfigExclusionsResponse(in); + } + } + ); + } + public void handleNodesDecommissionRequest( Set nodesToBeDecommissioned, String reason, diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index d935bc02b71d4..729ec944560d4 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ 
b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -12,16 +12,11 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.action.ActionListener; -import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction; -import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest; -import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsResponse; -import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsAction; -import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsRequest; -import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsResponse; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; +import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; @@ -38,14 +33,20 @@ import org.opensearch.transport.TransportException; import org.opensearch.transport.TransportResponseHandler; import org.opensearch.transport.TransportService; +import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfigExclusion; import java.io.IOException; -import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.function.Predicate; +import java.util.stream.Collectors; + +import static org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING; +import static 
org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING; /** * Service responsible for entire lifecycle of decommissioning and recommissioning an awareness attribute. @@ -72,6 +73,7 @@ public class DecommissionService { private final ThreadPool threadPool; private final DecommissionController decommissionController; private volatile List awarenessAttributes; + private volatile Map> forcedAwarenessAttributes; @Inject public DecommissionService( @@ -87,110 +89,100 @@ public DecommissionService( this.threadPool = threadPool; this.decommissionController = new DecommissionController( clusterService, + transportService, allocationService, threadPool ); - this.awarenessAttributes = AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.get(settings); + this.awarenessAttributes = CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.get(settings); clusterSettings.addSettingsUpdateConsumer( - AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING, + CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING, this::setAwarenessAttributes ); - } - List getAwarenessAttributes() { - return awarenessAttributes; + setForcedAwarenessAttributes(CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.get(settings)); + clusterSettings.addSettingsUpdateConsumer( + CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING, + this::setForcedAwarenessAttributes + ); } private void setAwarenessAttributes(List awarenessAttributes) { this.awarenessAttributes = awarenessAttributes; } + private void setForcedAwarenessAttributes(Settings forceSettings) { + Map> forcedAwarenessAttributes = new HashMap<>(); + Map forceGroups = forceSettings.getAsGroups(); + for (Map.Entry entry : forceGroups.entrySet()) { + List aValues = entry.getValue().getAsList("values"); + if (aValues.size() > 0) { + forcedAwarenessAttributes.put(entry.getKey(), aValues); + } + } + 
this.forcedAwarenessAttributes = forcedAwarenessAttributes; + } + public void initiateAttributeDecommissioning( final DecommissionAttribute decommissionAttribute, final ActionListener listener, ClusterState state ) { - validateAwarenessAttribute(decommissionAttribute, getAwarenessAttributes()); + validateAwarenessAttribute(decommissionAttribute, awarenessAttributes, forcedAwarenessAttributes); logger.info("initiating awareness attribute [{}] decommissioning", decommissionAttribute.toString()); + + // remove all decommissioned cluster manager eligible nodes from voting config + // The method ensures that we don't exclude nodes multiple times excludeDecommissionedClusterManagerNodesFromVotingConfig(decommissionAttribute); - registerDecommissionAttribute(decommissionAttribute, listener); + + // explicitly throwing NotClusterManagerException as we can certainly say the local cluster manager node will + // be abdicated and soon will no longer be cluster manager. + if(transportService.getLocalNode().isClusterManagerNode() + && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) { + registerDecommissionAttribute(decommissionAttribute, listener); + } else { + throw new NotClusterManagerException( + "node [" + + transportService.getLocalNode().toString() + + "] not eligible to execute decommission request. Will retry until timeout." 
+ ); + } } private void excludeDecommissionedClusterManagerNodesFromVotingConfig(DecommissionAttribute decommissionAttribute) { - final Predicate shouldDecommissionPredicate = discoveryNode -> nodeHasDecommissionedAttribute( - discoveryNode, - decommissionAttribute + Set clusterManagerNodesToBeDecommissioned = nodesWithDecommissionAttribute( + clusterService.state(), decommissionAttribute, true ); - List clusterManagerNodesToBeDecommissioned = new ArrayList<>(); - Iterator clusterManagerNodesIter = clusterService.state().nodes().getClusterManagerNodes().valuesIt(); - while (clusterManagerNodesIter.hasNext()) { - final DiscoveryNode node = clusterManagerNodesIter.next(); - if (shouldDecommissionPredicate.test(node)) { - clusterManagerNodesToBeDecommissioned.add(node.getName()); - } + Set clusterManagerNodesNameToBeDecommissioned = clusterManagerNodesToBeDecommissioned.stream() + .map(DiscoveryNode::getName) + .collect(Collectors.toSet()); + + Set currentVotingConfigExclusions = clusterService.getClusterApplierService().state().coordinationMetadata().getVotingConfigExclusions(); + Set excludedNodesName = currentVotingConfigExclusions.stream().map(VotingConfigExclusion::getNodeName).collect(Collectors.toSet()); + + // check if the to-be-excluded nodes are excluded. 
If yes, we don't need to exclude them again + if (clusterManagerNodesNameToBeDecommissioned.size() == 0 + || (clusterManagerNodesNameToBeDecommissioned.size() == excludedNodesName.size() + && excludedNodesName.containsAll(clusterManagerNodesNameToBeDecommissioned))) { + return; } - transportService.sendRequest( - transportService.getLocalNode(), - AddVotingConfigExclusionsAction.NAME, - new AddVotingConfigExclusionsRequest(clusterManagerNodesToBeDecommissioned.toArray(String[]::new)), - new TransportResponseHandler() { + decommissionController.excludeDecommissionedNodesFromVotingConfig( + clusterManagerNodesNameToBeDecommissioned, + new ActionListener() { @Override - public void handleResponse(AddVotingConfigExclusionsResponse response) { + public void onResponse(Void unused) { logger.info( - "successfully removed decommissioned cluster manager eligible nodes [{}] from voting config, " - + "proceeding to drain the decommissioned nodes", - clusterManagerNodesToBeDecommissioned.toString() + "successfully removed decommissioned cluster manager eligible nodes [{}] from voting config " + , clusterManagerNodesToBeDecommissioned.toString() ); } @Override - public void handleException(TransportException exp) { + public void onFailure(Exception e) { logger.debug( new ParameterizedMessage("failure in removing decommissioned cluster manager eligible nodes from voting config"), - exp + e ); } - - @Override - public String executor() { - return ThreadPool.Names.SAME; - } - - @Override - public AddVotingConfigExclusionsResponse read(StreamInput in) throws IOException { - return new AddVotingConfigExclusionsResponse(in); - } - } - ); - } - - private void clearVotingConfigAfterSuccessfulDecommission() { - final ClearVotingConfigExclusionsRequest clearVotingConfigExclusionsRequest = new ClearVotingConfigExclusionsRequest(); - clearVotingConfigExclusionsRequest.setWaitForRemoval(true); - transportService.sendRequest( - transportService.getLocalNode(), - 
ClearVotingConfigExclusionsAction.NAME, - clearVotingConfigExclusionsRequest, - new TransportResponseHandler() { - @Override - public void handleResponse(ClearVotingConfigExclusionsResponse response) { - logger.info("successfully cleared voting config after decommissioning"); - } - - @Override - public void handleException(TransportException exp) { - logger.debug(new ParameterizedMessage("failure in clearing voting config exclusion after decommissioning"), exp); - } - - @Override - public String executor() { - return ThreadPool.Names.SAME; - } - - @Override - public ClearVotingConfigExclusionsResponse read(StreamInput in) throws IOException { - return new ClearVotingConfigExclusionsResponse(in); - } } ); } @@ -210,14 +202,6 @@ private void registerDecommissionAttribute( final DecommissionAttribute decommissionAttribute, final ActionListener listener ) { - if (!transportService.getLocalNode().isClusterManagerNode() - || nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) { - throw new NotClusterManagerException( - "node [" - + transportService.getLocalNode().toString() - + "] not eligible to execute decommission request. Will retry until timeout." 
- ); - } clusterService.submitStateUpdateTask( "put_decommission [" + decommissionAttribute + "]", new ClusterStateUpdateTask(Priority.URGENT) { @@ -328,7 +312,7 @@ public void onFailure(Exception e) { // execute nodes decommissioning and wait for it to complete decommissionController.handleNodesDecommissionRequest( - nodesWithDecommissionAttribute(state, decommissionAttribute), + nodesWithDecommissionAttribute(state, decommissionAttribute, false), "nodes-decommissioned", TimeValue.timeValueSeconds(30L), new ActionListener() { @@ -349,20 +333,25 @@ public void onFailure(Exception e) { } } ); - clearVotingConfigAfterSuccessfulDecommission(); +// decommissionController.clearVotingConfigExclusion(); } - - private Set nodesWithDecommissionAttribute(ClusterState clusterState, DecommissionAttribute decommissionAttribute) { + public Set nodesWithDecommissionAttribute( + ClusterState clusterState, + DecommissionAttribute decommissionAttribute, + boolean onlyClusterManagerNodes + ) { Set nodesWithDecommissionAttribute = new HashSet<>(); - final Predicate shouldRemoveNodePredicate = discoveryNode -> nodeHasDecommissionedAttribute( + final Predicate shouldDecommissionNodePredicate = discoveryNode -> nodeHasDecommissionedAttribute( discoveryNode, decommissionAttribute ); - Iterator nodesIter = clusterState.nodes().getNodes().valuesIt(); + Iterator nodesIter = onlyClusterManagerNodes? 
clusterState.nodes().getClusterManagerNodes().valuesIt() : + clusterState.nodes().getNodes().valuesIt(); + while (nodesIter.hasNext()) { final DiscoveryNode node = nodesIter.next(); - if (shouldRemoveNodePredicate.test(node)) { + if (shouldDecommissionNodePredicate.test(node)) { nodesWithDecommissionAttribute.add(node); } } @@ -373,11 +362,22 @@ private static boolean nodeHasDecommissionedAttribute(DiscoveryNode discoveryNod return discoveryNode.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue()); } - private static void validateAwarenessAttribute(final DecommissionAttribute decommissionAttribute, List awarenessAttributes) { + private static void validateAwarenessAttribute( + final DecommissionAttribute decommissionAttribute, + List awarenessAttributes, + Map> forcedAwarenessAttributes) { + if (awarenessAttributes == null || forcedAwarenessAttributes == null) { + throw new DecommissionFailedException(decommissionAttribute, "awareness attribute and forced awareness attribute not set to the cluster."); + } if (!awarenessAttributes.contains(decommissionAttribute.attributeName())) { throw new DecommissionFailedException(decommissionAttribute, "invalid awareness attribute requested for decommissioning"); } - // TODO - should attribute value be part of force zone values? If yes, read setting and throw exception if not found + if (!forcedAwarenessAttributes.get(decommissionAttribute.attributeName()).contains(decommissionAttribute.attributeValue()) ) { + throw new DecommissionFailedException( + decommissionAttribute, + "invalid awareness attribute value requested for decommissioning. 
Set forced awareness values before to decommission" + ); + } } private static void ensureNoAwarenessAttributeDecommissioned( diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java index 809a392d060e2..0f7f76fc5eeec 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -15,13 +15,19 @@ import org.opensearch.OpenSearchTimeoutException; import org.opensearch.Version; import org.opensearch.action.ActionListener; +import org.opensearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsAction; import org.opensearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsActionTests; +import org.opensearch.action.admin.cluster.configuration.TransportClearVotingConfigExclusionsAction; +import org.opensearch.action.support.ActionFilters; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfigExclusion; import org.opensearch.cluster.ClusterStateObserver; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; +import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodeRole; @@ -29,15 +35,21 @@ import org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.collect.ImmutableOpenMap; +import 
org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.transport.MockTransport; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.CountDownLatch; @@ -49,6 +61,7 @@ import static java.util.Collections.emptySet; import static java.util.Collections.singletonMap; import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.sameInstance; import static org.hamcrest.Matchers.startsWith; @@ -58,6 +71,7 @@ import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import static org.opensearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsAction.MAXIMUM_VOTING_CONFIG_EXCLUSIONS_SETTING; import static org.opensearch.cluster.ClusterState.builder; import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService; import static org.opensearch.test.ClusterServiceUtils.createClusterService; @@ -67,8 +81,10 @@ public class DecommissionControllerTests extends OpenSearchTestCase { private ThreadPool threadPool; private ClusterService clusterService; + private TransportService transportService; private AllocationService allocationService; private DecommissionController decommissionController; + private ClusterSettings clusterSettings; @Override public void setUp() throws Exception { @@ -79,7 +95,7 @@ public void setUp() throws Exception { } @Before - 
public void setDefaultClusterState() { + public void setTransportServiceAndDefaultClusterState() { ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); logger.info("--> adding five nodes on same zone_1"); clusterState = addNodes(clusterState, "zone_1", "node1", "node2", "node3", "node4", "node5"); @@ -88,12 +104,47 @@ public void setDefaultClusterState() { logger.info("--> adding five nodes on same zone_3"); clusterState = addNodes(clusterState, "zone_3", "node11", "node12", "node13", "node14", "node15"); clusterState = setLocalNodeAsClusterManagerNode(clusterState, "node1"); + clusterState = setThreeNodesInVotingConfig(clusterState); final ClusterState.Builder builder = builder(clusterState); setState( clusterService, builder ); - decommissionController = new DecommissionController(clusterService, allocationService, threadPool); + final MockTransport transport = new MockTransport(); + transportService = transport.createTransportService( + Settings.EMPTY, + threadPool, + TransportService.NOOP_TRANSPORT_INTERCEPTOR, + boundTransportAddress -> clusterService.state().nodes().get("node1"), + null, + emptySet() + ); + + final Settings.Builder nodeSettingsBuilder = Settings.builder(); + final Settings nodeSettings = nodeSettingsBuilder.build(); + clusterSettings = new ClusterSettings(nodeSettings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + + new TransportAddVotingConfigExclusionsAction( + nodeSettings, + clusterSettings, + transportService, + clusterService, + threadPool, + new ActionFilters(emptySet()), + new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)) + ); // registers action + + new TransportClearVotingConfigExclusionsAction( + transportService, + clusterService, + threadPool, + new ActionFilters(emptySet()), + new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)) + ); // registers action + + transportService.start(); + transportService.acceptIncomingRequests(); + decommissionController = new 
DecommissionController(clusterService, transportService, allocationService, threadPool); } @After @@ -102,6 +153,48 @@ public void shutdownThreadPoolAndClusterService() { threadPool.shutdown(); } + public void testAddNodesToVotingConfigExclusion() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + Set nodesToRemoveFromVotingConfig = Collections.singleton(randomFrom("node1", "node6", "node11")); + decommissionController.excludeDecommissionedNodesFromVotingConfig( + nodesToRemoveFromVotingConfig, + new ActionListener() { + @Override + public void onResponse(Void unused) { + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("unexpected failure occurred while removing node from voting config " + e); + } + } + ); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + clusterService.getClusterApplierService().state().getVotingConfigExclusions().forEach(vce -> { + assertTrue(nodesToRemoveFromVotingConfig.contains(vce.getNodeName())); + assertEquals(nodesToRemoveFromVotingConfig.size(), 1); + }); + } + + public void testClearVotingConfigExclusions() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + decommissionController.clearVotingConfigExclusion(new ActionListener() { + @Override + public void onResponse(Void unused) { + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("unexpected failure occurred while clearing voting config exclusion" + e); + } + }); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + assertThat(clusterService.getClusterApplierService().state().getVotingConfigExclusions(), empty()); + } + + public void testNodesRemovedForDecommissionRequestSuccessfulResponse() throws InterruptedException { final CountDownLatch countDownLatch = new CountDownLatch(1); Set nodesToBeRemoved = new HashSet<>(); @@ -217,6 +310,21 @@ private ClusterState 
setLocalNodeAsClusterManagerNode(ClusterState clusterState, return clusterState; } + private ClusterState setThreeNodesInVotingConfig(ClusterState clusterState) { + final CoordinationMetadata.VotingConfiguration votingConfiguration = CoordinationMetadata.VotingConfiguration.of( + clusterState.nodes().get("node1"), clusterState.nodes().get("node6"), clusterState.nodes().get("node11") + ); + + Metadata.Builder builder = Metadata.builder().coordinationMetadata( + CoordinationMetadata.builder() + .lastAcceptedConfiguration(votingConfiguration) + .lastCommittedConfiguration(votingConfiguration) + .build() + ); + clusterState = ClusterState.builder(clusterState).metadata(builder).build(); + return clusterState; + } + private static DiscoveryNode newNode(String nodeId, Map attributes) { return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_DATA_ROLE, Version.CURRENT); } diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java index ea4aee984df98..f2baea304344a 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -8,7 +8,179 @@ package org.opensearch.cluster.decommission; +import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Before; +import org.opensearch.Version; +import org.opensearch.action.ActionListener; +import org.opensearch.action.search.CreatePitController; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateObserver; +import org.opensearch.cluster.ack.ClusterStateUpdateResponse; +import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import 
org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider; +import org.opensearch.cluster.routing.allocation.decider.ClusterRebalanceAllocationDecider; +import org.opensearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.transport.MockTransport; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import static java.util.Collections.emptySet; +import static java.util.Collections.singletonMap; +import static org.mockito.ArgumentMatchers.startsWith; +import static org.mockito.Mockito.mock; +import static org.opensearch.cluster.ClusterState.builder; +import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService; +import static org.opensearch.cluster.coordination.NoClusterManagerBlockService.NO_CLUSTER_MANAGER_BLOCK_SETTING; +import static org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING; +import static org.opensearch.test.ClusterServiceUtils.createClusterService; +import static org.opensearch.test.ClusterServiceUtils.setState; public class DecommissionServiceTests extends OpenSearchTestCase { + + private ThreadPool threadPool; + private ClusterService clusterService; + private TransportService transportService; + private AllocationService allocationService; + private DecommissionService 
decommissionService; + private ClusterSettings clusterSettings; + + @Override + public void setUp() throws Exception { + super.setUp(); + super.setUp(); + threadPool = new TestThreadPool("test", Settings.EMPTY); + clusterService = createClusterService(threadPool); + allocationService = createAllocationService(); + } + + @Before + public void setUpService() { + ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); + logger.info("--> adding five nodes on same zone_1"); + clusterState = addNodes(clusterState, "zone_1", "node1", "node2", "node3", "node4", "node5"); + logger.info("--> adding five nodes on same zone_2"); + clusterState = addNodes(clusterState, "zone_2", "node6", "node7", "node8", "node9", "node10"); + logger.info("--> adding five nodes on same zone_3"); + clusterState = addNodes(clusterState, "zone_3", "node11", "node12", "node13", "node14", "node15"); + clusterState = setLocalNodeAsClusterManagerNode(clusterState, "node1"); + clusterState = setThreeNodesInVotingConfig(clusterState); + final ClusterState.Builder builder = builder(clusterState); + setState( + clusterService, + builder + ); + final MockTransport transport = new MockTransport(); + transportService = transport.createTransportService( + Settings.EMPTY, + threadPool, + TransportService.NOOP_TRANSPORT_INTERCEPTOR, + boundTransportAddress -> clusterService.state().nodes().get("node1"), + null, + emptySet() + ); + + final Settings.Builder nodeSettingsBuilder = Settings.builder() + .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.getKey(), "zone") + .put("cluster.routing.allocation.awareness.force.zone.values", "zone_1,zone_2,zone_3"); + + clusterSettings = new ClusterSettings(nodeSettingsBuilder.build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + transportService.start(); + transportService.acceptIncomingRequests(); + + this.decommissionService = new DecommissionService( + nodeSettingsBuilder.build(), + clusterSettings, + 
clusterService, + transportService, + threadPool, + allocationService + ); + } + + @After + public void shutdownThreadPoolAndClusterService() { + clusterService.stop(); + threadPool.shutdown(); + } + + @SuppressWarnings("unchecked") + public void testDecommissioningNotInitiatedForInvalidAttributeName() { + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("rack", "rack-a"); + ActionListener listener = mock(ActionListener.class); + DecommissionFailedException e = expectThrows(DecommissionFailedException.class, () -> { + decommissionService.initiateAttributeDecommissioning( + decommissionAttribute, listener, clusterService.state()); + }); + assertThat(e.getMessage(), Matchers.endsWith("invalid awareness attribute requested for decommissioning")); + } + + @SuppressWarnings("unchecked") + public void testDecommissioningNotInitiatedForInvalidAttributeValue() { + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "random"); + ActionListener listener = mock(ActionListener.class); + DecommissionFailedException e = expectThrows(DecommissionFailedException.class, () -> { + decommissionService.initiateAttributeDecommissioning( + decommissionAttribute, listener, clusterService.state()); + }); + assertThat( + e.getMessage(), + Matchers.endsWith("invalid awareness attribute value requested for decommissioning. Set forced awareness values before to decommission") + ); + } + + private ClusterState addNodes(ClusterState clusterState, String zone, String... 
nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + org.opensearch.common.collect.List.of(nodeIds).forEach(nodeId -> nodeBuilder.add(newNode(nodeId, singletonMap("zone", zone)))); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState setLocalNodeAsClusterManagerNode(ClusterState clusterState, String nodeId) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + nodeBuilder.localNodeId(nodeId); + nodeBuilder.clusterManagerNodeId(nodeId); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState setThreeNodesInVotingConfig(ClusterState clusterState) { + final CoordinationMetadata.VotingConfiguration votingConfiguration = CoordinationMetadata.VotingConfiguration.of( + clusterState.nodes().get("node1"), clusterState.nodes().get("node6"), clusterState.nodes().get("node11") + ); + + Metadata.Builder builder = Metadata.builder().coordinationMetadata( + CoordinationMetadata.builder() + .lastAcceptedConfiguration(votingConfiguration) + .lastCommittedConfiguration(votingConfiguration) + .build() + ); + clusterState = ClusterState.builder(clusterState).metadata(builder).build(); + return clusterState; + } + + private static DiscoveryNode newNode(String nodeId, Map attributes) { + return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_DATA_ROLE, Version.CURRENT); + } + + final private static Set CLUSTER_MANAGER_DATA_ROLE = Collections.unmodifiableSet( + new HashSet<>(Arrays.asList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)) + ); } From 4fa02f093b6aeb840e5de06a719655759080a4e7 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 30 Aug 2022 12:54:53 +0530 Subject: [PATCH 32/87] Add UT Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 38 ++++++++++++------- 
.../DecommissionServiceTests.java | 25 ++++++++++++ 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 729ec944560d4..5e86954cc18b2 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -128,10 +128,13 @@ public void initiateAttributeDecommissioning( ClusterState state ) { validateAwarenessAttribute(decommissionAttribute, awarenessAttributes, forcedAwarenessAttributes); + DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); + ensureNoAwarenessAttributeDecommissioned(decommissionAttributeMetadata, decommissionAttribute); + logger.info("initiating awareness attribute [{}] decommissioning", decommissionAttribute.toString()); // remove all decommissioned cluster manager eligible nodes from voting config - // The method ensures that we don't exclude nodes multiple times + // The method ensures that we don't exclude same nodes multiple times excludeDecommissionedClusterManagerNodesFromVotingConfig(decommissionAttribute); // explicitly throwing NotClusterManagerException as we can certainly say the local cluster manager node will @@ -190,10 +193,10 @@ public void onFailure(Exception e) { /** * Registers new decommissioned attribute metadata in the cluster state with {@link DecommissionStatus#DECOMMISSION_INIT} *

- * This method can be only called on the cluster-manager node. It tries to create a new decommissioned attribute on the master + * This method can be only called on the cluster-manager node. It tries to create a new decommissioned attribute on the cluster manager * and if it was successful it adds new decommissioned attribute to cluster metadata. *

- * This method ensures that request is performed only on eligible cluster manager node + * This method would only be executed on eligible cluster manager node * * @param decommissionAttribute register decommission attribute in the metadata request * @param listener register decommission listener @@ -365,18 +368,27 @@ private static boolean nodeHasDecommissionedAttribute(DiscoveryNode discoveryNod private static void validateAwarenessAttribute( final DecommissionAttribute decommissionAttribute, List awarenessAttributes, - Map> forcedAwarenessAttributes) { - if (awarenessAttributes == null || forcedAwarenessAttributes == null) { - throw new DecommissionFailedException(decommissionAttribute, "awareness attribute and forced awareness attribute not set to the cluster."); + Map> forcedAwarenessAttributes + ) { + String msg = null; + if (awarenessAttributes == null) { + msg = "awareness attribute not set to the cluster."; } - if (!awarenessAttributes.contains(decommissionAttribute.attributeName())) { - throw new DecommissionFailedException(decommissionAttribute, "invalid awareness attribute requested for decommissioning"); + else if (forcedAwarenessAttributes == null) { + msg = "forced awareness attribute not set to the cluster."; } - if (!forcedAwarenessAttributes.get(decommissionAttribute.attributeName()).contains(decommissionAttribute.attributeValue()) ) { - throw new DecommissionFailedException( - decommissionAttribute, - "invalid awareness attribute value requested for decommissioning. 
Set forced awareness values before to decommission" - ); + else if (!awarenessAttributes.contains(decommissionAttribute.attributeName())) { + msg = "invalid awareness attribute requested for decommissioning"; + } + else if (!forcedAwarenessAttributes.containsKey(decommissionAttribute.attributeName())) { + msg = "forced awareness attribute [" + forcedAwarenessAttributes.toString() + "] doesn't have the decommissioning attribute"; + } + else if (!forcedAwarenessAttributes.get(decommissionAttribute.attributeName()).contains(decommissionAttribute.attributeValue()) ) { + msg = "invalid awareness attribute value requested for decommissioning. Set forced awareness values before to decommission"; + } + + if (msg != null) { + throw new DecommissionFailedException(decommissionAttribute, msg); } } diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java index f2baea304344a..5f55387745ca0 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -19,6 +19,7 @@ import org.opensearch.cluster.ClusterStateObserver; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodeRole; @@ -146,6 +147,30 @@ public void testDecommissioningNotInitiatedForInvalidAttributeValue() { ); } + @SuppressWarnings("unchecked") + public void testDecommissioningNotInitiatedWhenAlreadyDecommissioned() { + DecommissionAttributeMetadata oldMetadata = new DecommissionAttributeMetadata( + new DecommissionAttribute("zone", "zone_1"), + 
DecommissionStatus.DECOMMISSION_IN_PROGRESS + ); + final ClusterState.Builder builder = builder(clusterService.state()); + setState( + clusterService, + builder.metadata( + Metadata.builder( + clusterService.state().metadata()).putCustom(DecommissionAttributeMetadata.TYPE, oldMetadata).build() + )); + ActionListener listener = mock(ActionListener.class); + DecommissionFailedException e = expectThrows(DecommissionFailedException.class, () -> { + decommissionService.initiateAttributeDecommissioning( + new DecommissionAttribute("zone", "zone_2"), listener, clusterService.state()); + }); + assertThat( + e.getMessage(), + Matchers.endsWith("one awareness attribute already decommissioned, recommission before triggering another decommission") + ); + } + private ClusterState addNodes(ClusterState clusterState, String zone, String... nodeIds) { DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); org.opensearch.common.collect.List.of(nodeIds).forEach(nodeId -> nodeBuilder.add(newNode(nodeId, singletonMap("zone", zone)))); From 447084fe4bd40b51a60bcd88855cd512be818363 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 30 Aug 2022 13:02:59 +0530 Subject: [PATCH 33/87] Fix decommission initiation Signed-off-by: Rishab Nahata --- .../opensearch/cluster/decommission/DecommissionService.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 5e86954cc18b2..8ffa388bf414f 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -127,8 +127,11 @@ public void initiateAttributeDecommissioning( final ActionListener listener, ClusterState state ) { + // validates if the correct awareness attributes and forced awareness attribute set to the cluster before 
initiating decommission action validateAwarenessAttribute(decommissionAttribute, awarenessAttributes, forcedAwarenessAttributes); + DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); + // validates that there's no inflight decommissioning or already executed decommission in place ensureNoAwarenessAttributeDecommissioned(decommissionAttributeMetadata, decommissionAttribute); logger.info("initiating awareness attribute [{}] decommissioning", decommissionAttribute.toString()); @@ -168,6 +171,7 @@ private void excludeDecommissionedClusterManagerNodesFromVotingConfig(Decommissi && excludedNodesName.containsAll(clusterManagerNodesNameToBeDecommissioned))) { return; } + // send a transport request to exclude to-be-decommissioned cluster manager eligible nodes from voting config decommissionController.excludeDecommissionedNodesFromVotingConfig( clusterManagerNodesNameToBeDecommissioned, new ActionListener() { From 55238eefbcffd50d377fb572d5afb24b3a28db2d Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 30 Aug 2022 14:17:59 +0530 Subject: [PATCH 34/87] Changes Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 118 +++++++++--------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 8ffa388bf414f..16c77a758a433 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -214,43 +214,37 @@ private void registerDecommissionAttribute( new ClusterStateUpdateTask(Priority.URGENT) { @Override public ClusterState execute(ClusterState currentState) { - logger.info( - "registering decommission metadata for attribute [{}] with status as [{}]", - 
decommissionAttribute.toString(), - DecommissionStatus.DECOMMISSION_INIT - ); Metadata metadata = currentState.metadata(); Metadata.Builder mdBuilder = Metadata.builder(metadata); DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); ensureNoAwarenessAttributeDecommissioned(decommissionAttributeMetadata, decommissionAttribute); decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); + logger.info( + "registering decommission metadata for attribute [{}] with status as [{}]", + decommissionAttribute.toString(), + DecommissionStatus.DECOMMISSION_INIT + ); return ClusterState.builder(currentState).metadata(mdBuilder).build(); } @Override public void onFailure(String source, Exception e) { if (e instanceof DecommissionFailedException) { - logger.error( - () -> new ParameterizedMessage("failed to decommission attribute [{}]", decommissionAttribute.toString()), - e - ); + logger.error(() -> new ParameterizedMessage("failed to decommission attribute [{}]", decommissionAttribute.toString()), e); listener.onFailure(e); } else if (e instanceof NotClusterManagerException) { logger.debug( () -> new ParameterizedMessage( "cluster-manager updated while executing request for decommission attribute [{}]", decommissionAttribute.toString() - ), - e + ), e ); + // we don't want to send the failure response to the listener here as the request will be retried } else { - logger.error( - () -> new ParameterizedMessage( - "failed to initiate decommissioning for attribute [{}]", - decommissionAttribute.toString() - ), - e + logger.error(() -> new ParameterizedMessage( + "failed to initiate decommissioning for attribute [{}]", decommissionAttribute.toString() + ), e ); listener.onFailure(e); } @@ -270,30 +264,29 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS } private void 
initiateGracefulDecommission() { - // maybe create a supplier for status update listener? - ActionListener listener = new ActionListener<>() { - @Override - public void onResponse(Void unused) { - logger.info( - "updated decommission status to [{}], weighing away awareness attribute for graceful shutdown", - DecommissionStatus.DECOMMISSION_IN_PROGRESS - ); - failDecommissionedNodes(clusterService.getClusterApplierService().state()); - } - - @Override - public void onFailure(Exception e) { - logger.error( - () -> new ParameterizedMessage( - "failed to update decommission status to [{}], will not proceed with decommission", + decommissionController.updateMetadataWithDecommissionStatus( + DecommissionStatus.DECOMMISSION_IN_PROGRESS, + new ActionListener() { + @Override + public void onResponse(Void unused) { + logger.info( + "updated decommission status to [{}], weighing away awareness attribute for graceful shutdown", DecommissionStatus.DECOMMISSION_IN_PROGRESS - ), - e - ); + ); + // TODO - should trigger weigh away here and on successful weigh away -> fail the decommissioned nodes + failDecommissionedNodes(clusterService.getClusterApplierService().state()); + } + + @Override + public void onFailure(Exception e) { + logger.error(() -> new ParameterizedMessage( + "failed to update decommission status to [{}], will not proceed with decommission", + DecommissionStatus.DECOMMISSION_IN_PROGRESS + ), e + ); + } } - }; - decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_IN_PROGRESS, listener); - // TODO - code for graceful decommission + ); } private void failDecommissionedNodes(ClusterState state) { @@ -302,13 +295,31 @@ private void failDecommissionedNodes(ClusterState state) { : "unexpected status encountered while decommissioning nodes"; DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); - ActionListener statusUpdateListener = new ActionListener<>() { + // execute nodes 
decommissioning + decommissionController.handleNodesDecommissionRequest( + nodesWithDecommissionAttribute(state, decommissionAttribute, false), + "nodes-decommissioned", + TimeValue.timeValueSeconds(30L), // TODO - read timeout from request while integrating with API + new ActionListener() { + @Override + public void onResponse(Void unused) { + clearVotingConfigExclusionAndUpdateStatus(true); + } + + @Override + public void onFailure(Exception e) { + clearVotingConfigExclusionAndUpdateStatus(false); + } + } + ); + } + + private void clearVotingConfigExclusionAndUpdateStatus(boolean decommissionSuccessful) { + ActionListener statusUpdateListener = new ActionListener() { @Override public void onResponse(Void unused) { - logger.info( - "updated decommission status to [{}], decommissioning completed.", - DecommissionStatus.DECOMMISSION_SUCCESSFUL - ); + logger.info("successful updated decommission status with [{}]", + decommissionSuccessful ? DecommissionStatus.DECOMMISSION_SUCCESSFUL : DecommissionStatus.DECOMMISSION_FAILED); } @Override @@ -316,31 +327,20 @@ public void onFailure(Exception e) { logger.error("failed to update the decommission status"); } }; - - // execute nodes decommissioning and wait for it to complete - decommissionController.handleNodesDecommissionRequest( - nodesWithDecommissionAttribute(state, decommissionAttribute, false), - "nodes-decommissioned", - TimeValue.timeValueSeconds(30L), + decommissionController.clearVotingConfigExclusion( new ActionListener() { @Override public void onResponse(Void unused) { - decommissionController.updateMetadataWithDecommissionStatus( - DecommissionStatus.DECOMMISSION_SUCCESSFUL, - statusUpdateListener - ); + DecommissionStatus updateStatusWith = decommissionSuccessful? 
DecommissionStatus.DECOMMISSION_SUCCESSFUL : DecommissionStatus.DECOMMISSION_FAILED; + decommissionController.updateMetadataWithDecommissionStatus(updateStatusWith, statusUpdateListener); } @Override public void onFailure(Exception e) { - decommissionController.updateMetadataWithDecommissionStatus( - DecommissionStatus.DECOMMISSION_FAILED, - statusUpdateListener - ); + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_FAILED, statusUpdateListener); } } ); -// decommissionController.clearVotingConfigExclusion(); } public Set nodesWithDecommissionAttribute( From 0d850a3286a3dc94cb7bc6ecb6eae3c6829564b7 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 30 Aug 2022 15:09:26 +0530 Subject: [PATCH 35/87] Move DecommissionAttributeMetadata to decommission package Signed-off-by: Rishab Nahata --- .../java/org/opensearch/cluster/ClusterModule.java | 2 +- .../cluster/coordination/JoinTaskExecutor.java | 6 +++--- .../DecommissionAttributeMetadata.java | 5 +++-- .../cluster/decommission/DecommissionController.java | 1 - .../cluster/decommission/DecommissionService.java | 1 - .../cluster/coordination/JoinTaskExecutorTests.java | 12 +++++------- .../decommission/DecommissionControllerTests.java | 1 - .../decommission/DecommissionServiceTests.java | 3 +-- ...ommissionAttributeMetadataSerializationTests.java | 1 + .../metadata/DecommissionAttributeMetadataTests.java | 1 + .../DecommissionAttributeMetadataXContentTests.java | 1 + 11 files changed, 16 insertions(+), 18 deletions(-) rename server/src/main/java/org/opensearch/cluster/{metadata => decommission}/DecommissionAttributeMetadata.java (98%) diff --git a/server/src/main/java/org/opensearch/cluster/ClusterModule.java b/server/src/main/java/org/opensearch/cluster/ClusterModule.java index 115b9bdf3d8d6..892e65e2ee5b4 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterModule.java @@ -35,10 +35,10 @@ 
import org.opensearch.cluster.action.index.MappingUpdatedAction; import org.opensearch.cluster.action.index.NodeMappingRefreshAction; import org.opensearch.cluster.action.shard.ShardStateAction; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.ComponentTemplateMetadata; import org.opensearch.cluster.metadata.ComposableIndexTemplateMetadata; import org.opensearch.cluster.metadata.DataStreamMetadata; -import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.IndexGraveyard; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.metadata.Metadata; diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index 7008474222aef..a104499109683 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -40,9 +40,9 @@ import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.block.ClusterBlocks; import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; import org.opensearch.cluster.decommission.DecommissionStatus; import org.opensearch.cluster.decommission.NodeDecommissionedException; -import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; @@ -476,7 +476,7 @@ public static void ensureMajorVersionBarrier(Version joiningNodeVersion, Version } } - public static void ensureNodeNotDecommissioned(DiscoveryNode node, Metadata metadata) { + public static void ensureNodeCommissioned(DiscoveryNode node, Metadata metadata) 
{ DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); if (decommissionAttributeMetadata != null) { DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); @@ -506,7 +506,7 @@ public static Collection> addBuiltInJoin validators.add((node, state) -> { ensureNodesCompatibility(node.getVersion(), state.getNodes()); ensureIndexCompatibility(node.getVersion(), state.getMetadata()); - ensureNodeNotDecommissioned(node, state.getMetadata()); + ensureNodeCommissioned(node, state.getMetadata()); }); validators.addAll(onJoinValidators); return Collections.unmodifiableCollection(validators); diff --git a/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java similarity index 98% rename from server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java rename to server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java index 2034ab34e25c3..72bdfbdca78d3 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.cluster.metadata; +package org.opensearch.cluster.decommission; import org.opensearch.OpenSearchParseException; import org.opensearch.Version; @@ -14,6 +14,7 @@ import org.opensearch.cluster.NamedDiff; import org.opensearch.cluster.decommission.DecommissionAttribute; import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.metadata.Metadata.Custom; import org.opensearch.common.Strings; import org.opensearch.common.io.stream.StreamInput; @@ -162,7 +163,7 @@ public static DecommissionAttributeMetadata fromXContent(XContentParser parser) ); } decommissionAttribute = new DecommissionAttribute(fieldName, value); - token = parser.nextToken(); + parser.nextToken(); } else { throw new OpenSearchParseException("failed to parse attribute type [{}], unexpected type", attributeType); } diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index d699fc51e819a..4f81ddf1e32ff 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -26,7 +26,6 @@ import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; import org.opensearch.cluster.coordination.NodeRemovalClusterStateTaskExecutor; -import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.allocation.AllocationService; diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 16c77a758a433..70a3b18519e89 100644 --- 
a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -17,7 +17,6 @@ import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; import org.opensearch.cluster.coordination.CoordinationMetadata; -import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.allocation.AllocationService; diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index a3c3b9a9b9c7b..9e2d0cc3a7fc4 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -37,10 +37,9 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateTaskExecutor; import org.opensearch.cluster.decommission.DecommissionAttribute; -import org.opensearch.cluster.decommission.DecommissionFailedException; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; import org.opensearch.cluster.decommission.DecommissionStatus; import org.opensearch.cluster.decommission.NodeDecommissionedException; -import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; @@ -58,7 +57,6 @@ import java.util.HashSet; import java.util.Map; -import static java.util.Collections.emptyMap; import static org.hamcrest.Matchers.is; import static org.opensearch.test.VersionUtils.allVersions; import static org.opensearch.test.VersionUtils.maxCompatibleVersion; @@ 
-231,7 +229,7 @@ public void testJoinClusterWithNoDecommission() { Metadata.Builder metaBuilder = Metadata.builder(); Metadata metadata = metaBuilder.build(); DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-2")); - JoinTaskExecutor.ensureNodeNotDecommissioned(discoveryNode, metadata); + JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata); } public void testPreventJoinClusterWithDecommission() { @@ -252,7 +250,7 @@ public void testPreventJoinClusterWithDecommission() { DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-1")); expectThrows( NodeDecommissionedException.class, - () -> JoinTaskExecutor.ensureNodeNotDecommissioned(discoveryNode, metadata) + () -> JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata) ); } @@ -269,7 +267,7 @@ public void testJoinClusterWithDifferentDecommission() { Metadata metadata = metaBuilder.build(); DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-2")); - JoinTaskExecutor.ensureNodeNotDecommissioned(discoveryNode, metadata); + JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata); } public void testJoinClusterWithDecommissionFailedOrInitOrRecommission() { @@ -289,7 +287,7 @@ public void testJoinClusterWithDecommissionFailedOrInitOrRecommission() { Metadata metadata = metaBuilder.build(); DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-1")); - JoinTaskExecutor.ensureNodeNotDecommissioned(discoveryNode, metadata); + JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata); } private DiscoveryNode newDiscoveryNode(Map attributes) { diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java index 0f7f76fc5eeec..ee97f1768cd94 100644 --- 
a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -26,7 +26,6 @@ import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; import org.opensearch.cluster.coordination.CoordinationMetadata; -import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java index 5f55387745ca0..d93cd5ea51978 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -19,7 +19,6 @@ import org.opensearch.cluster.ClusterStateObserver; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; import org.opensearch.cluster.coordination.CoordinationMetadata; -import org.opensearch.cluster.metadata.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodeRole; @@ -160,7 +159,7 @@ public void testDecommissioningNotInitiatedWhenAlreadyDecommissioned() { Metadata.builder( clusterService.state().metadata()).putCustom(DecommissionAttributeMetadata.TYPE, oldMetadata).build() )); - ActionListener listener = mock(ActionListener.class); + ActionListener listener = mock(ActionListener.class); DecommissionFailedException e = expectThrows(DecommissionFailedException.class, () -> { decommissionService.initiateAttributeDecommissioning( new DecommissionAttribute("zone", "zone_2"), listener, 
clusterService.state()); diff --git a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java index d81c05b8e8da0..5423c2ed672a3 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java @@ -11,6 +11,7 @@ import org.opensearch.cluster.ClusterModule; import org.opensearch.cluster.Diff; import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; import org.opensearch.cluster.decommission.DecommissionStatus; import org.opensearch.common.io.stream.NamedWriteableRegistry; import org.opensearch.common.io.stream.Writeable; diff --git a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java index bff57daef6109..746d4565b0db3 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java @@ -9,6 +9,7 @@ package org.opensearch.cluster.metadata; import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; import org.opensearch.cluster.decommission.DecommissionStatus; import org.opensearch.common.io.stream.NamedWriteableRegistry; import org.opensearch.test.AbstractNamedWriteableTestCase; diff --git a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java index 
c632839acd4ca..030946f4510a1 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java @@ -9,6 +9,7 @@ package org.opensearch.cluster.metadata; import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; import org.opensearch.cluster.decommission.DecommissionStatus; import org.opensearch.common.xcontent.XContentParser; import org.opensearch.test.AbstractXContentTestCase; From a6e542d9ae95988ddf266c6c5bdf6eee662846cf Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 30 Aug 2022 15:38:00 +0530 Subject: [PATCH 36/87] Update exception name Signed-off-by: Rishab Nahata --- .../java/org/opensearch/OpenSearchException.java | 5 +++-- .../cluster/decommission/DecommissionService.java | 12 +++--------- ...ion.java => DecommissioningFailedException.java} | 8 ++++---- .../org/opensearch/ExceptionSerializationTests.java | 4 ++-- .../decommission/DecommissionServiceTests.java | 13 +++---------- 5 files changed, 15 insertions(+), 27 deletions(-) rename server/src/main/java/org/opensearch/cluster/decommission/{DecommissionFailedException.java => DecommissioningFailedException.java} (78%) diff --git a/server/src/main/java/org/opensearch/OpenSearchException.java b/server/src/main/java/org/opensearch/OpenSearchException.java index 5ae83c9df70d3..7799aef7fab38 100644 --- a/server/src/main/java/org/opensearch/OpenSearchException.java +++ b/server/src/main/java/org/opensearch/OpenSearchException.java @@ -34,6 +34,7 @@ import org.opensearch.action.support.replication.ReplicationOperation; import org.opensearch.cluster.action.shard.ShardStateAction; +import org.opensearch.cluster.decommission.DecommissioningFailedException; import org.opensearch.common.CheckedFunction; import org.opensearch.common.Nullable; import 
org.opensearch.common.ParseField; @@ -1611,8 +1612,8 @@ private enum OpenSearchExceptionHandle { V_3_0_0 ), DECOMMISSION_FAILED_EXCEPTION( - org.opensearch.cluster.decommission.DecommissionFailedException.class, - org.opensearch.cluster.decommission.DecommissionFailedException::new, + org.opensearch.cluster.decommission.DecommissioningFailedException.class, + org.opensearch.cluster.decommission.DecommissioningFailedException::new, 163, V_2_3_0 ), diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 70a3b18519e89..eac713f4961b4 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -16,25 +16,19 @@ import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; -import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.allocation.AllocationService; -import org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.Priority; import org.opensearch.common.inject.Inject; -import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.threadpool.ThreadPool; -import org.opensearch.transport.TransportException; -import org.opensearch.transport.TransportResponseHandler; import org.opensearch.transport.TransportService; import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfigExclusion; 
-import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -229,7 +223,7 @@ public ClusterState execute(ClusterState currentState) { @Override public void onFailure(String source, Exception e) { - if (e instanceof DecommissionFailedException) { + if (e instanceof DecommissioningFailedException) { logger.error(() -> new ParameterizedMessage("failed to decommission attribute [{}]", decommissionAttribute.toString()), e); listener.onFailure(e); } else if (e instanceof NotClusterManagerException) { @@ -391,7 +385,7 @@ else if (!forcedAwarenessAttributes.get(decommissionAttribute.attributeName()).c } if (msg != null) { - throw new DecommissionFailedException(decommissionAttribute, msg); + throw new DecommissioningFailedException(decommissionAttribute, msg); } } @@ -402,7 +396,7 @@ private static void ensureNoAwarenessAttributeDecommissioned( // If the previous decommission request failed, we will allow the request to pass this check if (decommissionAttributeMetadata != null && !decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_FAILED)) { - throw new DecommissionFailedException( + throw new DecommissioningFailedException( decommissionAttribute, "one awareness attribute already decommissioned, recommission before triggering another decommission" ); diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionFailedException.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissioningFailedException.java similarity index 78% rename from server/src/main/java/org/opensearch/cluster/decommission/DecommissionFailedException.java rename to server/src/main/java/org/opensearch/cluster/decommission/DecommissioningFailedException.java index 3ba121dd90cee..fe1b9368ac712 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionFailedException.java +++ 
b/server/src/main/java/org/opensearch/cluster/decommission/DecommissioningFailedException.java @@ -20,20 +20,20 @@ * @opensearch.internal */ -public class DecommissionFailedException extends OpenSearchException { +public class DecommissioningFailedException extends OpenSearchException { private final DecommissionAttribute decommissionAttribute; - public DecommissionFailedException(DecommissionAttribute decommissionAttribute, String msg) { + public DecommissioningFailedException(DecommissionAttribute decommissionAttribute, String msg) { this(decommissionAttribute, msg, null); } - public DecommissionFailedException(DecommissionAttribute decommissionAttribute, String msg, Throwable cause) { + public DecommissioningFailedException(DecommissionAttribute decommissionAttribute, String msg, Throwable cause) { super("[" + (decommissionAttribute == null ? "_na" : decommissionAttribute.toString()) + "] " + msg, cause); this.decommissionAttribute = decommissionAttribute; } - public DecommissionFailedException(StreamInput in) throws IOException { + public DecommissioningFailedException(StreamInput in) throws IOException { super(in); decommissionAttribute = new DecommissionAttribute(in); } diff --git a/server/src/test/java/org/opensearch/ExceptionSerializationTests.java b/server/src/test/java/org/opensearch/ExceptionSerializationTests.java index 6516cc80c7929..ff2bb77531486 100644 --- a/server/src/test/java/org/opensearch/ExceptionSerializationTests.java +++ b/server/src/test/java/org/opensearch/ExceptionSerializationTests.java @@ -49,7 +49,7 @@ import org.opensearch.cluster.block.ClusterBlockException; import org.opensearch.cluster.coordination.CoordinationStateRejectedException; import org.opensearch.cluster.coordination.NoClusterManagerBlockService; -import org.opensearch.cluster.decommission.DecommissionFailedException; +import org.opensearch.cluster.decommission.DecommissioningFailedException; import org.opensearch.cluster.decommission.NodeDecommissionedException; import 
org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.IllegalShardRoutingStateException; @@ -862,7 +862,7 @@ public void testIds() { ids.put(160, NoSeedNodeLeftException.class); ids.put(161, ReplicationFailedException.class); ids.put(162, PrimaryShardClosedException.class); - ids.put(163, DecommissionFailedException.class); + ids.put(163, DecommissioningFailedException.class); ids.put(164, NodeDecommissionedException.class); Map, Integer> reverse = new HashMap<>(); diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java index d93cd5ea51978..13d036b2952f7 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -13,10 +13,8 @@ import org.junit.Before; import org.opensearch.Version; import org.opensearch.action.ActionListener; -import org.opensearch.action.search.CreatePitController; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; -import org.opensearch.cluster.ClusterStateObserver; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.metadata.Metadata; @@ -25,8 +23,6 @@ import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider; -import org.opensearch.cluster.routing.allocation.decider.ClusterRebalanceAllocationDecider; -import org.opensearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; @@ -44,12 +40,9 @@ import static 
java.util.Collections.emptySet; import static java.util.Collections.singletonMap; -import static org.mockito.ArgumentMatchers.startsWith; import static org.mockito.Mockito.mock; import static org.opensearch.cluster.ClusterState.builder; import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService; -import static org.opensearch.cluster.coordination.NoClusterManagerBlockService.NO_CLUSTER_MANAGER_BLOCK_SETTING; -import static org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING; import static org.opensearch.test.ClusterServiceUtils.createClusterService; import static org.opensearch.test.ClusterServiceUtils.setState; @@ -125,7 +118,7 @@ public void shutdownThreadPoolAndClusterService() { public void testDecommissioningNotInitiatedForInvalidAttributeName() { DecommissionAttribute decommissionAttribute = new DecommissionAttribute("rack", "rack-a"); ActionListener listener = mock(ActionListener.class); - DecommissionFailedException e = expectThrows(DecommissionFailedException.class, () -> { + DecommissioningFailedException e = expectThrows(DecommissioningFailedException.class, () -> { decommissionService.initiateAttributeDecommissioning( decommissionAttribute, listener, clusterService.state()); }); @@ -136,7 +129,7 @@ public void testDecommissioningNotInitiatedForInvalidAttributeName() { public void testDecommissioningNotInitiatedForInvalidAttributeValue() { DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "random"); ActionListener listener = mock(ActionListener.class); - DecommissionFailedException e = expectThrows(DecommissionFailedException.class, () -> { + DecommissioningFailedException e = expectThrows(DecommissioningFailedException.class, () -> { decommissionService.initiateAttributeDecommissioning( decommissionAttribute, listener, clusterService.state()); }); @@ -160,7 +153,7 @@ public void 
testDecommissioningNotInitiatedWhenAlreadyDecommissioned() { clusterService.state().metadata()).putCustom(DecommissionAttributeMetadata.TYPE, oldMetadata).build() )); ActionListener listener = mock(ActionListener.class); - DecommissionFailedException e = expectThrows(DecommissionFailedException.class, () -> { + DecommissioningFailedException e = expectThrows(DecommissioningFailedException.class, () -> { decommissionService.initiateAttributeDecommissioning( new DecommissionAttribute("zone", "zone_2"), listener, clusterService.state()); }); From 1b98cf5740db629a75e900c79c299cf67446e1b0 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 30 Aug 2022 15:46:13 +0530 Subject: [PATCH 37/87] Fix spotless and precommit checks Signed-off-by: Rishab Nahata --- .../coordination/JoinTaskExecutor.java | 7 +- .../DecommissionAttributeMetadata.java | 2 - .../decommission/DecommissionController.java | 20 +--- .../decommission/DecommissionService.java | 108 ++++++++++-------- .../coordination/JoinTaskExecutorTests.java | 5 +- .../DecommissionControllerTests.java | 92 ++++++--------- .../DecommissionServiceTests.java | 62 +++++----- ...onAttributeMetadataSerializationTests.java | 7 +- 8 files changed, 135 insertions(+), 168 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index a104499109683..ebf37e21bbfd6 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -484,11 +484,8 @@ public static void ensureNodeCommissioned(DiscoveryNode node, Metadata metadata) if (decommissionAttribute != null && status != null) { // We will let the node join the cluster if the current status is not IN_PROGRESS or SUCCESSFUL if (node.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue()) - && 
( - status.equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS) - || status.equals(DecommissionStatus.DECOMMISSION_SUCCESSFUL - ) - )) { + && (status.equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS) + || status.equals(DecommissionStatus.DECOMMISSION_SUCCESSFUL))) { throw new NodeDecommissionedException( "node [{}] has decommissioned attribute [{}].", node.toString(), diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java index 72bdfbdca78d3..0924a181fb458 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java @@ -12,8 +12,6 @@ import org.opensearch.Version; import org.opensearch.cluster.AbstractNamedDiffable; import org.opensearch.cluster.NamedDiff; -import org.opensearch.cluster.decommission.DecommissionAttribute; -import org.opensearch.cluster.decommission.DecommissionStatus; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.metadata.Metadata.Custom; import org.opensearch.common.Strings; diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 4f81ddf1e32ff..1799479efe4cc 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -24,7 +24,6 @@ import org.opensearch.cluster.ClusterStateTaskConfig; import org.opensearch.cluster.ClusterStateTaskListener; import org.opensearch.cluster.ClusterStateUpdateTask; -import org.opensearch.cluster.ack.ClusterStateUpdateResponse; import org.opensearch.cluster.coordination.NodeRemovalClusterStateTaskExecutor; import 
org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; @@ -34,7 +33,6 @@ import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.unit.TimeValue; import org.opensearch.threadpool.ThreadPool; -import org.opensearch.transport.Transport; import org.opensearch.transport.TransportException; import org.opensearch.transport.TransportResponseHandler; import org.opensearch.transport.TransportService; @@ -42,7 +40,6 @@ import java.io.IOException; import java.util.Iterator; import java.util.LinkedHashMap; -import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.Predicate; @@ -78,7 +75,7 @@ public void excludeDecommissionedNodesFromVotingConfig(Set nodes, Action transportService.sendRequest( transportService.getLocalNode(), AddVotingConfigExclusionsAction.NAME, - new AddVotingConfigExclusionsRequest(nodes.stream().toArray(String[] :: new)), + new AddVotingConfigExclusionsRequest(nodes.stream().toArray(String[]::new)), new TransportResponseHandler() { @Override public void handleResponse(AddVotingConfigExclusionsResponse response) { @@ -166,12 +163,7 @@ public void handleNodesDecommissionRequest( return true; }; - final ClusterStateObserver observer = new ClusterStateObserver( - clusterService, - timeout, - logger, - threadPool.getThreadContext() - ); + final ClusterStateObserver observer = new ClusterStateObserver(clusterService, timeout, logger, threadPool.getThreadContext()); observer.waitForNextChange(new ClusterStateObserver.Listener() { @Override @@ -198,10 +190,7 @@ public void onTimeout(TimeValue timeout) { }, allDecommissionedNodesRemovedPredicate); } - public void updateMetadataWithDecommissionStatus( - DecommissionStatus decommissionStatus, - ActionListener listener - ) { + public void updateMetadataWithDecommissionStatus(DecommissionStatus decommissionStatus, ActionListener listener) { clusterService.submitStateUpdateTask(decommissionStatus.status(), new 
ClusterStateUpdateTask(Priority.URGENT) { @Override public ClusterState execute(ClusterState currentState) { @@ -222,7 +211,8 @@ public void onFailure(String source, Exception e) { @Override public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { - DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().custom(DecommissionAttributeMetadata.TYPE); + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata() + .custom(DecommissionAttributeMetadata.TYPE); assert decommissionAttributeMetadata.status().equals(decommissionStatus); listener.onResponse(null); } diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index eac713f4961b4..89ddabb9fa19e 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -80,17 +80,9 @@ public DecommissionService( this.clusterService = clusterService; this.transportService = transportService; this.threadPool = threadPool; - this.decommissionController = new DecommissionController( - clusterService, - transportService, - allocationService, - threadPool - ); + this.decommissionController = new DecommissionController(clusterService, transportService, allocationService, threadPool); this.awarenessAttributes = CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.get(settings); - clusterSettings.addSettingsUpdateConsumer( - CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING, - this::setAwarenessAttributes - ); + clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING, this::setAwarenessAttributes); setForcedAwarenessAttributes(CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.get(settings)); clusterSettings.addSettingsUpdateConsumer( @@ -120,7 +112,8 @@ 
public void initiateAttributeDecommissioning( final ActionListener listener, ClusterState state ) { - // validates if the correct awareness attributes and forced awareness attribute set to the cluster before initiating decommission action + // validates if the correct awareness attributes and forced awareness attribute set to the cluster before initiating decommission + // action validateAwarenessAttribute(decommissionAttribute, awarenessAttributes, forcedAwarenessAttributes); DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); @@ -135,7 +128,7 @@ public void initiateAttributeDecommissioning( // explicitly throwing NotClusterManagerException as we can certainly say the local cluster manager node will // be abdicated and soon will no longer be cluster manager. - if(transportService.getLocalNode().isClusterManagerNode() + if (transportService.getLocalNode().isClusterManagerNode() && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) { registerDecommissionAttribute(decommissionAttribute, listener); } else { @@ -149,19 +142,26 @@ public void initiateAttributeDecommissioning( private void excludeDecommissionedClusterManagerNodesFromVotingConfig(DecommissionAttribute decommissionAttribute) { Set clusterManagerNodesToBeDecommissioned = nodesWithDecommissionAttribute( - clusterService.state(), decommissionAttribute, true + clusterService.state(), + decommissionAttribute, + true ); Set clusterManagerNodesNameToBeDecommissioned = clusterManagerNodesToBeDecommissioned.stream() .map(DiscoveryNode::getName) .collect(Collectors.toSet()); - Set currentVotingConfigExclusions = clusterService.getClusterApplierService().state().coordinationMetadata().getVotingConfigExclusions(); - Set excludedNodesName = currentVotingConfigExclusions.stream().map(VotingConfigExclusion::getNodeName).collect(Collectors.toSet()); + Set currentVotingConfigExclusions = 
clusterService.getClusterApplierService() + .state() + .coordinationMetadata() + .getVotingConfigExclusions(); + Set excludedNodesName = currentVotingConfigExclusions.stream() + .map(VotingConfigExclusion::getNodeName) + .collect(Collectors.toSet()); // check if the to-be-excluded nodes are excluded. If yes, we don't need to exclude them again if (clusterManagerNodesNameToBeDecommissioned.size() == 0 || (clusterManagerNodesNameToBeDecommissioned.size() == excludedNodesName.size() - && excludedNodesName.containsAll(clusterManagerNodesNameToBeDecommissioned))) { + && excludedNodesName.containsAll(clusterManagerNodesNameToBeDecommissioned))) { return; } // send a transport request to exclude to-be-decommissioned cluster manager eligible nodes from voting config @@ -171,8 +171,8 @@ private void excludeDecommissionedClusterManagerNodesFromVotingConfig(Decommissi @Override public void onResponse(Void unused) { logger.info( - "successfully removed decommissioned cluster manager eligible nodes [{}] from voting config " - , clusterManagerNodesToBeDecommissioned.toString() + "successfully removed decommissioned cluster manager eligible nodes [{}] from voting config ", + clusterManagerNodesToBeDecommissioned.toString() ); } @@ -224,20 +224,27 @@ public ClusterState execute(ClusterState currentState) { @Override public void onFailure(String source, Exception e) { if (e instanceof DecommissioningFailedException) { - logger.error(() -> new ParameterizedMessage("failed to decommission attribute [{}]", decommissionAttribute.toString()), e); + logger.error( + () -> new ParameterizedMessage("failed to decommission attribute [{}]", decommissionAttribute.toString()), + e + ); listener.onFailure(e); } else if (e instanceof NotClusterManagerException) { logger.debug( () -> new ParameterizedMessage( "cluster-manager updated while executing request for decommission attribute [{}]", decommissionAttribute.toString() - ), e + ), + e ); // we don't want to send the failure response to the 
listener here as the request will be retried } else { - logger.error(() -> new ParameterizedMessage( - "failed to initiate decommissioning for attribute [{}]", decommissionAttribute.toString() - ), e + logger.error( + () -> new ParameterizedMessage( + "failed to initiate decommissioning for attribute [{}]", + decommissionAttribute.toString() + ), + e ); listener.onFailure(e); } @@ -272,10 +279,12 @@ public void onResponse(Void unused) { @Override public void onFailure(Exception e) { - logger.error(() -> new ParameterizedMessage( + logger.error( + () -> new ParameterizedMessage( "failed to update decommission status to [{}], will not proceed with decommission", DecommissionStatus.DECOMMISSION_IN_PROGRESS - ), e + ), + e ); } } @@ -311,8 +320,10 @@ private void clearVotingConfigExclusionAndUpdateStatus(boolean decommissionSucce ActionListener statusUpdateListener = new ActionListener() { @Override public void onResponse(Void unused) { - logger.info("successful updated decommission status with [{}]", - decommissionSuccessful ? DecommissionStatus.DECOMMISSION_SUCCESSFUL : DecommissionStatus.DECOMMISSION_FAILED); + logger.info( + "successful updated decommission status with [{}]", + decommissionSuccessful ? DecommissionStatus.DECOMMISSION_SUCCESSFUL : DecommissionStatus.DECOMMISSION_FAILED + ); } @Override @@ -320,20 +331,20 @@ public void onFailure(Exception e) { logger.error("failed to update the decommission status"); } }; - decommissionController.clearVotingConfigExclusion( - new ActionListener() { - @Override - public void onResponse(Void unused) { - DecommissionStatus updateStatusWith = decommissionSuccessful? 
DecommissionStatus.DECOMMISSION_SUCCESSFUL : DecommissionStatus.DECOMMISSION_FAILED; - decommissionController.updateMetadataWithDecommissionStatus(updateStatusWith, statusUpdateListener); - } + decommissionController.clearVotingConfigExclusion(new ActionListener() { + @Override + public void onResponse(Void unused) { + DecommissionStatus updateStatusWith = decommissionSuccessful + ? DecommissionStatus.DECOMMISSION_SUCCESSFUL + : DecommissionStatus.DECOMMISSION_FAILED; + decommissionController.updateMetadataWithDecommissionStatus(updateStatusWith, statusUpdateListener); + } - @Override - public void onFailure(Exception e) { - decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_FAILED, statusUpdateListener); - } + @Override + public void onFailure(Exception e) { + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_FAILED, statusUpdateListener); } - ); + }); } public Set nodesWithDecommissionAttribute( @@ -346,8 +357,9 @@ public Set nodesWithDecommissionAttribute( discoveryNode, decommissionAttribute ); - Iterator nodesIter = onlyClusterManagerNodes? clusterState.nodes().getClusterManagerNodes().valuesIt() : - clusterState.nodes().getNodes().valuesIt(); + Iterator nodesIter = onlyClusterManagerNodes + ? 
clusterState.nodes().getClusterManagerNodes().valuesIt() + : clusterState.nodes().getNodes().valuesIt(); while (nodesIter.hasNext()) { final DiscoveryNode node = nodesIter.next(); @@ -369,18 +381,14 @@ private static void validateAwarenessAttribute( ) { String msg = null; if (awarenessAttributes == null) { - msg = "awareness attribute not set to the cluster."; - } - else if (forcedAwarenessAttributes == null) { + msg = "awareness attribute not set to the cluster."; + } else if (forcedAwarenessAttributes == null) { msg = "forced awareness attribute not set to the cluster."; - } - else if (!awarenessAttributes.contains(decommissionAttribute.attributeName())) { + } else if (!awarenessAttributes.contains(decommissionAttribute.attributeName())) { msg = "invalid awareness attribute requested for decommissioning"; - } - else if (!forcedAwarenessAttributes.containsKey(decommissionAttribute.attributeName())) { + } else if (!forcedAwarenessAttributes.containsKey(decommissionAttribute.attributeName())) { msg = "forced awareness attribute [" + forcedAwarenessAttributes.toString() + "] doesn't have the decommissioning attribute"; - } - else if (!forcedAwarenessAttributes.get(decommissionAttribute.attributeName()).contains(decommissionAttribute.attributeValue()) ) { + } else if (!forcedAwarenessAttributes.get(decommissionAttribute.attributeName()).contains(decommissionAttribute.attributeValue())) { msg = "invalid awareness attribute value requested for decommissioning. 
Set forced awareness values before to decommission"; } diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index 9e2d0cc3a7fc4..734f112bdce3d 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -248,10 +248,7 @@ public void testPreventJoinClusterWithDecommission() { Metadata metadata = metaBuilder.build(); DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-1")); - expectThrows( - NodeDecommissionedException.class, - () -> JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata) - ); + expectThrows(NodeDecommissionedException.class, () -> JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata)); } public void testJoinClusterWithDifferentDecommission() { diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java index ee97f1768cd94..e5d7ec60c0e23 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -9,22 +9,15 @@ package org.opensearch.cluster.decommission; import org.junit.After; -import org.junit.AfterClass; import org.junit.Before; -import org.junit.BeforeClass; import org.opensearch.OpenSearchTimeoutException; import org.opensearch.Version; import org.opensearch.action.ActionListener; import org.opensearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsAction; -import org.opensearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsActionTests; import 
org.opensearch.action.admin.cluster.configuration.TransportClearVotingConfigExclusionsAction; import org.opensearch.action.support.ActionFilters; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; -import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfigExclusion; -import org.opensearch.cluster.ClusterStateObserver; -import org.opensearch.cluster.ClusterStateUpdateTask; -import org.opensearch.cluster.ack.ClusterStateUpdateResponse; import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.metadata.Metadata; @@ -33,7 +26,6 @@ import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.service.ClusterService; -import org.opensearch.common.collect.ImmutableOpenMap; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; @@ -44,11 +36,9 @@ import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; -import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.CountDownLatch; @@ -56,21 +46,11 @@ import java.util.stream.Collectors; import java.util.stream.StreamSupport; -import static java.util.Collections.emptyMap; import static java.util.Collections.emptySet; import static java.util.Collections.singletonMap; -import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.instanceOf; -import static org.hamcrest.Matchers.sameInstance; import static org.hamcrest.Matchers.startsWith; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; -import static 
org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; -import static org.opensearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsAction.MAXIMUM_VOTING_CONFIG_EXCLUSIONS_SETTING; import static org.opensearch.cluster.ClusterState.builder; import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService; import static org.opensearch.test.ClusterServiceUtils.createClusterService; @@ -105,10 +85,7 @@ public void setTransportServiceAndDefaultClusterState() { clusterState = setLocalNodeAsClusterManagerNode(clusterState, "node1"); clusterState = setThreeNodesInVotingConfig(clusterState); final ClusterState.Builder builder = builder(clusterState); - setState( - clusterService, - builder - ); + setState(clusterService, builder); final MockTransport transport = new MockTransport(); transportService = transport.createTransportService( Settings.EMPTY, @@ -155,20 +132,17 @@ public void shutdownThreadPoolAndClusterService() { public void testAddNodesToVotingConfigExclusion() throws InterruptedException { final CountDownLatch countDownLatch = new CountDownLatch(1); Set nodesToRemoveFromVotingConfig = Collections.singleton(randomFrom("node1", "node6", "node11")); - decommissionController.excludeDecommissionedNodesFromVotingConfig( - nodesToRemoveFromVotingConfig, - new ActionListener() { - @Override - public void onResponse(Void unused) { - countDownLatch.countDown(); - } + decommissionController.excludeDecommissionedNodesFromVotingConfig(nodesToRemoveFromVotingConfig, new ActionListener() { + @Override + public void onResponse(Void unused) { + countDownLatch.countDown(); + } - @Override - public void onFailure(Exception e) { - fail("unexpected failure occurred while removing node from voting config " + e); - } + @Override + public void onFailure(Exception e) { + fail("unexpected failure occurred while removing node from voting config " 
+ e); } - ); + }); assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); clusterService.getClusterApplierService().state().getVotingConfigExclusions().forEach(vce -> { assertTrue(nodesToRemoveFromVotingConfig.contains(vce.getNodeName())); @@ -186,14 +160,13 @@ public void onResponse(Void unused) { @Override public void onFailure(Exception e) { - fail("unexpected failure occurred while clearing voting config exclusion" + e); + fail("unexpected failure occurred while clearing voting config exclusion" + e); } }); assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); assertThat(clusterService.getClusterApplierService().state().getVotingConfigExclusions(), empty()); } - public void testNodesRemovedForDecommissionRequestSuccessfulResponse() throws InterruptedException { final CountDownLatch countDownLatch = new CountDownLatch(1); Set nodesToBeRemoved = new HashSet<>(); @@ -222,9 +195,8 @@ public void onFailure(Exception e) { assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); // test all 5 nodes removed and cluster has 10 nodes - Set nodes = StreamSupport.stream( - clusterService.getClusterApplierService().state().nodes().spliterator(), false - ).collect(Collectors.toSet()); + Set nodes = StreamSupport.stream(clusterService.getClusterApplierService().state().nodes().spliterator(), false) + .collect(Collectors.toSet()); assertEquals(nodes.size(), 10); // test no nodes part of zone-3 for (DiscoveryNode node : nodes) { @@ -274,20 +246,17 @@ public void testSuccessfulDecommissionStatusMetadataUpdate() throws InterruptedE state = ClusterState.builder(state).metadata(mdBuilder).build(); setState(clusterService, state); - decommissionController.updateMetadataWithDecommissionStatus( - DecommissionStatus.DECOMMISSION_SUCCESSFUL, - new ActionListener() { - @Override - public void onResponse(Void unused) { - countDownLatch.countDown(); - } + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_SUCCESSFUL, new ActionListener() { + 
@Override + public void onResponse(Void unused) { + countDownLatch.countDown(); + } - @Override - public void onFailure(Exception e) { - fail("decommission status update failed"); - } + @Override + public void onFailure(Exception e) { + fail("decommission status update failed"); } - ); + }); assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); ClusterState newState = clusterService.getClusterApplierService().state(); DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().custom(DecommissionAttributeMetadata.TYPE); @@ -311,15 +280,18 @@ private ClusterState setLocalNodeAsClusterManagerNode(ClusterState clusterState, private ClusterState setThreeNodesInVotingConfig(ClusterState clusterState) { final CoordinationMetadata.VotingConfiguration votingConfiguration = CoordinationMetadata.VotingConfiguration.of( - clusterState.nodes().get("node1"), clusterState.nodes().get("node6"), clusterState.nodes().get("node11") + clusterState.nodes().get("node1"), + clusterState.nodes().get("node6"), + clusterState.nodes().get("node11") ); - Metadata.Builder builder = Metadata.builder().coordinationMetadata( - CoordinationMetadata.builder() - .lastAcceptedConfiguration(votingConfiguration) - .lastCommittedConfiguration(votingConfiguration) - .build() - ); + Metadata.Builder builder = Metadata.builder() + .coordinationMetadata( + CoordinationMetadata.builder() + .lastAcceptedConfiguration(votingConfiguration) + .lastCommittedConfiguration(votingConfiguration) + .build() + ); clusterState = ClusterState.builder(clusterState).metadata(builder).build(); return clusterState; } diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java index 13d036b2952f7..04bb876761113 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java +++ 
b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -76,10 +76,7 @@ public void setUpService() { clusterState = setLocalNodeAsClusterManagerNode(clusterState, "node1"); clusterState = setThreeNodesInVotingConfig(clusterState); final ClusterState.Builder builder = builder(clusterState); - setState( - clusterService, - builder - ); + setState(clusterService, builder); final MockTransport transport = new MockTransport(); transportService = transport.createTransportService( Settings.EMPTY, @@ -118,10 +115,10 @@ public void shutdownThreadPoolAndClusterService() { public void testDecommissioningNotInitiatedForInvalidAttributeName() { DecommissionAttribute decommissionAttribute = new DecommissionAttribute("rack", "rack-a"); ActionListener listener = mock(ActionListener.class); - DecommissioningFailedException e = expectThrows(DecommissioningFailedException.class, () -> { - decommissionService.initiateAttributeDecommissioning( - decommissionAttribute, listener, clusterService.state()); - }); + DecommissioningFailedException e = expectThrows( + DecommissioningFailedException.class, + () -> { decommissionService.initiateAttributeDecommissioning(decommissionAttribute, listener, clusterService.state()); } + ); assertThat(e.getMessage(), Matchers.endsWith("invalid awareness attribute requested for decommissioning")); } @@ -129,13 +126,15 @@ public void testDecommissioningNotInitiatedForInvalidAttributeName() { public void testDecommissioningNotInitiatedForInvalidAttributeValue() { DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "random"); ActionListener listener = mock(ActionListener.class); - DecommissioningFailedException e = expectThrows(DecommissioningFailedException.class, () -> { - decommissionService.initiateAttributeDecommissioning( - decommissionAttribute, listener, clusterService.state()); - }); + DecommissioningFailedException e = expectThrows( + DecommissioningFailedException.class, + () -> { 
decommissionService.initiateAttributeDecommissioning(decommissionAttribute, listener, clusterService.state()); } + ); assertThat( e.getMessage(), - Matchers.endsWith("invalid awareness attribute value requested for decommissioning. Set forced awareness values before to decommission") + Matchers.endsWith( + "invalid awareness attribute value requested for decommissioning. Set forced awareness values before to decommission" + ) ); } @@ -149,14 +148,20 @@ public void testDecommissioningNotInitiatedWhenAlreadyDecommissioned() { setState( clusterService, builder.metadata( - Metadata.builder( - clusterService.state().metadata()).putCustom(DecommissionAttributeMetadata.TYPE, oldMetadata).build() - )); + Metadata.builder(clusterService.state().metadata()).putCustom(DecommissionAttributeMetadata.TYPE, oldMetadata).build() + ) + ); ActionListener listener = mock(ActionListener.class); - DecommissioningFailedException e = expectThrows(DecommissioningFailedException.class, () -> { - decommissionService.initiateAttributeDecommissioning( - new DecommissionAttribute("zone", "zone_2"), listener, clusterService.state()); - }); + DecommissioningFailedException e = expectThrows( + DecommissioningFailedException.class, + () -> { + decommissionService.initiateAttributeDecommissioning( + new DecommissionAttribute("zone", "zone_2"), + listener, + clusterService.state() + ); + } + ); assertThat( e.getMessage(), Matchers.endsWith("one awareness attribute already decommissioned, recommission before triggering another decommission") @@ -180,15 +185,18 @@ private ClusterState setLocalNodeAsClusterManagerNode(ClusterState clusterState, private ClusterState setThreeNodesInVotingConfig(ClusterState clusterState) { final CoordinationMetadata.VotingConfiguration votingConfiguration = CoordinationMetadata.VotingConfiguration.of( - clusterState.nodes().get("node1"), clusterState.nodes().get("node6"), clusterState.nodes().get("node11") + clusterState.nodes().get("node1"), + 
clusterState.nodes().get("node6"), + clusterState.nodes().get("node11") ); - Metadata.Builder builder = Metadata.builder().coordinationMetadata( - CoordinationMetadata.builder() - .lastAcceptedConfiguration(votingConfiguration) - .lastCommittedConfiguration(votingConfiguration) - .build() - ); + Metadata.Builder builder = Metadata.builder() + .coordinationMetadata( + CoordinationMetadata.builder() + .lastAcceptedConfiguration(votingConfiguration) + .lastCommittedConfiguration(votingConfiguration) + .build() + ); clusterState = ClusterState.builder(clusterState).metadata(builder).build(); return clusterState; } diff --git a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java index 5423c2ed672a3..60b3a03848830 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java @@ -54,13 +54,10 @@ protected Metadata.Custom makeTestChanges(Metadata.Custom testInstance) { if (randomBoolean()) { attributeName = randomAlphaOfLength(6); } - if(randomBoolean()) { + if (randomBoolean()) { attributeValue = randomAlphaOfLength(6); } - return new DecommissionAttributeMetadata( - new DecommissionAttribute(attributeName, attributeValue), - decommissionStatus - ); + return new DecommissionAttributeMetadata(new DecommissionAttribute(attributeName, attributeValue), decommissionStatus); } @Override From ebf7e6e2fe10ab00ef7a6ffc412afd2da37d75aa Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 30 Aug 2022 15:56:26 +0530 Subject: [PATCH 38/87] Update enum Signed-off-by: Rishab Nahata --- server/src/main/java/org/opensearch/OpenSearchException.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/server/src/main/java/org/opensearch/OpenSearchException.java b/server/src/main/java/org/opensearch/OpenSearchException.java index 7799aef7fab38..83a11ba10da56 100644 --- a/server/src/main/java/org/opensearch/OpenSearchException.java +++ b/server/src/main/java/org/opensearch/OpenSearchException.java @@ -1611,7 +1611,7 @@ private enum OpenSearchExceptionHandle { 162, V_3_0_0 ), - DECOMMISSION_FAILED_EXCEPTION( + DECOMMISSIONING_FAILED_EXCEPTION( org.opensearch.cluster.decommission.DecommissioningFailedException.class, org.opensearch.cluster.decommission.DecommissioningFailedException::new, 163, From c3755a243c00cff3ed7d5aafcb7b2fc527d4a5f6 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 30 Aug 2022 16:11:01 +0530 Subject: [PATCH 39/87] Fix spotless and precommit checks Signed-off-by: Rishab Nahata --- server/src/main/java/org/opensearch/OpenSearchException.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/OpenSearchException.java b/server/src/main/java/org/opensearch/OpenSearchException.java index 83a11ba10da56..932aae741160e 100644 --- a/server/src/main/java/org/opensearch/OpenSearchException.java +++ b/server/src/main/java/org/opensearch/OpenSearchException.java @@ -34,7 +34,6 @@ import org.opensearch.action.support.replication.ReplicationOperation; import org.opensearch.cluster.action.shard.ShardStateAction; -import org.opensearch.cluster.decommission.DecommissioningFailedException; import org.opensearch.common.CheckedFunction; import org.opensearch.common.Nullable; import org.opensearch.common.ParseField; From 9c7cd3f7bbc38fe68b9bee4c43518fbbbf7a4a52 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 30 Aug 2022 20:07:39 +0530 Subject: [PATCH 40/87] Add package-info and Changelog Signed-off-by: Rishab Nahata --- CHANGELOG.md | 1 + .../cluster/decommission/package-info.java | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 
server/src/main/java/org/opensearch/cluster/decommission/package-info.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cbfc56ed776c..697a66cd13d9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Add index specific setting for remote repository ([#4253](https://github.com/opensearch-project/OpenSearch/pull/4253)) - [Segment Replication] Update replicas to commit SegmentInfos instead of relying on SIS files from primary shards. ([#4402](https://github.com/opensearch-project/OpenSearch/pull/4402)) - [CCR] Add getHistoryOperationsFromTranslog method to fetch the history snapshot from translogs ([#3948](https://github.com/opensearch-project/OpenSearch/pull/3948)) +- Add DecommissionService and helper to execute awareness attribute decommissioning ([#4084](https://github.com/opensearch-project/OpenSearch/pull/4084)) ### Deprecated diff --git a/server/src/main/java/org/opensearch/cluster/decommission/package-info.java b/server/src/main/java/org/opensearch/cluster/decommission/package-info.java new file mode 100644 index 0000000000000..256c2f22253cc --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +/** + * Decommission lifecycle classes + */ +package org.opensearch.cluster.decommission; From 0bb70e264b197ca36f01d341bef78137976a0ecb Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Thu, 1 Sep 2022 17:28:43 +0530 Subject: [PATCH 41/87] Add checks for quorum Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 38 ++++++-- .../DecommissionServiceTests.java | 88 ++++++++++++++----- 2 files changed, 98 insertions(+), 28 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 89ddabb9fa19e..e3e76a1e45086 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -16,6 +16,8 @@ import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; +import org.opensearch.cluster.coordination.ClusterBootstrapService; +import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.allocation.AllocationService; @@ -116,15 +118,22 @@ public void initiateAttributeDecommissioning( // action validateAwarenessAttribute(decommissionAttribute, awarenessAttributes, forcedAwarenessAttributes); + Set clusterManagerNodesToBeDecommissioned = nodesWithDecommissionAttribute(state, decommissionAttribute, true); + ensureNoQuorumLossDueToDecommissioning( + decommissionAttribute, + clusterManagerNodesToBeDecommissioned, + state.getLastAcceptedConfiguration() + ); + DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); // validates that there's no inflight decommissioning or already 
executed decommission in place ensureNoAwarenessAttributeDecommissioned(decommissionAttributeMetadata, decommissionAttribute); logger.info("initiating awareness attribute [{}] decommissioning", decommissionAttribute.toString()); - // remove all decommissioned cluster manager eligible nodes from voting config + // remove all 'to-be-decommissioned' cluster manager eligible nodes from voting config // The method ensures that we don't exclude same nodes multiple times - excludeDecommissionedClusterManagerNodesFromVotingConfig(decommissionAttribute); + excludeDecommissionedClusterManagerNodesFromVotingConfig(clusterManagerNodesToBeDecommissioned); // explicitly throwing NotClusterManagerException as we can certainly say the local cluster manager node will // be abdicated and soon will no longer be cluster manager. @@ -140,12 +149,7 @@ public void initiateAttributeDecommissioning( } } - private void excludeDecommissionedClusterManagerNodesFromVotingConfig(DecommissionAttribute decommissionAttribute) { - Set clusterManagerNodesToBeDecommissioned = nodesWithDecommissionAttribute( - clusterService.state(), - decommissionAttribute, - true - ); + private void excludeDecommissionedClusterManagerNodesFromVotingConfig(Set clusterManagerNodesToBeDecommissioned) { Set clusterManagerNodesNameToBeDecommissioned = clusterManagerNodesToBeDecommissioned.stream() .map(DiscoveryNode::getName) .collect(Collectors.toSet()); @@ -410,4 +414,22 @@ private static void ensureNoAwarenessAttributeDecommissioned( ); } } + + private static void ensureNoQuorumLossDueToDecommissioning( + DecommissionAttribute decommissionAttribute, + Set clusterManagerNodesToBeDecommissioned, + CoordinationMetadata.VotingConfiguration votingConfiguration + ) { + final Set nodesInVotingConfig = votingConfiguration.getNodeIds(); + assert nodesInVotingConfig.isEmpty() == false; + final int requiredNodes = nodesInVotingConfig.size() / 2 + 1; + final Set realNodeIds = new HashSet<>(nodesInVotingConfig); + 
realNodeIds.removeIf(ClusterBootstrapService::isBootstrapPlaceholder); + if (realNodeIds.size() - clusterManagerNodesToBeDecommissioned.size() < requiredNodes) { + throw new DecommissioningFailedException( + decommissionAttribute, + "cannot proceed with decommission request. Cluster might go into quorum loss" + ); + } + } } diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java index 04bb876761113..ea4d9eb76b0dc 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -32,7 +32,6 @@ import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; -import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.Map; @@ -67,14 +66,25 @@ public void setUp() throws Exception { @Before public void setUpService() { ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); - logger.info("--> adding five nodes on same zone_1"); - clusterState = addNodes(clusterState, "zone_1", "node1", "node2", "node3", "node4", "node5"); - logger.info("--> adding five nodes on same zone_2"); - clusterState = addNodes(clusterState, "zone_2", "node6", "node7", "node8", "node9", "node10"); - logger.info("--> adding five nodes on same zone_3"); - clusterState = addNodes(clusterState, "zone_3", "node11", "node12", "node13", "node14", "node15"); + logger.info("--> adding cluster manager node on zone_1"); + clusterState = addClusterManagerNodes(clusterState, "zone_1", "node1"); + logger.info("--> adding cluster manager node on zone_2"); + clusterState = addClusterManagerNodes(clusterState, "zone_2", "node6"); + logger.info("--> adding cluster manager node on zone_3"); + clusterState = addClusterManagerNodes(clusterState, "zone_3", 
"node11"); + logger.info("--> adding four data nodes on zone_1"); + clusterState = addDataNodes(clusterState, "zone_1", "node2", "node3", "node4", "node5"); + logger.info("--> adding four data nodes on zone_2"); + clusterState = addDataNodes(clusterState, "zone_2", "node7", "node8", "node9", "node10"); + logger.info("--> adding four data nodes on zone_3"); + clusterState = addDataNodes(clusterState, "zone_3", "node12", "node13", "node14", "node15"); clusterState = setLocalNodeAsClusterManagerNode(clusterState, "node1"); - clusterState = setThreeNodesInVotingConfig(clusterState); + clusterState = setNodesInVotingConfig( + clusterState, + clusterState.nodes().get("node1"), + clusterState.nodes().get("node6"), + clusterState.nodes().get("node11") + ); final ClusterState.Builder builder = builder(clusterState); setState(clusterService, builder); final MockTransport transport = new MockTransport(); @@ -168,9 +178,37 @@ public void testDecommissioningNotInitiatedWhenAlreadyDecommissioned() { ); } - private ClusterState addNodes(ClusterState clusterState, String zone, String... nodeIds) { + @SuppressWarnings("unchecked") + public void testDecommissioningNotInitiatedWhenNotEnoughClusterManagerNodes() { + ClusterState state = clusterService.state(); + // shrink voting config + state = setNodesInVotingConfig(state, state.nodes().get("node1"), state.nodes().get("node11")); + setState(clusterService, state); + ActionListener listener = mock(ActionListener.class); + DecommissioningFailedException e = expectThrows( + DecommissioningFailedException.class, + () -> { + decommissionService.initiateAttributeDecommissioning( + new DecommissionAttribute("zone", "zone_3"), + listener, + clusterService.state() + ); + } + ); + assertThat(e.getMessage(), Matchers.endsWith("cannot proceed with decommission request. Cluster might go into quorum loss")); + } + + private ClusterState addDataNodes(ClusterState clusterState, String zone, String... 
nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + org.opensearch.common.collect.List.of(nodeIds).forEach(nodeId -> nodeBuilder.add(newDataNode(nodeId, singletonMap("zone", zone)))); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState addClusterManagerNodes(ClusterState clusterState, String zone, String... nodeIds) { DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); - org.opensearch.common.collect.List.of(nodeIds).forEach(nodeId -> nodeBuilder.add(newNode(nodeId, singletonMap("zone", zone)))); + org.opensearch.common.collect.List.of(nodeIds) + .forEach(nodeId -> nodeBuilder.add(newClusterManagerNode(nodeId, singletonMap("zone", zone)))); clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); return clusterState; } @@ -183,12 +221,8 @@ private ClusterState setLocalNodeAsClusterManagerNode(ClusterState clusterState, return clusterState; } - private ClusterState setThreeNodesInVotingConfig(ClusterState clusterState) { - final CoordinationMetadata.VotingConfiguration votingConfiguration = CoordinationMetadata.VotingConfiguration.of( - clusterState.nodes().get("node1"), - clusterState.nodes().get("node6"), - clusterState.nodes().get("node11") - ); + private ClusterState setNodesInVotingConfig(ClusterState clusterState, DiscoveryNode... 
nodes) { + final CoordinationMetadata.VotingConfiguration votingConfiguration = CoordinationMetadata.VotingConfiguration.of(nodes); Metadata.Builder builder = Metadata.builder() .coordinationMetadata( @@ -201,11 +235,25 @@ private ClusterState setThreeNodesInVotingConfig(ClusterState clusterState) { return clusterState; } - private static DiscoveryNode newNode(String nodeId, Map attributes) { - return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_DATA_ROLE, Version.CURRENT); + private static DiscoveryNode newDataNode(String nodeId, Map attributes) { + return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, DATA_ROLE, Version.CURRENT); + } + + private static DiscoveryNode newClusterManagerNode(String nodeId, Map attributes) { + return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_ROLE, Version.CURRENT); } - final private static Set CLUSTER_MANAGER_DATA_ROLE = Collections.unmodifiableSet( - new HashSet<>(Arrays.asList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)) + final private static Set CLUSTER_MANAGER_ROLE = Collections.unmodifiableSet( + new HashSet<>(Collections.singletonList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) ); + + final private static Set DATA_ROLE = Collections.unmodifiableSet( + new HashSet<>(Collections.singletonList(DiscoveryNodeRole.DATA_ROLE)) + ); + + private ClusterState removeNodes(ClusterState clusterState, String... 
nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.getNodes()); + org.opensearch.common.collect.List.of(nodeIds).forEach(nodeBuilder::remove); + return allocationService.disassociateDeadNodes(ClusterState.builder(clusterState).nodes(nodeBuilder).build(), false, "test"); + } } From d976865338132896729a14608e5725d972231da9 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Thu, 1 Sep 2022 17:52:19 +0530 Subject: [PATCH 42/87] Bug fix Signed-off-by: Rishab Nahata --- .../org/opensearch/cluster/decommission/DecommissionService.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index e3e76a1e45086..e37915d0924a9 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -425,6 +425,7 @@ private static void ensureNoQuorumLossDueToDecommissioning( final int requiredNodes = nodesInVotingConfig.size() / 2 + 1; final Set realNodeIds = new HashSet<>(nodesInVotingConfig); realNodeIds.removeIf(ClusterBootstrapService::isBootstrapPlaceholder); + clusterManagerNodesToBeDecommissioned.removeIf(b -> !nodesInVotingConfig.contains(b.getId())); if (realNodeIds.size() - clusterManagerNodesToBeDecommissioned.size() < requiredNodes) { throw new DecommissioningFailedException( decommissionAttribute, From ba5c57216647cb4ee2aee7e1ca93483618293799 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 2 Sep 2022 16:16:15 +0530 Subject: [PATCH 43/87] Resolving PR comments Signed-off-by: Rishab Nahata --- .../decommission/DecommissionController.java | 3 +-- .../cluster/decommission/DecommissionService.java | 2 +- .../cluster/decommission/DecommissionStatus.java | 14 +------------- .../coordination/JoinTaskExecutorTests.java | 3 +-- 
.../decommission/DecommissionControllerTests.java | 4 ++-- 5 files changed, 6 insertions(+), 20 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 1799479efe4cc..89790c7cdeef8 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -102,7 +102,6 @@ public AddVotingConfigExclusionsResponse read(StreamInput in) throws IOException public void clearVotingConfigExclusion(ActionListener listener) { final ClearVotingConfigExclusionsRequest clearVotingConfigExclusionsRequest = new ClearVotingConfigExclusionsRequest(); - clearVotingConfigExclusionsRequest.setWaitForRemoval(true); transportService.sendRequest( transportService.getLocalNode(), ClearVotingConfigExclusionsAction.NAME, @@ -133,7 +132,7 @@ public ClearVotingConfigExclusionsResponse read(StreamInput in) throws IOExcepti ); } - public void handleNodesDecommissionRequest( + public void removeDecommissionedNodes( Set nodesToBeDecommissioned, String reason, TimeValue timeout, diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index e37915d0924a9..e7dbca75ca16b 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -302,7 +302,7 @@ private void failDecommissionedNodes(ClusterState state) { DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); // execute nodes decommissioning - decommissionController.handleNodesDecommissionRequest( + decommissionController.removeDecommissionedNodes( nodesWithDecommissionAttribute(state, 
decommissionAttribute, false), "nodes-decommissioned", TimeValue.timeValueSeconds(30L), // TODO - read timeout from request while integrating with API diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java index 41f9acfbc35d7..1474faa9bb227 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java @@ -27,15 +27,7 @@ public enum DecommissionStatus { /** * Decommission request failed */ - DECOMMISSION_FAILED("decommission_failed"), - /** - * Recommission request received, recommissioning process has started - */ - RECOMMISSION_IN_PROGRESS("recommission_in_progress"), - /** - * Recommission request failed. No nodes should fail to join the cluster with decommission exception - */ - RECOMMISSION_FAILED("recommission_failed"); + DECOMMISSION_FAILED("decommission_failed"); private final String status; @@ -70,10 +62,6 @@ public static DecommissionStatus fromString(String status) { return DECOMMISSION_SUCCESSFUL; } else if (status.equals(DECOMMISSION_FAILED.status())) { return DECOMMISSION_FAILED; - } else if (status.equals(RECOMMISSION_IN_PROGRESS.status())) { - return RECOMMISSION_IN_PROGRESS; - } else if (status.equals(RECOMMISSION_FAILED.status())) { - return RECOMMISSION_FAILED; } throw new IllegalStateException("Decommission status [" + status + "] not recognized."); } diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index 734f112bdce3d..003a16bc218ef 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -272,8 +272,7 @@ public void 
testJoinClusterWithDecommissionFailedOrInitOrRecommission() { DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); DecommissionStatus decommissionStatus = randomFrom( DecommissionStatus.DECOMMISSION_INIT, - DecommissionStatus.DECOMMISSION_FAILED, - DecommissionStatus.RECOMMISSION_IN_PROGRESS + DecommissionStatus.DECOMMISSION_FAILED ); DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( decommissionAttribute, diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java index e5d7ec60c0e23..8e5d4e61937a4 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -176,7 +176,7 @@ public void testNodesRemovedForDecommissionRequestSuccessfulResponse() throws In nodesToBeRemoved.add(clusterService.state().nodes().get("node14")); nodesToBeRemoved.add(clusterService.state().nodes().get("node15")); - decommissionController.handleNodesDecommissionRequest( + decommissionController.removeDecommissionedNodes( nodesToBeRemoved, "unit-test", TimeValue.timeValueSeconds(30L), @@ -212,7 +212,7 @@ public void testTimesOut() throws InterruptedException { nodesToBeRemoved.add(clusterService.state().nodes().get("node13")); nodesToBeRemoved.add(clusterService.state().nodes().get("node14")); nodesToBeRemoved.add(clusterService.state().nodes().get("node15")); - decommissionController.handleNodesDecommissionRequest( + decommissionController.removeDecommissionedNodes( nodesToBeRemoved, "unit-test", TimeValue.timeValueMillis(2), From 5cc5c9cbc4d250bc8f3cf05f3587424e8b0bf1dd Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 6 Sep 2022 13:56:58 +0530 Subject: [PATCH 44/87] Update awareness attribute decommission status check 
Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index e7dbca75ca16b..9f7eeeed80ebd 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -127,7 +127,7 @@ public void initiateAttributeDecommissioning( DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); // validates that there's no inflight decommissioning or already executed decommission in place - ensureNoAwarenessAttributeDecommissioned(decommissionAttributeMetadata, decommissionAttribute); + ensureNoInflightDifferentDecommissionRequest(decommissionAttributeMetadata, decommissionAttribute); logger.info("initiating awareness attribute [{}] decommissioning", decommissionAttribute.toString()); @@ -214,7 +214,7 @@ public ClusterState execute(ClusterState currentState) { Metadata metadata = currentState.metadata(); Metadata.Builder mdBuilder = Metadata.builder(metadata); DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); - ensureNoAwarenessAttributeDecommissioned(decommissionAttributeMetadata, decommissionAttribute); + ensureNoInflightDifferentDecommissionRequest(decommissionAttributeMetadata, decommissionAttribute); decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); logger.info( @@ -401,17 +401,28 @@ private static void validateAwarenessAttribute( } } - private static void ensureNoAwarenessAttributeDecommissioned( + private static void 
ensureNoInflightDifferentDecommissionRequest( DecommissionAttributeMetadata decommissionAttributeMetadata, DecommissionAttribute decommissionAttribute ) { - // If the previous decommission request failed, we will allow the request to pass this check - if (decommissionAttributeMetadata != null - && !decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_FAILED)) { - throw new DecommissioningFailedException( - decommissionAttribute, - "one awareness attribute already decommissioned, recommission before triggering another decommission" - ); + String msg = null; + if (decommissionAttributeMetadata!=null) { + if (decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_SUCCESSFUL)) { + // one awareness attribute is already decommissioned. We will reject the new request + msg = "one awareness attribute already successfully decommissioned. Recommission before triggering another decommission"; + } else if (decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_FAILED)) { + // here we are sure that the previous decommission request failed, we can let this request pass this check + return; + } else { + // it means the decommission has been initiated or is inflight. 
In that case, if the same attribute is requested for decommissioning, + // which can happen during retries, we will pass this check, if not, we will throw exception + if (!decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute)) { + msg = "another request for decommission is in flight, will not process this request"; + } + } + } + if (msg != null) { + throw new DecommissioningFailedException(decommissionAttribute, msg); } } From a356e46f93639745415304da80981fb47566efb7 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 6 Sep 2022 15:06:20 +0530 Subject: [PATCH 45/87] Update quorum loss check logic Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 19 ++++++++------- .../DecommissionServiceTests.java | 24 +++++++++++-------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 9f7eeeed80ebd..96fe9f8740461 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -431,16 +431,19 @@ private static void ensureNoQuorumLossDueToDecommissioning( Set clusterManagerNodesToBeDecommissioned, CoordinationMetadata.VotingConfiguration votingConfiguration ) { - final Set nodesInVotingConfig = votingConfiguration.getNodeIds(); - assert nodesInVotingConfig.isEmpty() == false; - final int requiredNodes = nodesInVotingConfig.size() / 2 + 1; - final Set realNodeIds = new HashSet<>(nodesInVotingConfig); - realNodeIds.removeIf(ClusterBootstrapService::isBootstrapPlaceholder); - clusterManagerNodesToBeDecommissioned.removeIf(b -> !nodesInVotingConfig.contains(b.getId())); - if (realNodeIds.size() - clusterManagerNodesToBeDecommissioned.size() < requiredNodes) { + Set clusterManagerNodesIdToBeDecommissioned = new HashSet<>(); 
+ clusterManagerNodesToBeDecommissioned.forEach(node -> clusterManagerNodesIdToBeDecommissioned.add(node.getId())); + if (!votingConfiguration.hasQuorum( + votingConfiguration.getNodeIds() + .stream() + .filter(n -> clusterManagerNodesIdToBeDecommissioned.contains(n) == false) + .collect(Collectors.toList() + ) + ) + ) { throw new DecommissioningFailedException( decommissionAttribute, - "cannot proceed with decommission request. Cluster might go into quorum loss" + "cannot proceed with decommission request as cluster might go into quorum loss" ); } } diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java index ea4d9eb76b0dc..126bd605425e3 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -54,17 +54,11 @@ public class DecommissionServiceTests extends OpenSearchTestCase { private DecommissionService decommissionService; private ClusterSettings clusterSettings; - @Override - public void setUp() throws Exception { - super.setUp(); - super.setUp(); + @Before + public void setUpService() { threadPool = new TestThreadPool("test", Settings.EMPTY); clusterService = createClusterService(threadPool); allocationService = createAllocationService(); - } - - @Before - public void setUpService() { ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); logger.info("--> adding cluster manager node on zone_1"); clusterState = addClusterManagerNodes(clusterState, "zone_1", "node1"); @@ -174,7 +168,17 @@ public void testDecommissioningNotInitiatedWhenAlreadyDecommissioned() { ); assertThat( e.getMessage(), - Matchers.endsWith("one awareness attribute already decommissioned, recommission before triggering another decommission") + Matchers.endsWith("another request for decommission is 
in flight, will not process this request") + ); + } + + @SuppressWarnings("unchecked") + public void testDecommissioningInitiatedWhenEnoughClusterManagerNodes() { + ActionListener listener = mock(ActionListener.class); + decommissionService.initiateAttributeDecommissioning( + new DecommissionAttribute("zone", "zone_3"), + listener, + clusterService.state() ); } @@ -195,7 +199,7 @@ public void testDecommissioningNotInitiatedWhenNotEnoughClusterManagerNodes() { ); } ); - assertThat(e.getMessage(), Matchers.endsWith("cannot proceed with decommission request. Cluster might go into quorum loss")); + assertThat(e.getMessage(), Matchers.endsWith("cannot proceed with decommission request as cluster might go into quorum loss")); } private ClusterState addDataNodes(ClusterState clusterState, String zone, String... nodeIds) { From 585c37c91af80e0611336331d24eeab5111cecca Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 6 Sep 2022 16:24:05 +0530 Subject: [PATCH 46/87] Update status assertion and clear voting config for failed init Signed-off-by: Rishab Nahata --- .../decommission/DecommissionController.java | 25 ++++++++------ .../decommission/DecommissionService.java | 34 ++++++++++++++----- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 89790c7cdeef8..25c600ec47add 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -109,13 +109,11 @@ public void clearVotingConfigExclusion(ActionListener listener) { new TransportResponseHandler() { @Override public void handleResponse(ClearVotingConfigExclusionsResponse response) { - logger.info("successfully cleared voting config after decommissioning"); listener.onResponse(null); } @Override public void 
handleException(TransportException exp) { - logger.debug(new ParameterizedMessage("failure in clearing voting config exclusion after decommissioning"), exp); listener.onFailure(exp); } @@ -196,7 +194,7 @@ public ClusterState execute(ClusterState currentState) { Metadata metadata = currentState.metadata(); DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); assert decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute() != null; - assert assertIncrementalStatusOrFailed(decommissionAttributeMetadata.status(), decommissionStatus); + assert assertStatusTransitionOrFailed(decommissionAttributeMetadata.status(), decommissionStatus); Metadata.Builder mdBuilder = Metadata.builder(metadata); DecommissionAttributeMetadata newMetadata = decommissionAttributeMetadata.withUpdatedStatus(decommissionStatus); mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, newMetadata); @@ -218,13 +216,20 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS }); } - private static boolean assertIncrementalStatusOrFailed(DecommissionStatus oldStatus, DecommissionStatus newStatus) { - if (newStatus.equals(DecommissionStatus.DECOMMISSION_FAILED)) return true; - else if (newStatus.equals(DecommissionStatus.DECOMMISSION_SUCCESSFUL)) { - return oldStatus.equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS); - } else if (newStatus.equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS)) { - return oldStatus.equals(DecommissionStatus.DECOMMISSION_INIT); + private static boolean assertStatusTransitionOrFailed(DecommissionStatus oldStatus, DecommissionStatus newStatus) { + switch (newStatus) { + case DECOMMISSION_INIT: + // if the new status is INIT, then the old status cannot be anything but FAILED + return oldStatus.equals(DecommissionStatus.DECOMMISSION_FAILED); + case DECOMMISSION_IN_PROGRESS: + // if the new status is IN_PROGRESS, the old status has to be INIT + return 
oldStatus.equals(DecommissionStatus.DECOMMISSION_INIT); + case DECOMMISSION_SUCCESSFUL: + // if the new status is SUCCESSFUL, the old status has to be IN_PROGRESS + return oldStatus.equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS); + default: + // if the new status is FAILED, we don't need to assert for previous state + return true; } - return true; } } diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 96fe9f8740461..0bf6be700a0ee 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -16,7 +16,6 @@ import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; -import org.opensearch.cluster.coordination.ClusterBootstrapService; import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; @@ -227,13 +226,7 @@ public ClusterState execute(ClusterState currentState) { @Override public void onFailure(String source, Exception e) { - if (e instanceof DecommissioningFailedException) { - logger.error( - () -> new ParameterizedMessage("failed to decommission attribute [{}]", decommissionAttribute.toString()), - e - ); - listener.onFailure(e); - } else if (e instanceof NotClusterManagerException) { + if (e instanceof NotClusterManagerException) { logger.debug( () -> new ParameterizedMessage( "cluster-manager updated while executing request for decommission attribute [{}]", @@ -250,7 +243,7 @@ public void onFailure(String source, Exception e) { ), e ); - listener.onFailure(e); + failAndClearVotingConfigExclusion(listener, e); } } @@ -267,6 +260,24 @@ public void clusterStateProcessed(String source, 
ClusterState oldState, ClusterS ); } + private void failAndClearVotingConfigExclusion(final ActionListener listener, Exception e) { + decommissionController.clearVotingConfigExclusion(new ActionListener() { + @Override + public void onResponse(Void unused) { + logger.info("successfully cleared voting config exclusion after failing to execute decommission request"); + } + + @Override + public void onFailure(Exception e) { + logger.debug(new ParameterizedMessage( + "failure in clearing voting config exclusion after failing to execute decommission request"), + e + ); + } + }); + listener.onFailure(e); + } + private void initiateGracefulDecommission() { decommissionController.updateMetadataWithDecommissionStatus( DecommissionStatus.DECOMMISSION_IN_PROGRESS, @@ -338,6 +349,7 @@ public void onFailure(Exception e) { decommissionController.clearVotingConfigExclusion(new ActionListener() { @Override public void onResponse(Void unused) { + logger.info("successfully cleared voting config exclusion after failing to execute decommission request"); DecommissionStatus updateStatusWith = decommissionSuccessful ? 
DecommissionStatus.DECOMMISSION_SUCCESSFUL : DecommissionStatus.DECOMMISSION_FAILED; @@ -346,6 +358,10 @@ public void onResponse(Void unused) { @Override public void onFailure(Exception e) { + logger.debug(new ParameterizedMessage( + "failure in clearing voting config exclusion after processing decommission request"), + e + ); decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_FAILED, statusUpdateListener); } }); From 406950a49fa94fd2b5ff1775cc2dbe51f38b9af0 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 6 Sep 2022 16:45:50 +0530 Subject: [PATCH 47/87] Refactoring Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 0bf6be700a0ee..d5afcda667115 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -117,7 +117,7 @@ public void initiateAttributeDecommissioning( // action validateAwarenessAttribute(decommissionAttribute, awarenessAttributes, forcedAwarenessAttributes); - Set clusterManagerNodesToBeDecommissioned = nodesWithDecommissionAttribute(state, decommissionAttribute, true); + Set clusterManagerNodesToBeDecommissioned = filterNodesWithDecommissionAttribute(state, decommissionAttribute, true); ensureNoQuorumLossDueToDecommissioning( decommissionAttribute, clusterManagerNodesToBeDecommissioned, @@ -254,7 +254,7 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS assert decommissionAttribute.equals(decommissionAttributeMetadata.decommissionAttribute()); assert DecommissionStatus.DECOMMISSION_INIT.equals(decommissionAttributeMetadata.status()); listener.onResponse(new 
ClusterStateUpdateResponse(true)); - initiateGracefulDecommission(); + weighAwayForGracefulDecommission(); } } ); @@ -278,7 +278,7 @@ public void onFailure(Exception e) { listener.onFailure(e); } - private void initiateGracefulDecommission() { + private void weighAwayForGracefulDecommission() { decommissionController.updateMetadataWithDecommissionStatus( DecommissionStatus.DECOMMISSION_IN_PROGRESS, new ActionListener() { @@ -314,7 +314,7 @@ private void failDecommissionedNodes(ClusterState state) { // execute nodes decommissioning decommissionController.removeDecommissionedNodes( - nodesWithDecommissionAttribute(state, decommissionAttribute, false), + filterNodesWithDecommissionAttribute(state, decommissionAttribute, false), "nodes-decommissioned", TimeValue.timeValueSeconds(30L), // TODO - read timeout from request while integrating with API new ActionListener() { @@ -367,23 +367,19 @@ public void onFailure(Exception e) { }); } - public Set nodesWithDecommissionAttribute( + private Set filterNodesWithDecommissionAttribute( ClusterState clusterState, DecommissionAttribute decommissionAttribute, boolean onlyClusterManagerNodes ) { Set nodesWithDecommissionAttribute = new HashSet<>(); - final Predicate shouldDecommissionNodePredicate = discoveryNode -> nodeHasDecommissionedAttribute( - discoveryNode, - decommissionAttribute - ); Iterator nodesIter = onlyClusterManagerNodes ? 
clusterState.nodes().getClusterManagerNodes().valuesIt() : clusterState.nodes().getNodes().valuesIt(); while (nodesIter.hasNext()) { final DiscoveryNode node = nodesIter.next(); - if (shouldDecommissionNodePredicate.test(node)) { + if (nodeHasDecommissionedAttribute(node, decommissionAttribute)) { nodesWithDecommissionAttribute.add(node); } } From 29013a9d797e6827da9d97c3ec8b5b3aed351762 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 6 Sep 2022 17:12:29 +0530 Subject: [PATCH 48/87] Fix spotless check Signed-off-by: Rishab Nahata --- .../decommission/DecommissionController.java | 1 - .../decommission/DecommissionService.java | 21 ++++++++----------- .../coordination/JoinTaskExecutorTests.java | 5 +---- .../DecommissionServiceTests.java | 11 ++-------- 4 files changed, 12 insertions(+), 26 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 25c600ec47add..579ca508bd1d9 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -10,7 +10,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.OpenSearchTimeoutException; import org.opensearch.action.ActionListener; import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction; diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index d5afcda667115..78fb555ac5546 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -36,7 +36,6 @@ 
import java.util.List; import java.util.Map; import java.util.Set; -import java.util.function.Predicate; import java.util.stream.Collectors; import static org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING; @@ -269,8 +268,8 @@ public void onResponse(Void unused) { @Override public void onFailure(Exception e) { - logger.debug(new ParameterizedMessage( - "failure in clearing voting config exclusion after failing to execute decommission request"), + logger.debug( + new ParameterizedMessage("failure in clearing voting config exclusion after failing to execute decommission request"), e ); } @@ -358,8 +357,8 @@ public void onResponse(Void unused) { @Override public void onFailure(Exception e) { - logger.debug(new ParameterizedMessage( - "failure in clearing voting config exclusion after processing decommission request"), + logger.debug( + new ParameterizedMessage("failure in clearing voting config exclusion after processing decommission request"), e ); decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_FAILED, statusUpdateListener); @@ -418,7 +417,7 @@ private static void ensureNoInflightDifferentDecommissionRequest( DecommissionAttribute decommissionAttribute ) { String msg = null; - if (decommissionAttributeMetadata!=null) { + if (decommissionAttributeMetadata != null) { if (decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_SUCCESSFUL)) { // one awareness attribute is already decommissioned. We will reject the new request msg = "one awareness attribute already successfully decommissioned. Recommission before triggering another decommission"; @@ -426,8 +425,8 @@ private static void ensureNoInflightDifferentDecommissionRequest( // here we are sure that the previous decommission request failed, we can let this request pass this check return; } else { - // it means the decommission has been initiated or is inflight. 
In that case, if the same attribute is requested for decommissioning, - // which can happen during retries, we will pass this check, if not, we will throw exception + // it means the decommission has been initiated or is inflight. In that case, if the same attribute is requested for + // decommissioning, which can happen during retries, we will pass this check, if not, we will throw exception if (!decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute)) { msg = "another request for decommission is in flight, will not process this request"; } @@ -449,10 +448,8 @@ private static void ensureNoQuorumLossDueToDecommissioning( votingConfiguration.getNodeIds() .stream() .filter(n -> clusterManagerNodesIdToBeDecommissioned.contains(n) == false) - .collect(Collectors.toList() - ) - ) - ) { + .collect(Collectors.toList()) + )) { throw new DecommissioningFailedException( decommissionAttribute, "cannot proceed with decommission request as cluster might go into quorum loss" diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index 003a16bc218ef..e9ad305637f95 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -270,10 +270,7 @@ public void testJoinClusterWithDifferentDecommission() { public void testJoinClusterWithDecommissionFailedOrInitOrRecommission() { Settings.builder().build(); DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); - DecommissionStatus decommissionStatus = randomFrom( - DecommissionStatus.DECOMMISSION_INIT, - DecommissionStatus.DECOMMISSION_FAILED - ); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.DECOMMISSION_INIT, DecommissionStatus.DECOMMISSION_FAILED); DecommissionAttributeMetadata 
decommissionAttributeMetadata = new DecommissionAttributeMetadata( decommissionAttribute, decommissionStatus diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java index 126bd605425e3..df148c6201d3c 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -166,20 +166,13 @@ public void testDecommissioningNotInitiatedWhenAlreadyDecommissioned() { ); } ); - assertThat( - e.getMessage(), - Matchers.endsWith("another request for decommission is in flight, will not process this request") - ); + assertThat(e.getMessage(), Matchers.endsWith("another request for decommission is in flight, will not process this request")); } @SuppressWarnings("unchecked") public void testDecommissioningInitiatedWhenEnoughClusterManagerNodes() { ActionListener listener = mock(ActionListener.class); - decommissionService.initiateAttributeDecommissioning( - new DecommissionAttribute("zone", "zone_3"), - listener, - clusterService.state() - ); + decommissionService.initiateAttributeDecommissioning(new DecommissionAttribute("zone", "zone_3"), listener, clusterService.state()); } @SuppressWarnings("unchecked") From 1e6a3ffc92dafa4ee22b0672f51bdf0fe8238912 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 6 Sep 2022 18:18:53 +0530 Subject: [PATCH 49/87] Resolve comments Signed-off-by: Rishab Nahata --- .../main/java/org/opensearch/OpenSearchException.java | 4 ++-- .../cluster/decommission/DecommissionController.java | 11 +++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/opensearch/OpenSearchException.java b/server/src/main/java/org/opensearch/OpenSearchException.java index 932aae741160e..2b909271660c5 100644 --- a/server/src/main/java/org/opensearch/OpenSearchException.java 
+++ b/server/src/main/java/org/opensearch/OpenSearchException.java @@ -1614,13 +1614,13 @@ private enum OpenSearchExceptionHandle { org.opensearch.cluster.decommission.DecommissioningFailedException.class, org.opensearch.cluster.decommission.DecommissioningFailedException::new, 163, - V_2_3_0 + V_3_0_0 ), NODE_DECOMMISSIONED_EXCEPTION( org.opensearch.cluster.decommission.NodeDecommissionedException.class, org.opensearch.cluster.decommission.NodeDecommissionedException::new, 164, - V_2_3_0 + V_3_0_0 ); final Class exceptionClass; diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 579ca508bd1d9..dc183613fa162 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -143,7 +143,7 @@ public void removeDecommissionedNodes( clusterService.submitStateUpdateTasks( "node-decommissioned", nodesDecommissionTasks, - ClusterStateTaskConfig.build(Priority.IMMEDIATE), + ClusterStateTaskConfig.build(Priority.URGENT), nodeRemovalExecutor ); @@ -170,7 +170,7 @@ public void onNewClusterState(ClusterState state) { @Override public void onClusterServiceClose() { - logger.debug("cluster service closed while waiting for removal of decommissioned nodes."); + logger.warn("cluster service closed while waiting for removal of decommissioned nodes."); } @Override @@ -178,7 +178,8 @@ public void onTimeout(TimeValue timeout) { logger.info("timed out while waiting for removal of decommissioned nodes"); nodesRemovedListener.onFailure( new OpenSearchTimeoutException( - "timed out waiting for removal of decommissioned nodes [{}] to take effect", + "timed out [{}] while waiting for removal of decommissioned nodes [{}] to take effect", + timeout.toString(), nodesToBeDecommissioned.toString() ) ); @@ -226,9 +227,11 @@ private static 
boolean assertStatusTransitionOrFailed(DecommissionStatus oldStat case DECOMMISSION_SUCCESSFUL: // if the new status is SUCCESSFUL, the old status has to be IN_PROGRESS return oldStatus.equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS); - default: + case DECOMMISSION_FAILED: // if the new status is FAILED, we don't need to assert for previous state return true; + default: + throw new IllegalStateException("unexpected status [" + newStatus.status() + "] requested to update"); } } } From 9006455ebf80bd3ec5f7e0379e5b0f783457d60d Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 6 Sep 2022 18:29:40 +0530 Subject: [PATCH 50/87] Fix spotless check Signed-off-by: Rishab Nahata --- server/src/main/java/org/opensearch/OpenSearchException.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/OpenSearchException.java b/server/src/main/java/org/opensearch/OpenSearchException.java index 2b909271660c5..34d7509c7afb2 100644 --- a/server/src/main/java/org/opensearch/OpenSearchException.java +++ b/server/src/main/java/org/opensearch/OpenSearchException.java @@ -68,7 +68,6 @@ import static java.util.Collections.singletonMap; import static java.util.Collections.unmodifiableMap; import static org.opensearch.Version.V_2_1_0; -import static org.opensearch.Version.V_2_3_0; import static org.opensearch.Version.V_3_0_0; import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_UUID_NA_VALUE; import static org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken; From 985317d804bc9e79eeab53704a6ff0b1e1fd510e Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 7 Sep 2022 17:02:16 +0530 Subject: [PATCH 51/87] Updating states and flow Signed-off-by: Rishab Nahata --- .../coordination/JoinTaskExecutor.java | 4 +- .../DecommissionAttributeMetadata.java | 4 +- .../decommission/DecommissionController.java | 61 ++-- .../decommission/DecommissionService.java | 290 +++++++++--------- .../decommission/DecommissionStatus.java | 66 
+++- .../coordination/JoinTaskExecutorTests.java | 6 +- .../DecommissionControllerTests.java | 8 +- .../DecommissionServiceTests.java | 2 +- 8 files changed, 225 insertions(+), 216 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index ebf37e21bbfd6..ee2b35eac302d 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -484,8 +484,8 @@ public static void ensureNodeCommissioned(DiscoveryNode node, Metadata metadata) if (decommissionAttribute != null && status != null) { // We will let the node join the cluster if the current status is not IN_PROGRESS or SUCCESSFUL if (node.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue()) - && (status.equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS) - || status.equals(DecommissionStatus.DECOMMISSION_SUCCESSFUL))) { + && (status.equals(DecommissionStatus.IN_PROGRESS) + || status.equals(DecommissionStatus.SUCCESSFUL))) { throw new NodeDecommissionedException( "node [{}] has decommissioned attribute [{}].", node.toString(), diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java index 0924a181fb458..fc3f1841615d9 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java @@ -50,12 +50,12 @@ public DecommissionAttributeMetadata(DecommissionAttribute decommissionAttribute } /** - * Constructs new decommission attribute metadata with status as {@link DecommissionStatus#DECOMMISSION_INIT} + * Constructs new decommission attribute metadata with 
status as {@link DecommissionStatus#INIT} * * @param decommissionAttribute attribute details */ public DecommissionAttributeMetadata(DecommissionAttribute decommissionAttribute) { - this(decommissionAttribute, DecommissionStatus.DECOMMISSION_INIT); + this(decommissionAttribute, DecommissionStatus.INIT); } /** diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index dc183613fa162..9a87a48af60b2 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -37,11 +37,12 @@ import org.opensearch.transport.TransportService; import java.io.IOException; -import java.util.Iterator; +import java.util.Arrays; import java.util.LinkedHashMap; import java.util.Map; import java.util.Set; import java.util.function.Predicate; +import java.util.stream.Collectors; /** * Helper controller class to remove list of nodes from the cluster and update status @@ -135,7 +136,7 @@ public void removeDecommissionedNodes( TimeValue timeout, ActionListener nodesRemovedListener ) { - final Map nodesDecommissionTasks = new LinkedHashMap<>(); + final Map nodesDecommissionTasks = new LinkedHashMap<>(nodesToBeDecommissioned.size()); nodesToBeDecommissioned.forEach(discoveryNode -> { final NodeRemovalClusterStateTaskExecutor.Task task = new NodeRemovalClusterStateTaskExecutor.Task(discoveryNode, reason); nodesDecommissionTasks.put(task, nodeRemovalExecutor); @@ -148,15 +149,10 @@ public void removeDecommissionedNodes( ); Predicate allDecommissionedNodesRemovedPredicate = clusterState -> { - Iterator nodesIter = clusterState.nodes().getNodes().valuesIt(); - while (nodesIter.hasNext()) { - final DiscoveryNode node = nodesIter.next(); - // check if the node is part of node decommissioned list - if (nodesToBeDecommissioned.contains(node)) { - 
return false; - } - } - return true; + Set intersection = Arrays.stream( + clusterState.nodes().getNodes().values().toArray(DiscoveryNode.class)).collect(Collectors.toSet()); + intersection.retainAll(nodesToBeDecommissioned); + return intersection.size() == 0; }; final ClusterStateObserver observer = new ClusterStateObserver(clusterService, timeout, logger, threadPool.getThreadContext()); @@ -187,14 +183,25 @@ public void onTimeout(TimeValue timeout) { }, allDecommissionedNodesRemovedPredicate); } - public void updateMetadataWithDecommissionStatus(DecommissionStatus decommissionStatus, ActionListener listener) { + public void updateMetadataWithDecommissionStatus(DecommissionStatus decommissionStatus, ActionListener listener) { clusterService.submitStateUpdateTask(decommissionStatus.status(), new ClusterStateUpdateTask(Priority.URGENT) { @Override - public ClusterState execute(ClusterState currentState) { + public ClusterState execute(ClusterState currentState) throws Exception { Metadata metadata = currentState.metadata(); DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); assert decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute() != null; - assert assertStatusTransitionOrFailed(decommissionAttributeMetadata.status(), decommissionStatus); + // we need to update the status only when the previous stage is just behind than expected stage + // if the previous stage is already ahead of expected stage, we don't need to update the stage + // For failures, we update it no matter what + int previousStage = decommissionAttributeMetadata.status().stage(); + int expectedStage = decommissionStatus.stage(); + if (previousStage >= expectedStage) return currentState; + if (expectedStage - previousStage != 1 && !decommissionStatus.equals(DecommissionStatus.FAILED)) { + throw new DecommissioningFailedException( + decommissionAttributeMetadata.decommissionAttribute(), + "invalid 
previous decommission status found while updating status" + ); + } Metadata.Builder mdBuilder = Metadata.builder(metadata); DecommissionAttributeMetadata newMetadata = decommissionAttributeMetadata.withUpdatedStatus(decommissionStatus); mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, newMetadata); @@ -208,30 +215,10 @@ public void onFailure(String source, Exception e) { @Override public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { - DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata() - .custom(DecommissionAttributeMetadata.TYPE); - assert decommissionAttributeMetadata.status().equals(decommissionStatus); - listener.onResponse(null); + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().custom(DecommissionAttributeMetadata.TYPE); + logger.info("updated decommission status to [{}]", decommissionAttributeMetadata.status()); + listener.onResponse(decommissionAttributeMetadata.status()); } }); } - - private static boolean assertStatusTransitionOrFailed(DecommissionStatus oldStatus, DecommissionStatus newStatus) { - switch (newStatus) { - case DECOMMISSION_INIT: - // if the new status is INIT, then the old status cannot be anything but FAILED - return oldStatus.equals(DecommissionStatus.DECOMMISSION_FAILED); - case DECOMMISSION_IN_PROGRESS: - // if the new status is IN_PROGRESS, the old status has to be INIT - return oldStatus.equals(DecommissionStatus.DECOMMISSION_INIT); - case DECOMMISSION_SUCCESSFUL: - // if the new status is SUCCESSFUL, the old status has to be IN_PROGRESS - return oldStatus.equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS); - case DECOMMISSION_FAILED: - // if the new status is FAILED, we don't need to assert for previous state - return true; - default: - throw new IllegalStateException("unexpected status [" + newStatus.status() + "] requested to update"); - } - } } diff --git 
a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 78fb555ac5546..680808d152335 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -48,11 +48,11 @@ * the service makes the best attempt to perform the following task - *

    *
  • Remove cluster-manager eligible nodes from voting config [TODO - checks to avoid quorum loss scenarios]
  • - *
  • Initiates nodes decommissioning by adding custom metadata with the attribute and state as {@link DecommissionStatus#DECOMMISSION_INIT}
  • - *
  • Triggers weigh away for nodes having given awareness attribute to drain. This marks the decommission status as {@link DecommissionStatus#DECOMMISSION_IN_PROGRESS}
  • + *
  • Initiates node decommissioning by adding custom metadata with the attribute and the status as {@link DecommissionStatus#INIT}
  • + *
  • Triggers weigh-away for nodes having the given awareness attribute so that they can drain. This marks the decommission status as {@link DecommissionStatus#IN_PROGRESS}
  • *
  • Once weighed away, the service triggers nodes decommission
  • - *
  • Once the decommission is successful, the service clears the voting config and marks the status as {@link DecommissionStatus#DECOMMISSION_SUCCESSFUL}
  • - *
  • If service fails at any step, it would mark the status as {@link DecommissionStatus#DECOMMISSION_FAILED}
  • + *
  • Once the decommission is successful, the service clears the voting config and marks the status as {@link DecommissionStatus#SUCCESSFUL}
  • + *
  • If the service fails at any step, it marks the status as {@link DecommissionStatus#FAILED}
  • *
* * @opensearch.internal @@ -107,118 +107,44 @@ private void setForcedAwarenessAttributes(Settings forceSettings) { this.forcedAwarenessAttributes = forcedAwarenessAttributes; } - public void initiateAttributeDecommissioning( - final DecommissionAttribute decommissionAttribute, - final ActionListener listener, - ClusterState state - ) { - // validates if the correct awareness attributes and forced awareness attribute set to the cluster before initiating decommission - // action - validateAwarenessAttribute(decommissionAttribute, awarenessAttributes, forcedAwarenessAttributes); - - Set clusterManagerNodesToBeDecommissioned = filterNodesWithDecommissionAttribute(state, decommissionAttribute, true); - ensureNoQuorumLossDueToDecommissioning( - decommissionAttribute, - clusterManagerNodesToBeDecommissioned, - state.getLastAcceptedConfiguration() - ); - - DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); - // validates that there's no inflight decommissioning or already executed decommission in place - ensureNoInflightDifferentDecommissionRequest(decommissionAttributeMetadata, decommissionAttribute); - - logger.info("initiating awareness attribute [{}] decommissioning", decommissionAttribute.toString()); - - // remove all 'to-be-decommissioned' cluster manager eligible nodes from voting config - // The method ensures that we don't exclude same nodes multiple times - excludeDecommissionedClusterManagerNodesFromVotingConfig(clusterManagerNodesToBeDecommissioned); - - // explicitly throwing NotClusterManagerException as we can certainly say the local cluster manager node will - // be abdicated and soon will no longer be cluster manager. 
- if (transportService.getLocalNode().isClusterManagerNode() - && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) { - registerDecommissionAttribute(decommissionAttribute, listener); - } else { - throw new NotClusterManagerException( - "node [" - + transportService.getLocalNode().toString() - + "] not eligible to execute decommission request. Will retry until timeout." - ); - } - } - - private void excludeDecommissionedClusterManagerNodesFromVotingConfig(Set clusterManagerNodesToBeDecommissioned) { - Set clusterManagerNodesNameToBeDecommissioned = clusterManagerNodesToBeDecommissioned.stream() - .map(DiscoveryNode::getName) - .collect(Collectors.toSet()); - - Set currentVotingConfigExclusions = clusterService.getClusterApplierService() - .state() - .coordinationMetadata() - .getVotingConfigExclusions(); - Set excludedNodesName = currentVotingConfigExclusions.stream() - .map(VotingConfigExclusion::getNodeName) - .collect(Collectors.toSet()); - - // check if the to-be-excluded nodes are excluded. 
If yes, we don't need to exclude them again - if (clusterManagerNodesNameToBeDecommissioned.size() == 0 - || (clusterManagerNodesNameToBeDecommissioned.size() == excludedNodesName.size() - && excludedNodesName.containsAll(clusterManagerNodesNameToBeDecommissioned))) { - return; - } - // send a transport request to exclude to-be-decommissioned cluster manager eligible nodes from voting config - decommissionController.excludeDecommissionedNodesFromVotingConfig( - clusterManagerNodesNameToBeDecommissioned, - new ActionListener() { - @Override - public void onResponse(Void unused) { - logger.info( - "successfully removed decommissioned cluster manager eligible nodes [{}] from voting config ", - clusterManagerNodesToBeDecommissioned.toString() - ); - } - - @Override - public void onFailure(Exception e) { - logger.debug( - new ParameterizedMessage("failure in removing decommissioned cluster manager eligible nodes from voting config"), - e - ); - } - } - ); - } - /** - * Registers new decommissioned attribute metadata in the cluster state with {@link DecommissionStatus#DECOMMISSION_INIT} - *

- * This method can be only called on the cluster-manager node. It tries to create a new decommissioned attribute on the cluster manager - * and if it was successful it adds new decommissioned attribute to cluster metadata. - *

- * This method would only be executed on eligible cluster manager node + * Starts the new decommission request and registers the metadata with status as {@link DecommissionStatus#INIT} + * or the last known status if not {@link DecommissionStatus#FAILED} + * Once the status is updated, it tries to exclude to-be-decommissioned cluster manager nodes from Voting Configuration * * @param decommissionAttribute register decommission attribute in the metadata request - * @param listener register decommission listener + * @param listener register decommission listener */ - private void registerDecommissionAttribute( + public synchronized void startDecommissionAction( final DecommissionAttribute decommissionAttribute, final ActionListener listener ) { + // validates if correct awareness attributes and forced awareness attribute set to the cluster before starting action + validateAwarenessAttribute(decommissionAttribute, awarenessAttributes, forcedAwarenessAttributes); + + // register the metadata with status as DECOMMISSION_INIT as first step clusterService.submitStateUpdateTask( - "put_decommission [" + decommissionAttribute + "]", + "decommission [" + decommissionAttribute + "]", new ClusterStateUpdateTask(Priority.URGENT) { @Override - public ClusterState execute(ClusterState currentState) { + public ClusterState execute(ClusterState currentState) throws Exception { Metadata metadata = currentState.metadata(); Metadata.Builder mdBuilder = Metadata.builder(metadata); DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); + // check if the same attribute is requested for decommission and currently not FAILED, then return the current state as is + if(decommissionAttributeMetadata!=null && + decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute) && + !decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED)) { + logger.info("re-request received for decommissioning [{}], 
will not update state", decommissionAttribute); + return currentState; + } + // check the request sanity and reject the request if there's any inflight or successful request already present ensureNoInflightDifferentDecommissionRequest(decommissionAttributeMetadata, decommissionAttribute); decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); logger.info( - "registering decommission metadata for attribute [{}] with status as [{}]", - decommissionAttribute.toString(), - DecommissionStatus.DECOMMISSION_INIT + "registering decommission metadata [{}] to execute action", + decommissionAttributeMetadata.toString() ); return ClusterState.builder(currentState).metadata(mdBuilder).build(); } @@ -237,12 +163,12 @@ public void onFailure(String source, Exception e) { } else { logger.error( () -> new ParameterizedMessage( - "failed to initiate decommissioning for attribute [{}]", + "failed to start decommission action for attribute [{}]", decommissionAttribute.toString() ), e ); - failAndClearVotingConfigExclusion(listener, e); + listener.onFailure(e); } } @@ -251,53 +177,97 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata() .custom(DecommissionAttributeMetadata.TYPE); assert decommissionAttribute.equals(decommissionAttributeMetadata.decommissionAttribute()); - assert DecommissionStatus.DECOMMISSION_INIT.equals(decommissionAttributeMetadata.status()); listener.onResponse(new ClusterStateUpdateResponse(true)); - weighAwayForGracefulDecommission(); + if (!decommissionAttributeMetadata.status().equals(DecommissionStatus.SUCCESSFUL)) { + decommissionClusterManagerNodes(decommissionAttributeMetadata.decommissionAttribute()); + } } } ); } - private void failAndClearVotingConfigExclusion(final ActionListener listener, Exception e) { - 
decommissionController.clearVotingConfigExclusion(new ActionListener() { - @Override - public void onResponse(Void unused) { - logger.info("successfully cleared voting config exclusion after failing to execute decommission request"); - } + private void decommissionClusterManagerNodes(final DecommissionAttribute decommissionAttribute) { + decommissionController.updateMetadataWithDecommissionStatus( + DecommissionStatus.EXCLUDE_LEADER_FROM_VOTING_CONFIG, + new ActionListener() { + @Override + public void onResponse(DecommissionStatus status) { + ClusterState state = clusterService.getClusterApplierService().state(); + Set clusterManagerNodesToBeDecommissioned = filterNodesWithDecommissionAttribute( + state, decommissionAttribute, true + ); + ensureNoQuorumLossDueToDecommissioning( + decommissionAttribute, + clusterManagerNodesToBeDecommissioned, + state.getLastCommittedConfiguration() + ); + // remove all 'to-be-decommissioned' cluster manager eligible nodes from voting config + // The method ensures that we don't exclude same nodes multiple times + excludeDecommissionedClusterManagerNodesFromVotingConfig(clusterManagerNodesToBeDecommissioned); + // explicitly throwing NotClusterManagerException as we can certainly say the local cluster manager node will + // be abdicated and soon will no longer be cluster manager. + if (transportService.getLocalNode().isClusterManagerNode() + && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) { + failDecommissionedNodes(clusterService.getClusterApplierService().state()); + } else { + throw new NotClusterManagerException( + "node [" + + transportService.getLocalNode().toString() + + "] not eligible to execute decommission request. Will retry until timeout." 
+ ); + } + } - @Override - public void onFailure(Exception e) { - logger.debug( - new ParameterizedMessage("failure in clearing voting config exclusion after failing to execute decommission request"), - e - ); + @Override + public void onFailure(Exception e) { + logger.error( + () -> new ParameterizedMessage( + "failed to update decommission status for attribute [{}] to [{}]", + decommissionAttribute.toString(), + DecommissionStatus.EXCLUDE_LEADER_FROM_VOTING_CONFIG + ), + e + ); + } } - }); - listener.onFailure(e); + ); } - private void weighAwayForGracefulDecommission() { - decommissionController.updateMetadataWithDecommissionStatus( - DecommissionStatus.DECOMMISSION_IN_PROGRESS, + private void excludeDecommissionedClusterManagerNodesFromVotingConfig(Set clusterManagerNodesToBeDecommissioned) { + Set clusterManagerNodesNameToBeDecommissioned = clusterManagerNodesToBeDecommissioned.stream() + .map(DiscoveryNode::getName) + .collect(Collectors.toSet()); + + Set currentVotingConfigExclusions = clusterService.getClusterApplierService() + .state() + .coordinationMetadata() + .getVotingConfigExclusions(); + Set excludedNodesName = currentVotingConfigExclusions.stream() + .map(VotingConfigExclusion::getNodeName) + .collect(Collectors.toSet()); + + // check if the to-be-excluded nodes are excluded. 
If yes, we don't need to exclude them again + if (clusterManagerNodesNameToBeDecommissioned.size() == 0 + || (clusterManagerNodesNameToBeDecommissioned.size() == excludedNodesName.size() + && excludedNodesName.containsAll(clusterManagerNodesNameToBeDecommissioned))) { + return; + } + // send a transport request to exclude to-be-decommissioned cluster manager eligible nodes from voting config + decommissionController.excludeDecommissionedNodesFromVotingConfig( + clusterManagerNodesNameToBeDecommissioned, new ActionListener() { @Override public void onResponse(Void unused) { logger.info( - "updated decommission status to [{}], weighing away awareness attribute for graceful shutdown", - DecommissionStatus.DECOMMISSION_IN_PROGRESS + "successfully removed decommissioned cluster manager eligible nodes [{}] from voting config ", + clusterManagerNodesToBeDecommissioned.toString() ); - // TODO - should trigger weigh away here and on successful weigh away -> fail the decommissioned nodes - failDecommissionedNodes(clusterService.getClusterApplierService().state()); } @Override public void onFailure(Exception e) { - logger.error( - () -> new ParameterizedMessage( - "failed to update decommission status to [{}], will not proceed with decommission", - DecommissionStatus.DECOMMISSION_IN_PROGRESS - ), + logger.debug( + new ParameterizedMessage("failure in removing decommissioned cluster manager eligible nodes from voting config"), e ); } @@ -307,23 +277,42 @@ public void onFailure(Exception e) { private void failDecommissionedNodes(ClusterState state) { DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); - assert decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_IN_PROGRESS) - : "unexpected status encountered while decommissioning nodes"; DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); - - // execute nodes decommissioning - 
decommissionController.removeDecommissionedNodes( - filterNodesWithDecommissionAttribute(state, decommissionAttribute, false), - "nodes-decommissioned", - TimeValue.timeValueSeconds(30L), // TODO - read timeout from request while integrating with API - new ActionListener() { + decommissionController.updateMetadataWithDecommissionStatus( + DecommissionStatus.IN_PROGRESS, + new ActionListener() { @Override - public void onResponse(Void unused) { - clearVotingConfigExclusionAndUpdateStatus(true); + public void onResponse(DecommissionStatus status) { + // execute nodes decommissioning + decommissionController.removeDecommissionedNodes( + filterNodesWithDecommissionAttribute(state, decommissionAttribute, false), + "nodes-decommissioned", + TimeValue.timeValueSeconds(30L), // TODO - read timeout from request while integrating with API + new ActionListener() { + @Override + public void onResponse(Void unused) { + clearVotingConfigExclusionAndUpdateStatus(true); + } + + @Override + public void onFailure(Exception e) { + clearVotingConfigExclusionAndUpdateStatus(false); + } + } + ); } @Override public void onFailure(Exception e) { + logger.error( + () -> new ParameterizedMessage( + "failed to update decommission status for attribute [{}] to [{}]", + decommissionAttribute.toString(), + DecommissionStatus.IN_PROGRESS + ), + e + ); + // since we are not able to update the status, we will clear the voting config exclusion we have set earlier clearVotingConfigExclusionAndUpdateStatus(false); } } @@ -331,18 +320,15 @@ public void onFailure(Exception e) { } private void clearVotingConfigExclusionAndUpdateStatus(boolean decommissionSuccessful) { - ActionListener statusUpdateListener = new ActionListener() { + ActionListener statusUpdateListener = new ActionListener() { @Override - public void onResponse(Void unused) { - logger.info( - "successful updated decommission status with [{}]", - decommissionSuccessful ? 
DecommissionStatus.DECOMMISSION_SUCCESSFUL : DecommissionStatus.DECOMMISSION_FAILED - ); + public void onResponse(DecommissionStatus status) { + logger.info("completed decommission action"); } @Override public void onFailure(Exception e) { - logger.error("failed to update the decommission status"); + logger.error("failure encountered while executing decommission action"); } }; decommissionController.clearVotingConfigExclusion(new ActionListener() { @@ -350,8 +336,8 @@ public void onFailure(Exception e) { public void onResponse(Void unused) { logger.info("successfully cleared voting config exclusion after failing to execute decommission request"); DecommissionStatus updateStatusWith = decommissionSuccessful - ? DecommissionStatus.DECOMMISSION_SUCCESSFUL - : DecommissionStatus.DECOMMISSION_FAILED; + ? DecommissionStatus.SUCCESSFUL + : DecommissionStatus.FAILED; decommissionController.updateMetadataWithDecommissionStatus(updateStatusWith, statusUpdateListener); } @@ -361,7 +347,7 @@ public void onFailure(Exception e) { new ParameterizedMessage("failure in clearing voting config exclusion after processing decommission request"), e ); - decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_FAILED, statusUpdateListener); + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, statusUpdateListener); } }); } @@ -418,10 +404,10 @@ private static void ensureNoInflightDifferentDecommissionRequest( ) { String msg = null; if (decommissionAttributeMetadata != null) { - if (decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_SUCCESSFUL)) { + if (decommissionAttributeMetadata.status().equals(DecommissionStatus.SUCCESSFUL)) { // one awareness attribute is already decommissioned. We will reject the new request msg = "one awareness attribute already successfully decommissioned. 
Recommission before triggering another decommission"; - } else if (decommissionAttributeMetadata.status().equals(DecommissionStatus.DECOMMISSION_FAILED)) { + } else if (decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED)) { // here we are sure that the previous decommission request failed, we can let this request pass this check return; } else { diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java index 1474faa9bb227..567ba091b1392 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java @@ -15,24 +15,30 @@ public enum DecommissionStatus { /** * Decommission process is initiated */ - DECOMMISSION_INIT("decommission_init"), + INIT("init", 0), /** - * Decommission process has started, decommissioned nodes should be weighed away + * Exclude cluster manager from Voting Configuration */ - DECOMMISSION_IN_PROGRESS("decommission_in_progress"), + EXCLUDE_LEADER_FROM_VOTING_CONFIG("exclude_leader_from_voting_config", 2), /** - * Decommissioning awareness attribute completed + * Decommission process has started, decommissioned nodes should be removed */ - DECOMMISSION_SUCCESSFUL("decommission_successful"), + IN_PROGRESS("in_progress", 3), + /** + * Decommission action completed + */ + SUCCESSFUL("successful", 4), /** * Decommission request failed */ - DECOMMISSION_FAILED("decommission_failed"); + FAILED("failed", -1); private final String status; + private final int stage; - DecommissionStatus(String status) { + DecommissionStatus(String status, int stage) { this.status = status; + this.stage = stage; } /** @@ -44,6 +50,13 @@ public String status() { return status; } + /** + * Returns stage that represents the decommission stage + */ + public int stage() { + return stage; + } + /** * Generate decommission 
status from given string * @@ -54,15 +67,38 @@ public static DecommissionStatus fromString(String status) { if (status == null) { throw new IllegalArgumentException("decommission status cannot be null"); } - if (status.equals(DECOMMISSION_INIT.status())) { - return DECOMMISSION_INIT; - } else if (status.equals(DECOMMISSION_IN_PROGRESS.status())) { - return DECOMMISSION_IN_PROGRESS; - } else if (status.equals(DECOMMISSION_SUCCESSFUL.status())) { - return DECOMMISSION_SUCCESSFUL; - } else if (status.equals(DECOMMISSION_FAILED.status())) { - return DECOMMISSION_FAILED; + if (status.equals(INIT.status())) { + return INIT; + } else if (status.equals(EXCLUDE_LEADER_FROM_VOTING_CONFIG.status())) { + return EXCLUDE_LEADER_FROM_VOTING_CONFIG; + } else if (status.equals(IN_PROGRESS.status())) { + return IN_PROGRESS; + } else if (status.equals(SUCCESSFUL.status())) { + return SUCCESSFUL; + } else if (status.equals(FAILED.status())) { + return FAILED; } throw new IllegalStateException("Decommission status [" + status + "] not recognized."); } + + /** + * Generate decommission status from given stage + * + * @param stage stage in int + * @return status + */ + public static DecommissionStatus fromStage(int stage) { + if (stage == INIT.stage()) { + return INIT; + } else if (stage == EXCLUDE_LEADER_FROM_VOTING_CONFIG.stage()) { + return EXCLUDE_LEADER_FROM_VOTING_CONFIG; + } else if (stage == IN_PROGRESS.stage()) { + return IN_PROGRESS; + } else if (stage == SUCCESSFUL.stage()) { + return SUCCESSFUL; + } else if (stage == FAILED.stage()) { + return FAILED; + } + throw new IllegalStateException("Decommission stage [" + stage + "] not recognized."); + } } diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index e9ad305637f95..97862656a2f4e 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ 
b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -236,8 +236,8 @@ public void testPreventJoinClusterWithDecommission() { Settings.builder().build(); DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); DecommissionStatus decommissionStatus = randomFrom( - DecommissionStatus.DECOMMISSION_IN_PROGRESS, - DecommissionStatus.DECOMMISSION_SUCCESSFUL + DecommissionStatus.IN_PROGRESS, + DecommissionStatus.SUCCESSFUL ); DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( decommissionAttribute, @@ -270,7 +270,7 @@ public void testJoinClusterWithDifferentDecommission() { public void testJoinClusterWithDecommissionFailedOrInitOrRecommission() { Settings.builder().build(); DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); - DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.DECOMMISSION_INIT, DecommissionStatus.DECOMMISSION_FAILED); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.INIT, DecommissionStatus.FAILED); DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( decommissionAttribute, decommissionStatus diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java index 8e5d4e61937a4..ff27c39b9226b 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -237,7 +237,7 @@ public void testSuccessfulDecommissionStatusMetadataUpdate() throws InterruptedE final CountDownLatch countDownLatch = new CountDownLatch(1); DecommissionAttributeMetadata oldMetadata = new DecommissionAttributeMetadata( new DecommissionAttribute("zone", "zone-1"), - 
DecommissionStatus.DECOMMISSION_IN_PROGRESS + DecommissionStatus.IN_PROGRESS ); ClusterState state = clusterService.state(); Metadata metadata = state.metadata(); @@ -246,9 +246,9 @@ public void testSuccessfulDecommissionStatusMetadataUpdate() throws InterruptedE state = ClusterState.builder(state).metadata(mdBuilder).build(); setState(clusterService, state); - decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.DECOMMISSION_SUCCESSFUL, new ActionListener() { + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.SUCCESSFUL, new ActionListener() { @Override - public void onResponse(Void unused) { + public void onResponse(DecommissionStatus status) { countDownLatch.countDown(); } @@ -260,7 +260,7 @@ public void onFailure(Exception e) { assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); ClusterState newState = clusterService.getClusterApplierService().state(); DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().custom(DecommissionAttributeMetadata.TYPE); - assertEquals(decommissionAttributeMetadata.status(), DecommissionStatus.DECOMMISSION_SUCCESSFUL); + assertEquals(decommissionAttributeMetadata.status(), DecommissionStatus.SUCCESSFUL); } private ClusterState addNodes(ClusterState clusterState, String zone, String... 
nodeIds) { diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java index df148c6201d3c..13af5203da3ca 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -146,7 +146,7 @@ public void testDecommissioningNotInitiatedForInvalidAttributeValue() { public void testDecommissioningNotInitiatedWhenAlreadyDecommissioned() { DecommissionAttributeMetadata oldMetadata = new DecommissionAttributeMetadata( new DecommissionAttribute("zone", "zone_1"), - DecommissionStatus.DECOMMISSION_IN_PROGRESS + DecommissionStatus.IN_PROGRESS ); final ClusterState.Builder builder = builder(clusterService.state()); setState( From c851909b859d2064c0a6e6f2956a436fbe1dd5e7 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 7 Sep 2022 21:07:51 +0530 Subject: [PATCH 52/87] Trigger exclusion after init Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 78 ++++++++----------- .../decommission/DecommissionStatus.java | 16 +--- 2 files changed, 36 insertions(+), 58 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 680808d152335..21aab2fe0b38d 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -177,60 +177,46 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata() .custom(DecommissionAttributeMetadata.TYPE); assert decommissionAttribute.equals(decommissionAttributeMetadata.decommissionAttribute()); - 
listener.onResponse(new ClusterStateUpdateResponse(true)); if (!decommissionAttributeMetadata.status().equals(DecommissionStatus.SUCCESSFUL)) { - decommissionClusterManagerNodes(decommissionAttributeMetadata.decommissionAttribute()); + decommissionClusterManagerNodes( + decommissionAttributeMetadata.decommissionAttribute(), + listener + ); } } } ); } - private void decommissionClusterManagerNodes(final DecommissionAttribute decommissionAttribute) { - decommissionController.updateMetadataWithDecommissionStatus( - DecommissionStatus.EXCLUDE_LEADER_FROM_VOTING_CONFIG, - new ActionListener() { - @Override - public void onResponse(DecommissionStatus status) { - ClusterState state = clusterService.getClusterApplierService().state(); - Set clusterManagerNodesToBeDecommissioned = filterNodesWithDecommissionAttribute( - state, decommissionAttribute, true - ); - ensureNoQuorumLossDueToDecommissioning( - decommissionAttribute, - clusterManagerNodesToBeDecommissioned, - state.getLastCommittedConfiguration() - ); - // remove all 'to-be-decommissioned' cluster manager eligible nodes from voting config - // The method ensures that we don't exclude same nodes multiple times - excludeDecommissionedClusterManagerNodesFromVotingConfig(clusterManagerNodesToBeDecommissioned); - // explicitly throwing NotClusterManagerException as we can certainly say the local cluster manager node will - // be abdicated and soon will no longer be cluster manager. - if (transportService.getLocalNode().isClusterManagerNode() - && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) { - failDecommissionedNodes(clusterService.getClusterApplierService().state()); - } else { - throw new NotClusterManagerException( - "node [" - + transportService.getLocalNode().toString() - + "] not eligible to execute decommission request. Will retry until timeout." 
- ); - } - } - - @Override - public void onFailure(Exception e) { - logger.error( - () -> new ParameterizedMessage( - "failed to update decommission status for attribute [{}] to [{}]", - decommissionAttribute.toString(), - DecommissionStatus.EXCLUDE_LEADER_FROM_VOTING_CONFIG - ), - e - ); - } - } + private void decommissionClusterManagerNodes( + final DecommissionAttribute decommissionAttribute, + ActionListener listener + ) { + ClusterState state = clusterService.getClusterApplierService().state(); + Set clusterManagerNodesToBeDecommissioned = filterNodesWithDecommissionAttribute( + state, decommissionAttribute, true + ); + ensureNoQuorumLossDueToDecommissioning( + decommissionAttribute, + clusterManagerNodesToBeDecommissioned, + state.getLastCommittedConfiguration() ); + // remove all 'to-be-decommissioned' cluster manager eligible nodes from voting config + // The method ensures that we don't exclude same nodes multiple times + excludeDecommissionedClusterManagerNodesFromVotingConfig(clusterManagerNodesToBeDecommissioned); + // explicitly throwing NotClusterManagerException as we can certainly say the local cluster manager node will + // be abdicated and soon will no longer be cluster manager. + if (transportService.getLocalNode().isClusterManagerNode() + && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) { + // we are good here to send the response now as the request is processed by an eligible active leader + listener.onResponse(new ClusterStateUpdateResponse(true)); + failDecommissionedNodes(clusterService.getClusterApplierService().state()); + } else { + // this will ensure that request is retried until cluster manager times out + listener.onFailure(new NotClusterManagerException("node [" + + transportService.getLocalNode().toString() + + "] not eligible to execute decommission request. 
Will retry until timeout.")); + } } private void excludeDecommissionedClusterManagerNodesFromVotingConfig(Set clusterManagerNodesToBeDecommissioned) { diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java index 567ba091b1392..ba3dec4ded94a 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java @@ -9,25 +9,21 @@ package org.opensearch.cluster.decommission; /** - * An enumeration of the states during decommissioning and recommissioning. + * An enumeration of the states during decommissioning */ public enum DecommissionStatus { /** - * Decommission process is initiated + * Decommission process is initiated, and to-be-decommissioned leader is excluded from voting config */ INIT("init", 0), - /** - * Exclude cluster manager from Voting Configuration - */ - EXCLUDE_LEADER_FROM_VOTING_CONFIG("exclude_leader_from_voting_config", 2), /** * Decommission process has started, decommissioned nodes should be removed */ - IN_PROGRESS("in_progress", 3), + IN_PROGRESS("in_progress", 1), /** * Decommission action completed */ - SUCCESSFUL("successful", 4), + SUCCESSFUL("successful", 2), /** * Decommission request failed */ @@ -69,8 +65,6 @@ public static DecommissionStatus fromString(String status) { } if (status.equals(INIT.status())) { return INIT; - } else if (status.equals(EXCLUDE_LEADER_FROM_VOTING_CONFIG.status())) { - return EXCLUDE_LEADER_FROM_VOTING_CONFIG; } else if (status.equals(IN_PROGRESS.status())) { return IN_PROGRESS; } else if (status.equals(SUCCESSFUL.status())) { @@ -90,8 +84,6 @@ public static DecommissionStatus fromString(String status) { public static DecommissionStatus fromStage(int stage) { if (stage == INIT.stage()) { return INIT; - } else if (stage == EXCLUDE_LEADER_FROM_VOTING_CONFIG.stage()) { - return 
EXCLUDE_LEADER_FROM_VOTING_CONFIG; } else if (stage == IN_PROGRESS.stage()) { return IN_PROGRESS; } else if (stage == SUCCESSFUL.stage()) { From 798f0a21885c37c27f29e9f8fe24dac387e33132 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 7 Sep 2022 23:13:44 +0530 Subject: [PATCH 53/87] Updates Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 64 ++++++++----------- 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 21aab2fe0b38d..58d068b12f1cd 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -47,8 +47,8 @@ * Whenever a cluster manager initiates operation to decommission an awareness attribute, * the service makes the best attempt to perform the following task - *

    - *
  • Remove cluster-manager eligible nodes from voting config [TODO - checks to avoid quorum loss scenarios]
  • *
  • Initiates nodes decommissioning by adding custom metadata with the attribute and state as {@link DecommissionStatus#INIT}
  • + *
  • Remove cluster-manager eligible nodes from voting config
  • *
  • Triggers weigh away for nodes having given awareness attribute to drain. This marks the decommission status as {@link DecommissionStatus#IN_PROGRESS}
  • *
  • Once weighed away, the service triggers nodes decommission
  • *
  • Once the decommission is successful, the service clears the voting config and marks the status as {@link DecommissionStatus#SUCCESSFUL}
  • @@ -151,25 +151,14 @@ public ClusterState execute(ClusterState currentState) throws Exception { @Override public void onFailure(String source, Exception e) { - if (e instanceof NotClusterManagerException) { - logger.debug( - () -> new ParameterizedMessage( - "cluster-manager updated while executing request for decommission attribute [{}]", - decommissionAttribute.toString() - ), - e - ); - // we don't want to send the failure response to the listener here as the request will be retried - } else { - logger.error( - () -> new ParameterizedMessage( - "failed to start decommission action for attribute [{}]", - decommissionAttribute.toString() - ), - e - ); - listener.onFailure(e); - } + logger.error( + () -> new ParameterizedMessage( + "failed to start decommission action for attribute [{}]", + decommissionAttribute.toString() + ), + e + ); + listener.onFailure(e); } @Override @@ -177,12 +166,10 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata() .custom(DecommissionAttributeMetadata.TYPE); assert decommissionAttribute.equals(decommissionAttributeMetadata.decommissionAttribute()); - if (!decommissionAttributeMetadata.status().equals(DecommissionStatus.SUCCESSFUL)) { - decommissionClusterManagerNodes( - decommissionAttributeMetadata.decommissionAttribute(), - listener - ); - } + decommissionClusterManagerNodes( + decommissionAttributeMetadata.decommissionAttribute(), + listener + ); } } ); @@ -196,15 +183,17 @@ private void decommissionClusterManagerNodes( Set clusterManagerNodesToBeDecommissioned = filterNodesWithDecommissionAttribute( state, decommissionAttribute, true ); - ensureNoQuorumLossDueToDecommissioning( - decommissionAttribute, - clusterManagerNodesToBeDecommissioned, - state.getLastCommittedConfiguration() - ); + try { + // this is a sanity check that the cluster will not go into a quorum loss state because of exclusion + 
ensureNoQuorumLossDueToDecommissioning(decommissionAttribute, clusterManagerNodesToBeDecommissioned, state.getLastCommittedConfiguration()); + } catch (DecommissioningFailedException dfe) { + listener.onFailure(dfe); + } // remove all 'to-be-decommissioned' cluster manager eligible nodes from voting config // The method ensures that we don't exclude same nodes multiple times excludeDecommissionedClusterManagerNodesFromVotingConfig(clusterManagerNodesToBeDecommissioned); - // explicitly throwing NotClusterManagerException as we can certainly say the local cluster manager node will + + // explicitly calling listener.onFailure with NotClusterManagerException as we can certainly say the local cluster manager node will // be abdicated and soon will no longer be cluster manager. if (transportService.getLocalNode().isClusterManagerNode() && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) { @@ -262,16 +251,19 @@ public void onFailure(Exception e) { } private void failDecommissionedNodes(ClusterState state) { + // this method ensures no matter what, we always exit from this function after clearing the voting config exclusion DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); decommissionController.updateMetadataWithDecommissionStatus( DecommissionStatus.IN_PROGRESS, - new ActionListener() { + new ActionListener<>() { @Override public void onResponse(DecommissionStatus status) { // execute nodes decommissioning decommissionController.removeDecommissionedNodes( - filterNodesWithDecommissionAttribute(state, decommissionAttribute, false), + filterNodesWithDecommissionAttribute( + clusterService.getClusterApplierService().state(), decommissionAttribute, false + ), "nodes-decommissioned", TimeValue.timeValueSeconds(30L), // TODO - read timeout from request while 
integrating with API new ActionListener() { @@ -306,7 +298,7 @@ public void onFailure(Exception e) { } private void clearVotingConfigExclusionAndUpdateStatus(boolean decommissionSuccessful) { - ActionListener statusUpdateListener = new ActionListener() { + ActionListener statusUpdateListener = new ActionListener<>() { @Override public void onResponse(DecommissionStatus status) { logger.info("completed decommission action"); @@ -320,7 +312,7 @@ public void onFailure(Exception e) { decommissionController.clearVotingConfigExclusion(new ActionListener() { @Override public void onResponse(Void unused) { - logger.info("successfully cleared voting config exclusion after failing to execute decommission request"); + logger.info("successfully cleared voting config exclusion after completing decommission action, proceeding to update metadata"); DecommissionStatus updateStatusWith = decommissionSuccessful ? DecommissionStatus.SUCCESSFUL : DecommissionStatus.FAILED; From 3fe2b6623001e5c2c94613c106d69eff04a8144c Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Thu, 8 Sep 2022 00:25:28 +0530 Subject: [PATCH 54/87] Resolving comments Signed-off-by: Rishab Nahata --- .../AddVotingConfigExclusionsRequest.java | 2 +- .../coordination/JoinTaskExecutor.java | 6 ++-- .../DecommissionAttributeMetadata.java | 2 +- .../decommission/DecommissionController.java | 28 ++++++++++++++++++ .../decommission/DecommissionService.java | 29 +++++++++++++++---- 5 files changed, 56 insertions(+), 11 deletions(-) diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/configuration/AddVotingConfigExclusionsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/configuration/AddVotingConfigExclusionsRequest.java index 739bfaf2a3fb1..a2a77a1316898 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/configuration/AddVotingConfigExclusionsRequest.java +++ 
b/server/src/main/java/org/opensearch/action/admin/cluster/configuration/AddVotingConfigExclusionsRequest.java @@ -157,7 +157,7 @@ Set resolveVotingConfigExclusions(ClusterState currentSta } else { assert nodeNames.length >= 1; Map existingNodes = StreamSupport.stream(allNodes.spliterator(), false) - .collect(Collectors.toMap(DiscoveryNode::getName, Function.identity(), (r1, r2) -> r1)); + .collect(Collectors.toMap(DiscoveryNode::getName, Function.identity())); for (String nodeName : nodeNames) { if (existingNodes.containsKey(nodeName)) { diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index ee2b35eac302d..1c2a9466f76e3 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -482,10 +482,10 @@ public static void ensureNodeCommissioned(DiscoveryNode node, Metadata metadata) DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); DecommissionStatus status = decommissionAttributeMetadata.status(); if (decommissionAttribute != null && status != null) { - // We will let the node join the cluster if the current status is not IN_PROGRESS or SUCCESSFUL + // We will let the node join the cluster if the current status is FAILED if (node.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue()) - && (status.equals(DecommissionStatus.IN_PROGRESS) - || status.equals(DecommissionStatus.SUCCESSFUL))) { + && !status.equals(DecommissionStatus.FAILED) + ) { throw new NodeDecommissionedException( "node [{}] has decommissioned attribute [{}].", node.toString(), diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java 
b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java index fc3f1841615d9..009161ce66fc6 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java @@ -111,7 +111,7 @@ public String getWriteableName() { @Override public Version getMinimalSupportedVersion() { - return Version.V_2_3_0; + return Version.V_3_0_0; } public DecommissionAttributeMetadata(StreamInput in) throws IOException { diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 9a87a48af60b2..403570782bbd5 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -71,6 +71,12 @@ public class DecommissionController { this.threadPool = threadPool; } + /** + * Transport call to add nodes to voting config exclusion + * + * @param nodes set of nodes to be added to voting config exclusion list + * @param listener callback for response or failure + */ public void excludeDecommissionedNodesFromVotingConfig(Set nodes, ActionListener listener) { transportService.sendRequest( transportService.getLocalNode(), @@ -100,6 +106,11 @@ public AddVotingConfigExclusionsResponse read(StreamInput in) throws IOException ); } + /** + * Transport call to clear voting config exclusion + * + * @param listener callback for response or failure + */ public void clearVotingConfigExclusion(ActionListener listener) { final ClearVotingConfigExclusionsRequest clearVotingConfigExclusionsRequest = new ClearVotingConfigExclusionsRequest(); transportService.sendRequest( @@ -130,6 +141,16 @@ public ClearVotingConfigExclusionsResponse read(StreamInput in) throws IOExcepti ); } + /** + * This method 
triggers batch of tasks for nodes to be decommissioned using executor {@link NodeRemovalClusterStateTaskExecutor} + * Once the tasks are submitted, it waits for an expected cluster state to guarantee + * that the expected decommissioned nodes are removed from the cluster + * + * @param nodesToBeDecommissioned set of the node to be decommissioned + * @param reason reason of removal + * @param timeout timeout for the request + * @param nodesRemovedListener callback for the success or failure + */ public void removeDecommissionedNodes( Set nodesToBeDecommissioned, String reason, @@ -183,6 +204,13 @@ public void onTimeout(TimeValue timeout) { }, allDecommissionedNodesRemovedPredicate); } + /** + * This method updates the status in the currently registered metadata. + * This method also validates the status with its previous state before executing the request + * + * @param decommissionStatus status to update decommission metadata with + * @param listener listener for response and failure + */ public void updateMetadataWithDecommissionStatus(DecommissionStatus decommissionStatus, ActionListener listener) { clusterService.submitStateUpdateTask(decommissionStatus.status(), new ClusterStateUpdateTask(Priority.URGENT) { @Override diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 58d068b12f1cd..7642bce77a37a 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -13,6 +13,7 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.action.ActionListener; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateObserver; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.NotClusterManagerException; import 
org.opensearch.cluster.ack.ClusterStateUpdateResponse; @@ -188,19 +189,33 @@ private void decommissionClusterManagerNodes( ensureNoQuorumLossDueToDecommissioning(decommissionAttribute, clusterManagerNodesToBeDecommissioned, state.getLastCommittedConfiguration()); } catch (DecommissioningFailedException dfe) { listener.onFailure(dfe); + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, new ActionListener() { + @Override + public void onResponse(DecommissionStatus status) { + logger.info("updated the status to [{}], as cluster could have gone to quorum loss situation due to decommissioning", status.toString()); + } + + @Override + public void onFailure(Exception e) { + logger.error("unexpected error found while updating the status", e); + } + }); } // remove all 'to-be-decommissioned' cluster manager eligible nodes from voting config // The method ensures that we don't exclude same nodes multiple times - excludeDecommissionedClusterManagerNodesFromVotingConfig(clusterManagerNodesToBeDecommissioned); + boolean toBeDecommissionedClusterManagerNodesExcluded = excludeDecommissionedClusterManagerNodesFromVotingConfig(clusterManagerNodesToBeDecommissioned); - // explicitly calling listener.onFailure with NotClusterManagerException as we can certainly say the local cluster manager node will - // be abdicated and soon will no longer be cluster manager. 
if (transportService.getLocalNode().isClusterManagerNode() - && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) { + && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute) + && toBeDecommissionedClusterManagerNodesExcluded + ) { // we are good here to send the response now as the request is processed by an eligible active leader + // and to-be-decommissioned cluster manager is no more part of Voting Configuration listener.onResponse(new ClusterStateUpdateResponse(true)); failDecommissionedNodes(clusterService.getClusterApplierService().state()); } else { + // explicitly calling listener.onFailure with NotClusterManagerException as we can certainly say the local cluster manager node will + // be abdicated and soon will no longer be cluster manager. // this will ensure that request is retried until cluster manager times out listener.onFailure(new NotClusterManagerException("node [" + transportService.getLocalNode().toString() @@ -208,7 +223,7 @@ private void decommissionClusterManagerNodes( } } - private void excludeDecommissionedClusterManagerNodesFromVotingConfig(Set clusterManagerNodesToBeDecommissioned) { + private boolean excludeDecommissionedClusterManagerNodesFromVotingConfig(Set clusterManagerNodesToBeDecommissioned) { Set clusterManagerNodesNameToBeDecommissioned = clusterManagerNodesToBeDecommissioned.stream() .map(DiscoveryNode::getName) .collect(Collectors.toSet()); @@ -225,7 +240,7 @@ private void excludeDecommissionedClusterManagerNodesFromVotingConfig(Set Date: Thu, 8 Sep 2022 01:17:23 +0530 Subject: [PATCH 55/87] Fixes Signed-off-by: Rishab Nahata --- .../opensearch/cluster/coordination/JoinTaskExecutor.java | 6 +++--- .../cluster/decommission/DecommissionService.java | 8 +++----- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java 
b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index 1c2a9466f76e3..bceb28b3a40b2 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -482,10 +482,10 @@ public static void ensureNodeCommissioned(DiscoveryNode node, Metadata metadata) DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); DecommissionStatus status = decommissionAttributeMetadata.status(); if (decommissionAttribute != null && status != null) { - // We will let the node join the cluster if the current status is FAILED + // We will let the node join the cluster if the current status is not IN_PROGRESS or SUCCESSFUL if (node.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue()) - && !status.equals(DecommissionStatus.FAILED) - ) { + && (status.equals(DecommissionStatus.IN_PROGRESS) + || status.equals(DecommissionStatus.SUCCESSFUL))) { throw new NodeDecommissionedException( "node [{}] has decommissioned attribute [{}].", node.toString(), diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 7642bce77a37a..fbde87bddcfd7 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -132,6 +132,8 @@ public ClusterState execute(ClusterState currentState) throws Exception { Metadata metadata = currentState.metadata(); Metadata.Builder mdBuilder = Metadata.builder(metadata); DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); + // check the request sanity and reject the request if there's any inflight or successful request already present + 
ensureNoInflightDifferentDecommissionRequest(decommissionAttributeMetadata, decommissionAttribute); // check if the same attribute is requested for decommission and currently not FAILED, then return the current state as is if(decommissionAttributeMetadata!=null && decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute) && @@ -139,8 +141,6 @@ public ClusterState execute(ClusterState currentState) throws Exception { logger.info("re-request received for decommissioning [{}], will not update state", decommissionAttribute); return currentState; } - // check the request sanity and reject the request if there's any inflight or successful request already present - ensureNoInflightDifferentDecommissionRequest(decommissionAttributeMetadata, decommissionAttribute); decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); logger.info( @@ -237,9 +237,7 @@ private boolean excludeDecommissionedClusterManagerNodesFromVotingConfig(Set Date: Thu, 8 Sep 2022 01:33:36 +0530 Subject: [PATCH 56/87] Fix spotless check Signed-off-by: Rishab Nahata --- .../coordination/JoinTaskExecutor.java | 3 +- .../decommission/DecommissionController.java | 11 +- .../decommission/DecommissionService.java | 220 +++++++++--------- .../coordination/JoinTaskExecutorTests.java | 5 +- .../DecommissionControllerTests.java | 21 +- .../DecommissionServiceTests.java | 22 +- 6 files changed, 136 insertions(+), 146 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index bceb28b3a40b2..7410efc9ab60f 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -484,8 +484,7 @@ public static void 
ensureNodeCommissioned(DiscoveryNode node, Metadata metadata) if (decommissionAttribute != null && status != null) { // We will let the node join the cluster if the current status is not IN_PROGRESS or SUCCESSFUL if (node.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue()) - && (status.equals(DecommissionStatus.IN_PROGRESS) - || status.equals(DecommissionStatus.SUCCESSFUL))) { + && (status.equals(DecommissionStatus.IN_PROGRESS) || status.equals(DecommissionStatus.SUCCESSFUL))) { throw new NodeDecommissionedException( "node [{}] has decommissioned attribute [{}].", node.toString(), diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 403570782bbd5..cff01e4e480a5 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -157,7 +157,9 @@ public void removeDecommissionedNodes( TimeValue timeout, ActionListener nodesRemovedListener ) { - final Map nodesDecommissionTasks = new LinkedHashMap<>(nodesToBeDecommissioned.size()); + final Map nodesDecommissionTasks = new LinkedHashMap<>( + nodesToBeDecommissioned.size() + ); nodesToBeDecommissioned.forEach(discoveryNode -> { final NodeRemovalClusterStateTaskExecutor.Task task = new NodeRemovalClusterStateTaskExecutor.Task(discoveryNode, reason); nodesDecommissionTasks.put(task, nodeRemovalExecutor); @@ -170,8 +172,8 @@ public void removeDecommissionedNodes( ); Predicate allDecommissionedNodesRemovedPredicate = clusterState -> { - Set intersection = Arrays.stream( - clusterState.nodes().getNodes().values().toArray(DiscoveryNode.class)).collect(Collectors.toSet()); + Set intersection = Arrays.stream(clusterState.nodes().getNodes().values().toArray(DiscoveryNode.class)) + .collect(Collectors.toSet()); 
intersection.retainAll(nodesToBeDecommissioned); return intersection.size() == 0; }; @@ -243,7 +245,8 @@ public void onFailure(String source, Exception e) { @Override public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { - DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().custom(DecommissionAttributeMetadata.TYPE); + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata() + .custom(DecommissionAttributeMetadata.TYPE); logger.info("updated decommission status to [{}]", decommissionAttributeMetadata.status()); listener.onResponse(decommissionAttributeMetadata.status()); } diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index fbde87bddcfd7..50c7889400f95 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -13,7 +13,6 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.action.ActionListener; import org.opensearch.cluster.ClusterState; -import org.opensearch.cluster.ClusterStateObserver; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; @@ -124,56 +123,47 @@ public synchronized void startDecommissionAction( validateAwarenessAttribute(decommissionAttribute, awarenessAttributes, forcedAwarenessAttributes); // register the metadata with status as DECOMMISSION_INIT as first step - clusterService.submitStateUpdateTask( - "decommission [" + decommissionAttribute + "]", - new ClusterStateUpdateTask(Priority.URGENT) { - @Override - public ClusterState execute(ClusterState currentState) throws Exception { - Metadata metadata = currentState.metadata(); - 
Metadata.Builder mdBuilder = Metadata.builder(metadata); - DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); - // check the request sanity and reject the request if there's any inflight or successful request already present - ensureNoInflightDifferentDecommissionRequest(decommissionAttributeMetadata, decommissionAttribute); - // check if the same attribute is requested for decommission and currently not FAILED, then return the current state as is - if(decommissionAttributeMetadata!=null && - decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute) && - !decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED)) { - logger.info("re-request received for decommissioning [{}], will not update state", decommissionAttribute); - return currentState; - } - decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); - mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); - logger.info( - "registering decommission metadata [{}] to execute action", - decommissionAttributeMetadata.toString() - ); - return ClusterState.builder(currentState).metadata(mdBuilder).build(); + clusterService.submitStateUpdateTask("decommission [" + decommissionAttribute + "]", new ClusterStateUpdateTask(Priority.URGENT) { + @Override + public ClusterState execute(ClusterState currentState) throws Exception { + Metadata metadata = currentState.metadata(); + Metadata.Builder mdBuilder = Metadata.builder(metadata); + DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); + // check the request sanity and reject the request if there's any inflight or successful request already present + ensureNoInflightDifferentDecommissionRequest(decommissionAttributeMetadata, decommissionAttribute); + // check if the same attribute is requested for decommission and currently not FAILED, then 
return the current state as is + if (decommissionAttributeMetadata != null + && decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute) + && !decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED)) { + logger.info("re-request received for decommissioning [{}], will not update state", decommissionAttribute); + return currentState; } + decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); + mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); + logger.info("registering decommission metadata [{}] to execute action", decommissionAttributeMetadata.toString()); + return ClusterState.builder(currentState).metadata(mdBuilder).build(); + } - @Override - public void onFailure(String source, Exception e) { - logger.error( - () -> new ParameterizedMessage( - "failed to start decommission action for attribute [{}]", - decommissionAttribute.toString() - ), - e - ); - listener.onFailure(e); - } + @Override + public void onFailure(String source, Exception e) { + logger.error( + () -> new ParameterizedMessage( + "failed to start decommission action for attribute [{}]", + decommissionAttribute.toString() + ), + e + ); + listener.onFailure(e); + } - @Override - public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { - DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata() - .custom(DecommissionAttributeMetadata.TYPE); - assert decommissionAttribute.equals(decommissionAttributeMetadata.decommissionAttribute()); - decommissionClusterManagerNodes( - decommissionAttributeMetadata.decommissionAttribute(), - listener - ); - } + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata() + .custom(DecommissionAttributeMetadata.TYPE); + assert 
decommissionAttribute.equals(decommissionAttributeMetadata.decommissionAttribute()); + decommissionClusterManagerNodes(decommissionAttributeMetadata.decommissionAttribute(), listener); } - ); + }); } private void decommissionClusterManagerNodes( @@ -181,45 +171,59 @@ private void decommissionClusterManagerNodes( ActionListener listener ) { ClusterState state = clusterService.getClusterApplierService().state(); - Set clusterManagerNodesToBeDecommissioned = filterNodesWithDecommissionAttribute( - state, decommissionAttribute, true - ); + Set clusterManagerNodesToBeDecommissioned = filterNodesWithDecommissionAttribute(state, decommissionAttribute, true); try { // this is a sanity check that the cluster will not go into a quorum loss state because of exclusion - ensureNoQuorumLossDueToDecommissioning(decommissionAttribute, clusterManagerNodesToBeDecommissioned, state.getLastCommittedConfiguration()); + ensureNoQuorumLossDueToDecommissioning( + decommissionAttribute, + clusterManagerNodesToBeDecommissioned, + state.getLastCommittedConfiguration() + ); } catch (DecommissioningFailedException dfe) { listener.onFailure(dfe); - decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, new ActionListener() { - @Override - public void onResponse(DecommissionStatus status) { - logger.info("updated the status to [{}], as cluster could have gone to quorum loss situation due to decommissioning", status.toString()); - } + decommissionController.updateMetadataWithDecommissionStatus( + DecommissionStatus.FAILED, + new ActionListener() { + @Override + public void onResponse(DecommissionStatus status) { + logger.info( + "updated the status to [{}], as cluster could have gone to quorum loss situation due to decommissioning", + status.toString() + ); + } - @Override - public void onFailure(Exception e) { - logger.error("unexpected error found while updating the status", e); + @Override + public void onFailure(Exception e) { + logger.error("unexpected error 
found while updating the status", e); + } } - }); + ); } // remove all 'to-be-decommissioned' cluster manager eligible nodes from voting config // The method ensures that we don't exclude same nodes multiple times - boolean toBeDecommissionedClusterManagerNodesExcluded = excludeDecommissionedClusterManagerNodesFromVotingConfig(clusterManagerNodesToBeDecommissioned); + boolean toBeDecommissionedClusterManagerNodesExcluded = excludeDecommissionedClusterManagerNodesFromVotingConfig( + clusterManagerNodesToBeDecommissioned + ); if (transportService.getLocalNode().isClusterManagerNode() && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute) - && toBeDecommissionedClusterManagerNodesExcluded - ) { + && toBeDecommissionedClusterManagerNodesExcluded) { // we are good here to send the response now as the request is processed by an eligible active leader // and to-be-decommissioned cluster manager is no more part of Voting Configuration listener.onResponse(new ClusterStateUpdateResponse(true)); failDecommissionedNodes(clusterService.getClusterApplierService().state()); } else { - // explicitly calling listener.onFailure with NotClusterManagerException as we can certainly say the local cluster manager node will + // explicitly calling listener.onFailure with NotClusterManagerException as we can certainly say the local cluster manager node + // will // be abdicated and soon will no longer be cluster manager. // this will ensure that request is retried until cluster manager times out - listener.onFailure(new NotClusterManagerException("node [" - + transportService.getLocalNode().toString() - + "] not eligible to execute decommission request. Will retry until timeout.")); + listener.onFailure( + new NotClusterManagerException( + "node [" + + transportService.getLocalNode().toString() + + "] not eligible to execute decommission request. Will retry until timeout." 
+ ) + ); } } @@ -237,7 +241,8 @@ private boolean excludeDecommissionedClusterManagerNodesFromVotingConfig(Set() { - @Override - public void onResponse(DecommissionStatus status) { - // execute nodes decommissioning - decommissionController.removeDecommissionedNodes( - filterNodesWithDecommissionAttribute( - clusterService.getClusterApplierService().state(), decommissionAttribute, false - ), - "nodes-decommissioned", - TimeValue.timeValueSeconds(30L), // TODO - read timeout from request while integrating with API - new ActionListener() { - @Override - public void onResponse(Void unused) { - clearVotingConfigExclusionAndUpdateStatus(true); - } - - @Override - public void onFailure(Exception e) { - clearVotingConfigExclusionAndUpdateStatus(false); - } + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.IN_PROGRESS, new ActionListener<>() { + @Override + public void onResponse(DecommissionStatus status) { + // execute nodes decommissioning + decommissionController.removeDecommissionedNodes( + filterNodesWithDecommissionAttribute(clusterService.getClusterApplierService().state(), decommissionAttribute, false), + "nodes-decommissioned", + TimeValue.timeValueSeconds(30L), // TODO - read timeout from request while integrating with API + new ActionListener() { + @Override + public void onResponse(Void unused) { + clearVotingConfigExclusionAndUpdateStatus(true); } - ); - } - @Override - public void onFailure(Exception e) { - logger.error( - () -> new ParameterizedMessage( - "failed to update decommission status for attribute [{}] to [{}]", - decommissionAttribute.toString(), - DecommissionStatus.IN_PROGRESS - ), - e - ); - // since we are not able to update the status, we will clear the voting config exclusion we have set earlier - clearVotingConfigExclusionAndUpdateStatus(false); - } + @Override + public void onFailure(Exception e) { + clearVotingConfigExclusionAndUpdateStatus(false); + } + } + ); } - ); + + @Override + public void 
onFailure(Exception e) { + logger.error( + () -> new ParameterizedMessage( + "failed to update decommission status for attribute [{}] to [{}]", + decommissionAttribute.toString(), + DecommissionStatus.IN_PROGRESS + ), + e + ); + // since we are not able to update the status, we will clear the voting config exclusion we have set earlier + clearVotingConfigExclusionAndUpdateStatus(false); + } + }); } private void clearVotingConfigExclusionAndUpdateStatus(boolean decommissionSuccessful) { @@ -327,10 +327,10 @@ public void onFailure(Exception e) { decommissionController.clearVotingConfigExclusion(new ActionListener() { @Override public void onResponse(Void unused) { - logger.info("successfully cleared voting config exclusion after completing decommission action, proceeding to update metadata"); - DecommissionStatus updateStatusWith = decommissionSuccessful - ? DecommissionStatus.SUCCESSFUL - : DecommissionStatus.FAILED; + logger.info( + "successfully cleared voting config exclusion after completing decommission action, proceeding to update metadata" + ); + DecommissionStatus updateStatusWith = decommissionSuccessful ? 
DecommissionStatus.SUCCESSFUL : DecommissionStatus.FAILED; decommissionController.updateMetadataWithDecommissionStatus(updateStatusWith, statusUpdateListener); } diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index 97862656a2f4e..a0c979f972a70 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -235,10 +235,7 @@ public void testJoinClusterWithNoDecommission() { public void testPreventJoinClusterWithDecommission() { Settings.builder().build(); DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); - DecommissionStatus decommissionStatus = randomFrom( - DecommissionStatus.IN_PROGRESS, - DecommissionStatus.SUCCESSFUL - ); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.IN_PROGRESS, DecommissionStatus.SUCCESSFUL); DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( decommissionAttribute, decommissionStatus diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java index ff27c39b9226b..95199cdf09487 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -246,17 +246,20 @@ public void testSuccessfulDecommissionStatusMetadataUpdate() throws InterruptedE state = ClusterState.builder(state).metadata(mdBuilder).build(); setState(clusterService, state); - decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.SUCCESSFUL, new ActionListener() { - @Override - public void onResponse(DecommissionStatus 
status) { - countDownLatch.countDown(); - } + decommissionController.updateMetadataWithDecommissionStatus( + DecommissionStatus.SUCCESSFUL, + new ActionListener() { + @Override + public void onResponse(DecommissionStatus status) { + countDownLatch.countDown(); + } - @Override - public void onFailure(Exception e) { - fail("decommission status update failed"); + @Override + public void onFailure(Exception e) { + fail("decommission status update failed"); + } } - }); + ); assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); ClusterState newState = clusterService.getClusterApplierService().state(); DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().custom(DecommissionAttributeMetadata.TYPE); diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java index 13af5203da3ca..63e285e2dab36 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -121,7 +121,7 @@ public void testDecommissioningNotInitiatedForInvalidAttributeName() { ActionListener listener = mock(ActionListener.class); DecommissioningFailedException e = expectThrows( DecommissioningFailedException.class, - () -> { decommissionService.initiateAttributeDecommissioning(decommissionAttribute, listener, clusterService.state()); } + () -> { decommissionService.startDecommissionAction(decommissionAttribute, listener); } ); assertThat(e.getMessage(), Matchers.endsWith("invalid awareness attribute requested for decommissioning")); } @@ -132,7 +132,7 @@ public void testDecommissioningNotInitiatedForInvalidAttributeValue() { ActionListener listener = mock(ActionListener.class); DecommissioningFailedException e = expectThrows( DecommissioningFailedException.class, - () -> { 
decommissionService.initiateAttributeDecommissioning(decommissionAttribute, listener, clusterService.state()); } + () -> { decommissionService.startDecommissionAction(decommissionAttribute, listener); } ); assertThat( e.getMessage(), @@ -158,13 +158,7 @@ public void testDecommissioningNotInitiatedWhenAlreadyDecommissioned() { ActionListener listener = mock(ActionListener.class); DecommissioningFailedException e = expectThrows( DecommissioningFailedException.class, - () -> { - decommissionService.initiateAttributeDecommissioning( - new DecommissionAttribute("zone", "zone_2"), - listener, - clusterService.state() - ); - } + () -> { decommissionService.startDecommissionAction(new DecommissionAttribute("zone", "zone_2"), listener); } ); assertThat(e.getMessage(), Matchers.endsWith("another request for decommission is in flight, will not process this request")); } @@ -172,7 +166,7 @@ public void testDecommissioningNotInitiatedWhenAlreadyDecommissioned() { @SuppressWarnings("unchecked") public void testDecommissioningInitiatedWhenEnoughClusterManagerNodes() { ActionListener listener = mock(ActionListener.class); - decommissionService.initiateAttributeDecommissioning(new DecommissionAttribute("zone", "zone_3"), listener, clusterService.state()); + decommissionService.startDecommissionAction(new DecommissionAttribute("zone", "zone_3"), listener); } @SuppressWarnings("unchecked") @@ -184,13 +178,7 @@ public void testDecommissioningNotInitiatedWhenNotEnoughClusterManagerNodes() { ActionListener listener = mock(ActionListener.class); DecommissioningFailedException e = expectThrows( DecommissioningFailedException.class, - () -> { - decommissionService.initiateAttributeDecommissioning( - new DecommissionAttribute("zone", "zone_3"), - listener, - clusterService.state() - ); - } + () -> { decommissionService.startDecommissionAction(new DecommissionAttribute("zone", "zone_3"), listener); } ); assertThat(e.getMessage(), Matchers.endsWith("cannot proceed with decommission request as 
cluster might go into quorum loss")); } From cba93d5805a5a04f09172c21f5e341b6d8c031a5 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Thu, 8 Sep 2022 18:58:50 +0530 Subject: [PATCH 57/87] Resolve comments Signed-off-by: Rishab Nahata --- .../decommission/DecommissionController.java | 5 +- .../decommission/DecommissionService.java | 226 ++++++++---------- 2 files changed, 109 insertions(+), 122 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index cff01e4e480a5..2f860d5318216 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -29,6 +29,7 @@ import org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.Priority; +import org.opensearch.common.Strings; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.unit.TimeValue; import org.opensearch.threadpool.ThreadPool; @@ -74,14 +75,14 @@ public class DecommissionController { /** * Transport call to add nodes to voting config exclusion * - * @param nodes set of nodes to be added to voting config exclusion list + * @param nodes set of nodes Ids to be added to voting config exclusion list * @param listener callback for response or failure */ public void excludeDecommissionedNodesFromVotingConfig(Set nodes, ActionListener listener) { transportService.sendRequest( transportService.getLocalNode(), AddVotingConfigExclusionsAction.NAME, - new AddVotingConfigExclusionsRequest(nodes.stream().toArray(String[]::new)), + new AddVotingConfigExclusionsRequest(Strings.EMPTY_ARRAY, nodes.toArray(String[]::new), Strings.EMPTY_ARRAY, TimeValue.timeValueSeconds(30)), new TransportResponseHandler() { @Override public void 
handleResponse(AddVotingConfigExclusionsResponse response) { diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 50c7889400f95..65e38f0fd0326 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -28,7 +28,6 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; -import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfigExclusion; import java.util.HashMap; import java.util.HashSet; @@ -36,6 +35,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.Predicate; import java.util.stream.Collectors; import static org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING; @@ -130,14 +130,7 @@ public ClusterState execute(ClusterState currentState) throws Exception { Metadata.Builder mdBuilder = Metadata.builder(metadata); DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); // check the request sanity and reject the request if there's any inflight or successful request already present - ensureNoInflightDifferentDecommissionRequest(decommissionAttributeMetadata, decommissionAttribute); - // check if the same attribute is requested for decommission and currently not FAILED, then return the current state as is - if (decommissionAttributeMetadata != null - && decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute) - && !decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED)) { - logger.info("re-request received for decommissioning [{}], will not update state", decommissionAttribute); - 
return currentState; - } + ensureNoInflightRequest(decommissionAttributeMetadata, decommissionAttribute); decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); logger.info("registering decommission metadata [{}] to execute action", decommissionAttributeMetadata.toString()); @@ -172,102 +165,90 @@ private void decommissionClusterManagerNodes( ) { ClusterState state = clusterService.getClusterApplierService().state(); Set clusterManagerNodesToBeDecommissioned = filterNodesWithDecommissionAttribute(state, decommissionAttribute, true); - try { - // this is a sanity check that the cluster will not go into a quorum loss state because of exclusion - ensureNoQuorumLossDueToDecommissioning( - decommissionAttribute, - clusterManagerNodesToBeDecommissioned, - state.getLastCommittedConfiguration() - ); - } catch (DecommissioningFailedException dfe) { - listener.onFailure(dfe); - decommissionController.updateMetadataWithDecommissionStatus( - DecommissionStatus.FAILED, - new ActionListener() { + // This check doesn't seem to be needed as exclusion automatically shrinks the config before sending the response. + // We can guarantee that because of exclusion there wouldn't be a quorum loss and if the service gets a successful response, + // we are certain that the config is updated and nodes are ready to be kicked out. + // Please add comment if you feel there could be a edge case here. 
+// try { +// // this is a sanity check that the cluster will not go into a quorum loss state because of exclusion +// ensureNoQuorumLossDueToDecommissioning( +// decommissionAttribute, +// clusterManagerNodesToBeDecommissioned, +// state.getLastCommittedConfiguration() +// ); +// } catch (DecommissioningFailedException dfe) { +// listener.onFailure(dfe); +// decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, statusUpdateListener()); +// return; +// } + + ActionListener exclusionListener = new ActionListener() { + @Override + public void onResponse(Void unused) { + if (transportService.getLocalNode().isClusterManagerNode() + && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute) + ) { + // we are good here to send the response now as the request is processed by an eligible active leader + // and to-be-decommissioned cluster manager is no more part of Voting Configuration + listener.onResponse(new ClusterStateUpdateResponse(true)); + failDecommissionedNodes(clusterService.getClusterApplierService().state()); + } else { + // explicitly calling listener.onFailure with NotClusterManagerException as we can certainly say that + // the local cluster manager node will be abdicated and soon will no longer be cluster manager. + // this will ensure that request is retried until cluster manager times out + listener.onFailure( + new NotClusterManagerException( + "node [" + + transportService.getLocalNode().toString() + + "] not eligible to execute decommission request. Will retry until timeout." 
+ ) + ); + } + } + + @Override + public void onFailure(Exception e) { + listener.onFailure(e); + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, statusUpdateListener()); + } + }; + + // remove all 'to-be-decommissioned' cluster manager eligible nodes from voting config + Set nodeIdsToBeExcluded = clusterManagerNodesToBeDecommissioned.stream() + .map(DiscoveryNode::getId) + .collect(Collectors.toSet()); + + final Predicate allNodesRemoved = clusterState -> { + final Set votingConfigNodeIds = clusterState.getLastCommittedConfiguration().getNodeIds(); + return nodeIdsToBeExcluded.stream().noneMatch(votingConfigNodeIds::contains); + }; + if (allNodesRemoved.test(clusterService.getClusterApplierService().state())) { + exclusionListener.onResponse(null); + } else { + // send a transport request to exclude to-be-decommissioned cluster manager eligible nodes from voting config + decommissionController.excludeDecommissionedNodesFromVotingConfig( + nodeIdsToBeExcluded, + new ActionListener() { @Override - public void onResponse(DecommissionStatus status) { + public void onResponse(Void unused) { logger.info( - "updated the status to [{}], as cluster could have gone to quorum loss situation due to decommissioning", - status.toString() + "successfully removed decommissioned cluster manager eligible nodes [{}] from voting config ", + clusterManagerNodesToBeDecommissioned.toString() ); + exclusionListener.onResponse(null); } @Override public void onFailure(Exception e) { - logger.error("unexpected error found while updating the status", e); + logger.debug( + new ParameterizedMessage("failure in removing decommissioned cluster manager eligible nodes from voting config"), + e + ); + exclusionListener.onFailure(e); } } ); } - // remove all 'to-be-decommissioned' cluster manager eligible nodes from voting config - // The method ensures that we don't exclude same nodes multiple times - boolean toBeDecommissionedClusterManagerNodesExcluded = 
excludeDecommissionedClusterManagerNodesFromVotingConfig( - clusterManagerNodesToBeDecommissioned - ); - - if (transportService.getLocalNode().isClusterManagerNode() - && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute) - && toBeDecommissionedClusterManagerNodesExcluded) { - // we are good here to send the response now as the request is processed by an eligible active leader - // and to-be-decommissioned cluster manager is no more part of Voting Configuration - listener.onResponse(new ClusterStateUpdateResponse(true)); - failDecommissionedNodes(clusterService.getClusterApplierService().state()); - } else { - // explicitly calling listener.onFailure with NotClusterManagerException as we can certainly say the local cluster manager node - // will - // be abdicated and soon will no longer be cluster manager. - // this will ensure that request is retried until cluster manager times out - listener.onFailure( - new NotClusterManagerException( - "node [" - + transportService.getLocalNode().toString() - + "] not eligible to execute decommission request. Will retry until timeout." - ) - ); - } - } - - private boolean excludeDecommissionedClusterManagerNodesFromVotingConfig(Set clusterManagerNodesToBeDecommissioned) { - Set clusterManagerNodesNameToBeDecommissioned = clusterManagerNodesToBeDecommissioned.stream() - .map(DiscoveryNode::getName) - .collect(Collectors.toSet()); - - Set currentVotingConfigExclusions = clusterService.getClusterApplierService() - .state() - .coordinationMetadata() - .getVotingConfigExclusions(); - Set excludedNodesName = currentVotingConfigExclusions.stream() - .map(VotingConfigExclusion::getNodeName) - .collect(Collectors.toSet()); - - // check if the to-be-excluded nodes are excluded. 
If yes, we don't need to exclude them again - if (clusterManagerNodesNameToBeDecommissioned.size() == 0 - || excludedNodesName.containsAll(clusterManagerNodesNameToBeDecommissioned)) { - return true; - } - // send a transport request to exclude to-be-decommissioned cluster manager eligible nodes from voting config - decommissionController.excludeDecommissionedNodesFromVotingConfig( - clusterManagerNodesNameToBeDecommissioned, - new ActionListener() { - @Override - public void onResponse(Void unused) { - logger.info( - "successfully removed decommissioned cluster manager eligible nodes [{}] from voting config ", - clusterManagerNodesToBeDecommissioned.toString() - ); - } - - @Override - public void onFailure(Exception e) { - logger.debug( - new ParameterizedMessage("failure in removing decommissioned cluster manager eligible nodes from voting config"), - e - ); - } - } - ); - // send false for now and let the transport request be retried - return false; } private void failDecommissionedNodes(ClusterState state) { @@ -313,17 +294,6 @@ public void onFailure(Exception e) { } private void clearVotingConfigExclusionAndUpdateStatus(boolean decommissionSuccessful) { - ActionListener statusUpdateListener = new ActionListener<>() { - @Override - public void onResponse(DecommissionStatus status) { - logger.info("completed decommission action"); - } - - @Override - public void onFailure(Exception e) { - logger.error("failure encountered while executing decommission action"); - } - }; decommissionController.clearVotingConfigExclusion(new ActionListener() { @Override public void onResponse(Void unused) { @@ -331,7 +301,7 @@ public void onResponse(Void unused) { "successfully cleared voting config exclusion after completing decommission action, proceeding to update metadata" ); DecommissionStatus updateStatusWith = decommissionSuccessful ? 
DecommissionStatus.SUCCESSFUL : DecommissionStatus.FAILED; - decommissionController.updateMetadataWithDecommissionStatus(updateStatusWith, statusUpdateListener); + decommissionController.updateMetadataWithDecommissionStatus(updateStatusWith, statusUpdateListener()); } @Override @@ -340,7 +310,7 @@ public void onFailure(Exception e) { new ParameterizedMessage("failure in clearing voting config exclusion after processing decommission request"), e ); - decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, statusUpdateListener); + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, statusUpdateListener()); } }); } @@ -391,24 +361,24 @@ private static void validateAwarenessAttribute( } } - private static void ensureNoInflightDifferentDecommissionRequest( + private static void ensureNoInflightRequest( DecommissionAttributeMetadata decommissionAttributeMetadata, DecommissionAttribute decommissionAttribute ) { String msg = null; if (decommissionAttributeMetadata != null) { - if (decommissionAttributeMetadata.status().equals(DecommissionStatus.SUCCESSFUL)) { - // one awareness attribute is already decommissioned. We will reject the new request - msg = "one awareness attribute already successfully decommissioned. Recommission before triggering another decommission"; - } else if (decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED)) { - // here we are sure that the previous decommission request failed, we can let this request pass this check - return; - } else { - // it means the decommission has been initiated or is inflight. 
In that case, if the same attribute is requested for - // decommissioning, which can happen during retries, we will pass this check, if not, we will throw exception - if (!decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute)) { - msg = "another request for decommission is in flight, will not process this request"; - } + switch (decommissionAttributeMetadata.status()) { + case SUCCESSFUL: + // one awareness attribute is already decommissioned. We will reject the new request + msg = "one awareness attribute already successfully decommissioned, recommission before triggering another decommission"; + break; + case IN_PROGRESS: + case INIT: + // it means the decommission has been initiated or is inflight. In that case, will fail new request + msg = "there's an inflight decommission request in progress, cannot process this request"; + break; + case FAILED: + break; } } if (msg != null) { @@ -435,4 +405,20 @@ private static void ensureNoQuorumLossDueToDecommissioning( ); } } + + private ActionListener statusUpdateListener() { + return new ActionListener() { + @Override + public void onResponse(DecommissionStatus status) { + logger.info( + "updated the status to [{}]", status.toString() + ); + } + + @Override + public void onFailure(Exception e) { + logger.error("unexpected failure during status update", e); + } + }; + } } From 74769be2a505578fd8857835f68111e714baf877 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Thu, 8 Sep 2022 20:08:24 +0530 Subject: [PATCH 58/87] Precheck for retry Signed-off-by: Rishab Nahata --- .../cluster/decommission/DecommissionService.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 65e38f0fd0326..0f67187bcc7f3 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ 
b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -129,6 +129,15 @@ public ClusterState execute(ClusterState currentState) throws Exception { Metadata metadata = currentState.metadata(); Metadata.Builder mdBuilder = Metadata.builder(metadata); DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); + // check if the same attribute is requested for decommission and currently not FAILED or SUCCESS, then return the current state as is + if (decommissionAttributeMetadata != null + && decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute) + && !decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED) + && !decommissionAttributeMetadata.status().equals(DecommissionStatus.SUCCESSFUL) + ) { + logger.info("re-request received for decommissioning [{}], will not update state", decommissionAttribute); + return currentState; + } // check the request sanity and reject the request if there's any inflight or successful request already present ensureNoInflightRequest(decommissionAttributeMetadata, decommissionAttribute); decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); @@ -159,7 +168,7 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS }); } - private void decommissionClusterManagerNodes( + private synchronized void decommissionClusterManagerNodes( final DecommissionAttribute decommissionAttribute, ActionListener listener ) { From 9119cbd3605f36212daeb950294bf2a2c00a7848 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Thu, 8 Sep 2022 20:30:51 +0530 Subject: [PATCH 59/87] Add logging Signed-off-by: Rishab Nahata --- .../cluster/decommission/DecommissionService.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java 
b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 0f67187bcc7f3..6cefbdca17fc3 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -197,6 +197,7 @@ public void onResponse(Void unused) { if (transportService.getLocalNode().isClusterManagerNode() && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute) ) { + logger.info("will attempt to fail decommissioned nodes as local node is eligible to process the request"); // we are good here to send the response now as the request is processed by an eligible active leader // and to-be-decommissioned cluster manager is no more part of Voting Configuration listener.onResponse(new ClusterStateUpdateResponse(true)); @@ -205,6 +206,8 @@ public void onResponse(Void unused) { // explicitly calling listener.onFailure with NotClusterManagerException as we can certainly say that // the local cluster manager node will be abdicated and soon will no longer be cluster manager. // this will ensure that request is retried until cluster manager times out + logger.info("local node is not eligible to process the request, " + + "throwing NotClusterManagerException to attempt a retry on an eligible node"); listener.onFailure( new NotClusterManagerException( "node [" @@ -218,6 +221,7 @@ public void onResponse(Void unused) { @Override public void onFailure(Exception e) { listener.onFailure(e); + // attempting to mark the status as FAILED decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, statusUpdateListener()); } }; @@ -379,12 +383,14 @@ private static void ensureNoInflightRequest( switch (decommissionAttributeMetadata.status()) { case SUCCESSFUL: // one awareness attribute is already decommissioned. 
We will reject the new request - msg = "one awareness attribute already successfully decommissioned, recommission before triggering another decommission"; + msg = "one awareness attribute [" + decommissionAttributeMetadata.decommissionAttribute().toString() + + "] already successfully decommissioned, recommission before triggering another decommission"; break; case IN_PROGRESS: case INIT: // it means the decommission has been initiated or is inflight. In that case, will fail new request - msg = "there's an inflight decommission request in progress, cannot process this request"; + msg = "there's an inflight decommission request for attribute [" + decommissionAttributeMetadata.decommissionAttribute().toString() + + "] is in progress, cannot process this request"; break; case FAILED: break; From 807bd3a357e898402148a952a5a75be031687d64 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Thu, 8 Sep 2022 20:40:04 +0530 Subject: [PATCH 60/87] Fix spotless check Signed-off-by: Rishab Nahata --- .../decommission/DecommissionController.java | 7 +- .../decommission/DecommissionService.java | 90 +++++++++---------- 2 files changed, 50 insertions(+), 47 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 2f860d5318216..244339b7ccd03 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -82,7 +82,12 @@ public void excludeDecommissionedNodesFromVotingConfig(Set nodes, Action transportService.sendRequest( transportService.getLocalNode(), AddVotingConfigExclusionsAction.NAME, - new AddVotingConfigExclusionsRequest(Strings.EMPTY_ARRAY, nodes.toArray(String[]::new), Strings.EMPTY_ARRAY, TimeValue.timeValueSeconds(30)), + new AddVotingConfigExclusionsRequest( + Strings.EMPTY_ARRAY, + 
nodes.toArray(String[]::new), + Strings.EMPTY_ARRAY, + TimeValue.timeValueSeconds(30) + ), new TransportResponseHandler() { @Override public void handleResponse(AddVotingConfigExclusionsResponse response) { diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 6cefbdca17fc3..9e1c10c5167d6 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -129,12 +129,12 @@ public ClusterState execute(ClusterState currentState) throws Exception { Metadata metadata = currentState.metadata(); Metadata.Builder mdBuilder = Metadata.builder(metadata); DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); - // check if the same attribute is requested for decommission and currently not FAILED or SUCCESS, then return the current state as is + // check if the same attribute is requested for decommission and currently not FAILED or SUCCESS, then return the current + // state as is if (decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute) && !decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED) - && !decommissionAttributeMetadata.status().equals(DecommissionStatus.SUCCESSFUL) - ) { + && !decommissionAttributeMetadata.status().equals(DecommissionStatus.SUCCESSFUL)) { logger.info("re-request received for decommissioning [{}], will not update state", decommissionAttribute); return currentState; } @@ -178,25 +178,24 @@ private synchronized void decommissionClusterManagerNodes( // We can guarantee that because of exclusion there wouldn't be a quorum loss and if the service gets a successful response, // we are certain that the config is updated and nodes are ready to be kicked out. 
// Please add comment if you feel there could be a edge case here. -// try { -// // this is a sanity check that the cluster will not go into a quorum loss state because of exclusion -// ensureNoQuorumLossDueToDecommissioning( -// decommissionAttribute, -// clusterManagerNodesToBeDecommissioned, -// state.getLastCommittedConfiguration() -// ); -// } catch (DecommissioningFailedException dfe) { -// listener.onFailure(dfe); -// decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, statusUpdateListener()); -// return; -// } + // try { + // // this is a sanity check that the cluster will not go into a quorum loss state because of exclusion + // ensureNoQuorumLossDueToDecommissioning( + // decommissionAttribute, + // clusterManagerNodesToBeDecommissioned, + // state.getLastCommittedConfiguration() + // ); + // } catch (DecommissioningFailedException dfe) { + // listener.onFailure(dfe); + // decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, statusUpdateListener()); + // return; + // } ActionListener exclusionListener = new ActionListener() { @Override public void onResponse(Void unused) { if (transportService.getLocalNode().isClusterManagerNode() - && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute) - ) { + && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) { logger.info("will attempt to fail decommissioned nodes as local node is eligible to process the request"); // we are good here to send the response now as the request is processed by an eligible active leader // and to-be-decommissioned cluster manager is no more part of Voting Configuration @@ -206,8 +205,10 @@ public void onResponse(Void unused) { // explicitly calling listener.onFailure with NotClusterManagerException as we can certainly say that // the local cluster manager node will be abdicated and soon will no longer be cluster manager. 
// this will ensure that request is retried until cluster manager times out - logger.info("local node is not eligible to process the request, " + - "throwing NotClusterManagerException to attempt a retry on an eligible node"); + logger.info( + "local node is not eligible to process the request, " + + "throwing NotClusterManagerException to attempt a retry on an eligible node" + ); listener.onFailure( new NotClusterManagerException( "node [" @@ -239,28 +240,25 @@ public void onFailure(Exception e) { exclusionListener.onResponse(null); } else { // send a transport request to exclude to-be-decommissioned cluster manager eligible nodes from voting config - decommissionController.excludeDecommissionedNodesFromVotingConfig( - nodeIdsToBeExcluded, - new ActionListener() { - @Override - public void onResponse(Void unused) { - logger.info( - "successfully removed decommissioned cluster manager eligible nodes [{}] from voting config ", - clusterManagerNodesToBeDecommissioned.toString() - ); - exclusionListener.onResponse(null); - } + decommissionController.excludeDecommissionedNodesFromVotingConfig(nodeIdsToBeExcluded, new ActionListener() { + @Override + public void onResponse(Void unused) { + logger.info( + "successfully removed decommissioned cluster manager eligible nodes [{}] from voting config ", + clusterManagerNodesToBeDecommissioned.toString() + ); + exclusionListener.onResponse(null); + } - @Override - public void onFailure(Exception e) { - logger.debug( - new ParameterizedMessage("failure in removing decommissioned cluster manager eligible nodes from voting config"), - e - ); - exclusionListener.onFailure(e); - } + @Override + public void onFailure(Exception e) { + logger.debug( + new ParameterizedMessage("failure in removing decommissioned cluster manager eligible nodes from voting config"), + e + ); + exclusionListener.onFailure(e); } - ); + }); } } @@ -383,14 +381,16 @@ private static void ensureNoInflightRequest( switch (decommissionAttributeMetadata.status()) 
{ case SUCCESSFUL: // one awareness attribute is already decommissioned. We will reject the new request - msg = "one awareness attribute [" + decommissionAttributeMetadata.decommissionAttribute().toString() + - "] already successfully decommissioned, recommission before triggering another decommission"; + msg = "one awareness attribute [" + + decommissionAttributeMetadata.decommissionAttribute().toString() + + "] already successfully decommissioned, recommission before triggering another decommission"; break; case IN_PROGRESS: case INIT: // it means the decommission has been initiated or is inflight. In that case, will fail new request - msg = "there's an inflight decommission request for attribute [" + decommissionAttributeMetadata.decommissionAttribute().toString() + - "] is in progress, cannot process this request"; + msg = "there's an inflight decommission request for attribute [" + + decommissionAttributeMetadata.decommissionAttribute().toString() + + "] is in progress, cannot process this request"; break; case FAILED: break; @@ -425,9 +425,7 @@ private ActionListener statusUpdateListener() { return new ActionListener() { @Override public void onResponse(DecommissionStatus status) { - logger.info( - "updated the status to [{}]", status.toString() - ); + logger.info("updated the status to [{}]", status.toString()); } @Override From ad4b2274fc3cf407c3e3fc3c077cda8ac122363f Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 9 Sep 2022 14:34:54 +0530 Subject: [PATCH 61/87] Fix controller tests Signed-off-by: Rishab Nahata --- .../DecommissionControllerTests.java | 87 +++++++++++++++---- 1 file changed, 72 insertions(+), 15 deletions(-) diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java index 95199cdf09487..3e1c5d36fb84e 100644 --- 
a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -8,6 +8,7 @@ package org.opensearch.cluster.decommission; +import org.hamcrest.Matchers; import org.junit.After; import org.junit.Before; import org.opensearch.OpenSearchTimeoutException; @@ -18,6 +19,8 @@ import org.opensearch.action.support.ActionFilters; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateObserver; +import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.metadata.Metadata; @@ -48,8 +51,11 @@ import static java.util.Collections.emptySet; import static java.util.Collections.singletonMap; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.endsWith; import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.sameInstance; import static org.hamcrest.Matchers.startsWith; import static org.opensearch.cluster.ClusterState.builder; import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService; @@ -58,23 +64,17 @@ public class DecommissionControllerTests extends OpenSearchTestCase { - private ThreadPool threadPool; - private ClusterService clusterService; + private static ThreadPool threadPool; + private static ClusterService clusterService; private TransportService transportService; private AllocationService allocationService; private DecommissionController decommissionController; private ClusterSettings clusterSettings; - @Override - public void setUp() throws Exception { - super.setUp(); - threadPool = new TestThreadPool("test", Settings.EMPTY); - clusterService = createClusterService(threadPool); 
- allocationService = createAllocationService(); - } - @Before public void setTransportServiceAndDefaultClusterState() { + threadPool = new TestThreadPool("test", Settings.EMPTY); + allocationService = createAllocationService(); ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); logger.info("--> adding five nodes on same zone_1"); clusterState = addNodes(clusterState, "zone_1", "node1", "node2", "node3", "node4", "node5"); @@ -85,6 +85,7 @@ public void setTransportServiceAndDefaultClusterState() { clusterState = setLocalNodeAsClusterManagerNode(clusterState, "node1"); clusterState = setThreeNodesInVotingConfig(clusterState); final ClusterState.Builder builder = builder(clusterState); + clusterService = createClusterService(threadPool, clusterState.nodes().get("node1")); setState(clusterService, builder); final MockTransport transport = new MockTransport(); transportService = transport.createTransportService( @@ -130,7 +131,10 @@ public void shutdownThreadPoolAndClusterService() { } public void testAddNodesToVotingConfigExclusion() throws InterruptedException { - final CountDownLatch countDownLatch = new CountDownLatch(1); + final CountDownLatch countDownLatch = new CountDownLatch(2); + + ClusterStateObserver clusterStateObserver = new ClusterStateObserver(clusterService, null, logger, threadPool.getThreadContext()); + clusterStateObserver.waitForNextChange(new AdjustConfigurationForExclusions(countDownLatch)); Set nodesToRemoveFromVotingConfig = Collections.singleton(randomFrom("node1", "node6", "node11")); decommissionController.excludeDecommissionedNodesFromVotingConfig(nodesToRemoveFromVotingConfig, new ActionListener() { @Override @@ -145,7 +149,7 @@ public void onFailure(Exception e) { }); assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); clusterService.getClusterApplierService().state().getVotingConfigExclusions().forEach(vce -> { - assertTrue(nodesToRemoveFromVotingConfig.contains(vce.getNodeName())); + 
assertTrue(nodesToRemoveFromVotingConfig.contains(vce.getNodeId())); assertEquals(nodesToRemoveFromVotingConfig.size(), 1); }); } @@ -214,7 +218,7 @@ public void testTimesOut() throws InterruptedException { nodesToBeRemoved.add(clusterService.state().nodes().get("node15")); decommissionController.removeDecommissionedNodes( nodesToBeRemoved, - "unit-test", + "unit-test-timeout", TimeValue.timeValueMillis(2), new ActionListener() { @Override @@ -225,7 +229,7 @@ public void onResponse(Void unused) { @Override public void onFailure(Exception e) { assertThat(e, instanceOf(OpenSearchTimeoutException.class)); - assertThat(e.getMessage(), startsWith("timed out waiting for removal of decommissioned nodes")); + assertThat(e.getMessage(), containsString("waiting for removal of decommissioned nodes")); countDownLatch.countDown(); } } @@ -251,6 +255,7 @@ public void testSuccessfulDecommissionStatusMetadataUpdate() throws InterruptedE new ActionListener() { @Override public void onResponse(DecommissionStatus status) { + assertEquals(DecommissionStatus.SUCCESSFUL, status); countDownLatch.countDown(); } @@ -266,6 +271,58 @@ public void onFailure(Exception e) { assertEquals(decommissionAttributeMetadata.status(), DecommissionStatus.SUCCESSFUL); } + private static class AdjustConfigurationForExclusions implements ClusterStateObserver.Listener { + + final CountDownLatch doneLatch; + + AdjustConfigurationForExclusions(CountDownLatch latch) { + this.doneLatch = latch; + } + + @Override + public void onNewClusterState(ClusterState state) { + clusterService.getClusterManagerService().submitStateUpdateTask("reconfiguration", new ClusterStateUpdateTask() { + @Override + public ClusterState execute(ClusterState currentState) { + assertThat(currentState, sameInstance(state)); + final Set votingNodeIds = new HashSet<>(); + currentState.nodes().forEach(n -> votingNodeIds.add(n.getId())); + currentState.getVotingConfigExclusions().forEach(t -> votingNodeIds.remove(t.getNodeId())); + final 
CoordinationMetadata.VotingConfiguration votingConfiguration = new CoordinationMetadata.VotingConfiguration(votingNodeIds); + return builder(currentState).metadata( + Metadata.builder(currentState.metadata()) + .coordinationMetadata( + CoordinationMetadata.builder(currentState.coordinationMetadata()) + .lastAcceptedConfiguration(votingConfiguration) + .lastCommittedConfiguration(votingConfiguration) + .build() + ) + ).build(); + } + + @Override + public void onFailure(String source, Exception e) { + throw new AssertionError("unexpected failure", e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + doneLatch.countDown(); + } + }); + } + + @Override + public void onClusterServiceClose() { + throw new AssertionError("unexpected close"); + } + + @Override + public void onTimeout(TimeValue timeout) { + throw new AssertionError("unexpected timeout"); + } + } + private ClusterState addNodes(ClusterState clusterState, String zone, String... 
nodeIds) { DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); org.opensearch.common.collect.List.of(nodeIds).forEach(nodeId -> nodeBuilder.add(newNode(nodeId, singletonMap("zone", zone)))); @@ -300,7 +357,7 @@ private ClusterState setThreeNodesInVotingConfig(ClusterState clusterState) { } private static DiscoveryNode newNode(String nodeId, Map attributes) { - return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_DATA_ROLE, Version.CURRENT); + return new DiscoveryNode(nodeId, nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_DATA_ROLE, Version.CURRENT); } final private static Set CLUSTER_MANAGER_DATA_ROLE = Collections.unmodifiableSet( From 871784ea0f3b0add12632de89793c23a103b664e Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 9 Sep 2022 15:27:59 +0530 Subject: [PATCH 62/87] Fix Decommission Service test Signed-off-by: Rishab Nahata --- .../decommission/DecommissionController.java | 1 - .../decommission/DecommissionService.java | 3 +- .../DecommissionServiceTests.java | 72 +++++++++++-------- 3 files changed, 43 insertions(+), 33 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 244339b7ccd03..715235d7bff3b 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -253,7 +253,6 @@ public void onFailure(String source, Exception e) { public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata() .custom(DecommissionAttributeMetadata.TYPE); - logger.info("updated decommission status to [{}]", decommissionAttributeMetadata.status()); 
listener.onResponse(decommissionAttributeMetadata.status()); } }); diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 9e1c10c5167d6..1e47fd33ba043 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -269,6 +269,7 @@ private void failDecommissionedNodes(ClusterState state) { decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.IN_PROGRESS, new ActionListener<>() { @Override public void onResponse(DecommissionStatus status) { + logger.info("updated the decommission status to [{}]", status.toString()); // execute nodes decommissioning decommissionController.removeDecommissionedNodes( filterNodesWithDecommissionAttribute(clusterService.getClusterApplierService().state(), decommissionAttribute, false), @@ -425,7 +426,7 @@ private ActionListener statusUpdateListener() { return new ActionListener() { @Override public void onResponse(DecommissionStatus status) { - logger.info("updated the status to [{}]", status.toString()); + logger.info("updated the decommission status to [{}]", status.toString()); } @Override diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java index 63e285e2dab36..837a1f0d8b275 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -15,6 +15,7 @@ import org.opensearch.action.ActionListener; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateObserver; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; 
import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.metadata.Metadata; @@ -26,6 +27,7 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.transport.MockTransport; import org.opensearch.threadpool.TestThreadPool; @@ -36,6 +38,8 @@ import java.util.HashSet; import java.util.Map; import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import static java.util.Collections.emptySet; import static java.util.Collections.singletonMap; @@ -116,18 +120,18 @@ public void shutdownThreadPoolAndClusterService() { } @SuppressWarnings("unchecked") - public void testDecommissioningNotInitiatedForInvalidAttributeName() { + public void testDecommissioningNotStartedForInvalidAttributeName() { DecommissionAttribute decommissionAttribute = new DecommissionAttribute("rack", "rack-a"); ActionListener listener = mock(ActionListener.class); DecommissioningFailedException e = expectThrows( DecommissioningFailedException.class, - () -> { decommissionService.startDecommissionAction(decommissionAttribute, listener); } + () -> decommissionService.startDecommissionAction(decommissionAttribute, listener) ); assertThat(e.getMessage(), Matchers.endsWith("invalid awareness attribute requested for decommissioning")); } @SuppressWarnings("unchecked") - public void testDecommissioningNotInitiatedForInvalidAttributeValue() { + public void testDecommissioningNotStartedForInvalidAttributeValue() { DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "random"); ActionListener listener = mock(ActionListener.class); DecommissioningFailedException e = expectThrows( @@ -143,10 +147,12 @@ public void testDecommissioningNotInitiatedForInvalidAttributeValue() { } 
@SuppressWarnings("unchecked") - public void testDecommissioningNotInitiatedWhenAlreadyDecommissioned() { + public void testDecommissioningFailedWhenAnotherAttributeDecommissioningSuccessful() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionStatus oldStatus = randomFrom(DecommissionStatus.SUCCESSFUL, DecommissionStatus.IN_PROGRESS, DecommissionStatus.INIT); DecommissionAttributeMetadata oldMetadata = new DecommissionAttributeMetadata( new DecommissionAttribute("zone", "zone_1"), - DecommissionStatus.IN_PROGRESS + oldStatus ); final ClusterState.Builder builder = builder(clusterService.state()); setState( @@ -155,32 +161,36 @@ public void testDecommissioningNotInitiatedWhenAlreadyDecommissioned() { Metadata.builder(clusterService.state().metadata()).putCustom(DecommissionAttributeMetadata.TYPE, oldMetadata).build() ) ); - ActionListener listener = mock(ActionListener.class); - DecommissioningFailedException e = expectThrows( - DecommissioningFailedException.class, - () -> { decommissionService.startDecommissionAction(new DecommissionAttribute("zone", "zone_2"), listener); } - ); - assertThat(e.getMessage(), Matchers.endsWith("another request for decommission is in flight, will not process this request")); - } - - @SuppressWarnings("unchecked") - public void testDecommissioningInitiatedWhenEnoughClusterManagerNodes() { - ActionListener listener = mock(ActionListener.class); - decommissionService.startDecommissionAction(new DecommissionAttribute("zone", "zone_3"), listener); - } - - @SuppressWarnings("unchecked") - public void testDecommissioningNotInitiatedWhenNotEnoughClusterManagerNodes() { - ClusterState state = clusterService.state(); - // shrink voting config - state = setNodesInVotingConfig(state, state.nodes().get("node1"), state.nodes().get("node11")); - setState(clusterService, state); - ActionListener listener = mock(ActionListener.class); - DecommissioningFailedException e = expectThrows( - 
DecommissioningFailedException.class, - () -> { decommissionService.startDecommissionAction(new DecommissionAttribute("zone", "zone_3"), listener); } - ); - assertThat(e.getMessage(), Matchers.endsWith("cannot proceed with decommission request as cluster might go into quorum loss")); + ActionListener listener = new ActionListener() { + @Override + public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { + fail("on response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertTrue(e instanceof DecommissioningFailedException); + if (oldStatus.equals(DecommissionStatus.SUCCESSFUL)) { + assertThat( + e.getMessage(), + Matchers.endsWith( + "already successfully decommissioned, recommission before triggering another decommission" + ) + ); + } + else { + assertThat( + e.getMessage(), + Matchers.endsWith( + "is in progress, cannot process this request" + ) + ); + } + countDownLatch.countDown(); + } + }; + decommissionService.startDecommissionAction(new DecommissionAttribute("zone", "zone_2"), listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); } private ClusterState addDataNodes(ClusterState clusterState, String zone, String... 
nodeIds) { From f3cf71449a1cee27f8572d6dde203c22aa4da31c Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 9 Sep 2022 15:34:23 +0530 Subject: [PATCH 63/87] Fix spotless check Signed-off-by: Rishab Nahata --- .../DecommissionControllerTests.java | 7 +++---- .../decommission/DecommissionServiceTests.java | 18 ++++-------------- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java index 3e1c5d36fb84e..4b85fa39a91e1 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -8,7 +8,6 @@ package org.opensearch.cluster.decommission; -import org.hamcrest.Matchers; import org.junit.After; import org.junit.Before; import org.opensearch.OpenSearchTimeoutException; @@ -53,10 +52,8 @@ import static java.util.Collections.singletonMap; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.empty; -import static org.hamcrest.Matchers.endsWith; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.sameInstance; -import static org.hamcrest.Matchers.startsWith; import static org.opensearch.cluster.ClusterState.builder; import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService; import static org.opensearch.test.ClusterServiceUtils.createClusterService; @@ -288,7 +285,9 @@ public ClusterState execute(ClusterState currentState) { final Set votingNodeIds = new HashSet<>(); currentState.nodes().forEach(n -> votingNodeIds.add(n.getId())); currentState.getVotingConfigExclusions().forEach(t -> votingNodeIds.remove(t.getNodeId())); - final CoordinationMetadata.VotingConfiguration votingConfiguration = new CoordinationMetadata.VotingConfiguration(votingNodeIds); + 
final CoordinationMetadata.VotingConfiguration votingConfiguration = new CoordinationMetadata.VotingConfiguration( + votingNodeIds + ); return builder(currentState).metadata( Metadata.builder(currentState.metadata()) .coordinationMetadata( diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java index 837a1f0d8b275..6e40b608ccc7a 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -15,7 +15,6 @@ import org.opensearch.action.ActionListener; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; -import org.opensearch.cluster.ClusterStateObserver; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.metadata.Metadata; @@ -27,7 +26,6 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; -import org.opensearch.common.unit.TimeValue; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.transport.MockTransport; import org.opensearch.threadpool.TestThreadPool; @@ -152,7 +150,7 @@ public void testDecommissioningFailedWhenAnotherAttributeDecommissioningSuccessf DecommissionStatus oldStatus = randomFrom(DecommissionStatus.SUCCESSFUL, DecommissionStatus.IN_PROGRESS, DecommissionStatus.INIT); DecommissionAttributeMetadata oldMetadata = new DecommissionAttributeMetadata( new DecommissionAttribute("zone", "zone_1"), - oldStatus + oldStatus ); final ClusterState.Builder builder = builder(clusterService.state()); setState( @@ -173,18 +171,10 @@ public void onFailure(Exception e) { if (oldStatus.equals(DecommissionStatus.SUCCESSFUL)) { assertThat( 
e.getMessage(), - Matchers.endsWith( - "already successfully decommissioned, recommission before triggering another decommission" - ) - ); - } - else { - assertThat( - e.getMessage(), - Matchers.endsWith( - "is in progress, cannot process this request" - ) + Matchers.endsWith("already successfully decommissioned, recommission before triggering another decommission") ); + } else { + assertThat(e.getMessage(), Matchers.endsWith("is in progress, cannot process this request")); } countDownLatch.countDown(); } From a6619b574a3c31c73b0e0051e1a890826b20a3ad Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 9 Sep 2022 16:04:57 +0530 Subject: [PATCH 64/87] Empty-Commit Signed-off-by: Rishab Nahata From 06048a8da78d1cfb22e2a8aef6c8a9583178db15 Mon Sep 17 00:00:00 2001 From: Ankit Kala Date: Tue, 13 Sep 2022 15:28:51 +0530 Subject: [PATCH 65/87] Add getHistoryOperationsFromTranslog method to fetch the history snapshot from translogs (#3948) * Add getHistoryOperationsFromTranslog method to fetch the hostory snapshot from translogs Signed-off-by: Ankit Kala --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 697a66cd13d9a..048d55e5c5530 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - [Segment Replication] Update replicas to commit SegmentInfos instead of relying on SIS files from primary shards. 
([#4402](https://github.com/opensearch-project/OpenSearch/pull/4402)) - [CCR] Add getHistoryOperationsFromTranslog method to fetch the history snapshot from translogs ([#3948](https://github.com/opensearch-project/OpenSearch/pull/3948)) - Add DecommissionService and helper to execute awareness attribute decommissioning ([#4084](https://github.com/opensearch-project/OpenSearch/pull/4084)) +- [CCR] Add getHistoryOperationsFromTranslog method to fetch the history snapshot from translogs ([#3948](https://github.com/opensearch-project/OpenSearch/pull/3948)) ### Deprecated From 3a1dbc814c26a6a2608d3087f4b00c66b9d4db46 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 30 Aug 2022 20:07:39 +0530 Subject: [PATCH 66/87] Add package-info and Changelog Signed-off-by: Rishab Nahata --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 048d55e5c5530..5703d8b7d3eea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - [CCR] Add getHistoryOperationsFromTranslog method to fetch the history snapshot from translogs ([#3948](https://github.com/opensearch-project/OpenSearch/pull/3948)) - Add DecommissionService and helper to execute awareness attribute decommissioning ([#4084](https://github.com/opensearch-project/OpenSearch/pull/4084)) - [CCR] Add getHistoryOperationsFromTranslog method to fetch the history snapshot from translogs ([#3948](https://github.com/opensearch-project/OpenSearch/pull/3948)) +- Add DecommissionService and helper to execute awareness attribute decommissioning ([#4084](https://github.com/opensearch-project/OpenSearch/pull/4084)) ### Deprecated From 418c0792a64e32e188ad1914855c75b9f7443ea5 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 9 Sep 2022 16:04:57 +0530 Subject: [PATCH 67/87] Empty-Commit Signed-off-by: Rishab Nahata From ac28c4dcf228ba335d83c4ed6d7b9d7e687f5483 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: 
Tue, 13 Sep 2022 19:39:43 +0530 Subject: [PATCH 68/87] Address Comments Signed-off-by: Rishab Nahata --- .../decommission/DecommissionController.java | 6 ++- .../decommission/DecommissionService.java | 42 ++++++++++++------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 715235d7bff3b..686c73c348ee8 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -226,11 +226,15 @@ public ClusterState execute(ClusterState currentState) throws Exception { Metadata metadata = currentState.metadata(); DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); assert decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute() != null; - // we need to update the status only when the previous stage is just behind than expected stage + // we need to update the status only when the previous stage is just behind the expected stage // if the previous stage is already ahead of expected stage, we don't need to update the stage // For failures, we update it no matter what int previousStage = decommissionAttributeMetadata.status().stage(); int expectedStage = decommissionStatus.stage(); + logger.info("attempting to update current decommission status [{}] with expected status [{}]", + decommissionAttributeMetadata.status().stage(), + decommissionStatus + ); if (previousStage >= expectedStage) return currentState; if (expectedStage - previousStage != 1 && !decommissionStatus.equals(DecommissionStatus.FAILED)) { throw new DecommissioningFailedException( diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java 
b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 1e47fd33ba043..abe9c7703032e 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -115,31 +115,28 @@ private void setForcedAwarenessAttributes(Settings forceSettings) { * @param decommissionAttribute register decommission attribute in the metadata request * @param listener register decommission listener */ - public synchronized void startDecommissionAction( + public void startDecommissionAction( final DecommissionAttribute decommissionAttribute, final ActionListener listener ) { - // validates if correct awareness attributes and forced awareness attribute set to the cluster before starting action - validateAwarenessAttribute(decommissionAttribute, awarenessAttributes, forcedAwarenessAttributes); - // register the metadata with status as DECOMMISSION_INIT as first step clusterService.submitStateUpdateTask("decommission [" + decommissionAttribute + "]", new ClusterStateUpdateTask(Priority.URGENT) { @Override public ClusterState execute(ClusterState currentState) throws Exception { + // validates if correct awareness attributes and forced awareness attribute set to the cluster before starting action + validateAwarenessAttribute(decommissionAttribute, awarenessAttributes, forcedAwarenessAttributes); + Metadata metadata = currentState.metadata(); Metadata.Builder mdBuilder = Metadata.builder(metadata); DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); - // check if the same attribute is requested for decommission and currently not FAILED or SUCCESS, then return the current - // state as is - if (decommissionAttributeMetadata != null - && decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute) - && 
!decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED) - && !decommissionAttributeMetadata.status().equals(DecommissionStatus.SUCCESSFUL)) { + // check if the same attribute is requested for decommission and currently not FAILED, + // then return the current state as is; as we don't need a state update here + if(isSameNonFailedRequest(decommissionAttributeMetadata, decommissionAttribute)) { logger.info("re-request received for decommissioning [{}], will not update state", decommissionAttribute); return currentState; } - // check the request sanity and reject the request if there's any inflight or successful request already present - ensureNoInflightRequest(decommissionAttributeMetadata, decommissionAttribute); + // check the request sanity and reject the request if there's any conflicting inflight or successful request already present + ensureNoConflictingInflightRequest(decommissionAttributeMetadata, decommissionAttribute); decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); logger.info("registering decommission metadata [{}] to execute action", decommissionAttributeMetadata.toString()); @@ -163,7 +160,12 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata() .custom(DecommissionAttributeMetadata.TYPE); assert decommissionAttribute.equals(decommissionAttributeMetadata.decommissionAttribute()); - decommissionClusterManagerNodes(decommissionAttributeMetadata.decommissionAttribute(), listener); + if (decommissionAttributeMetadata.status().equals(DecommissionStatus.SUCCESSFUL)) { + logger.info("status is already marked SUCCESSFUL, no need to proceed for further processing"); + listener.onResponse(new ClusterStateUpdateResponse(true)); + } else { + 
decommissionClusterManagerNodes(decommissionAttributeMetadata.decommissionAttribute(), listener); + } } }); } @@ -262,7 +264,7 @@ public void onFailure(Exception e) { } } - private void failDecommissionedNodes(ClusterState state) { + private synchronized void failDecommissionedNodes(ClusterState state) { // this method ensures no matter what, we always exit from this function after clearing the voting config exclusion DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); @@ -373,12 +375,12 @@ private static void validateAwarenessAttribute( } } - private static void ensureNoInflightRequest( + private static void ensureNoConflictingInflightRequest( DecommissionAttributeMetadata decommissionAttributeMetadata, DecommissionAttribute decommissionAttribute ) { String msg = null; - if (decommissionAttributeMetadata != null) { + if (decommissionAttributeMetadata != null && !decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute)) { switch (decommissionAttributeMetadata.status()) { case SUCCESSFUL: // one awareness attribute is already decommissioned. 
We will reject the new request @@ -402,6 +404,14 @@ private static void ensureNoInflightRequest( } } + private static boolean isSameNonFailedRequest( + DecommissionAttributeMetadata decommissionAttributeMetadata, + DecommissionAttribute decommissionAttribute) { + return decommissionAttributeMetadata != null + && decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute) + && !decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED); + } + private static void ensureNoQuorumLossDueToDecommissioning( DecommissionAttribute decommissionAttribute, Set clusterManagerNodesToBeDecommissioned, From 4b377267d433e27a3c66a0ebc45ee8fcdfb9bce1 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 13 Sep 2022 19:53:47 +0530 Subject: [PATCH 69/87] Fix tests Signed-off-by: Rishab Nahata --- .../DecommissionServiceTests.java | 56 ++++++++++++------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java index 6e40b608ccc7a..5f38a6e71f27c 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -118,30 +118,46 @@ public void shutdownThreadPoolAndClusterService() { } @SuppressWarnings("unchecked") - public void testDecommissioningNotStartedForInvalidAttributeName() { + public void testDecommissioningNotStartedForInvalidAttributeName() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); DecommissionAttribute decommissionAttribute = new DecommissionAttribute("rack", "rack-a"); - ActionListener listener = mock(ActionListener.class); - DecommissioningFailedException e = expectThrows( - DecommissioningFailedException.class, - () -> 
decommissionService.startDecommissionAction(decommissionAttribute, listener) - ); - assertThat(e.getMessage(), Matchers.endsWith("invalid awareness attribute requested for decommissioning")); + ActionListener listener = new ActionListener() { + @Override + public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { + fail("on response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertTrue(e instanceof DecommissioningFailedException); + assertThat(e.getMessage(), Matchers.endsWith("invalid awareness attribute requested for decommissioning")); + countDownLatch.countDown(); + } + }; + decommissionService.startDecommissionAction(decommissionAttribute, listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); } @SuppressWarnings("unchecked") - public void testDecommissioningNotStartedForInvalidAttributeValue() { - DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "random"); - ActionListener listener = mock(ActionListener.class); - DecommissioningFailedException e = expectThrows( - DecommissioningFailedException.class, - () -> { decommissionService.startDecommissionAction(decommissionAttribute, listener); } - ); - assertThat( - e.getMessage(), - Matchers.endsWith( - "invalid awareness attribute value requested for decommissioning. 
Set forced awareness values before to decommission" - ) - ); + public void testDecommissioningNotStartedForInvalidAttributeValue() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "rack-a"); + ActionListener listener = new ActionListener() { + @Override + public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { + fail("on response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertTrue(e instanceof DecommissioningFailedException); + assertThat(e.getMessage(), Matchers.endsWith("invalid awareness attribute value requested for decommissioning. " + + "Set forced awareness values before to decommission")); + countDownLatch.countDown(); + } + }; + decommissionService.startDecommissionAction(decommissionAttribute, listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); } @SuppressWarnings("unchecked") From a268fd378c51ca5d96f22b5bb4ca49867df41b62 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Tue, 13 Sep 2022 20:05:58 +0530 Subject: [PATCH 70/87] Fix spotless check Signed-off-by: Rishab Nahata --- .../cluster/decommission/DecommissionController.java | 3 ++- .../cluster/decommission/DecommissionService.java | 5 +++-- .../cluster/decommission/DecommissionServiceTests.java | 10 +++++++--- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 686c73c348ee8..69644ee39a009 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -231,7 +231,8 @@ public ClusterState execute(ClusterState currentState) throws Exception { // For failures, we update it no 
matter what int previousStage = decommissionAttributeMetadata.status().stage(); int expectedStage = decommissionStatus.stage(); - logger.info("attempting to update current decommission status [{}] with expected status [{}]", + logger.info( + "attempting to update current decommission status [{}] with expected status [{}]", decommissionAttributeMetadata.status().stage(), decommissionStatus ); diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index abe9c7703032e..67ddd722a4047 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -131,7 +131,7 @@ public ClusterState execute(ClusterState currentState) throws Exception { DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); // check if the same attribute is requested for decommission and currently not FAILED, // then return the current state as is; as we don't need a state update here - if(isSameNonFailedRequest(decommissionAttributeMetadata, decommissionAttribute)) { + if (isSameNonFailedRequest(decommissionAttributeMetadata, decommissionAttribute)) { logger.info("re-request received for decommissioning [{}], will not update state", decommissionAttribute); return currentState; } @@ -406,7 +406,8 @@ private static void ensureNoConflictingInflightRequest( private static boolean isSameNonFailedRequest( DecommissionAttributeMetadata decommissionAttributeMetadata, - DecommissionAttribute decommissionAttribute) { + DecommissionAttribute decommissionAttribute + ) { return decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute) && !decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED); diff --git 
a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java index 5f38a6e71f27c..e93be1bea1282 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -41,7 +41,6 @@ import static java.util.Collections.emptySet; import static java.util.Collections.singletonMap; -import static org.mockito.Mockito.mock; import static org.opensearch.cluster.ClusterState.builder; import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService; import static org.opensearch.test.ClusterServiceUtils.createClusterService; @@ -151,8 +150,13 @@ public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { @Override public void onFailure(Exception e) { assertTrue(e instanceof DecommissioningFailedException); - assertThat(e.getMessage(), Matchers.endsWith("invalid awareness attribute value requested for decommissioning. " + - "Set forced awareness values before to decommission")); + assertThat( + e.getMessage(), + Matchers.endsWith( + "invalid awareness attribute value requested for decommissioning. 
" + + "Set forced awareness values before to decommission" + ) + ); countDownLatch.countDown(); } }; From 0d420b127ad26dc33422bfdafa9157fd0bb76641 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 16 Sep 2022 13:19:57 +0530 Subject: [PATCH 71/87] Update logic for exclusion response Signed-off-by: Rishab Nahata --- .../decommission/DecommissionController.java | 3 +- .../decommission/DecommissionService.java | 39 ++++++++++++------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 69644ee39a009..d984547827aa0 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -117,8 +117,9 @@ public AddVotingConfigExclusionsResponse read(StreamInput in) throws IOException * * @param listener callback for response or failure */ - public void clearVotingConfigExclusion(ActionListener listener) { + public void clearVotingConfigExclusion(ActionListener listener, boolean waitForRemoval) { final ClearVotingConfigExclusionsRequest clearVotingConfigExclusionsRequest = new ClearVotingConfigExclusionsRequest(); + clearVotingConfigExclusionsRequest.setWaitForRemoval(waitForRemoval); transportService.sendRequest( transportService.getLocalNode(), ClearVotingConfigExclusionsAction.NAME, diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 67ddd722a4047..ffe3dc69a8375 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -196,17 +196,26 @@ private synchronized void decommissionClusterManagerNodes( 
ActionListener exclusionListener = new ActionListener() { @Override public void onResponse(Void unused) { - if (transportService.getLocalNode().isClusterManagerNode() - && !nodeHasDecommissionedAttribute(transportService.getLocalNode(), decommissionAttribute)) { - logger.info("will attempt to fail decommissioned nodes as local node is eligible to process the request"); - // we are good here to send the response now as the request is processed by an eligible active leader - // and to-be-decommissioned cluster manager is no more part of Voting Configuration - listener.onResponse(new ClusterStateUpdateResponse(true)); - failDecommissionedNodes(clusterService.getClusterApplierService().state()); + if (clusterService.getClusterApplierService().state().nodes().isLocalNodeElectedClusterManager()) { + if (nodeHasDecommissionedAttribute(clusterService.localNode(), decommissionAttribute)) { + // this is an unexpected state, as after exclusion of nodes having decommission attribute, + // this local node shouldn't have had the decommission attribute. 
Will send the failure response to the user + String errorMsg = "unexpected state encountered [local node is to-be-decommissioned leader] while executing decommission request"; + logger.error(errorMsg); + // will go ahead and clear the voting config and mark the status as false + clearVotingConfigExclusionAndUpdateStatus(false, false); + // we can send the failure response to the user here + listener.onFailure(new IllegalStateException(errorMsg)); + } else { + logger.info("will attempt to fail decommissioned nodes as local node is eligible to process the request"); + // we are good here to send the response now as the request is processed by an eligible active leader + // and to-be-decommissioned cluster manager is no more part of Voting Configuration + listener.onResponse(new ClusterStateUpdateResponse(true)); + failDecommissionedNodes(clusterService.getClusterApplierService().state()); + } } else { - // explicitly calling listener.onFailure with NotClusterManagerException as we can certainly say that - // the local cluster manager node will be abdicated and soon will no longer be cluster manager. 
- // this will ensure that request is retried until cluster manager times out + // explicitly calling listener.onFailure with NotClusterManagerException as the local node is not the cluster manager + // this will ensures that request is retried until cluster manager times out logger.info( "local node is not eligible to process the request, " + "throwing NotClusterManagerException to attempt a retry on an eligible node" @@ -280,12 +289,12 @@ public void onResponse(DecommissionStatus status) { new ActionListener() { @Override public void onResponse(Void unused) { - clearVotingConfigExclusionAndUpdateStatus(true); + clearVotingConfigExclusionAndUpdateStatus(true, true); } @Override public void onFailure(Exception e) { - clearVotingConfigExclusionAndUpdateStatus(false); + clearVotingConfigExclusionAndUpdateStatus(false, false); } } ); @@ -302,12 +311,12 @@ public void onFailure(Exception e) { e ); // since we are not able to update the status, we will clear the voting config exclusion we have set earlier - clearVotingConfigExclusionAndUpdateStatus(false); + clearVotingConfigExclusionAndUpdateStatus(false, false); } }); } - private void clearVotingConfigExclusionAndUpdateStatus(boolean decommissionSuccessful) { + private void clearVotingConfigExclusionAndUpdateStatus(boolean decommissionSuccessful, boolean waitForRemoval) { decommissionController.clearVotingConfigExclusion(new ActionListener() { @Override public void onResponse(Void unused) { @@ -326,7 +335,7 @@ public void onFailure(Exception e) { ); decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, statusUpdateListener()); } - }); + }, waitForRemoval); } private Set filterNodesWithDecommissionAttribute( From 84443be31ca740777ea50637fc080605386afc83 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 16 Sep 2022 13:21:41 +0530 Subject: [PATCH 72/87] Update Changelog Signed-off-by: Rishab Nahata --- CHANGELOG.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 5703d8b7d3eea..697a66cd13d9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,8 +35,6 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - [Segment Replication] Update replicas to commit SegmentInfos instead of relying on SIS files from primary shards. ([#4402](https://github.com/opensearch-project/OpenSearch/pull/4402)) - [CCR] Add getHistoryOperationsFromTranslog method to fetch the history snapshot from translogs ([#3948](https://github.com/opensearch-project/OpenSearch/pull/3948)) - Add DecommissionService and helper to execute awareness attribute decommissioning ([#4084](https://github.com/opensearch-project/OpenSearch/pull/4084)) -- [CCR] Add getHistoryOperationsFromTranslog method to fetch the history snapshot from translogs ([#3948](https://github.com/opensearch-project/OpenSearch/pull/3948)) -- Add DecommissionService and helper to execute awareness attribute decommissioning ([#4084](https://github.com/opensearch-project/OpenSearch/pull/4084)) ### Deprecated From 0bcf6b363d949b67008a9c90579552b96844ac4f Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 16 Sep 2022 13:37:03 +0530 Subject: [PATCH 73/87] Addressing minor comments Signed-off-by: Rishab Nahata --- .../cluster/decommission/DecommissionController.java | 8 ++++---- .../cluster/decommission/DecommissionService.java | 10 +++++----- .../decommission/DecommissionControllerTests.java | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index d984547827aa0..a84b7bc21f487 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -86,7 +86,7 @@ public void excludeDecommissionedNodesFromVotingConfig(Set nodes, Action 
Strings.EMPTY_ARRAY, nodes.toArray(String[]::new), Strings.EMPTY_ARRAY, - TimeValue.timeValueSeconds(30) + TimeValue.timeValueSeconds(120) // giving a larger timeout of 120 sec as cluster might already be in stress when decommission is triggered ), new TransportResponseHandler() { @Override @@ -196,12 +196,12 @@ public void onNewClusterState(ClusterState state) { @Override public void onClusterServiceClose() { - logger.warn("cluster service closed while waiting for removal of decommissioned nodes."); + logger.warn("cluster service closed while waiting for removal of decommissioned nodes [{}]", nodesToBeDecommissioned.toString()); } @Override public void onTimeout(TimeValue timeout) { - logger.info("timed out while waiting for removal of decommissioned nodes"); + logger.info("timed out while waiting for removal of decommissioned nodes [{}]", nodesToBeDecommissioned.toString()); nodesRemovedListener.onFailure( new OpenSearchTimeoutException( "timed out [{}] while waiting for removal of decommissioned nodes [{}] to take effect", @@ -238,7 +238,7 @@ public ClusterState execute(ClusterState currentState) throws Exception { decommissionStatus ); if (previousStage >= expectedStage) return currentState; - if (expectedStage - previousStage != 1 && !decommissionStatus.equals(DecommissionStatus.FAILED)) { + if (expectedStage - previousStage != 1 && decommissionStatus.equals(DecommissionStatus.FAILED) == false) { throw new DecommissioningFailedException( decommissionAttributeMetadata.decommissionAttribute(), "invalid previous decommission status found while updating status" diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index ffe3dc69a8375..25274783e39e0 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -371,11 
+371,11 @@ private static void validateAwarenessAttribute( msg = "awareness attribute not set to the cluster."; } else if (forcedAwarenessAttributes == null) { msg = "forced awareness attribute not set to the cluster."; - } else if (!awarenessAttributes.contains(decommissionAttribute.attributeName())) { + } else if (awarenessAttributes.contains(decommissionAttribute.attributeName()) == false) { msg = "invalid awareness attribute requested for decommissioning"; - } else if (!forcedAwarenessAttributes.containsKey(decommissionAttribute.attributeName())) { + } else if (forcedAwarenessAttributes.containsKey(decommissionAttribute.attributeName()) == false) { msg = "forced awareness attribute [" + forcedAwarenessAttributes.toString() + "] doesn't have the decommissioning attribute"; - } else if (!forcedAwarenessAttributes.get(decommissionAttribute.attributeName()).contains(decommissionAttribute.attributeValue())) { + } else if (forcedAwarenessAttributes.get(decommissionAttribute.attributeName()).contains(decommissionAttribute.attributeValue()) == false) { msg = "invalid awareness attribute value requested for decommissioning. Set forced awareness values before to decommission"; } @@ -389,7 +389,7 @@ private static void ensureNoConflictingInflightRequest( DecommissionAttribute decommissionAttribute ) { String msg = null; - if (decommissionAttributeMetadata != null && !decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute)) { + if (decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute) == false) { switch (decommissionAttributeMetadata.status()) { case SUCCESSFUL: // one awareness attribute is already decommissioned. 
We will reject the new request @@ -419,7 +419,7 @@ private static boolean isSameNonFailedRequest( ) { return decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute) - && !decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED); + && decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED) == false; } private static void ensureNoQuorumLossDueToDecommissioning( diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java index 4b85fa39a91e1..b1e9167740355 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -163,7 +163,7 @@ public void onResponse(Void unused) { public void onFailure(Exception e) { fail("unexpected failure occurred while clearing voting config exclusion" + e); } - }); + }, false); assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); assertThat(clusterService.getClusterApplierService().state().getVotingConfigExclusions(), empty()); } From e114e85e8618c8ad7d6ec071add29564da3bd48b Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 16 Sep 2022 14:33:02 +0530 Subject: [PATCH 74/87] Update request eligibility check Signed-off-by: Rishab Nahata --- .../decommission/DecommissionService.java | 124 ++++++------------ 1 file changed, 43 insertions(+), 81 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 25274783e39e0..8ba03bdc494db 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -125,18 +125,11 
@@ public void startDecommissionAction( public ClusterState execute(ClusterState currentState) throws Exception { // validates if correct awareness attributes and forced awareness attribute set to the cluster before starting action validateAwarenessAttribute(decommissionAttribute, awarenessAttributes, forcedAwarenessAttributes); - Metadata metadata = currentState.metadata(); Metadata.Builder mdBuilder = Metadata.builder(metadata); DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); - // check if the same attribute is requested for decommission and currently not FAILED, - // then return the current state as is; as we don't need a state update here - if (isSameNonFailedRequest(decommissionAttributeMetadata, decommissionAttribute)) { - logger.info("re-request received for decommissioning [{}], will not update state", decommissionAttribute); - return currentState; - } - // check the request sanity and reject the request if there's any conflicting inflight or successful request already present - ensureNoConflictingInflightRequest(decommissionAttributeMetadata, decommissionAttribute); + // check that request is eligible to proceed + ensureEligibleRequest(decommissionAttributeMetadata, decommissionAttribute); decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); logger.info("registering decommission metadata [{}] to execute action", decommissionAttributeMetadata.toString()); @@ -160,12 +153,7 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata() .custom(DecommissionAttributeMetadata.TYPE); assert decommissionAttribute.equals(decommissionAttributeMetadata.decommissionAttribute()); - if (decommissionAttributeMetadata.status().equals(DecommissionStatus.SUCCESSFUL)) { - 
logger.info("status is already marked SUCCESSFUL, no need to proceed for further processing"); - listener.onResponse(new ClusterStateUpdateResponse(true)); - } else { - decommissionClusterManagerNodes(decommissionAttributeMetadata.decommissionAttribute(), listener); - } + decommissionClusterManagerNodes(decommissionAttributeMetadata.decommissionAttribute(), listener); } }); } @@ -176,23 +164,6 @@ private synchronized void decommissionClusterManagerNodes( ) { ClusterState state = clusterService.getClusterApplierService().state(); Set clusterManagerNodesToBeDecommissioned = filterNodesWithDecommissionAttribute(state, decommissionAttribute, true); - // This check doesn't seem to be needed as exclusion automatically shrinks the config before sending the response. - // We can guarantee that because of exclusion there wouldn't be a quorum loss and if the service gets a successful response, - // we are certain that the config is updated and nodes are ready to be kicked out. - // Please add comment if you feel there could be a edge case here. 
- // try { - // // this is a sanity check that the cluster will not go into a quorum loss state because of exclusion - // ensureNoQuorumLossDueToDecommissioning( - // decommissionAttribute, - // clusterManagerNodesToBeDecommissioned, - // state.getLastCommittedConfiguration() - // ); - // } catch (DecommissioningFailedException dfe) { - // listener.onFailure(dfe); - // decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, statusUpdateListener()); - // return; - // } - ActionListener exclusionListener = new ActionListener() { @Override public void onResponse(Void unused) { @@ -384,61 +355,52 @@ private static void validateAwarenessAttribute( } } - private static void ensureNoConflictingInflightRequest( + private static void ensureEligibleRequest( DecommissionAttributeMetadata decommissionAttributeMetadata, - DecommissionAttribute decommissionAttribute + DecommissionAttribute requestedDecommissionAttribute ) { String msg = null; - if (decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute) == false) { - switch (decommissionAttributeMetadata.status()) { - case SUCCESSFUL: - // one awareness attribute is already decommissioned. We will reject the new request - msg = "one awareness attribute [" - + decommissionAttributeMetadata.decommissionAttribute().toString() - + "] already successfully decommissioned, recommission before triggering another decommission"; - break; - case IN_PROGRESS: - case INIT: - // it means the decommission has been initiated or is inflight. 
In that case, will fail new request - msg = "there's an inflight decommission request for attribute [" - + decommissionAttributeMetadata.decommissionAttribute().toString() - + "] is in progress, cannot process this request"; - break; - case FAILED: - break; + if (decommissionAttributeMetadata != null) { + // check if the same attribute is registered and handle it accordingly + if (decommissionAttributeMetadata.decommissionAttribute().equals(requestedDecommissionAttribute)) { + switch (decommissionAttributeMetadata.status()) { + // for INIT and FAILED - we are good to process it again + case INIT: + case FAILED: + break; + case IN_PROGRESS: + case SUCCESSFUL: + msg = "same request is already in status [" + decommissionAttributeMetadata.status() + "], please wait for it to complete"; + break; + default: + throw new IllegalStateException("unknown status [" + decommissionAttributeMetadata.status() + "] currently registered in metadata"); + } + } + else { + switch (decommissionAttributeMetadata.status()) { + case SUCCESSFUL: + // one awareness attribute is already decommissioned. We will reject the new request + msg = "one awareness attribute [" + + decommissionAttributeMetadata.decommissionAttribute().toString() + + "] already successfully decommissioned, recommission before triggering another decommission"; + break; + case IN_PROGRESS: + case INIT: + // it means the decommission has been initiated or is inflight. 
In that case, will fail new request + msg = "there's an inflight decommission request for attribute [" + + decommissionAttributeMetadata.decommissionAttribute().toString() + + "] is in progress, cannot process this request"; + break; + case FAILED: + break; + default: + throw new IllegalStateException("unknown status [" + decommissionAttributeMetadata.status() + "] currently registered in metadata"); + } } } - if (msg != null) { - throw new DecommissioningFailedException(decommissionAttribute, msg); - } - } - private static boolean isSameNonFailedRequest( - DecommissionAttributeMetadata decommissionAttributeMetadata, - DecommissionAttribute decommissionAttribute - ) { - return decommissionAttributeMetadata != null - && decommissionAttributeMetadata.decommissionAttribute().equals(decommissionAttribute) - && decommissionAttributeMetadata.status().equals(DecommissionStatus.FAILED) == false; - } - - private static void ensureNoQuorumLossDueToDecommissioning( - DecommissionAttribute decommissionAttribute, - Set clusterManagerNodesToBeDecommissioned, - CoordinationMetadata.VotingConfiguration votingConfiguration - ) { - Set clusterManagerNodesIdToBeDecommissioned = new HashSet<>(); - clusterManagerNodesToBeDecommissioned.forEach(node -> clusterManagerNodesIdToBeDecommissioned.add(node.getId())); - if (!votingConfiguration.hasQuorum( - votingConfiguration.getNodeIds() - .stream() - .filter(n -> clusterManagerNodesIdToBeDecommissioned.contains(n) == false) - .collect(Collectors.toList()) - )) { - throw new DecommissioningFailedException( - decommissionAttribute, - "cannot proceed with decommission request as cluster might go into quorum loss" - ); + if (msg != null) { + throw new DecommissioningFailedException(requestedDecommissionAttribute, msg); } } From ad9f040f8fa38a8b332b7ea98135f9b5e1782d99 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 16 Sep 2022 15:01:01 +0530 Subject: [PATCH 75/87] Update metadata usage Signed-off-by: Rishab Nahata --- 
.../cluster/coordination/JoinTaskExecutor.java | 2 +- .../decommission/DecommissionController.java | 15 +++++++-------- .../decommission/DecommissionService.java | 17 ++++++++--------- .../opensearch/cluster/metadata/Metadata.java | 14 ++++++++++++++ .../coordination/JoinTaskExecutorTests.java | 13 +++---------- .../DecommissionControllerTests.java | 4 ++-- .../decommission/DecommissionServiceTests.java | 2 +- 7 files changed, 36 insertions(+), 31 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index 7410efc9ab60f..b59862706a8aa 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -477,7 +477,7 @@ public static void ensureMajorVersionBarrier(Version joiningNodeVersion, Version } public static void ensureNodeCommissioned(DiscoveryNode node, Metadata metadata) { - DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); + DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.decommissionAttributeMetadata(); if (decommissionAttributeMetadata != null) { DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); DecommissionStatus status = decommissionAttributeMetadata.status(); diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index a84b7bc21f487..4995df776bc2e 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -223,9 +223,8 @@ public void onTimeout(TimeValue timeout) { public void 
updateMetadataWithDecommissionStatus(DecommissionStatus decommissionStatus, ActionListener listener) { clusterService.submitStateUpdateTask(decommissionStatus.status(), new ClusterStateUpdateTask(Priority.URGENT) { @Override - public ClusterState execute(ClusterState currentState) throws Exception { - Metadata metadata = currentState.metadata(); - DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); + public ClusterState execute(ClusterState currentState) { + DecommissionAttributeMetadata decommissionAttributeMetadata = currentState.metadata().decommissionAttributeMetadata(); assert decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute() != null; // we need to update the status only when the previous stage is just behind the expected stage // if the previous stage is already ahead of expected stage, we don't need to update the stage @@ -244,10 +243,11 @@ public ClusterState execute(ClusterState currentState) throws Exception { "invalid previous decommission status found while updating status" ); } - Metadata.Builder mdBuilder = Metadata.builder(metadata); DecommissionAttributeMetadata newMetadata = decommissionAttributeMetadata.withUpdatedStatus(decommissionStatus); - mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, newMetadata); - return ClusterState.builder(currentState).metadata(mdBuilder).build(); + return ClusterState.builder(currentState) + .metadata(Metadata.builder(currentState.metadata()) + .decommissionAttributeMetadata(newMetadata)) + .build(); } @Override @@ -257,8 +257,7 @@ public void onFailure(String source, Exception e) { @Override public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { - DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata() - .custom(DecommissionAttributeMetadata.TYPE); + DecommissionAttributeMetadata decommissionAttributeMetadata = 
newState.metadata().decommissionAttributeMetadata(); listener.onResponse(decommissionAttributeMetadata.status()); } }); diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 8ba03bdc494db..1434f52b3adbb 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -122,18 +122,18 @@ public void startDecommissionAction( // register the metadata with status as DECOMMISSION_INIT as first step clusterService.submitStateUpdateTask("decommission [" + decommissionAttribute + "]", new ClusterStateUpdateTask(Priority.URGENT) { @Override - public ClusterState execute(ClusterState currentState) throws Exception { + public ClusterState execute(ClusterState currentState) { // validates if correct awareness attributes and forced awareness attribute set to the cluster before starting action validateAwarenessAttribute(decommissionAttribute, awarenessAttributes, forcedAwarenessAttributes); - Metadata metadata = currentState.metadata(); - Metadata.Builder mdBuilder = Metadata.builder(metadata); - DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.custom(DecommissionAttributeMetadata.TYPE); + DecommissionAttributeMetadata decommissionAttributeMetadata = currentState.metadata().decommissionAttributeMetadata(); // check that request is eligible to proceed ensureEligibleRequest(decommissionAttributeMetadata, decommissionAttribute); decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); - mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); logger.info("registering decommission metadata [{}] to execute action", decommissionAttributeMetadata.toString()); - return ClusterState.builder(currentState).metadata(mdBuilder).build(); + return 
ClusterState.builder(currentState) + .metadata(Metadata.builder(currentState.metadata()) + .decommissionAttributeMetadata(decommissionAttributeMetadata)) + .build(); } @Override @@ -150,8 +150,7 @@ public void onFailure(String source, Exception e) { @Override public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { - DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata() - .custom(DecommissionAttributeMetadata.TYPE); + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().decommissionAttributeMetadata(); assert decommissionAttribute.equals(decommissionAttributeMetadata.decommissionAttribute()); decommissionClusterManagerNodes(decommissionAttributeMetadata.decommissionAttribute(), listener); } @@ -246,7 +245,7 @@ public void onFailure(Exception e) { private synchronized void failDecommissionedNodes(ClusterState state) { // this method ensures no matter what, we always exit from this function after clearing the voting config exclusion - DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().custom(DecommissionAttributeMetadata.TYPE); + DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().decommissionAttributeMetadata(); DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.IN_PROGRESS, new ActionListener<>() { @Override diff --git a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java index 5f7e98e9e1199..8d3319f174e6d 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java @@ -51,6 +51,7 @@ import org.opensearch.cluster.block.ClusterBlock; import org.opensearch.cluster.block.ClusterBlockLevel; import 
org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; import org.opensearch.common.Nullable; import org.opensearch.common.Strings; import org.opensearch.common.UUIDs; @@ -795,6 +796,10 @@ public Map dataStreams() { .orElse(Collections.emptyMap()); } + public DecommissionAttributeMetadata decommissionAttributeMetadata() { + return custom(DecommissionAttributeMetadata.TYPE); + } + public ImmutableOpenMap customs() { return this.customs; } @@ -1328,6 +1333,15 @@ public IndexGraveyard indexGraveyard() { return graveyard; } + public Builder decommissionAttributeMetadata(final DecommissionAttributeMetadata decommissionAttributeMetadata) { + putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); + return this; + } + + public DecommissionAttributeMetadata decommissionAttributeMetadata() { + return (DecommissionAttributeMetadata) getCustom(DecommissionAttributeMetadata.TYPE); + } + public Builder updateSettings(Settings settings, String... 
indices) { if (indices == null || indices.length == 0) { indices = this.indices.keys().toArray(String.class); diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index a0c979f972a70..ca1ad956f10f5 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -240,10 +240,7 @@ public void testPreventJoinClusterWithDecommission() { decommissionAttribute, decommissionStatus ); - Metadata.Builder metaBuilder = Metadata.builder(); - metaBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); - Metadata metadata = metaBuilder.build(); - + Metadata metadata = Metadata.builder().decommissionAttributeMetadata(decommissionAttributeMetadata).build(); DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-1")); expectThrows(NodeDecommissionedException.class, () -> JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata)); } @@ -256,9 +253,7 @@ public void testJoinClusterWithDifferentDecommission() { decommissionAttribute, decommissionStatus ); - Metadata.Builder metaBuilder = Metadata.builder(); - metaBuilder.putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); - Metadata metadata = metaBuilder.build(); + Metadata metadata = Metadata.builder().decommissionAttributeMetadata(decommissionAttributeMetadata).build(); DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-2")); JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata); @@ -272,9 +267,7 @@ public void testJoinClusterWithDecommissionFailedOrInitOrRecommission() { decommissionAttribute, decommissionStatus ); - Metadata.Builder metaBuilder = Metadata.builder(); - metaBuilder.putCustom(DecommissionAttributeMetadata.TYPE, 
decommissionAttributeMetadata); - Metadata metadata = metaBuilder.build(); + Metadata metadata = Metadata.builder().decommissionAttributeMetadata(decommissionAttributeMetadata).build(); DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-1")); JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata); diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java index b1e9167740355..8b5343184dabd 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -243,7 +243,7 @@ public void testSuccessfulDecommissionStatusMetadataUpdate() throws InterruptedE ClusterState state = clusterService.state(); Metadata metadata = state.metadata(); Metadata.Builder mdBuilder = Metadata.builder(metadata); - mdBuilder.putCustom(DecommissionAttributeMetadata.TYPE, oldMetadata); + mdBuilder.decommissionAttributeMetadata(oldMetadata); state = ClusterState.builder(state).metadata(mdBuilder).build(); setState(clusterService, state); @@ -264,7 +264,7 @@ public void onFailure(Exception e) { ); assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); ClusterState newState = clusterService.getClusterApplierService().state(); - DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().custom(DecommissionAttributeMetadata.TYPE); + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().decommissionAttributeMetadata(); assertEquals(decommissionAttributeMetadata.status(), DecommissionStatus.SUCCESSFUL); } diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java index e93be1bea1282..6fcfae559a5c4 100644 --- 
a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -176,7 +176,7 @@ public void testDecommissioningFailedWhenAnotherAttributeDecommissioningSuccessf setState( clusterService, builder.metadata( - Metadata.builder(clusterService.state().metadata()).putCustom(DecommissionAttributeMetadata.TYPE, oldMetadata).build() + Metadata.builder(clusterService.state().metadata()).decommissionAttributeMetadata(oldMetadata).build() ) ); ActionListener listener = new ActionListener() { From 7a6577001ae86b07e7844ee9f4910c4bf7b738cc Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 16 Sep 2022 15:20:32 +0530 Subject: [PATCH 76/87] Remove fromStage method and update withUpdatedStatus method in metadata Signed-off-by: Rishab Nahata --- .../DecommissionAttributeMetadata.java | 23 +++++++++++++++---- .../decommission/DecommissionController.java | 13 +---------- .../decommission/DecommissionStatus.java | 19 --------------- 3 files changed, 19 insertions(+), 36 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java index 009161ce66fc6..afb8f0a25f9fc 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java @@ -77,12 +77,25 @@ public DecommissionStatus status() { } /** - * Creates a new instance that has the given decommission attribute moved to the given @{@link DecommissionStatus} - * @param status status to be updated with - * @return new instance with updated status + * Returns new instance of the metadata with updated status + * @param newStatus status to be updated with + * @return instance with valid status + * @throws DecommissioningFailedException 
when unexpected status update is requested */ - public DecommissionAttributeMetadata withUpdatedStatus(DecommissionStatus status) { - return new DecommissionAttributeMetadata(decommissionAttribute(), status); + public DecommissionAttributeMetadata withUpdatedStatus(DecommissionStatus newStatus) throws DecommissioningFailedException{ + int previousStage = this.status().stage(); + int newStage = newStatus.stage(); + // we need to update the status only when the previous stage is just behind the expected stage + // if the previous stage is already ahead of expected stage, we don't need to update the stage + // For failures, we update it no matter what + if (previousStage >= newStage) return this; + if (newStage - previousStage != 1 && newStatus.equals(DecommissionStatus.FAILED) == false) { + throw new DecommissioningFailedException( + this.decommissionAttribute(), + "invalid previous decommission status [" + this.status + "] found while updating status to [" + newStatus + "]" + ); + } + return new DecommissionAttributeMetadata(decommissionAttribute(), newStatus); } @Override diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 4995df776bc2e..64b75170624c2 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -226,23 +226,12 @@ public void updateMetadataWithDecommissionStatus(DecommissionStatus decommission public ClusterState execute(ClusterState currentState) { DecommissionAttributeMetadata decommissionAttributeMetadata = currentState.metadata().decommissionAttributeMetadata(); assert decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute() != null; - // we need to update the status only when the previous stage is just behind the expected stage - // if the 
previous stage is already ahead of expected stage, we don't need to update the stage - // For failures, we update it no matter what - int previousStage = decommissionAttributeMetadata.status().stage(); - int expectedStage = decommissionStatus.stage(); logger.info( "attempting to update current decommission status [{}] with expected status [{}]", decommissionAttributeMetadata.status().stage(), decommissionStatus ); - if (previousStage >= expectedStage) return currentState; - if (expectedStage - previousStage != 1 && decommissionStatus.equals(DecommissionStatus.FAILED) == false) { - throw new DecommissioningFailedException( - decommissionAttributeMetadata.decommissionAttribute(), - "invalid previous decommission status found while updating status" - ); - } + // withUpdatedStatus can throw DecommissioningFailedException if the sequence of update is not valid DecommissionAttributeMetadata newMetadata = decommissionAttributeMetadata.withUpdatedStatus(decommissionStatus); return ClusterState.builder(currentState) .metadata(Metadata.builder(currentState.metadata()) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java index ba3dec4ded94a..4578b9988e9c1 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java @@ -74,23 +74,4 @@ public static DecommissionStatus fromString(String status) { } throw new IllegalStateException("Decommission status [" + status + "] not recognized."); } - - /** - * Generate decommission status from given stage - * - * @param stage stage in int - * @return status - */ - public static DecommissionStatus fromStage(int stage) { - if (stage == INIT.stage()) { - return INIT; - } else if (stage == IN_PROGRESS.stage()) { - return IN_PROGRESS; - } else if (stage == SUCCESSFUL.stage()) { - return SUCCESSFUL; - } 
else if (stage == FAILED.stage()) { - return FAILED; - } - throw new IllegalStateException("Decommission stage [" + stage + "] not recognized."); - } } From 453b1b1ac20a3a37e40def6a46ab0c9c5851a955 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 16 Sep 2022 15:32:56 +0530 Subject: [PATCH 77/87] Fix spotless check Signed-off-by: Rishab Nahata --- .../DecommissionAttributeMetadata.java | 2 +- .../decommission/DecommissionController.java | 11 ++++--- .../decommission/DecommissionService.java | 29 +++++++++++-------- .../DecommissionServiceTests.java | 4 +-- 4 files changed, 26 insertions(+), 20 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java index afb8f0a25f9fc..d8899ca429aae 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java @@ -82,7 +82,7 @@ public DecommissionStatus status() { * @return instance with valid status * @throws DecommissioningFailedException when unexpected status update is requested */ - public DecommissionAttributeMetadata withUpdatedStatus(DecommissionStatus newStatus) throws DecommissioningFailedException{ + public DecommissionAttributeMetadata withUpdatedStatus(DecommissionStatus newStatus) throws DecommissioningFailedException { int previousStage = this.status().stage(); int newStage = newStatus.stage(); // we need to update the status only when the previous stage is just behind the expected stage diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 64b75170624c2..6b55d322f0835 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ 
b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -86,7 +86,8 @@ public void excludeDecommissionedNodesFromVotingConfig(Set nodes, Action Strings.EMPTY_ARRAY, nodes.toArray(String[]::new), Strings.EMPTY_ARRAY, - TimeValue.timeValueSeconds(120) // giving a larger timeout of 120 sec as cluster might already be in stress when decommission is triggered + TimeValue.timeValueSeconds(120) // giving a larger timeout of 120 sec as cluster might already be in stress when + // decommission is triggered ), new TransportResponseHandler() { @Override @@ -196,7 +197,10 @@ public void onNewClusterState(ClusterState state) { @Override public void onClusterServiceClose() { - logger.warn("cluster service closed while waiting for removal of decommissioned nodes [{}]", nodesToBeDecommissioned.toString()); + logger.warn( + "cluster service closed while waiting for removal of decommissioned nodes [{}]", + nodesToBeDecommissioned.toString() + ); } @Override @@ -234,8 +238,7 @@ public ClusterState execute(ClusterState currentState) { // withUpdatedStatus can throw DecommissioningFailedException if the sequence of update is not valid DecommissionAttributeMetadata newMetadata = decommissionAttributeMetadata.withUpdatedStatus(decommissionStatus); return ClusterState.builder(currentState) - .metadata(Metadata.builder(currentState.metadata()) - .decommissionAttributeMetadata(newMetadata)) + .metadata(Metadata.builder(currentState.metadata()).decommissionAttributeMetadata(newMetadata)) .build(); } diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 1434f52b3adbb..dd0088c1e8022 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -16,7 +16,6 @@ import 
org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; -import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.allocation.AllocationService; @@ -131,8 +130,7 @@ public ClusterState execute(ClusterState currentState) { decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); logger.info("registering decommission metadata [{}] to execute action", decommissionAttributeMetadata.toString()); return ClusterState.builder(currentState) - .metadata(Metadata.builder(currentState.metadata()) - .decommissionAttributeMetadata(decommissionAttributeMetadata)) + .metadata(Metadata.builder(currentState.metadata()).decommissionAttributeMetadata(decommissionAttributeMetadata)) .build(); } @@ -170,7 +168,8 @@ public void onResponse(Void unused) { if (nodeHasDecommissionedAttribute(clusterService.localNode(), decommissionAttribute)) { // this is an unexpected state, as after exclusion of nodes having decommission attribute, // this local node shouldn't have had the decommission attribute. 
Will send the failure response to the user - String errorMsg = "unexpected state encountered [local node is to-be-decommissioned leader] while executing decommission request"; + String errorMsg = + "unexpected state encountered [local node is to-be-decommissioned leader] while executing decommission request"; logger.error(errorMsg); // will go ahead and clear the voting config and mark the status as false clearVotingConfigExclusionAndUpdateStatus(false, false); @@ -345,9 +344,10 @@ private static void validateAwarenessAttribute( msg = "invalid awareness attribute requested for decommissioning"; } else if (forcedAwarenessAttributes.containsKey(decommissionAttribute.attributeName()) == false) { msg = "forced awareness attribute [" + forcedAwarenessAttributes.toString() + "] doesn't have the decommissioning attribute"; - } else if (forcedAwarenessAttributes.get(decommissionAttribute.attributeName()).contains(decommissionAttribute.attributeValue()) == false) { - msg = "invalid awareness attribute value requested for decommissioning. Set forced awareness values before to decommission"; - } + } else if (forcedAwarenessAttributes.get(decommissionAttribute.attributeName()) + .contains(decommissionAttribute.attributeValue()) == false) { + msg = "invalid awareness attribute value requested for decommissioning. 
Set forced awareness values before to decommission"; + } if (msg != null) { throw new DecommissioningFailedException(decommissionAttribute, msg); @@ -369,13 +369,16 @@ private static void ensureEligibleRequest( break; case IN_PROGRESS: case SUCCESSFUL: - msg = "same request is already in status [" + decommissionAttributeMetadata.status() + "], please wait for it to complete"; + msg = "same request is already in status [" + + decommissionAttributeMetadata.status() + + "], please wait for it to complete"; break; default: - throw new IllegalStateException("unknown status [" + decommissionAttributeMetadata.status() + "] currently registered in metadata"); + throw new IllegalStateException( + "unknown status [" + decommissionAttributeMetadata.status() + "] currently registered in metadata" + ); } - } - else { + } else { switch (decommissionAttributeMetadata.status()) { case SUCCESSFUL: // one awareness attribute is already decommissioned. We will reject the new request @@ -393,7 +396,9 @@ private static void ensureEligibleRequest( case FAILED: break; default: - throw new IllegalStateException("unknown status [" + decommissionAttributeMetadata.status() + "] currently registered in metadata"); + throw new IllegalStateException( + "unknown status [" + decommissionAttributeMetadata.status() + "] currently registered in metadata" + ); } } } diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java index 6fcfae559a5c4..71ee61ffec275 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -175,9 +175,7 @@ public void testDecommissioningFailedWhenAnotherAttributeDecommissioningSuccessf final ClusterState.Builder builder = builder(clusterService.state()); setState( clusterService, - builder.metadata( - 
Metadata.builder(clusterService.state().metadata()).decommissionAttributeMetadata(oldMetadata).build() - ) + builder.metadata(Metadata.builder(clusterService.state().metadata()).decommissionAttributeMetadata(oldMetadata).build()) ); ActionListener listener = new ActionListener() { @Override From 1b42670bf1500c513069d9e48e113e2d027f8be6 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 16 Sep 2022 19:09:41 +0530 Subject: [PATCH 78/87] Add observer to ensure abdication Signed-off-by: Rishab Nahata --- .../DecommissionAttributeMetadata.java | 2 +- .../decommission/DecommissionService.java | 49 +++++++++++++++++-- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java index d8899ca429aae..2c7ac9e15c0f5 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java @@ -88,7 +88,7 @@ public DecommissionAttributeMetadata withUpdatedStatus(DecommissionStatus newSta // we need to update the status only when the previous stage is just behind the expected stage // if the previous stage is already ahead of expected stage, we don't need to update the stage // For failures, we update it no matter what - if (previousStage >= newStage) return this; + if (previousStage >= newStage && newStatus.equals(DecommissionStatus.FAILED) == false) return this; if (newStage - previousStage != 1 && newStatus.equals(DecommissionStatus.FAILED) == false) { throw new DecommissioningFailedException( this.decommissionAttribute(), diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index dd0088c1e8022..b06ea22c6fd0d 100644 --- 
a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -11,8 +11,10 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.OpenSearchTimeoutException; import org.opensearch.action.ActionListener; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateObserver; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; @@ -212,11 +214,12 @@ public void onFailure(Exception e) { .map(DiscoveryNode::getId) .collect(Collectors.toSet()); - final Predicate allNodesRemoved = clusterState -> { + final Predicate allNodesRemovedAndAbdicated = clusterState -> { final Set votingConfigNodeIds = clusterState.getLastCommittedConfiguration().getNodeIds(); - return nodeIdsToBeExcluded.stream().noneMatch(votingConfigNodeIds::contains); + return nodeIdsToBeExcluded.stream().noneMatch(votingConfigNodeIds::contains) + && nodeIdsToBeExcluded.contains(clusterState.nodes().getClusterManagerNodeId()) == false; }; - if (allNodesRemoved.test(clusterService.getClusterApplierService().state())) { + if (allNodesRemovedAndAbdicated.test(clusterService.getClusterApplierService().state())) { exclusionListener.onResponse(null); } else { // send a transport request to exclude to-be-decommissioned cluster manager eligible nodes from voting config @@ -227,7 +230,43 @@ public void onResponse(Void unused) { "successfully removed decommissioned cluster manager eligible nodes [{}] from voting config ", clusterManagerNodesToBeDecommissioned.toString() ); - exclusionListener.onResponse(null); + final ClusterStateObserver abdicationObserver = new ClusterStateObserver( + clusterService, + TimeValue.timeValueSeconds(30L), + logger, + 
threadPool.getThreadContext() + ); + final ClusterStateObserver.Listener abdicationListener = new ClusterStateObserver.Listener() { + @Override + public void onNewClusterState(ClusterState state) { + logger.debug("to-be-decommissioned node is no more the active leader"); + exclusionListener.onResponse(null); + } + + @Override + public void onClusterServiceClose() { + String errorMsg = "cluster service closed while waiting for abdication of to-be-decommissioned leader"; + logger.warn(errorMsg); + listener.onFailure(new DecommissioningFailedException(decommissionAttribute, errorMsg)); + } + + @Override + public void onTimeout(TimeValue timeout) { + logger.info("timed out while waiting for abdication of to-be-decommissioned leader"); + clearVotingConfigExclusionAndUpdateStatus(false, false); + listener.onFailure(new OpenSearchTimeoutException( + "timed out [{}] while waiting for abdication of to-be-decommissioned leader", + timeout.toString() + )); + } + }; + // In case the cluster state is already processed even before this code is executed + // therefore testing first before attaching the listener + if (allNodesRemovedAndAbdicated.test(clusterService.getClusterApplierService().state())) { + abdicationListener.onNewClusterState(clusterService.getClusterApplierService().state()); + } else { + abdicationObserver.waitForNextChange(abdicationListener, allNodesRemovedAndAbdicated); + } } @Override @@ -254,7 +293,7 @@ public void onResponse(DecommissionStatus status) { decommissionController.removeDecommissionedNodes( filterNodesWithDecommissionAttribute(clusterService.getClusterApplierService().state(), decommissionAttribute, false), "nodes-decommissioned", - TimeValue.timeValueSeconds(30L), // TODO - read timeout from request while integrating with API + TimeValue.timeValueSeconds(120L), new ActionListener() { @Override public void onResponse(Void unused) { From a30fa9d37561507bddfa96a462147a1e2b954f0b Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 16 Sep 2022 
19:19:32 +0530 Subject: [PATCH 79/87] Refactor node removal observer Signed-off-by: Rishab Nahata --- .../cluster/decommission/DecommissionController.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 6b55d322f0835..2a206bf89d1e9 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -188,7 +188,7 @@ public void removeDecommissionedNodes( final ClusterStateObserver observer = new ClusterStateObserver(clusterService, timeout, logger, threadPool.getThreadContext()); - observer.waitForNextChange(new ClusterStateObserver.Listener() { + final ClusterStateObserver.Listener removalListener = new ClusterStateObserver.Listener() { @Override public void onNewClusterState(ClusterState state) { logger.info("successfully removed all decommissioned nodes [{}] from the cluster", nodesToBeDecommissioned.toString()); @@ -214,7 +214,13 @@ public void onTimeout(TimeValue timeout) { ) ); } - }, allDecommissionedNodesRemovedPredicate); + }; + + if(allDecommissionedNodesRemovedPredicate.test(clusterService.getClusterApplierService().state())) { + removalListener.onNewClusterState(clusterService.getClusterApplierService().state()); + } else { + observer.waitForNextChange(removalListener, allDecommissionedNodesRemovedPredicate); + } } /** From 32481f570ef08c7cb06df1b84d5694340564b2cc Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Fri, 16 Sep 2022 19:22:45 +0530 Subject: [PATCH 80/87] Fix spotless check Signed-off-by: Rishab Nahata --- .../cluster/decommission/DecommissionController.java | 2 +- .../cluster/decommission/DecommissionService.java | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git 
a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 2a206bf89d1e9..ee328d7937993 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -216,7 +216,7 @@ public void onTimeout(TimeValue timeout) { } }; - if(allDecommissionedNodesRemovedPredicate.test(clusterService.getClusterApplierService().state())) { + if (allDecommissionedNodesRemovedPredicate.test(clusterService.getClusterApplierService().state())) { removalListener.onNewClusterState(clusterService.getClusterApplierService().state()); } else { observer.waitForNextChange(removalListener, allDecommissionedNodesRemovedPredicate); diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index b06ea22c6fd0d..890b369bba3d6 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -254,10 +254,12 @@ public void onClusterServiceClose() { public void onTimeout(TimeValue timeout) { logger.info("timed out while waiting for abdication of to-be-decommissioned leader"); clearVotingConfigExclusionAndUpdateStatus(false, false); - listener.onFailure(new OpenSearchTimeoutException( - "timed out [{}] while waiting for abdication of to-be-decommissioned leader", - timeout.toString() - )); + listener.onFailure( + new OpenSearchTimeoutException( + "timed out [{}] while waiting for abdication of to-be-decommissioned leader", + timeout.toString() + ) + ); } }; // In case the cluster state is already processed even before this code is executed From 9f6e1deb5b6fd4ec5c7d5ce6cd282321b0966ae4 Mon Sep 17 00:00:00 2001 From: Rishab 
Nahata Date: Mon, 19 Sep 2022 09:54:46 +0530 Subject: [PATCH 81/87] Update state transitions Signed-off-by: Rishab Nahata --- .../DecommissionAttributeMetadata.java | 43 ++++++++++++------- .../decommission/DecommissionController.java | 8 ++-- .../decommission/DecommissionStatus.java | 19 +++----- 3 files changed, 37 insertions(+), 33 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java index 2c7ac9e15c0f5..d7f986d46fa64 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java @@ -35,7 +35,7 @@ public class DecommissionAttributeMetadata extends AbstractNamedDiffable public static final String TYPE = "decommissionedAttribute"; private final DecommissionAttribute decommissionAttribute; - private final DecommissionStatus status; + private DecommissionStatus status; public static final String attributeType = "awareness"; /** @@ -76,26 +76,39 @@ public DecommissionStatus status() { return this.status; } + /** - * Returns new instance of the metadata with updated status + * Returns instance of the metadata with updated status * @param newStatus status to be updated with * @return instance with valid status - * @throws DecommissioningFailedException when unexpected status update is requested */ - public DecommissionAttributeMetadata withUpdatedStatus(DecommissionStatus newStatus) throws DecommissioningFailedException { - int previousStage = this.status().stage(); - int newStage = newStatus.stage(); - // we need to update the status only when the previous stage is just behind the expected stage - // if the previous stage is already ahead of expected stage, we don't need to update the stage - // For failures, we update it no matter what - if (previousStage >= newStage && 
newStatus.equals(DecommissionStatus.FAILED) == false) return this; - if (newStage - previousStage != 1 && newStatus.equals(DecommissionStatus.FAILED) == false) { - throw new DecommissioningFailedException( - this.decommissionAttribute(), - "invalid previous decommission status [" + this.status + "] found while updating status to [" + newStatus + "]" + // synchronized is strictly speaking not needed (this is called by a single thread), but just to be safe + public synchronized DecommissionAttributeMetadata setUpdatedStatus(DecommissionStatus newStatus) { + // We don't expect that INIT will be new status, as it is registered only when starting the decommission action + switch(newStatus) { + case IN_PROGRESS: + validateAndSetStatus(DecommissionStatus.INIT, newStatus); + break; + case SUCCESSFUL: + validateAndSetStatus(DecommissionStatus.IN_PROGRESS, newStatus); + break; + case FAILED: + // we don't need to validate here and directly update status to FAILED + this.status = newStatus; + default: + throw new IllegalArgumentException("illegal decommission status [" + newStatus.status() + "] requested for updating metadata"); + } + return this; + } + + protected void validateAndSetStatus(DecommissionStatus expected, DecommissionStatus next) { + if (status.equals(expected) == false){ + assert false : "can't move decommission status to [" + next + "]. current status: [" + status + "] (expected [" + expected + "])"; + throw new IllegalStateException( + "can't move decommission status to [" + next + "]. 
current status: [" + status + "] (expected [" + expected + "])" ); } - return new DecommissionAttributeMetadata(decommissionAttribute(), newStatus); + status = next; } @Override diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index ee328d7937993..1352b5051789e 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -238,13 +238,13 @@ public ClusterState execute(ClusterState currentState) { assert decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute() != null; logger.info( "attempting to update current decommission status [{}] with expected status [{}]", - decommissionAttributeMetadata.status().stage(), + decommissionAttributeMetadata.status(), decommissionStatus ); - // withUpdatedStatus can throw DecommissioningFailedException if the sequence of update is not valid - DecommissionAttributeMetadata newMetadata = decommissionAttributeMetadata.withUpdatedStatus(decommissionStatus); + // setUpdatedStatus can throw IllegalStateException if the sequence of update is not valid + decommissionAttributeMetadata.setUpdatedStatus(decommissionStatus); return ClusterState.builder(currentState) - .metadata(Metadata.builder(currentState.metadata()).decommissionAttributeMetadata(newMetadata)) + .metadata(Metadata.builder(currentState.metadata()).decommissionAttributeMetadata(decommissionAttributeMetadata)) .build(); } diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java index 4578b9988e9c1..af88b0d0f5902 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java +++ 
b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java @@ -15,26 +15,24 @@ public enum DecommissionStatus { /** * Decommission process is initiated, and to-be-decommissioned leader is excluded from voting config */ - INIT("init", 0), + INIT("init"), /** * Decommission process has started, decommissioned nodes should be removed */ - IN_PROGRESS("in_progress", 1), + IN_PROGRESS("in_progress"), /** * Decommission action completed */ - SUCCESSFUL("successful", 2), + SUCCESSFUL("successful"), /** * Decommission request failed */ - FAILED("failed", -1); + FAILED("failed"); private final String status; - private final int stage; - DecommissionStatus(String status, int stage) { + DecommissionStatus(String status) { this.status = status; - this.stage = stage; } /** @@ -46,13 +44,6 @@ public String status() { return status; } - /** - * Returns stage that represents the decommission stage - */ - public int stage() { - return stage; - } - /** * Generate decommission status from given string * From 32de49250092182c9921caf4741b2962f3b1fd17 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Mon, 19 Sep 2022 10:26:38 +0530 Subject: [PATCH 82/87] Small fixes Signed-off-by: Rishab Nahata --- .../cluster/decommission/DecommissionAttributeMetadata.java | 1 + .../opensearch/cluster/decommission/DecommissionService.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java index d7f986d46fa64..f70fc12f43225 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java @@ -95,6 +95,7 @@ public synchronized DecommissionAttributeMetadata setUpdatedStatus(DecommissionS case FAILED: // we don't need to validate here and directly 
update status to FAILED this.status = newStatus; + break; default: throw new IllegalArgumentException("illegal decommission status [" + newStatus.status() + "] requested for updating metadata"); } diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 890b369bba3d6..7ed2c0c054e38 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -412,7 +412,7 @@ private static void ensureEligibleRequest( case SUCCESSFUL: msg = "same request is already in status [" + decommissionAttributeMetadata.status() - + "], please wait for it to complete"; + + "]"; break; default: throw new IllegalStateException( From 779bdf8c7ccfc65758c3c75d0ad738853c7e5268 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Mon, 19 Sep 2022 11:06:59 +0530 Subject: [PATCH 83/87] Fixes Signed-off-by: Rishab Nahata --- .../cluster/decommission/DecommissionController.java | 6 +++++- .../cluster/decommission/DecommissionService.java | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java index 1352b5051789e..20aa77bf592c6 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -159,7 +159,7 @@ public ClearVotingConfigExclusionsResponse read(StreamInput in) throws IOExcepti * @param timeout timeout for the request * @param nodesRemovedListener callback for the success or failure */ - public void removeDecommissionedNodes( + public synchronized void removeDecommissionedNodes( Set nodesToBeDecommissioned, String reason, TimeValue timeout, @@ 
-241,6 +241,10 @@ public ClusterState execute(ClusterState currentState) { decommissionAttributeMetadata.status(), decommissionStatus ); + // if the same state is already registered, we will return the current state as is without making any change + if (decommissionAttributeMetadata.status().equals(decommissionStatus)) { + return currentState; + } // setUpdatedStatus can throw IllegalStateException if the sequence of update is not valid decommissionAttributeMetadata.setUpdatedStatus(decommissionStatus); return ClusterState.builder(currentState) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index 7ed2c0c054e38..e30dc926c9725 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -283,7 +283,7 @@ public void onFailure(Exception e) { } } - private synchronized void failDecommissionedNodes(ClusterState state) { + private void failDecommissionedNodes(ClusterState state) { // this method ensures no matter what, we always exit from this function after clearing the voting config exclusion DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().decommissionAttributeMetadata(); DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); From 0d81ae20831df7e9e00aaf63d2ecc6c7de0e9e67 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Mon, 19 Sep 2022 11:13:15 +0530 Subject: [PATCH 84/87] Fix spotless check Signed-off-by: Rishab Nahata --- .../DecommissionAttributeMetadata.java | 17 ++++++++++++----- .../decommission/DecommissionService.java | 4 +--- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java 
b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java index f70fc12f43225..87c1e7e4e7bd0 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java @@ -76,7 +76,6 @@ public DecommissionStatus status() { return this.status; } - /** * Returns instance of the metadata with updated status * @param newStatus status to be updated with @@ -85,7 +84,7 @@ public DecommissionStatus status() { // synchronized is strictly speaking not needed (this is called by a single thread), but just to be safe public synchronized DecommissionAttributeMetadata setUpdatedStatus(DecommissionStatus newStatus) { // We don't expect that INIT will be new status, as it is registered only when starting the decommission action - switch(newStatus) { + switch (newStatus) { case IN_PROGRESS: validateAndSetStatus(DecommissionStatus.INIT, newStatus); break; @@ -97,14 +96,22 @@ public synchronized DecommissionAttributeMetadata setUpdatedStatus(DecommissionS this.status = newStatus; break; default: - throw new IllegalArgumentException("illegal decommission status [" + newStatus.status() + "] requested for updating metadata"); + throw new IllegalArgumentException( + "illegal decommission status [" + newStatus.status() + "] requested for updating metadata" + ); } return this; } protected void validateAndSetStatus(DecommissionStatus expected, DecommissionStatus next) { - if (status.equals(expected) == false){ - assert false : "can't move decommission status to [" + next + "]. current status: [" + status + "] (expected [" + expected + "])"; + if (status.equals(expected) == false) { + assert false : "can't move decommission status to [" + + next + + "]. current status: [" + + status + + "] (expected [" + + expected + + "])"; throw new IllegalStateException( "can't move decommission status to [" + next + "]. 
current status: [" + status + "] (expected [" + expected + "])" ); diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java index e30dc926c9725..c93555433a668 100644 --- a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -410,9 +410,7 @@ private static void ensureEligibleRequest( break; case IN_PROGRESS: case SUCCESSFUL: - msg = "same request is already in status [" - + decommissionAttributeMetadata.status() - + "]"; + msg = "same request is already in status [" + decommissionAttributeMetadata.status() + "]"; break; default: throw new IllegalStateException( From b8bd931e4511e6214c1fb1affc49b87974d18738 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 31 Aug 2022 14:28:16 +0530 Subject: [PATCH 85/87] Control peer discovery during decommission --- .../cluster/coordination/Coordinator.java | 28 ++++++++++--------- .../cluster/coordination/JoinHelper.java | 21 +++++++++++++- .../common/settings/ClusterSettings.java | 1 + .../org/opensearch/discovery/PeerFinder.java | 22 ++++++++++++++- .../cluster/coordination/JoinHelperTests.java | 12 ++++++-- 5 files changed, 66 insertions(+), 18 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 1c7e7cd0419e2..9f455142a2aef 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -208,19 +208,6 @@ public Coordinator( this.onJoinValidators = JoinTaskExecutor.addBuiltInJoinValidators(onJoinValidators); this.singleNodeDiscovery = DiscoveryModule.isSingleNodeDiscovery(settings); this.electionStrategy = electionStrategy; - this.joinHelper = new 
JoinHelper( - settings, - allocationService, - clusterManagerService, - transportService, - this::getCurrentTerm, - this::getStateForClusterManagerService, - this::handleJoinRequest, - this::joinLeaderInTerm, - this.onJoinValidators, - rerouteService, - nodeHealthService - ); this.persistedStateSupplier = persistedStateSupplier; this.noClusterManagerBlockService = new NoClusterManagerBlockService(settings, clusterSettings); this.lastKnownLeader = Optional.empty(); @@ -244,6 +231,21 @@ public Coordinator( new HandshakingTransportAddressConnector(settings, transportService), configuredHostsResolver ); + this.joinHelper = new JoinHelper( + settings, + allocationService, + clusterManagerService, + transportService, + this::getCurrentTerm, + this::getStateForClusterManagerService, + this::handleJoinRequest, + this::joinLeaderInTerm, + this.onJoinValidators, + rerouteService, + nodeHealthService, + this.peerFinder::onDecommission, + this.peerFinder::onRecommission + ); this.publicationHandler = new PublicationTransportHandler( transportService, namedWriteableRegistry, diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java index 656e6d220720f..4442ddb3bc3c5 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java @@ -42,6 +42,7 @@ import org.opensearch.cluster.ClusterStateTaskListener; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.coordination.Coordinator.Mode; +import org.opensearch.cluster.decommission.NodeDecommissionedException; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.RerouteService; @@ -57,6 +58,7 @@ import org.opensearch.monitor.StatusInfo; import org.opensearch.threadpool.ThreadPool; import 
org.opensearch.threadpool.ThreadPool.Names; +import org.opensearch.transport.RemoteTransportException; import org.opensearch.transport.TransportChannel; import org.opensearch.transport.TransportException; import org.opensearch.transport.TransportRequest; @@ -113,6 +115,10 @@ public class JoinHelper { private final TimeValue joinTimeout; // only used for Zen1 joining private final NodeHealthService nodeHealthService; + public boolean isDecommissioned; + private Runnable onDecommission; + private Runnable onRecommission; + private final Set> pendingOutgoingJoins = Collections.synchronizedSet(new HashSet<>()); private final AtomicReference lastFailedJoinAttempt = new AtomicReference<>(); @@ -130,7 +136,9 @@ public class JoinHelper { Function joinLeaderInTerm, Collection> joinValidators, RerouteService rerouteService, - NodeHealthService nodeHealthService + NodeHealthService nodeHealthService, + Runnable onDecommission, + Runnable onRecommission ) { this.clusterManagerService = clusterManagerService; this.transportService = transportService; @@ -343,11 +351,22 @@ public void handleResponse(Empty response) { logger.debug("successfully joined {} with {}", destination, joinRequest); lastFailedJoinAttempt.set(null); onCompletion.run(); + if (isDecommissioned) { + isDecommissioned = false; + onRecommission.run(); + } } @Override public void handleException(TransportException exp) { pendingOutgoingJoins.remove(dedupKey); + if (exp instanceof RemoteTransportException && (exp.getCause() instanceof NodeDecommissionedException)) { + logger.info("local node is decommissioned. 
Will not be able to join the cluster"); + if (!isDecommissioned) { + isDecommissioned = true; + onDecommission.run(); + } + } logger.info(() -> new ParameterizedMessage("failed to join {} with {}", destination, joinRequest), exp); FailedJoinAttempt attempt = new FailedJoinAttempt(destination, joinRequest, exp); attempt.logNow(); diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 971fb518ff1da..826500ddcf48d 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -533,6 +533,7 @@ public void apply(Settings value, Settings current, Settings previous) { PersistentTasksClusterService.CLUSTER_TASKS_ALLOCATION_RECHECK_INTERVAL_SETTING, EnableAssignmentDecider.CLUSTER_TASKS_ALLOCATION_ENABLE_SETTING, PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_SETTING, + PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_DURING_DECOMMISSION_SETTING, PeerFinder.DISCOVERY_REQUEST_PEERS_TIMEOUT_SETTING, ClusterFormationFailureHelper.DISCOVERY_CLUSTER_FORMATION_WARNING_TIMEOUT_SETTING, ElectionSchedulerFactory.ELECTION_INITIAL_TIMEOUT_SETTING, diff --git a/server/src/main/java/org/opensearch/discovery/PeerFinder.java b/server/src/main/java/org/opensearch/discovery/PeerFinder.java index a601a6fbe4d82..67727459fdcde 100644 --- a/server/src/main/java/org/opensearch/discovery/PeerFinder.java +++ b/server/src/main/java/org/opensearch/discovery/PeerFinder.java @@ -84,6 +84,14 @@ public abstract class PeerFinder { Setting.Property.NodeScope ); + // the time between attempts to find all peers when node is in decommissioned state, default set to 2 minutes + public static final Setting DISCOVERY_FIND_PEERS_INTERVAL_DURING_DECOMMISSION_SETTING = Setting.timeSetting( + "discovery.find_peers_interval_during_decommission", + TimeValue.timeValueMinutes(2L), + TimeValue.timeValueMillis(1000), + 
Setting.Property.NodeScope + ); + public static final Setting DISCOVERY_REQUEST_PEERS_TIMEOUT_SETTING = Setting.timeSetting( "discovery.request_peers_timeout", TimeValue.timeValueMillis(3000), @@ -91,7 +99,8 @@ public abstract class PeerFinder { Setting.Property.NodeScope ); - private final TimeValue findPeersInterval; + private final Settings settings; + private TimeValue findPeersInterval; private final TimeValue requestPeersTimeout; private final Object mutex = new Object(); @@ -112,6 +121,7 @@ public PeerFinder( TransportAddressConnector transportAddressConnector, ConfiguredHostsResolver configuredHostsResolver ) { + this.settings = settings; findPeersInterval = DISCOVERY_FIND_PEERS_INTERVAL_SETTING.get(settings); requestPeersTimeout = DISCOVERY_REQUEST_PEERS_TIMEOUT_SETTING.get(settings); this.transportService = transportService; @@ -156,6 +166,16 @@ public void deactivate(DiscoveryNode leader) { } } + public void onDecommission() { + findPeersInterval = DISCOVERY_FIND_PEERS_INTERVAL_DURING_DECOMMISSION_SETTING.get(settings); + logger.info("setting findPeersInterval to [{}], due to decommissioning", findPeersInterval); + } + + public void onRecommission() { + findPeersInterval = DISCOVERY_FIND_PEERS_INTERVAL_SETTING.get(settings); + logger.info("setting findPeersInterval to [{}], due to recommissioning", findPeersInterval); + } + // exposed to subclasses for testing protected final boolean holdsLock() { return Thread.holdsLock(mutex); diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinHelperTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinHelperTests.java index a3c945cdbac3a..a43af882975b8 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinHelperTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinHelperTests.java @@ -90,7 +90,9 @@ public void testJoinDeduplication() { startJoinRequest -> { throw new AssertionError(); }, Collections.emptyList(), (s, p, r) -> {}, - 
() -> new StatusInfo(HEALTHY, "info") + () -> new StatusInfo(HEALTHY, "info"), + ()-> {}, + () -> {} ); transportService.start(); @@ -230,7 +232,9 @@ private void assertJoinValidationRejectsMismatchedClusterUUID(String actionName, startJoinRequest -> { throw new AssertionError(); }, Collections.emptyList(), (s, p, r) -> {}, - null + null, + () -> {}, + () -> {} ); // registers request handler transportService.start(); transportService.acceptIncomingRequests(); @@ -284,7 +288,9 @@ public void testJoinFailureOnUnhealthyNodes() { startJoinRequest -> { throw new AssertionError(); }, Collections.emptyList(), (s, p, r) -> {}, - () -> nodeHealthServiceStatus.get() + () -> nodeHealthServiceStatus.get(), + () -> {}, + () -> {} ); transportService.start(); From 828167f6ac02934c6fa3fdf1edb720fcd41fa990 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Wed, 31 Aug 2022 17:32:09 +0530 Subject: [PATCH 86/87] Change runnable to action listener --- .../cluster/coordination/Coordinator.java | 3 +-- .../cluster/coordination/JoinHelper.java | 11 ++++---- .../org/opensearch/discovery/PeerFinder.java | 26 ++++++++++++------- .../cluster/coordination/JoinHelperTests.java | 11 ++++---- 4 files changed, 27 insertions(+), 24 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 9f455142a2aef..cb99f02bbf03f 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -243,8 +243,7 @@ public Coordinator( this.onJoinValidators, rerouteService, nodeHealthService, - this.peerFinder::onDecommission, - this.peerFinder::onRecommission + peerFinder.nodeCommissionedListener() ); this.publicationHandler = new PublicationTransportHandler( transportService, diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java 
b/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java index 4442ddb3bc3c5..9e30b9a3f7ee3 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java @@ -116,8 +116,7 @@ public class JoinHelper { private final NodeHealthService nodeHealthService; public boolean isDecommissioned; - private Runnable onDecommission; - private Runnable onRecommission; + private final ActionListener nodeCommissionedListener; private final Set> pendingOutgoingJoins = Collections.synchronizedSet(new HashSet<>()); @@ -137,13 +136,13 @@ public class JoinHelper { Collection> joinValidators, RerouteService rerouteService, NodeHealthService nodeHealthService, - Runnable onDecommission, - Runnable onRecommission + ActionListener nodeCommissionedListener ) { this.clusterManagerService = clusterManagerService; this.transportService = transportService; this.nodeHealthService = nodeHealthService; this.joinTimeout = JOIN_TIMEOUT_SETTING.get(settings); + this.nodeCommissionedListener = nodeCommissionedListener; this.joinTaskExecutorGenerator = () -> new JoinTaskExecutor(settings, allocationService, logger, rerouteService, transportService) { private final long term = currentTermSupplier.getAsLong(); @@ -353,7 +352,7 @@ public void handleResponse(Empty response) { onCompletion.run(); if (isDecommissioned) { isDecommissioned = false; - onRecommission.run(); + nodeCommissionedListener.onResponse(null); } } @@ -364,7 +363,7 @@ public void handleException(TransportException exp) { logger.info("local node is decommissioned. 
Will not be able to join the cluster"); if (!isDecommissioned) { isDecommissioned = true; - onDecommission.run(); + nodeCommissionedListener.onFailure(exp); } } logger.info(() -> new ParameterizedMessage("failed to join {} with {}", destination, joinRequest), exp); diff --git a/server/src/main/java/org/opensearch/discovery/PeerFinder.java b/server/src/main/java/org/opensearch/discovery/PeerFinder.java index 67727459fdcde..5f89e681f5526 100644 --- a/server/src/main/java/org/opensearch/discovery/PeerFinder.java +++ b/server/src/main/java/org/opensearch/discovery/PeerFinder.java @@ -138,6 +138,22 @@ public PeerFinder( ); } + public ActionListener nodeCommissionedListener() { + return new ActionListener() { + @Override + public void onResponse(Void unused) { + logger.info("setting findPeersInterval to [{}], due to recommissioning", findPeersInterval); + findPeersInterval = DISCOVERY_FIND_PEERS_INTERVAL_SETTING.get(settings); + } + + @Override + public void onFailure(Exception e) { + logger.info("setting findPeersInterval to [{}], due to decommissioning", findPeersInterval); + findPeersInterval = DISCOVERY_FIND_PEERS_INTERVAL_DURING_DECOMMISSION_SETTING.get(settings); + } + }; + } + public void activate(final DiscoveryNodes lastAcceptedNodes) { logger.trace("activating with {}", lastAcceptedNodes); @@ -166,16 +182,6 @@ public void deactivate(DiscoveryNode leader) { } } - public void onDecommission() { - findPeersInterval = DISCOVERY_FIND_PEERS_INTERVAL_DURING_DECOMMISSION_SETTING.get(settings); - logger.info("setting findPeersInterval to [{}], due to decommissioning", findPeersInterval); - } - - public void onRecommission() { - findPeersInterval = DISCOVERY_FIND_PEERS_INTERVAL_SETTING.get(settings); - logger.info("setting findPeersInterval to [{}], due to recommissioning", findPeersInterval); - } - // exposed to subclasses for testing protected final boolean holdsLock() { return Thread.holdsLock(mutex); diff --git 
a/server/src/test/java/org/opensearch/cluster/coordination/JoinHelperTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinHelperTests.java index a43af882975b8..50e18f25aad5b 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinHelperTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinHelperTests.java @@ -33,6 +33,7 @@ import org.apache.logging.log4j.Level; import org.opensearch.Version; +import org.opensearch.action.ActionListener; import org.opensearch.action.ActionListenerResponseHandler; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.ClusterName; @@ -55,6 +56,7 @@ import java.util.Optional; import java.util.concurrent.atomic.AtomicReference; +import static org.mockito.Mockito.mock; import static org.opensearch.monitor.StatusInfo.Status.HEALTHY; import static org.opensearch.monitor.StatusInfo.Status.UNHEALTHY; import static org.opensearch.node.Node.NODE_NAME_SETTING; @@ -91,8 +93,7 @@ public void testJoinDeduplication() { Collections.emptyList(), (s, p, r) -> {}, () -> new StatusInfo(HEALTHY, "info"), - ()-> {}, - () -> {} + mock(ActionListener.class) ); transportService.start(); @@ -233,8 +234,7 @@ private void assertJoinValidationRejectsMismatchedClusterUUID(String actionName, Collections.emptyList(), (s, p, r) -> {}, null, - () -> {}, - () -> {} + mock(ActionListener.class) ); // registers request handler transportService.start(); transportService.acceptIncomingRequests(); @@ -289,8 +289,7 @@ public void testJoinFailureOnUnhealthyNodes() { Collections.emptyList(), (s, p, r) -> {}, () -> nodeHealthServiceStatus.get(), - () -> {}, - () -> {} + mock(ActionListener.class) ); transportService.start(); From e73f4f792a14ca8720a2afa2c34b444677b4b02b Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Thu, 1 Sep 2022 14:31:45 +0530 Subject: [PATCH 87/87] skip prevoting for decommissioned nodes irrespective of cluster state --- 
.../cluster/coordination/Coordinator.java | 5 +++++ .../java/org/opensearch/discovery/PeerFinder.java | 13 ++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index cb99f02bbf03f..dd928dd911304 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -1439,6 +1439,11 @@ private void startElectionScheduler() { public void run() { synchronized (mutex) { if (mode == Mode.CANDIDATE) { + if(peerFinder.localNodeDecommissioned()) { + logger.debug("skip prevoting as local node is decommissioned"); + return; + } + final ClusterState lastAcceptedState = coordinationState.get().getLastAcceptedState(); if (localNodeMayWinElection(lastAcceptedState) == false) { diff --git a/server/src/main/java/org/opensearch/discovery/PeerFinder.java b/server/src/main/java/org/opensearch/discovery/PeerFinder.java index 5f89e681f5526..0cd7169ff191e 100644 --- a/server/src/main/java/org/opensearch/discovery/PeerFinder.java +++ b/server/src/main/java/org/opensearch/discovery/PeerFinder.java @@ -110,6 +110,7 @@ public abstract class PeerFinder { private volatile long currentTerm; private boolean active; + private boolean localNodeDecommissioned = false; private DiscoveryNodes lastAcceptedNodes; private final Map peersByAddress = new LinkedHashMap<>(); private Optional leader = Optional.empty(); @@ -143,17 +144,27 @@ public ActionListener nodeCommissionedListener() { @Override public void onResponse(Void unused) { logger.info("setting findPeersInterval to [{}], due to recommissioning", findPeersInterval); + assert localNodeDecommissioned; // TODO: Do we need this? 
+ localNodeDecommissioned = false; findPeersInterval = DISCOVERY_FIND_PEERS_INTERVAL_SETTING.get(settings); + } @Override public void onFailure(Exception e) { - logger.info("setting findPeersInterval to [{}], due to decommissioning", findPeersInterval); + logger.info("setting findPeersInterval to [{}], due to decommissioning", + DISCOVERY_FIND_PEERS_INTERVAL_DURING_DECOMMISSION_SETTING.get(settings)); + assert !localNodeDecommissioned; + localNodeDecommissioned = true; findPeersInterval = DISCOVERY_FIND_PEERS_INTERVAL_DURING_DECOMMISSION_SETTING.get(settings); } }; } + public boolean localNodeDecommissioned() { + return localNodeDecommissioned; + } + public void activate(final DiscoveryNodes lastAcceptedNodes) { logger.trace("activating with {}", lastAcceptedNodes);