Skip to content

Commit

Permalink
Make viaNode more deterministic for AwarenessAttributeDecommissionIT …
Browse files Browse the repository at this point in the history
…tests (#5262)

* Make viaNode more deterministic fixing flaky AwarenessAttributeDecommissionIT tests 

Signed-off-by: Rishab Nahata <rnnahata@amazon.com>
  • Loading branch information
imRishN authored Nov 17, 2022
1 parent 9d4aac2 commit 059b614
Showing 1 changed file with 36 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,6 @@ public void testInvariantsAndLogsOnDecommissionedNodes() throws Exception {

logger.info("--> starting decommissioning nodes in zone {}", 'a');
DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "a");
String activeNode = getNonDecommissionedNode(internalCluster().clusterService().state(), "a");
DecommissionRequest decommissionRequest = new DecommissionRequest(decommissionAttribute);
decommissionRequest.setNoDelay(true);
DecommissionResponse decommissionResponse = client().execute(DecommissionAction.INSTANCE, decommissionRequest).get();
Expand All @@ -239,6 +238,7 @@ public void testInvariantsAndLogsOnDecommissionedNodes() throws Exception {
client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get();

String decommissionedNode = randomFrom(clusterManagerNodes.get(0), dataNodes.get(0));
String activeNode = dataNodes.get(1);

ClusterService decommissionedNodeClusterService = internalCluster().getInstance(ClusterService.class, decommissionedNode);
DecommissionAttributeMetadata metadata = decommissionedNodeClusterService.state()
Expand Down Expand Up @@ -278,7 +278,7 @@ public boolean innerMatch(LogEvent event) {
);
TransportService clusterManagerTransportService = internalCluster().getInstance(
TransportService.class,
internalCluster().getClusterManagerName()
internalCluster().getClusterManagerName(activeNode)
);
MockTransportService decommissionedNodeTransportService = (MockTransportService) internalCluster().getInstance(
TransportService.class,
Expand Down Expand Up @@ -385,17 +385,27 @@ private void assertNodesRemovedAfterZoneDecommission(boolean originalClusterMana
clusterManagerNameToZone.put(clusterManagerNodes.get(2), "c");

logger.info("--> starting 4 data nodes each on zones 'a' & 'b' & 'c'");
List<String> nodes_in_zone_a = internalCluster().startDataOnlyNodes(
dataNodeCountPerAZ,
Settings.builder().put(commonSettings).put("node.attr.zone", "a").build()
Map<String, List<String>> zoneToNodesMap = new HashMap<>();
zoneToNodesMap.put(
"a",
internalCluster().startDataOnlyNodes(
dataNodeCountPerAZ,
Settings.builder().put(commonSettings).put("node.attr.zone", "a").build()
)
);
List<String> nodes_in_zone_b = internalCluster().startDataOnlyNodes(
dataNodeCountPerAZ,
Settings.builder().put(commonSettings).put("node.attr.zone", "b").build()
zoneToNodesMap.put(
"b",
internalCluster().startDataOnlyNodes(
dataNodeCountPerAZ,
Settings.builder().put(commonSettings).put("node.attr.zone", "b").build()
)
);
List<String> nodes_in_zone_c = internalCluster().startDataOnlyNodes(
dataNodeCountPerAZ,
Settings.builder().put(commonSettings).put("node.attr.zone", "c").build()
zoneToNodesMap.put(
"c",
internalCluster().startDataOnlyNodes(
dataNodeCountPerAZ,
Settings.builder().put(commonSettings).put("node.attr.zone", "c").build()
)
);
ensureStableCluster(15);
ClusterHealthResponse health = client().admin()
Expand All @@ -420,7 +430,20 @@ private void assertNodesRemovedAfterZoneDecommission(boolean originalClusterMana
tempZones.remove(originalClusterManagerZone);
zoneToDecommission = randomFrom(tempZones);
}
String activeNode = getNonDecommissionedNode(internalCluster().clusterService().state(), zoneToDecommission);
String activeNode;
switch (zoneToDecommission) {
case "a":
activeNode = randomFrom(randomFrom(zoneToNodesMap.get("b")), randomFrom(zoneToNodesMap.get("c")));
break;
case "b":
activeNode = randomFrom(randomFrom(zoneToNodesMap.get("a")), randomFrom(zoneToNodesMap.get("c")));
break;
case "c":
activeNode = randomFrom(randomFrom(zoneToNodesMap.get("a")), randomFrom(zoneToNodesMap.get("b")));
break;
default:
throw new IllegalStateException("unexpected zone decommissioned");
}

logger.info("--> setting shard routing weights for weighted round robin");
Map<String, Double> weights = new HashMap<>(Map.of("a", 1.0, "b", 1.0, "c", 1.0));
Expand Down Expand Up @@ -631,8 +654,8 @@ public void testDecommissionStatusUpdatePublishedToAllNodes() throws ExecutionEx
assertTrue(weightedRoutingResponse.isAcknowledged());

logger.info("--> starting decommissioning nodes in zone {}", 'c');
String activeNode = randomFrom(dataNodes.get(0), dataNodes.get(1));
DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "c");
String activeNode = getNonDecommissionedNode(internalCluster().clusterService().state(), "c");
// Set the timeout to 0 to do immediate Decommission
DecommissionRequest decommissionRequest = new DecommissionRequest(decommissionAttribute);
decommissionRequest.setNoDelay(true);
Expand Down Expand Up @@ -860,16 +883,6 @@ public void testDecommissionFailedWithOnlyOneAttributeValue() throws Exception {
ensureStableCluster(6, TimeValue.timeValueMinutes(2));
}

private String getNonDecommissionedNode(ClusterState clusterState, String decommissionedZone) {
List<String> allNodes = new ArrayList<>();
for (DiscoveryNode node : clusterState.nodes()) {
if (node.getAttributes().get("zone").equals(decommissionedZone) == false) {
allNodes.add(node.getName());
}
}
return randomFrom(allNodes);
}

private static class WaitForFailedDecommissionState implements ClusterStateObserver.Listener {

final CountDownLatch doneLatch;
Expand Down

0 comments on commit 059b614

Please sign in to comment.