From 64e4b2f5712f5026413546893efa4814db99b57b Mon Sep 17 00:00:00 2001 From: Himanshu Kandwal Date: Mon, 8 Jul 2024 15:33:56 -0700 Subject: [PATCH 1/6] [apache/helix] -- Added detail in the Exception message for WAGED rebalance (hard constraint) failures. --- .../constraints/ConstraintBasedAlgorithm.java | 16 ++++------ .../constraints/FaultZoneAwareConstraint.java | 12 ++++---- .../waged/constraints/HardConstraint.java | 30 ++++++++++++++++++- .../constraints/NodeCapacityConstraint.java | 11 +++---- .../NodeMaxPartitionLimitConstraint.java | 15 +++++----- .../ReplicaActivateConstraint.java | 8 ++--- .../SamePartitionOnInstanceConstraint.java | 9 +++--- .../constraints/ValidGroupTagConstraint.java | 10 +++---- .../TestConstraintBasedAlgorithm.java | 13 ++++---- .../TestFaultZoneAwareConstraint.java | 6 ++-- .../TestNodeCapacityConstraint.java | 4 +-- .../TestNodeMaxPartitionLimitConstraint.java | 4 +-- .../TestPartitionActivateConstraint.java | 6 ++-- .../TestReplicaActivateConstraint.java | 4 +-- ...TestSamePartitionOnInstanceConstraint.java | 4 +-- .../TestValidGroupTagConstraint.java | 6 ++-- 16 files changed, 93 insertions(+), 65 deletions(-) diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java index 60d43764a4..c8a12751e2 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java @@ -33,6 +33,7 @@ import com.google.common.collect.Maps; import org.apache.helix.HelixRebalanceException; import org.apache.helix.controller.rebalancer.waged.RebalanceAlgorithm; +import org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint.ValidationResult; import org.apache.helix.controller.rebalancer.waged.model.AssignableNode; import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica; import org.apache.helix.controller.rebalancer.waged.model.ClusterContext; @@ -120,15 +121,16 @@ public OptimalAssignment calculate(ClusterModel clusterModel) throws HelixRebala private Optional getNodeWithHighestPoints(AssignableReplica replica, List assignableNodes, ClusterContext clusterContext, Set busyInstances, OptimalAssignment optimalAssignment) { - Map> hardConstraintFailures = new ConcurrentHashMap<>(); + Map> hardConstraintFailures = new ConcurrentHashMap<>(); List candidateNodes = assignableNodes.parallelStream().filter(candidateNode -> { boolean isValid = true; // need to record all the failure reasons and it gives us the ability to debug/fix the runtime // cluster environment for (HardConstraint hardConstraint : _hardConstraints) { - if (!hardConstraint.isAssignmentValid(candidateNode, replica, clusterContext)) { + ValidationResult validationResult = hardConstraint.isAssignmentValid(candidateNode, replica, clusterContext); + if (!validationResult.isSuccess()) { hardConstraintFailures.computeIfAbsent(candidateNode, node -> new ArrayList<>()) - .add(hardConstraint); + .add(validationResult.getErrorMsg()); isValid = false; } } @@ -136,8 +138,7 @@ private Optional getNodeWithHighestPoints(AssignableReplica repl }).collect(Collectors.toList()); if (candidateNodes.isEmpty()) { - optimalAssignment.recordAssignmentFailure(replica, - Maps.transformValues(hardConstraintFailures, this::convertFailureReasons)); + optimalAssignment.recordAssignmentFailure(replica, hardConstraintFailures); return Optional.empty(); } @@ -174,11 +175,6 @@ private double getAssignmentNormalizedScore(AssignableNode node, AssignableRepli return sum; } - private List convertFailureReasons(List hardConstraints) { - return hardConstraints.stream().map(HardConstraint::getDescription) - .collect(Collectors.toList()); - } - private static class AssignableReplicaWithScore implements Comparable { private final AssignableReplica _replica; private float _score = 0; diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java index e0992b5d4d..bb27110c2c 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java @@ -26,27 +26,27 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint.ValidationResult.fail; class FaultZoneAwareConstraint extends HardConstraint { private static final Logger LOG = LoggerFactory.getLogger(FaultZoneAwareConstraint.class); @Override - boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, + ValidationResult isAssignmentValid(AssignableNode node, AssignableReplica replica, ClusterContext clusterContext) { if (!node.hasFaultZone()) { - return true; + return ValidationResult.OK; } Set partitionsForResourceAndFaultZone = clusterContext.getPartitionsForResourceAndFaultZone(replica.getResourceName(), node.getFaultZone()); if (partitionsForResourceAndFaultZone.contains(replica.getPartitionName())) { - LOG.debug("A fault zone cannot contain more than 1 replica of same partition. Found replica for partition: {}", - replica.getPartitionName()); - return false; + return fail(String.format("A fault zone cannot contain more than 1 replica of same partition. Found replica for partition: %s", + replica.getPartitionName())); } - return true; + return ValidationResult.OK; } @Override diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java index a0e2ff8c47..b5e79e31e5 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java @@ -29,11 +29,39 @@ */ abstract class HardConstraint { + /** + * Result of the constraint validation, which contains the error message if the validation fails. + */ + static class ValidationResult { + + public static final ValidationResult OK = new ValidationResult(true, null); + + private final boolean success; + private final String errorMsg; + + public static ValidationResult fail(String errorMsg) { + return new ValidationResult(false, errorMsg); + } + + private ValidationResult(boolean success, String errorMsg) { + this.success = success; + this.errorMsg = errorMsg; + } + + public String getErrorMsg() { + return errorMsg; + } + + public boolean isSuccess() { + return success; + } + } + /** * Check if the replica could be assigned to the node * @return True if the proposed assignment is valid; False otherwise */ - abstract boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, + abstract ValidationResult isAssignmentValid(AssignableNode node, AssignableReplica replica, ClusterContext clusterContext); /** diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java index 056d7f8424..8724d0af19 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java @@ -20,19 +20,21 @@ */ import java.util.Map; + import org.apache.helix.controller.rebalancer.waged.model.AssignableNode; import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica; import org.apache.helix.controller.rebalancer.waged.model.ClusterContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint.ValidationResult.fail; class NodeCapacityConstraint extends HardConstraint { private static final Logger LOG = LoggerFactory.getLogger(NodeCapacityConstraint.class); @Override - boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, + ValidationResult isAssignmentValid(AssignableNode node, AssignableReplica replica, ClusterContext clusterContext) { Map nodeCapacity = node.getRemainingCapacity(); Map replicaCapacity = replica.getCapacity(); @@ -40,13 +42,12 @@ boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, for (String key : replicaCapacity.keySet()) { if (nodeCapacity.containsKey(key)) { if (nodeCapacity.get(key) < replicaCapacity.get(key)) { - LOG.debug("Node has insufficient capacity for: {}. Left available: {}, Required: {}", - key, nodeCapacity.get(key), replicaCapacity.get(key)); - return false; + return fail(String.format("Node has insufficient capacity for: %s. Left available: %s, Required: %s", + key, nodeCapacity.get(key), replicaCapacity.get(key))); } } } - return true; + return ValidationResult.OK; } @Override diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java index deedf4aa5a..0268947d2e 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java @@ -31,15 +31,14 @@ class NodeMaxPartitionLimitConstraint extends HardConstraint { private static final Logger LOG = LoggerFactory.getLogger(NodeMaxPartitionLimitConstraint.class); @Override - boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, + ValidationResult isAssignmentValid(AssignableNode node, AssignableReplica replica, ClusterContext clusterContext) { boolean exceedMaxPartitionLimit = node.getMaxPartition() < 0 || node.getAssignedReplicaCount() < node.getMaxPartition(); if (!exceedMaxPartitionLimit) { - LOG.debug("Cannot exceed the max number of partitions ({}) limitation on node. Assigned replica count: {}", - node.getMaxPartition(), node.getAssignedReplicaCount()); - return false; + return ValidationResult.fail(String.format("Cannot exceed the max number of partitions (%s) limitation on node. Assigned replica count: %s", + node.getMaxPartition(), node.getAssignedReplicaCount())); } int resourceMaxPartitionsPerInstance = replica.getResourceMaxPartitionsPerInstance(); @@ -48,11 +47,11 @@ boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, || assignedPartitionsByResourceSize < resourceMaxPartitionsPerInstance; if (!exceedResourceMaxPartitionLimit) { - LOG.debug("Cannot exceed the max number of partitions per resource ({}) limitation on node. Assigned replica count: {}", - resourceMaxPartitionsPerInstance, assignedPartitionsByResourceSize); - return false; + return ValidationResult.fail(String.format("Cannot exceed the max number of partitions per resource (%s) limitation on node. Assigned replica count: %s", + resourceMaxPartitionsPerInstance, assignedPartitionsByResourceSize)); + } - return true; + return ValidationResult.OK; } @Override diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java index 13e380a0a7..818b47ef97 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java @@ -27,21 +27,21 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint.ValidationResult.fail; class ReplicaActivateConstraint extends HardConstraint { private static final Logger LOG = LoggerFactory.getLogger(ReplicaActivateConstraint.class); @Override - boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, + ValidationResult isAssignmentValid(AssignableNode node, AssignableReplica replica, ClusterContext clusterContext) { List disabledPartitions = node.getDisabledPartitionsMap().get(replica.getResourceName()); if (disabledPartitions != null && disabledPartitions.contains(replica.getPartitionName())) { - LOG.debug("Cannot assign the inactive replica: {}", replica.getPartitionName()); - return false; + return fail(String.format("Cannot assign the inactive replica: %s", replica.getPartitionName())); } - return true; + return ValidationResult.OK; } @Override diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java index e6de9989c2..bddeb47787 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java @@ -20,27 +20,28 @@ */ import java.util.Set; + import org.apache.helix.controller.rebalancer.waged.model.AssignableNode; import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica; import org.apache.helix.controller.rebalancer.waged.model.ClusterContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint.ValidationResult.fail; class SamePartitionOnInstanceConstraint extends HardConstraint { private static final Logger LOG = LoggerFactory.getLogger(SamePartitionOnInstanceConstraint.class); @Override - boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, + ValidationResult isAssignmentValid(AssignableNode node, AssignableReplica replica, ClusterContext clusterContext) { Set assignedPartitionsByResource = node.getAssignedPartitionsByResource(replica.getResourceName()); if (assignedPartitionsByResource.contains(replica.getPartitionName())) { - LOG.debug("Same partition ({}) of different states cannot co-exist in one instance", replica.getPartitionName()); - return false; + return fail(String.format("Same partition (%s) of different states cannot co-exist in one instance", replica.getPartitionName())); } - return true; + return ValidationResult.OK; } @Override diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java index b1e257c878..650c1002e8 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java @@ -25,23 +25,23 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint.ValidationResult.fail; class ValidGroupTagConstraint extends HardConstraint { private static final Logger LOG = LoggerFactory.getLogger(SamePartitionOnInstanceConstraint.class); @Override - boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, + ValidationResult isAssignmentValid(AssignableNode node, AssignableReplica replica, ClusterContext clusterContext) { if (!replica.hasResourceInstanceGroupTag()) { - return true; + return ValidationResult.OK; } if (!node.getInstanceTags().contains(replica.getResourceInstanceGroupTag())) { - LOG.debug("Instance doesn't have the tag of the replica ({})", replica.getResourceInstanceGroupTag()); - return false; + return fail(String.format("Instance doesn't have the tag of the replica (%s)", replica.getResourceInstanceGroupTag())); } - return true; + return ValidationResult.OK; } @Override diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java index 0a75002f16..d01799328a 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java @@ -19,13 +19,15 @@ * under the License. */ -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.Set; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import org.apache.helix.HelixRebalanceException; +import org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint.ValidationResult; import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica; import org.apache.helix.controller.rebalancer.waged.model.ClusterModel; import org.apache.helix.controller.rebalancer.waged.model.ClusterModelTestHelper; @@ -33,6 +35,7 @@ import org.testng.Assert; import org.testng.annotations.Test; +import static org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint.ValidationResult.fail; import static org.mockito.Matchers.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -43,7 +46,7 @@ public class TestConstraintBasedAlgorithm { public void testCalculateNoValidAssignment() throws IOException, HelixRebalanceException { HardConstraint mockHardConstraint = mock(HardConstraint.class); SoftConstraint mockSoftConstraint = mock(SoftConstraint.class); - when(mockHardConstraint.isAssignmentValid(any(), any(), any())).thenReturn(false); + when(mockHardConstraint.isAssignmentValid(any(), any(), any())).thenReturn(fail("failure")); when(mockSoftConstraint.getAssignmentNormalizedScore(any(), any(), any())).thenReturn(1.0); ConstraintBasedAlgorithm algorithm = new ConstraintBasedAlgorithm(ImmutableList.of(mockHardConstraint), @@ -56,7 +59,7 @@ public void testCalculateNoValidAssignment() throws IOException, HelixRebalanceE public void testCalculateWithValidAssignment() throws IOException, HelixRebalanceException { HardConstraint mockHardConstraint = mock(HardConstraint.class); SoftConstraint mockSoftConstraint = mock(SoftConstraint.class); - when(mockHardConstraint.isAssignmentValid(any(), any(), any())).thenReturn(true); + when(mockHardConstraint.isAssignmentValid(any(), any(), any())).thenReturn(ValidationResult.OK); when(mockSoftConstraint.getAssignmentNormalizedScore(any(), any(), any())).thenReturn(1.0); ConstraintBasedAlgorithm algorithm = new ConstraintBasedAlgorithm(ImmutableList.of(mockHardConstraint), @@ -71,7 +74,7 @@ public void testCalculateWithValidAssignment() throws IOException, HelixRebalanc public void testCalculateScoreDeterminism() throws IOException, HelixRebalanceException { HardConstraint mockHardConstraint = mock(HardConstraint.class); SoftConstraint mockSoftConstraint = mock(SoftConstraint.class); - when(mockHardConstraint.isAssignmentValid(any(), any(), any())).thenReturn(true); + when(mockHardConstraint.isAssignmentValid(any(), any(), any())).thenReturn(ValidationResult.OK); when(mockSoftConstraint.getAssignmentNormalizedScore(any(), any(), any())).thenReturn(1.0); ConstraintBasedAlgorithm algorithm = new ConstraintBasedAlgorithm(ImmutableList.of(mockHardConstraint), diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestFaultZoneAwareConstraint.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestFaultZoneAwareConstraint.java index 2de5af93a8..4f3da78b17 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestFaultZoneAwareConstraint.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestFaultZoneAwareConstraint.java @@ -56,7 +56,7 @@ public void inValidWhenFaultZoneAlreadyAssigned() { ImmutableSet.of(TEST_PARTITION)); Assert.assertFalse( - _faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + _faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); } @Test @@ -65,7 +65,7 @@ public void validWhenEmptyAssignment() { when(_clusterContext.getPartitionsForResourceAndFaultZone(TEST_RESOURCE, TEST_ZONE)).thenReturn(Collections.emptySet()); Assert.assertTrue( - _faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + _faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); } @Test @@ -73,6 +73,6 @@ public void validWhenNoFaultZone() { when(_testNode.hasFaultZone()).thenReturn(false); Assert.assertTrue( - _faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + _faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); } } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestNodeCapacityConstraint.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestNodeCapacityConstraint.java index 4365a42495..d45e271913 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestNodeCapacityConstraint.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestNodeCapacityConstraint.java @@ -41,7 +41,7 @@ public void testConstraintValidWhenNodeHasEnoughSpace() { String key = "testKey"; when(_testNode.getRemainingCapacity()).thenReturn(ImmutableMap.of(key, 10)); when(_testReplica.getCapacity()).thenReturn(ImmutableMap.of(key, 5)); - Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); } @Test @@ -49,6 +49,6 @@ public void testConstraintInValidWhenNodeHasInsufficientSpace() { String key = "testKey"; when(_testNode.getRemainingCapacity()).thenReturn(ImmutableMap.of(key, 1)); when(_testReplica.getCapacity()).thenReturn(ImmutableMap.of(key, 5)); - Assert.assertFalse(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + Assert.assertFalse(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); } } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestNodeMaxPartitionLimitConstraint.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestNodeMaxPartitionLimitConstraint.java index 4cb7466283..35d1cc47c5 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestNodeMaxPartitionLimitConstraint.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestNodeMaxPartitionLimitConstraint.java @@ -44,13 +44,13 @@ public void testConstraintValid() { when(_testNode.getAssignedPartitionsByResource(TEST_RESOURCE)) .thenReturn(Collections.emptySet()); when(_testReplica.getResourceMaxPartitionsPerInstance()).thenReturn(5); - Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); } @Test public void testConstraintInvalid() { when(_testNode.getAssignedReplicaCount()).thenReturn(10); when(_testNode.getMaxPartition()).thenReturn(5); - Assert.assertFalse(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + Assert.assertFalse(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); } } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestPartitionActivateConstraint.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestPartitionActivateConstraint.java index ecfdaa2029..eb8eb897e0 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestPartitionActivateConstraint.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestPartitionActivateConstraint.java @@ -47,10 +47,10 @@ public void testConstraintValid() { when(_testReplica.getPartitionName()).thenReturn(TEST_PARTITION); when(_testNode.getDisabledPartitionsMap()) .thenReturn(ImmutableMap.of(TEST_PARTITION, Collections.emptyList())); - Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); when(_testNode.getDisabledPartitionsMap()) .thenReturn(ImmutableMap.of(TEST_PARTITION, ImmutableList.of("dummy"))); - Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); } @Test @@ -59,6 +59,6 @@ public void testConstraintInvalidWhenReplicaIsDisabled() { when(_testReplica.getPartitionName()).thenReturn(TEST_PARTITION); when(_testNode.getDisabledPartitionsMap()) .thenReturn(ImmutableMap.of(TEST_PARTITION, ImmutableList.of(TEST_PARTITION))); - Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); } } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestReplicaActivateConstraint.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestReplicaActivateConstraint.java index 3593982802..f6d15e8753 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestReplicaActivateConstraint.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestReplicaActivateConstraint.java @@ -54,7 +54,7 @@ public void validWhenEmptyDisabledReplicaMap() { when(_testReplica.getPartitionName()).thenReturn(TEST_PARTITION); when(_testNode.getDisabledPartitionsMap()).thenReturn(disabledReplicaMap); - Assert.assertTrue(_faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + Assert.assertTrue(_faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); } @Test @@ -66,7 +66,7 @@ public void invalidWhenPartitionIsDisabled() { when(_testReplica.getPartitionName()).thenReturn(TEST_PARTITION); when(_testNode.getDisabledPartitionsMap()).thenReturn(disabledReplicaMap); - Assert.assertFalse(_faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + Assert.assertFalse(_faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); } } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestSamePartitionOnInstanceConstraint.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestSamePartitionOnInstanceConstraint.java index 50b0c037be..3ba010d729 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestSamePartitionOnInstanceConstraint.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestSamePartitionOnInstanceConstraint.java @@ -45,7 +45,7 @@ public void testConstraintValid() { when(_testReplica.getResourceName()).thenReturn(TEST_RESOURCE); when(_testReplica.getPartitionName()).thenReturn(TEST_PARTITIOIN); - Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); } @Test @@ -54,6 +54,6 @@ public void testConstraintInValid() { .thenReturn(ImmutableSet.of(TEST_PARTITIOIN)); when(_testReplica.getResourceName()).thenReturn(TEST_RESOURCE); when(_testReplica.getPartitionName()).thenReturn(TEST_PARTITIOIN); - Assert.assertFalse(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + Assert.assertFalse(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); } } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestValidGroupTagConstraint.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestValidGroupTagConstraint.java index 8d02b3d801..67c9b4396d 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestValidGroupTagConstraint.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestValidGroupTagConstraint.java @@ -45,7 +45,7 @@ public void testConstraintValid() { when(_testReplica.getResourceInstanceGroupTag()).thenReturn(TEST_TAG); when(_testNode.getInstanceTags()).thenReturn(ImmutableSet.of(TEST_TAG)); - Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); } @Test @@ -54,13 +54,13 @@ public void testConstraintInValid() { when(_testReplica.getResourceInstanceGroupTag()).thenReturn(TEST_TAG); when(_testNode.getInstanceTags()).thenReturn(Collections.emptySet()); - Assert.assertFalse(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + Assert.assertFalse(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); } @Test public void testConstraintWhenReplicaHasNoTag() { when(_testReplica.hasResourceInstanceGroupTag()).thenReturn(false); - Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); + Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); } } From 87dd9e3ab84fc912b5e1b36ba036534ffd0210b2 Mon Sep 17 00:00:00 2001 From: Himanshu Kandwal Date: Mon, 15 Jul 2024 12:35:16 -0700 Subject: [PATCH 2/6] [apache/helix] -- Added detail in the Exception message for WAGED rebalance (hard constraint) failures. --- .../constraints/ConstraintBasedAlgorithm.java | 50 ++++++++++++++-- .../constraints/FaultZoneAwareConstraint.java | 14 +++-- .../waged/constraints/HardConstraint.java | 57 ++++++++++--------- .../constraints/NodeCapacityConstraint.java | 13 +++-- .../NodeMaxPartitionLimitConstraint.java | 17 +++--- .../ReplicaActivateConstraint.java | 10 ++-- .../SamePartitionOnInstanceConstraint.java | 11 ++-- .../constraints/ValidGroupTagConstraint.java | 12 ++-- .../TestConstraintBasedAlgorithm.java | 51 ++++++++++++++--- .../TestFaultZoneAwareConstraint.java | 6 +- .../TestNodeCapacityConstraint.java | 4 +- .../TestNodeMaxPartitionLimitConstraint.java | 4 +- .../TestPartitionActivateConstraint.java | 6 +- .../TestReplicaActivateConstraint.java | 4 +- ...TestSamePartitionOnInstanceConstraint.java | 4 +- .../TestValidGroupTagConstraint.java | 6 +- 16 files changed, 178 insertions(+), 91 deletions(-) diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java index c8a12751e2..82b6414028 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; @@ -33,7 +34,6 @@ import com.google.common.collect.Maps; import org.apache.helix.HelixRebalanceException; import org.apache.helix.controller.rebalancer.waged.RebalanceAlgorithm; -import org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint.ValidationResult; import org.apache.helix.controller.rebalancer.waged.model.AssignableNode; import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica; import org.apache.helix.controller.rebalancer.waged.model.ClusterContext; @@ -58,6 +58,7 @@ class ConstraintBasedAlgorithm implements RebalanceAlgorithm { private static final Logger LOG = LoggerFactory.getLogger(ConstraintBasedAlgorithm.class); private final List _hardConstraints; private final Map _softConstraints; + private final Set logEnabledReplicas = new HashSet<>(); ConstraintBasedAlgorithm(List hardConstraints, Map softConstraints) { @@ -121,16 +122,15 @@ public OptimalAssignment calculate(ClusterModel clusterModel) throws HelixRebala private Optional getNodeWithHighestPoints(AssignableReplica replica, List assignableNodes, ClusterContext clusterContext, Set busyInstances, OptimalAssignment optimalAssignment) { - Map> hardConstraintFailures = new ConcurrentHashMap<>(); + Map> hardConstraintFailures = new ConcurrentHashMap<>(); List candidateNodes = assignableNodes.parallelStream().filter(candidateNode -> { boolean isValid = true; // need to record all the failure reasons and it gives us the ability to debug/fix the runtime // cluster environment for (HardConstraint hardConstraint : _hardConstraints) { - ValidationResult validationResult = hardConstraint.isAssignmentValid(candidateNode, replica, clusterContext); - if (!validationResult.isSuccess()) { + if (!hardConstraint.isAssignmentValid(candidateNode, replica, clusterContext)) { hardConstraintFailures.computeIfAbsent(candidateNode, node -> new ArrayList<>()) - .add(validationResult.getErrorMsg()); + .add(hardConstraint); isValid = false; } } @@ -138,10 +138,14 @@ private Optional getNodeWithHighestPoints(AssignableReplica repl }).collect(Collectors.toList()); if (candidateNodes.isEmpty()) { - optimalAssignment.recordAssignmentFailure(replica, hardConstraintFailures); + enableFullLoggingForReplica(replica); + optimalAssignment.recordAssignmentFailure(replica, + Maps.transformValues(hardConstraintFailures, this::convertFailureReasons)); return Optional.empty(); } + removeReplicaFromFullLogging(replica); + return candidateNodes.parallelStream().map(node -> new HashMap.SimpleEntry<>(node, getAssignmentNormalizedScore(node, replica, clusterContext))) .max((nodeEntry1, nodeEntry2) -> { @@ -175,6 +179,40 @@ private double getAssignmentNormalizedScore(AssignableNode node, AssignableRepli return sum; } + private List convertFailureReasons(List hardConstraints) { + return hardConstraints.stream().map(HardConstraint::getDescription) + .collect(Collectors.toList()); + } + + /** + * Adds replica for full logging in all hard constraints + * @param replica replica to be added + */ + private void enableFullLoggingForReplica(AssignableReplica replica) { + if (logEnabledReplicas.contains(replica.toString())) { + return; + } + + // enable the full logging for replica in all hard constraints + for (HardConstraint constraint : _hardConstraints) { + constraint.addReplicaToLogEnabledSet(replica); + } + logEnabledReplicas.add(replica.toString()); + } + + /** + * Removes the replica from full logging in all hard constraints (if added previously) + * @param replica replica to be removed + */ + private void removeReplicaFromFullLogging(AssignableReplica replica) { + if (logEnabledReplicas.contains(replica.toString())) { + for (HardConstraint constraint : _hardConstraints) { + constraint.removeReplicaFromLogEnabledSet(replica); + } + logEnabledReplicas.remove(replica.toString()); + } + } + private static class AssignableReplicaWithScore implements Comparable { private final AssignableReplica _replica; private float _score = 0; diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java index bb27110c2c..c6c60a99b7 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java @@ -26,27 +26,29 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint.ValidationResult.fail; class FaultZoneAwareConstraint extends HardConstraint { private static final Logger LOG = LoggerFactory.getLogger(FaultZoneAwareConstraint.class); @Override - ValidationResult isAssignmentValid(AssignableNode node, AssignableReplica replica, + boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, ClusterContext clusterContext) { if (!node.hasFaultZone()) { - return ValidationResult.OK; + return true; } Set partitionsForResourceAndFaultZone = clusterContext.getPartitionsForResourceAndFaultZone(replica.getResourceName(), node.getFaultZone()); if (partitionsForResourceAndFaultZone.contains(replica.getPartitionName())) { - return fail(String.format("A fault zone cannot contain more than 1 replica of same partition. Found replica for partition: %s", - replica.getPartitionName())); + if (isLoggingEnabled(replica)) { + LOG.info("A fault zone cannot contain more than 1 replica of same partition. Found replica for partition: {}", + replica.getPartitionName()); + } + return false; } - return ValidationResult.OK; + return true; } @Override diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java index b5e79e31e5..223fa6ff00 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java @@ -19,6 +19,9 @@ * under the License. */ +import java.util.HashSet; +import java.util.Set; + import org.apache.helix.controller.rebalancer.waged.model.AssignableNode; import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica; import org.apache.helix.controller.rebalancer.waged.model.ClusterContext; @@ -29,39 +32,13 @@ */ abstract class HardConstraint { - /** - * Result of the constraint validation, which contains the error message if the validation fails. - */ - static class ValidationResult { - - public static final ValidationResult OK = new ValidationResult(true, null); - - private final boolean success; - private final String errorMsg; - - public static ValidationResult fail(String errorMsg) { - return new ValidationResult(false, errorMsg); - } - - private ValidationResult(boolean success, String errorMsg) { - this.success = success; - this.errorMsg = errorMsg; - } - - public String getErrorMsg() { - return errorMsg; - } - - public boolean isSuccess() { - return success; - } - } + protected Set logEnabledReplicas = new HashSet<>(); /** * Check if the replica could be assigned to the node * @return True if the proposed assignment is valid; False otherwise */ - abstract ValidationResult isAssignmentValid(AssignableNode node, AssignableReplica replica, + abstract boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, ClusterContext clusterContext); /** @@ -72,4 +49,28 @@ abstract ValidationResult isAssignmentValid(AssignableNode node, AssignableRepli String getDescription() { return getClass().getName(); } + + /** + * Check if the logging is enabled for the replica + * @param replica The replica to be checked + */ + public boolean isLoggingEnabled(AssignableReplica replica) { + return logEnabledReplicas.contains(replica.toString()); + } + + /** + * Add the replica to the log enabled set + * @param replica The replica to be added + */ + public void addReplicaToLogEnabledSet(AssignableReplica replica) { + logEnabledReplicas.add(replica.toString()); + } + + /** + * Remove the replica from the log enabled set + * @param replica The replica to be removed + */ + public void removeReplicaFromLogEnabledSet(AssignableReplica replica) { + logEnabledReplicas.remove(replica.toString()); + } } diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java index 8724d0af19..f4a25f4d41 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java @@ -20,21 +20,19 @@ */ import java.util.Map; - import org.apache.helix.controller.rebalancer.waged.model.AssignableNode; import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica; import org.apache.helix.controller.rebalancer.waged.model.ClusterContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint.ValidationResult.fail; class NodeCapacityConstraint extends HardConstraint { private static final Logger LOG = LoggerFactory.getLogger(NodeCapacityConstraint.class); @Override - ValidationResult isAssignmentValid(AssignableNode node, AssignableReplica replica, + boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, ClusterContext clusterContext) { Map nodeCapacity = node.getRemainingCapacity(); Map replicaCapacity = replica.getCapacity(); @@ -42,12 +40,15 @@ ValidationResult isAssignmentValid(AssignableNode node, AssignableReplica replic for (String key : replicaCapacity.keySet()) { if (nodeCapacity.containsKey(key)) { if (nodeCapacity.get(key) < replicaCapacity.get(key)) { - return fail(String.format("Node has insufficient capacity for: %s. Left available: %s, Required: %s", - key, nodeCapacity.get(key), replicaCapacity.get(key))); + if (isLoggingEnabled(replica)) { + LOG.info("Node has insufficient capacity for: {}. Left available: {}, Required: {}", + key, nodeCapacity.get(key), replicaCapacity.get(key)); + } + return false; } } } - return ValidationResult.OK; + return true; } @Override diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java index 0268947d2e..36532282a2 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java @@ -31,14 +31,15 @@ class NodeMaxPartitionLimitConstraint extends HardConstraint { private static final Logger LOG = LoggerFactory.getLogger(NodeMaxPartitionLimitConstraint.class); @Override - ValidationResult isAssignmentValid(AssignableNode node, AssignableReplica replica, + boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, ClusterContext clusterContext) { boolean exceedMaxPartitionLimit = node.getMaxPartition() < 0 || node.getAssignedReplicaCount() < node.getMaxPartition(); if (!exceedMaxPartitionLimit) { - return ValidationResult.fail(String.format("Cannot exceed the max number of partitions (%s) limitation on node. Assigned replica count: %s", - node.getMaxPartition(), node.getAssignedReplicaCount())); + LOG.debug("Cannot exceed the max number of partitions ({}) limitation on node. Assigned replica count: {}", + node.getMaxPartition(), node.getAssignedReplicaCount()); + return false; } int resourceMaxPartitionsPerInstance = replica.getResourceMaxPartitionsPerInstance(); @@ -47,11 +48,13 @@ ValidationResult isAssignmentValid(AssignableNode node, AssignableReplica replic || assignedPartitionsByResourceSize < resourceMaxPartitionsPerInstance; if (!exceedResourceMaxPartitionLimit) { - return ValidationResult.fail(String.format("Cannot exceed the max number of partitions per resource (%s) limitation on node. Assigned replica count: %s", - resourceMaxPartitionsPerInstance, assignedPartitionsByResourceSize)); - + if (isLoggingEnabled(replica)) { + LOG.info("Cannot exceed the max number of partitions per resource ({}) limitation on node. Assigned replica count: {}", + resourceMaxPartitionsPerInstance, assignedPartitionsByResourceSize); + } + return false; } - return ValidationResult.OK; + return true; } @Override diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java index 818b47ef97..20a1b9e8cb 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java @@ -27,21 +27,23 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint.ValidationResult.fail; class ReplicaActivateConstraint extends HardConstraint { private static final Logger LOG = LoggerFactory.getLogger(ReplicaActivateConstraint.class); @Override - ValidationResult isAssignmentValid(AssignableNode node, AssignableReplica replica, + boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, ClusterContext clusterContext) { List disabledPartitions = node.getDisabledPartitionsMap().get(replica.getResourceName()); if (disabledPartitions != null && disabledPartitions.contains(replica.getPartitionName())) { - return fail(String.format("Cannot assign the inactive replica: %s", replica.getPartitionName())); + if (isLoggingEnabled(replica)) { + LOG.info("Cannot assign the inactive replica: {}", replica.getPartitionName()); + } + return false; } - return ValidationResult.OK; + return true; } @Override diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java index bddeb47787..4a93049940 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java @@ -20,28 +20,29 @@ */ import java.util.Set; - import org.apache.helix.controller.rebalancer.waged.model.AssignableNode; import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica; import org.apache.helix.controller.rebalancer.waged.model.ClusterContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint.ValidationResult.fail; class SamePartitionOnInstanceConstraint extends HardConstraint { private static final Logger LOG = LoggerFactory.getLogger(SamePartitionOnInstanceConstraint.class); @Override - ValidationResult isAssignmentValid(AssignableNode node, AssignableReplica replica, + boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, ClusterContext clusterContext) { Set assignedPartitionsByResource = node.getAssignedPartitionsByResource(replica.getResourceName()); if (assignedPartitionsByResource.contains(replica.getPartitionName())) { - return fail(String.format("Same partition (%s) of different states cannot co-exist in one instance", replica.getPartitionName())); + if (isLoggingEnabled(replica)) { + LOG.info("Same partition ({}) of different states cannot co-exist in one instance", replica.getPartitionName()); + } + return false; } - return ValidationResult.OK; + return true; } @Override diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java index 650c1002e8..eab486a23b 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java @@ -25,23 +25,25 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint.ValidationResult.fail; class ValidGroupTagConstraint extends HardConstraint { private static final Logger LOG = LoggerFactory.getLogger(SamePartitionOnInstanceConstraint.class); @Override - ValidationResult isAssignmentValid(AssignableNode node, AssignableReplica replica, + boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, ClusterContext clusterContext) { if (!replica.hasResourceInstanceGroupTag()) { - return ValidationResult.OK; + return true; } if (!node.getInstanceTags().contains(replica.getResourceInstanceGroupTag())) { - return fail(String.format("Instance doesn't have the tag of the replica (%s)", replica.getResourceInstanceGroupTag())); + if (isLoggingEnabled(replica)) { + LOG.info("Instance doesn't have the tag of the replica ({})", replica.getResourceInstanceGroupTag()); + } + return false; } - return ValidationResult.OK; + return true; } @Override diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java index d01799328a..9937c1b528 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java @@ -27,7 +27,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.apache.helix.HelixRebalanceException; -import org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint.ValidationResult; import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica; import org.apache.helix.controller.rebalancer.waged.model.ClusterModel; import org.apache.helix.controller.rebalancer.waged.model.ClusterModelTestHelper; @@ -35,31 +34,69 @@ import org.testng.Assert; import org.testng.annotations.Test; -import static org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint.ValidationResult.fail; import static org.mockito.Matchers.any; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; public class TestConstraintBasedAlgorithm { - @Test(expectedExceptions = HelixRebalanceException.class) - public void testCalculateNoValidAssignment() throws IOException, HelixRebalanceException { + + @Test + public void testCalculateNoValidAssignment() throws IOException { HardConstraint mockHardConstraint = mock(HardConstraint.class); SoftConstraint mockSoftConstraint = mock(SoftConstraint.class); - when(mockHardConstraint.isAssignmentValid(any(), any(), any())).thenReturn(fail("failure")); + when(mockHardConstraint.isAssignmentValid(any(), any(), any())).thenReturn(false); when(mockSoftConstraint.getAssignmentNormalizedScore(any(), any(), any())).thenReturn(1.0); ConstraintBasedAlgorithm algorithm = new ConstraintBasedAlgorithm(ImmutableList.of(mockHardConstraint), ImmutableMap.of(mockSoftConstraint, 1f)); ClusterModel clusterModel = new ClusterModelTestHelper().getDefaultClusterModel(); + try { + algorithm.calculate(clusterModel); + } catch (HelixRebalanceException ex) { + Assert.assertEquals(ex.getFailureType(), HelixRebalanceException.Type.FAILED_TO_CALCULATE); + } + + verify(mockHardConstraint, never()).removeReplicaFromLogEnabledSet(any()); + verify(mockHardConstraint, times(1)).addReplicaToLogEnabledSet(any()); + verify(mockHardConstraint, times(1)).isAssignmentValid(any(), any(), any()); + } + + @Test + public void testCalculateNoValidAssignmentFirstAndThenRecovery() throws IOException, HelixRebalanceException { + HardConstraint mockHardConstraint = mock(HardConstraint.class); + SoftConstraint mockSoftConstraint = mock(SoftConstraint.class); + when(mockHardConstraint.isAssignmentValid(any(), any(), any())) + .thenReturn(false) // hard constraint fails + .thenReturn(true); // hard constraint recovers + when(mockSoftConstraint.getAssignmentNormalizedScore(any(), any(), any())).thenReturn(1.0); + ConstraintBasedAlgorithm algorithm = + new ConstraintBasedAlgorithm(ImmutableList.of(mockHardConstraint), + ImmutableMap.of(mockSoftConstraint, 1f)); + ClusterModel clusterModel = new ClusterModelTestHelper().getDefaultClusterModel(); + try { + algorithm.calculate(clusterModel); + } catch (HelixRebalanceException ex) { + Assert.assertEquals(ex.getFailureType(), HelixRebalanceException.Type.FAILED_TO_CALCULATE); + } + + verify(mockHardConstraint, never()).removeReplicaFromLogEnabledSet(any()); + verify(mockHardConstraint, times(1)).addReplicaToLogEnabledSet(any()); + verify(mockHardConstraint, times(1)).isAssignmentValid(any(), any(), any()); + + // calling again for recovery (no exception) algorithm.calculate(clusterModel); + verify(mockHardConstraint, times(1)).removeReplicaFromLogEnabledSet(any()); } @Test public void testCalculateWithValidAssignment() throws IOException, HelixRebalanceException { HardConstraint mockHardConstraint = mock(HardConstraint.class); SoftConstraint mockSoftConstraint = mock(SoftConstraint.class); - when(mockHardConstraint.isAssignmentValid(any(), any(), any())).thenReturn(ValidationResult.OK); + when(mockHardConstraint.isAssignmentValid(any(), any(), any())).thenReturn(true); when(mockSoftConstraint.getAssignmentNormalizedScore(any(), any(), any())).thenReturn(1.0); ConstraintBasedAlgorithm algorithm = new ConstraintBasedAlgorithm(ImmutableList.of(mockHardConstraint), @@ -74,7 +111,7 @@ public void testCalculateWithValidAssignment() throws IOException, HelixRebalanc public void testCalculateScoreDeterminism() throws IOException, HelixRebalanceException { HardConstraint mockHardConstraint = mock(HardConstraint.class); SoftConstraint mockSoftConstraint = mock(SoftConstraint.class); - when(mockHardConstraint.isAssignmentValid(any(), any(), any())).thenReturn(ValidationResult.OK); + when(mockHardConstraint.isAssignmentValid(any(), any(), any())).thenReturn(true); when(mockSoftConstraint.getAssignmentNormalizedScore(any(), any(), any())).thenReturn(1.0); ConstraintBasedAlgorithm algorithm = new ConstraintBasedAlgorithm(ImmutableList.of(mockHardConstraint), diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestFaultZoneAwareConstraint.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestFaultZoneAwareConstraint.java index 4f3da78b17..2de5af93a8 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestFaultZoneAwareConstraint.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestFaultZoneAwareConstraint.java @@ -56,7 +56,7 @@ public void inValidWhenFaultZoneAlreadyAssigned() { ImmutableSet.of(TEST_PARTITION)); Assert.assertFalse( - _faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + _faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); } @Test @@ -65,7 +65,7 @@ public void validWhenEmptyAssignment() { when(_clusterContext.getPartitionsForResourceAndFaultZone(TEST_RESOURCE, TEST_ZONE)).thenReturn(Collections.emptySet()); Assert.assertTrue( - _faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + _faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); } @Test @@ -73,6 +73,6 @@ public void validWhenNoFaultZone() { when(_testNode.hasFaultZone()).thenReturn(false); Assert.assertTrue( - _faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + _faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); } } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestNodeCapacityConstraint.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestNodeCapacityConstraint.java index d45e271913..4365a42495 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestNodeCapacityConstraint.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestNodeCapacityConstraint.java @@ -41,7 +41,7 @@ public void testConstraintValidWhenNodeHasEnoughSpace() { String key = "testKey"; when(_testNode.getRemainingCapacity()).thenReturn(ImmutableMap.of(key, 10)); when(_testReplica.getCapacity()).thenReturn(ImmutableMap.of(key, 5)); - Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); } @Test @@ -49,6 +49,6 @@ public void testConstraintInValidWhenNodeHasInsufficientSpace() { String key = "testKey"; when(_testNode.getRemainingCapacity()).thenReturn(ImmutableMap.of(key, 1)); when(_testReplica.getCapacity()).thenReturn(ImmutableMap.of(key, 5)); - Assert.assertFalse(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + Assert.assertFalse(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); } } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestNodeMaxPartitionLimitConstraint.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestNodeMaxPartitionLimitConstraint.java index 35d1cc47c5..4cb7466283 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestNodeMaxPartitionLimitConstraint.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestNodeMaxPartitionLimitConstraint.java @@ -44,13 +44,13 @@ public void testConstraintValid() { when(_testNode.getAssignedPartitionsByResource(TEST_RESOURCE)) .thenReturn(Collections.emptySet()); when(_testReplica.getResourceMaxPartitionsPerInstance()).thenReturn(5); - Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); } @Test public void testConstraintInvalid() { when(_testNode.getAssignedReplicaCount()).thenReturn(10); when(_testNode.getMaxPartition()).thenReturn(5); - Assert.assertFalse(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + Assert.assertFalse(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); } } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestPartitionActivateConstraint.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestPartitionActivateConstraint.java index eb8eb897e0..ecfdaa2029 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestPartitionActivateConstraint.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestPartitionActivateConstraint.java @@ -47,10 +47,10 @@ public void testConstraintValid() { when(_testReplica.getPartitionName()).thenReturn(TEST_PARTITION); when(_testNode.getDisabledPartitionsMap()) .thenReturn(ImmutableMap.of(TEST_PARTITION, Collections.emptyList())); - Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); when(_testNode.getDisabledPartitionsMap()) .thenReturn(ImmutableMap.of(TEST_PARTITION, ImmutableList.of("dummy"))); - Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); } @Test @@ -59,6 +59,6 @@ public void testConstraintInvalidWhenReplicaIsDisabled() { when(_testReplica.getPartitionName()).thenReturn(TEST_PARTITION); when(_testNode.getDisabledPartitionsMap()) .thenReturn(ImmutableMap.of(TEST_PARTITION, ImmutableList.of(TEST_PARTITION))); - Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); } } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestReplicaActivateConstraint.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestReplicaActivateConstraint.java index f6d15e8753..3593982802 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestReplicaActivateConstraint.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestReplicaActivateConstraint.java @@ -54,7 +54,7 @@ public void validWhenEmptyDisabledReplicaMap() { when(_testReplica.getPartitionName()).thenReturn(TEST_PARTITION); when(_testNode.getDisabledPartitionsMap()).thenReturn(disabledReplicaMap); - Assert.assertTrue(_faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + Assert.assertTrue(_faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); } @Test @@ -66,7 +66,7 @@ public void invalidWhenPartitionIsDisabled() { when(_testReplica.getPartitionName()).thenReturn(TEST_PARTITION); when(_testNode.getDisabledPartitionsMap()).thenReturn(disabledReplicaMap); - Assert.assertFalse(_faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + Assert.assertFalse(_faultZoneAwareConstraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); } } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestSamePartitionOnInstanceConstraint.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestSamePartitionOnInstanceConstraint.java index 3ba010d729..50b0c037be 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestSamePartitionOnInstanceConstraint.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestSamePartitionOnInstanceConstraint.java @@ -45,7 +45,7 @@ public void testConstraintValid() { when(_testReplica.getResourceName()).thenReturn(TEST_RESOURCE); when(_testReplica.getPartitionName()).thenReturn(TEST_PARTITIOIN); - Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); } @Test @@ -54,6 +54,6 @@ public void testConstraintInValid() { .thenReturn(ImmutableSet.of(TEST_PARTITIOIN)); when(_testReplica.getResourceName()).thenReturn(TEST_RESOURCE); when(_testReplica.getPartitionName()).thenReturn(TEST_PARTITIOIN); - Assert.assertFalse(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + Assert.assertFalse(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); } } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestValidGroupTagConstraint.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestValidGroupTagConstraint.java index 67c9b4396d..8d02b3d801 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestValidGroupTagConstraint.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestValidGroupTagConstraint.java @@ -45,7 +45,7 @@ public void testConstraintValid() { when(_testReplica.getResourceInstanceGroupTag()).thenReturn(TEST_TAG); when(_testNode.getInstanceTags()).thenReturn(ImmutableSet.of(TEST_TAG)); - Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); } @Test @@ -54,13 +54,13 @@ public void testConstraintInValid() { when(_testReplica.getResourceInstanceGroupTag()).thenReturn(TEST_TAG); when(_testNode.getInstanceTags()).thenReturn(Collections.emptySet()); - Assert.assertFalse(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + Assert.assertFalse(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); } @Test public void testConstraintWhenReplicaHasNoTag() { when(_testReplica.hasResourceInstanceGroupTag()).thenReturn(false); - Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext).isSuccess()); + Assert.assertTrue(_constraint.isAssignmentValid(_testNode, _testReplica, _clusterContext)); } } From 76c065a3f6f825dffdce0d2e0282bdd734445520 Mon Sep 17 00:00:00 2001 From: Himanshu Kandwal Date: Mon, 15 Jul 2024 13:32:09 -0700 Subject: [PATCH 3/6] [apache/helix] -- Added detail in the Exception message for WAGED rebalance (hard constraint) failures. --- .../constraints/ConstraintBasedAlgorithm.java | 25 ++++++++----------- .../waged/constraints/HardConstraint.java | 20 +++++---------- .../TestConstraintBasedAlgorithm.java | 8 +++--- 3 files changed, 20 insertions(+), 33 deletions(-) diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java index 82b6414028..a83becddaa 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java @@ -31,6 +31,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Collectors; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Maps; import org.apache.helix.HelixRebalanceException; import org.apache.helix.controller.rebalancer.waged.RebalanceAlgorithm; @@ -64,6 +65,10 @@ class ConstraintBasedAlgorithm implements RebalanceAlgorithm { Map softConstraints) { _hardConstraints = hardConstraints; _softConstraints = softConstraints; + + for (HardConstraint constraint : hardConstraints) { + constraint.setLogEnabledReplicas(logEnabledReplicas); + } } @Override @@ -189,14 +194,6 @@ private List convertFailureReasons(List hardConstraints) * @param replica replica to be added */ private void enableFullLoggingForReplica(AssignableReplica replica) { - if (logEnabledReplicas.contains(replica.toString())) { - return; - } - - // enable the full logging for replica in all hard constraints - for (HardConstraint constraint : _hardConstraints) { - constraint.addReplicaToLogEnabledSet(replica); - } logEnabledReplicas.add(replica.toString()); } @@ -205,12 +202,12 @@ private void enableFullLoggingForReplica(AssignableReplica replica) { * @param replica replica to be removed */ private void removeReplicaFromFullLogging(AssignableReplica replica) { - if (logEnabledReplicas.contains(replica.toString())) { - for (HardConstraint constraint : _hardConstraints) { - constraint.removeReplicaFromLogEnabledSet(replica); - } - logEnabledReplicas.remove(replica.toString()); - } + logEnabledReplicas.remove(replica.toString()); + } + + @VisibleForTesting + Set getLogEnabledReplicas() { + return logEnabledReplicas; } private static class AssignableReplicaWithScore implements Comparable { diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java index 223fa6ff00..ba9c3f253c 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java @@ -19,7 +19,6 @@ * under the License. */ -import java.util.HashSet; import java.util.Set; import org.apache.helix.controller.rebalancer.waged.model.AssignableNode; @@ -32,7 +31,7 @@ */ abstract class HardConstraint { - protected Set logEnabledReplicas = new HashSet<>(); + protected Set logEnabledReplicas; /** * Check if the replica could be assigned to the node @@ -55,22 +54,15 @@ String getDescription() { * @param replica The replica to be checked */ public boolean isLoggingEnabled(AssignableReplica replica) { - return logEnabledReplicas.contains(replica.toString()); + return logEnabledReplicas != null && logEnabledReplicas.contains(replica.toString()); } /** - * Add the replica to the log enabled set - * @param replica The replica to be added + * Set the reference of the replicas that need to be logged. + * @param logEnabledReplicas The replicas that need to be logged */ - public void addReplicaToLogEnabledSet(AssignableReplica replica) { - logEnabledReplicas.add(replica.toString()); + public void setLogEnabledReplicas(Set logEnabledReplicas) { + this.logEnabledReplicas = logEnabledReplicas; } - /** - * Remove the replica from the log enabled set - * @param replica The replica to be removed - */ - public void removeReplicaFromLogEnabledSet(AssignableReplica replica) { - logEnabledReplicas.remove(replica.toString()); - } } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java index 9937c1b528..96f2da8454 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java @@ -60,8 +60,7 @@ public void testCalculateNoValidAssignment() throws IOException { Assert.assertEquals(ex.getFailureType(), HelixRebalanceException.Type.FAILED_TO_CALCULATE); } - verify(mockHardConstraint, never()).removeReplicaFromLogEnabledSet(any()); - verify(mockHardConstraint, times(1)).addReplicaToLogEnabledSet(any()); + Assert.assertEquals(algorithm.getLogEnabledReplicas().size(), 1); verify(mockHardConstraint, times(1)).isAssignmentValid(any(), any(), any()); } @@ -83,13 +82,12 @@ public void testCalculateNoValidAssignmentFirstAndThenRecovery() throws IOExcept Assert.assertEquals(ex.getFailureType(), HelixRebalanceException.Type.FAILED_TO_CALCULATE); } - verify(mockHardConstraint, never()).removeReplicaFromLogEnabledSet(any()); - verify(mockHardConstraint, times(1)).addReplicaToLogEnabledSet(any()); + Assert.assertEquals(algorithm.getLogEnabledReplicas().size(), 1); verify(mockHardConstraint, times(1)).isAssignmentValid(any(), any(), any()); // calling again for recovery (no exception) algorithm.calculate(clusterModel); - verify(mockHardConstraint, times(1)).removeReplicaFromLogEnabledSet(any()); + Assert.assertEquals(algorithm.getLogEnabledReplicas().size(), 0); } @Test From 13a7db76566001b14dd328ecc4a70de241c2fcd8 Mon Sep 17 00:00:00 2001 From: Himanshu Kandwal Date: Tue, 16 Jul 2024 14:05:10 -0700 Subject: [PATCH 4/6] [apache/helix] -- Added detail in the Exception message for WAGED rebalance (hard constraint) failures. --- .../constraints/ConstraintBasedAlgorithm.java | 30 ++++++++++--------- .../constraints/FaultZoneAwareConstraint.java | 2 +- .../waged/constraints/HardConstraint.java | 14 ++++----- .../constraints/NodeCapacityConstraint.java | 2 +- .../NodeMaxPartitionLimitConstraint.java | 2 +- .../ReplicaActivateConstraint.java | 2 +- .../SamePartitionOnInstanceConstraint.java | 2 +- .../constraints/ValidGroupTagConstraint.java | 2 +- .../waged/model/ClusterContext.java | 4 +++ .../TestConstraintBasedAlgorithm.java | 8 +++-- 10 files changed, 38 insertions(+), 30 deletions(-) diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java index a83becddaa..b7de3c4870 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java @@ -59,7 +59,7 @@ class ConstraintBasedAlgorithm implements RebalanceAlgorithm { private static final Logger LOG = LoggerFactory.getLogger(ConstraintBasedAlgorithm.class); private final List _hardConstraints; private final Map _softConstraints; - private final Set logEnabledReplicas = new HashSet<>(); + private final Set logEnabledClusters = new HashSet<>(); ConstraintBasedAlgorithm(List hardConstraints, Map softConstraints) { @@ -67,7 +67,7 @@ class ConstraintBasedAlgorithm implements RebalanceAlgorithm { _softConstraints = softConstraints; for (HardConstraint constraint : hardConstraints) { - constraint.setLogEnabledReplicas(logEnabledReplicas); + constraint.setLogEnabledReplicas(logEnabledClusters); } } @@ -143,13 +143,15 @@ private Optional getNodeWithHighestPoints(AssignableReplica repl }).collect(Collectors.toList()); if (candidateNodes.isEmpty()) { - enableFullLoggingForReplica(replica); + LOG.info("Found no eligible candidate nodes. Enabling hard constraint level logging for cluster: {}", clusterContext.getClusterName()); + enableFullLoggingForCluster(clusterContext.getClusterName()); optimalAssignment.recordAssignmentFailure(replica, Maps.transformValues(hardConstraintFailures, this::convertFailureReasons)); return Optional.empty(); } - removeReplicaFromFullLogging(replica); + LOG.info("Disabling hard constraint level logging for cluster: {}", clusterContext.getClusterName()); + removeClusterFromFullLogging(clusterContext.getClusterName()); return candidateNodes.parallelStream().map(node -> new HashMap.SimpleEntry<>(node, getAssignmentNormalizedScore(node, replica, clusterContext))) @@ -190,24 +192,24 @@ private List convertFailureReasons(List hardConstraints) } /** - * Adds replica for full logging in all hard constraints - * @param replica replica to be added + * Adds cluster name for full logging in all hard constraints + * @param clusterName cluster to be added */ - private void enableFullLoggingForReplica(AssignableReplica replica) { - logEnabledReplicas.add(replica.toString()); + private void enableFullLoggingForCluster(String clusterName) { + logEnabledClusters.add(clusterName); } /** - * Removes the replica from full logging in all hard constraints (if added previously) - * @param replica replica to be removed + * Removes the cluster from full logging in all hard constraints (if added previously) + * @param clusterName cluster to be removed */ - private void removeReplicaFromFullLogging(AssignableReplica replica) { - logEnabledReplicas.remove(replica.toString()); + private void removeClusterFromFullLogging(String clusterName) { + logEnabledClusters.remove(clusterName); } @VisibleForTesting - Set getLogEnabledReplicas() { - return logEnabledReplicas; + Set getLogEnabledClusters() { + return logEnabledClusters; } private static class AssignableReplicaWithScore implements Comparable { diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java index c6c60a99b7..08e3792880 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java @@ -42,7 +42,7 @@ boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, clusterContext.getPartitionsForResourceAndFaultZone(replica.getResourceName(), node.getFaultZone()); if (partitionsForResourceAndFaultZone.contains(replica.getPartitionName())) { - if (isLoggingEnabled(replica)) { + if (isLoggingEnabled(clusterContext.getClusterName())) { LOG.info("A fault zone cannot contain more than 1 replica of same partition. Found replica for partition: {}", replica.getPartitionName()); } diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java index ba9c3f253c..f50773c40e 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java @@ -31,7 +31,7 @@ */ abstract class HardConstraint { - protected Set logEnabledReplicas; + protected Set logEnabledClusters; /** * Check if the replica could be assigned to the node @@ -51,18 +51,18 @@ String getDescription() { /** * Check if the logging is enabled for the replica - * @param replica The replica to be checked + * @param clusterName The name of the cluster to be checked */ - public boolean isLoggingEnabled(AssignableReplica replica) { - return logEnabledReplicas != null && logEnabledReplicas.contains(replica.toString()); + public boolean isLoggingEnabled(String clusterName) { + return logEnabledClusters != null && logEnabledClusters.contains(clusterName); } /** * Set the reference of the replicas that need to be logged. - * @param logEnabledReplicas The replicas that need to be logged + * @param logEnabledClusters The clusters that need to be logged */ - public void setLogEnabledReplicas(Set logEnabledReplicas) { - this.logEnabledReplicas = logEnabledReplicas; + public void setLogEnabledReplicas(Set logEnabledClusters) { + this.logEnabledClusters = logEnabledClusters; } } diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java index f4a25f4d41..4e0537fe45 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java @@ -40,7 +40,7 @@ boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, for (String key : replicaCapacity.keySet()) { if (nodeCapacity.containsKey(key)) { if (nodeCapacity.get(key) < replicaCapacity.get(key)) { - if (isLoggingEnabled(replica)) { + if (isLoggingEnabled(clusterContext.getClusterName())) { LOG.info("Node has insufficient capacity for: {}. Left available: {}, Required: {}", key, nodeCapacity.get(key), replicaCapacity.get(key)); } diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java index 36532282a2..e15ca0dbf7 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java @@ -48,7 +48,7 @@ boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, || assignedPartitionsByResourceSize < resourceMaxPartitionsPerInstance; if (!exceedResourceMaxPartitionLimit) { - if (isLoggingEnabled(replica)) { + if (isLoggingEnabled(clusterContext.getClusterName())) { LOG.info("Cannot exceed the max number of partitions per resource ({}) limitation on node. Assigned replica count: {}", resourceMaxPartitionsPerInstance, assignedPartitionsByResourceSize); } diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java index 20a1b9e8cb..d6fcfd2c41 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java @@ -38,7 +38,7 @@ boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, List disabledPartitions = node.getDisabledPartitionsMap().get(replica.getResourceName()); if (disabledPartitions != null && disabledPartitions.contains(replica.getPartitionName())) { - if (isLoggingEnabled(replica)) { + if (isLoggingEnabled(clusterContext.getClusterName())) { LOG.info("Cannot assign the inactive replica: {}", replica.getPartitionName()); } return false; diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java index 4a93049940..a8f69ffff1 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java @@ -37,7 +37,7 @@ boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, Set assignedPartitionsByResource = node.getAssignedPartitionsByResource(replica.getResourceName()); if (assignedPartitionsByResource.contains(replica.getPartitionName())) { - if (isLoggingEnabled(replica)) { + if (isLoggingEnabled(clusterContext.getClusterName())) { LOG.info("Same partition ({}) of different states cannot co-exist in one instance", replica.getPartitionName()); } return false; diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java index eab486a23b..91cb5d3d23 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java @@ -38,7 +38,7 @@ boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, } if (!node.getInstanceTags().contains(replica.getResourceInstanceGroupTag())) { - if (isLoggingEnabled(replica)) { + if (isLoggingEnabled(clusterContext.getClusterName())) { LOG.info("Instance doesn't have the tag of the replica ({})", replica.getResourceInstanceGroupTag()); } return false; diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterContext.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterContext.java index 1b3ad2a879..32debca653 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterContext.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterContext.java @@ -183,6 +183,10 @@ public Map getClusterCapacityMap() { return _clusterCapacityMap; } + public String getClusterName() { + return _clusterName; + } + public Set getPartitionsForResourceAndFaultZone(String resourceName, String faultZoneId) { return _assignmentForFaultZoneMap.getOrDefault(faultZoneId, Collections.emptyMap()) .getOrDefault(resourceName, Collections.emptySet()); diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java index 96f2da8454..cdaf148bcf 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java @@ -60,7 +60,8 @@ public void testCalculateNoValidAssignment() throws IOException { Assert.assertEquals(ex.getFailureType(), HelixRebalanceException.Type.FAILED_TO_CALCULATE); } - Assert.assertEquals(algorithm.getLogEnabledReplicas().size(), 1); + Assert.assertEquals(algorithm.getLogEnabledClusters().size(), 1); + Assert.assertTrue(algorithm.getLogEnabledClusters().contains(clusterModel.getContext().getClusterName())); verify(mockHardConstraint, times(1)).isAssignmentValid(any(), any(), any()); } @@ -82,12 +83,13 @@ public void testCalculateNoValidAssignmentFirstAndThenRecovery() throws IOExcept Assert.assertEquals(ex.getFailureType(), HelixRebalanceException.Type.FAILED_TO_CALCULATE); } - Assert.assertEquals(algorithm.getLogEnabledReplicas().size(), 1); + Assert.assertEquals(algorithm.getLogEnabledClusters().size(), 1); + Assert.assertTrue(algorithm.getLogEnabledClusters().contains(clusterModel.getContext().getClusterName())); verify(mockHardConstraint, times(1)).isAssignmentValid(any(), any(), any()); // calling again for recovery (no exception) algorithm.calculate(clusterModel); - Assert.assertEquals(algorithm.getLogEnabledReplicas().size(), 0); + Assert.assertEquals(algorithm.getLogEnabledClusters().size(), 0); } @Test From 613abbcb87414ba6787bd1e50134cf935abe9429 Mon Sep 17 00:00:00 2001 From: Himanshu Kandwal Date: Tue, 16 Jul 2024 14:11:52 -0700 Subject: [PATCH 5/6] [apache/helix] -- Added detail in the Exception message for WAGED rebalance (hard constraint) failures. --- .../rebalancer/waged/constraints/ConstraintBasedAlgorithm.java | 2 +- .../controller/rebalancer/waged/constraints/HardConstraint.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java index b7de3c4870..99f24a0dca 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java @@ -67,7 +67,7 @@ class ConstraintBasedAlgorithm implements RebalanceAlgorithm { _softConstraints = softConstraints; for (HardConstraint constraint : hardConstraints) { - constraint.setLogEnabledReplicas(logEnabledClusters); + constraint.setLogEnabledClusters(logEnabledClusters); } } diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java index f50773c40e..c2f1d09454 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java @@ -61,7 +61,7 @@ public boolean isLoggingEnabled(String clusterName) { * Set the reference of the replicas that need to be logged. * @param logEnabledClusters The clusters that need to be logged */ - public void setLogEnabledReplicas(Set logEnabledClusters) { + public void setLogEnabledClusters(Set logEnabledClusters) { this.logEnabledClusters = logEnabledClusters; } From e77bf3aa242d93286210c448e0722f43ff798bf9 Mon Sep 17 00:00:00 2001 From: Himanshu Kandwal Date: Tue, 16 Jul 2024 16:01:27 -0700 Subject: [PATCH 6/6] [apache/helix] -- Added detail in the Exception message for WAGED rebalance (hard constraint) failures. --- .../constraints/ConstraintBasedAlgorithm.java | 30 +++++++------------ .../constraints/FaultZoneAwareConstraint.java | 2 +- .../waged/constraints/HardConstraint.java | 19 +++--------- .../constraints/NodeCapacityConstraint.java | 2 +- .../NodeMaxPartitionLimitConstraint.java | 2 +- .../ReplicaActivateConstraint.java | 2 +- .../SamePartitionOnInstanceConstraint.java | 2 +- .../constraints/ValidGroupTagConstraint.java | 2 +- .../TestConstraintBasedAlgorithm.java | 10 +++---- 9 files changed, 26 insertions(+), 45 deletions(-) diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java index 99f24a0dca..e7510987a1 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java @@ -59,16 +59,11 @@ class ConstraintBasedAlgorithm implements RebalanceAlgorithm { private static final Logger LOG = LoggerFactory.getLogger(ConstraintBasedAlgorithm.class); private final List _hardConstraints; private final Map _softConstraints; - private final Set logEnabledClusters = new HashSet<>(); ConstraintBasedAlgorithm(List hardConstraints, Map softConstraints) { _hardConstraints = hardConstraints; _softConstraints = softConstraints; - - for (HardConstraint constraint : hardConstraints) { - constraint.setLogEnabledClusters(logEnabledClusters); - } } @Override @@ -144,14 +139,14 @@ private Optional getNodeWithHighestPoints(AssignableReplica repl if (candidateNodes.isEmpty()) { LOG.info("Found no eligible candidate nodes. Enabling hard constraint level logging for cluster: {}", clusterContext.getClusterName()); - enableFullLoggingForCluster(clusterContext.getClusterName()); + enableFullLoggingForCluster(); optimalAssignment.recordAssignmentFailure(replica, Maps.transformValues(hardConstraintFailures, this::convertFailureReasons)); return Optional.empty(); } LOG.info("Disabling hard constraint level logging for cluster: {}", clusterContext.getClusterName()); - removeClusterFromFullLogging(clusterContext.getClusterName()); + removeFullLoggingForCluster(); return candidateNodes.parallelStream().map(node -> new HashMap.SimpleEntry<>(node, getAssignmentNormalizedScore(node, replica, clusterContext))) @@ -192,24 +187,21 @@ private List convertFailureReasons(List hardConstraints) } /** - * Adds cluster name for full logging in all hard constraints - * @param clusterName cluster to be added + * Enables logging of failures in all hard constraints */ - private void enableFullLoggingForCluster(String clusterName) { - logEnabledClusters.add(clusterName); + private void enableFullLoggingForCluster() { + for (HardConstraint hardConstraint : _hardConstraints) { + hardConstraint.setEnableLogging(true); + } } /** * Removes the cluster from full logging in all hard constraints (if added previously) - * @param clusterName cluster to be removed */ - private void removeClusterFromFullLogging(String clusterName) { - logEnabledClusters.remove(clusterName); - } - - @VisibleForTesting - Set getLogEnabledClusters() { - return logEnabledClusters; + private void removeFullLoggingForCluster() { + for (HardConstraint hardConstraint : _hardConstraints) { + hardConstraint.setEnableLogging(false); + } } private static class AssignableReplicaWithScore implements Comparable { diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java index 08e3792880..459e358533 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/FaultZoneAwareConstraint.java @@ -42,7 +42,7 @@ boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, clusterContext.getPartitionsForResourceAndFaultZone(replica.getResourceName(), node.getFaultZone()); if (partitionsForResourceAndFaultZone.contains(replica.getPartitionName())) { - if (isLoggingEnabled(clusterContext.getClusterName())) { + if (enableLogging) { LOG.info("A fault zone cannot contain more than 1 replica of same partition. Found replica for partition: {}", replica.getPartitionName()); } diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java index c2f1d09454..281adae347 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/HardConstraint.java @@ -19,8 +19,6 @@ * under the License. */ -import java.util.Set; - import org.apache.helix.controller.rebalancer.waged.model.AssignableNode; import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica; import org.apache.helix.controller.rebalancer.waged.model.ClusterContext; @@ -31,7 +29,7 @@ */ abstract class HardConstraint { - protected Set logEnabledClusters; + protected boolean enableLogging = false; /** * Check if the replica could be assigned to the node @@ -50,19 +48,10 @@ String getDescription() { } /** - * Check if the logging is enabled for the replica - * @param clusterName The name of the cluster to be checked - */ - public boolean isLoggingEnabled(String clusterName) { - return logEnabledClusters != null && logEnabledClusters.contains(clusterName); - } - - /** - * Set the reference of the replicas that need to be logged. - * @param logEnabledClusters The clusters that need to be logged + * Sets the flag to enable constraint level logging */ - public void setLogEnabledClusters(Set logEnabledClusters) { - this.logEnabledClusters = logEnabledClusters; + public void setEnableLogging(boolean enableLogging) { + this.enableLogging = enableLogging; } } diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java index 4e0537fe45..a686037de8 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeCapacityConstraint.java @@ -40,7 +40,7 @@ boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, for (String key : replicaCapacity.keySet()) { if (nodeCapacity.containsKey(key)) { if (nodeCapacity.get(key) < replicaCapacity.get(key)) { - if (isLoggingEnabled(clusterContext.getClusterName())) { + if (enableLogging) { LOG.info("Node has insufficient capacity for: {}. Left available: {}, Required: {}", key, nodeCapacity.get(key), replicaCapacity.get(key)); } diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java index e15ca0dbf7..08b2ee19ed 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/NodeMaxPartitionLimitConstraint.java @@ -48,7 +48,7 @@ boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, || assignedPartitionsByResourceSize < resourceMaxPartitionsPerInstance; if (!exceedResourceMaxPartitionLimit) { - if (isLoggingEnabled(clusterContext.getClusterName())) { + if (enableLogging) { LOG.info("Cannot exceed the max number of partitions per resource ({}) limitation on node. Assigned replica count: {}", resourceMaxPartitionsPerInstance, assignedPartitionsByResourceSize); } diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java index d6fcfd2c41..efee2a219d 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ReplicaActivateConstraint.java @@ -38,7 +38,7 @@ boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, List disabledPartitions = node.getDisabledPartitionsMap().get(replica.getResourceName()); if (disabledPartitions != null && disabledPartitions.contains(replica.getPartitionName())) { - if (isLoggingEnabled(clusterContext.getClusterName())) { + if (enableLogging) { LOG.info("Cannot assign the inactive replica: {}", replica.getPartitionName()); } return false; diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java index a8f69ffff1..3dd7c65bb5 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SamePartitionOnInstanceConstraint.java @@ -37,7 +37,7 @@ boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, Set assignedPartitionsByResource = node.getAssignedPartitionsByResource(replica.getResourceName()); if (assignedPartitionsByResource.contains(replica.getPartitionName())) { - if (isLoggingEnabled(clusterContext.getClusterName())) { + if (enableLogging) { LOG.info("Same partition ({}) of different states cannot co-exist in one instance", replica.getPartitionName()); } return false; diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java index 91cb5d3d23..d0609e0059 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ValidGroupTagConstraint.java @@ -38,7 +38,7 @@ boolean isAssignmentValid(AssignableNode node, AssignableReplica replica, } if (!node.getInstanceTags().contains(replica.getResourceInstanceGroupTag())) { - if (isLoggingEnabled(clusterContext.getClusterName())) { + if (enableLogging) { LOG.info("Instance doesn't have the tag of the replica ({})", replica.getResourceInstanceGroupTag()); } return false; diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java index cdaf148bcf..1b7b1b9443 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestConstraintBasedAlgorithm.java @@ -34,7 +34,9 @@ import org.testng.Assert; import org.testng.annotations.Test; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Matchers.any; +import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; @@ -60,8 +62,7 @@ public void testCalculateNoValidAssignment() throws IOException { Assert.assertEquals(ex.getFailureType(), HelixRebalanceException.Type.FAILED_TO_CALCULATE); } - Assert.assertEquals(algorithm.getLogEnabledClusters().size(), 1); - Assert.assertTrue(algorithm.getLogEnabledClusters().contains(clusterModel.getContext().getClusterName())); + verify(mockHardConstraint, times(1)).setEnableLogging(eq(true)); verify(mockHardConstraint, times(1)).isAssignmentValid(any(), any(), any()); } @@ -83,13 +84,12 @@ public void testCalculateNoValidAssignmentFirstAndThenRecovery() throws IOExcept Assert.assertEquals(ex.getFailureType(), HelixRebalanceException.Type.FAILED_TO_CALCULATE); } - Assert.assertEquals(algorithm.getLogEnabledClusters().size(), 1); - Assert.assertTrue(algorithm.getLogEnabledClusters().contains(clusterModel.getContext().getClusterName())); + verify(mockHardConstraint, times(1)).setEnableLogging(eq(true)); verify(mockHardConstraint, times(1)).isAssignmentValid(any(), any(), any()); // calling again for recovery (no exception) algorithm.calculate(clusterModel); - Assert.assertEquals(algorithm.getLogEnabledClusters().size(), 0); + verify(mockHardConstraint, atLeastOnce()).setEnableLogging(eq(false)); } @Test