diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java index b03cead27e79..dc0ed1bf64f6 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.container.ContainerReplicaInfo; import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport; @@ -357,6 +358,8 @@ StartContainerBalancerResponseProto startContainerBalancer( */ boolean getContainerBalancerStatus() throws IOException; + ContainerBalancerStatusInfoResponseProto getContainerBalancerStatusInfo() throws IOException; + /** * returns the list of ratis peer roles. Currently only include peer address. */ diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java index b587cc924b06..fc130942ef60 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.Type; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.ScmConfig; import org.apache.hadoop.hdds.scm.ScmInfo; @@ -409,6 +410,8 @@ StartContainerBalancerResponseProto startContainerBalancer( */ boolean getContainerBalancerStatus() throws IOException; + ContainerBalancerStatusInfoResponseProto getContainerBalancerStatusInfo() throws IOException; + /** * Get Datanode usage information by ip or hostname or uuid. * diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index 330cfae30b2f..8ddacb0743f5 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -96,6 +96,8 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopContainerBalancerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ResetDeletedBlockRetryCountRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.Type; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoRequestProto; import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.container.ContainerID; @@ -942,6 +944,19 @@ public boolean getContainerBalancerStatus() throws IOException { } + @Override + public ContainerBalancerStatusInfoResponseProto getContainerBalancerStatusInfo() throws IOException { + + ContainerBalancerStatusInfoRequestProto request = + ContainerBalancerStatusInfoRequestProto.getDefaultInstance(); + ContainerBalancerStatusInfoResponseProto response = + submitRequest(Type.GetContainerBalancerStatusInfo, + builder -> builder.setContainerBalancerStatusInfoRequest(request)) + .getContainerBalancerStatusInfoResponse(); + return response; + + } + /** * Builds request for datanode usage information and receives response. * diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto index 49e71d2fe69a..943afee180df 100644 --- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto +++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto @@ -81,6 +81,7 @@ message ScmContainerLocationRequest { optional TransferLeadershipRequestProto transferScmLeadershipRequest = 42; optional GetFailedDeletedBlocksTxnRequestProto getFailedDeletedBlocksTxnRequest = 43; optional DecommissionScmRequestProto decommissionScmRequest = 44; + optional ContainerBalancerStatusInfoRequestProto containerBalancerStatusInfoRequest = 48; } message ScmContainerLocationResponse { @@ -133,6 +134,7 @@ message ScmContainerLocationResponse { optional TransferLeadershipResponseProto transferScmLeadershipResponse = 42; optional GetFailedDeletedBlocksTxnResponseProto getFailedDeletedBlocksTxnResponse = 43; optional DecommissionScmResponseProto decommissionScmResponse = 44; + optional ContainerBalancerStatusInfoResponseProto containerBalancerStatusInfoResponse = 48; enum Status { OK = 1; @@ -184,6 +186,7 @@ enum Type { TransferLeadership = 38; GetFailedDeletedBlocksTransaction = 39; DecommissionScm = 40; + GetContainerBalancerStatusInfo = 44; } /** @@ -582,6 +585,38 @@ message ContainerBalancerStatusResponseProto { required bool isRunning = 1; } +message ContainerBalancerStatusInfoRequestProto { + optional string traceID = 1; +} + +message ContainerBalancerStatusInfoResponseProto { + optional bool isRunning = 1; + optional ContainerBalancerStatusInfo containerBalancerStatusInfo = 2; +} +message ContainerBalancerStatusInfo { + optional uint64 startedAt = 1; + optional ContainerBalancerConfigurationProto configuration = 2; + repeated ContainerBalancerTaskIterationStatusInfo iterationsStatusInfo = 3; +} + +message ContainerBalancerTaskIterationStatusInfo { + optional int32 iterationNumber = 1; + optional string iterationResult = 2; + optional int64 sizeScheduledForMoveGB = 3; + optional int64 dataSizeMovedGB = 4; + optional int64 containerMovesScheduled = 5; + optional int64 containerMovesCompleted = 6; + optional int64 containerMovesFailed = 7; + optional int64 containerMovesTimeout = 8; + repeated NodeTransferInfo sizeEnteringNodesGB = 9; + repeated NodeTransferInfo sizeLeavingNodesGB = 10; +} + +message NodeTransferInfo { + optional string uuid = 1; + optional int64 dataVolumeGB = 2; +} + message DecommissionScmRequestProto { required string scmId = 1; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/AbstractFindTargetGreedy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/AbstractFindTargetGreedy.java index 660452b2d8b0..7293272d1e89 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/AbstractFindTargetGreedy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/AbstractFindTargetGreedy.java @@ -278,4 +278,8 @@ NodeManager getNodeManager() { return nodeManager; } + @Override + public Map getSizeEnteringNodes() { + return sizeEnteringNode; + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java index 7b5cbe9f21fc..427ab445cc9a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java @@ -31,6 +31,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.time.OffsetDateTime; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.ReentrantLock; @@ -53,6 +54,7 @@ public class ContainerBalancer extends StatefulService { private volatile Thread currentBalancingThread; private volatile ContainerBalancerTask task = null; private ReentrantLock lock; + private OffsetDateTime startedAt; /** * Constructs ContainerBalancer with the specified arguments. Initializes @@ -175,6 +177,24 @@ public ContainerBalancerTask.Status getBalancerStatus() { : ContainerBalancerTask.Status.STOPPED; } + /** + * Get balancer status info. + * + * @return balancer status info if balancer started + */ + public ContainerBalancerStatusInfo getBalancerStatusInfo() throws IOException { + if (isBalancerRunning()) { + ContainerBalancerConfigurationProto configProto = readConfiguration(ContainerBalancerConfigurationProto.class); + return new ContainerBalancerStatusInfo( + this.startedAt, + configProto, + task.getCurrentIterationsStatistic() + ); + } else { + return null; + } + + } /** * Checks if ContainerBalancer is in valid state to call stop. * @@ -204,6 +224,7 @@ public String getServiceName() { @Override public void start() throws IllegalContainerBalancerStateException, InvalidContainerBalancerConfigurationException { + startedAt = OffsetDateTime.now(); lock.lock(); try { // should be leader-ready, out of safe mode, and not running already @@ -251,6 +272,7 @@ public void start() throws IllegalContainerBalancerStateException, public void startBalancer(ContainerBalancerConfiguration configuration) throws IllegalContainerBalancerStateException, InvalidContainerBalancerConfigurationException, IOException { + startedAt = OffsetDateTime.now(); lock.lock(); try { // validates state, config, and then saves config diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerConfiguration.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerConfiguration.java index 8f9332e2d3ca..a316efd18291 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerConfiguration.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerConfiguration.java @@ -449,7 +449,7 @@ public String toString() { excludeNodes.equals("") ? "None" : excludeNodes); } - ContainerBalancerConfigurationProto.Builder toProtobufBuilder() { + public ContainerBalancerConfigurationProto.Builder toProtobufBuilder() { ContainerBalancerConfigurationProto.Builder builder = ContainerBalancerConfigurationProto.newBuilder(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java index 09558d3a6d4f..6446089db353 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java @@ -131,6 +131,11 @@ void incrementNumContainerMovesScheduledInLatestIteration(long valueToAdd) { this.numContainerMovesScheduledInLatestIteration.incr(valueToAdd); } + public void resetNumContainerMovesScheduledInLatestIteration() { + numContainerMovesScheduledInLatestIteration.incr( + -getNumContainerMovesScheduledInLatestIteration()); + } + /** * Gets the amount of data moved by Container Balancer in the latest * iteration. diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerStatusInfo.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerStatusInfo.java new file mode 100644 index 000000000000..cbe8385e53a7 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerStatusInfo.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.balancer; + +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; + +import java.time.OffsetDateTime; +import java.util.List; + +/** + * Info about balancer status. + */ +public class ContainerBalancerStatusInfo { + private final OffsetDateTime startedAt; + private final HddsProtos.ContainerBalancerConfigurationProto configuration; + private final List iterationsStatusInfo; + + public ContainerBalancerStatusInfo( + OffsetDateTime startedAt, + HddsProtos.ContainerBalancerConfigurationProto configuration, + List iterationsStatusInfo) { + this.startedAt = startedAt; + this.configuration = configuration; + this.iterationsStatusInfo = iterationsStatusInfo; + } + + public OffsetDateTime getStartedAt() { + return startedAt; + } + + public HddsProtos.ContainerBalancerConfigurationProto getConfiguration() { + return configuration; + } + + public List getIterationsStatusInfo() { + return iterationsStatusInfo; + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTask.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTask.java index abbc50ac86a5..5298343e3378 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTask.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTask.java @@ -51,11 +51,13 @@ import java.util.List; import java.util.Map; import java.util.NavigableSet; +import java.util.Objects; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.stream.Collectors; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NODE_REPORT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NODE_REPORT_INTERVAL_DEFAULT; @@ -119,6 +121,7 @@ public class ContainerBalancerTask implements Runnable { private IterationResult iterationResult; private int nextIterationIndex; private boolean delayStart; + private List iterationsStatistic; /** * Constructs ContainerBalancerTask with the specified arguments. @@ -160,6 +163,7 @@ public ContainerBalancerTask(StorageContainerManager scm, this.selectedSources = new HashSet<>(); this.selectedTargets = new HashSet<>(); findSourceStrategy = new FindSourceGreedy(nodeManager); + this.iterationsStatistic = new ArrayList<>(); } /** @@ -255,7 +259,9 @@ private void balance() { } IterationResult iR = doIteration(); + saveIterationStatistic(i, iR); metrics.incrementNumIterations(1); + LOG.info("Result of this iteration of Container Balancer: {}", iR); // if no new move option is generated, it means the cluster cannot be @@ -297,6 +303,85 @@ private void balance() { tryStopWithSaveConfiguration("Completed all iterations."); } + private void saveIterationStatistic(Integer iterationNumber, IterationResult iR) { + ContainerBalancerTaskIterationStatusInfo iterationStatistic = new ContainerBalancerTaskIterationStatusInfo( + iterationNumber, + iR.name(), + getSizeScheduledForMoveInLatestIteration() / OzoneConsts.GB, + metrics.getDataSizeMovedGBInLatestIteration(), + metrics.getNumContainerMovesScheduledInLatestIteration(), + metrics.getNumContainerMovesCompletedInLatestIteration(), + metrics.getNumContainerMovesFailedInLatestIteration(), + metrics.getNumContainerMovesTimeoutInLatestIteration(), + findTargetStrategy.getSizeEnteringNodes() + .entrySet() + .stream() + .filter(Objects::nonNull) + .filter(datanodeDetailsLongEntry -> datanodeDetailsLongEntry.getValue() > 0) + .collect( + Collectors.toMap( + entry -> entry.getKey().getUuid(), + entry -> entry.getValue() / OzoneConsts.GB + ) + ), + findSourceStrategy.getSizeLeavingNodes() + .entrySet() + .stream() + .filter(Objects::nonNull) + .filter(datanodeDetailsLongEntry -> datanodeDetailsLongEntry.getValue() > 0) + .collect( + Collectors.toMap( + entry -> entry.getKey().getUuid(), + entry -> entry.getValue() / OzoneConsts.GB + ) + ) + ); + iterationsStatistic.add(iterationStatistic); + } + + public List getCurrentIterationsStatistic() { + + int lastIterationNumber = iterationsStatistic.stream() + .mapToInt(ContainerBalancerTaskIterationStatusInfo::getIterationNumber) + .max() + .orElse(0); + + ContainerBalancerTaskIterationStatusInfo currentIterationStatistic = new ContainerBalancerTaskIterationStatusInfo( + lastIterationNumber + 1, + null, + getSizeScheduledForMoveInLatestIteration() / OzoneConsts.GB, + sizeActuallyMovedInLatestIteration / OzoneConsts.GB, + metrics.getNumContainerMovesScheduledInLatestIteration(), + metrics.getNumContainerMovesCompletedInLatestIteration(), + metrics.getNumContainerMovesFailedInLatestIteration(), + metrics.getNumContainerMovesTimeoutInLatestIteration(), + findTargetStrategy.getSizeEnteringNodes() + .entrySet() + .stream() + .filter(Objects::nonNull) + .filter(datanodeDetailsLongEntry -> datanodeDetailsLongEntry.getValue() > 0) + .collect(Collectors.toMap( + entry -> entry.getKey().getUuid(), + entry -> entry.getValue() / OzoneConsts.GB + ) + ), + findSourceStrategy.getSizeLeavingNodes() + .entrySet() + .stream() + .filter(Objects::nonNull) + .filter(datanodeDetailsLongEntry -> datanodeDetailsLongEntry.getValue() > 0) + .collect( + Collectors.toMap( + entry -> entry.getKey().getUuid(), + entry -> entry.getValue() / OzoneConsts.GB + ) + ) + ); + List resultList = new ArrayList<>(iterationsStatistic); + resultList.add(currentIterationStatistic); + return resultList; + } + /** * Logs the reason for stop and save configuration and stop the task. * @@ -1033,6 +1118,7 @@ private void resetState() { this.sizeScheduledForMoveInLatestIteration = 0; this.sizeActuallyMovedInLatestIteration = 0; metrics.resetDataSizeMovedGBInLatestIteration(); + metrics.resetNumContainerMovesScheduledInLatestIteration(); metrics.resetNumContainerMovesCompletedInLatestIteration(); metrics.resetNumContainerMovesTimeoutInLatestIteration(); metrics.resetNumDatanodesInvolvedInLatestIteration(); @@ -1105,6 +1191,10 @@ IterationResult getIterationResult() { return iterationResult; } + ContainerBalancerConfiguration getConfig() { + return config; + } + @VisibleForTesting void setConfig(ContainerBalancerConfiguration config) { this.config = config; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTaskIterationStatusInfo.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTaskIterationStatusInfo.java new file mode 100644 index 000000000000..1d597b0ca273 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTaskIterationStatusInfo.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.balancer; + +import java.util.Map; +import java.util.UUID; + +/** + * Information about balancer task iteration. + */ +public class ContainerBalancerTaskIterationStatusInfo { + private final Integer iterationNumber; + private final String iterationResult; + private final long sizeScheduledForMoveGB; + private final long dataSizeMovedGB; + private final long containerMovesScheduled; + private final long containerMovesCompleted; + private final long containerMovesFailed; + private final long containerMovesTimeout; + private final Map sizeEnteringNodesGB; + private final Map sizeLeavingNodesGB; + + @SuppressWarnings("checkstyle:ParameterNumber") + public ContainerBalancerTaskIterationStatusInfo( + Integer iterationNumber, + String iterationResult, + long sizeScheduledForMoveGB, + long dataSizeMovedGB, + long containerMovesScheduled, + long containerMovesCompleted, + long containerMovesFailed, + long containerMovesTimeout, + Map sizeEnteringNodesGB, + Map sizeLeavingNodesGB) { + this.iterationNumber = iterationNumber; + this.iterationResult = iterationResult; + this.sizeScheduledForMoveGB = sizeScheduledForMoveGB; + this.dataSizeMovedGB = dataSizeMovedGB; + this.containerMovesScheduled = containerMovesScheduled; + this.containerMovesCompleted = containerMovesCompleted; + this.containerMovesFailed = containerMovesFailed; + this.containerMovesTimeout = containerMovesTimeout; + this.sizeEnteringNodesGB = sizeEnteringNodesGB; + this.sizeLeavingNodesGB = sizeLeavingNodesGB; + } + + public Integer getIterationNumber() { + return iterationNumber; + } + + public String getIterationResult() { + return iterationResult; + } + + public long getSizeScheduledForMoveGB() { + return sizeScheduledForMoveGB; + } + + public long getDataSizeMovedGB() { + return dataSizeMovedGB; + } + + public long getContainerMovesScheduled() { + return containerMovesScheduled; + } + + public long getContainerMovesCompleted() { + return containerMovesCompleted; + } + + public long getContainerMovesFailed() { + return containerMovesFailed; + } + + public long getContainerMovesTimeout() { + return containerMovesTimeout; + } + + public Map getSizeEnteringNodesGB() { + return sizeEnteringNodesGB; + } + + public Map getSizeLeavingNodesGB() { + return sizeLeavingNodesGB; + } +} + + diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceGreedy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceGreedy.java index 4f5868f2456e..0b2e0952d53d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceGreedy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceGreedy.java @@ -190,4 +190,9 @@ public void reInitialize(List potentialDataNodes, sizeLeavingNode.clear(); resetSources(potentialDataNodes); } + + @Override + public Map getSizeLeavingNodes() { + return sizeLeavingNode; + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceStrategy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceStrategy.java index e27163048390..315be3405166 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceStrategy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceStrategy.java @@ -24,6 +24,7 @@ import javax.annotation.Nonnull; import java.util.Collection; import java.util.List; +import java.util.Map; /** * This interface can be used to implement strategies to get a @@ -75,4 +76,6 @@ void reInitialize(List potentialDataNodes, * {@link DatanodeDetails} that containers can move from */ void resetPotentialSources(@Nonnull Collection sources); + + Map getSizeLeavingNodes(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindTargetStrategy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindTargetStrategy.java index 17f6aa329dcb..9fc673a4c64a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindTargetStrategy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindTargetStrategy.java @@ -26,6 +26,7 @@ import java.util.Collection; import java.util.List; import java.util.Set; +import java.util.Map; /** * This interface can be used to implement strategies to find a target for a @@ -69,4 +70,6 @@ void reInitialize(List potentialDataNodes, * that containers can be moved to */ void resetPotentialTargets(@Nonnull Collection targets); + + Map getSizeEnteringNodes(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index 7738d0e3907e..e8aaffd824f1 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ClosePipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ClosePipelineResponseProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerRequestProto; @@ -584,6 +585,13 @@ public ScmContainerLocationResponse processRequest( .setContainerBalancerStatusResponse(getContainerBalancerStatus( request.getContainerBalancerStatusRequest())) .build(); + case GetContainerBalancerStatusInfo: + return ScmContainerLocationResponse.newBuilder() + .setCmdType(request.getCmdType()) + .setStatus(Status.OK) + .setContainerBalancerStatusInfoResponse(getContainerBalancerStatusInfo( + request.getContainerBalancerStatusInfoRequest())) + .build(); case GetPipeline: return ScmContainerLocationResponse.newBuilder() .setCmdType(request.getCmdType()) @@ -1124,6 +1132,12 @@ public ContainerBalancerStatusResponseProto getContainerBalancerStatus( .setIsRunning(impl.getContainerBalancerStatus()).build(); } + public ContainerBalancerStatusInfoResponseProto getContainerBalancerStatusInfo( + StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoRequestProto request) + throws IOException { + return impl.getContainerBalancerStatusInfo(); + } + public DecommissionNodesResponseProto decommissionNodes( DecommissionNodesRequestProto request) throws IOException { List errors = diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index f9fe52fb1cba..c7e5b857f695 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -34,11 +34,14 @@ import org.apache.hadoop.hdds.conf.ReconfigurationHandler; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.protocol.proto.ReconfigureProtocolProtos.ReconfigureProtocolService; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo; +import org.apache.hadoop.hdds.protocol.proto.ReconfigureProtocolProtos.ReconfigureProtocolService; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfo; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto.Builder; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.NodeTransferInfo; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto; import org.apache.hadoop.hdds.protocolPB.ReconfigureProtocolPB; import org.apache.hadoop.hdds.protocolPB.ReconfigureProtocolServerSideTranslatorPB; @@ -49,13 +52,14 @@ import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; import org.apache.hadoop.hdds.scm.container.ContainerReplica; -import org.apache.hadoop.hdds.scm.container.common.helpers.DeletedBlocksTransactionInfoWrapper; import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport; import org.apache.hadoop.hdds.scm.container.balancer.ContainerBalancer; import org.apache.hadoop.hdds.scm.container.balancer.ContainerBalancerConfiguration; +import org.apache.hadoop.hdds.scm.container.balancer.ContainerBalancerStatusInfo; import org.apache.hadoop.hdds.scm.container.balancer.IllegalContainerBalancerStateException; import org.apache.hadoop.hdds.scm.container.balancer.InvalidContainerBalancerConfigurationException; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; +import org.apache.hadoop.hdds.scm.container.common.helpers.DeletedBlocksTransactionInfoWrapper; import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes; @@ -1111,6 +1115,67 @@ public boolean getContainerBalancerStatus() { return scm.getContainerBalancer().isBalancerRunning(); } + @Override + public ContainerBalancerStatusInfoResponseProto getContainerBalancerStatusInfo() throws IOException { + AUDIT.logReadSuccess(buildAuditMessageForSuccess( + SCMAction.GET_CONTAINER_BALANCER_STATUS_INFO, null)); + ContainerBalancerStatusInfo balancerStatusInfo = scm.getContainerBalancer().getBalancerStatusInfo(); + if (balancerStatusInfo == null) { + return ContainerBalancerStatusInfoResponseProto + .newBuilder() + .setIsRunning(false) + .build(); + } else { + + return ContainerBalancerStatusInfoResponseProto + .newBuilder() + .setIsRunning(true) + .setContainerBalancerStatusInfo(StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfo + .newBuilder() + .setStartedAt(balancerStatusInfo.getStartedAt().toEpochSecond()) + .setConfiguration(balancerStatusInfo.getConfiguration()) + .addAllIterationsStatusInfo( + balancerStatusInfo.getIterationsStatusInfo() + .stream() + .map( + info -> ContainerBalancerTaskIterationStatusInfo.newBuilder() + .setIterationNumber(info.getIterationNumber()) + .setIterationResult(Optional.ofNullable(info.getIterationResult()).orElse("")) + .setSizeScheduledForMoveGB(info.getSizeScheduledForMoveGB()) + .setDataSizeMovedGB(info.getDataSizeMovedGB()) + .setContainerMovesScheduled(info.getContainerMovesScheduled()) + .setContainerMovesCompleted(info.getContainerMovesCompleted()) + .setContainerMovesFailed(info.getContainerMovesFailed()) + .setContainerMovesTimeout(info.getContainerMovesTimeout()) + .addAllSizeEnteringNodesGB( + info.getSizeEnteringNodesGB().entrySet() + .stream() + .map(entry -> NodeTransferInfo.newBuilder() + .setUuid(entry.getKey().toString()) + .setDataVolumeGB(entry.getValue()) + .build() + ) + .collect(Collectors.toList()) + ) + .addAllSizeLeavingNodesGB( + info.getSizeLeavingNodesGB().entrySet() + .stream() + .map(entry -> NodeTransferInfo.newBuilder() + .setUuid(entry.getKey().toString()) + .setDataVolumeGB(entry.getValue()) + .build() + ) + .collect(Collectors.toList()) + ) + .build() + ) + .collect(Collectors.toList()) + ) + ) + .build(); + } + } + /** * Get Datanode usage info such as capacity, SCMUsed, and remaining by ip * or hostname or uuid. diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java index 4e1fe234ff01..128e3401c370 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java @@ -47,6 +47,7 @@ public enum SCMAction implements AuditAction { START_CONTAINER_BALANCER, STOP_CONTAINER_BALANCER, GET_CONTAINER_BALANCER_STATUS, + GET_CONTAINER_BALANCER_STATUS_INFO, GET_CONTAINER_WITH_PIPELINE_BATCH, ADD_SCM, GET_REPLICATION_MANAGER_REPORT, diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerStatusInfo.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerStatusInfo.java new file mode 100644 index 000000000000..b8ac648e8442 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerStatusInfo.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.balancer; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.OzoneConsts; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests for {@link ContainerBalancerStatusInfo}. + */ +class TestContainerBalancerStatusInfo { + + @Test + void testGetIterationStatistics() { + MockedSCM mockedScm = new MockedSCM(new TestableCluster(20, OzoneConsts.GB)); + + ContainerBalancerConfiguration config = new OzoneConfiguration().getObject(ContainerBalancerConfiguration.class); + + config.setIterations(2); + config.setBalancingInterval(0); + config.setMaxSizeToMovePerIteration(50 * OzoneConsts.GB); + + ContainerBalancerTask task = mockedScm.startBalancerTask(config); + List iterationStatistics = task.getCurrentIterationsStatistic(); + assertEquals(3, iterationStatistics.size()); + iterationStatistics.forEach(is -> { + assertTrue(is.getContainerMovesCompleted() > 0); + assertEquals(0, is.getContainerMovesFailed()); + assertEquals(0, is.getContainerMovesTimeout()); + assertFalse(is.getSizeEnteringNodesGB().isEmpty()); + assertFalse(is.getSizeLeavingNodesGB().isEmpty()); + }); + + } +} diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerBalancerStatusSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerBalancerStatusSubcommand.java index 44e4d4c9c50b..e58074bf140a 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerBalancerStatusSubcommand.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerBalancerStatusSubcommand.java @@ -18,10 +18,22 @@ package org.apache.hadoop.hdds.scm.cli; import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfo; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfo; import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.ozone.OzoneConsts; +import picocli.CommandLine; import picocli.CommandLine.Command; import java.io.IOException; +import java.time.Duration; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.util.List; +import java.util.stream.Collectors; /** * Handler to query status of container balancer. @@ -33,13 +45,134 @@ versionProvider = HddsVersionProvider.class) public class ContainerBalancerStatusSubcommand extends ScmSubcommand { + @CommandLine.Option(names = {"-v", "--verbose"}, + description = "Verbose output. Show current iteration info.") + private boolean verbose; + + @CommandLine.Option(names = {"-H", "--history"}, + description = "Verbose output with history. Show current iteration info and history of iterations. " + + "Works only with -v.") + private boolean verboseWithHistory; + @Override public void execute(ScmClient scmClient) throws IOException { - boolean execReturn = scmClient.getContainerBalancerStatus(); - if (execReturn) { + ContainerBalancerStatusInfoResponseProto response = scmClient.getContainerBalancerStatusInfo(); + boolean isRunning = response.getIsRunning(); + ContainerBalancerStatusInfo balancerStatusInfo = response.getContainerBalancerStatusInfo(); + if (isRunning) { + LocalDateTime dateTime = + LocalDateTime.ofInstant(Instant.ofEpochSecond(balancerStatusInfo.getStartedAt()), ZoneId.systemDefault()); System.out.println("ContainerBalancer is Running."); + + if (verbose) { + System.out.printf("Started at: %s %s%n%n", dateTime.toLocalDate(), dateTime.toLocalTime()); + System.out.println(getConfigurationPrettyString(balancerStatusInfo.getConfiguration())); + List iterationsStatusInfoList + = balancerStatusInfo.getIterationsStatusInfoList(); + + System.out.println("Current iteration info:"); + System.out.println( + getPrettyIterationStatusInfo(iterationsStatusInfoList.get(iterationsStatusInfoList.size() - 1)) + ); + + if (verboseWithHistory) { + System.out.println("Iteration history list:"); + System.out.println( + iterationsStatusInfoList.stream().map(this::getPrettyIterationStatusInfo) + .collect(Collectors.joining("\n")) + ); + } + } + } else { System.out.println("ContainerBalancer is Not Running."); } } + + String getConfigurationPrettyString(HddsProtos.ContainerBalancerConfigurationProto configuration) { + return String.format("Container Balancer Configuration values:%n" + + "%-50s %s%n" + + "%-50s %s%n" + + "%-50s %d%n" + + "%-50s %dGB%n" + + "%-50s %dGB%n" + + "%-50s %dGB%n" + + "%-50s %d%n" + + "%-50s %dmin%n" + + "%-50s %dmin%n" + + "%-50s %dmin%n" + + "%-50s %s%n" + + "%-50s %s%n" + + "%-50s %s%n" + + "%-50s %s%n" + + "%-50s %s%n", "Key", "Value", "Threshold", + configuration.getUtilizationThreshold(), "Max Datanodes to Involve per Iteration(percent)", + configuration.getDatanodesInvolvedMaxPercentagePerIteration(), + "Max Size to Move per Iteration", + configuration.getDatanodesInvolvedMaxPercentagePerIteration() / OzoneConsts.GB, + "Max Size Entering Target per Iteration", + configuration.getSizeEnteringTargetMax() / OzoneConsts.GB, + "Max Size Leaving Source per Iteration", + configuration.getSizeLeavingSourceMax() / OzoneConsts.GB, + "Number of Iterations", + configuration.getIterations(), + "Time Limit for Single Container's Movement", + Duration.ofMillis(configuration.getMoveTimeout()).toMinutes(), + "Time Limit for Single Container's Replication", + Duration.ofMillis(configuration.getMoveReplicationTimeout()).toMinutes(), + "Interval between each Iteration", + Duration.ofMillis(configuration.getBalancingIterationInterval()).toMinutes(), + "Whether to Enable Network Topology", + configuration.getMoveNetworkTopologyEnable(), + "Whether to Trigger Refresh Datanode Usage Info", + configuration.getTriggerDuBeforeMoveEnable(), + "Container IDs to Exclude from Balancing", + configuration.getExcludeContainers().isEmpty() ? "None" : configuration.getExcludeContainers(), + "Datanodes Specified to be Balanced", + configuration.getIncludeDatanodes().isEmpty() ? "None" : configuration.getIncludeDatanodes(), + "Datanodes Excluded from Balancing", + configuration.getExcludeDatanodes().isEmpty() ? "None" : configuration.getExcludeDatanodes()); + } + + private String getPrettyIterationStatusInfo(ContainerBalancerTaskIterationStatusInfo iterationStatusInfo) { + int iterationNumber = iterationStatusInfo.getIterationNumber(); + String iterationResult = iterationStatusInfo.getIterationResult(); + long sizeScheduledForMove = iterationStatusInfo.getSizeScheduledForMoveGB(); + long dataSizeMovedGB = iterationStatusInfo.getDataSizeMovedGB(); + long containerMovesScheduled = iterationStatusInfo.getContainerMovesScheduled(); + long containerMovesCompleted = iterationStatusInfo.getContainerMovesCompleted(); + long containerMovesFailed = iterationStatusInfo.getContainerMovesFailed(); + long containerMovesTimeout = iterationStatusInfo.getContainerMovesTimeout(); + String enteringDataNodeList = iterationStatusInfo.getSizeEnteringNodesGBList() + .stream().map(nodeInfo -> nodeInfo.getUuid() + " <- " + nodeInfo.getDataVolumeGB() + "\n") + .collect(Collectors.joining()); + String leavingDataNodeList = iterationStatusInfo.getSizeLeavingNodesGBList() + .stream().map(nodeInfo -> nodeInfo.getUuid() + " -> " + nodeInfo.getDataVolumeGB() + "\n") + .collect(Collectors.joining()); + return String.format( + "%-50s %s%n" + + "%-50s %s%n" + + "%-50s %s%n" + + "%-50s %s%n" + + "%-50s %s%n" + + "%-50s %s%n" + + "%-50s %s%n" + + "%-50s %s%n" + + "%-50s %s%n" + + "%-50s %n%s" + + "%-50s %n%s", + "Key", "Value", + "Iteration number", iterationNumber, + "Iteration result", + iterationResult.isEmpty() ? "IN_PROGRESS" : iterationResult, + "Size scheduled to move", sizeScheduledForMove, + "Moved data size", dataSizeMovedGB, + "Scheduled to move containers", containerMovesScheduled, + "Already moved containers", containerMovesCompleted, + "Failed to move containers", containerMovesFailed, + "Failed to move containers by timeout", containerMovesTimeout, + "Entered data to nodes", enteringDataNodeList, + "Exited data from nodes", leavingDataNodeList); + } } + diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java index 03c09203c968..08b9d81c85aa 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.XceiverClientManager; @@ -490,6 +491,11 @@ public boolean getContainerBalancerStatus() throws IOException { return storageContainerLocationClient.getContainerBalancerStatus(); } + @Override + public ContainerBalancerStatusInfoResponseProto getContainerBalancerStatusInfo() throws IOException { + return storageContainerLocationClient.getContainerBalancerStatusInfo(); + } + @Override public List getScmRatisRoles() throws IOException { return storageContainerLocationClient.getScmInfo().getRatisPeerRoles(); diff --git a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestContainerBalancerSubCommand.java b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestContainerBalancerSubCommand.java index 4d4e0194a585..992bd5b6762e 100644 --- a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestContainerBalancerSubCommand.java +++ b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestContainerBalancerSubCommand.java @@ -18,10 +18,13 @@ package org.apache.hadoop.hdds.scm.cli.datanode; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; -import org.apache.hadoop.hdds.scm.cli.ContainerBalancerStopSubcommand; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfo; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; import org.apache.hadoop.hdds.scm.cli.ContainerBalancerStartSubcommand; import org.apache.hadoop.hdds.scm.cli.ContainerBalancerStatusSubcommand; +import org.apache.hadoop.hdds.scm.cli.ContainerBalancerStopSubcommand; import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.container.balancer.ContainerBalancerConfiguration; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -32,6 +35,8 @@ import java.io.PrintStream; import java.io.UnsupportedEncodingException; import java.nio.charset.StandardCharsets; +import java.time.OffsetDateTime; +import java.util.Arrays; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -42,16 +47,16 @@ * Unit tests to validate the ContainerBalancerSubCommand class includes the * correct output when executed against a mock client. */ -public class TestContainerBalancerSubCommand { +class TestContainerBalancerSubCommand { - private ContainerBalancerStopSubcommand stopCmd; - private ContainerBalancerStartSubcommand startCmd; - private ContainerBalancerStatusSubcommand statusCmd; + private static final String DEFAULT_ENCODING = StandardCharsets.UTF_8.name(); private final ByteArrayOutputStream outContent = new ByteArrayOutputStream(); private final ByteArrayOutputStream errContent = new ByteArrayOutputStream(); private final PrintStream originalOut = System.out; private final PrintStream originalErr = System.err; - private static final String DEFAULT_ENCODING = StandardCharsets.UTF_8.name(); + private ContainerBalancerStopSubcommand stopCmd; + private ContainerBalancerStartSubcommand startCmd; + private ContainerBalancerStatusSubcommand statusCmd; @BeforeEach public void setup() throws UnsupportedEncodingException { @@ -69,16 +74,147 @@ public void tearDown() { } @Test - public void testContainerBalancerStatusSubcommandRunning() - throws IOException { + public void testContainerBalancerStatusInfoSubcommandRunning() + throws IOException { ScmClient scmClient = mock(ScmClient.class); + ContainerBalancerConfiguration config = new ContainerBalancerConfiguration(); + config.setThreshold(10); + config.setMaxDatanodesPercentageToInvolvePerIteration(20); + config.setMaxSizeToMovePerIteration(53687091200L); + config.setMaxSizeEnteringTarget(27917287424L); + config.setMaxSizeLeavingSource(27917287424L); + config.setIterations(2); + config.setExcludeNodes(""); + config.setMoveTimeout(3900000); + config.setMoveReplicationTimeout(3000000); + config.setBalancingInterval(0); + config.setIncludeNodes(""); + config.setExcludeNodes(""); + config.setNetworkTopologyEnable(false); + config.setTriggerDuEnable(false); + + StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfo iteration0StatusInfo = + StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfo.newBuilder() + .setIterationNumber(0) + .setIterationResult("ITERATION_COMPLETED") + .setSizeScheduledForMoveGB(48) + .setDataSizeMovedGB(48) + .setContainerMovesScheduled(11) + .setContainerMovesCompleted(11) + .setContainerMovesFailed(0) + .setContainerMovesTimeout(0) + .addSizeEnteringNodesGB( + StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .setUuid("80f6bc27-e6f3-493e-b1f4-25f810ad960d") + .setDataVolumeGB(27) + .build() + ) + .addSizeEnteringNodesGB( + StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .setUuid("701ca98e-aa1a-4b36-b817-e28ed634bba6") + .setDataVolumeGB(23L) + .build() + ) + .addSizeLeavingNodesGB( + StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .setUuid("b8b9c511-c30f-4933-8938-2f272e307070") + .setDataVolumeGB(24L) + .build() + ) + .addSizeLeavingNodesGB( + StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .setUuid("7bd99815-47e7-4015-bc61-ca6ef6dfd130") + .setDataVolumeGB(26L) + .build() + ) + .build(); + StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfo iteration1StatusInfo = + StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfo.newBuilder() + .setIterationNumber(1) + .setIterationResult("ITERATION_COMPLETED") + .setSizeScheduledForMoveGB(48) + .setDataSizeMovedGB(48) + .setContainerMovesScheduled(11) + .setContainerMovesCompleted(11) + .setContainerMovesFailed(0) + .setContainerMovesTimeout(0) + .addSizeEnteringNodesGB( + StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .setUuid("80f6bc27-e6f3-493e-b1f4-25f810ad960d") + .setDataVolumeGB(27L) + .build() + ) + .addSizeEnteringNodesGB( + StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .setUuid("701ca98e-aa1a-4b36-b817-e28ed634bba6") + .setDataVolumeGB(23L) + .build() + ) + .addSizeLeavingNodesGB( + StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .setUuid("b8b9c511-c30f-4933-8938-2f272e307070") + .setDataVolumeGB(24L) + .build() + ) + .addSizeLeavingNodesGB( + StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .setUuid("7bd99815-47e7-4015-bc61-ca6ef6dfd130") + .setDataVolumeGB(26L) + .build() + ) + .build(); + StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfo iteration2StatusInfo = + StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfo.newBuilder() + .setIterationNumber(1) + .setIterationResult("") + .setSizeScheduledForMoveGB(48) + .setDataSizeMovedGB(48) + .setContainerMovesScheduled(11) + .setContainerMovesCompleted(11) + .setContainerMovesFailed(0) + .setContainerMovesTimeout(0) + .addSizeEnteringNodesGB( + StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .setUuid("80f6bc27-e6f3-493e-b1f4-25f810ad960d") + .setDataVolumeGB(27L) + .build() + ) + .addSizeEnteringNodesGB( + StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .setUuid("701ca98e-aa1a-4b36-b817-e28ed634bba6") + .setDataVolumeGB(23L) + .build() + ) + .addSizeLeavingNodesGB( + StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .setUuid("b8b9c511-c30f-4933-8938-2f272e307070") + .setDataVolumeGB(24L) + .build() + ) + .addSizeLeavingNodesGB( + StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .setUuid("7bd99815-47e7-4015-bc61-ca6ef6dfd130") + .setDataVolumeGB(26L) + .build() + ) + .build(); + ContainerBalancerStatusInfoResponseProto statusInfoResponseProto = + ContainerBalancerStatusInfoResponseProto.newBuilder() + .setIsRunning(true) + .setContainerBalancerStatusInfo(ContainerBalancerStatusInfo.newBuilder() + .setStartedAt(OffsetDateTime.now().toEpochSecond()) + .setConfiguration(config.toProtobufBuilder().setShouldRun(true)) + .addAllIterationsStatusInfo( + Arrays.asList(iteration0StatusInfo, iteration1StatusInfo, iteration2StatusInfo) + ) + ) + + .build(); //test status is running - Mockito.when(scmClient.getContainerBalancerStatus()) - .thenAnswer(invocation -> true); + Mockito.when(scmClient.getContainerBalancerStatusInfo()).thenReturn(statusInfoResponseProto); statusCmd.execute(scmClient); - Pattern p = Pattern.compile( "^ContainerBalancer\\sis\\sRunning."); Matcher m = p.matcher(outContent.toString(DEFAULT_ENCODING)); @@ -86,12 +222,32 @@ public void testContainerBalancerStatusSubcommandRunning() } @Test - public void testContainerBalancerStatusSubcommandNotRunning() - throws IOException { + public void testContainerBalancerStatusInfoSubcommandRunningOnStoppedBalancer() + throws IOException { ScmClient scmClient = mock(ScmClient.class); - Mockito.when(scmClient.getContainerBalancerStatus()) - .thenAnswer(invocation -> false); + //test status is running + Mockito.when(scmClient.getContainerBalancerStatusInfo()).thenReturn( + ContainerBalancerStatusInfoResponseProto.newBuilder() + .setIsRunning(false) + .build()); + + statusCmd.execute(scmClient); + Pattern p = Pattern.compile( + "^ContainerBalancer\\sis\\sNot\\sRunning."); + Matcher m = p.matcher(outContent.toString(DEFAULT_ENCODING)); + assertTrue(m.find()); + } + + @Test + void testContainerBalancerStatusSubcommandNotRunning() + throws IOException { + ScmClient scmClient = mock(ScmClient.class); + + Mockito.when(scmClient.getContainerBalancerStatusInfo()).thenReturn( + ContainerBalancerStatusInfoResponseProto.newBuilder() + .setIsRunning(false) + .build()); statusCmd.execute(scmClient); @@ -102,7 +258,7 @@ public void testContainerBalancerStatusSubcommandNotRunning() } @Test - public void testContainerBalancerStopSubcommand() throws IOException { + public void testContainerBalancerStopSubcommand() throws IOException { ScmClient scmClient = mock(ScmClient.class); stopCmd.execute(scmClient); @@ -113,10 +269,10 @@ public void testContainerBalancerStopSubcommand() throws IOException { @Test public void testContainerBalancerStartSubcommandWhenBalancerIsNotRunning() - throws IOException { + throws IOException { ScmClient scmClient = mock(ScmClient.class); Mockito.when(scmClient.startContainerBalancer( - null, null, null, null, null, null)) + null, null, null, null, null, null)) .thenReturn( StorageContainerLocationProtocolProtos .StartContainerBalancerResponseProto.newBuilder() @@ -132,10 +288,10 @@ public void testContainerBalancerStartSubcommandWhenBalancerIsNotRunning() @Test public void testContainerBalancerStartSubcommandWhenBalancerIsRunning() - throws IOException { + throws IOException { ScmClient scmClient = mock(ScmClient.class); Mockito.when(scmClient.startContainerBalancer( - null, null, null, null, null, null)) + null, null, null, null, null, null)) .thenReturn(StorageContainerLocationProtocolProtos .StartContainerBalancerResponseProto.newBuilder() .setStart(false) diff --git a/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot b/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot index 6e2fb9d85a56..14070237f754 100644 --- a/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot +++ b/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot @@ -63,11 +63,43 @@ Datanode Recommission is Finished Run Container Balancer ${result} = Execute ozone admin containerbalancer start -t 1 -d 100 -i 1 Should Contain ${result} Container Balancer started successfully. + +Wait Finish Of Balancing ${result} = Execute ozone admin containerbalancer status Should Contain ${result} ContainerBalancer is Running. Wait Until Keyword Succeeds 3min 10sec ContainerBalancer is Not Running Sleep 60000ms +Verify Verbose Balancer Status + [arguments] ${output} + + Should Contain ${output} ContainerBalancer is Running. + Should Contain ${output} Started at: + Should Contain ${output} Container Balancer Configuration values: + +Verify Balancer Iteration + [arguments] ${output} ${number} ${status} ${containers} + + Should Contain ${output} Iteration number ${number} + Should Contain ${output} Iteration result ${status} + Should Contain ${output} Scheduled to move containers ${containers} + +Run Balancer Status + ${result} = Execute ozone admin containerbalancer status + Should Contain ${result} ContainerBalancer is Running. + +Run Balancer Verbose Status + ${result} = Execute ozone admin containerbalancer status -v + Verify Verbose Balancer Status ${result} + Verify Balancer Iteration ${result} 1 IN_PROGRESS 3 + Should Contain ${result} Current iteration info: + +Run Balancer Verbose History Status + ${result} = Execute ozone admin containerbalancer status -v --history + Verify Verbose Balancer Status ${result} + Verify Balancer Iteration ${result} 1 IN_PROGRESS 3 + Should Contain ${result} Iteration history list: + ContainerBalancer is Not Running ${result} = Execute ozone admin containerbalancer status Should contain ${result} ContainerBalancer is Not Running. @@ -132,6 +164,14 @@ Verify Container Balancer for RATIS containers Run Container Balancer + Run Balancer Status + + Run Balancer Verbose Status + + Run Balancer Verbose History Status + + Wait Finish Of Balancing + ${datanodeOzoneUsedBytesInfoAfterContainerBalancing} = Get Datanode Ozone Used Bytes Info ${uuid} Should Not Be Equal As Integers ${datanodeOzoneUsedBytesInfo} ${datanodeOzoneUsedBytesInfoAfterContainerBalancing} Should Be True ${datanodeOzoneUsedBytesInfoAfterContainerBalancing} < ${SIZE} * 3.5