diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java
index ab296fc52bf..05ed833edbe 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java
@@ -227,6 +227,19 @@ public int getHealthyVolumeCount() {
     }
   }
 
+  /**
+   * Returns the count of failed volumes reported by the datanode.
+   * @return count of failed volumes, read under the read lock
+   */
+  public int getFailedVolumeCount() {
+    lock.readLock().lock();
+    try {
+      return failedVolumeCount;
+    } finally {
+      lock.readLock().unlock();
+    }
+  }
+
   /**
    * Returns count of healthy metadata volumes reported from datanode.
    * @return count of healthy metdata log volumes
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java
index 3307a292dca..3c3ff8fb833 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java
@@ -29,6 +29,7 @@
 import java.util.concurrent.ScheduledFuture;
 import java.util.concurrent.TimeUnit;
 import java.util.function.Predicate;
+import java.util.stream.Collectors;
 
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
@@ -477,6 +478,38 @@ public List<DatanodeInfo> getDeadNodes() {
     return getNodes(null, NodeState.DEAD);
   }
 
+  /**
+   * Returns all nodes whose operational state is DECOMMISSIONING.
+   * @return list of decommissioning nodes, regardless of health
+   */
+  public List<DatanodeInfo> getDecommissioningNodes() {
+    return getNodes(NodeOperationalState.DECOMMISSIONING, null);
+  }
+
+  /**
+   * Returns the count of decommissioning nodes.
+   * @return decommissioning node count
+   */
+  public int getDecommissioningNodeCount() {
+    return getDecommissioningNodes().size();
+  }
+
+  /**
+   * Returns all nodes whose operational state is ENTERING_MAINTENANCE.
+   * @return list of entering maintenance nodes, regardless of health
+   */
+  public List<DatanodeInfo> getEnteringMaintenanceNodes() {
+    return getNodes(NodeOperationalState.ENTERING_MAINTENANCE, null);
+  }
+
+  /**
+   * Returns the count of entering maintenance nodes.
+   * @return entering maintenance node count
+   */
+  public int getEnteringMaintenanceNodeCount() {
+    return getEnteringMaintenanceNodes().size();
+  }
+
   /**
    * Returns all the nodes with the specified status.
    *
@@ -501,6 +534,25 @@ public List<DatanodeInfo> getNodes(
     return nodeStateMap.getDatanodeInfos(opState, health);
   }
 
+  /**
+   * Returns all nodes that report at least one failed volume.
+   * @return list of nodes whose failed volume count is greater than zero
+   */
+  public List<DatanodeInfo> getVolumeFailuresNodes() {
+    List<DatanodeInfo> allNodes = nodeStateMap.getAllDatanodeInfos();
+    List<DatanodeInfo> failedVolumeNodes = allNodes.stream().
+        filter(dn -> dn.getFailedVolumeCount() > 0).collect(Collectors.toList());
+    return failedVolumeNodes;
+  }
+
+  /**
+   * Returns the count of nodes that report at least one failed volume.
+   * @return failed volume node count
+   */
+  public int getVolumeFailuresNodeCount() {
+    return getVolumeFailuresNodes().size();
+  }
+
   /**
    * Returns all the nodes which have registered to NodeStateManager.
    *
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
index 038f76b52e9..3339b27f2ce 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
@@ -1223,6 +1223,8 @@ public Map<String, String> getNodeStatistics() {
     Map<String, String> nodeStatistics = new HashMap<>();
     // Statistics node usaged
     nodeUsageStatistics(nodeStatistics);
+    // Statistics node states
+    nodeStateStatistics(nodeStatistics);
     // todo: Statistics of other instances
     return nodeStatistics;
   }
@@ -1265,6 +1267,19 @@ private void nodeUsageStatistics(Map<String, String> nodeStatics) {
     nodeStatics.put(UsageStatics.STDEV.getLabel(), decimalFormat.format(dev));
   }
 
+  private void nodeStateStatistics(Map<String, String> nodeStatics) {
+    int healthyNodeCount = nodeStateManager.getHealthyNodeCount();
+    int deadNodeCount = nodeStateManager.getDeadNodeCount();
+    int decommissioningNodeCount = nodeStateManager.getDecommissioningNodeCount();
+    int enteringMaintenanceNodeCount = nodeStateManager.getEnteringMaintenanceNodeCount();
+    int volumeFailuresNodeCount = nodeStateManager.getVolumeFailuresNodeCount();
+    nodeStatics.put(StateStatistics.HEALTHY.getLabel(), String.valueOf(healthyNodeCount));
+    nodeStatics.put(StateStatistics.DEAD.getLabel(), String.valueOf(deadNodeCount));
+    nodeStatics.put(StateStatistics.DECOMMISSIONING.getLabel(), String.valueOf(decommissioningNodeCount));
+    nodeStatics.put(StateStatistics.ENTERING_MAINTENANCE.getLabel(), String.valueOf(enteringMaintenanceNodeCount));
+    nodeStatics.put(StateStatistics.VOLUME_FAILURES.getLabel(), String.valueOf(volumeFailuresNodeCount));
+  }
+
   /**
    * Based on the current time and the last heartbeat, calculate the time difference
    * and get a string of the relative value. E.g. "2s ago", "1m 2s ago", etc.
@@ -1346,6 +1361,24 @@ public String getLabel() {
     }
   }
 
+  /**
+   * Labels under which the per-state datanode counts are published.
+   */
+  private enum StateStatistics {
+    HEALTHY("Healthy"),
+    DEAD("Dead"),
+    DECOMMISSIONING("Decommissioning"),
+    ENTERING_MAINTENANCE("EnteringMaintenance"),
+    VOLUME_FAILURES("VolumeFailures");
+    private final String label;
+    public String getLabel() {
+      return label;
+    }
+    StateStatistics(String label) {
+      this.label = label;
+    }
+  }
+
   /**
    * Returns the min of no healthy volumes reported out of the set
    * of datanodes constituting the pipeline.
diff --git a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html
index 67655b539f0..5a4f2ff633c 100644
--- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html
+++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html
@@ -28,7 +28,7 @@

SCM Information

-

Statistics

+

Usage Statistics

@@ -54,6 +54,36 @@

Statistics

+

State Statistics

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
Datanode StateCount
Healthy Nodes{{statistics.nodes.state.healthy}}
Dead Nodes{{statistics.nodes.state.dead}}
Decommissioning Nodes{{statistics.nodes.state.decommissioning}}
Entering Maintenance Nodes{{statistics.nodes.state.enteringmaintenance}}
Volume Failures Nodes{{statistics.nodes.state.volumefailures}}
+

Node Status

diff --git a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js
index 8ca9fb257c9..41dc25cb650 100644
--- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js
+++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js
@@ -39,6 +39,13 @@
                         max : "N/A",
                         median : "N/A",
                         stdev : "N/A"
+                    },
+                    state : {
+                        healthy : "N/A",
+                        dead : "N/A",
+                        decommissioning : "N/A",
+                        enteringmaintenance : "N/A",
+                        volumefailures : "N/A"
                     }
                 }
             }
@@ -92,15 +99,26 @@
                     $scope.lastIndex = Math.ceil(nodeStatusCopy.length / $scope.RecordsToDisplay);
                     $scope.nodeStatus = nodeStatusCopy.slice(0, $scope.RecordsToDisplay);
 
-                    ctrl.nodemanagermetrics.NodeStatistics.forEach(function(obj) {
-                        if(obj.key == "Min") {
-                            $scope.statistics.nodes.usages.min = obj.value;
-                        } else if(obj.key == "Max") {
-                            $scope.statistics.nodes.usages.max = obj.value;
-                        } else if(obj.key == "Median") {
-                            $scope.statistics.nodes.usages.median = obj.value;
-                        } else if(obj.key == "Stdev") {
-                            $scope.statistics.nodes.usages.stdev = obj.value;
+                    // Copy each NodeStatistics key/value pair into its usage or state display slot.
+                    ctrl.nodemanagermetrics.NodeStatistics.forEach(({key, value}) => {
+                        if(key === "Min") {
+                            $scope.statistics.nodes.usages.min = value;
+                        } else if(key === "Max") {
+                            $scope.statistics.nodes.usages.max = value;
+                        } else if(key === "Median") {
+                            $scope.statistics.nodes.usages.median = value;
+                        } else if(key === "Stdev") {
+                            $scope.statistics.nodes.usages.stdev = value;
+                        } else if(key === "Healthy") {
+                            $scope.statistics.nodes.state.healthy = value;
+                        } else if(key === "Dead") {
+                            $scope.statistics.nodes.state.dead = value;
+                        } else if(key === "Decommissioning") {
+                            $scope.statistics.nodes.state.decommissioning = value;
+                        } else if(key === "EnteringMaintenance") {
+                            $scope.statistics.nodes.state.enteringmaintenance = value;
+                        } else if(key === "VolumeFailures") {
+                            $scope.statistics.nodes.state.volumefailures = value;
                         }
                     });
                 });