From 3ce5a53a10c4587b3bc6b949bccd2579f72bd0f5 Mon Sep 17 00:00:00 2001 From: slfan1989 Date: Sat, 14 Sep 2024 11:41:48 +0800 Subject: [PATCH] HDDS-11196. Improve SCM WebUI Display. --- .../main/resources/webapps/static/ozone.css | 4 + .../webapps/static/templates/jvm.html | 2 +- .../hadoop/hdds/scm/node/SCMNodeManager.java | 18 +++++ .../hadoop/hdds/scm/server/SCMMXBean.java | 3 + .../scm/server/StorageContainerManager.java | 73 +++++++++++++++++++ .../resources/webapps/scm/scm-overview.html | 39 +++++++++- .../src/main/resources/webapps/scm/scm.js | 7 ++ 7 files changed, 141 insertions(+), 5 deletions(-) diff --git a/hadoop-hdds/framework/src/main/resources/webapps/static/ozone.css b/hadoop-hdds/framework/src/main/resources/webapps/static/ozone.css index e08e9c52060..389d9d78f21 100644 --- a/hadoop-hdds/framework/src/main/resources/webapps/static/ozone.css +++ b/hadoop-hdds/framework/src/main/resources/webapps/static/ozone.css @@ -91,3 +91,7 @@ body { .om-roles-background { background-color: #dcfbcd!important; } + +.scm-roles-background { + background-color: #dcfbcd!important; +} \ No newline at end of file diff --git a/hadoop-hdds/framework/src/main/resources/webapps/static/templates/jvm.html b/hadoop-hdds/framework/src/main/resources/webapps/static/templates/jvm.html index c1f7d16aefa..9706ebdf6b3 100644 --- a/hadoop-hdds/framework/src/main/resources/webapps/static/templates/jvm.html +++ b/hadoop-hdds/framework/src/main/resources/webapps/static/templates/jvm.html @@ -21,6 +21,6 @@ Input arguments: - {{$ctrl.jmx.InputArguments}} +
{{$ctrl.jmx.InputArguments.join('\n')}}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index fa8f316aa42..5ef9ae47a57 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -146,6 +146,8 @@ public class SCMNodeManager implements NodeManager { private static final String LASTHEARTBEAT = "LASTHEARTBEAT"; private static final String USEDSPACEPERCENT = "USEDSPACEPERCENT"; private static final String TOTALCAPACITY = "CAPACITY"; + private static final String DNUUID = "UUID"; + private static final String VERSION = "VERSION"; /** * Constructs SCM machine Manager. */ @@ -447,6 +449,8 @@ public RegisteredCommand register( processNodeReport(datanodeDetails, nodeReport); LOG.info("Updated datanode to: {}", dn); scmNodeEventPublisher.fireEvent(SCMEvents.NODE_ADDRESS_UPDATE, dn); + } else if (isVersionChange(oldNode.getVersion(), datanodeDetails.getVersion())) { + nodeStateManager.updateNode(datanodeDetails, layoutInfo); } } catch (NodeNotFoundException e) { LOG.error("Cannot find datanode {} from nodeStateManager", @@ -508,6 +512,18 @@ private boolean updateDnsToUuidMap( return ipChanged || hostNameChanged; } + /** + * Checks whether the version reported by a registering datanode differs from the recorded one. + * + * @param oldVersion version of the datanode record already known to SCM + * @param newVersion version carried by the registering datanode + * @return true if the two versions differ (null-safe comparison), false if they are equal. + */ + private boolean isVersionChange(String oldVersion, String newVersion) { + final boolean versionChanged = !Objects.equals(oldVersion, newVersion); + return versionChanged; + } + /** * Send heartbeat to indicate the datanode is alive and doing well. 
* @@ -1136,6 +1152,8 @@ public Map<String, Map<String, String>> getNodeStatusInfo() { String nonScmUsedPerc = storagePercentage[1]; map.put(USEDSPACEPERCENT, "Ozone: " + scmUsedPerc + "%, other: " + nonScmUsedPerc + "%"); + map.put(DNUUID, dni.getUuidString()); + map.put(VERSION, dni.getVersion()); nodes.put(hostName, map); } return nodes; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMMXBean.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMMXBean.java index de609356b22..472d690a472 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMMXBean.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMMXBean.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdds.scm.server; +import java.util.List; import java.util.Map; import org.apache.hadoop.hdds.annotation.InterfaceAudience; @@ -74,6 +75,8 @@ public interface SCMMXBean extends ServiceRuntimeInfo { String getScmRatisRoles(); + List<List<String>> getRatisRoles(); + /** * Primordial node is the node on which scm init operation is performed. 
* @return hostname of primordialNode diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 868e54f1935..acae0686497 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -170,7 +170,12 @@ import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.apache.hadoop.util.ReflectionUtils; +import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.server.DivisionInfo; +import org.apache.ratis.server.RaftServer; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.util.ExitUtils; import org.apache.ratis.util.JvmPauseMonitor; import org.slf4j.Logger; @@ -2137,6 +2142,74 @@ public String getScmRatisRoles() { HddsUtils.format(server.getRatisRoles()) : "STANDALONE"; } + @Override + public List<List<String>> getRatisRoles() { + final SCMRatisServer server = getScmHAManager().getRatisServer(); + + // If Ratis is enabled + if (server != null) { + + // Try to find the SCM leader; + // if no leader is known, + // return a "No leader found" message. + RaftServer.Division division = server.getDivision(); + RaftPeer leader = getLeader(division); + if (leader == null) { + return getRatisRolesException("No leader found"); + } + + // If the SCMRatisServer is stopped, return a service stopped message. + if (server.isStopped()) { + return getRatisRolesException("Server is shutting down"); + } + + // Attempt to retrieve role information. 
+ try { + List<String> ratisRoles = server.getRatisRoles(); + List<List<String>> result = new ArrayList<>(); + for (String role : ratisRoles) { + String[] roleArr = role.split(":"); + List<String> scmInfo = new ArrayList<>(); + // Host Name + scmInfo.add(roleArr[0]); + // Node ID + scmInfo.add(roleArr[3]); + // Ratis Port + scmInfo.add(roleArr[1]); + // Role + scmInfo.add(roleArr[2]); + result.add(scmInfo); + } + return result; + } catch (Exception e) { + LOG.error("Failed to getRatisRoles.", e); + return getRatisRolesException("Exception Occurred, " + e.getMessage()); + } + } + + // If Ratis is not enabled, return a single-entry message saying so (nothing is thrown). + return getRatisRolesException("Ratis is disabled"); + } + + public RaftPeer getLeader(RaftServer.Division division) { + if (division.getInfo().isLeader()) { + return division.getPeer(); + } else { + DivisionInfo info = division.getInfo(); + RaftProtos.RoleInfoProto roleInfoProto = info.getRoleInfoProto(); + RaftProtos.FollowerInfoProto followerInfo = roleInfoProto.getFollowerInfo(); + RaftProtos.ServerRpcProto leaderInfo = followerInfo.getLeaderInfo(); + RaftProtos.RaftPeerProto peerLeaderId = leaderInfo.getId(); + ByteString leaderId = peerLeaderId.getId(); + return leaderId.isEmpty() ? null : + division.getRaftConf().getPeer(RaftPeerId.valueOf(leaderId)); + } + } + + private static List<List<String>> getRatisRolesException(String exceptionString) { + return Collections.singletonList(Collections.singletonList(exceptionString)); + } + /** * @return hostname of primordialNode */ diff --git a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html index 3f825d4e25f..577a3fd54b9 100644 --- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html +++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html @@ -140,6 +140,10 @@

Node Status

'sortdesc':(columnName == 'comstate' && !reverse)}">Commisioned State Last Heartbeat + UUID + Version @@ -157,6 +161,8 @@

Node Status

{{typestat.opstate}} {{typestat.comstate}} {{typestat.lastheartbeat}} + {{typestat.uuid}} + {{typestat.version}} @@ -210,10 +216,6 @@

Status

Force Exit Safe Mode {{$ctrl.overview.jmx.SafeModeExitForceful}} - - SCM Roles (HA) - {{$ctrl.overview.jmx.ScmRatisRoles}} - Primordial Node (HA) {{$ctrl.overview.jmx.PrimordialNode}} @@ -235,6 +237,35 @@

Meta-Data Volume Information

+

SCM Roles (HA)

+

{{$ctrl.overview.jmx.RatisRoles[0][0]}}

+
+ + + + + + + + + + + + + + + + + + + + + + + +
Host NameNode IDRatis PortRole
{{roles[0]}}{{roles[1]}}{{roles[2]}}{{roles[3]}}
{{roles[0]}}{{roles[1]}}{{roles[2]}}{{roles[3]}}
+
+

Safemode rules statuses

diff --git a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js index 6fac6849530..e00f8b8ede8 100644 --- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js +++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js @@ -56,6 +56,11 @@ } } + $http.get("jmx?qry=Ratis:service=RaftServer,group=*,id=*") + .then(function (result) { + ctrl.role = result.data.beans[0]; + }); + function get_protocol(URLScheme, value, baseProto, fallbackProto) { let protocol = "unknown" let port = -1; @@ -95,6 +100,8 @@ capacity: value && value.find((element) => element.key === "CAPACITY").value, comstate: value && value.find((element) => element.key === "COMSTATE").value, lastheartbeat: value && value.find((element) => element.key === "LASTHEARTBEAT").value, + uuid: value && value.find((element) => element.key === "UUID").value, + version: value && value.find((element) => element.key === "VERSION").value, port: portSpec.port, protocol: portSpec.proto }