Skip to content

Commit 64753ad

Browse files
committed
YARN-10251. Show extended resources on legacy RM UI. Contributed by Eric Payne
1 parent 975b602 commit 64753ad

File tree

6 files changed

+104
-57
lines changed

6 files changed

+104
-57
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/WebPageUtils.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ private static String getAppsTableColumnDefs(
6161
// Update the following line if any column is added to the RM page before column 11
6262
sb.append("{'sType':'num-ignore-str', ")
6363
.append("'aTargets': [12, 13, 14, 15, 16] },\n");
64-
// set progress column index to 19
65-
progressIndex = "[19]";
64+
// set progress column index to 21
65+
progressIndex = "[21]";
6666
} else if (isFairSchedulerPage) {
6767
// Update the following line if any column is added to the scheduler page before column 11
6868
sb.append("{'sType':'num-ignore-str', ")
@@ -112,4 +112,4 @@ public static String resourceRequestsTableInit() {
112112
.toString();
113113
}
114114

115-
}
115+
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/AppInfo.java

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,12 @@
2828
import org.apache.hadoop.classification.InterfaceStability.Evolving;
2929

3030
import org.apache.hadoop.yarn.api.records.ApplicationReport;
31+
import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
3132
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
33+
import org.apache.hadoop.yarn.api.records.ResourceInformation;
3234
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
3335
import org.apache.hadoop.yarn.util.Times;
36+
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
3437
import org.apache.hadoop.yarn.util.StringHelper;
3538

3639
@Public
@@ -63,8 +66,10 @@ public class AppInfo {
6366
protected int priority;
6467
private long allocatedCpuVcores;
6568
private long allocatedMemoryMB;
69+
private long allocatedGpus;
6670
private long reservedCpuVcores;
6771
private long reservedMemoryMB;
72+
private long reservedGpus;
6873
protected boolean unmanagedApplication;
6974
private String appNodeLabelExpression;
7075
private String amNodeLabelExpression;
@@ -100,24 +105,35 @@ public AppInfo(ApplicationReport app) {
100105
if (app.getPriority() != null) {
101106
priority = app.getPriority().getPriority();
102107
}
103-
if (app.getApplicationResourceUsageReport() != null) {
104-
runningContainers = app.getApplicationResourceUsageReport()
108+
ApplicationResourceUsageReport usageReport =
109+
app.getApplicationResourceUsageReport();
110+
if (usageReport != null) {
111+
runningContainers = usageReport
105112
.getNumUsedContainers();
106-
if (app.getApplicationResourceUsageReport().getUsedResources() != null) {
107-
allocatedCpuVcores = app.getApplicationResourceUsageReport()
113+
if (usageReport.getUsedResources() != null) {
114+
allocatedCpuVcores = usageReport
108115
.getUsedResources().getVirtualCores();
109-
allocatedMemoryMB = app.getApplicationResourceUsageReport()
116+
allocatedMemoryMB = usageReport
110117
.getUsedResources().getMemorySize();
111-
reservedCpuVcores = app.getApplicationResourceUsageReport()
118+
reservedCpuVcores = usageReport
112119
.getReservedResources().getVirtualCores();
113-
reservedMemoryMB = app.getApplicationResourceUsageReport()
120+
reservedMemoryMB = usageReport
114121
.getReservedResources().getMemorySize();
122+
Integer gpuIndex = ResourceUtils.getResourceTypeIndex()
123+
.get(ResourceInformation.GPU_URI);
124+
allocatedGpus = -1;
125+
reservedGpus = -1;
126+
if (gpuIndex != null) {
127+
allocatedGpus = usageReport.getUsedResources()
128+
.getResourceValue(ResourceInformation.GPU_URI);
129+
reservedGpus = usageReport.getReservedResources()
130+
.getResourceValue(ResourceInformation.GPU_URI);
131+
}
115132
}
116133
aggregateResourceAllocation = StringHelper.getResourceSecondsString(
117-
app.getApplicationResourceUsageReport().getResourceSecondsMap());
134+
usageReport.getResourceSecondsMap());
118135
aggregatePreemptedResourceAllocation = StringHelper
119-
.getResourceSecondsString(app.getApplicationResourceUsageReport()
120-
.getPreemptedResourceSecondsMap());
136+
.getResourceSecondsString(usageReport.getPreemptedResourceSecondsMap());
121137
}
122138
progress = app.getProgress() * 100; // in percent
123139
if (app.getApplicationTags() != null && !app.getApplicationTags().isEmpty()) {
@@ -176,6 +192,10 @@ public long getAllocatedMemoryMB() {
176192
return allocatedMemoryMB;
177193
}
178194

195+
public long getAllocatedGpus() {
196+
return allocatedGpus;
197+
}
198+
179199
public long getReservedCpuVcores() {
180200
return reservedCpuVcores;
181201
}
@@ -184,6 +204,10 @@ public long getReservedMemoryMB() {
184204
return reservedMemoryMB;
185205
}
186206

207+
public long getReservedGpus() {
208+
return reservedGpus;
209+
}
210+
187211
public float getProgress() {
188212
return progress;
189213
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java

Lines changed: 34 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,15 @@
1919
package org.apache.hadoop.yarn.server.resourcemanager.webapp;
2020

2121
import org.apache.hadoop.util.StringUtils;
22+
import org.apache.hadoop.yarn.api.records.Resource;
2223
import org.apache.hadoop.yarn.api.records.ResourceTypeInfo;
2324
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
2425
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
25-
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo;
2626
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerInfo;
2727
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.UserMetricsInfo;
2828

2929
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
30+
import org.apache.hadoop.yarn.util.resource.Resources;
3031
import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet;
3132
import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet.DIV;
3233
import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
@@ -62,35 +63,34 @@ protected void render(Block html) {
6263

6364
DIV<Hamlet> div = html.div().$class("metrics");
6465

65-
long usedMemoryBytes = 0;
66-
long totalMemoryBytes = 0;
67-
long reservedMemoryBytes = 0;
68-
long usedVCores = 0;
69-
long totalVCores = 0;
70-
long reservedVCores = 0;
66+
Resource usedResources;
67+
Resource totalResources;
68+
Resource reservedResources;
69+
int allocatedContainers;
7170
if (clusterMetrics.getCrossPartitionMetricsAvailable()) {
72-
ResourceInfo usedAllPartitions =
73-
clusterMetrics.getTotalUsedResourcesAcrossPartition();
74-
ResourceInfo totalAllPartitions =
75-
clusterMetrics.getTotalClusterResourcesAcrossPartition();
76-
ResourceInfo reservedAllPartitions =
77-
clusterMetrics.getTotalReservedResourcesAcrossPartition();
78-
usedMemoryBytes = usedAllPartitions.getMemorySize() * BYTES_IN_MB;
79-
totalMemoryBytes = totalAllPartitions.getMemorySize() * BYTES_IN_MB;
80-
reservedMemoryBytes = reservedAllPartitions.getMemorySize() * BYTES_IN_MB;
81-
usedVCores = usedAllPartitions.getvCores();
82-
totalVCores = totalAllPartitions.getvCores();
83-
reservedVCores = reservedAllPartitions.getvCores();
71+
allocatedContainers =
72+
clusterMetrics.getTotalAllocatedContainersAcrossPartition();
73+
usedResources =
74+
clusterMetrics.getTotalUsedResourcesAcrossPartition().getResource();
75+
totalResources =
76+
clusterMetrics.getTotalClusterResourcesAcrossPartition()
77+
.getResource();
78+
reservedResources =
79+
clusterMetrics.getTotalReservedResourcesAcrossPartition()
80+
.getResource();
8481
// getTotalUsedResourcesAcrossPartition includes reserved resources.
85-
usedMemoryBytes -= reservedMemoryBytes;
86-
usedVCores -= reservedVCores;
82+
Resources.subtractFrom(usedResources, reservedResources);
8783
} else {
88-
usedMemoryBytes = clusterMetrics.getAllocatedMB() * BYTES_IN_MB;
89-
totalMemoryBytes = clusterMetrics.getTotalMB() * BYTES_IN_MB;
90-
reservedMemoryBytes = clusterMetrics.getReservedMB() * BYTES_IN_MB;
91-
usedVCores = clusterMetrics.getAllocatedVirtualCores();
92-
totalVCores = clusterMetrics.getTotalVirtualCores();
93-
reservedVCores = clusterMetrics.getReservedVirtualCores();
84+
allocatedContainers = clusterMetrics.getContainersAllocated();
85+
usedResources = Resource.newInstance(
86+
clusterMetrics.getAllocatedMB() * BYTES_IN_MB,
87+
(int) clusterMetrics.getAllocatedVirtualCores());
88+
totalResources = Resource.newInstance(
89+
clusterMetrics.getTotalMB() * BYTES_IN_MB,
90+
(int) clusterMetrics.getTotalVirtualCores());
91+
reservedResources = Resource.newInstance(
92+
clusterMetrics.getReservedMB() * BYTES_IN_MB,
93+
(int) clusterMetrics.getReservedVirtualCores());
9494
}
9595

9696
div.h3("Cluster Metrics").
@@ -102,12 +102,9 @@ protected void render(Block html) {
102102
th().$class("ui-state-default").__("Apps Running").__().
103103
th().$class("ui-state-default").__("Apps Completed").__().
104104
th().$class("ui-state-default").__("Containers Running").__().
105-
th().$class("ui-state-default").__("Memory Used").__().
106-
th().$class("ui-state-default").__("Memory Total").__().
107-
th().$class("ui-state-default").__("Memory Reserved").__().
108-
th().$class("ui-state-default").__("VCores Used").__().
109-
th().$class("ui-state-default").__("VCores Total").__().
110-
th().$class("ui-state-default").__("VCores Reserved").__().
105+
th().$class("ui-state-default").__("Used Resources").__().
106+
th().$class("ui-state-default").__("Total Resources").__().
107+
th().$class("ui-state-default").__("Reserved Resources").__().
111108
__().
112109
__().
113110
tbody().$class("ui-widget-content").
@@ -121,14 +118,10 @@ protected void render(Block html) {
121118
clusterMetrics.getAppsFailed() + clusterMetrics.getAppsKilled()
122119
)
123120
).
124-
td(String.valueOf(
125-
clusterMetrics.getTotalAllocatedContainersAcrossPartition())).
126-
td(StringUtils.byteDesc(usedMemoryBytes)).
127-
td(StringUtils.byteDesc(totalMemoryBytes)).
128-
td(StringUtils.byteDesc(reservedMemoryBytes)).
129-
td(String.valueOf(usedVCores)).
130-
td(String.valueOf(totalVCores)).
131-
td(String.valueOf(reservedVCores)).
121+
td(String.valueOf(allocatedContainers)).
122+
td(usedResources.toString()).
123+
td(totalResources.toString()).
124+
td(reservedResources.toString()).
132125
__().
133126
__().__();
134127

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.commons.text.StringEscapeUtils;
2323
import org.apache.hadoop.util.StringUtils;
2424
import org.apache.hadoop.yarn.api.records.NodeState;
25+
import org.apache.hadoop.yarn.api.records.ResourceInformation;
2526
import org.apache.hadoop.yarn.conf.YarnConfiguration;
2627
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
2728
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
@@ -30,6 +31,7 @@
3031
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
3132
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo;
3233
import org.apache.hadoop.yarn.util.Times;
34+
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
3335
import org.apache.hadoop.yarn.webapp.SubView;
3436
import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet;
3537
import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet.TABLE;
@@ -86,14 +88,18 @@ protected void render(Block html) {
8688
.th(".mem", "Mem Used")
8789
.th(".mem", "Mem Avail")
8890
.th(".vcores", "VCores Used")
89-
.th(".vcores", "VCores Avail");
91+
.th(".vcores", "VCores Avail")
92+
.th(".gpus", "GPUs Used")
93+
.th(".gpus", "GPUs Avail");
9094
} else {
9195
trbody.th(".containers", "Running Containers (G)")
9296
.th(".allocationTags", "Allocation Tags")
9397
.th(".mem", "Mem Used (G)")
9498
.th(".mem", "Mem Avail (G)")
9599
.th(".vcores", "VCores Used (G)")
96100
.th(".vcores", "VCores Avail (G)")
101+
.th(".gpus", "GPUs Used (G)")
102+
.th(".gpus", "GPUs Avail (G)")
97103
.th(".containers", "Running Containers (O)")
98104
.th(".mem", "Mem Used (O)")
99105
.th(".vcores", "VCores Used (O)")
@@ -165,6 +171,16 @@ protected void render(Block html) {
165171
nodeTableData.append("\",\"<a ").append("href='" + "//" + httpAddress)
166172
.append("'>").append(httpAddress).append("</a>\",").append("\"");
167173
}
174+
Integer gpuIndex = ResourceUtils.getResourceTypeIndex()
175+
.get(ResourceInformation.GPU_URI);
176+
long usedGPUs = 0;
177+
long availableGPUs = 0;
178+
if (gpuIndex != null) {
179+
usedGPUs = info.getUsedResource().getResource()
180+
.getResourceValue(ResourceInformation.GPU_URI);
181+
availableGPUs = info.getAvailableResource().getResource()
182+
.getResourceValue(ResourceInformation.GPU_URI);
183+
}
168184
nodeTableData.append("<br title='")
169185
.append(String.valueOf(info.getLastHealthUpdate())).append("'>")
170186
.append(Times.format(info.getLastHealthUpdate())).append("\",\"")
@@ -179,6 +195,10 @@ protected void render(Block html) {
179195
.append("\",\"").append(String.valueOf(info.getUsedVirtualCores()))
180196
.append("\",\"")
181197
.append(String.valueOf(info.getAvailableVirtualCores()))
198+
.append("\",\"")
199+
.append(String.valueOf(usedGPUs))
200+
.append("\",\"")
201+
.append(String.valueOf(availableGPUs))
182202
.append("\",\"");
183203

184204
// If opportunistic containers are enabled, add extra fields.

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppsBlock.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,10 @@ public class RMAppsBlock extends AppsBlock {
6969
new ColumnHeader(".runningcontainer", "Running Containers"),
7070
new ColumnHeader(".allocatedCpu", "Allocated CPU VCores"),
7171
new ColumnHeader(".allocatedMemory", "Allocated Memory MB"),
72+
new ColumnHeader(".allocatedGpu", "Allocated GPUs"),
7273
new ColumnHeader(".reservedCpu", "Reserved CPU VCores"),
7374
new ColumnHeader(".reservedMemory", "Reserved Memory MB"),
75+
new ColumnHeader(".reservedGpu", "Reserved GPUs"),
7476
new ColumnHeader(".queuePercentage", "% of Queue"),
7577
new ColumnHeader(".clusterPercentage", "% of Cluster"),
7678
new ColumnHeader(".progress", "Progress"),
@@ -119,13 +121,15 @@ protected void renderData(Block html) {
119121
String blacklistedNodesCount = "N/A";
120122
RMApp rmApp = rm.getRMContext().getRMApps()
121123
.get(appAttemptId.getApplicationId());
124+
boolean isAppInCompletedState = false;
122125
if (rmApp != null) {
123126
RMAppAttempt appAttempt = rmApp.getRMAppAttempt(appAttemptId);
124127
Set<String> nodes =
125128
null == appAttempt ? null : appAttempt.getBlacklistedNodes();
126129
if (nodes != null) {
127130
blacklistedNodesCount = String.valueOf(nodes.size());
128131
}
132+
isAppInCompletedState = rmApp.isAppInCompletedStates();
129133
}
130134
String percent = StringUtils.format("%.1f", app.getProgress());
131135
appsTableData
@@ -171,12 +175,18 @@ protected void renderData(Block html) {
171175
.append(app.getAllocatedMemoryMB() == -1 ? "N/A" :
172176
String.valueOf(app.getAllocatedMemoryMB()))
173177
.append("\",\"")
178+
.append((isAppInCompletedState && app.getAllocatedGpus() <= 0)
179+
? UNAVAILABLE : String.valueOf(app.getAllocatedGpus()))
180+
.append("\",\"")
174181
.append(app.getReservedCpuVcores() == -1 ? "N/A" : String
175182
.valueOf(app.getReservedCpuVcores()))
176183
.append("\",\"")
177184
.append(app.getReservedMemoryMB() == -1 ? "N/A" :
178185
String.valueOf(app.getReservedMemoryMB()))
179186
.append("\",\"")
187+
.append((isAppInCompletedState && app.getReservedGpus() <= 0)
188+
? UNAVAILABLE : String.valueOf(app.getReservedGpus()))
189+
.append("\",\"")
180190
.append(queuePercent)
181191
.append("\",\"")
182192
.append(clusterPercent)

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ public class TestNodesPage {
4848

4949
// Number of Actual Table Headers for NodesPage.NodesBlock might change in
5050
// the future. In that case this value should be adjusted to the new value.
51-
private final int numberOfThInMetricsTable = 23;
52-
private final int numberOfActualTableHeaders = 14;
51+
private final int numberOfThInMetricsTable = 20;
52+
private final int numberOfActualTableHeaders = 16;
5353
private final int numberOfThForOpportunisticContainers = 4;
5454

5555
private Injector injector;

0 commit comments

Comments
 (0)