Skip to content

Commit 06533ab

Browse files
slfan1989HarshitGupta11
authored andcommitted
YARN-11212. [Federation] Add getNodeToLabels REST APIs for Router. (apache#4614)
1 parent 06d132a commit 06533ab

File tree

6 files changed

+141
-69
lines changed

6 files changed

+141
-69
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeLabelsInfo.java

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@
3232
public class NodeLabelsInfo {
3333

3434
@XmlElement(name = "nodeLabelInfo")
35-
private ArrayList<NodeLabelInfo> nodeLabelsInfo =
36-
new ArrayList<NodeLabelInfo>();
35+
private ArrayList<NodeLabelInfo> nodeLabelsInfo = new ArrayList<>();
3736

3837
public NodeLabelsInfo() {
3938
// JAXB needs this
@@ -44,25 +43,32 @@ public NodeLabelsInfo(ArrayList<NodeLabelInfo> nodeLabels) {
4443
}
4544

4645
public NodeLabelsInfo(List<NodeLabel> nodeLabels) {
47-
this.nodeLabelsInfo = new ArrayList<NodeLabelInfo>();
46+
this.nodeLabelsInfo = new ArrayList<>();
4847
for (NodeLabel label : nodeLabels) {
4948
this.nodeLabelsInfo.add(new NodeLabelInfo(label));
5049
}
5150
}
5251

5352
public NodeLabelsInfo(Set<String> nodeLabelsName) {
54-
this.nodeLabelsInfo = new ArrayList<NodeLabelInfo>();
53+
this.nodeLabelsInfo = new ArrayList<>();
5554
for (String labelName : nodeLabelsName) {
5655
this.nodeLabelsInfo.add(new NodeLabelInfo(labelName));
5756
}
5857
}
5958

59+
public NodeLabelsInfo(Collection<NodeLabel> nodeLabels) {
60+
this.nodeLabelsInfo = new ArrayList<>();
61+
nodeLabels.stream().forEach(nodeLabel -> {
62+
this.nodeLabelsInfo.add(new NodeLabelInfo(nodeLabel));
63+
});
64+
}
65+
6066
public ArrayList<NodeLabelInfo> getNodeLabelsInfo() {
6167
return nodeLabelsInfo;
6268
}
6369

6470
public Set<NodeLabel> getNodeLabels() {
65-
Set<NodeLabel> nodeLabels = new HashSet<NodeLabel>();
71+
Set<NodeLabel> nodeLabels = new HashSet<>();
6672
for (NodeLabelInfo label : nodeLabelsInfo) {
6773
nodeLabels.add(NodeLabel.newInstance(label.getName(),
6874
label.getExclusivity()));
@@ -71,7 +77,7 @@ public Set<NodeLabel> getNodeLabels() {
7177
}
7278

7379
public List<String> getNodeLabelsName() {
74-
ArrayList<String> nodeLabelsName = new ArrayList<String>();
80+
ArrayList<String> nodeLabelsName = new ArrayList<>();
7581
for (NodeLabelInfo label : nodeLabelsInfo) {
7682
nodeLabelsName.add(label.getName());
7783
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeToLabelsInfo.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,17 @@ public NodeToLabelsInfo() {
3535
// JAXB needs this
3636
}
3737

38+
public NodeToLabelsInfo(HashMap<String, NodeLabelsInfo> nodeToLabels) {
39+
if (nodeToLabels != null) {
40+
this.nodeToLabels.putAll(nodeToLabels);
41+
}
42+
}
43+
3844
public HashMap<String, NodeLabelsInfo> getNodeToLabels() {
3945
return nodeToLabels;
4046
}
47+
48+
public void setNodeToLabels(HashMap<String, NodeLabelsInfo> nodeToLabels) {
49+
this.nodeToLabels = nodeToLabels;
50+
}
4151
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java

Lines changed: 45 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ public Response createNewApplication(HttpServletRequest hsr)
282282
.entity(e.getLocalizedMessage()).build();
283283
}
284284

285-
List<SubClusterId> blacklist = new ArrayList<SubClusterId>();
285+
List<SubClusterId> blacklist = new ArrayList<>();
286286

287287
for (int i = 0; i < numSubmitRetries; ++i) {
288288

@@ -295,7 +295,7 @@ public Response createNewApplication(HttpServletRequest hsr)
295295
.entity(e.getLocalizedMessage()).build();
296296
}
297297

298-
LOG.debug("getNewApplication try #{} on SubCluster {}", i, subClusterId);
298+
LOG.debug("getNewApplication try #{} on SubCluster {}.", i, subClusterId);
299299

300300
DefaultRequestInterceptorREST interceptor =
301301
getOrCreateInterceptorForSubCluster(subClusterId,
@@ -304,7 +304,7 @@ public Response createNewApplication(HttpServletRequest hsr)
304304
try {
305305
response = interceptor.createNewApplication(hsr);
306306
} catch (Exception e) {
307-
LOG.warn("Unable to create a new ApplicationId in SubCluster {}",
307+
LOG.warn("Unable to create a new ApplicationId in SubCluster {}.",
308308
subClusterId.getId(), e);
309309
}
310310

@@ -424,7 +424,7 @@ public Response submitApplication(ApplicationSubmissionContextInfo newApp,
424424
.build();
425425
}
426426

427-
List<SubClusterId> blacklist = new ArrayList<SubClusterId>();
427+
List<SubClusterId> blacklist = new ArrayList<>();
428428

429429
for (int i = 0; i < numSubmitRetries; ++i) {
430430

@@ -441,7 +441,7 @@ public Response submitApplication(ApplicationSubmissionContextInfo newApp,
441441
.entity(e.getLocalizedMessage())
442442
.build();
443443
}
444-
LOG.info("submitApplication appId {} try #{} on SubCluster {}",
444+
LOG.info("submitApplication appId {} try #{} on SubCluster {}.",
445445
applicationId, i, subClusterId);
446446

447447
ApplicationHomeSubCluster appHomeSubCluster =
@@ -482,7 +482,7 @@ public Response submitApplication(ApplicationSubmissionContextInfo newApp,
482482
.build();
483483
}
484484
if (subClusterId == subClusterIdInStateStore) {
485-
LOG.info("Application {} already submitted on SubCluster {}",
485+
LOG.info("Application {} already submitted on SubCluster {}.",
486486
applicationId, subClusterId);
487487
} else {
488488
routerMetrics.incrAppsFailedSubmitted();
@@ -712,8 +712,7 @@ public AppsInfo call() {
712712

713713
if (rmApps == null) {
714714
routerMetrics.incrMultipleAppsFailedRetrieved();
715-
LOG.error("Subcluster {} failed to return appReport.",
716-
info.getSubClusterId());
715+
LOG.error("Subcluster {} failed to return appReport.", info.getSubClusterId());
717716
return null;
718717
}
719718
return rmApps;
@@ -873,8 +872,7 @@ private Map<SubClusterInfo, NodeInfo> getNode(
873872
subclusterId, subcluster.getRMWebServiceAddress());
874873
return interceptor.getNode(nodeId);
875874
} catch (Exception e) {
876-
LOG.error("Subcluster {} failed to return nodeInfo.",
877-
subclusterId);
875+
LOG.error("Subcluster {} failed to return nodeInfo.", subclusterId, e);
878876
return null;
879877
}
880878
});
@@ -953,58 +951,28 @@ private SubClusterInfo getNodeSubcluster(String nodeId)
953951
public NodesInfo getNodes(String states) {
954952

955953
NodesInfo nodes = new NodesInfo();
956-
957-
final Map<SubClusterId, SubClusterInfo> subClustersActive;
958954
try {
959-
subClustersActive = getActiveSubclusters();
960-
} catch (Exception e) {
961-
LOG.error("Cannot get nodes: {}", e.getMessage());
962-
return new NodesInfo();
963-
}
964-
965-
// Send the requests in parallel
966-
CompletionService<NodesInfo> compSvc =
967-
new ExecutorCompletionService<NodesInfo>(this.threadpool);
968-
969-
for (final SubClusterInfo info : subClustersActive.values()) {
970-
compSvc.submit(new Callable<NodesInfo>() {
971-
@Override
972-
public NodesInfo call() {
973-
DefaultRequestInterceptorREST interceptor =
974-
getOrCreateInterceptorForSubCluster(
975-
info.getSubClusterId(), info.getRMWebServiceAddress());
976-
try {
977-
NodesInfo nodesInfo = interceptor.getNodes(states);
978-
return nodesInfo;
979-
} catch (Exception e) {
980-
LOG.error("Subcluster {} failed to return nodesInfo.",
981-
info.getSubClusterId());
982-
return null;
983-
}
984-
}
955+
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
956+
Class[] argsClasses = new Class[]{String.class};
957+
Object[] args = new Object[]{states};
958+
ClientMethod remoteMethod = new ClientMethod("getNodes", argsClasses, args);
959+
Map<SubClusterInfo, NodesInfo> nodesMap =
960+
invokeConcurrent(subClustersActive.values(), remoteMethod, NodesInfo.class);
961+
nodesMap.values().stream().forEach(nodesInfo -> {
962+
nodes.addAll(nodesInfo.getNodes());
985963
});
986-
}
987-
988-
// Collect all the responses in parallel
989-
990-
for (int i = 0; i < subClustersActive.size(); i++) {
991-
try {
992-
Future<NodesInfo> future = compSvc.take();
993-
NodesInfo nodesResponse = future.get();
994-
995-
if (nodesResponse != null) {
996-
nodes.addAll(nodesResponse.getNodes());
997-
}
998-
} catch (Throwable e) {
999-
LOG.warn("Failed to get nodes report ", e);
1000-
}
964+
} catch (NotFoundException e) {
965+
LOG.error("Get all active sub cluster(s) error.", e);
966+
} catch (YarnException e) {
967+
LOG.error("getNodes error.", e);
968+
} catch (IOException e) {
969+
LOG.error("getNodes error with io error.", e);
1001970
}
1002971

1003972
// Delete duplicate from all the node reports got from all the available
1004973
// YARN RMs. Nodes can be moved from one subclusters to another. In this
1005974
// operation they result LOST/RUNNING in the previous SubCluster and
1006975
// NEW/RUNNING in the new one.
1007-
1008976
return RouterWebServiceUtil.deleteDuplicateNodesInfo(nodes.getNodes());
1009977
}
1010978

@@ -1172,7 +1140,22 @@ public ApplicationStatisticsInfo getAppStatistics(HttpServletRequest hsr,
11721140
@Override
11731141
public NodeToLabelsInfo getNodeToLabels(HttpServletRequest hsr)
11741142
throws IOException {
1175-
throw new NotImplementedException("Code is not implemented");
1143+
try {
1144+
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
1145+
final HttpServletRequest hsrCopy = clone(hsr);
1146+
Class[] argsClasses = new Class[]{HttpServletRequest.class};
1147+
Object[] args = new Object[]{hsrCopy};
1148+
ClientMethod remoteMethod = new ClientMethod("getNodeToLabels", argsClasses, args);
1149+
Map<SubClusterInfo, NodeToLabelsInfo> nodeToLabelsInfoMap =
1150+
invokeConcurrent(subClustersActive.values(), remoteMethod, NodeToLabelsInfo.class);
1151+
return RouterWebServiceUtil.mergeNodeToLabels(nodeToLabelsInfoMap);
1152+
} catch (NotFoundException e) {
1153+
LOG.error("Get all active sub cluster(s) error.", e);
1154+
throw new IOException("Get all active sub cluster(s) error.", e);
1155+
} catch (YarnException e) {
1156+
LOG.error("getNodeToLabels error.", e);
1157+
throw new IOException("getNodeToLabels error.", e);
1158+
}
11761159
}
11771160

11781161
@Override
@@ -1395,7 +1378,7 @@ public void shutdown() {
13951378
}
13961379

13971380
private <R> Map<SubClusterInfo, R> invokeConcurrent(Collection<SubClusterInfo> clusterIds,
1398-
ClientMethod request, Class<R> clazz) {
1381+
ClientMethod request, Class<R> clazz) throws YarnException {
13991382

14001383
Map<SubClusterInfo, R> results = new HashMap<>();
14011384

@@ -1413,8 +1396,8 @@ private <R> Map<SubClusterInfo, R> invokeConcurrent(Collection<SubClusterInfo> c
14131396
R ret = clazz.cast(retObj);
14141397
return ret;
14151398
} catch (Exception e) {
1416-
LOG.error("SubCluster {} failed to call {} method.", info.getSubClusterId(),
1417-
request.getMethodName(), e);
1399+
LOG.error("SubCluster %s failed to call %s method.",
1400+
info.getSubClusterId(), request.getMethodName(), e);
14181401
return null;
14191402
}
14201403
});
@@ -1428,7 +1411,10 @@ private <R> Map<SubClusterInfo, R> invokeConcurrent(Collection<SubClusterInfo> c
14281411
results.put(clusterId, response);
14291412
}
14301413
} catch (Throwable e) {
1431-
LOG.warn("SubCluster {} failed to {} report.", clusterId, request.getMethodName(), e);
1414+
String msg = String.format("SubCluster %s failed to %s report.",
1415+
clusterId, request.getMethodName());
1416+
LOG.warn(msg, e);
1417+
throw new YarnRuntimeException(msg, e);
14321418
}
14331419
});
14341420
return results;

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/RouterWebServiceUtil.java

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@
3030
import java.util.List;
3131
import java.util.Map;
3232
import java.util.Map.Entry;
33+
import java.util.Collection;
34+
import java.util.Set;
35+
import java.util.HashSet;
3336
import java.util.concurrent.TimeUnit;
3437

3538
import javax.servlet.http.HttpServletRequest;
@@ -43,13 +46,17 @@
4346
import org.apache.hadoop.net.NetUtils;
4447
import org.apache.hadoop.security.UserGroupInformation;
4548
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
49+
import org.apache.hadoop.yarn.api.records.NodeLabel;
4650
import org.apache.hadoop.yarn.conf.YarnConfiguration;
51+
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterInfo;
4752
import org.apache.hadoop.yarn.server.resourcemanager.webapp.RMWebAppUtil;
4853
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
4954
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppsInfo;
5055
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
5156
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo;
5257
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodesInfo;
58+
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeLabelsInfo;
59+
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeToLabelsInfo;
5360
import org.apache.hadoop.yarn.server.uam.UnmanagedApplicationManager;
5461
import org.apache.hadoop.yarn.webapp.BadRequestException;
5562
import org.apache.hadoop.yarn.webapp.ForbiddenException;
@@ -293,8 +300,8 @@ public static AppsInfo mergeAppsInfo(ArrayList<AppInfo> appsInfo,
293300
boolean returnPartialResult) {
294301
AppsInfo allApps = new AppsInfo();
295302

296-
Map<String, AppInfo> federationAM = new HashMap<String, AppInfo>();
297-
Map<String, AppInfo> federationUAMSum = new HashMap<String, AppInfo>();
303+
Map<String, AppInfo> federationAM = new HashMap<>();
304+
Map<String, AppInfo> federationUAMSum = new HashMap<>();
298305
for (AppInfo a : appsInfo) {
299306
// Check if this AppInfo is an AM
300307
if (a.getAMHostHttpAddress() != null) {
@@ -332,7 +339,7 @@ public static AppsInfo mergeAppsInfo(ArrayList<AppInfo> appsInfo,
332339
}
333340
}
334341

335-
allApps.addAll(new ArrayList<AppInfo>(federationAM.values()));
342+
allApps.addAll(new ArrayList<>(federationAM.values()));
336343
return allApps;
337344
}
338345

@@ -419,7 +426,7 @@ public static NodesInfo deleteDuplicateNodesInfo(ArrayList<NodeInfo> nodes) {
419426
nodesMap.put(node.getNodeId(), node);
420427
}
421428
}
422-
nodesInfo.addAll(new ArrayList<NodeInfo>(nodesMap.values()));
429+
nodesInfo.addAll(new ArrayList<>(nodesMap.values()));
423430
return nodesInfo;
424431
}
425432

@@ -509,4 +516,28 @@ protected static <T> String getMediaTypeFromHttpServletRequest(
509516
return header;
510517
}
511518

519+
public static NodeToLabelsInfo mergeNodeToLabels(
520+
Map<SubClusterInfo, NodeToLabelsInfo> nodeToLabelsInfoMap) {
521+
522+
HashMap<String, NodeLabelsInfo> nodeToLabels = new HashMap<>();
523+
Collection<NodeToLabelsInfo> nodeToLabelsInfos = nodeToLabelsInfoMap.values();
524+
525+
nodeToLabelsInfos.stream().forEach(nodeToLabelsInfo -> {
526+
for (Map.Entry<String, NodeLabelsInfo> item : nodeToLabelsInfo.getNodeToLabels().entrySet()) {
527+
String key = item.getKey();
528+
NodeLabelsInfo itemValue = item.getValue();
529+
NodeLabelsInfo nodeToLabelsValue = nodeToLabels.getOrDefault(item.getKey(), null);
530+
Set<NodeLabel> hashSet = new HashSet<>();
531+
if (itemValue != null) {
532+
hashSet.addAll(itemValue.getNodeLabels());
533+
}
534+
if (nodeToLabelsValue != null) {
535+
hashSet.addAll(nodeToLabelsValue.getNodeLabels());
536+
}
537+
nodeToLabels.put(key, new NodeLabelsInfo(hashSet));
538+
}
539+
});
540+
541+
return new NodeToLabelsInfo(nodeToLabels);
542+
}
512543
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
import java.net.ConnectException;
2323
import java.util.HashSet;
2424
import java.util.Set;
25+
import java.util.HashMap;
26+
import java.util.Collections;
2527
import java.util.concurrent.atomic.AtomicInteger;
2628

2729
import javax.servlet.http.HttpServletRequest;
@@ -52,6 +54,8 @@
5254
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodesInfo;
5355
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo;
5456
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceOptionInfo;
57+
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeToLabelsInfo;
58+
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeLabelsInfo;
5559
import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo;
5660
import org.apache.hadoop.yarn.server.webapp.dao.ContainersInfo;
5761
import org.apache.hadoop.yarn.webapp.NotFoundException;
@@ -279,4 +283,18 @@ public ContainersInfo getContainers(HttpServletRequest req, HttpServletResponse
279283

280284
return containers;
281285
}
286+
287+
@Override
288+
public NodeToLabelsInfo getNodeToLabels(HttpServletRequest hsr) throws IOException {
289+
if (!isRunning) {
290+
throw new RuntimeException("RM is stopped");
291+
}
292+
NodeLabelsInfo cpuNode = new NodeLabelsInfo(Collections.singleton("CPU"));
293+
NodeLabelsInfo gpuNode = new NodeLabelsInfo(Collections.singleton("GPU"));
294+
295+
HashMap<String, NodeLabelsInfo> nodeLabels = new HashMap<>();
296+
nodeLabels.put("node1", cpuNode);
297+
nodeLabels.put("node2", gpuNode);
298+
return new NodeToLabelsInfo(nodeLabels);
299+
}
282300
}

0 commit comments

Comments
 (0)