Skip to content

Commit 56247db

Browse files
author
Eric E Payne
committed
YARN-10300: appMasterHost not set in RM ApplicationSummary when AM fails before first heartbeat. Contributed by Eric Badger (ebadger).
1 parent ac5d899 commit 56247db

File tree

3 files changed

+62
-1
lines changed

3 files changed

+62
-1
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
import java.util.concurrent.ExecutionException;
2727
import java.util.concurrent.Future;
2828

29+
import org.apache.hadoop.yarn.api.records.Container;
30+
import org.apache.hadoop.yarn.api.records.NodeId;
2931
import org.slf4j.Logger;
3032
import org.slf4j.LoggerFactory;
3133
import org.apache.hadoop.conf.Configuration;
@@ -190,7 +192,16 @@ public static SummaryBuilder createAppSummary(RMApp app) {
190192
RMAppAttempt attempt = app.getCurrentAppAttempt();
191193
if (attempt != null) {
192194
trackingUrl = attempt.getTrackingUrl();
193-
host = attempt.getHost();
195+
Container masterContainer = attempt.getMasterContainer();
196+
if (masterContainer != null) {
197+
NodeId nodeId = masterContainer.getNodeId();
198+
if (nodeId != null) {
199+
String amHost = nodeId.getHost();
200+
if (amHost != null) {
201+
host = amHost;
202+
}
203+
}
204+
}
194205
}
195206
RMAppMetrics metrics = app.getRMAppMetrics();
196207
SummaryBuilder summary = new SummaryBuilder()

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@
2222
import com.google.common.collect.Lists;
2323
import com.google.common.collect.Maps;
2424
import com.google.common.collect.Sets;
25+
import org.apache.hadoop.yarn.api.records.Container;
26+
import org.apache.hadoop.yarn.api.records.NodeId;
2527
import org.apache.hadoop.yarn.api.records.QueueACL;
28+
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
2629
import org.slf4j.Logger;
2730
import org.slf4j.LoggerFactory;
2831
import org.apache.hadoop.conf.Configuration;
@@ -972,6 +975,17 @@ public void testEscapeApplicationSummary() {
972975
when(app.getSubmitTime()).thenReturn(1000L);
973976
when(app.getLaunchTime()).thenReturn(2000L);
974977
when(app.getApplicationTags()).thenReturn(Sets.newHashSet("tag2", "tag1"));
978+
979+
RMAppAttempt mockRMAppAttempt = mock(RMAppAttempt.class);
980+
Container mockContainer = mock(Container.class);
981+
NodeId mockNodeId = mock(NodeId.class);
982+
String host = "127.0.0.1";
983+
984+
when(mockNodeId.getHost()).thenReturn(host);
985+
when(mockContainer.getNodeId()).thenReturn(mockNodeId);
986+
when(mockRMAppAttempt.getMasterContainer()).thenReturn(mockContainer);
987+
when(app.getCurrentAppAttempt()).thenReturn(mockRMAppAttempt);
988+
975989
Map<String, Long> resourceSecondsMap = new HashMap<>();
976990
resourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), 16384L);
977991
resourceSecondsMap.put(ResourceInformation.VCORES.getName(), 64L);
@@ -993,6 +1007,7 @@ public void testEscapeApplicationSummary() {
9931007
assertTrue(msg.contains("Multiline" + escaped +"AppName"));
9941008
assertTrue(msg.contains("Multiline" + escaped +"UserName"));
9951009
assertTrue(msg.contains("Multiline" + escaped +"QueueName"));
1010+
assertTrue(msg.contains("appMasterHost=" + host));
9961011
assertTrue(msg.contains("submitTime=1000"));
9971012
assertTrue(msg.contains("launchTime=2000"));
9981013
assertTrue(msg.contains("memorySeconds=16384"));

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,41 @@ public void testSetupTokensWithHTTPS() throws Exception {
453453
testSetupTokens(true, conf);
454454
}
455455

456+
@Test
457+
public void testAMMasterContainerHost() throws Exception {
458+
//Test that masterContainer and its associated host are
459+
//set before the AM is even launched.
460+
MockRM rm = new MockRM();
461+
rm.start();
462+
String host = "127.0.0.1";
463+
String port = "1234";
464+
MockNM nm1 = rm.registerNode(host + ":" + port, 5120);
465+
RMApp app = MockRMAppSubmitter.submitWithMemory(2000, rm);
466+
// kick the scheduling
467+
nm1.nodeHeartbeat(true);
468+
RMAppAttempt attempt = app.getCurrentAppAttempt();
469+
470+
try {
471+
GenericTestUtils.waitFor(new Supplier<Boolean>() {
472+
@Override public Boolean get() {
473+
return attempt.getMasterContainer() != null;
474+
}
475+
}, 10, 200 * 100);
476+
} catch (TimeoutException e) {
477+
fail("timed out while waiting for AM Launch to happen.");
478+
}
479+
480+
Assert.assertEquals(
481+
app.getCurrentAppAttempt().getMasterContainer().getNodeId().getHost(),
482+
host);
483+
484+
//send kill before launch
485+
rm.killApp(app.getApplicationId());
486+
rm.waitForState(app.getApplicationId(), RMAppState.KILLED);
487+
488+
rm.stop();
489+
}
490+
456491
private void testSetupTokens(boolean https, YarnConfiguration conf)
457492
throws Exception {
458493
MockRM rm = new MockRM(conf);

0 commit comments

Comments
 (0)