Skip to content

Commit a6aa6e4

Browse files
committed
YARN-2588. Standby RM fails to transitionToActive if previous transitionToActive failed with ZK exception. Contributed by Rohith Sharmaks
1 parent abae63c commit a6aa6e4

File tree

3 files changed

+63
-2
lines changed
  • hadoop-yarn-project
    • hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src

3 files changed

+63
-2
lines changed

hadoop-yarn-project/CHANGES.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -677,6 +677,9 @@ Release 2.6.0 - UNRELEASED
677677
YARN-2566. DefaultContainerExecutor should pick a working directory randomly.
678678
(Zhihai Xu via kasha)
679679

680+
YARN-2588. Standby RM fails to transitionToActive if previous
681+
transitionToActive failed with ZK exception. (Rohith Sharmaks via jianhe)
682+
680683
Release 2.5.1 - 2014-09-05
681684

682685
INCOMPATIBLE CHANGES

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,8 +1023,14 @@ synchronized void transitionToActive() throws Exception {
10231023
this.rmLoginUGI.doAs(new PrivilegedExceptionAction<Void>() {
10241024
@Override
10251025
public Void run() throws Exception {
1026-
startActiveServices();
1027-
return null;
1026+
try {
1027+
startActiveServices();
1028+
return null;
1029+
} catch (Exception e) {
1030+
resetDispatcher();
1031+
createAndInitActiveServices();
1032+
throw e;
1033+
}
10281034
}
10291035
});
10301036

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import org.apache.hadoop.yarn.event.Dispatcher;
4848
import org.apache.hadoop.yarn.event.EventHandler;
4949
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
50+
import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore;
5051
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
5152
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
5253
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
@@ -398,6 +399,57 @@ public void testHAWithRMHostName() throws Exception {
398399
innerTestHAWithRMHostName(true);
399400
}
400401

402+
@Test(timeout = 30000)
403+
public void testFailoverWhenTransitionToActiveThrowException()
404+
throws Exception {
405+
configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
406+
Configuration conf = new YarnConfiguration(configuration);
407+
408+
MemoryRMStateStore memStore = new MemoryRMStateStore() {
409+
int count = 0;
410+
411+
@Override
412+
public synchronized void startInternal() throws Exception {
413+
// first time throw exception
414+
if (count++ == 0) {
415+
throw new Exception("Session Expired");
416+
}
417+
}
418+
};
419+
// start RM
420+
memStore.init(conf);
421+
422+
rm = new MockRM(conf, memStore);
423+
rm.init(conf);
424+
StateChangeRequestInfo requestInfo =
425+
new StateChangeRequestInfo(
426+
HAServiceProtocol.RequestSource.REQUEST_BY_USER);
427+
428+
assertEquals(STATE_ERR, HAServiceState.INITIALIZING, rm.adminService
429+
.getServiceStatus().getState());
430+
assertFalse("RM is ready to become active before being started",
431+
rm.adminService.getServiceStatus().isReadyToBecomeActive());
432+
checkMonitorHealth();
433+
434+
rm.start();
435+
checkMonitorHealth();
436+
checkStandbyRMFunctionality();
437+
438+
// 2. Try Transition to active, throw exception
439+
try {
440+
rm.adminService.transitionToActive(requestInfo);
441+
Assert.fail("Transitioned to Active should throw exception.");
442+
} catch (Exception e) {
443+
assertTrue("Error when transitioning to Active mode".contains(e
444+
.getMessage()));
445+
}
446+
447+
// 3. Transition to active, success
448+
rm.adminService.transitionToActive(requestInfo);
449+
checkMonitorHealth();
450+
checkActiveRMFunctionality();
451+
}
452+
401453
public void innerTestHAWithRMHostName(boolean includeBindHost) {
402454
//this is run two times, with and without a bind host configured
403455
if (includeBindHost) {

0 commit comments

Comments
 (0)