Skip to content

Commit 0451694

Browse files
author
wuxiaobao
committed
YARN-11626. Optimize ResourceManager's operations on Zookeeper metadata
1 parent 7bedb96 commit 0451694

File tree

3 files changed

+440
-7
lines changed

3 files changed

+440
-7
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,12 @@
6767
<artifactId>mockito-core</artifactId>
6868
<scope>test</scope>
6969
</dependency>
70+
<dependency>
71+
<groupId>org.mockito</groupId>
72+
<artifactId>mockito-inline</artifactId>
73+
<version>2.8.9</version>
74+
<scope>test</scope>
75+
</dependency>
7076
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
7177
<dependency>
7278
<groupId>org.apache.hadoop</groupId>

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -956,7 +956,7 @@ private void handleApplicationAttemptStateOp(
956956
zkAcl, fencingNodePath);
957957
break;
958958
case REMOVE:
959-
zkManager.safeDelete(path, zkAcl, fencingNodePath);
959+
safeDeleteAndCheckNode(path, zkAcl, fencingNodePath);
960960
break;
961961
default:
962962
break;
@@ -1035,10 +1035,10 @@ private void removeApp(String removeAppId, boolean safeRemove,
10351035
for (ApplicationAttemptId attemptId : attempts) {
10361036
String attemptRemovePath =
10371037
getNodePath(appIdRemovePath, attemptId.toString());
1038-
zkManager.safeDelete(attemptRemovePath, zkAcl, fencingNodePath);
1038+
safeDeleteAndCheckNode(attemptRemovePath, zkAcl, fencingNodePath);
10391039
}
10401040
}
1041-
zkManager.safeDelete(appIdRemovePath, zkAcl, fencingNodePath);
1041+
safeDeleteAndCheckNode(appIdRemovePath, zkAcl, fencingNodePath);
10421042
} else {
10431043
CuratorFramework curatorFramework = zkManager.getCurator();
10441044
curatorFramework.delete().deletingChildrenIfNeeded().
@@ -1099,7 +1099,7 @@ protected synchronized void removeRMDelegationTokenState(
10991099
LOG.debug("Removing RMDelegationToken_{}",
11001100
rmDTIdentifier.getSequenceNumber());
11011101

1102-
zkManager.safeDelete(nodeRemovePath, zkAcl, fencingNodePath);
1102+
safeDeleteAndCheckNode(nodeRemovePath, zkAcl, fencingNodePath);
11031103

11041104
// Check if we should remove the parent app node as well.
11051105
checkRemoveParentZnode(nodeRemovePath, splitIndex);
@@ -1160,7 +1160,7 @@ protected synchronized void removeRMDTMasterKeyState(
11601160

11611161
LOG.debug("Removing RMDelegationKey_{}", delegationKey.getKeyId());
11621162

1163-
zkManager.safeDelete(nodeRemovePath, zkAcl, fencingNodePath);
1163+
safeDeleteAndCheckNode(nodeRemovePath, zkAcl, fencingNodePath);
11641164
}
11651165

11661166
@Override
@@ -1200,12 +1200,12 @@ protected synchronized void removeReservationState(String planName,
12001200
LOG.debug("Removing reservationallocation {} for plan {}",
12011201
reservationIdName, planName);
12021202

1203-
zkManager.safeDelete(reservationPath, zkAcl, fencingNodePath);
1203+
safeDeleteAndCheckNode(reservationPath, zkAcl, fencingNodePath);
12041204

12051205
List<String> reservationNodes = getChildren(planNodePath);
12061206

12071207
if (reservationNodes.isEmpty()) {
1208-
zkManager.safeDelete(planNodePath, zkAcl, fencingNodePath);
1208+
safeDeleteAndCheckNode(planNodePath, zkAcl, fencingNodePath);
12091209
}
12101210
}
12111211

@@ -1441,6 +1441,29 @@ void delete(final String path) throws Exception {
14411441
zkManager.delete(path);
14421442
}
14431443

1444+
/**
1445+
* Deletes the path more safe.
1446+
* When NNE is encountered, if the node does not exist,
1447+
* it will ignore this exception to avoid triggering
1448+
* a greater impact of ResourceManager failover on the cluster.
1449+
* @param path Path to be deleted.
1450+
* @param fencingACL fencingACL.
1451+
* @param fencingPath fencingNodePath.
1452+
* @throws Exception if any problem occurs while performing deletion.
1453+
*/
1454+
public void safeDeleteAndCheckNode(String path, List<ACL> fencingACL,
1455+
String fencingPath) throws Exception {
1456+
try{
1457+
zkManager.safeDelete(path, fencingACL, fencingPath);
1458+
} catch (KeeperException.NoNodeException nne) {
1459+
if(!exists(path)){
1460+
LOG.info("Node " + path + " doesn't exist to delete");
1461+
} else {
1462+
throw new KeeperException.NodeExistsException("Node " + path + " should not exist");
1463+
}
1464+
}
1465+
}
1466+
14441467
/**
14451468
* Helper class that periodically attempts creating a znode to ensure that
14461469
* this RM continues to be the Active.

0 commit comments

Comments
 (0)