Explorar o código

YARN-2992. ZKRMStateStore crashes due to session expiry. Contributed by Karthik Kambatla
(cherry picked from commit 1454efe5d4fe4214ec5ef9142d55dbeca7dab953)

(cherry picked from commit ca0349b87ab1b2d0d2b9dc93de7806d26713165c)
(cherry picked from commit 2f6be218fa41fd0f39633ec5ed0df6e0fa0f54b6)

Jian He hai 10 anos
pai
achega
e7fc071906

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -57,6 +57,9 @@ Release 2.6.1 - UNRELEASED
     YARN-2340. Fixed NPE when queue is stopped during RM restart.
     (Rohith Sharmaks via jianhe)
 
+    YARN-2992. ZKRMStateStore crashes due to session expiry. (Karthik Kambatla
+    via jianhe)
+
 Release 2.6.0 - 2014-11-18
 
   INCOMPATIBLE CHANGES

+ 3 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java

@@ -1081,6 +1081,8 @@ public class ZKRMStateStore extends RMStateStore {
      switch (code) {
        case CONNECTIONLOSS:
        case OPERATIONTIMEOUT:
+        case SESSIONEXPIRED:
+        case SESSIONMOVED:
          return true;
        default:
          break;
@@ -1109,6 +1111,7 @@ public class ZKRMStateStore extends RMStateStore {
          if (shouldRetry(ke.code()) && ++retry < numRetries) {
            LOG.info("Retrying operation on ZK. Retry no. " + retry);
            Thread.sleep(zkRetryInterval);
+            createConnection();
            continue;
          }
          LOG.info("Maxed out ZK retries. Giving up!");