Browse Source

Merge -c r1470243 from trunk to branch-2 for YARN-594. Update test and add comments in YARN-534 (Jian He via bikas)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1470245 13f79535-47bb-0310-9956-ffa450edef68
Bikas Saha 12 years ago
parent
commit
66beaf2c7d

+ 2 - 0
hadoop-yarn-project/CHANGES.txt

@@ -207,6 +207,8 @@ Release 2.0.5-beta - UNRELEASED
     YARN-547. Fixed race conditions in public and private resource localization
     YARN-547. Fixed race conditions in public and private resource localization
     which used to cause duplicate downloads. (Omkar Vinit Joshi via vinodkv)
     which used to cause duplicate downloads. (Omkar Vinit Joshi via vinodkv)
 
 
+    YARN-594. Update test and add comments in YARN-534 (Jian He via bikas)
+
 Release 2.0.4-alpha - UNRELEASED
 Release 2.0.4-alpha - UNRELEASED
 
 
   INCOMPATIBLE CHANGES
   INCOMPATIBLE CHANGES

+ 2 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java

@@ -340,6 +340,8 @@ public class RMAppManager implements EventHandler<RMAppManagerEvent>,
       } else {
       } else {
         maxAppAttempts = individualMaxAppAttempts;
         maxAppAttempts = individualMaxAppAttempts;
       }
       }
+      // In a work-preserving restart, if attemptCount == maxAppAttempts, the
+      // job still needs to be recovered because the last attempt may still be
+      // running.
       if(appState.getAttemptCount() >= maxAppAttempts) {
       if(appState.getAttemptCount() >= maxAppAttempts) {
         LOG.info("Not recovering application " + appState.getAppId() +
         LOG.info("Not recovering application " + appState.getAppId() +
             " due to recovering attempt is beyond maxAppAttempt limit");
             " due to recovering attempt is beyond maxAppAttempt limit");

+ 6 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java

@@ -364,7 +364,6 @@ public class TestRMRestart {
     Assert.assertNotNull(attemptState);
     Assert.assertNotNull(attemptState);
     Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), 
     Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), 
                         attemptState.getMasterContainer().getId());
                         attemptState.getMasterContainer().getId());
-    rm1.stop();
 
 
     // start new RM   
     // start new RM   
     MockRM rm2 = new MockRM(conf, memStore);
     MockRM rm2 = new MockRM(conf, memStore);
@@ -382,7 +381,12 @@ public class TestRMRestart {
     Assert.assertNull(rm2.getRMContext().getRMApps()
     Assert.assertNull(rm2.getRMContext().getRMApps()
         .get(app1.getApplicationId()));
         .get(app1.getApplicationId()));
 
 
-    // stop the RM
+    // verify that app2 is stored, app1 is removed
+    Assert.assertNotNull(rmAppState.get(app2.getApplicationId()));
+    Assert.assertNull(rmAppState.get(app1.getApplicationId()));
+
+    // stop both RMs
+    rm1.stop();
     rm2.stop();
     rm2.stop();
   }
   }
 }
 }