浏览代码

YARN-2823. Fixed ResourceManager app-attempt state machine to inform schedulers about previous finished attempts of a running appliation to avoid expectation mismatch w.r.t transferred containers. Contributed by Jian He.

Vinod Kumar Vavilapalli 10 年之前
父节点
当前提交
a5657182a7

+ 4 - 0
hadoop-yarn-project/CHANGES.txt

@@ -888,6 +888,10 @@ Release 2.6.0 - UNRELEASED
     YARN-2744. Fixed CapacityScheduler to validate node-labels correctly against
     queues. (Wangda Tan via vinodkv)
 
+    YARN-2823. Fixed ResourceManager app-attempt state machine to inform
+    schedulers about previous finished attempts of a running appliation to avoid
+    expectation mismatch w.r.t transferred containers. (Jian He via vinodkv)
+
 Release 2.5.2 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 4 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java

@@ -1021,6 +1021,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
         // state but application is not in final state.
         if (rmApp.getCurrentAppAttempt() == appAttempt
             && !RMAppImpl.isAppInFinalState(rmApp)) {
+          // Add the previous finished attempt to scheduler synchronously so
+          // that scheduler knows the previous attempt.
+          appAttempt.scheduler.handle(new AppAttemptAddedSchedulerEvent(
+            appAttempt.getAppAttemptId(), false, true));
           (new BaseFinalTransition(appAttempt.recoveredFinalState)).transition(
               appAttempt, event);
         }

+ 4 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java

@@ -440,7 +440,10 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase {
     nm1.registerNode();
 
     // create app and launch the AM
-    RMApp app0 = rm1.submitApp(200);
+    RMApp app0 =
+        rm1.submitApp(200, "name", "user",
+          new HashMap<ApplicationAccessType, String>(), false, "default", -1,
+          null, "MAPREDUCE", true, true);
     MockAM am0 = launchAM(app0, rm1, nm1);
 
     // fail the AM by sending CONTAINER_FINISHED event without registering.