Przeglądaj źródła

YARN-295. Fixed a race condition in ResourceManager RMAppAttempt state machine. Contributed by Mayank Bansal.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1501856 13f79535-47bb-0310-9956-ffa450edef68
Vinod Kumar Vavilapalli 12 lat temu
rodzic
commit
dfe70f6a03

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -63,6 +63,9 @@ Release 2.1.1-beta - UNRELEASED
     YARN-368. Fixed a typo in error message in Auxiliary services. (Albert Chu
     via vinodkv)
 
+    YARN-295. Fixed a race condition in ResourceManager RMAppAttempt state
+    machine. (Mayank Bansal via vinodkv)
+
 Release 2.1.0-beta - 2013-07-02
 
   INCOMPATIBLE CHANGES

+ 4 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java

@@ -245,6 +245,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
       .addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.KILLED,
           RMAppAttemptEventType.KILL, new KillAllocatedAMTransition())
           
+      .addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.FAILED,
+          RMAppAttemptEventType.CONTAINER_FINISHED,
+          new AMContainerCrashedTransition())
+
        // Transitions from LAUNCHED State
       .addTransition(RMAppAttemptState.LAUNCHED, RMAppAttemptState.RUNNING,
           RMAppAttemptEventType.REGISTERED, new AMRegisteredTransition())

+ 14 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java

@@ -654,6 +654,20 @@ public class TestRMAppAttemptTransitions {
     testAppAttemptFailedState(amContainer, diagnostics);
   }
   
+  @Test
+  public void testAMCrashAtAllocated() { // YARN-295: container-finished event arrives while the attempt is still being set up
+    Container amContainer = allocateApplicationAttempt(); // presumably leaves the attempt in ALLOCATED (helper not shown here) - confirm
+    String containerDiagMsg = "some error";
+    int exitCode = 123; // sample exit code; its value is not asserted on below
+    ContainerStatus cs =
+        BuilderUtils.newContainerStatus(amContainer.getId(),
+          ContainerState.COMPLETE, containerDiagMsg, exitCode); // status marking the AM container as COMPLETE
+    applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
+      applicationAttempt.getAppAttemptId(), cs)); // deliver the container-finished event to the attempt
+    assertEquals(RMAppAttemptState.FAILED,
+      applicationAttempt.getAppAttemptState()); // the attempt is expected to end up FAILED
+  }
+  
   @Test
   public void testRunningToFailed() {
     Container amContainer = allocateApplicationAttempt();