ソースを参照

Merge r1586523 from branch-2. YARN-1903. Set exit code and diagnostics when container is killed at NEW/LOCALIZING state. Contributed by Zhijie Shen

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2.4@1586524 13f79535-47bb-0310-9956-ffa450edef68
Jian He 11 年 前
コミット
bf1cbb228b

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -33,6 +33,9 @@ Release 2.4.1 - UNRELEASED
     verification of public cache files in Windows+local file-system environment.
     (Varun Vasudev via vinodkv)
 
+    YARN-1903. Set exit code and diagnostics when container is killed at
+    NEW/LOCALIZING state. (Zhijie Shen via jianhe)
+
 Release 2.4.0 - 2014-04-07 
 
   INCOMPATIBLE CHANGES

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java

@@ -343,9 +343,10 @@ public class TestNMClient {
         // getContainerStatus can be called after stopContainer
         try {
           // 0 is possible if CLEANUP_CONTAINER is executed too late
+          // 137 is possible if the container is not terminated but killed
           testGetContainerStatus(container, i, ContainerState.COMPLETE,
               "Container killed by the ApplicationMaster.", Arrays.asList(
-                  new Integer[] {143, 0}));
+                  new Integer[] {137, 143, 0}));
         } catch (YarnException e) {
           // The exception is possible because, after the container is stopped,
           // it may be removed from NM's context.

+ 19 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java

@@ -47,6 +47,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
+import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
 import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger;
 import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent;
@@ -141,7 +142,7 @@ public class ContainerImpl implements Container {
         ContainerEventType.UPDATE_DIAGNOSTICS_MSG,
         UPDATE_DIAGNOSTICS_TRANSITION)
     .addTransition(ContainerState.NEW, ContainerState.DONE,
-        ContainerEventType.KILL_CONTAINER, CONTAINER_DONE_TRANSITION)
+        ContainerEventType.KILL_CONTAINER, new KillOnNewTransition())
 
     // From LOCALIZING State
     .addTransition(ContainerState.LOCALIZING,
@@ -760,7 +761,9 @@ public class ContainerImpl implements Container {
       container.cleanup();
       container.metrics.endInitingContainer();
       ContainerKillEvent killEvent = (ContainerKillEvent) event;
+      container.exitCode = ExitCode.TERMINATED.getExitCode();
       container.diagnostics.append(killEvent.getDiagnostic()).append("\n");
+      container.diagnostics.append("Container is killed before being launched.\n");
     }
   }
 
@@ -828,7 +831,6 @@ public class ContainerImpl implements Container {
 
   /**
    * Handle the following transitions:
-   * - NEW -> DONE upon KILL_CONTAINER
    * - {LOCALIZATION_FAILED, EXITED_WITH_SUCCESS, EXITED_WITH_FAILURE,
    *    KILLING, CONTAINER_CLEANEDUP_AFTER_KILL}
    *   -> DONE upon CONTAINER_RESOURCES_CLEANEDUP
@@ -849,6 +851,21 @@ public class ContainerImpl implements Container {
     }
   }
 
+  /**
+   * Handle the following transition:
+   * - NEW -> DONE upon KILL_CONTAINER
+   *
+   * Before delegating to ContainerDoneTransition, records the
+   * ExitCode.TERMINATED exit code and appends the kill event's diagnostic,
+   * followed by a fixed "killed before being launched" note, to the
+   * container's diagnostics.
+   *
+   * NOTE(review): the cast below assumes this transition is only ever
+   * registered for KILL_CONTAINER events delivered as ContainerKillEvent -
+   * confirm before reusing it for any other event type.
+   */
+  static class KillOnNewTransition extends ContainerDoneTransition {
+    @Override
+    public void transition(ContainerImpl container, ContainerEvent event) {
+      ContainerKillEvent killEvent = (ContainerKillEvent) event;
+      container.exitCode = ExitCode.TERMINATED.getExitCode();
+      container.diagnostics.append(killEvent.getDiagnostic()).append("\n");
+      container.diagnostics.append("Container is killed before being launched.\n");
+      super.transition(container, event); // exit code and diagnostics are set before the base handling runs
+    }
+  }
+
   /**
    * Update diagnostics, staying in the same state.
    */

+ 39 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java

@@ -310,6 +310,45 @@ public class TestContainer {
       }
     }
   }
+
+  @Test
+  public void testKillOnNew() throws Exception { // kill in NEW: expects DONE with exit code and diagnostics set
+    WrappedContainer wc = null;
+    try {
+      wc = new WrappedContainer(13, 314159265358979L, 4344, "yak");
+      assertEquals(ContainerState.NEW, wc.c.getContainerState()); // precondition: container starts in NEW
+      wc.killContainer(); // presumably delivers KILL_CONTAINER to the container - confirm in WrappedContainer
+      assertEquals(ContainerState.DONE, wc.c.getContainerState()); // no intermediate state is expected here
+      assertEquals(ExitCode.TERMINATED.getExitCode(), // exit status must match the TERMINATED exit code
+          wc.c.cloneAndGetContainerStatus().getExitStatus());
+      assertTrue(wc.c.cloneAndGetContainerStatus().getDiagnostics()
+          .contains("KillRequest")); // presumably the diagnostic text passed by wc.killContainer() - confirm
+    } finally {
+      if (wc != null) { // wc stays null if the constructor threw
+        wc.finished(); // runs even when an assertion above fails
+      }
+    }
+  }
+
+  @Test
+  public void testKillOnLocalizing() throws Exception { // kill in LOCALIZING: expects KILLING with exit code and diagnostics set
+    WrappedContainer wc = null;
+    try {
+      wc = new WrappedContainer(14, 314159265358979L, 4344, "yak");
+      wc.initContainer();
+      assertEquals(ContainerState.LOCALIZING, wc.c.getContainerState()); // precondition: container is LOCALIZING after initContainer()
+      wc.killContainer(); // presumably delivers KILL_CONTAINER to the container - confirm in WrappedContainer
+      assertEquals(ContainerState.KILLING, wc.c.getContainerState()); // unlike the NEW case, the container is not DONE yet
+      assertEquals(ExitCode.TERMINATED.getExitCode(), // exit status must already match the TERMINATED exit code
+          wc.c.cloneAndGetContainerStatus().getExitStatus());
+      assertTrue(wc.c.cloneAndGetContainerStatus().getDiagnostics()
+          .contains("KillRequest")); // presumably the diagnostic text passed by wc.killContainer() - confirm
+    } finally {
+      if (wc != null) { // wc stays null if the constructor threw
+        wc.finished(); // runs even when an assertion above fails
+      }
+    }
+  }
   
   @Test
   public void testKillOnLocalizationFailed() throws Exception {