فهرست منبع

YARN-1697. NodeManager reports negative running containers (Sandy Ryza)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1567356 13f79535-47bb-0310-9956-ffa450edef68
Sanford Ryza 11 سال پیش
والد
کامیت
9024ad4aa0

+ 2 - 0
hadoop-yarn-project/CHANGES.txt

@@ -246,6 +246,8 @@ Release 2.4.0 - UNRELEASED
     YARN-1698. Fixed default TimelineStore in code to match what is documented
     in yarn-default.xml (Zhijie Shen via vinodkv)
 
+    YARN=1697. NodeManager reports negative running containers (Sandy Ryza)
+
 Release 2.3.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 8 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java

@@ -83,6 +83,7 @@ public class ContainerImpl implements Container {
   private final String user;
   private int exitCode = ContainerExitStatus.INVALID;
   private final StringBuilder diagnostics;
+  private boolean wasLaunched;
 
   /** The NM-wide configuration - not specific to this container */
   private final Configuration daemonConf;
@@ -418,7 +419,9 @@ public class ContainerImpl implements Container {
             applicationId, containerId);
         break;
       case EXITED_WITH_FAILURE:
-        metrics.endRunningContainer();
+        if (wasLaunched) {
+          metrics.endRunningContainer();
+        }
         // fall through
       case LOCALIZATION_FAILED:
         metrics.failedContainer();
@@ -428,7 +431,9 @@ public class ContainerImpl implements Container {
             applicationId, containerId);
         break;
       case CONTAINER_CLEANEDUP_AFTER_KILL:
-        metrics.endRunningContainer();
+        if (wasLaunched) {
+          metrics.endRunningContainer();
+        }
         // fall through
       case NEW:
         metrics.killedContainer();
@@ -636,6 +641,7 @@ public class ContainerImpl implements Container {
           new ContainerStartMonitoringEvent(container.containerId,
               vmemBytes, pmemBytes));
       container.metrics.runningContainer();
+      container.wasLaunched  = true;
     }
   }
 

+ 4 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java

@@ -99,4 +99,8 @@ public class NodeManagerMetrics {
   public void addResource(Resource res) {
     availableGB.incr(res.getMemory() / 1024);
   }
+  
+  public int getRunningContainers() {
+    return containersRunning.value();
+  }
 }

+ 3 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java

@@ -348,6 +348,9 @@ public class TestContainer {
           wc.c.getContainerState());
       assertNull(wc.c.getLocalizedResources());
       verifyCleanupCall(wc);
+      wc.c.handle(new ContainerEvent(wc.c.getContainerId(),
+          ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP));
+      assertEquals(0, metrics.getRunningContainers());
     } finally {
       if (wc != null) {
         wc.finished();