Sfoglia il codice sorgente

YARN-10258. Add metrics for 'ApplicationsRunning' in NodeManager. Contributed by ANANDA G B.

Peter Bacsko 4 anni fa
parent
commit
eb72628e15

+ 2 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java

@@ -442,6 +442,7 @@ public class ContainerManagerImpl extends CompositeService implements
     ApplicationImpl app = new ApplicationImpl(dispatcher, p.getUser(), fc,
         appId, creds, context, p.getAppLogAggregationInitedTime());
     context.getApplications().put(appId, app);
+    metrics.runningApplication();
     app.handle(new ApplicationInitEvent(appId, acls, logAggregationContext));
   }
 
@@ -1137,6 +1138,7 @@ public class ContainerManagerImpl extends CompositeService implements
                   applicationID, credentials, context);
           if (context.getApplications().putIfAbsent(applicationID,
               application) == null) {
+            metrics.runningApplication();
             LOG.info("Creating a new application reference for app "
                 + applicationID);
             LogAggregationContext logAggregationContext =

+ 3 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java

@@ -623,6 +623,9 @@ public class ApplicationImpl implements Application {
     public void transition(ApplicationImpl app, ApplicationEvent event) {
       ApplicationId appId = event.getApplicationID();
       app.context.getApplications().remove(appId);
+      if (null != app.context.getNodeManagerMetrics()) {
+        app.context.getNodeManagerMetrics().endRunningApplication();
+      }
       app.aclsManager.removeApplication(appId);
       try {
         app.context.getNMStateStore().removeApplication(appId);

+ 10 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java

@@ -100,6 +100,8 @@ public class NodeManagerMetrics {
   MutableGaugeFloat nodeCpuUtilization;
   @Metric("Current GPU utilization")
   MutableGaugeFloat nodeGpuUtilization;
+  @Metric("Current running apps")
+  MutableGaugeInt applicationsRunning;
 
   @Metric("Missed localization requests in bytes")
       MutableCounterLong localizedCacheMissBytes;
@@ -187,6 +189,14 @@ public class NodeManagerMetrics {
     containersReIniting.decr();
   }
 
+  public void runningApplication() { // records one more application as running
+    applicationsRunning.incr(); // raises the "Current running apps" gauge by one
+  }
+
+  public void endRunningApplication() { // records that one application is no longer running
+    applicationsRunning.decr(); // lowers the "Current running apps" gauge by one
+  }
+
   public void pausedContainer() { // records one more paused container
     containersPaused.incr(); // NOTE(review): presumably a gauge paired with a matching decrement elsewhere - confirm
   }

+ 3 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java

@@ -438,7 +438,7 @@ public class TestContainerManagerRecovery extends BaseContainerManagerTest {
         org.apache.hadoop.yarn.server.nodemanager
             .containermanager.container.ContainerState.RUNNING);
     TestNodeManagerMetrics.checkMetrics(1, 0, 0, 0, 0,
-        1, 1, 1, 9, 1, 7, 0F);
+        1, 1, 1, 9, 1, 7, 0F, 1);
 
     // restart and verify metrics could be recovered
     cm.stop();
@@ -446,7 +446,7 @@ public class TestContainerManagerRecovery extends BaseContainerManagerTest {
     metrics = NodeManagerMetrics.create();
     metrics.addResource(Resource.newInstance(10240, 8));
     TestNodeManagerMetrics.checkMetrics(0, 0, 0, 0, 0, 0,
-        0, 0, 10, 0, 8, 0F);
+        0, 0, 10, 0, 8, 0F, 0);
     context = createContext(conf, stateStore);
     cm = createContainerManager(context, delSrvc);
     cm.init(conf);
@@ -455,7 +455,7 @@ public class TestContainerManagerRecovery extends BaseContainerManagerTest {
     app = context.getApplications().get(appId);
     assertNotNull(app);
     TestNodeManagerMetrics.checkMetrics(1, 0, 0, 0, 0,
-        1, 1, 1, 9, 1, 7, 0F);
+        1, 1, 1, 9, 1, 7, 0F, 1);
     cm.stop();
   }
 

+ 9 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java

@@ -103,12 +103,16 @@ public class TestNodeManagerMetrics {
     // Set node gpu utilization
     metrics.setNodeGpuUtilization(35.5F);
 
+    // ApplicationsRunning expected to be 1
+    metrics.runningApplication();
+    metrics.runningApplication();
+    metrics.endRunningApplication();
+
     // availableGB is expected to be floored,
     // while allocatedGB is expected to be ceiled.
     // allocatedGB: 3.75GB allocated memory is shown as 4GB
     // availableGB: 4.25GB available memory is shown as 4GB
-    checkMetrics(10, 1, 1, 1, 1,
-        1, 4, 7, 4, 13, 3, 35.5F);
+    checkMetrics(10, 1, 1, 1, 1, 1, 4, 7, 4, 13, 3, 35.5F, 1);
 
     // Update resource and check available resource again
     metrics.addResource(total);
@@ -120,7 +124,7 @@ public class TestNodeManagerMetrics {
   public static void checkMetrics(int launched, int completed, int failed,
       int killed, int initing, int running, int allocatedGB,
       int allocatedContainers, int availableGB, int allocatedVCores,
-      int availableVCores, Float nodeGpuUtilization) {
+      int availableVCores, Float nodeGpuUtilization, int applicationsRunning) {
     MetricsRecordBuilder rb = getMetrics("NodeManagerMetrics");
     assertCounter("ContainersLaunched", launched, rb);
     assertCounter("ContainersCompleted", completed, rb);
@@ -132,8 +136,8 @@ public class TestNodeManagerMetrics {
     assertGauge("AllocatedVCores", allocatedVCores, rb);
     assertGauge("AllocatedContainers", allocatedContainers, rb);
     assertGauge("AvailableGB", availableGB, rb);
-    assertGauge("AvailableVCores",availableVCores, rb);
+    assertGauge("AvailableVCores", availableVCores, rb);
     assertGauge("NodeGpuUtilization", nodeGpuUtilization, rb);
-
+    assertGauge("ApplicationsRunning", applicationsRunning, rb);
   }
 }