Browse Source

YARN-10258. Add metrics for 'ApplicationsRunning' in NodeManager. Contributed by ANANDA G B.

Peter Bacsko 4 years ago
parent
commit
eb72628e15

+ 2 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java

@@ -442,6 +442,7 @@ public class ContainerManagerImpl extends CompositeService implements
     ApplicationImpl app = new ApplicationImpl(dispatcher, p.getUser(), fc,
     ApplicationImpl app = new ApplicationImpl(dispatcher, p.getUser(), fc,
         appId, creds, context, p.getAppLogAggregationInitedTime());
         appId, creds, context, p.getAppLogAggregationInitedTime());
     context.getApplications().put(appId, app);
     context.getApplications().put(appId, app);
+    metrics.runningApplication();
     app.handle(new ApplicationInitEvent(appId, acls, logAggregationContext));
     app.handle(new ApplicationInitEvent(appId, acls, logAggregationContext));
   }
   }
 
 
@@ -1137,6 +1138,7 @@ public class ContainerManagerImpl extends CompositeService implements
                   applicationID, credentials, context);
                   applicationID, credentials, context);
           if (context.getApplications().putIfAbsent(applicationID,
           if (context.getApplications().putIfAbsent(applicationID,
               application) == null) {
               application) == null) {
+            metrics.runningApplication();
             LOG.info("Creating a new application reference for app "
             LOG.info("Creating a new application reference for app "
                 + applicationID);
                 + applicationID);
             LogAggregationContext logAggregationContext =
             LogAggregationContext logAggregationContext =

+ 3 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java

@@ -623,6 +623,9 @@ public class ApplicationImpl implements Application {
     public void transition(ApplicationImpl app, ApplicationEvent event) {
     public void transition(ApplicationImpl app, ApplicationEvent event) {
       ApplicationId appId = event.getApplicationID();
       ApplicationId appId = event.getApplicationID();
       app.context.getApplications().remove(appId);
       app.context.getApplications().remove(appId);
+      if (null != app.context.getNodeManagerMetrics()) {
+        app.context.getNodeManagerMetrics().endRunningApplication();
+      }
       app.aclsManager.removeApplication(appId);
       app.aclsManager.removeApplication(appId);
       try {
       try {
         app.context.getNMStateStore().removeApplication(appId);
         app.context.getNMStateStore().removeApplication(appId);

+ 10 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java

@@ -100,6 +100,8 @@ public class NodeManagerMetrics {
   MutableGaugeFloat nodeCpuUtilization;
   MutableGaugeFloat nodeCpuUtilization;
   @Metric("Current GPU utilization")
   @Metric("Current GPU utilization")
   MutableGaugeFloat nodeGpuUtilization;
   MutableGaugeFloat nodeGpuUtilization;
+  @Metric("Current running apps")
+  MutableGaugeInt applicationsRunning;
 
 
   @Metric("Missed localization requests in bytes")
   @Metric("Missed localization requests in bytes")
       MutableCounterLong localizedCacheMissBytes;
       MutableCounterLong localizedCacheMissBytes;
@@ -187,6 +189,14 @@ public class NodeManagerMetrics {
     containersReIniting.decr();
     containersReIniting.decr();
   }
   }
 
 
+  public void runningApplication() {
+    applicationsRunning.incr();
+  }
+
+  public void endRunningApplication() {
+    applicationsRunning.decr();
+  }
+
   public void pausedContainer() {
   public void pausedContainer() {
     containersPaused.incr();
     containersPaused.incr();
   }
   }

+ 3 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java

@@ -438,7 +438,7 @@ public class TestContainerManagerRecovery extends BaseContainerManagerTest {
         org.apache.hadoop.yarn.server.nodemanager
         org.apache.hadoop.yarn.server.nodemanager
             .containermanager.container.ContainerState.RUNNING);
             .containermanager.container.ContainerState.RUNNING);
     TestNodeManagerMetrics.checkMetrics(1, 0, 0, 0, 0,
     TestNodeManagerMetrics.checkMetrics(1, 0, 0, 0, 0,
-        1, 1, 1, 9, 1, 7, 0F);
+        1, 1, 1, 9, 1, 7, 0F, 1);
 
 
     // restart and verify metrics could be recovered
     // restart and verify metrics could be recovered
     cm.stop();
     cm.stop();
@@ -446,7 +446,7 @@ public class TestContainerManagerRecovery extends BaseContainerManagerTest {
     metrics = NodeManagerMetrics.create();
     metrics = NodeManagerMetrics.create();
     metrics.addResource(Resource.newInstance(10240, 8));
     metrics.addResource(Resource.newInstance(10240, 8));
     TestNodeManagerMetrics.checkMetrics(0, 0, 0, 0, 0, 0,
     TestNodeManagerMetrics.checkMetrics(0, 0, 0, 0, 0, 0,
-        0, 0, 10, 0, 8, 0F);
+        0, 0, 10, 0, 8, 0F, 0);
     context = createContext(conf, stateStore);
     context = createContext(conf, stateStore);
     cm = createContainerManager(context, delSrvc);
     cm = createContainerManager(context, delSrvc);
     cm.init(conf);
     cm.init(conf);
@@ -455,7 +455,7 @@ public class TestContainerManagerRecovery extends BaseContainerManagerTest {
     app = context.getApplications().get(appId);
     app = context.getApplications().get(appId);
     assertNotNull(app);
     assertNotNull(app);
     TestNodeManagerMetrics.checkMetrics(1, 0, 0, 0, 0,
     TestNodeManagerMetrics.checkMetrics(1, 0, 0, 0, 0,
-        1, 1, 1, 9, 1, 7, 0F);
+        1, 1, 1, 9, 1, 7, 0F, 1);
     cm.stop();
     cm.stop();
   }
   }
 
 

+ 9 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java

@@ -103,12 +103,16 @@ public class TestNodeManagerMetrics {
     // Set node gpu utilization
     // Set node gpu utilization
     metrics.setNodeGpuUtilization(35.5F);
     metrics.setNodeGpuUtilization(35.5F);
 
 
+    // ApplicationsRunning expected to be 1
+    metrics.runningApplication();
+    metrics.runningApplication();
+    metrics.endRunningApplication();
+
     // availableGB is expected to be floored,
     // availableGB is expected to be floored,
     // while allocatedGB is expected to be ceiled.
     // while allocatedGB is expected to be ceiled.
     // allocatedGB: 3.75GB allocated memory is shown as 4GB
     // allocatedGB: 3.75GB allocated memory is shown as 4GB
     // availableGB: 4.25GB available memory is shown as 4GB
     // availableGB: 4.25GB available memory is shown as 4GB
-    checkMetrics(10, 1, 1, 1, 1,
-        1, 4, 7, 4, 13, 3, 35.5F);
+    checkMetrics(10, 1, 1, 1, 1, 1, 4, 7, 4, 13, 3, 35.5F, 1);
 
 
     // Update resource and check available resource again
     // Update resource and check available resource again
     metrics.addResource(total);
     metrics.addResource(total);
@@ -120,7 +124,7 @@ public class TestNodeManagerMetrics {
   public static void checkMetrics(int launched, int completed, int failed,
   public static void checkMetrics(int launched, int completed, int failed,
       int killed, int initing, int running, int allocatedGB,
       int killed, int initing, int running, int allocatedGB,
       int allocatedContainers, int availableGB, int allocatedVCores,
       int allocatedContainers, int availableGB, int allocatedVCores,
-      int availableVCores, Float nodeGpuUtilization) {
+      int availableVCores, Float nodeGpuUtilization, int applicationsRunning) {
     MetricsRecordBuilder rb = getMetrics("NodeManagerMetrics");
     MetricsRecordBuilder rb = getMetrics("NodeManagerMetrics");
     assertCounter("ContainersLaunched", launched, rb);
     assertCounter("ContainersLaunched", launched, rb);
     assertCounter("ContainersCompleted", completed, rb);
     assertCounter("ContainersCompleted", completed, rb);
@@ -132,8 +136,8 @@ public class TestNodeManagerMetrics {
     assertGauge("AllocatedVCores", allocatedVCores, rb);
     assertGauge("AllocatedVCores", allocatedVCores, rb);
     assertGauge("AllocatedContainers", allocatedContainers, rb);
     assertGauge("AllocatedContainers", allocatedContainers, rb);
     assertGauge("AvailableGB", availableGB, rb);
     assertGauge("AvailableGB", availableGB, rb);
-    assertGauge("AvailableVCores",availableVCores, rb);
+    assertGauge("AvailableVCores", availableVCores, rb);
     assertGauge("NodeGpuUtilization", nodeGpuUtilization, rb);
     assertGauge("NodeGpuUtilization", nodeGpuUtilization, rb);
-
+    assertGauge("ApplicationsRunning", applicationsRunning, rb);
   }
   }
 }
 }