Browse Source

YARN-11529 Add metrics for ContainerMonitorImpl. (#5828)

Xianming Lei 1 year ago
parent
commit
325f7e5fdb

+ 6 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
 
 import org.apache.hadoop.classification.VisibleForTesting;
 import org.apache.hadoop.util.Preconditions;
+import org.apache.hadoop.util.Time;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
@@ -497,6 +498,7 @@ public class ContainersMonitorImpl extends AbstractService implements
     public void run() {
 
       while (!stopped && !Thread.currentThread().isInterrupted()) {
+        long start = Time.monotonicNow();
         // Print the processTrees for debugging.
         if (LOG.isDebugEnabled()) {
           StringBuilder tmp = new StringBuilder("[ ");
@@ -587,6 +589,9 @@ public class ContainersMonitorImpl extends AbstractService implements
         // Save the aggregated utilization of the containers
         setContainersUtilization(trackedContainersUtilization);
 
+        long duration = Time.monotonicNow() - start;
+        LOG.debug("Finished monitoring container cost {} ms", duration);
+
         // Publish the container utilization metrics to node manager
         // metrics system.
         NodeManagerMetrics nmMetrics = context.getNodeManagerMetrics();
@@ -597,6 +602,7 @@ public class ContainersMonitorImpl extends AbstractService implements
               trackedContainersUtilization.getVirtualMemory());
           nmMetrics.setContainerCpuUtilization(
               trackedContainersUtilization.getCPU());
+          nmMetrics.addContainerMonitorCostTime(duration);
         }
 
         try {

+ 8 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java

@@ -118,6 +118,9 @@ public class NodeManagerMetrics {
   @Metric("Container localization time in milliseconds")
       MutableRate localizationDurationMillis;
 
+  @Metric("ContainerMonitor time cost in milliseconds")
+  MutableGaugeLong containersMonitorCostTime;
+
   // CHECKSTYLE:ON:VisibilityModifier
 
   private JvmMetrics jvmMetrics = null;
@@ -481,4 +484,9 @@ public class NodeManagerMetrics {
   public void localizationComplete(long downloadMillis) {  // downloadMillis: localization time in ms
     localizationDurationMillis.add(downloadMillis);  // record one sample in the localization rate metric
   }
+
+  public void addContainerMonitorCostTime(long duration) {  // duration: monitor cost time in ms
+    containersMonitorCostTime.incr(duration);  // incr adds to the gauge, so it holds a running total, not the last value
+  }
+
 }

+ 3 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java

@@ -130,9 +130,12 @@ public class TestNodeManagerMetrics {
 
     // Update resource and check available resource again
     metrics.addResource(total);
+    metrics.addContainerMonitorCostTime(200L);
+
     MetricsRecordBuilder rb = getMetrics("NodeManagerMetrics");
     assertGauge("AvailableGB", 12, rb);
     assertGauge("AvailableVCores", 19, rb);
+    assertGauge("ContainersMonitorCostTime", 200L, rb);
   }
 
   public static void checkMetrics(int launched, int completed, int failed,