Ver Fonte

YARN-9795. ClusterMetrics to include AM allocation delay. Contributed by Fengnan Li.

Tao Yang há 5 anos atrás
pai
commit
73575701ab

+ 9 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java

@@ -48,6 +48,8 @@ public class ClusterMetrics {
   @Metric("# of Shutdown NMs") MutableGaugeInt numShutdownNMs;
   @Metric("AM container launch delay") MutableRate aMLaunchDelay;
   @Metric("AM register delay") MutableRate aMRegisterDelay;
+  @Metric("AM container allocation delay")
+  private MutableRate aMContainerAllocationDelay;
 
   private static final MetricsInfo RECORD_INFO = info("ClusterMetrics",
   "Metrics for the Yarn Cluster");
@@ -190,4 +192,11 @@ public class ClusterMetrics {
     aMRegisterDelay.add(delay);
   }
 
+  public void addAMContainerAllocationDelay(long delay) {
+    aMContainerAllocationDelay.add(delay);
+  }
+
+  public MutableRate getAMContainerAllocationDelay() {
+    return aMContainerAllocationDelay;
+  }
 }

+ 8 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java

@@ -177,6 +177,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
   private long finishTime = 0;
   private long launchAMStartTime = 0;
   private long launchAMEndTime = 0;
+  private long scheduledTime = 0;
+  private long containerAllocatedTime = 0;
 
   // Set to null initially. Will eventually get set
   // if an RMAppAttemptUnregistrationEvent occurs
@@ -1164,6 +1166,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
             && amContainerAllocation.getContainers() != null) {
           assert (amContainerAllocation.getContainers().size() == 0);
         }
+        appAttempt.scheduledTime = System.currentTimeMillis();
         return RMAppAttemptState.SCHEDULED;
       } else {
         // save state and then go to LAUNCHED state
@@ -1220,6 +1223,11 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
         .clearNodeSetForAttempt(appAttempt.applicationAttemptId);
       appAttempt.getSubmissionContext().setResource(
         appAttempt.getMasterContainer().getResource());
+      appAttempt.containerAllocatedTime = System.currentTimeMillis();
+      long allocationDelay =
+          appAttempt.containerAllocatedTime - appAttempt.scheduledTime;
+      ClusterMetrics.getMetrics().addAMContainerAllocationDelay(
+          allocationDelay);
       appAttempt.storeAttempt();
       return RMAppAttemptState.ALLOCATED_SAVING;
     }

+ 7 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClusterMetrics.java

@@ -29,17 +29,23 @@ public class TestClusterMetrics {
 
   private ClusterMetrics metrics;
   /**
-   * Test aMLaunchDelay and aMRegisterDelay Metrics
+   * Test below metrics
+   *  - aMLaunchDelay
+   *  - aMRegisterDelay
+   *  - aMContainerAllocationDelay
    */
   @Test
   public void testAmMetrics() throws Exception {
     assert(metrics != null);
     Assert.assertTrue(!metrics.aMLaunchDelay.changed());
     Assert.assertTrue(!metrics.aMRegisterDelay.changed());
+    Assert.assertTrue(!metrics.getAMContainerAllocationDelay().changed());
     metrics.addAMLaunchDelay(1);
     metrics.addAMRegisterDelay(1);
+    metrics.addAMContainerAllocationDelay(1);
     Assert.assertTrue(metrics.aMLaunchDelay.changed());
     Assert.assertTrue(metrics.aMRegisterDelay.changed());
+    Assert.assertTrue(metrics.getAMContainerAllocationDelay().changed());
   }
 
   @Before