Browse Source

YARN-9169. Add metrics for queued opportunistic and guaranteed containers. Contributed by Abhishek Modi.

Giovanni Matteo Fumarola 6 years ago
parent
commit
489411579c

+ 12 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java

@@ -199,6 +199,8 @@ public class ContainerScheduler extends AbstractService implements
       break;
     case RECOVERY_COMPLETED:
       startPendingContainers(maxOppQueueLength <= 0);
+      metrics.setQueuedContainers(queuedOpportunisticContainers.size(),
+          queuedGuaranteedContainers.size());
     default:
       LOG.error("Unknown event arrived at ContainerScheduler: "
           + event.toString());
@@ -252,6 +254,8 @@ public class ContainerScheduler extends AbstractService implements
             "continer update of %s", containerId), ex);
       }
       startPendingContainers(maxOppQueueLength <= 0);
+      metrics.setQueuedContainers(queuedOpportunisticContainers.size(),
+          queuedGuaranteedContainers.size());
     }
   }
 
@@ -277,6 +281,8 @@ public class ContainerScheduler extends AbstractService implements
             "UnKnown execution type received " + container.getContainerId()
                 + ", execType " + execType);
       }
+      metrics.setQueuedContainers(queuedOpportunisticContainers.size(),
+          queuedGuaranteedContainers.size());
     } else if (rcs.getStatus() == RecoveredContainerStatus.LAUNCHED) {
       runningContainers.put(container.getContainerId(), container);
       utilizationTracker.addContainerResources(container);
@@ -378,6 +384,8 @@ public class ContainerScheduler extends AbstractService implements
       boolean forceStartGuaranteedContainers = (maxOppQueueLength <= 0);
       startPendingContainers(forceStartGuaranteedContainers);
     }
+    this.metrics.setQueuedContainers(queuedOpportunisticContainers.size(),
+        queuedGuaranteedContainers.size());
   }
 
   /**
@@ -508,6 +516,8 @@ public class ContainerScheduler extends AbstractService implements
         startPendingContainers(false);
       }
     }
+    metrics.setQueuedContainers(queuedOpportunisticContainers.size(),
+        queuedGuaranteedContainers.size());
   }
 
   @SuppressWarnings("unchecked")
@@ -662,6 +672,8 @@ public class ContainerScheduler extends AbstractService implements
         numAllowed--;
       }
     }
+    this.metrics.setQueuedContainers(queuedOpportunisticContainers.size(),
+        queuedGuaranteedContainers.size());
   }
 
   public ContainersMonitor getContainersMonitor() {

+ 21 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java

@@ -54,6 +54,12 @@ public class NodeManagerMetrics {
   @Metric MutableGaugeInt availableVCores;
   @Metric("Container launch duration")
       MutableRate containerLaunchDuration;
+
+  @Metric("Containers queued (Guaranteed)")
+  MutableGaugeInt containersGuaranteedQueued;
+  @Metric("Containers queued (Opportunistic)")
+  MutableGaugeInt containersOpportunisticQueued;
+
   @Metric("# of bad local dirs")
       MutableGaugeInt badLocalDirs;
   @Metric("# of bad log dirs")
@@ -209,6 +215,11 @@ public class NodeManagerMetrics {
     allocatedOpportunisticVCores.decr(res.getVirtualCores());
   }
 
+  public void setQueuedContainers(int opportunisticCount, int guaranteedCount) { // Publishes both queue-length gauges in one call; argument order is opportunistic first, guaranteed second.
+    containersOpportunisticQueued.set(opportunisticCount); // replaces the gauge value with the supplied opportunistic count
+    containersGuaranteedQueued.set(guaranteedCount); // replaces the gauge value with the supplied guaranteed count
+  }
+
   public void addResource(Resource res) {
     availableMB = availableMB + res.getMemorySize();
     availableGB.set((int)Math.floor(availableMB/1024d));
@@ -314,6 +325,16 @@ public class NodeManagerMetrics {
     return runningOpportunisticContainers.value();
   }
 
+  @VisibleForTesting
+  public int getQueuedOpportunisticContainers() { // Test accessor for the opportunistic queue-length gauge.
+    return containersOpportunisticQueued.value(); // current gauge value, i.e. whatever setQueuedContainers stored last
+  }
+
+  @VisibleForTesting
+  public int getQueuedGuaranteedContainers() { // Test accessor for the guaranteed queue-length gauge.
+    return containersGuaranteedQueued.value(); // current gauge value, i.e. whatever setQueuedContainers stored last
+  }
+
   public long getCacheSizeBeforeClean() { // Returns the current value held by the cacheSizeBeforeClean field.
     return this.cacheSizeBeforeClean.value();
   }

+ 30 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java

@@ -270,6 +270,8 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
           org.apache.hadoop.yarn.api.records.ContainerState.RUNNING,
           status.getState());
     }
+    Assert.assertEquals(0, metrics.getQueuedOpportunisticContainers());
+    Assert.assertEquals(0, metrics.getQueuedGuaranteedContainers());
   }
 
   /**
@@ -326,6 +328,8 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
         containerScheduler.getNumQueuedGuaranteedContainers());
     Assert.assertEquals(1,
         containerScheduler.getNumQueuedOpportunisticContainers());
+    Assert.assertEquals(1, metrics.getQueuedOpportunisticContainers());
+    Assert.assertEquals(1, metrics.getQueuedGuaranteedContainers());
   }
 
   /**
@@ -393,6 +397,8 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
         containerScheduler.getNumQueuedGuaranteedContainers());
     Assert.assertEquals(2,
         containerScheduler.getNumQueuedOpportunisticContainers());
+    Assert.assertEquals(2, metrics.getQueuedOpportunisticContainers());
+    Assert.assertEquals(0, metrics.getQueuedGuaranteedContainers());
   }
 
   /**
@@ -473,6 +479,9 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
         containerScheduler.getNumQueuedGuaranteedContainers());
     Assert.assertEquals(maxOppQueueLength,
         containerScheduler.getNumQueuedOpportunisticContainers());
+    Assert.assertEquals(maxOppQueueLength,
+        metrics.getQueuedOpportunisticContainers());
+    Assert.assertEquals(0, metrics.getQueuedGuaranteedContainers());
   }
 
   /**
@@ -543,6 +552,9 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
       System.out.println("\nStatus : [" + status + "]\n");
     }
 
+    Assert.assertEquals(1, metrics.getQueuedOpportunisticContainers());
+    Assert.assertEquals(0, metrics.getQueuedGuaranteedContainers());
+
     // Make sure the remaining OPPORTUNISTIC container starts its execution.
     BaseContainerManagerTest.waitForNMContainerState(containerManager,
         createContainerId(2), ContainerState.DONE, 40);
@@ -554,6 +566,9 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
     Assert.assertEquals(
         org.apache.hadoop.yarn.api.records.ContainerState.RUNNING,
         contStatus1.getState());
+
+    Assert.assertEquals(0, metrics.getQueuedOpportunisticContainers());
+    Assert.assertEquals(0, metrics.getQueuedGuaranteedContainers());
   }
 
   /**
@@ -628,6 +643,8 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
       }
       System.out.println("\nStatus : [" + status + "]\n");
     }
+    Assert.assertEquals(1, metrics.getQueuedOpportunisticContainers());
+    Assert.assertEquals(0, metrics.getQueuedGuaranteedContainers());
 
     // Make sure that the GUARANTEED container completes
     BaseContainerManagerTest.waitForNMContainerState(containerManager,
@@ -755,6 +772,8 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
       Thread.sleep(100);
     }
     Assert.assertEquals(6, containerScheduler.getNumQueuedContainers());
+    Assert.assertEquals(6, metrics.getQueuedOpportunisticContainers());
+    Assert.assertEquals(0, metrics.getQueuedGuaranteedContainers());
 
     ContainerQueuingLimit containerQueuingLimit = ContainerQueuingLimit
         .newInstance();
@@ -791,6 +810,8 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
     }
     Assert.assertEquals(4, deQueuedContainers);
     Assert.assertEquals(2, numQueuedOppContainers);
+    Assert.assertEquals(2, metrics.getQueuedOpportunisticContainers());
+    Assert.assertEquals(0, metrics.getQueuedGuaranteedContainers());
   }
 
   /**
@@ -935,6 +956,8 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
     }
 
     Assert.assertEquals(2, killedContainers);
+    Assert.assertEquals(0, metrics.getQueuedOpportunisticContainers());
+    Assert.assertEquals(0, metrics.getQueuedGuaranteedContainers());
   }
 
   /**
@@ -1002,6 +1025,8 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
     }
 
     Assert.assertEquals(2, killedContainers);
+    Assert.assertEquals(0, metrics.getQueuedOpportunisticContainers());
+    Assert.assertEquals(0, metrics.getQueuedGuaranteedContainers());
   }
 
   /**
@@ -1064,6 +1089,8 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
 
     Assert.assertEquals(1, runningContainersNo);
     Assert.assertEquals(2, queuedContainersNo);
+    Assert.assertEquals(2, metrics.getQueuedOpportunisticContainers());
+    Assert.assertEquals(0, metrics.getQueuedGuaranteedContainers());
 
     // Stop one of the two queued containers.
     StopContainersRequest stopRequest = StopContainersRequest.
@@ -1094,6 +1121,7 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
         Thread.sleep(1000);
       }
     }
+    Assert.assertEquals(1, metrics.getQueuedOpportunisticContainers());
     Assert.assertEquals(createContainerId(0),
         map.get(ContainerSubState.RUNNING).getContainerId());
     Assert.assertEquals(createContainerId(1),
@@ -1205,6 +1233,8 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
 
     // Ensure no containers are queued.
     Assert.assertEquals(0, containerScheduler.getNumQueuedContainers());
+    Assert.assertEquals(0, metrics.getQueuedOpportunisticContainers());
+    Assert.assertEquals(0, metrics.getQueuedGuaranteedContainers());
 
     List<org.apache.hadoop.yarn.server.nodemanager.containermanager.container.
         ContainerState> containerStates =