Browse Source

YARN-1156. Enhance NodeManager AllocatedGB and AvailableGB metrics for aggregation of decimal values. (Contributed by Tsuyoshi OZAWA)

Junping Du 10 năm trước cách đây
mục cha
commit
e65b7c5ff6

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -112,6 +112,9 @@ Release 2.7.0 - UNRELEASED
     YARN-2136. Changed RMStateStore to ignore store operations when fenced.
     (Varun Saxena via jianhe)
 
+    YARN-1156. Enhance NodeManager AllocatedGB and AvailableGB metrics 
+    for aggregation of decimal values. (Tsuyoshi OZAWA via junping_du)
+
   OPTIMIZATIONS
 
   BUG FIXES

+ 14 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java

@@ -47,6 +47,9 @@ public class NodeManagerMetrics {
   @Metric("Container launch duration")
       MutableRate containerLaunchDuration;
 
+  private long allocatedMB;
+  private long availableMB;
+
   public static NodeManagerMetrics create() {
     return create(DefaultMetricsSystem.instance());
   }
@@ -92,22 +95,27 @@ public class NodeManagerMetrics {
 
   public void allocateContainer(Resource res) {
     allocatedContainers.incr();
-    allocatedGB.incr(res.getMemory() / 1024);
-    availableGB.decr(res.getMemory() / 1024);
+    allocatedMB = allocatedMB + res.getMemory(); // running total; divided by 1024 below for the GB gauge
+    allocatedGB.set((int)Math.ceil(allocatedMB/1024d)); // allocated GB gauge is rounded up
+    availableMB = availableMB - res.getMemory(); // running total; divided by 1024 below for the GB gauge
+    availableGB.set((int)Math.floor(availableMB/1024d)); // available GB gauge is rounded down
     allocatedVCores.incr(res.getVirtualCores());
     availableVCores.decr(res.getVirtualCores());
   }
 
   public void releaseContainer(Resource res) {
     allocatedContainers.decr();
-    allocatedGB.decr(res.getMemory() / 1024);
-    availableGB.incr(res.getMemory() / 1024);
+    allocatedMB = allocatedMB - res.getMemory(); // undo the amount added in allocateContainer
+    allocatedGB.set((int)Math.ceil(allocatedMB/1024d)); // allocated GB gauge is rounded up
+    availableMB = availableMB + res.getMemory(); // give the released amount back to the available total
+    availableGB.set((int)Math.floor(availableMB/1024d)); // available GB gauge is rounded down
     allocatedVCores.decr(res.getVirtualCores());
     availableVCores.incr(res.getVirtualCores());
   }
 
   public void addResource(Resource res) {
-    availableGB.incr(res.getMemory() / 1024);
+    availableMB = availableMB + res.getMemory(); // running total of available memory
+    availableGB.set((int)Math.floor(availableMB/1024d)); // set, not incr: availableMB is already the cumulative total, so incr would double-count on repeated calls
     availableVCores.incr(res.getVirtualCores());
   }
 
@@ -118,4 +126,5 @@ public class NodeManagerMetrics {
   public int getRunningContainers() {
     return containersRunning.value();
   }
+
 }

+ 12 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java

@@ -33,13 +33,14 @@ public class TestNodeManagerMetrics {
     total.setMemory(8*GiB);
     total.setVirtualCores(16);
     Resource resource = Records.newRecord(Resource.class);
-    resource.setMemory(1*GiB);
+    resource.setMemory(512); //512MiB
     resource.setVirtualCores(2);
 
 
     metrics.addResource(total);
 
-    for (int i = 5; i-- > 0;) {
+    for (int i = 10; i-- > 0;) {
+      // allocate 10 containers(allocatedGB: 5GiB, availableGB: 3GiB)
       metrics.launchedContainer();
       metrics.allocateContainer(resource);
     }
@@ -48,6 +49,7 @@ public class TestNodeManagerMetrics {
     metrics.endInitingContainer();
     metrics.runningContainer();
     metrics.endRunningContainer();
+    // Releasing 3 containers(allocatedGB: 3.5GiB, availableGB: 4.5GiB)
     metrics.completedContainer();
     metrics.releaseContainer(resource);
 
@@ -61,12 +63,17 @@ public class TestNodeManagerMetrics {
     metrics.runningContainer();
     metrics.addContainerLaunchDuration(1);
 
-    checkMetrics(5, 1, 1, 1, 1, 1, 2, 2, 6, 4, 12);
+    // availableGB is expected to be floored,
+    // while allocatedGB is expected to be ceiled.
+    // allocatedGB: 3.5GB allocated memory is shown as 4GB
+    // availableGB: 4.5GB available memory is shown as 4GB
+    checkMetrics(10, 1, 1, 1, 1, 1, 4, 7, 4, 14, 2);
   }
 
   private void checkMetrics(int launched, int completed, int failed, int killed,
-                            int initing, int running, int allocatedGB,
-                            int allocatedContainers, int availableGB, int allocatedVCores, int availableVCores) {
+      int initing, int running, int allocatedGB,
+      int allocatedContainers, int availableGB, int allocatedVCores,
+      int availableVCores) {
     MetricsRecordBuilder rb = getMetrics("NodeManagerMetrics");
     assertCounter("ContainersLaunched", launched, rb);
     assertCounter("ContainersCompleted", completed, rb);