浏览代码

YARN-10251. Show extended resources on legacy RM UI. Contributed by Eric Payne

Jonathan Hung 4 年之前
父节点
当前提交
865828ae63

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/WebPageUtils.java

@@ -56,7 +56,7 @@ public class WebPageUtils {
         .append(", 'mRender': renderHadoopDate }");
         .append(", 'mRender': renderHadoopDate }");
     if (isResourceManager) {
     if (isResourceManager) {
       sb.append("\n, {'sType':'num-ignore-str', 'aTargets': [11, 12, 13, 14, 15] }");
       sb.append("\n, {'sType':'num-ignore-str', 'aTargets': [11, 12, 13, 14, 15] }");
-      progressIndex = "[18]";
+      progressIndex = "[20]";
     } else if (isFairSchedulerPage) {
     } else if (isFairSchedulerPage) {
       sb.append("\n, {'sType':'num-ignore-str', 'aTargets': [11, 12, 13, 14, 15] }");
       sb.append("\n, {'sType':'num-ignore-str', 'aTargets': [11, 12, 13, 14, 15] }");
       progressIndex = "[16]";
       progressIndex = "[16]";
@@ -103,4 +103,4 @@ public class WebPageUtils {
         .toString();
         .toString();
   }
   }
 
 
-}
+}

+ 22 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/AppInfo.java

@@ -30,8 +30,10 @@ import org.apache.hadoop.classification.InterfaceStability.Evolving;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
 import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
 import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.util.Times;
 import org.apache.hadoop.yarn.util.Times;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
 
 
 @Public
 @Public
 @Evolving
 @Evolving
@@ -63,8 +65,10 @@ public class AppInfo {
   protected int priority;
   protected int priority;
   private long allocatedCpuVcores;
   private long allocatedCpuVcores;
   private long allocatedMemoryMB;
   private long allocatedMemoryMB;
+  private long allocatedGpus;
   private long reservedCpuVcores;
   private long reservedCpuVcores;
   private long reservedMemoryMB;
   private long reservedMemoryMB;
+  private long reservedGpus;
   protected boolean unmanagedApplication;
   protected boolean unmanagedApplication;
   private String appNodeLabelExpression;
   private String appNodeLabelExpression;
   private String amNodeLabelExpression;
   private String amNodeLabelExpression;
@@ -110,6 +114,16 @@ public class AppInfo {
         reservedCpuVcores = usageReport.getReservedResources().
         reservedCpuVcores = usageReport.getReservedResources().
             getVirtualCores();
             getVirtualCores();
         reservedMemoryMB = usageReport.getReservedResources().getMemorySize();
         reservedMemoryMB = usageReport.getReservedResources().getMemorySize();
+        Integer gpuIndex = ResourceUtils.getResourceTypeIndex()
+            .get(ResourceInformation.GPU_URI);
+        allocatedGpus = -1;
+        reservedGpus = -1;
+        if (gpuIndex != null) {
+          allocatedGpus = usageReport.getUsedResources()
+              .getResourceValue(ResourceInformation.GPU_URI);
+          reservedGpus = usageReport.getReservedResources()
+              .getResourceValue(ResourceInformation.GPU_URI);
+        }
       }
       }
       aggregateResourceAllocation = usageReport.getMemorySeconds()
       aggregateResourceAllocation = usageReport.getMemorySeconds()
           + " MB-seconds, " + usageReport.getVcoreSeconds()
           + " MB-seconds, " + usageReport.getVcoreSeconds()
@@ -175,6 +189,10 @@ public class AppInfo {
     return allocatedMemoryMB;
     return allocatedMemoryMB;
   }
   }
 
 
+  public long getAllocatedGpus() {
+    return allocatedGpus;
+  }
+
   public long getReservedCpuVcores() {
   public long getReservedCpuVcores() {
     return reservedCpuVcores;
     return reservedCpuVcores;
   }
   }
@@ -183,6 +201,10 @@ public class AppInfo {
     return reservedMemoryMB;
     return reservedMemoryMB;
   }
   }
 
 
+  public long getReservedGpus() {
+    return reservedGpus;
+  }
+
   public float getProgress() {
   public float getProgress() {
     return progress;
     return progress;
   }
   }

+ 34 - 41
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java

@@ -19,14 +19,15 @@
 package org.apache.hadoop.yarn.server.resourcemanager.webapp;
 package org.apache.hadoop.yarn.server.resourcemanager.webapp;
 
 
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceTypeInfo;
 import org.apache.hadoop.yarn.api.records.ResourceTypeInfo;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
 import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
-import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo;
 import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerInfo;
 import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerInfo;
 import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.UserMetricsInfo;
 import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.UserMetricsInfo;
 
 
 import org.apache.hadoop.yarn.util.resource.ResourceUtils;
 import org.apache.hadoop.yarn.util.resource.ResourceUtils;
+import org.apache.hadoop.yarn.util.resource.Resources;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV;
 import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
 import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
@@ -62,35 +63,34 @@ public class MetricsOverviewTable extends HtmlBlock {
     
     
     DIV<Hamlet> div = html.div().$class("metrics");
     DIV<Hamlet> div = html.div().$class("metrics");
 
 
-    long usedMemoryBytes = 0;
-    long totalMemoryBytes = 0;
-    long reservedMemoryBytes = 0;
-    long usedVCores = 0;
-    long totalVCores = 0;
-    long reservedVCores = 0;
+    Resource usedResources;
+    Resource totalResources;
+    Resource reservedResources;
+    int allocatedContainers;
     if (clusterMetrics.getCrossPartitionMetricsAvailable()) {
     if (clusterMetrics.getCrossPartitionMetricsAvailable()) {
-      ResourceInfo usedAllPartitions =
-          clusterMetrics.getTotalUsedResourcesAcrossPartition();
-      ResourceInfo totalAllPartitions =
-          clusterMetrics.getTotalClusterResourcesAcrossPartition();
-      ResourceInfo reservedAllPartitions =
-          clusterMetrics.getTotalReservedResourcesAcrossPartition();
-      usedMemoryBytes = usedAllPartitions.getMemorySize() * BYTES_IN_MB;
-      totalMemoryBytes = totalAllPartitions.getMemorySize() * BYTES_IN_MB;
-      reservedMemoryBytes = reservedAllPartitions.getMemorySize() * BYTES_IN_MB;
-      usedVCores = usedAllPartitions.getvCores();
-      totalVCores = totalAllPartitions.getvCores();
-      reservedVCores = reservedAllPartitions.getvCores();
+      allocatedContainers =
+          clusterMetrics.getTotalAllocatedContainersAcrossPartition();
+      usedResources =
+          clusterMetrics.getTotalUsedResourcesAcrossPartition().getResource();
+      totalResources =
+          clusterMetrics.getTotalClusterResourcesAcrossPartition()
+          .getResource();
+      reservedResources =
+          clusterMetrics.getTotalReservedResourcesAcrossPartition()
+          .getResource();
       // getTotalUsedResourcesAcrossPartition includes reserved resources.
       // getTotalUsedResourcesAcrossPartition includes reserved resources.
-      usedMemoryBytes -= reservedMemoryBytes;
-      usedVCores -= reservedVCores;
+      Resources.subtractFrom(usedResources, reservedResources);
     } else {
     } else {
-      usedMemoryBytes = clusterMetrics.getAllocatedMB() * BYTES_IN_MB;
-      totalMemoryBytes = clusterMetrics.getTotalMB() * BYTES_IN_MB;
-      reservedMemoryBytes = clusterMetrics.getReservedMB() * BYTES_IN_MB;
-      usedVCores = clusterMetrics.getAllocatedVirtualCores();
-      totalVCores = clusterMetrics.getTotalVirtualCores();
-      reservedVCores = clusterMetrics.getReservedVirtualCores();
+      allocatedContainers = clusterMetrics.getContainersAllocated();
+      usedResources = Resource.newInstance(
+          clusterMetrics.getAllocatedMB() * BYTES_IN_MB,
+          (int) clusterMetrics.getAllocatedVirtualCores());
+      totalResources = Resource.newInstance(
+          clusterMetrics.getTotalMB() * BYTES_IN_MB,
+          (int) clusterMetrics.getTotalVirtualCores());
+      reservedResources = Resource.newInstance(
+          clusterMetrics.getReservedMB() * BYTES_IN_MB,
+          (int) clusterMetrics.getReservedVirtualCores());
     }
     }
 
 
     div.h3("Cluster Metrics").
     div.h3("Cluster Metrics").
@@ -102,12 +102,9 @@ public class MetricsOverviewTable extends HtmlBlock {
         th().$class("ui-state-default")._("Apps Running")._().
         th().$class("ui-state-default")._("Apps Running")._().
         th().$class("ui-state-default")._("Apps Completed")._().
         th().$class("ui-state-default")._("Apps Completed")._().
         th().$class("ui-state-default")._("Containers Running")._().
         th().$class("ui-state-default")._("Containers Running")._().
-        th().$class("ui-state-default")._("Memory Used")._().
-        th().$class("ui-state-default")._("Memory Total")._().
-        th().$class("ui-state-default")._("Memory Reserved")._().
-        th().$class("ui-state-default")._("VCores Used")._().
-        th().$class("ui-state-default")._("VCores Total")._().
-        th().$class("ui-state-default")._("VCores Reserved")._().
+        th().$class("ui-state-default")._("Used Resources")._().
+        th().$class("ui-state-default")._("Total Resources")._().
+        th().$class("ui-state-default")._("Reserved Resources")._().
       _().
       _().
     _().
     _().
     tbody().$class("ui-widget-content").
     tbody().$class("ui-widget-content").
@@ -121,14 +118,10 @@ public class MetricsOverviewTable extends HtmlBlock {
                 clusterMetrics.getAppsFailed() + clusterMetrics.getAppsKilled()
                 clusterMetrics.getAppsFailed() + clusterMetrics.getAppsKilled()
                 )
                 )
             ).
             ).
-        td(String.valueOf(
-            clusterMetrics.getTotalAllocatedContainersAcrossPartition())).
-        td(StringUtils.byteDesc(usedMemoryBytes)).
-        td(StringUtils.byteDesc(totalMemoryBytes)).
-        td(StringUtils.byteDesc(reservedMemoryBytes)).
-        td(String.valueOf(usedVCores)).
-        td(String.valueOf(totalVCores)).
-        td(String.valueOf(reservedVCores)).
+        td(String.valueOf(allocatedContainers)).
+        td(usedResources.toString()).
+        td(totalResources.toString()).
+        td(reservedResources.toString()).
       _().
       _().
     _()._();
     _()._();
 
 

+ 21 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java

@@ -22,6 +22,7 @@ import com.google.inject.Inject;
 import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.yarn.api.records.NodeState;
 import org.apache.hadoop.yarn.api.records.NodeState;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
 import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
@@ -30,6 +31,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo;
 import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo;
 import org.apache.hadoop.yarn.util.Times;
 import org.apache.hadoop.yarn.util.Times;
+import org.apache.hadoop.yarn.util.resource.ResourceUtils;
 import org.apache.hadoop.yarn.webapp.SubView;
 import org.apache.hadoop.yarn.webapp.SubView;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
@@ -85,13 +87,17 @@ class NodesPage extends RmView {
             .th(".mem", "Mem Used")
             .th(".mem", "Mem Used")
             .th(".mem", "Mem Avail")
             .th(".mem", "Mem Avail")
             .th(".vcores", "VCores Used")
             .th(".vcores", "VCores Used")
-            .th(".vcores", "VCores Avail");
+            .th(".vcores", "VCores Avail")
+            .th(".gpus", "GPUs Used")
+            .th(".gpus", "GPUs Avail");
       } else {
       } else {
         trbody.th(".containers", "Running Containers (G)")
         trbody.th(".containers", "Running Containers (G)")
             .th(".mem", "Mem Used (G)")
             .th(".mem", "Mem Used (G)")
             .th(".mem", "Mem Avail (G)")
             .th(".mem", "Mem Avail (G)")
             .th(".vcores", "VCores Used (G)")
             .th(".vcores", "VCores Used (G)")
             .th(".vcores", "VCores Avail (G)")
             .th(".vcores", "VCores Avail (G)")
+            .th(".gpus", "GPUs Used (G)")
+            .th(".gpus", "GPUs Avail (G)")
             .th(".containers", "Running Containers (O)")
             .th(".containers", "Running Containers (O)")
             .th(".mem", "Mem Used (O)")
             .th(".mem", "Mem Used (O)")
             .th(".vcores", "VCores Used (O)")
             .th(".vcores", "VCores Used (O)")
@@ -163,6 +169,16 @@ class NodesPage extends RmView {
           nodeTableData.append("\",\"<a ").append("href='" + "//" + httpAddress)
           nodeTableData.append("\",\"<a ").append("href='" + "//" + httpAddress)
               .append("'>").append(httpAddress).append("</a>\",").append("\"");
               .append("'>").append(httpAddress).append("</a>\",").append("\"");
         }
         }
+        Integer gpuIndex = ResourceUtils.getResourceTypeIndex()
+            .get(ResourceInformation.GPU_URI);
+        long usedGPUs = 0;
+        long availableGPUs = 0;
+        if (gpuIndex != null) {
+          usedGPUs = info.getUsedResource().getResource()
+              .getResourceValue(ResourceInformation.GPU_URI);
+          availableGPUs = info.getAvailableResource().getResource()
+              .getResourceValue(ResourceInformation.GPU_URI);
+        }
         nodeTableData.append("<br title='")
         nodeTableData.append("<br title='")
             .append(String.valueOf(info.getLastHealthUpdate())).append("'>")
             .append(String.valueOf(info.getLastHealthUpdate())).append("'>")
             .append(Times.format(info.getLastHealthUpdate())).append("\",\"")
             .append(Times.format(info.getLastHealthUpdate())).append("\",\"")
@@ -176,6 +192,10 @@ class NodesPage extends RmView {
             .append("\",\"").append(String.valueOf(info.getUsedVirtualCores()))
             .append("\",\"").append(String.valueOf(info.getUsedVirtualCores()))
             .append("\",\"")
             .append("\",\"")
             .append(String.valueOf(info.getAvailableVirtualCores()))
             .append(String.valueOf(info.getAvailableVirtualCores()))
+            .append("\",\"")
+            .append(String.valueOf(usedGPUs))
+            .append("\",\"")
+            .append(String.valueOf(availableGPUs))
             .append("\",\"");
             .append("\",\"");
 
 
         // If opportunistic containers are enabled, add extra fields.
         // If opportunistic containers are enabled, add extra fields.

+ 10 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppsBlock.java

@@ -69,8 +69,10 @@ public class RMAppsBlock extends AppsBlock {
           .th(".runningcontainer", "Running Containers")
           .th(".runningcontainer", "Running Containers")
           .th(".allocatedCpu", "Allocated CPU VCores")
           .th(".allocatedCpu", "Allocated CPU VCores")
           .th(".allocatedMemory", "Allocated Memory MB")
           .th(".allocatedMemory", "Allocated Memory MB")
+          .th(".allocatedGpu", "Allocated GPUs")
           .th(".reservedCpu", "Reserved CPU VCores")
           .th(".reservedCpu", "Reserved CPU VCores")
           .th(".reservedMemory", "Reserved Memory MB")
           .th(".reservedMemory", "Reserved Memory MB")
+          .th(".reservedGpu", "Reserved GPUs")
           .th(".queuePercentage", "% of Queue")
           .th(".queuePercentage", "% of Queue")
           .th(".clusterPercentage", "% of Cluster")
           .th(".clusterPercentage", "% of Cluster")
           .th(".progress", "Progress")
           .th(".progress", "Progress")
@@ -104,6 +106,7 @@ public class RMAppsBlock extends AppsBlock {
       String blacklistedNodesCount = "N/A";
       String blacklistedNodesCount = "N/A";
       RMApp rmApp = rm.getRMContext().getRMApps()
       RMApp rmApp = rm.getRMContext().getRMApps()
           .get(appAttemptId.getApplicationId());
           .get(appAttemptId.getApplicationId());
+      boolean isAppInCompletedState = false;
       if (rmApp != null) {
       if (rmApp != null) {
         RMAppAttempt appAttempt = rmApp.getRMAppAttempt(appAttemptId);
         RMAppAttempt appAttempt = rmApp.getRMAppAttempt(appAttemptId);
         Set<String> nodes =
         Set<String> nodes =
@@ -111,6 +114,7 @@ public class RMAppsBlock extends AppsBlock {
         if (nodes != null) {
         if (nodes != null) {
           blacklistedNodesCount = String.valueOf(nodes.size());
           blacklistedNodesCount = String.valueOf(nodes.size());
         }
         }
+        isAppInCompletedState = rmApp.isAppInCompletedStates();
       }
       }
       String percent = StringUtils.format("%.1f", app.getProgress());
       String percent = StringUtils.format("%.1f", app.getProgress());
       appsTableData
       appsTableData
@@ -152,12 +156,18 @@ public class RMAppsBlock extends AppsBlock {
         .append(app.getAllocatedMemoryMB() == -1 ? "N/A" :
         .append(app.getAllocatedMemoryMB() == -1 ? "N/A" :
             String.valueOf(app.getAllocatedMemoryMB()))
             String.valueOf(app.getAllocatedMemoryMB()))
         .append("\",\"")
         .append("\",\"")
+        .append((isAppInCompletedState && app.getAllocatedGpus() <= 0)
+            ? UNAVAILABLE : String.valueOf(app.getAllocatedGpus()))
+        .append("\",\"")
         .append(app.getReservedCpuVcores() == -1 ? "N/A" : String
         .append(app.getReservedCpuVcores() == -1 ? "N/A" : String
             .valueOf(app.getReservedCpuVcores()))
             .valueOf(app.getReservedCpuVcores()))
         .append("\",\"")
         .append("\",\"")
         .append(app.getReservedMemoryMB() == -1 ? "N/A" :
         .append(app.getReservedMemoryMB() == -1 ? "N/A" :
             String.valueOf(app.getReservedMemoryMB()))
             String.valueOf(app.getReservedMemoryMB()))
         .append("\",\"")
         .append("\",\"")
+        .append((isAppInCompletedState && app.getReservedGpus() <= 0)
+            ? UNAVAILABLE : String.valueOf(app.getReservedGpus()))
+        .append("\",\"")
         .append(queuePercent)
         .append(queuePercent)
         .append("\",\"")
         .append("\",\"")
         .append(clusterPercent)
         .append(clusterPercent)

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java

@@ -48,8 +48,8 @@ public class TestNodesPage {
 
 
   // Number of Actual Table Headers for NodesPage.NodesBlock might change in
   // Number of Actual Table Headers for NodesPage.NodesBlock might change in
   // future. In that case this value should be adjusted to the new value.
   // future. In that case this value should be adjusted to the new value.
-  final int numberOfThInMetricsTable = 23;
-  final int numberOfActualTableHeaders = 13;
+  final int numberOfThInMetricsTable = 20;
+  final int numberOfActualTableHeaders = 15;
   private final int numberOfThForOpportunisticContainers = 4;
   private final int numberOfThForOpportunisticContainers = 4;
 
 
   private Injector injector;
   private Injector injector;