|
@@ -20,8 +20,11 @@ package org.apache.hadoop.yarn.server.nodemanager;
|
|
|
|
|
|
import org.apache.hadoop.conf.Configuration;
|
|
import org.apache.hadoop.conf.Configuration;
|
|
import org.apache.hadoop.service.AbstractService;
|
|
import org.apache.hadoop.service.AbstractService;
|
|
|
|
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
|
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
|
import org.apache.hadoop.yarn.api.records.ResourceUtilization;
|
|
import org.apache.hadoop.yarn.api.records.ResourceUtilization;
|
|
|
|
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuNodeResourceUpdateHandler;
|
|
|
|
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuResourcePlugin;
|
|
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
|
|
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
|
|
import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin;
|
|
import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.Logger;
|
|
@@ -46,6 +49,10 @@ public class NodeResourceMonitorImpl extends AbstractService implements
|
|
/** Resource calculator. */
|
|
/** Resource calculator. */
|
|
private ResourceCalculatorPlugin resourceCalculatorPlugin;
|
|
private ResourceCalculatorPlugin resourceCalculatorPlugin;
|
|
|
|
|
|
|
|
+ /** GPU-related plugin. */
|
|
|
|
+ private GpuResourcePlugin gpuResourcePlugin;
|
|
|
|
+ private GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler;
|
|
|
|
+
|
|
/** Current <em>resource utilization</em> of the node. */
|
|
/** Current <em>resource utilization</em> of the node. */
|
|
private ResourceUtilization nodeUtilization =
|
|
private ResourceUtilization nodeUtilization =
|
|
ResourceUtilization.newInstance(0, 0, 0f);
|
|
ResourceUtilization.newInstance(0, 0, 0f);
|
|
@@ -72,6 +79,18 @@ public class NodeResourceMonitorImpl extends AbstractService implements
|
|
this.resourceCalculatorPlugin =
|
|
this.resourceCalculatorPlugin =
|
|
ResourceCalculatorPlugin.getNodeResourceMonitorPlugin(conf);
|
|
ResourceCalculatorPlugin.getNodeResourceMonitorPlugin(conf);
|
|
|
|
|
|
|
|
+ if (nmContext.getResourcePluginManager() != null) {
|
|
|
|
+ this.gpuResourcePlugin =
|
|
|
|
+ (GpuResourcePlugin)nmContext.getResourcePluginManager().
|
|
|
|
+ getNameToPlugins().get(ResourceInformation.GPU_URI);
|
|
|
|
+
|
|
|
|
+ if (gpuResourcePlugin != null) {
|
|
|
|
+ this.gpuNodeResourceUpdateHandler =
|
|
|
|
+ (GpuNodeResourceUpdateHandler)gpuResourcePlugin.
|
|
|
|
+ getNodeResourceHandlerInstance();
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
LOG.info(" Using ResourceCalculatorPlugin : "
|
|
LOG.info(" Using ResourceCalculatorPlugin : "
|
|
+ this.resourceCalculatorPlugin);
|
|
+ this.resourceCalculatorPlugin);
|
|
}
|
|
}
|
|
@@ -152,6 +171,14 @@ public class NodeResourceMonitorImpl extends AbstractService implements
|
|
(int) (vmem >> 20), // B -> MB
|
|
(int) (vmem >> 20), // B -> MB
|
|
vcores); // Used Virtual Cores
|
|
vcores); // Used Virtual Cores
|
|
|
|
|
|
|
|
+ // Sample the node-level GPU utilization; stays 0 when it cannot be read.
+ float nodeGpuUtilization = 0F;
+ try {
+   // The handler field is only assigned when a resource plugin manager and
+   // a GPU plugin are both present, so it may be null here. Skip the read
+   // instead of raising (and logging) a NullPointerException each time.
+   if (gpuNodeResourceUpdateHandler != null) {
+     nodeGpuUtilization =
+         gpuNodeResourceUpdateHandler.getNodeGpuUtilization();
+   }
+ } catch (Exception e) {
+   // Best effort: keep the default value, but pass the exception as the
+   // last argument so the logger records the stack trace as well.
+   LOG.error("Get Node GPU Utilization error: ", e);
+ }
|
|
|
|
+
|
|
// Publish the node utilization metrics to node manager
|
|
// Publish the node utilization metrics to node manager
|
|
// metrics system.
|
|
// metrics system.
|
|
NodeManagerMetrics nmMetrics = nmContext.getNodeManagerMetrics();
|
|
NodeManagerMetrics nmMetrics = nmContext.getNodeManagerMetrics();
|
|
@@ -159,6 +186,7 @@ public class NodeResourceMonitorImpl extends AbstractService implements
|
|
nmMetrics.setNodeUsedMemGB(nodeUtilization.getPhysicalMemory());
|
|
nmMetrics.setNodeUsedMemGB(nodeUtilization.getPhysicalMemory());
|
|
nmMetrics.setNodeUsedVMemGB(nodeUtilization.getVirtualMemory());
|
|
nmMetrics.setNodeUsedVMemGB(nodeUtilization.getVirtualMemory());
|
|
nmMetrics.setNodeCpuUtilization(nodeUtilization.getCPU());
|
|
nmMetrics.setNodeCpuUtilization(nodeUtilization.getCPU());
|
|
|
|
+ nmMetrics.setNodeGpuUtilization(nodeGpuUtilization);
|
|
}
|
|
}
|
|
|
|
|
|
try {
|
|
try {
|