Browse Source

YARN-4019. Add JvmPauseMonitor to ResourceManager and NodeManager. Contributed by Robert Kanter.
(cherry picked from commit cfee02b3bdd1117370200c9d8ce216676cff8888)

Junping Du 9 years ago
parent
commit
7af531d2e3

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -322,6 +322,9 @@ Release 2.8.0 - UNRELEASED
     YARN-3961. Expose pending, running and reserved containers of a queue in REST
     api and yarn top (adhoot via asuresh)
 
+    YARN-4019. Add JvmPauseMonitor to ResourceManager and NodeManager. (Robert Kanter
+    via junping_du)
+
   OPTIMIZATIONS
 
     YARN-3339. TestDockerContainerExecutor should pull a single image and not

+ 11 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java

@@ -40,6 +40,7 @@ import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.service.CompositeService;
 import org.apache.hadoop.util.ExitUtil;
 import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.JvmPauseMonitor;
 import org.apache.hadoop.util.NodeHealthScriptRunner;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.ShutdownHookManager;
@@ -83,6 +84,7 @@ public class NodeManager extends CompositeService
   private static final Log LOG = LogFactory.getLog(NodeManager.class);
   private static long nmStartupTime = System.currentTimeMillis();
   protected final NodeManagerMetrics metrics = NodeManagerMetrics.create();
+  private JvmPauseMonitor pauseMonitor;
   private ApplicationACLsManager aclsManager;
   private NodeHealthCheckerService nodeHealthChecker;
   private NodeLabelsProvider nodeLabelsProvider;
@@ -307,13 +309,16 @@ public class NodeManager extends CompositeService
     dispatcher.register(ContainerManagerEventType.class, containerManager);
     dispatcher.register(NodeManagerEventType.class, this);
     addService(dispatcher);
-    
+
+    pauseMonitor = new JvmPauseMonitor(conf);
+    metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);
+
     DefaultMetricsSystem.initialize("NodeManager");
 
     // StatusUpdater should be added last so that it get started last 
     // so that we make sure everything is up before registering with RM. 
     addService(nodeStatusUpdater);
-    
+
     super.serviceInit(conf);
     // TODO add local dirs to del
   }
@@ -325,6 +330,7 @@ public class NodeManager extends CompositeService
     } catch (IOException e) {
       throw new YarnRuntimeException("Failed NodeManager login", e);
     }
+    pauseMonitor.start();
     super.serviceStart();
   }
 
@@ -336,6 +342,9 @@ public class NodeManager extends CompositeService
     try {
       super.serviceStop();
       DefaultMetricsSystem.shutdown();
+      if (pauseMonitor != null) {
+        pauseMonitor.stop();
+      }
     } finally {
       // YARN-3641: NM's services stop get failed shouldn't block the
       // release of NMLevelDBStore.

+ 11 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java

@@ -57,17 +57,26 @@ public class NodeManagerMetrics {
   @Metric("Disk utilization % on good log dirs")
       MutableGaugeInt goodLogDirsDiskUtilizationPerc;
 
+  private JvmMetrics jvmMetrics = null;
 
   private long allocatedMB;
   private long availableMB;
 
+  public NodeManagerMetrics(JvmMetrics jvmMetrics) {
+    this.jvmMetrics = jvmMetrics;
+  }
+
   public static NodeManagerMetrics create() {
     return create(DefaultMetricsSystem.instance());
   }
 
   static NodeManagerMetrics create(MetricsSystem ms) {
-    JvmMetrics.create("NodeManager", null, ms);
-    return ms.register(new NodeManagerMetrics());
+    JvmMetrics jm = JvmMetrics.create("NodeManager", null, ms);
+    return ms.register(new NodeManagerMetrics(jm));
+  }
+
+  public JvmMetrics getJvmMetrics() {
+    return jvmMetrics;
   }
 
   // Potential instrumentation interface methods

+ 10 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java

@@ -39,6 +39,7 @@ import org.apache.hadoop.service.CompositeService;
 import org.apache.hadoop.service.Service;
 import org.apache.hadoop.util.ExitUtil;
 import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.JvmPauseMonitor;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.ShutdownHookManager;
 import org.apache.hadoop.util.StringUtils;
@@ -157,6 +158,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
   private WebApp webApp;
   private AppReportFetcher fetcher = null;
   protected ResourceTrackerService resourceTracker;
+  private JvmPauseMonitor pauseMonitor;
 
   @VisibleForTesting
   protected String webAppAddress;
@@ -511,7 +513,9 @@ public class ResourceManager extends CompositeService implements Recoverable {
       rmContext.setResourceTrackerService(resourceTracker);
 
       DefaultMetricsSystem.initialize("ResourceManager");
-      JvmMetrics.initSingleton("ResourceManager", null);
+      JvmMetrics jm = JvmMetrics.initSingleton("ResourceManager", null);
+      pauseMonitor = new JvmPauseMonitor(conf);
+      jm.setPauseMonitor(pauseMonitor);
 
       // Initialize the Reservation system
       if (conf.getBoolean(YarnConfiguration.RM_RESERVATION_SYSTEM_ENABLE,
@@ -566,6 +570,8 @@ public class ResourceManager extends CompositeService implements Recoverable {
       // need events to move to further states.
       rmStore.start();
 
+      pauseMonitor.start();
+
       if(recoveryEnabled) {
         try {
           LOG.info("Recovery started");
@@ -591,6 +597,9 @@ public class ResourceManager extends CompositeService implements Recoverable {
     protected void serviceStop() throws Exception {
 
       DefaultMetricsSystem.shutdown();
+      if (pauseMonitor != null) {
+        pauseMonitor.stop();
+      }
 
       if (rmContext != null) {
         RMStateStore store = rmContext.getStateStore();