Browse Source

HDDS-921. Add JVM pause monitor to Ozone Daemons (OM, SCM and Datanodes). Contributed by Bharat Viswanadham.

Bharat Viswanadham 6 years ago
parent
commit
26e4be7022

+ 13 - 0
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java

@@ -49,6 +49,7 @@ import org.apache.hadoop.ozone.container.replication.DownloadAndImportReplicator
 import org.apache.hadoop.ozone.container.replication.ReplicationSupervisor;
 import org.apache.hadoop.ozone.container.replication.SimpleContainerDownloader;
 import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
+import org.apache.hadoop.util.JvmPauseMonitor;
 import org.apache.hadoop.util.Time;
 import org.apache.hadoop.util.concurrent.HadoopExecutors;
 
@@ -78,6 +79,8 @@ public class DatanodeStateMachine implements Closeable {
   private Thread cmdProcessThread = null;
   private final ReplicationSupervisor supervisor;
 
+  private JvmPauseMonitor jvmPauseMonitor;
+
   /**
    * Constructs a datanode state machine.
    *
@@ -159,6 +162,12 @@ public class DatanodeStateMachine implements Closeable {
     container.start();
     reportManager.init();
     initCommandHandlerThread(conf);
+
+    // Start jvm monitor
+    jvmPauseMonitor = new JvmPauseMonitor();
+    jvmPauseMonitor.init(conf);
+    jvmPauseMonitor.start();
+
     while (context.getState() != DatanodeStates.SHUTDOWN) {
       try {
         LOG.debug("Executing cycle Number : {}", context.getExecutionCount());
@@ -242,6 +251,10 @@ public class DatanodeStateMachine implements Closeable {
     if(container != null) {
       container.stop();
     }
+
+    if (jvmPauseMonitor != null) {
+      jvmPauseMonitor.stop();
+    }
   }
 
   /**

+ 16 - 0
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java

@@ -84,6 +84,7 @@ import org.apache.hadoop.ozone.common.StorageInfo;
 import org.apache.hadoop.ozone.lease.LeaseManager;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.JvmPauseMonitor;
 import org.apache.hadoop.util.StringUtils;
 
 import static org.apache.hadoop.hdds.scm.ScmConfigKeys
@@ -183,6 +184,9 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
   private final ReplicationActivityStatus replicationStatus;
   private final SCMChillModeManager scmChillModeManager;
 
+  private JvmPauseMonitor jvmPauseMonitor;
+  private final OzoneConfiguration configuration;
+
   /**
    * Creates a new StorageContainerManager. Configuration will be updated
    * with information on the
@@ -192,6 +196,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
    */
   private StorageContainerManager(OzoneConfiguration conf) throws IOException {
 
+    configuration = conf;
     StorageContainerManager.initMetrics();
     initContainerReportCache(conf);
 
@@ -308,6 +313,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
     eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
         scmChillModeManager);
     registerMXBean();
+
   }
 
   /**
@@ -683,6 +689,12 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
     scmBlockManager.start();
     replicationStatus.start();
     replicationManager.start();
+
+    // Start jvm monitor
+    jvmPauseMonitor = new JvmPauseMonitor();
+    jvmPauseMonitor.init(configuration);
+    jvmPauseMonitor.start();
+
     setStartTime();
   }
 
@@ -766,6 +778,10 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
     } catch (Exception ex) {
       LOG.error("SCM Event Queue stop failed", ex);
     }
+
+    if (jvmPauseMonitor != null) {
+      jvmPauseMonitor.stop();
+    }
     IOUtils.cleanupWithLogger(LOG, containerManager);
     IOUtils.cleanupWithLogger(LOG, pipelineManager);
   }

+ 10 - 0
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java

@@ -85,6 +85,7 @@ import org.apache.hadoop.ozone.security.acl.OzoneObjInfo;
 import org.apache.hadoop.ozone.security.acl.RequestContext;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.JvmPauseMonitor;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.ShutdownHookManager;
 import org.apache.hadoop.util.StringUtils;
@@ -178,6 +179,7 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
   private final File omMetaDir;
   private final boolean isAclEnabled;
   private final IAccessAuthorizer accessAuthorizer;
+  private JvmPauseMonitor jvmPauseMonitor;
 
   private OzoneManager(OzoneConfiguration conf) throws IOException {
     Preconditions.checkNotNull(conf);
@@ -639,6 +641,11 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
     httpServer = new OzoneManagerHttpServer(configuration, this);
     httpServer.start();
     registerMXBean();
+
+    // Start jvm monitor
+    jvmPauseMonitor = new JvmPauseMonitor();
+    jvmPauseMonitor.init(configuration);
+    jvmPauseMonitor.start();
     setStartTime();
   }
 
@@ -660,6 +667,9 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
       metadataManager.stop();
       metrics.unRegister();
       unregisterMXBean();
+      if (jvmPauseMonitor != null) {
+        jvmPauseMonitor.stop();
+      }
     } catch (Exception e) {
       LOG.error("OzoneManager stop failed.", e);
     }