瀏覽代碼

HDDS-1550. MiniOzoneCluster is not shutting down all the threads during shutdown. Contributed by Mukul Kumar Singh. (#1050)

* HDDS-1550. MiniOzoneCluster is not shutting down all the threads during shutdown. Contributed by Mukul Kumar Singh.
Mukul Kumar Singh 5 年之前
父節點
當前提交
e5ffb88257

+ 5 - 0
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java

@@ -120,6 +120,11 @@ public abstract class Handler {
       TarContainerPacker packer)
       throws IOException;
 
+  /**
+   * Stops the handler; implementations should shut down services they started.
+   */
+  public abstract void stop();
+
   /**
    * Marks the container for closing. Moves the container to CLOSING state.
    *

+ 18 - 0
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportManager.java

@@ -23,10 +23,13 @@ import com.google.protobuf.GeneratedMessage;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
 import org.apache.hadoop.util.concurrent.HadoopExecutors;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
 
 /**
  * ReportManager is responsible for managing all the {@link ReportPublisher}
@@ -34,6 +37,8 @@ import java.util.concurrent.ScheduledExecutorService;
  * which should be used for scheduling the reports.
  */
 public final class ReportManager {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(ReportManager.class);
 
   private final StateContext context;
   private final List<ReportPublisher> publishers;
@@ -71,6 +76,19 @@ public final class ReportManager {
    */
   public void shutdown() {
     executorService.shutdown();
+    try {
+      // Wait a bounded time for scheduled publishers to finish; if they do
+      // not, cancel them so no scheduler thread outlives this manager.
+      if (!executorService.awaitTermination(5, TimeUnit.SECONDS)) {
+        executorService.shutdownNow();
+      }
+    } catch (InterruptedException e) {
+      // Stop waiting, cancel what is left, and keep the interrupt visible
+      // to the caller instead of swallowing it.
+      executorService.shutdownNow();
+      Thread.currentThread().interrupt();
+      LOG.error("Failed to shutdown Report Manager", e);
+    }
   }
 
   /**

+ 10 - 0
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java

@@ -57,6 +57,7 @@ import java.net.SocketAddress;
 import java.util.Collections;
 import java.util.List;
 import java.util.UUID;
+import java.util.concurrent.TimeUnit;
 
 /**
  * Creates a Grpc server endpoint that acts as the communication layer for
@@ -172,6 +173,15 @@ public final class XceiverServerGrpc extends XceiverServer {
   public void stop() {
     if (isStarted) {
       server.shutdown();
+      try {
+        server.awaitTermination(5, TimeUnit.SECONDS);
+      } catch (Exception e) {
+        if (e instanceof InterruptedException) {
+          // Re-assert the interrupt so callers can still observe it.
+          Thread.currentThread().interrupt();
+        }
+        LOG.error("failed to shutdown XceiverServerGrpc", e);
+      }
       isStarted = false;
     }
   }

+ 5 - 0
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java

@@ -158,6 +158,11 @@ public class KeyValueHandler extends Handler {
     return volumeChoosingPolicy;
   }
 
+  @Override
+  public void stop() {
+    blockDeletingService.shutdown(); // HDDS-1550: stop it with the handler
+  }
+
   @Override
   public ContainerCommandResponseProto handle(
       ContainerCommandRequestProto request, Container container,

+ 1 - 0
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java

@@ -200,6 +200,7 @@ public class OzoneContainer {
     stopContainerScrub();
     writeChannel.stop();
     readChannel.stop();
+    this.handlers.values().forEach(Handler::stop);
     hddsDispatcher.shutdown();
     volumeSet.shutdown();
   }

+ 1 - 0
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java

@@ -207,6 +207,7 @@ public class SCMNodeManager implements NodeManager {
   public void close() throws IOException {
     unregisterMXBean();
     metrics.unRegister();
+    nodeStateManager.close(); // HDDS-1550: close it with the node manager
   }
 
   /**

+ 7 - 1
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java

@@ -84,6 +84,7 @@ import org.apache.hadoop.hdds.server.events.EventQueue;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.metrics2.MetricsSystem;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.metrics2.util.MBeans;
 import org.apache.hadoop.ozone.OzoneConfigKeys;
@@ -189,6 +190,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
   private final OzoneConfiguration configuration;
   private final SafeModeHandler safeModeHandler;
   private SCMContainerMetrics scmContainerMetrics;
+  private MetricsSystem ms;
 
   /**
    *  Network topology Map.
@@ -754,7 +756,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
         buildRpcServerStartMessage(
             "StorageContainerLocationProtocol RPC server",
             getClientRpcAddress()));
-    DefaultMetricsSystem.initialize("StorageContainerManager");
+    ms = DefaultMetricsSystem.initialize("StorageContainerManager");
 
     commandWatcherLeaseManager.start();
     getClientProtocolServer().start();
@@ -874,6 +876,10 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
       LOG.error("SCM Metadata store stop failed", ex);
     }
 
+    if (ms != null) {
+      ms.stop();
+    }
+
     scmSafeModeManager.stop();
   }
 

+ 1 - 1
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java

@@ -244,7 +244,7 @@ public class MiniOzoneChaosCluster extends MiniOzoneClusterImpl {
           1, TimeUnit.SECONDS);
       conf.setTimeDuration(HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL, 1,
           TimeUnit.SECONDS);
-      conf.setInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE, 8);
+      conf.setInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE, 2);
       conf.setInt("hdds.scm.replication.thread.interval", 10 * 1000);
       conf.setInt("hdds.scm.replication.event.timeout", 20 * 1000);
     }

+ 11 - 8
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java

@@ -340,19 +340,20 @@ public class MiniOzoneClusterImpl implements MiniOzoneCluster {
       ozoneManager.join();
     }
 
+    if (!hddsDatanodes.isEmpty()) {
+      LOG.info("Shutting the HddsDatanodes");
+      hddsDatanodes.parallelStream()
+          .forEach(dn -> {
+            dn.stop();
+            dn.join();
+          });
+    }
+
     if (scm != null) {
       LOG.info("Stopping the StorageContainerManager");
       scm.stop();
       scm.join();
     }
-
-    if (!hddsDatanodes.isEmpty()) {
-      LOG.info("Shutting the HddsDatanodes");
-      for (HddsDatanodeService hddsDatanode : hddsDatanodes) {
-        hddsDatanode.stop();
-        hddsDatanode.join();
-      }
-    }
   }
 
   /**
@@ -568,6 +569,8 @@ public class MiniOzoneClusterImpl implements MiniOzoneCluster {
       conf.set(ScmConfigKeys.OZONE_SCM_DATANODE_ADDRESS_KEY, "127.0.0.1:0");
       conf.set(ScmConfigKeys.OZONE_SCM_HTTP_ADDRESS_KEY, "127.0.0.1:0");
       conf.setInt(ScmConfigKeys.OZONE_SCM_HANDLER_COUNT_KEY, numOfScmHandlers);
+      conf.set(HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT,
+          "3s");
       configureSCMheartbeat();
     }
 

+ 6 - 11
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManager.java

@@ -20,10 +20,8 @@ package org.apache.hadoop.ozone;
 import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_COMMAND_STATUS_REPORT_INTERVAL;
 import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL;
 import static org.junit.Assert.fail;
-import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.argThat;
 import static org.mockito.Matchers.eq;
-import static org.mockito.Mockito.doNothing;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
@@ -65,7 +63,6 @@ import org.apache.hadoop.hdds.scm.server.SCMClientProtocolServer;
 import org.apache.hadoop.hdds.scm.server.SCMStorageConfig;
 import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
 import org.apache.hadoop.hdds.server.events.EventPublisher;
-import org.apache.hadoop.hdds.server.events.TypedEvent;
 import org.apache.hadoop.ozone.container.ContainerTestHelper;
 import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
 import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
@@ -520,6 +517,11 @@ public class TestStorageContainerManager {
 
     // Stop processing HB
     scm.getDatanodeProtocolServer().stop();
+
+    scm.getContainerManager().updateContainerState(selectedContainer
+        .containerID(), HddsProtos.LifeCycleEvent.FINALIZE);
+    cluster.restartStorageContainerManager(true);
+    scm = cluster.getStorageContainerManager();
     EventPublisher publisher = mock(EventPublisher.class);
     ReplicationManager replicationManager = scm.getReplicationManager();
     Field f = replicationManager.getClass().getDeclaredField("eventPublisher");
@@ -528,13 +530,6 @@ public class TestStorageContainerManager {
     modifiersField.setAccessible(true);
     modifiersField.setInt(f, f.getModifiers() & ~Modifier.FINAL);
     f.set(replicationManager, publisher);
-
-    doNothing().when(publisher).fireEvent(any(TypedEvent.class),
-        any(CommandForDatanode.class));
-
-    scm.getContainerManager().updateContainerState(selectedContainer
-        .containerID(), HddsProtos.LifeCycleEvent.FINALIZE);
-    cluster.restartStorageContainerManager(true);
     scm.getReplicationManager().start();
     Thread.sleep(2000);
 
@@ -572,7 +567,7 @@ public class TestStorageContainerManager {
           (CloseContainerCommand) cmdRight.getCommand();
       return cmdRight.getDatanodeId().equals(uuid)
           && left.getContainerID() == right.getContainerID()
-          && left.getPipelineID() == right.getPipelineID()
+          && left.getPipelineID().equals(right.getPipelineID())
           && left.getType() == right.getType()
           && left.getProto().equals(right.getProto());
     }

+ 15 - 1
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java

@@ -31,7 +31,17 @@ import java.util.concurrent.TimeUnit;
 
 import static java.util.concurrent.TimeUnit.SECONDS;
 
-import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL;
+import static org.apache.hadoop.hdds.HddsConfigKeys
+    .HDDS_HEARTBEAT_INTERVAL;
+import static org.apache.hadoop.hdds.HddsConfigKeys
+    .HDDS_PIPELINE_REPORT_INTERVAL;
+import static org.apache.hadoop.hdds.HddsConfigKeys
+    .HDDS_COMMAND_STATUS_REPORT_INTERVAL;
+import static org.apache.hadoop.hdds.HddsConfigKeys
+    .HDDS_CONTAINER_REPORT_INTERVAL;
+import static org.apache.hadoop.hdds.HddsConfigKeys
+    .HDDS_NODE_REPORT_INTERVAL;
+
 import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DEAD;
 import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY;
 import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE;
@@ -61,6 +71,10 @@ public class TestQueryNode {
     conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL,
         interval, TimeUnit.MILLISECONDS);
     conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 1, SECONDS);
+    conf.setTimeDuration(HDDS_PIPELINE_REPORT_INTERVAL, 1, SECONDS);
+    conf.setTimeDuration(HDDS_COMMAND_STATUS_REPORT_INTERVAL, 1, SECONDS);
+    conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 1, SECONDS);
+    conf.setTimeDuration(HDDS_NODE_REPORT_INTERVAL, 1, SECONDS);
     conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, SECONDS);
     conf.setTimeDuration(OZONE_SCM_DEADNODE_INTERVAL, 6, SECONDS);