Explorar o código

HDDS-612. Even after setting hdds.scm.chillmode.enabled to false, SCM allocateblock fails with ChillModePrecheck exception. Contributed by Hanisha Koneru.

Arpit Agarwal hai 6 anos
pai
achega
dc27408043

+ 1 - 1
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java

@@ -123,7 +123,7 @@ public class BlockManagerImpl implements EventHandler<Boolean>,
     blockDeletingService =
         new SCMBlockDeletingService(deletedBlockLog, containerManager,
             nodeManager, eventPublisher, svcInterval, serviceTimeout, conf);
-    chillModePrecheck = new ChillModePrecheck();
+    chillModePrecheck = new ChillModePrecheck(conf);
   }
 
   /**

+ 14 - 1
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/ChillModePrecheck.java

@@ -19,6 +19,8 @@
 package org.apache.hadoop.hdds.scm.server;
 
 import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdds.HddsConfigKeys;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
 import org.apache.hadoop.hdds.scm.exceptions.SCMException;
 import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes;
@@ -29,9 +31,20 @@ import org.apache.hadoop.hdds.scm.server.SCMChillModeManager.ChillModeRestricted
  * */
 public class ChillModePrecheck implements Precheck<ScmOps> {
 
-  private AtomicBoolean inChillMode = new AtomicBoolean(true);
+  private AtomicBoolean inChillMode;
   public static final String PRECHECK_TYPE = "ChillModePrecheck";
 
+  public ChillModePrecheck(Configuration conf) {
+    boolean chillModeEnabled = conf.getBoolean(
+        HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
+        HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
+    if (chillModeEnabled) {
+      inChillMode = new AtomicBoolean(true);
+    } else {
+      inChillMode = new AtomicBoolean(false);
+    }
+  }
+
   public boolean check(ScmOps op) throws SCMException {
     if (inChillMode.get() && ChillModeRestrictedOps
         .isRestrictedInChillMode(op)) {

+ 17 - 10
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java

@@ -58,6 +58,7 @@ public class SCMChillModeManager implements
 
   private static final Logger LOG =
       LoggerFactory.getLogger(SCMChillModeManager.class);
+  private final boolean isChillModeEnabled;
   private AtomicBoolean inChillMode = new AtomicBoolean(true);
   private AtomicLong containerWithMinReplicas = new AtomicLong(0);
   private Map<String, ChillModeExitRule> exitRules = new HashMap(1);
@@ -70,14 +71,17 @@ public class SCMChillModeManager implements
       EventQueue eventQueue) {
     this.config = conf;
     this.eventPublisher = eventQueue;
-    exitRules.put(CONT_EXIT_RULE,
-        new ContainerChillModeRule(config, allContainers));
-    exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config));
-    if (!conf.getBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
-        HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT)) {
+    this.isChillModeEnabled = conf.getBoolean(
+        HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
+        HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
+    if (isChillModeEnabled) {
+      exitRules.put(CONT_EXIT_RULE,
+          new ContainerChillModeRule(config, allContainers));
+      exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config));
+      emitChillModeStatus();
+    } else {
       exitChillMode(eventQueue);
     }
-    emitChillModeStatus();
   }
 
   /**
@@ -85,7 +89,7 @@ public class SCMChillModeManager implements
    */
   @VisibleForTesting
   public void emitChillModeStatus() {
-    eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, inChillMode.get());
+    eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, getInChillMode());
   }
 
   private void validateChillModeExitRules(EventPublisher eventQueue) {
@@ -99,7 +103,7 @@ public class SCMChillModeManager implements
 
   /**
    * Exit chill mode. It does following actions:
-   * 1. Set chill mode status to fale.
+   * 1. Set chill mode status to false.
    * 2. Emits START_REPLICATION for ReplicationManager.
    * 3. Cleanup resources.
    * 4. Emit chill mode status.
@@ -131,6 +135,9 @@ public class SCMChillModeManager implements
   }
 
   public boolean getInChillMode() {
+    if (!isChillModeEnabled) {
+      return false;
+    }
     return inChillMode.get();
   }
 
@@ -218,7 +225,7 @@ public class SCMChillModeManager implements
           }
         }
       });
-      if(inChillMode.get()) {
+      if(getInChillMode()) {
         LOG.info("SCM in chill mode. {} % containers have at least one"
                 + " reported replica.",
             (containerWithMinReplicas.get() / maxContainer) * 100);
@@ -268,7 +275,7 @@ public class SCMChillModeManager implements
         return;
       }
 
-      if(inChillMode.get()) {
+      if(getInChillMode()) {
         registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid());
         registeredDns = registeredDnSet.size();
         LOG.info("SCM in chill mode. {} DataNodes registered, {} required.",

+ 4 - 3
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java

@@ -86,12 +86,13 @@ public class SCMClientProtocolServer implements
   private final InetSocketAddress clientRpcAddress;
   private final StorageContainerManager scm;
   private final OzoneConfiguration conf;
-  private ChillModePrecheck chillModePrecheck = new ChillModePrecheck();
+  private ChillModePrecheck chillModePrecheck;
 
   public SCMClientProtocolServer(OzoneConfiguration conf,
       StorageContainerManager scm) throws IOException {
     this.scm = scm;
     this.conf = conf;
+    chillModePrecheck = new ChillModePrecheck(conf);
     final int handlerCount =
         conf.getInt(OZONE_SCM_HANDLER_COUNT_KEY,
             OZONE_SCM_HANDLER_COUNT_DEFAULT);
@@ -357,8 +358,8 @@ public class SCMClientProtocolServer implements
    * Set chill mode status based on SCMEvents.CHILL_MODE_STATUS event.
    */
   @Override
-  public void onMessage(Boolean inChillMOde, EventPublisher publisher) {
-    chillModePrecheck.setInChillMode(inChillMOde);
+  public void onMessage(Boolean inChillMode, EventPublisher publisher) {
+    chillModePrecheck.setInChillMode(inChillMode);
   }
 
   /**

+ 12 - 9
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java

@@ -230,9 +230,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
     ContainerReportHandler containerReportHandler =
         new ContainerReportHandler(containerManager, scmNodeManager,
             replicationStatus);
-    scmChillModeManager = new SCMChillModeManager(conf,
-        containerManager.getContainers(),
-        eventQueue);
+
     PipelineActionEventHandler pipelineActionEventHandler =
         new PipelineActionEventHandler();
 
@@ -292,8 +290,6 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
     eventQueue.addHandler(SCMEvents.CMD_STATUS_REPORT, cmdStatusReportHandler);
     eventQueue.addHandler(SCMEvents.START_REPLICATION,
         replicationStatus.getReplicationStatusListener());
-    eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
-        replicationStatus.getChillModeStatusListener());
     eventQueue
         .addHandler(SCMEvents.PENDING_DELETE_STATUS, pendingDeleteHandler);
     eventQueue.addHandler(SCMEvents.DELETE_BLOCK_STATUS,
@@ -301,13 +297,20 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
     eventQueue.addHandler(SCMEvents.PIPELINE_ACTIONS,
         pipelineActionEventHandler);
     eventQueue.addHandler(SCMEvents.PIPELINE_CLOSE, pipelineCloseHandler);
-    eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
-        scmChillModeManager);
-    eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
-        (BlockManagerImpl) scmBlockManager);
+
     eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, clientProtocolServer);
     eventQueue.addHandler(SCMEvents.PIPELINE_REPORT, pipelineReportHandler);
 
+    eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
+        replicationStatus.getChillModeStatusListener());
+    eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
+        (BlockManagerImpl) scmBlockManager);
+    scmChillModeManager = new SCMChillModeManager(conf,
+        containerManager.getContainers(),
+        eventQueue);
+
+    eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
+        scmChillModeManager);
     registerMXBean();
   }
 

+ 21 - 1
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestScmChillMode.java

@@ -229,7 +229,7 @@ public class TestScmChillMode {
 
   }
 
-  @Test
+  @Test(timeout=300_000)
   public void testSCMChillMode() throws Exception {
     MiniOzoneCluster.Builder clusterBuilder = MiniOzoneCluster.newBuilder(conf)
         .setHbInterval(1000)
@@ -360,4 +360,24 @@ public class TestScmChillMode {
             .getContainerWithPipeline(containers.get(0).getContainerID()));
   }
 
+  @Test(timeout = 300_000)
+  public void testSCMChillModeDisabled() throws Exception {
+    cluster.stop();
+
+    // If chill mode is disabled, cluster should not be in chill mode even if
+    // min number of datanodes are not started.
+    conf.setBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, false);
+    conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, 3);
+    builder = MiniOzoneCluster.newBuilder(conf)
+        .setHbInterval(1000)
+        .setHbProcessorInterval(500)
+        .setNumDatanodes(1);
+    cluster = builder.build();
+    StorageContainerManager scm = cluster.getStorageContainerManager();
+    assertFalse(scm.isInChillMode());
+
+    // Even on SCM restart, cluster should be out of chill mode immediately.
+    cluster.restartStorageContainerManager();
+    assertFalse(scm.isInChillMode());
+  }
 }