Browse Source

HDDS-440. Datanode loops forever if it cannot create directories.
Contributed by Bharat Viswanadham.

(cherry picked from commit a968ea489743ed09d63a6e267e34491e490cd2d8)

Anu Engineer 6 years ago
parent
commit
5479b2aa24

+ 10 - 2
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/InitDatanodeState.java

@@ -116,7 +116,7 @@ public class InitDatanodeState implements DatanodeState,
   /**
    * Persist DatanodeDetails to datanode.id file.
    */
-  private void persistContainerDatanodeDetails() throws IOException {
+  private void persistContainerDatanodeDetails() {
     String dataNodeIDPath = HddsUtils.getDatanodeIdFilePath(conf);
     if (Strings.isNullOrEmpty(dataNodeIDPath)) {
       LOG.error("A valid file path is needed for config setting {}",
@@ -128,7 +128,15 @@ public class InitDatanodeState implements DatanodeState,
     DatanodeDetails datanodeDetails = this.context.getParent()
         .getDatanodeDetails();
     if (datanodeDetails != null && !idPath.exists()) {
-      ContainerUtils.writeDatanodeDetailsTo(datanodeDetails, idPath);
+      try {
+        ContainerUtils.writeDatanodeDetailsTo(datanodeDetails, idPath);
+      } catch (IOException ex) {
+        // Writing DatanodeDetails to the datanode id file failed. This is
+        // a critical failure, so shut down the state machine.
+        LOG.error("Writing to {} failed {}", dataNodeIDPath, ex.getMessage());
+        this.context.setState(DatanodeStateMachine.DatanodeStates.SHUTDOWN);
+        return;
+      }
       LOG.info("DatanodeDetails is persisted to {}", dataNodeIDPath);
     }
   }

+ 42 - 0
hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java

@@ -311,6 +311,48 @@ public class TestDatanodeStateMachine {
     }
   }
 
+  /**
+   * Verifies that the INIT task moves the state machine to SHUTDOWN when
+   * the datanode id file cannot be written.
+   *
+   * The parent directory of the id file is made read-only for the duration
+   * of the task, and write permission is always restored afterwards so a
+   * failure here does not leave a read-only directory behind.
+   */
+  @Test
+  public void testDatanodeStateMachineWithIdWriteFail() throws Exception {
+
+    File idPath = new File(
+        conf.get(ScmConfigKeys.OZONE_SCM_DATANODE_ID));
+    // Make sure no id file exists, so the INIT task attempts to write one.
+    idPath.delete();
+    DatanodeDetails datanodeDetails = getNewDatanodeDetails();
+    DatanodeDetails.Port port = DatanodeDetails.newPort(
+        DatanodeDetails.Port.Name.STANDALONE,
+        OzoneConfigKeys.DFS_CONTAINER_IPC_PORT_DEFAULT);
+    datanodeDetails.setPort(port);
+
+    try (DatanodeStateMachine stateMachine =
+             new DatanodeStateMachine(datanodeDetails, conf)) {
+      DatanodeStateMachine.DatanodeStates currentState =
+          stateMachine.getContext().getState();
+      Assert.assertEquals(DatanodeStateMachine.DatanodeStates.INIT,
+          currentState);
+
+      DatanodeState<DatanodeStateMachine.DatanodeStates> task =
+          stateMachine.getContext().getTask();
+      Assert.assertEquals(InitDatanodeState.class, task.getClass());
+
+      // Make the parent directory of the id file read-only, so the state
+      // machine fails to write the datanode id file and sets the state to
+      // shutdown.
+      File idDir = idPath.getParentFile();
+      idDir.mkdirs();
+      idDir.setReadOnly();
+
+      try {
+        task.execute(executorService);
+        DatanodeStateMachine.DatanodeStates newState =
+            task.await(2, TimeUnit.SECONDS);
+
+        // The parent directory is read-only, so writing the id file fails
+        // and the state is set to shutdown.
+        Assert.assertEquals(DatanodeStateMachine.DatanodeStates.SHUTDOWN,
+            newState);
+      } finally {
+        // Restore write permission even when the task or the assertion
+        // fails, so later tests and cleanup can still use the directory.
+        idDir.setWritable(true);
+      }
+    }
+  }
+
   /**
    * Test state transition with a list of invalid scm configurations,
    * and verify the state transits to SHUTDOWN each time.