Browse Source

HDDS-1370. Command Execution in Datanode fails because of NPE (#715)

Bharat Viswanadham 6 years ago
parent
commit
0e770a6539

+ 18 - 12
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java

@@ -348,20 +348,26 @@ public class StateContext {
       throws InterruptedException, ExecutionException, TimeoutException {
     stateExecutionCount.incrementAndGet();
     DatanodeState<DatanodeStateMachine.DatanodeStates> task = getTask();
-    if (this.isEntering()) {
-      task.onEnter();
-    }
-    task.execute(service);
-    DatanodeStateMachine.DatanodeStates newState = task.await(time, unit);
-    if (this.state != newState) {
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("Task {} executed, state transited from {} to {}",
-            task.getClass().getSimpleName(), this.state, newState);
+
+    // Null check: if the DatanodeStateMachine is stopped while the datanode
+    // is still starting up, the state is set to SHUTDOWN and getTask() may
+    // return null.
+    if (task != null) {
+      if (this.isEntering()) {
+        task.onEnter();
       }
-      if (isExiting(newState)) {
-        task.onExit();
+      task.execute(service);
+      DatanodeStateMachine.DatanodeStates newState = task.await(time, unit);
+      if (this.state != newState) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Task {} executed, state transited from {} to {}",
+              task.getClass().getSimpleName(), this.state, newState);
+        }
+        if (isExiting(newState)) {
+          task.onExit();
+        }
+        this.setState(newState);
       }
-      this.setState(newState);
     }
   }
 

+ 10 - 1
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/RunningDatanodeState.java

@@ -86,7 +86,16 @@ public class RunningDatanodeState implements DatanodeState {
     for (EndpointStateMachine endpoint : connectionManager.getValues()) {
       Callable<EndpointStateMachine.EndPointStates> endpointTask
           = getEndPointTask(endpoint);
-      ecs.submit(endpointTask);
+      if (endpointTask != null) {
+        ecs.submit(endpointTask);
+      } else {
+        // This can happen when a task runs longer than the timeout passed to
+        // await and, on completing, sets the state to SHUTDOWN; the state
+        // may then already be SHUTDOWN here, so the DatanodeStateMachine
+        // needs to be shut down as well.
+        LOG.error("State is Shutdown in RunningDatanodeState");
+        context.setState(DatanodeStateMachine.DatanodeStates.SHUTDOWN);
+      }
     }
   }
   //TODO : Cache some of these tasks instead of creating them