Browse Source

HDFS-3071. haadmin failover command does not provide enough detail when target NN is not ready to be active. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1304204 13f79535-47bb-0310-9956-ffa450edef68
Todd Lipcon 13 years ago
parent
commit
13e2ed8c65
16 changed files with 239 additions and 164 deletions
  1. 15 15
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java
  2. 1 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java
  3. 5 17
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java
  4. 56 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceStatus.java
  5. 9 8
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthMonitor.java
  6. 21 20
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java
  7. 21 27
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolServerSideTranslatorPB.java
  8. 10 23
      hadoop-common-project/hadoop-common/src/main/proto/HAServiceProtocol.proto
  9. 18 12
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java
  10. 0 2
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java
  11. 2 2
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHealthMonitor.java
  12. 3 0
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  13. 28 10
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
  14. 4 9
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
  15. 20 14
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java
  16. 26 3
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java

+ 15 - 15
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java

@@ -60,20 +60,32 @@ public class FailoverController {
                                         InetSocketAddress toSvcAddr,
                                         boolean forceActive)
       throws FailoverFailedException {
-    HAServiceState toSvcState;
+    HAServiceStatus toSvcStatus;
 
     try {
-      toSvcState = toSvc.getServiceState();
+      toSvcStatus = toSvc.getServiceStatus();
     } catch (IOException e) {
       String msg = "Unable to get service state for " + toSvcAddr;
       LOG.error(msg, e);
       throw new FailoverFailedException(msg, e);
     }
 
-    if (!toSvcState.equals(HAServiceState.STANDBY)) {
+    if (!toSvcStatus.getState().equals(HAServiceState.STANDBY)) {
       throw new FailoverFailedException(
           "Can't failover to an active service");
     }
+    
+    if (!toSvcStatus.isReadyToBecomeActive()) {
+      String notReadyReason = toSvcStatus.getNotReadyReason();
+      if (!forceActive) {
+        throw new FailoverFailedException(
+            toSvcAddr + " is not ready to become active: " +
+            notReadyReason);
+      } else {
+        LOG.warn("Service is not ready to become active, but forcing: " +
+            notReadyReason);
+      }
+    }
 
     try {
       HAServiceProtocolHelper.monitorHealth(toSvc);
@@ -84,18 +96,6 @@ public class FailoverController {
       throw new FailoverFailedException(
           "Got an IO exception", e);
     }
-
-    try {
-      if (!toSvc.readyToBecomeActive()) {
-        if (!forceActive) {
-          throw new FailoverFailedException(
-              toSvcAddr + " is not ready to become active");
-        }
-      }
-    } catch (IOException e) {
-      throw new FailoverFailedException(
-          "Got an IO exception", e);
-    }
   }
 
   /**

+ 1 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java

@@ -32,7 +32,6 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.ha.protocolPB.HAServiceProtocolClientSideTranslatorPB;
-import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
@@ -221,7 +220,7 @@ public abstract class HAAdmin extends Configured implements Tool {
     }
 
     HAServiceProtocol proto = getProtocol(argv[1]);
-    out.println(proto.getServiceState());
+    out.println(proto.getServiceStatus().getState());
     return 0;
   }
 

+ 5 - 17
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java

@@ -115,27 +115,15 @@ public interface HAServiceProtocol {
                                            IOException;
 
   /**
-   * Return the current state of the service.
+   * Return the current status of the service. The status indicates
+   * the current <em>state</em> (e.g. ACTIVE/STANDBY) as well as
+   * some additional information. {@link HAServiceStatus}
    * 
    * @throws AccessControlException
    *           if access is denied.
    * @throws IOException
    *           if other errors happen
    */
-  public HAServiceState getServiceState() throws AccessControlException,
-                                                 IOException;
-
-  /**
-   * Return true if the service is capable and ready to transition
-   * from the standby state to the active state.
-   * 
-   * @return true if the service is ready to become active, false otherwise.
-   * @throws AccessControlException
-   *           if access is denied.
-   * @throws IOException
-   *           if other errors happen
-   */
-  public boolean readyToBecomeActive() throws ServiceFailedException,
-                                              AccessControlException,
-                                              IOException;
+  public HAServiceStatus getServiceStatus() throws AccessControlException,
+                                                   IOException;
 }

+ 56 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceStatus.java

@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ha;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
+
+@InterfaceAudience.Private
+public class HAServiceStatus {
+  private HAServiceState state;
+  private boolean readyToBecomeActive;
+  private String notReadyReason;
+  
+  public HAServiceStatus(HAServiceState state) {
+    this.state = state;
+  }
+
+  public HAServiceState getState() {
+    return state;
+  }
+
+  public HAServiceStatus setReadyToBecomeActive() {
+    this.readyToBecomeActive = true;
+    this.notReadyReason = null;
+    return this;
+  }
+  
+  public HAServiceStatus setNotReadyToBecomeActive(String reason) {
+    this.readyToBecomeActive = false;
+    this.notReadyReason = reason;
+    return this;
+  }
+
+  public boolean isReadyToBecomeActive() {
+    return readyToBecomeActive;
+  }
+
+  public String getNotReadyReason() {
+    return notReadyReason;
+  }
+}

+ 9 - 8
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthMonitor.java

@@ -77,7 +77,8 @@ class HealthMonitor {
   private List<Callback> callbacks = Collections.synchronizedList(
       new LinkedList<Callback>());
 
-  private HAServiceState lastServiceState = HAServiceState.INITIALIZING;
+  private HAServiceStatus lastServiceState = new HAServiceStatus(
+      HAServiceState.INITIALIZING);
   
   enum State {
     /**
@@ -188,10 +189,10 @@ class HealthMonitor {
 
   private void doHealthChecks() throws InterruptedException {
     while (shouldRun) {
-      HAServiceState state = null;
+      HAServiceStatus status = null;
       boolean healthy = false;
       try {
-        state = proxy.getServiceState();
+        status = proxy.getServiceStatus();
         proxy.monitorHealth();
         healthy = true;
       } catch (HealthCheckFailedException e) {
@@ -207,8 +208,8 @@ class HealthMonitor {
         return;
       }
       
-      if (state != null) {
-        setLastServiceState(state);
+      if (status != null) {
+        setLastServiceStatus(status);
       }
       if (healthy) {
         enterState(State.SERVICE_HEALTHY);
@@ -218,8 +219,8 @@ class HealthMonitor {
     }
   }
   
-  private synchronized void setLastServiceState(HAServiceState serviceState) {
-    this.lastServiceState = serviceState;
+  private synchronized void setLastServiceStatus(HAServiceStatus status) {
+    this.lastServiceState = status;
   }
 
   private synchronized void enterState(State newState) {
@@ -238,7 +239,7 @@ class HealthMonitor {
     return state;
   }
   
-  synchronized HAServiceState getLastServiceState() {
+  synchronized HAServiceStatus getLastServiceStatus() {
     return lastServiceState;
   }
   

+ 21 - 20
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java

@@ -27,10 +27,11 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.ha.HAServiceProtocol;
-import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStateRequestProto;
+import org.apache.hadoop.ha.HAServiceStatus;
+import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStatusRequestProto;
+import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStatusResponseProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.HAServiceStateProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.MonitorHealthRequestProto;
-import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.ReadyToBecomeActiveRequestProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToActiveRequestProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToStandbyRequestProto;
 import org.apache.hadoop.ipc.ProtobufHelper;
@@ -60,10 +61,8 @@ public class HAServiceProtocolClientSideTranslatorPB implements
       TransitionToActiveRequestProto.newBuilder().build();
   private final static TransitionToStandbyRequestProto TRANSITION_TO_STANDBY_REQ = 
       TransitionToStandbyRequestProto.newBuilder().build();
-  private final static GetServiceStateRequestProto GET_SERVICE_STATE_REQ = 
-      GetServiceStateRequestProto.newBuilder().build();
-  private final static ReadyToBecomeActiveRequestProto ACTIVE_READY_REQ = 
-      ReadyToBecomeActiveRequestProto.newBuilder().build();
+  private final static GetServiceStatusRequestProto GET_SERVICE_STATUS_REQ = 
+      GetServiceStatusRequestProto.newBuilder().build();
   
   private final HAServiceProtocolPB rpcProxy;
 
@@ -113,14 +112,26 @@ public class HAServiceProtocolClientSideTranslatorPB implements
   }
 
   @Override
-  public HAServiceState getServiceState() throws IOException {
-    HAServiceStateProto state;
+  public HAServiceStatus getServiceStatus() throws IOException {
+    GetServiceStatusResponseProto status;
     try {
-      state = rpcProxy.getServiceState(NULL_CONTROLLER,
-          GET_SERVICE_STATE_REQ).getState();
+      status = rpcProxy.getServiceStatus(NULL_CONTROLLER,
+          GET_SERVICE_STATUS_REQ);
     } catch (ServiceException e) {
       throw ProtobufHelper.getRemoteException(e);
     }
+    
+    HAServiceStatus ret = new HAServiceStatus(
+        convert(status.getState()));
+    if (status.getReadyToBecomeActive()) {
+      ret.setReadyToBecomeActive();
+    } else {
+      ret.setNotReadyToBecomeActive(status.getNotReadyReason());
+    }
+    return ret;
+  }
+  
+  private HAServiceState convert(HAServiceStateProto state) {
     switch(state) {
     case ACTIVE:
       return HAServiceState.ACTIVE;
@@ -137,16 +148,6 @@ public class HAServiceProtocolClientSideTranslatorPB implements
     RPC.stopProxy(rpcProxy);
   }
 
-  @Override
-  public boolean readyToBecomeActive() throws IOException {
-    try {
-      return rpcProxy.readyToBecomeActive(NULL_CONTROLLER, ACTIVE_READY_REQ)
-          .getReadyToBecomeActive();
-    } catch (ServiceException e) {
-      throw ProtobufHelper.getRemoteException(e);
-    }
-  }
-
   @Override
   public Object getUnderlyingProxyObject() {
     return rpcProxy;

+ 21 - 27
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolServerSideTranslatorPB.java

@@ -22,14 +22,12 @@ import java.io.IOException;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.ha.HAServiceProtocol;
-import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
-import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStateRequestProto;
-import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStateResponseProto;
+import org.apache.hadoop.ha.HAServiceStatus;
+import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStatusRequestProto;
+import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStatusResponseProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.HAServiceStateProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.MonitorHealthRequestProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.MonitorHealthResponseProto;
-import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.ReadyToBecomeActiveRequestProto;
-import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.ReadyToBecomeActiveResponseProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToActiveRequestProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToActiveResponseProto;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToStandbyRequestProto;
@@ -99,29 +97,37 @@ public class HAServiceProtocolServerSideTranslatorPB implements
   }
 
   @Override
-  public GetServiceStateResponseProto getServiceState(RpcController controller,
-      GetServiceStateRequestProto request) throws ServiceException {
-    HAServiceState s;
+  public GetServiceStatusResponseProto getServiceStatus(RpcController controller,
+      GetServiceStatusRequestProto request) throws ServiceException {
+    HAServiceStatus s;
     try {
-      s = server.getServiceState();
+      s = server.getServiceStatus();
     } catch(IOException e) {
       throw new ServiceException(e);
     }
     
-    HAServiceStateProto ret;
-    switch (s) {
+    HAServiceStateProto retState;
+    switch (s.getState()) {
     case ACTIVE:
-      ret = HAServiceStateProto.ACTIVE;
+      retState = HAServiceStateProto.ACTIVE;
       break;
     case STANDBY:
-      ret = HAServiceStateProto.STANDBY;
+      retState = HAServiceStateProto.STANDBY;
       break;
     case INITIALIZING:
     default:
-      ret = HAServiceStateProto.INITIALIZING;
+      retState = HAServiceStateProto.INITIALIZING;
       break;
     }
-    return GetServiceStateResponseProto.newBuilder().setState(ret).build();
+    
+    GetServiceStatusResponseProto.Builder ret =
+      GetServiceStatusResponseProto.newBuilder()
+        .setState(retState)
+        .setReadyToBecomeActive(s.isReadyToBecomeActive());
+    if (!s.isReadyToBecomeActive()) {
+      ret.setNotReadyReason(s.getNotReadyReason());
+    }
+    return ret.build();
   }
 
   @Override
@@ -143,16 +149,4 @@ public class HAServiceProtocolServerSideTranslatorPB implements
         RPC.getProtocolVersion(HAServiceProtocolPB.class),
         HAServiceProtocolPB.class);
   }
-
-  @Override
-  public ReadyToBecomeActiveResponseProto readyToBecomeActive(
-      RpcController controller, ReadyToBecomeActiveRequestProto request)
-      throws ServiceException {
-    try {
-      return ReadyToBecomeActiveResponseProto.newBuilder()
-          .setReadyToBecomeActive(server.readyToBecomeActive()).build();
-    } catch (IOException e) {
-      throw new ServiceException(e);
-    }
-  }
 }

+ 10 - 23
hadoop-common-project/hadoop-common/src/main/proto/HAServiceProtocol.proto

@@ -66,27 +66,20 @@ message TransitionToStandbyResponseProto {
 /**
  * void request
  */
-message GetServiceStateRequestProto { 
+message GetServiceStatusRequestProto { 
 }
 
 /**
  * Returns the state of the service
  */
-message GetServiceStateResponseProto { 
+message GetServiceStatusResponseProto { 
   required HAServiceStateProto state = 1;
-}
 
-/**
- * void request
- */
-message ReadyToBecomeActiveRequestProto { 
-}
-
-/**
- * Returns true if service is ready to become active
- */
-message ReadyToBecomeActiveResponseProto { 
-  required bool readyToBecomeActive = 1;
+  // If state is STANDBY, indicate whether it is
+  // ready to become active.
+  optional bool readyToBecomeActive = 2;
+  // If not ready to become active, a textual explanation of why not
+  optional string notReadyReason = 3;
 }
 
 /**
@@ -115,14 +108,8 @@ service HAServiceProtocolService {
       returns(TransitionToStandbyResponseProto);
 
   /**
-   * Get the current state of the service.
-   */
-  rpc getServiceState(GetServiceStateRequestProto)
-      returns(GetServiceStateResponseProto);
-
-  /**
-   * Check if the service is ready to become active
+   * Get the current status of the service.
    */
-  rpc readyToBecomeActive(ReadyToBecomeActiveRequestProto)
-      returns(ReadyToBecomeActiveResponseProto);
+  rpc getServiceStatus(GetServiceStatusRequestProto)
+      returns(GetServiceStatusResponseProto);
 }

+ 18 - 12
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java

@@ -30,8 +30,6 @@ import org.apache.hadoop.ha.protocolPB.HAServiceProtocolClientSideTranslatorPB;
 import org.apache.hadoop.ha.TestNodeFencer.AlwaysSucceedFencer;
 import org.apache.hadoop.ha.TestNodeFencer.AlwaysFailFencer;
 import static org.apache.hadoop.ha.TestNodeFencer.setupFencer;
-import org.apache.hadoop.ipc.ProtocolSignature;
-import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.AccessControlException;
 
@@ -66,13 +64,16 @@ public class TestFailoverController {
     }
 
     @Override
-    public HAServiceState getServiceState() throws IOException {
-      return state;
+    public HAServiceStatus getServiceStatus() throws IOException {
+      HAServiceStatus ret = new HAServiceStatus(state);
+      if (state == HAServiceState.STANDBY) {
+        ret.setReadyToBecomeActive();
+      }
+      return ret;
     }
-
-    @Override
-    public boolean readyToBecomeActive() throws ServiceFailedException, IOException {
-      return true;
+    
+    private HAServiceState getServiceState() {
+      return state;
     }
   }
   
@@ -127,13 +128,13 @@ public class TestFailoverController {
   public void testFailoverWithoutPermission() throws Exception {
     DummyService svc1 = new DummyService(HAServiceState.ACTIVE) {
       @Override
-      public HAServiceState getServiceState() throws IOException {
+      public HAServiceStatus getServiceStatus() throws IOException {
         throw new AccessControlException("Access denied");
       }
     };
     DummyService svc2 = new DummyService(HAServiceState.STANDBY) {
       @Override
-      public HAServiceState getServiceState() throws IOException {
+      public HAServiceStatus getServiceStatus() throws IOException {
         throw new AccessControlException("Access denied");
       }
     };
@@ -153,8 +154,10 @@ public class TestFailoverController {
     DummyService svc1 = new DummyService(HAServiceState.ACTIVE);
     DummyService svc2 = new DummyService(HAServiceState.STANDBY) {
       @Override
-      public boolean readyToBecomeActive() throws ServiceFailedException, IOException {
-        return false;
+      public HAServiceStatus getServiceStatus() throws IOException {
+        HAServiceStatus ret = new HAServiceStatus(HAServiceState.STANDBY);
+        ret.setNotReadyToBecomeActive("injected not ready");
+        return ret;
       }
     };
     NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName());
@@ -164,6 +167,9 @@ public class TestFailoverController {
       fail("Can't failover to a service that's not ready");
     } catch (FailoverFailedException ffe) {
       // Expected
+      if (!ffe.getMessage().contains("injected not ready")) {
+        throw ffe;
+      }
     }
 
     assertEquals(HAServiceState.ACTIVE, svc1.getServiceState());

+ 0 - 2
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java

@@ -30,7 +30,6 @@ import org.apache.hadoop.conf.Configuration;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.Mockito;
-import static org.mockito.Mockito.when;
 
 import com.google.common.base.Charsets;
 import com.google.common.base.Joiner;
@@ -46,7 +45,6 @@ public class TestHAAdmin {
   @Before
   public void setup() throws IOException {
     mockProtocol = Mockito.mock(HAServiceProtocol.class);
-    when(mockProtocol.readyToBecomeActive()).thenReturn(true);
     tool = new HAAdmin() {
       @Override
       protected HAServiceProtocol getProtocol(String target) throws IOException {

+ 2 - 2
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHealthMonitor.java

@@ -64,8 +64,8 @@ public class TestHealthMonitor {
     conf.setInt(CommonConfigurationKeys.HA_HM_CONNECT_RETRY_INTERVAL_KEY, 50);
     conf.setInt(CommonConfigurationKeys.HA_HM_SLEEP_AFTER_DISCONNECT_KEY, 50);
     mockProxy = Mockito.mock(HAServiceProtocol.class);
-    Mockito.doReturn(HAServiceState.ACTIVE)
-      .when(mockProxy).getServiceState();
+    Mockito.doReturn(new HAServiceStatus(HAServiceState.ACTIVE))
+      .when(mockProxy).getServiceStatus();
     
     hm = new HealthMonitor(conf, BOGUS_ADDR) {
       @Override

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -159,6 +159,9 @@ Release 0.23.3 - UNRELEASED
     HDFS-3044. fsck move should be non-destructive by default.
     (Colin Patrick McCabe via eli)
 
+    HDFS-3071. haadmin failover command does not provide enough detail when
+    target NN is not ready to be active. (todd)
+
   OPTIMIZATIONS
 
     HDFS-2477. Optimize computing the diff between a block report and the

+ 28 - 10
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

@@ -32,6 +32,7 @@ import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
+import org.apache.hadoop.ha.HAServiceStatus;
 import org.apache.hadoop.ha.HealthCheckFailedException;
 import org.apache.hadoop.ha.ServiceFailedException;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
@@ -990,24 +991,41 @@ public class NameNode {
     state.setState(haContext, STANDBY_STATE);
   }
 
-  synchronized HAServiceState getServiceState() throws AccessControlException {
+  synchronized HAServiceStatus getServiceStatus()
+      throws ServiceFailedException, AccessControlException {
     namesystem.checkSuperuserPrivilege();
+    if (!haEnabled) {
+      throw new ServiceFailedException("HA for namenode is not enabled");
+    }
     if (state == null) {
-      return HAServiceState.INITIALIZING;
+      return new HAServiceStatus(HAServiceState.INITIALIZING);
     }
-    return state.getServiceState();
+    HAServiceState retState = state.getServiceState();
+    HAServiceStatus ret = new HAServiceStatus(retState);
+    if (retState == HAServiceState.STANDBY) {
+      String safemodeTip = namesystem.getSafeModeTip();
+      if (!safemodeTip.isEmpty()) {
+        ret.setNotReadyToBecomeActive(
+            "The NameNode is in safemode. " +
+            safemodeTip);
+      } else {
+        ret.setReadyToBecomeActive();
+      }
+    } else if (retState == HAServiceState.ACTIVE) {
+      ret.setReadyToBecomeActive();
+    } else {
+      ret.setNotReadyToBecomeActive("State is " + state);
+    }
+    return ret;
   }
 
-  synchronized boolean readyToBecomeActive()
-      throws ServiceFailedException, AccessControlException {
-    namesystem.checkSuperuserPrivilege();
-    if (!haEnabled) {
-      throw new ServiceFailedException("HA for namenode is not enabled");
+  synchronized HAServiceState getServiceState() {
+    if (state == null) {
+      return HAServiceState.INITIALIZING;
     }
-    return !isInSafeMode();
+    return state.getServiceState();
   }
 
-  
   /**
    * Class used as expose {@link NameNode} as context to {@link HAState}
    * 

+ 4 - 9
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java

@@ -41,6 +41,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.fs.permission.PermissionStatus;
+import org.apache.hadoop.ha.HAServiceStatus;
 import org.apache.hadoop.ha.HealthCheckFailedException;
 import org.apache.hadoop.ha.ServiceFailedException;
 import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.HAServiceProtocolService;
@@ -981,15 +982,9 @@ class NameNodeRpcServer implements NamenodeProtocols {
   }
 
   @Override // HAServiceProtocol
-  public synchronized HAServiceState getServiceState() 
-      throws AccessControlException {
-    return nn.getServiceState();
-  }
-
-  @Override // HAServiceProtocol
-  public synchronized boolean readyToBecomeActive() 
-      throws ServiceFailedException, AccessControlException {
-    return nn.readyToBecomeActive();
+  public synchronized HAServiceStatus getServiceStatus() 
+      throws AccessControlException, ServiceFailedException {
+    return nn.getServiceStatus();
   }
 
   /**

+ 20 - 14
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java

@@ -31,13 +31,13 @@ import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.ha.HAServiceProtocol;
 import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
+import org.apache.hadoop.ha.HAServiceStatus;
 import org.apache.hadoop.ha.HealthCheckFailedException;
 import org.apache.hadoop.ha.NodeFencer;
 
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.Mockito;
-import static org.mockito.Mockito.when;
 
 import com.google.common.base.Charsets;
 import com.google.common.base.Joiner;
@@ -51,6 +51,11 @@ public class TestDFSHAAdmin {
   private HAServiceProtocol mockProtocol;
   
   private static final String NSID = "ns1";
+
+  private static final HAServiceStatus STANDBY_READY_RESULT =
+    new HAServiceStatus(HAServiceState.STANDBY)
+    .setReadyToBecomeActive();
+  
   private static String HOST_A = "1.2.3.1";
   private static String HOST_B = "1.2.3.2";
 
@@ -73,7 +78,6 @@ public class TestDFSHAAdmin {
   @Before
   public void setup() throws IOException {
     mockProtocol = Mockito.mock(HAServiceProtocol.class);
-    when(mockProtocol.readyToBecomeActive()).thenReturn(true);
     tool = new DFSHAAdmin() {
       @Override
       protected HAServiceProtocol getProtocol(String serviceId) throws IOException {
@@ -105,8 +109,9 @@ public class TestDFSHAAdmin {
 
   @Test
   public void testNamenodeResolution() throws Exception {
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     assertEquals(0, runTool("-getServiceState", "nn1"));
-    Mockito.verify(mockProtocol).getServiceState();
+    Mockito.verify(mockProtocol).getServiceStatus();
     assertEquals(-1, runTool("-getServiceState", "undefined"));
     assertOutputContains(
         "Unable to determine service address for namenode 'undefined'");
@@ -133,13 +138,13 @@ public class TestDFSHAAdmin {
 
   @Test
   public void testFailoverWithNoFencerConfigured() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     assertEquals(-1, runTool("-failover", "nn1", "nn2"));
   }
 
   @Test
   public void testFailoverWithFencerConfigured() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     HdfsConfiguration conf = getHAConf();
     conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
     tool.setConf(conf);
@@ -148,7 +153,7 @@ public class TestDFSHAAdmin {
 
   @Test
   public void testFailoverWithFencerAndNameservice() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     HdfsConfiguration conf = getHAConf();
     conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
     tool.setConf(conf);
@@ -157,7 +162,7 @@ public class TestDFSHAAdmin {
 
   @Test
   public void testFailoverWithFencerConfiguredAndForce() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     HdfsConfiguration conf = getHAConf();
     conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
     tool.setConf(conf);
@@ -166,7 +171,7 @@ public class TestDFSHAAdmin {
 
   @Test
   public void testFailoverWithForceActive() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     HdfsConfiguration conf = getHAConf();
     conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
     tool.setConf(conf);
@@ -175,7 +180,7 @@ public class TestDFSHAAdmin {
 
   @Test
   public void testFailoverWithInvalidFenceArg() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     HdfsConfiguration conf = getHAConf();
     conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
     tool.setConf(conf);
@@ -184,13 +189,13 @@ public class TestDFSHAAdmin {
 
   @Test
   public void testFailoverWithFenceButNoFencer() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     assertEquals(-1, runTool("-failover", "nn1", "nn2", "--forcefence"));
   }
 
   @Test
   public void testFailoverWithFenceAndBadFencer() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     HdfsConfiguration conf = getHAConf();
     conf.set(NodeFencer.CONF_METHODS_KEY, "foobar!");
     tool.setConf(conf);
@@ -199,7 +204,7 @@ public class TestDFSHAAdmin {
 
   @Test
   public void testForceFenceOptionListedBeforeArgs() throws Exception {
-    Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState();
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     HdfsConfiguration conf = getHAConf();
     conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
     tool.setConf(conf);
@@ -207,9 +212,10 @@ public class TestDFSHAAdmin {
   }
 
   @Test
-  public void testGetServiceState() throws Exception {
+  public void testGetServiceStatus() throws Exception {
+    Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
     assertEquals(0, runTool("-getServiceState", "nn1"));
-    Mockito.verify(mockProtocol).getServiceState();
+    Mockito.verify(mockProtocol).getServiceStatus();
   }
 
   @Test

+ 26 - 3
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java

@@ -21,6 +21,7 @@ import static org.junit.Assert.*;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.PrintStream;
 
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.Log;
@@ -28,6 +29,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.ha.NodeFencer;
 
 import org.junit.After;
@@ -46,7 +48,10 @@ public class TestDFSHAAdminMiniCluster {
   private MiniDFSCluster cluster;
   private Configuration conf; 
   private DFSHAAdmin tool;
-  
+  private ByteArrayOutputStream errOutBytes = new ByteArrayOutputStream();
+
+  private String errOutput;
+
   @Before
   public void setup() throws IOException {
     conf = new Configuration();
@@ -55,6 +60,7 @@ public class TestDFSHAAdminMiniCluster {
         .build();
     tool = new DFSHAAdmin();  
     tool.setConf(conf);
+    tool.setErrOut(new PrintStream(errOutBytes));
     cluster.waitActive();
   }
 
@@ -67,6 +73,12 @@ public class TestDFSHAAdminMiniCluster {
   public void testGetServiceState() throws Exception {
     assertEquals(0, runTool("-getServiceState", "nn1"));
     assertEquals(0, runTool("-getServiceState", "nn2"));
+    
+    cluster.transitionToActive(0);
+    assertEquals(0, runTool("-getServiceState", "nn1"));
+    
+    NameNodeAdapter.enterSafeMode(cluster.getNameNode(0), false);
+    assertEquals(0, runTool("-getServiceState", "nn1"));
   }
     
   @Test 
@@ -85,6 +97,18 @@ public class TestDFSHAAdminMiniCluster {
     assertEquals(0, runTool("-transitionToStandby", "nn2"));
     assertTrue(nnode2.isStandbyState());
   }
+  
+  @Test
+  public void testTryFailoverToSafeMode() throws Exception {
+    conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)");
+    tool.setConf(conf);
+
+    NameNodeAdapter.enterSafeMode(cluster.getNameNode(0), false);
+    assertEquals(-1, runTool("-failover", "nn2", "nn1"));
+    assertTrue("Bad output: " + errOutput,
+        errOutput.contains("is not ready to become active: " +
+            "The NameNode is in safemode"));
+  }
     
   /**
    * Test failover with various options
@@ -132,11 +156,10 @@ public class TestDFSHAAdminMiniCluster {
   }
   
   private int runTool(String ... args) throws Exception {
-    ByteArrayOutputStream errOutBytes = new ByteArrayOutputStream();
     errOutBytes.reset();
     LOG.info("Running: DFSHAAdmin " + Joiner.on(" ").join(args));
     int ret = tool.run(args);
-    String errOutput = new String(errOutBytes.toByteArray(), Charsets.UTF_8);
+    errOutput = new String(errOutBytes.toByteArray(), Charsets.UTF_8);
     LOG.info("Output:\n" + errOutput);
     return ret;
   }