Przeglądaj źródła

HADOOP-8721. ZKFC should not retry 45 times when attempting a graceful fence during a failover. Contributed by Vinayakumar B.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1376194 13f79535-47bb-0310-9956-ffa450edef68
Aaron Myers 13 lat temu
rodzic
commit
d66223fd99

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -420,6 +420,9 @@ Branch-2 ( Unreleased changes )
     HADOOP-8720. TestLocalFileSystem should use test root subdirectory.
     (Vlad Rozov via eli)
 
+    HADOOP-8721. ZKFC should not retry 45 times when attempting a graceful
+    fence during a failover. (Vinayakumar B via atm)
+
   BREAKDOWN OF HDFS-3042 SUBTASKS
 
     HADOOP-8220. ZKFailoverController doesn't handle failure to become active

+ 5 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java

@@ -154,6 +154,11 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
     "ha.failover-controller.graceful-fence.rpc-timeout.ms";
   public static final int HA_FC_GRACEFUL_FENCE_TIMEOUT_DEFAULT = 5000;
   
+  /* FC connection retries for graceful fencing */
+  public static final String HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES =
+      "ha.failover-controller.graceful-fence.connection.retries";
+  public static final int HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES_DEFAULT = 1;
+
   /* Timeout that the CLI (manual) FC waits for monitorHealth, getServiceState */
   public static final String HA_FC_CLI_CHECK_TIMEOUT_KEY =
     "ha.failover-controller.cli-check.rpc-timeout.ms";

+ 19 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java

@@ -49,16 +49,34 @@ public class FailoverController {
   private final int rpcTimeoutToNewActive;
   
   private final Configuration conf;
+  /*
+   * Need a copy of conf for graceful fence to set 
+   * configurable retries for IPC client.
+   * Refer HDFS-3561
+   */
+  private final Configuration gracefulFenceConf;
 
   private final RequestSource requestSource;
   
   public FailoverController(Configuration conf,
       RequestSource source) {
     this.conf = conf;
+    this.gracefulFenceConf = new Configuration(conf);
     this.requestSource = source;
     
     this.gracefulFenceTimeout = getGracefulFenceTimeout(conf);
     this.rpcTimeoutToNewActive = getRpcTimeoutToNewActive(conf);
+    
+    //Configure less retries for graceful fence 
+    int gracefulFenceConnectRetries = conf.getInt(
+        CommonConfigurationKeys.HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES,
+        CommonConfigurationKeys.HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES_DEFAULT);
+    gracefulFenceConf.setInt(
+        CommonConfigurationKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY,
+        gracefulFenceConnectRetries);
+    gracefulFenceConf.setInt(
+        CommonConfigurationKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
+        gracefulFenceConnectRetries);
   }
 
   static int getGracefulFenceTimeout(Configuration conf) {
@@ -150,7 +168,7 @@ public class FailoverController {
   boolean tryGracefulFence(HAServiceTarget svc) {
     HAServiceProtocol proxy = null;
     try {
-      proxy = svc.getProxy(conf, gracefulFenceTimeout);
+      proxy = svc.getProxy(gracefulFenceConf, gracefulFenceTimeout);
       proxy.transitionToStandby(createReqInfo());
       return true;
     } catch (ServiceFailedException sfe) {