
HDFS-15404. ShellCommandFencer should expose info about source. Contributed by Chen Liang.

(cherry picked from commit 3833c616e087518196bcb77ac2479c66a0b188d8)
Chen Liang
commit 370efc6d95

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java

@@ -213,7 +213,7 @@ public class FailoverController {
 
     // Fence fromSvc if it's required or forced by the user
     if (tryFence) {
-      if (!fromSvc.getFencer().fence(fromSvc)) {
+      if (!fromSvc.getFencer().fence(fromSvc, toSvc)) {
         throw new FailoverFailedException("Unable to fence " +
             fromSvc + ". Fencing failed.");
       }
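
Note: with this one-line change the failover path hands the destination node to the fencer as well, so a fencing method can tell the node being fenced apart from the node taking over. Callers that only know the node to fence keep working through the single-argument overload added in NodeFencer below.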

+ 15 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceTarget.java

@@ -44,6 +44,12 @@ public abstract class HAServiceTarget {
   private static final String PORT_SUBST_KEY = "port";
   private static final String ADDRESS_SUBST_KEY = "address";
 
+  /**
+   * The HAState this service target is intended to be after transition
+   * is complete.
+   */
+  private HAServiceProtocol.HAServiceState transitionTargetHAStatus;
+
   /**
    * @return the IPC address of the target node.
    */
@@ -93,6 +99,15 @@ public abstract class HAServiceTarget {
     return getProxyForAddress(conf, timeoutMs, getAddress());
   }
 
+  public void setTransitionTargetHAStatus(
+      HAServiceProtocol.HAServiceState status) {
+    this.transitionTargetHAStatus = status;
+  }
+
+  public HAServiceProtocol.HAServiceState getTransitionTargetHAStatus() {
+    return this.transitionTargetHAStatus;
+  }
+
   /**
    * Returns a proxy to connect to the target HA service for health monitoring.
    * If {@link #getHealthMonitorAddress()} is implemented to return a non-null
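
The new field records which HA state an endpoint is expected to reach once the transition completes, so fencing methods can behave differently on the source and the destination. A minimal sketch of tagging both ends before a failover, mirroring the DFSHAAdmin hunk later in this commit (the node variables are placeholders):

    import org.apache.hadoop.ha.HAServiceProtocol;
    import org.apache.hadoop.ha.HAServiceTarget;

    final class TransitionTagging {
      // Illustration only: fromNode is being demoted, toNode is being promoted.
      static void tag(HAServiceTarget fromNode, HAServiceTarget toNode) {
        fromNode.setTransitionTargetHAStatus(
            HAServiceProtocol.HAServiceState.STANDBY);
        toNode.setTransitionTargetHAStatus(
            HAServiceProtocol.HAServiceState.ACTIVE);
      }
    }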

+ 20 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java

@@ -89,15 +89,32 @@ public class NodeFencer {
   }
 
   public boolean fence(HAServiceTarget fromSvc) {
+    return fence(fromSvc, null);
+  }
+
+  public boolean fence(HAServiceTarget fromSvc, HAServiceTarget toSvc) {
     LOG.info("====== Beginning Service Fencing Process... ======");
     int i = 0;
     for (FenceMethodWithArg method : methods) {
       LOG.info("Trying method " + (++i) + "/" + methods.size() +": " + method);
       
       try {
-        if (method.method.tryFence(fromSvc, method.arg)) {
-          LOG.info("====== Fencing successful by method " + method + " ======");
-          return true;
+        // only true when target node is given, AND fencing on it failed
+        boolean toSvcFencingFailed = false;
+        // if target is given, try to fence on target first. Only if fencing
+        // on target succeeded, do fencing on source node.
+        if (toSvc != null) {
+          toSvcFencingFailed = !method.method.tryFence(toSvc, method.arg);
+        }
+        if (toSvcFencingFailed) {
+          LOG.error("====== Fencing on target failed, skipping fencing "
+              + "on source ======");
+        } else {
+          if (method.method.tryFence(fromSvc, method.arg)) {
+            LOG.info("====== Fencing successful by method "
+                + method + " ======");
+            return true;
+          }
         }
       } catch (BadFencingConfigurationException e) {
         LOG.error("Fencing method " + method + " misconfigured", e);
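
Net effect: when a destination is supplied, each fencing method is first tried against the destination (the node becoming active); only if that succeeds is the method run against the source, and only success on the source counts as overall success. Passing null, or using the original single-argument overload, preserves the previous behaviour.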

+ 46 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.ha;
 
 import java.io.IOException;
 import java.lang.reflect.Field;
+import java.util.Arrays;
 import java.util.Map;
 
 import org.apache.hadoop.conf.Configured;
@@ -60,6 +61,11 @@ public class ShellCommandFencer
   /** Prefix for target parameters added to the environment */
   private static final String TARGET_PREFIX = "target_";
 
+  /** Prefix for source parameters added to the environment */
+  private static final String SOURCE_PREFIX = "source_";
+
+  private static final String ARG_DELIMITER = ",";
+
   @VisibleForTesting
   static Logger LOG = LoggerFactory.getLogger(ShellCommandFencer.class);
 
@@ -73,8 +79,9 @@ public class ShellCommandFencer
   }
 
   @Override
-  public boolean tryFence(HAServiceTarget target, String cmd) {
+  public boolean tryFence(HAServiceTarget target, String args) {
     ProcessBuilder builder;
+    String cmd = parseArgs(target.getTransitionTargetHAStatus(), args);
 
     if (!Shell.WINDOWS) {
       builder = new ProcessBuilder("bash", "-e", "-c", cmd);
@@ -127,6 +134,28 @@ public class ShellCommandFencer
     return rc == 0;
   }
 
+  private String parseArgs(HAServiceProtocol.HAServiceState state,
+      String cmd) {
+    String[] args = cmd.split(ARG_DELIMITER);
+    if (args.length == 1) {
+      // only one command is given, assuming both src and dst
+      // will execute the same command/script.
+      return args[0];
+    }
+    if (args.length > 2) {
+      throw new IllegalArgumentException("Expecting arguments size of at most "
+          + "two, getting " + Arrays.asList(args));
+    }
+    if (HAServiceProtocol.HAServiceState.ACTIVE.equals(state)) {
+      return args[0];
+    } else if (HAServiceProtocol.HAServiceState.STANDBY.equals(state)) {
+      return args[1];
+    } else {
+      throw new IllegalArgumentException(
+          "Unexpected HA service state:" + state);
+    }
+  }
+
   /**
    * Abbreviate a string by putting '...' in the middle of it,
    * in an attempt to keep logs from getting too messy.
@@ -190,9 +219,24 @@ public class ShellCommandFencer
    */
   private void addTargetInfoAsEnvVars(HAServiceTarget target,
       Map<String, String> environment) {
+    String prefix;
+    HAServiceProtocol.HAServiceState targetState =
+        target.getTransitionTargetHAStatus();
+    if (targetState == null ||
+        HAServiceProtocol.HAServiceState.ACTIVE.equals(targetState)) {
+      // null is assumed to be same as ACTIVE, this is to be compatible
+      // with existing tests/use cases where target state is not specified
+      // but assuming it's active.
+      prefix = TARGET_PREFIX;
+    } else if (HAServiceProtocol.HAServiceState.STANDBY.equals(targetState)) {
+      prefix = SOURCE_PREFIX;
+    } else {
+      throw new IllegalArgumentException(
+          "Unexpected HA service state:" + targetState);
+    }
     for (Map.Entry<String, String> e :
          target.getFencingParameters().entrySet()) {
-      String key = TARGET_PREFIX + e.getKey();
+      String key = prefix + e.getKey();
       key = key.replace('.', '_');
       environment.put(key, e.getValue());
     }
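
Taken together, the shell fencer can now be given two comma-delimited commands: the first runs on the node transitioning to ACTIVE (with target_-prefixed environment variables), the second on the node transitioning to STANDBY (with source_-prefixed variables). A hedged sketch of such a configuration in the style of TestDFSHAAdminMiniCluster below, assuming the comma-delimited argument reaches ShellCommandFencer unchanged (as it does in the tests):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public class TwoCommandFencerConfig {
      // Sketch only: one shell fencing method whose argument is split by
      // ShellCommandFencer#parseArgs on the comma.
      public static Configuration configure() {
        Configuration conf = new Configuration();
        conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,
            // first command: node transitioning to ACTIVE (target_* variables)
            // second command: node transitioning to STANDBY (source_* variables)
            "shell(echo $target_host $target_port,"
                + "echo $source_host $source_port)");
        return conf;
      }
    }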

+ 5 - 5
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java

@@ -177,7 +177,7 @@ public class TestFailoverController {
     }
 
     // svc1 still thinks it's active, that's OK, it was fenced
-    assertEquals(1, AlwaysSucceedFencer.fenceCalled);
+    assertEquals(2, AlwaysSucceedFencer.fenceCalled);
     assertSame(svc1, AlwaysSucceedFencer.fencedSvc);
     assertEquals(HAServiceState.ACTIVE, svc1.state);
     assertEquals(HAServiceState.ACTIVE, svc2.state);
@@ -201,7 +201,7 @@ public class TestFailoverController {
     }
 
     assertEquals(1, AlwaysFailFencer.fenceCalled);
-    assertSame(svc1, AlwaysFailFencer.fencedSvc);
+    assertSame(svc2, AlwaysFailFencer.fencedSvc);
     assertEquals(HAServiceState.ACTIVE, svc1.state);
     assertEquals(HAServiceState.STANDBY, svc2.state);
   }
@@ -223,7 +223,7 @@ public class TestFailoverController {
     // If fencing was requested and it failed we don't try to make
     // svc2 active anyway, and we don't failback to svc1.
     assertEquals(1, AlwaysFailFencer.fenceCalled);
-    assertSame(svc1, AlwaysFailFencer.fencedSvc);
+    assertSame(svc2, AlwaysFailFencer.fencedSvc);
     assertEquals(HAServiceState.STANDBY, svc1.state);
     assertEquals(HAServiceState.STANDBY, svc2.state);
   }
@@ -344,7 +344,7 @@ public class TestFailoverController {
     // and we didn't force it, so we failed back to svc1 and fenced svc2.
     // Note svc2 still thinks it's active, that's OK, we fenced it.
     assertEquals(HAServiceState.ACTIVE, svc1.state);
-    assertEquals(1, AlwaysSucceedFencer.fenceCalled);
+    assertEquals(2, AlwaysSucceedFencer.fenceCalled);
     assertSame(svc2, AlwaysSucceedFencer.fencedSvc);
   }
 
@@ -373,7 +373,7 @@ public class TestFailoverController {
     // so we did not failback to svc1, ie it's still standby.
     assertEquals(HAServiceState.STANDBY, svc1.state);
     assertEquals(1, AlwaysFailFencer.fenceCalled);
-    assertSame(svc2, AlwaysFailFencer.fencedSvc);
+    assertSame(svc1, AlwaysFailFencer.fencedSvc);
   }
 
   @Test

+ 31 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java

@@ -163,6 +163,37 @@ public class TestShellCommandFencer {
     }
   }
 
+  /**
+   * Test if fencing target has peer set, the failover can trigger different
+   * commands on source and destination respectively.
+   */
+  @Test
+  public void testEnvironmentWithPeer() {
+    HAServiceTarget target = new DummyHAService(HAServiceState.ACTIVE,
+        new InetSocketAddress("dummytarget", 1111));
+    HAServiceTarget source = new DummyHAService(HAServiceState.STANDBY,
+        new InetSocketAddress("dummysource", 2222));
+    target.setTransitionTargetHAStatus(HAServiceState.ACTIVE);
+    source.setTransitionTargetHAStatus(HAServiceState.STANDBY);
+    String cmd = "echo $target_host $target_port,"
+        + "echo $source_host $source_port";
+    if (!Shell.WINDOWS) {
+      fencer.tryFence(target, cmd);
+      Mockito.verify(ShellCommandFencer.LOG).info(
+          Mockito.contains("echo $ta...rget_port: dummytarget 1111"));
+      fencer.tryFence(source, cmd);
+      Mockito.verify(ShellCommandFencer.LOG).info(
+          Mockito.contains("echo $so...urce_port: dummysource 2222"));
+    } else {
+      fencer.tryFence(target, cmd);
+      Mockito.verify(ShellCommandFencer.LOG).info(
+          Mockito.contains("echo %ta...get_port%: dummytarget 1111"));
+      fencer.tryFence(source, cmd);
+      Mockito.verify(ShellCommandFencer.LOG).info(
+          Mockito.contains("echo %so...urce_port%: dummysource 2222"));
+    }
+  }
+
 
   /**
    * Test that we properly close off our input to the subprocess

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java

@@ -284,6 +284,11 @@ public class DFSHAAdmin extends HAAdmin {
     HAServiceTarget fromNode = resolveTarget(args[0]);
     HAServiceTarget toNode = resolveTarget(args[1]);
 
+    fromNode.setTransitionTargetHAStatus(
+        HAServiceProtocol.HAServiceState.STANDBY);
+    toNode.setTransitionTargetHAStatus(
+        HAServiceProtocol.HAServiceState.ACTIVE);
+
     // Check that auto-failover is consistently configured for both nodes.
     Preconditions.checkState(
         fromNode.isAutoFailoverEnabled() ==
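
End to end: a manual failover now tags the old active as transitioning to STANDBY and the new active as transitioning to ACTIVE before any fencing takes place, so when fencing is required the shell fencer runs the first configured command on the destination (with target_-prefixed variables) and then the second command on the source (with source_-prefixed variables), following the ordering introduced in NodeFencer above.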

+ 4 - 4
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java

@@ -189,13 +189,13 @@ public class TestDFSHAAdminMiniCluster {
     tmpFile.deleteOnExit();
     if (Shell.WINDOWS) {
       conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,
-          "shell(echo %target_nameserviceid%.%target_namenodeid% " +
-              "%target_port% %dfs_ha_namenode_id% > " +
+          "shell(echo %source_nameserviceid%.%source_namenodeid% " +
+              "%source_port% %dfs_ha_namenode_id% > " +
               tmpFile.getAbsolutePath() + ")");
     } else {
       conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,
-          "shell(echo -n $target_nameserviceid.$target_namenodeid " +
-          "$target_port $dfs_ha_namenode_id > " +
+          "shell(echo -n $source_nameserviceid.$source_namenodeid " +
+          "$source_port $dfs_ha_namenode_id > " +
           tmpFile.getAbsolutePath() + ")");
     }