Bladeren bron

HDFS-14130. [SBN read] Make ZKFC ObserverNode aware. Contributed by xiangheng and Konstantin Shvachko.

Konstantin V Shvachko 6 jaren geleden
bovenliggende
commit
a6ab37192a

+ 13 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java

@@ -728,9 +728,9 @@ public abstract class ZKFailoverController {
   }
 
   /**
-   * Ensure that the local node is in a healthy state, and thus
-   * eligible for graceful failover.
-   * @throws ServiceFailedException if the node is unhealthy
+   * If the local node is an observer or is unhealthy it
+   * is not eligible for graceful failover.
+   * @throws ServiceFailedException if the node is an observer or unhealthy
    */
   private synchronized void checkEligibleForFailover()
       throws ServiceFailedException {
@@ -740,6 +740,11 @@ public abstract class ZKFailoverController {
           localTarget + " is not currently healthy. " +
           "Cannot be failover target");
     }
+    if (serviceState == HAServiceState.OBSERVER) {
+      throw new ServiceFailedException(
+          localTarget + " is in observer state. " +
+          "Cannot be failover target");
+    }
   }
 
   /**
@@ -856,6 +861,11 @@ public abstract class ZKFailoverController {
           }
           return;
         }
+        if (changedState == HAServiceState.OBSERVER) {
+          elector.quitElection(true);
+          serviceState = HAServiceState.OBSERVER;
+          return;
+        }
         if (changedState == serviceState) {
           serviceStateMismatchCount = 0;
           return;

+ 41 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java

@@ -280,6 +280,21 @@ public class TestZKFailoverController extends ClientBaseWithFixes {
     cluster.expireAndVerifyFailover(1, 0);
   }
 
+  /**
+   * Test that a service set to OBSERVER is seen in the OBSERVER HA state.
+   */
+  @Test
+  public void testVerifyObserverState()
+          throws Exception {
+    cluster.start(3);
+    DummyHAService svc2 = cluster.getService(2);
+    svc2.state = HAServiceState.OBSERVER;
+
+    // Wait until node 2 (svc2) is seen in the OBSERVER HA state
+    LOG.info("Waiting for svc2 to enter observer state");
+    cluster.waitForHAState(2, HAServiceState.OBSERVER);
+  }
+
   /**
    * Test that, if the standby node is unhealthy, it doesn't try to become
    * active
@@ -474,6 +489,32 @@ public class TestZKFailoverController extends ClientBaseWithFixes {
     }
   }
 
+  @Test
+  public void testObserverExitGracefulFailover() throws Exception {
+    cluster.start(3);
+
+    cluster.waitForActiveLockHolder(0);
+
+    // Mark svc2 as an observer, then wait for its elector to return to INIT
+    DummyHAService svc2 = cluster.getService(2);
+    svc2.state = HAServiceState.OBSERVER;
+    cluster.waitForHAState(2, HAServiceState.OBSERVER);
+    cluster.setFailToBecomeActive(2, true);
+    cluster.setFailToBecomeStandby(2, true);
+    cluster.setFailToBecomeObserver(2, true);
+    cluster.waitForElectorState(2, ActiveStandbyElector.State.INIT);
+
+    // Graceful failover to svc2 must fail because svc2 is an observer
+    try {
+      cluster.getService(2).getZKFCProxy(conf, 5000).gracefulFailover();
+      fail("Did not fail to graceful failover to observer!");
+    } catch (ServiceFailedException sfe) {
+      GenericTestUtils.assertExceptionContains(
+              cluster.getService(2).toString() +
+                      " is in observer state.", sfe);
+    }
+  }
+
   @Test
   public void testGracefulFailoverFailBecomingActive() throws Exception {
     cluster.start();

+ 16 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSZKFailoverController.java

@@ -20,6 +20,8 @@ package org.apache.hadoop.hdfs.tools;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
 import java.util.concurrent.TimeoutException;
 
 import org.apache.hadoop.conf.Configuration;
@@ -212,6 +214,20 @@ public class TestDFSZKFailoverController extends ClientBaseWithFixes {
         tool.run(new String[]{"-failover", "nn2", "nn1"}));
     waitForHAState(0, HAServiceState.ACTIVE);
     waitForHAState(1, HAServiceState.STANDBY);
+    // Answer "yes" to the prompt for --forcemanual
+    InputStream inOriginial = System.in;
+    System.setIn(new ByteArrayInputStream("yes\n".getBytes()));
+    int result = tool.run(
+        new String[]{"-transitionToObserver", "-forcemanual", "nn2"});
+    assertEquals("State transition returned: " + result, 0, result);
+    waitForHAState(1, HAServiceState.OBSERVER);
+    // Answer "yes" to the prompt for --forcemanual
+    System.setIn(new ByteArrayInputStream("yes\n".getBytes()));
+    result = tool.run(
+        new String[]{"-transitionToStandby", "-forcemanual", "nn2"});
+    System.setIn(inOriginial);
+    assertEquals("State transition returned: " + result, 0, result);
+    waitForHAState(1, HAServiceState.STANDBY);
   }
 
   private void waitForHAState(int nnidx, final HAServiceState state)