فهرست منبع

HADOOP-9459. ActiveStandbyElector can join election even before Service HEALTHY, and results in null data at ActiveBreadCrumb. Contributed by Vinay and Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1482226 13f79535-47bb-0310-9956-ffa450edef68
Todd Lipcon 12 سال پیش
والد
کامیت
b30d21bd13

+ 4 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -155,6 +155,10 @@ Release 2.0.5-beta - UNRELEASED
     HADOOP-9485. No default value in the code for
     hadoop.rpc.socket.factory.class.default. (Colin Patrick McCabe via atm)
 
+    HADOOP-9459. ActiveStandbyElector can join election even before
+    Service HEALTHY, and results in null data at ActiveBreadCrumb.
+    (Vinay and todd via todd)
+
 Release 2.0.4-alpha - 2013-04-25 
 
   INCOMPATIBLE CHANGES

+ 12 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java

@@ -643,6 +643,8 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
   }
 
   private void joinElectionInternal() {
+    Preconditions.checkState(appData != null,
+        "trying to join election without any app data");
     if (zkClient == null) {
       if (!reEstablishSession()) {
         fatalError("Failed to reEstablish connection with ZooKeeper");
@@ -669,8 +671,14 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
     try {
       terminateConnection();
       sleepFor(sleepTime);
-      
-      joinElectionInternal();
+      // Should not join election even before the SERVICE is reported
+      // as HEALTHY from ZKFC monitoring.
+      if (appData != null) {
+        joinElectionInternal();
+      } else {
+        LOG.info("Not joining election since service has not yet been " +
+            "reported as healthy.");
+      }
     } finally {
       sessionReestablishLockForTests.unlock();
     }
@@ -798,6 +806,8 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
    */
   private void writeBreadCrumbNode(Stat oldBreadcrumbStat)
       throws KeeperException, InterruptedException {
+    Preconditions.checkState(appData != null, "no appdata");
+    
     LOG.info("Writing znode " + zkBreadCrumbPath +
         " to indicate that the local node is the most recent active...");
     if (oldBreadcrumbStat == null) {

+ 18 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java

@@ -721,4 +721,22 @@ public class TestActiveStandbyElector {
       GenericTestUtils.assertExceptionContains( "ConnectionLoss", ke);
     }
   }
+
+  /**
+   * joinElection(..) should happen only after SERVICE_HEALTHY.
+   */
+  @Test
+  public void testBecomeActiveBeforeServiceHealthy() throws Exception {
+    mockNoPriorActive();
+    WatchedEvent mockEvent = Mockito.mock(WatchedEvent.class);
+    Mockito.when(mockEvent.getType()).thenReturn(Event.EventType.None);
+    // session expired should enter safe mode
+    // But for first time, before the SERVICE_HEALTY i.e. appData is set,
+    // should not enter the election.
+    Mockito.when(mockEvent.getState()).thenReturn(Event.KeeperState.Expired);
+    elector.processWatchEvent(mockZK, mockEvent);
+    // joinElection should not be called.
+    Mockito.verify(mockZK, Mockito.times(0)).create(ZK_LOCK_NAME, null,
+        Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, elector, mockZK);
+  }
 }