Browse Source

HADOOP-9183. Potential deadlock in ActiveStandbyElector.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1431251 13f79535-47bb-0310-9956-ffa450edef68
Thomas White 12 years ago
parent
commit
d863f7a1e4

+ 2 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -528,6 +528,8 @@ Release 2.0.3-alpha - Unreleased
     HADOOP-9155. FsPermission should have different default value, 777 for
     directory and 666 for file. (Binglin Chang via atm)
 
+    HADOOP-9183. Potential deadlock in ActiveStandbyElector. (tomwhite)
+
 Release 2.0.2-alpha - 2012-09-07 
 
   INCOMPATIBLE CHANGES

+ 10 - 25
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java

@@ -613,7 +613,7 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
     // Unfortunately, the ZooKeeper constructor connects to ZooKeeper and
     // may trigger the Connected event immediately. So, if we register the
     // watcher after constructing ZooKeeper, we may miss that event. Instead,
-    // we construct the watcher first, and have it queue any events it receives
+    // we construct the watcher first, and have it block any events it receives
     // before we can set its ZooKeeper reference.
     WatcherWithClientRef watcher = new WatcherWithClientRef();
     ZooKeeper zk = new ZooKeeper(zkHostPort, zkSessionTimeout, watcher);
@@ -1002,19 +1002,17 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
     private CountDownLatch hasReceivedEvent = new CountDownLatch(1);
 
     /**
-     * If any events arrive before the reference to ZooKeeper is set,
-     * they get queued up and later forwarded when the reference is
-     * available.
+     * Latch used to wait until the reference to ZooKeeper is set.
      */
-    private final List<WatchedEvent> queuedEvents = Lists.newLinkedList();
+    private CountDownLatch hasSetZooKeeper = new CountDownLatch(1);
     
     private WatcherWithClientRef() {
     }
 
     private WatcherWithClientRef(ZooKeeper zk) {
-      this.zk = zk;
+      setZooKeeperRef(zk);
     }
-    
+
     /**
      * Waits for the next event from ZooKeeper to arrive.
      * 
@@ -1029,9 +1027,7 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
         if (!hasReceivedEvent.await(connectionTimeoutMs, TimeUnit.MILLISECONDS)) {
           LOG.error("Connection timed out: couldn't connect to ZooKeeper in "
               + connectionTimeoutMs + " milliseconds");
-          synchronized (this) {
-            zk.close();
-          }
+          zk.close();
           throw KeeperException.create(Code.CONNECTIONLOSS);
         }
       } catch (InterruptedException e) {
@@ -1041,29 +1037,18 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
       }
     }
 
-    private synchronized void setZooKeeperRef(ZooKeeper zk) {
+    private void setZooKeeperRef(ZooKeeper zk) {
       Preconditions.checkState(this.zk == null,
           "zk already set -- must be set exactly once");
       this.zk = zk;
-      
-      for (WatchedEvent e : queuedEvents) {
-        forwardEvent(e);
-      }
-      queuedEvents.clear();
+      hasSetZooKeeper.countDown();
     }
 
     @Override
-    public synchronized void process(WatchedEvent event) {
-      if (zk != null) {
-        forwardEvent(event);
-      } else {
-        queuedEvents.add(event);
-      }
-    }
-    
-    private void forwardEvent(WatchedEvent event) {
+    public void process(WatchedEvent event) {
       hasReceivedEvent.countDown();
       try {
+        hasSetZooKeeper.await(zkSessionTimeout, TimeUnit.MILLISECONDS);
         ActiveStandbyElector.this.processWatchEvent(
             zk, event);
       } catch (Throwable t) {