Procházet zdrojové kódy

ZOOKEEPER-1699. Leader should timeout and give up leadership when losing quorum of last proposed configuration (Alexander Shraer via michim)

git-svn-id: https://svn.apache.org/repos/asf/zookeeper/trunk@1596422 13f79535-47bb-0310-9956-ffa450edef68
Michi Mutsuzaki před 11 roky
rodič
revize
a8e0d7ba7d

+ 3 - 0
CHANGES.txt

@@ -661,6 +661,9 @@ BUGFIXES:
   MBeans instead of use MBeanRegistry.unregisterAll() method.
   (César Álvarez Núñez via michim)
 
+  ZOOKEEPER-1699. Leader should timeout and give up leadership when losing
+  quorum of last proposed configuration (Alexander Shraer via michim)
+
 IMPROVEMENTS:
 
   ZOOKEEPER-1170. Fix compiler (eclipse) warnings: unused imports,

+ 43 - 79
src/java/main/org/apache/zookeeper/server/quorum/Leader.java

@@ -62,69 +62,14 @@ public class Leader {
         LOG.info("TCP NoDelay set to: " + nodelay);
     }
 
-    static public class Proposal {
+    static public class Proposal  extends SyncedLearnerTracker {
         public QuorumPacket packet;
-
-        private ArrayList<QuorumVerifierAcksetPair> qvAcksetPairs = new ArrayList<QuorumVerifierAcksetPair>();
-
         public Request request;
 
         @Override
         public String toString() {
             return packet.getType() + ", " + packet.getZxid() + ", " + request;
         }
-
-        public void addQuorumVerifier(QuorumVerifier qv) {
-            qvAcksetPairs.add(new QuorumVerifierAcksetPair(qv,
-                    new HashSet<Long>(qv.getVotingMembers().size())));
-        }
-
-        public boolean addAck(Long sid) {
-            boolean change = false;
-            for (QuorumVerifierAcksetPair qvAckset : qvAcksetPairs) {
-                if (qvAckset.getQuorumVerifier().getVotingMembers().containsKey(sid)) {
-                    qvAckset.getAckset().add(sid);
-                    change = true;
-                }
-            }
-            return change;
-        }
-
-        public boolean hasAllQuorums() {
-            for (QuorumVerifierAcksetPair qvAckset : qvAcksetPairs) {
-                if (!qvAckset.getQuorumVerifier().containsQuorum(qvAckset.getAckset()))
-                    return false;
-            }
-            return true;
-        }
-        
-        public String ackSetsToString(){
-            StringBuilder sb = new StringBuilder();
-            
-            for (QuorumVerifierAcksetPair qvAckset : qvAcksetPairs) {
-                sb.append(qvAckset.getAckset().toString()).append(",");
-            }
-            
-            return sb.substring(0, sb.length()-1);
-        }
-
-        public static class QuorumVerifierAcksetPair {
-            private final QuorumVerifier _qv;
-            private final HashSet<Long> _ackset;
-
-            public QuorumVerifierAcksetPair(QuorumVerifier qv, HashSet<Long> ackset) {                
-                _qv = qv;
-                _ackset = ackset;
-            }
-
-            public QuorumVerifier getQuorumVerifier() {
-                return _qv;
-            }
-
-            public HashSet<Long> getAckset() {
-                return _ackset;
-            }
-        }
     }
 
     final LeaderZooKeeperServer zk;
@@ -585,34 +530,53 @@ public class Leader {
             boolean tickSkip = true;
 
             while (true) {
-                Thread.sleep(self.tickTime / 2);
-                if (!tickSkip) {
-                    self.tick++;
-                }
-                HashSet<Long> syncedSet = new HashSet<Long>();
+                synchronized (this) {
+                    long start = System.currentTimeMillis();
+                    long cur = start;
+                    long end = start + self.tickTime / 2;
+                    while (cur < end) {
+                        wait(end - cur);
+                        cur = System.currentTimeMillis();
+                    }
 
-                // lock on the followers when we use it.
-                syncedSet.add(self.getId());
+                    if (!tickSkip) {
+                        self.tick++;
+                    }
 
-                for (LearnerHandler f : getLearners()) {
-                    // Synced set is used to check we have a supporting quorum, so only
-                    // PARTICIPANT, not OBSERVER, learners should be used
-                    if (f.synced() && self.getQuorumVerifier().getVotingMembers().containsKey(f.getSid())) {
-                        syncedSet.add(f.getSid());
+                    // We use an instance of SyncedLearnerTracker to
+                    // track synced learners to make sure we still have a
+                    // quorum of current (and potentially next pending) view.
+                    SyncedLearnerTracker syncedAckSet = new SyncedLearnerTracker();
+                    syncedAckSet.addQuorumVerifier(self.getQuorumVerifier());
+                    if (self.getLastSeenQuorumVerifier() != null
+                            && self.getLastSeenQuorumVerifier().getVersion() > self
+                                    .getQuorumVerifier().getVersion()) {
+                        syncedAckSet.addQuorumVerifier(self
+                                .getLastSeenQuorumVerifier());
                     }
+
+                    syncedAckSet.addAck(self.getId());
+
+                    for (LearnerHandler f : getLearners()) {
+                        if (f.synced()) {
+                            syncedAckSet.addAck(f.getSid());
+                        }
+                    }
+
+                    if (!tickSkip && !syncedAckSet.hasAllQuorums()) {
+                        // Lost quorum of last committed and/or last proposed
+                        // config, shutdown
+                        shutdown("Not sufficient followers synced, only synced with sids: [ "
+                                + syncedAckSet.ackSetsToString() + " ]");
+                        // make sure the order is the same!
+                        // the leader goes to looking
+                        return;
+                    }
+                    tickSkip = !tickSkip;
+                }
+                for (LearnerHandler f : getLearners()) {
                     f.ping();
                 }
-
-              if (!tickSkip && !self.getQuorumVerifier().containsQuorum(syncedSet)) {
-                  //if (!tickSkip && syncedCount < self.quorumPeers.size() / 2) {
-                  // Lost quorum, shutdown
-                  shutdown("Not sufficient followers synced, only synced with sids: [ "
-                          + getSidSetString(syncedSet) + " ]");
-                  // make sure the order is the same!
-                  // the leader goes to looking
-                  return;
-              }
-              tickSkip = !tickSkip;
             }
         } finally {
             zk.unregisterJMX(this);

+ 45 - 0
src/java/test/org/apache/zookeeper/test/ReconfigTest.java

@@ -33,6 +33,7 @@ import org.apache.zookeeper.ZooDefs;
 import org.apache.zookeeper.ZooKeeper;
 import org.apache.zookeeper.AsyncCallback.DataCallback;
 import org.apache.zookeeper.data.Stat;
+import org.apache.zookeeper.server.quorum.QuorumStats;
 import org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer;
 import org.apache.zookeeper.server.quorum.QuorumPeer.ServerState;
 import org.apache.zookeeper.server.quorum.flexible.QuorumHierarchical;
@@ -402,6 +403,50 @@ public class ReconfigTest extends ZKTestCase implements DataCallback{
         closeAllHandles(zkArr);
     }
 
+    @Test
+    public void testLeaderTimesoutOnNewQuorum() throws Exception {
+        qu = new QuorumUtil(1); // create 3 servers
+        qu.disableJMXTest = true;
+        qu.startAll();
+        ZooKeeper[] zkArr = createHandles(qu);
+
+        List<String> leavingServers = new ArrayList<String>();
+        leavingServers.add("3");
+        qu.shutdown(2);
+        try {
+            // Since we just shut down server 2, its still considered "synced"
+            // by the leader, which allows us to start the reconfig 
+            // (PrepRequestProcessor checks that a quorum of the new
+            // config is synced before starting a reconfig).
+            // We try to remove server 3, which requires a quorum of {1,2,3}
+            // (we have that) and of {1,2}, but 2 is down so we won't get a
+            // quorum of new config ACKs.
+            zkArr[1].reconfig(null, leavingServers, null, -1, new Stat());
+            Assert.fail("Reconfig should have failed since we don't have quorum of new config");
+        } catch (KeeperException.ConnectionLossException e) {
+            // We expect leader to loose quorum of proposed config and time out
+        } catch (Exception e) {
+            Assert.fail("Should have been ConnectionLossException!");
+        }
+
+        // The leader should time out and remaining servers should go into
+        // LOOKING state. A new leader won't be established since that 
+        // would require completing the reconfig, which is not possible while
+        // 2 is down.
+        Assert.assertEquals(QuorumStats.Provider.LOOKING_STATE,
+                qu.getPeer(1).peer.getServerState());
+        Assert.assertEquals(QuorumStats.Provider.LOOKING_STATE,
+                qu.getPeer(3).peer.getServerState());
+
+        qu.restart(2);
+
+        // Now that 2 is back up, they'll complete the reconfig removing 3 and
+        // can process other ops.
+        testServerHasConfig(zkArr[1], null, leavingServers);
+        testNormalOperation(zkArr[1], zkArr[2]);
+        closeAllHandles(zkArr);
+    }
+
     @Test
     public void testRemoveOneAsynchronous() throws Exception {
         qu = new QuorumUtil(2);