Selaa lähdekoodia

ZOOKEEPER-3240: Close socket on Learner shutdown to avoid dangling so…

…cket

Author: Brian Nixon <nixon@fb.com>

Reviewers: hanm@apache.org, eolivelli@apache.org, andor@apache.org

Closes #996 from enixon/learner-close-socket and squashes the following commits:

57f5e891 [Brian Nixon] delete ReconfigFailureCasesTest::testLeaderTimesoutOnNewQuorum as it relies on the fixed buggy behavior
1598b3ad [Brian Nixon] remove extraneous check on socket status
381889da [Brian Nixon] ZOOKEEPER-3240: Close socket on Learner shutdown to avoid dangling socket
Brian Nixon 6 vuotta sitten
vanhempi
commit
96eefafce7

+ 1 - 5
zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Follower.java

@@ -116,11 +116,7 @@ public class Follower extends Learner{
                 }
             } catch (Exception e) {
                 LOG.warn("Exception when following the leader", e);
-                try {
-                    sock.close();
-                } catch (IOException e1) {
-                    e1.printStackTrace();
-                }
+                closeSocket();
     
                 // clear pending revalidations
                 pendingRevalidations.clear();

+ 11 - 0
zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java

@@ -673,6 +673,7 @@ public class Learner {
         self.setZooKeeperServer(null);
         self.closeAllConnections();
         self.adminServer.setZooKeeperServer(null);
+        closeSocket();
         // shutdown previous zookeeper
         if (zk != null) {
             zk.shutdown();
@@ -682,4 +683,14 @@ public class Learner {
     boolean isRunning() {
         return self.isRunning() && zk.isRunning();
     }
+
+    void closeSocket() {
+        try {
+            if (sock != null) {
+                sock.close();
+            }
+        } catch (IOException e) {
+            LOG.warn("Ignoring error closing connection to leader", e);
+        }
+    }
 }

+ 1 - 5
zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Observer.java

@@ -122,11 +122,7 @@ public class Observer extends Learner{
                 }
             } catch (Exception e) {
                 LOG.warn("Exception when observing the leader", e);
-                try {
-                    sock.close();
-                } catch (IOException e1) {
-                    e1.printStackTrace();
-                }
+                closeSocket();
 
                 // clear pending revalidations
                 pendingRevalidations.clear();

+ 3 - 0
zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java

@@ -827,7 +827,10 @@ public class QuorumPeerMainTest extends QuorumPeerTestBase {
         int leader = servers.findLeader();
         Map<Long, Proposal> outstanding =  servers.mt[leader].main.quorumPeer.leader.outstandingProposals;
         // increase the tick time to delay the leader going to looking
+        int previousTick = servers.mt[leader].main.quorumPeer.tickTime;
         servers.mt[leader].main.quorumPeer.tickTime = LEADER_TIMEOUT_MS;
+        // let the previous tick on the leader exhaust itself so the new tick time takes effect
+        Thread.sleep(previousTick);
         LOG.warn("LEADER {}", leader);
 
         for (int i = 0; i < SERVER_COUNT; i++) {

+ 0 - 43
zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/ReconfigFailureCasesTest.java

@@ -159,49 +159,6 @@ public class ReconfigFailureCasesTest extends QuorumPeerTestBase {
         ReconfigTest.closeAllHandles(zkArr, zkAdminArr);
     }
 
-    /*
-     * Tests that if a quorum of a new config is synced with the leader and a reconfig
-     * is allowed to start but then the new quorum is lost, the leader will time out and
-     * we go to leader election.
-     */
-    @Test
-    public void testLeaderTimesoutOnNewQuorum() throws Exception {
-        qu = new QuorumUtil(1); // create 3 servers
-        qu.disableJMXTest = true;
-        qu.startAll();
-        ZooKeeper[] zkArr = ReconfigTest.createHandles(qu);
-        ZooKeeperAdmin[] zkAdminArr = ReconfigTest.createAdminHandles(qu);
-
-        List<String> leavingServers = new ArrayList<String>();
-        leavingServers.add("3");
-        qu.shutdown(2);
-        try {
-            // Since we just shut down server 2, its still considered "synced"
-            // by the leader, which allows us to start the reconfig
-            // (PrepRequestProcessor checks that a quorum of the new
-            // config is synced before starting a reconfig).
-            // We try to remove server 3, which requires a quorum of {1,2,3}
-            // (we have that) and of {1,2}, but 2 is down so we won't get a
-            // quorum of new config ACKs.
-            zkAdminArr[1].reconfigure(null, leavingServers, null, -1, null);
-            Assert.fail("Reconfig should have failed since we don't have quorum of new config");
-        } catch (KeeperException.ConnectionLossException e) {
-            // We expect leader to lose quorum of proposed config and time out
-        } catch (Exception e) {
-            Assert.fail("Should have been ConnectionLossException!");
-        }
-
-        // The leader should time out and remaining servers should go into
-        // LOOKING state. A new leader won't be established since that
-        // would require completing the reconfig, which is not possible while
-        // 2 is down.
-        Assert.assertEquals(QuorumStats.Provider.LOOKING_STATE,
-                qu.getPeer(1).peer.getServerState());
-        Assert.assertEquals(QuorumStats.Provider.LOOKING_STATE,
-                qu.getPeer(3).peer.getServerState());
-        ReconfigTest.closeAllHandles(zkArr, zkAdminArr);
-    }
-
     /*
      * Converting an observer into a participant may sometimes fail with a
      * NewConfigNoQuorum exception. This test-case demonstrates the scenario.