Browse Source

ZOOKEEPER-2692: Fix race condition in testWatchAutoResetWithPending

testWatchAutoResetWithPending is occasionally flaky because the test cannot reliably predict when the client has received the notification from every watch that may be fired (perhaps due to resource contention on the box running the tests). This patch works around the problem by waiting for a one-second quiet period, i.e. a full second during which no watcher fires, after which we can more safely assume that all watches that will be fired have been fired.

Here is an example of the test failure: https://builds.apache.org/job/ZooKeeper-trunk-jdk8/935/

Author: Abraham Fine <abefine@cloudera.com>

Reviewers: Michael Han <hanm@apache.org>

Closes #171 from afine/ZOOKEEPER-2692

(cherry picked from commit 1912fa8d63e6bbfdb5bb96bec85efa7c94e131e9)
Signed-off-by: Michael Han <hanm@apache.org>
Abraham Fine 8 years ago
parent
commit
d13d2f08c7
1 changed file with 16 additions and 2 deletions
  1. 16 2
      src/java/test/org/apache/zookeeper/test/WatcherTest.java

+ 16 - 2
src/java/test/org/apache/zookeeper/test/WatcherTest.java

@@ -44,6 +44,8 @@ import org.junit.Test;
 public class WatcherTest extends ClientBase {
     protected static final Logger LOG = LoggerFactory.getLogger(WatcherTest.class);
 
+    private long timeOfLastWatcherInvocation;
+
     private final static class MyStatCallback implements StatCallback {
         int rc;
         public void processResult(int rc, String path, Object ctx, Stat stat) {
@@ -59,6 +61,7 @@ public class WatcherTest extends ClientBase {
         public void process(WatchedEvent event) {
             super.process(event);
             if (event.getType() != Event.EventType.None) {
+                timeOfLastWatcherInvocation = System.currentTimeMillis();
                 try {
                     events.put(event);
                 } catch (InterruptedException e) {
@@ -172,7 +175,6 @@ public class WatcherTest extends ClientBase {
     }
 
     final static int COUNT = 100;
-    boolean hasSeenDelete = true;
     /**
      * This test checks that watches for pending requests do not get triggered,
      * but watches set by previous requests do.
@@ -206,7 +208,7 @@ public class WatcherTest extends ClientBase {
        startServer();
        watches[COUNT/2-1].waitForConnected(60000);
        Assert.assertEquals(null, zk.exists("/test", false));
-       Thread.sleep(10);
+       waitForAllWatchers();
        for(int i = 0; i < COUNT/2; i++) {
            Assert.assertEquals("For " + i, 1, watches[i].events.size());
        }
@@ -221,6 +223,18 @@ public class WatcherTest extends ClientBase {
        zk.close();
     }
 
+    /**
+     * Blocks until a full second has passed in which no watcher recorded a non-None event,
+     * so that every watch still waiting to be fired has had time to fire.
+     * @throws Exception if the thread is interrupted while sleeping
+     */
+    private void waitForAllWatchers() throws Exception {
+      timeOfLastWatcherInvocation = System.currentTimeMillis(); // the quiet period starts now
+      while (System.currentTimeMillis() - timeOfLastWatcherInvocation < 1000) { // process() advances the timestamp per event
+        Thread.sleep(1000); // NOTE(review): field is not volatile; confirm writes made in process() are visible here
+      }
+    }
+
     final int TIMEOUT = 5000;
 
     @Test