瀏覽代碼

ZOOKEEPER-1636: cleanup completion list of a failed multi request

(from Thawan Kooburat)

Author: Michael Edwards <Michael Edwards>

Reviewers: andor@apache.org

Closes #717 from mkedwards/ZOOKEEPER-1636-for-master
Michael Edwards 6 年之前
父節點
當前提交
b1fd480b2c

+ 16 - 1
zookeeper-client/zookeeper-client-c/src/zookeeper.c

@@ -2594,6 +2594,17 @@ completion_list_t *dequeue_completion(completion_head_t *list)
     return cptr;
     return cptr;
 }
 }
 
 
+// cleanup completion list of a failed multi request
+static void cleanup_failed_multi(zhandle_t *zh, int xid, int rc, completion_list_t *cptr) {
+    completion_list_t *entry;
+    completion_head_t *clist = &cptr->c.clist;
+    while ((entry = dequeue_completion(clist)) != NULL) {
+        // Fake failed response for all sub-requests
+        deserialize_response(zh, entry->c.type, xid, 1, rc, entry, NULL);
+        destroy_completion_entry(entry);
+    }
+}
+
 static int deserialize_multi(zhandle_t *zh, int xid, completion_list_t *cptr, struct iarchive *ia)
 static int deserialize_multi(zhandle_t *zh, int xid, completion_list_t *cptr, struct iarchive *ia)
 {
 {
     int rc = 0;
     int rc = 0;
@@ -2723,8 +2734,12 @@ static void deserialize_response(zhandle_t *zh, int type, int xid, int failed, i
     case COMPLETION_MULTI:
     case COMPLETION_MULTI:
         LOG_DEBUG(LOGCALLBACK(zh), "Calling COMPLETION_MULTI for xid=%#x failed=%d rc=%d",
         LOG_DEBUG(LOGCALLBACK(zh), "Calling COMPLETION_MULTI for xid=%#x failed=%d rc=%d",
                     cptr->xid, failed, rc);
                     cptr->xid, failed, rc);
-        rc = deserialize_multi(zh, xid, cptr, ia);
         assert(cptr->c.void_result);
         assert(cptr->c.void_result);
+        if (failed) {
+            cleanup_failed_multi(zh, xid, rc, cptr);
+        } else {
+            rc = deserialize_multi(zh, xid, cptr, ia);
+        }
         cptr->c.void_result(rc, cptr->data);
         cptr->c.void_result(rc, cptr->data);
         break;
         break;
     default:
     default:

+ 68 - 0
zookeeper-client/zookeeper-client-c/tests/TestMulti.cc

@@ -178,6 +178,7 @@ class Zookeeper_multi : public CPPUNIT_NS::TestFixture
     CPPUNIT_TEST(testCheck);
     CPPUNIT_TEST(testCheck);
     CPPUNIT_TEST(testWatch);
     CPPUNIT_TEST(testWatch);
     CPPUNIT_TEST(testSequentialNodeCreateInAsyncMulti);
     CPPUNIT_TEST(testSequentialNodeCreateInAsyncMulti);
+    CPPUNIT_TEST(testBigAsyncMulti);
     CPPUNIT_TEST_SUITE_END();
     CPPUNIT_TEST_SUITE_END();
 
 
     static void watcher(zhandle_t *, int type, int state, const char *path,void*v){
     static void watcher(zhandle_t *, int type, int state, const char *path,void*v){
@@ -248,6 +249,16 @@ public:
         count++;
         count++;
     }
     }
 
 
+    static void multi_completion_fn_rc(int rc, const void *data) {
+        count++;
+        *((int*) data) = rc;
+    }
+    
+    static void create_completion_fn_rc(int rc, const char* value, const void *data) {
+        count++;
+        *((int*) data) = rc;
+    }
+    
     static void waitForMultiCompletion(int seconds) {
     static void waitForMultiCompletion(int seconds) {
         time_t expires = time(0) + seconds;
         time_t expires = time(0) + seconds;
         while(count == 0 && time(0) < expires) {
         while(count == 0 && time(0) < expires) {
@@ -654,6 +665,63 @@ public:
         // wait for multi completion in doMultiInWatch
         // wait for multi completion in doMultiInWatch
         waitForMultiCompletion(5);
         waitForMultiCompletion(5);
      }
      }
+    
+    /**
+     * ZOOKEEPER-1636: If request is too large, the server will cut the
+     * connection without sending response packet. The client will try to
+     * process completion on multi request and eventually cause SIGSEGV
+     */
+    void testBigAsyncMulti() {
+        int rc;
+        int callback_rc = (int) ZOK;
+        watchctx_t ctx;
+        zhandle_t *zk = createClient(&ctx);
+        
+        // The request should be more than 1MB which exceeds the default
+        // jute.maxbuffer and causes the server to drop client connection
+        const int iteration = 500;
+        const int type_count = 3;
+        const int nops = iteration * type_count;
+        char buff[1024];
+
+        zoo_op_result_t results[nops];
+        zoo_op_t ops[nops];
+        struct Stat* s[nops];
+        int index = 0;
+
+        // Test that we deliver error to 3 types of sub-request
+        for (int i = 0; i < iteration; ++i) {
+            zoo_set_op_init(&ops[index++], "/x", buff, sizeof(buff), -1, s[i]);
+            zoo_create_op_init(&ops[index++], "/x", buff, sizeof(buff),
+                               &ZOO_OPEN_ACL_UNSAFE, ZOO_SEQUENCE, NULL, 0);
+            zoo_delete_op_init(&ops[index++], "/x", -1);
+        }
+        
+        rc = zoo_amulti(zk, nops, ops, results, multi_completion_fn_rc,
+                                  +                         &callback_rc);
+        CPPUNIT_ASSERT_EQUAL((int) ZOK, rc);
+        
+        waitForMultiCompletion(10);
+        // With the bug, we will get SIGSEGV before reaching this point
+        CPPUNIT_ASSERT_EQUAL((int) ZCONNECTIONLOSS, callback_rc);
+        
+        // Make sure that all sub-request completions get processed
+        for (int i = 0; i < nops; ++i) {
+            CPPUNIT_ASSERT_EQUAL((int) ZCONNECTIONLOSS, results[i].err);
+        }
+
+        // The handle should be able to recover itself.
+        ctx.waitForConnected(zk);
+
+        // Try to submit another async request to see if it get processed
+        // correctly
+        rc = zoo_acreate(zk, "/target", "", 0, &ZOO_OPEN_ACL_UNSAFE, 0,
+                         create_completion_fn_rc, &callback_rc);
+        CPPUNIT_ASSERT_EQUAL((int) ZOK, rc);
+
+        waitForMultiCompletion(10);
+        CPPUNIT_ASSERT_EQUAL((int) ZOK, callback_rc);
+    }
 
 
      /**
      /**
       * ZOOKEEPER-1624: PendingChanges of create sequential node request didn't
       * ZOOKEEPER-1624: PendingChanges of create sequential node request didn't