Prechádzať zdrojové kódy

ZOOKEEPER-1090. Race condition while taking snapshot can lead to not restoring data tree correctly.

git-svn-id: https://svn.apache.org/repos/asf/zookeeper/trunk@1151738 13f79535-47bb-0310-9956-ffa450edef68
Benjamin Reed 14 rokov pred
rodič
commit
6f07f77e16

+ 2 - 0
CHANGES.txt

@@ -266,6 +266,8 @@ BUGFIXES:
   ZOOKEEPER-1119. zkServer stop command incorrectly reading comment lines in
   zoo.cfg (phunt via mahadev)
 
+  ZOOKEEPER-1090. Race condition while taking snapshot can lead to not restoring data tree correctly (Vishal K via breed)
+
 IMPROVEMENTS:
   ZOOKEEPER-724. Improve junit test integration - log harness information 
   (phunt via mahadev)

+ 17 - 3
src/java/main/org/apache/zookeeper/server/DataTree.java

@@ -791,9 +791,6 @@ public class DataTree {
             rc.type = header.getType();
             rc.err = 0;
             rc.multiResult = null;
-            if (rc.zxid > lastProcessedZxid) {
-                lastProcessedZxid = rc.zxid;
-            }
             switch (header.getType()) {
                 case OpCode.create:
                     CreateTxn createTxn = (CreateTxn) txn;
@@ -914,6 +911,23 @@ public class DataTree {
         } catch (IOException e) {
             LOG.warn("Failed:" + debug, e);
         }
+        /*
+         * A snapshot might be in progress while we are modifying the data
+         * tree. If we set lastProcessedZxid prior to making corresponding
+         * change to the tree, then the zxid associated with the snapshot
+         * file will be ahead of its contents. Thus, while restoring from
+         * the snapshot, the restore method will not apply the transaction
+         * for zxid associated with the snapshot file, since the restore
+         * method assumes that transaction to be present in the snapshot.
+         *
+         * To avoid this, we first apply the transaction and then modify
+         * lastProcessedZxid.  During restore, we correctly handle the
+         * case where the snapshot contains data ahead of the zxid associated
+         * with the file.
+         */
+        if (rc.zxid > lastProcessedZxid) {
+        	lastProcessedZxid = rc.zxid;
+        }
         return rc;
     }
 

+ 126 - 3
src/java/test/org/apache/zookeeper/test/LoadFromLogTest.java

@@ -43,10 +43,10 @@ import org.apache.zookeeper.server.DataTree;
 import org.apache.zookeeper.server.DataNode;
 import org.apache.zookeeper.txn.CreateTxn;
 import org.apache.zookeeper.txn.DeleteTxn;
-import org.apache.zookeeper.txn.TxnHeader;
 import org.apache.zookeeper.ZooDefs.OpCode;
 import org.apache.jute.Record;
 import java.io.FileInputStream;
+
 import org.apache.jute.BinaryInputArchive;
 import org.apache.zookeeper.server.persistence.FileHeader;
 
@@ -59,6 +59,7 @@ public class LoadFromLogTest extends ZKTestCase implements  Watcher {
     // setting up the quorum has a transaction overhead for creating and closing the session
     private static final int TRANSACTION_OVERHEAD = 2;	
     private static final int TOTAL_TRANSACTIONS = NUM_MESSAGES + TRANSACTION_OVERHEAD;
+    private volatile boolean connected;
 
     /**
      * test that all transactions from the Log are loaded, and only once
@@ -114,7 +115,22 @@ public class LoadFromLogTest extends ZKTestCase implements  Watcher {
 
 
     public void process(WatchedEvent event) {
-        // do nothing
+    	switch (event.getType()) {
+    	case None:   
+    		switch (event.getState()) {
+    		case SyncConnected:
+    			connected = true;
+    			break;
+    		case Disconnected:
+    			connected = false;
+    			break;
+    		default:   
+    			break;
+    		}
+        	break;
+    	default:
+    		break;
+    	}
     }
 
     /**
@@ -215,4 +231,111 @@ public class LoadFromLogTest extends ZKTestCase implements  Watcher {
         Assert.assertTrue("Missing magic number ",
               header.getMagic() == FileTxnLog.TXNLOG_MAGIC);
     }
-}
+    
+    /**
+     * Test we can restore the snapshot that has data ahead of the zxid
+     * of the snapshot file. 
+     */
+    @Test
+    public void testRestore() throws Exception {
+		// setup a single server cluster
+		File tmpDir = ClientBase.createTmpDir();
+		ClientBase.setupTestEnv();
+		ZooKeeperServer zks = new ZooKeeperServer(tmpDir, tmpDir, 3000);
+		SyncRequestProcessor.setSnapCount(10000);
+		final int PORT = Integer.parseInt(HOSTPORT.split(":")[1]);
+		ServerCnxnFactory f = ServerCnxnFactory.createFactory(PORT, -1);
+		f.startup(zks);
+		Assert.assertTrue("waiting for server being up ", ClientBase
+				.waitForServerUp(HOSTPORT, CONNECTION_TIMEOUT));
+		ZooKeeper zk = new ZooKeeper(HOSTPORT, CONNECTION_TIMEOUT, this);
+
+		long start = System.currentTimeMillis();
+		while (!connected) {
+			long end = System.currentTimeMillis();
+			if (end - start > 5000) {
+				Assert.assertTrue("Could not connect with server in 5 seconds",
+						false);
+			}
+			try {
+				Thread.sleep(200);
+			} catch (Exception e) {
+				LOG.warn("Intrrupted");
+			}
+
+		}
+		// generate some transactions
+		String lastPath = null;
+		try {
+			zk.create("/invalidsnap", new byte[0], Ids.OPEN_ACL_UNSAFE,
+					CreateMode.PERSISTENT);
+			for (int i = 0; i < NUM_MESSAGES; i++) {
+				lastPath = zk.create("/invalidsnap/test-", new byte[0],
+						Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT_SEQUENTIAL);
+			}
+		} finally {
+			zk.close();
+		}
+		String[] tokens = lastPath.split("-");
+		String expectedPath = "/invalidsnap/test-"
+				+ String.format("%010d",
+						(new Integer(tokens[1])).intValue() + 1);
+		long eZxid = zks.getZKDatabase().getDataTreeLastProcessedZxid();
+		// force the zxid to be behind the content
+		zks.getZKDatabase().setlastProcessedZxid(
+				zks.getZKDatabase().getDataTreeLastProcessedZxid() - 10);
+		LOG.info("Set lastProcessedZxid to "
+				+ zks.getZKDatabase().getDataTreeLastProcessedZxid());
+		// Force snapshot and restore
+		zks.takeSnapshot();
+		zks.shutdown();
+		f.shutdown();
+
+		zks = new ZooKeeperServer(tmpDir, tmpDir, 3000);
+		SyncRequestProcessor.setSnapCount(10000);
+		f = ServerCnxnFactory.createFactory(PORT, -1);
+		f.startup(zks);
+		Assert.assertTrue("waiting for server being up ", ClientBase
+				.waitForServerUp(HOSTPORT, CONNECTION_TIMEOUT));
+		connected = false;
+		long fZxid = zks.getZKDatabase().getDataTreeLastProcessedZxid();
+
+		// Verify lastProcessedZxid is set correctly
+		Assert.assertTrue("Restore failed expected zxid=" + eZxid + " found="
+				+ fZxid, fZxid == eZxid);
+		zk = new ZooKeeper(HOSTPORT, CONNECTION_TIMEOUT, this);
+		start = System.currentTimeMillis();
+		while (!connected) {
+			long end = System.currentTimeMillis();
+			if (end - start > 5000) {
+				Assert.assertTrue("Could not connect with server in 5 seconds",
+						false);
+			}
+			try {
+				Thread.sleep(200);
+			} catch (Exception e) {
+				LOG.warn("Intrrupted");
+			}
+
+		}
+		// Verify correctness of data and whether sequential znode creation 
+		// proceeds correctly after this point
+		String[] children;
+		String path;
+		try {
+			children = zk.getChildren("/invalidsnap", false).toArray(
+					new String[0]);
+			path = zk.create("/invalidsnap/test-", new byte[0],
+					Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT_SEQUENTIAL);
+		} finally {
+			zk.close();
+		}
+		LOG.info("Expected " + expectedPath + " found " + path);
+		Assert.assertTrue("Error in sequential znode creation expected "
+				+ expectedPath + " found " + path, path.equals(expectedPath));
+		Assert.assertTrue("Unexpected number of children " + children.length
+				+ " expected " + NUM_MESSAGES,
+				(children.length == NUM_MESSAGES));
+		f.shutdown();
+	}
+}