Bläddra i källkod

ZOOKEEPER-3125: Fixing pzxid consistent issue when replaying a txn for a deleted node

Author: Fangmin Lyu <allenlyu@fb.com>

Reviewers: breed@apache.org, andor@apache.org

Closes #605 from lvfangmin/ZOOKEEPER-3125
Fangmin Lyu 6 år sedan
förälder
incheckning
6651a126cd

+ 25 - 8
src/java/main/org/apache/zookeeper/server/DataTree.java

@@ -545,6 +545,19 @@ public class DataTree {
         int lastSlash = path.lastIndexOf('/');
         String parentName = path.substring(0, lastSlash);
         String childName = path.substring(lastSlash + 1);
+
+        // The child might already be deleted during taking fuzzy snapshot,
+        // but we still need to update the pzxid here before throw exception
+        // for no such child
+        DataNode parent = nodes.get(parentName);
+        if (parent == null) {
+            throw new KeeperException.NoNodeException();
+        }
+        synchronized (parent) {
+            parent.removeChild(childName);
+            parent.stat.setPzxid(zxid);
+        }
+
         DataNode node = nodes.get(path);
         if (node == null) {
             throw new KeeperException.NoNodeException();
@@ -554,13 +567,11 @@ public class DataTree {
             aclCache.removeUsage(node.acl);
             nodeDataSize.addAndGet(-getNodeSize(path, node.data));
         }
-        DataNode parent = nodes.get(parentName);
-        if (parent == null) {
-            throw new KeeperException.NoNodeException();
-        }
+
+        // Synchronized to sync the containers and ttls change, probably
+        // only need to sync on containers and ttls, will update it in a
+        // separate patch.
         synchronized (parent) {
-            parent.removeChild(childName);
-            parent.stat.setPzxid(zxid);
             long eowner = node.stat.getEphemeralOwner();
             EphemeralType ephemeralType = EphemeralType.get(eowner);
             if (ephemeralType == EphemeralType.CONTAINER) {
@@ -576,6 +587,7 @@ public class DataTree {
                 }
             }
         }
+
         if (parentName.startsWith(procZookeeper) && Quotas.limitNode.equals(childName)) {
             // delete the node in the trie.
             // we need to update the trie as well
@@ -1198,8 +1210,7 @@ public class DataTree {
             Set<String> childs = node.getChildren();
             children = childs.toArray(new String[childs.size()]);
         }
-        oa.writeString(pathString, "path");
-        oa.writeRecord(nodeCopy, "node");
+        serializeNodeData(oa, pathString, nodeCopy);
         path.append('/');
         int off = path.length();
         for (String child : children) {
@@ -1212,6 +1223,12 @@ public class DataTree {
         }
     }
 
+    // visiable for test
+    public void serializeNodeData(OutputArchive oa, String path, DataNode node) throws IOException {
+        oa.writeString(path, "path");
+        oa.writeRecord(node, "node");
+    }
+
     public void serialize(OutputArchive oa, String tag) throws IOException {
         aclCache.serialize(oa);
         serializeNode(oa, new StringBuilder(""));

+ 117 - 0
src/java/test/org/apache/zookeeper/server/quorum/FuzzySnapshotRelatedTest.java

@@ -18,22 +18,27 @@
 
 package org.apache.zookeeper.server.quorum;
 
+import java.io.IOException;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import javax.security.sasl.SaslException;
 
+import org.apache.jute.OutputArchive;
+
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException.NoNodeException;
 import org.apache.zookeeper.KeeperException.NodeExistsException;
 import org.apache.zookeeper.Op;
+
 import org.apache.zookeeper.PortAssignment;
 import org.apache.zookeeper.ZooDefs.Ids;
 import org.apache.zookeeper.ZooKeeper;
 import org.apache.zookeeper.ZooKeeper.States;
 import org.apache.zookeeper.data.ACL;
 import org.apache.zookeeper.data.Stat;
+import org.apache.zookeeper.server.DataNode;
 import org.apache.zookeeper.server.DataTree;
 import org.apache.zookeeper.server.ZKDatabase;
 import org.apache.zookeeper.server.ZooKeeperServer;
@@ -162,6 +167,98 @@ public class FuzzySnapshotRelatedTest extends QuorumPeerTestBase {
                 new String(zk[followerA].getData(node2, null, null)));
     }
 
+    /**
+     * It's possibel during SNAP sync, the parent is serialized before the
+     * child get deleted during sending the snapshot over.
+     *
+     * In which case, we need to make sure the pzxid get correctly updated
+     * when applying the txns received.
+     */
+    @Test
+    public void testPZxidUpdatedDuringSnapSyncing() throws Exception {
+        LOG.info("Enable force snapshot sync");
+        System.setProperty(LearnerHandler.FORCE_SNAP_SYNC, "true");
+
+        final String parent = "/testPZxidUpdatedWhenDeletingNonExistNode";
+        final String child = parent + "/child";
+        createEmptyNode(zk[leaderId], parent);
+        createEmptyNode(zk[leaderId], child);
+
+        LOG.info("shutdown follower {}", followerA);
+        mt[followerA].shutdown();
+        QuorumPeerMainTest.waitForOne(zk[followerA], States.CONNECTING);
+
+        LOG.info("Set up ZKDatabase to catch the node serializing in DataTree");
+        addSerializeListener(leaderId, parent, child);
+
+        LOG.info("Restart follower A to trigger a SNAP sync with leader");
+        mt[followerA].start();
+        QuorumPeerMainTest.waitForOne(zk[followerA], States.CONNECTED);
+
+        LOG.info("Check and make sure the pzxid of the parent is the same " +
+                "on leader and follower A");
+        compareStat(parent, leaderId, followerA);
+    }
+
+    /**
+     * It's possible during taking fuzzy snapshot, the parent is serialized
+     * before the child get deleted in the fuzzy range.
+     *
+     * In which case, we need to make sure the pzxid get correctly updated
+     * when replaying the txns.
+     */
+    @Test
+    public void testPZxidUpdatedWhenLoadingSnapshot() throws Exception {
+
+        final String parent = "/testPZxidUpdatedDuringTakingSnapshot";
+        final String child = parent + "/child";
+        createEmptyNode(zk[followerA], parent);
+        createEmptyNode(zk[followerA], child);
+
+        LOG.info("Set up ZKDatabase to catch the node serializing in DataTree");
+        addSerializeListener(followerA, parent, child);
+
+        LOG.info("Take snapshot on follower A");
+        ZooKeeperServer zkServer = mt[followerA].main.quorumPeer.getActiveServer();
+        zkServer.takeSnapshot(true);
+
+        LOG.info("Restarting follower A to load snapshot");
+        mt[followerA].shutdown();
+        mt[followerA].start();
+        QuorumPeerMainTest.waitForOne(zk[followerA], States.CONNECTED);
+
+        LOG.info("Check and make sure the pzxid of the parent is the same " +
+                "on leader and follower A");
+        compareStat(parent, leaderId, followerA);
+    }
+
+    private void addSerializeListener(int sid, String parent, String child) {
+        final ZooKeeper zkClient = zk[followerA];
+        CustomDataTree dt =
+                (CustomDataTree) mt[sid].main.quorumPeer.getZkDb().getDataTree();
+        dt.addListener(parent, new NodeSerializeListener() {
+            @Override
+            public void nodeSerialized(String path) {
+                try {
+                    zkClient.delete(child, -1);
+                    LOG.info("Deleted the child node after the parent is serialized");
+                } catch (Exception e) {
+                    LOG.error("Error when deleting node {}", e);
+                }
+            }
+        });
+    }
+
+    private void compareStat(String path, int sid, int compareWithSid) throws Exception{
+        Stat stat1 = new Stat();
+        zk[sid].getData(path, null, stat1);
+
+        Stat stat2 = new Stat();
+        zk[compareWithSid].getData(path, null, stat2);
+
+        Assert.assertEquals(stat1, stat2);
+    }
+
     private void createEmptyNode(ZooKeeper zk, String path) throws Exception {
         zk.create(path, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
     }
@@ -174,6 +271,22 @@ public class FuzzySnapshotRelatedTest extends QuorumPeerTestBase {
     static class CustomDataTree extends DataTree {
         Map<String, NodeCreateListener> nodeCreateListeners =
                 new HashMap<String, NodeCreateListener>();
+        Map<String, NodeSerializeListener> listeners =
+                new HashMap<String, NodeSerializeListener>();
+
+        @Override
+        public void serializeNodeData(OutputArchive oa, String path,
+                DataNode node) throws IOException {
+            super.serializeNodeData(oa, path, node);
+            NodeSerializeListener listener = listeners.get(path);
+            if (listener != null) {
+                listener.nodeSerialized(path);
+            }
+        }
+
+        public void addListener(String path, NodeSerializeListener listener) {
+            listeners.put(path, listener);
+        }
 
         @Override
         public void createNode(final String path, byte data[], List<ACL> acl,
@@ -193,6 +306,10 @@ public class FuzzySnapshotRelatedTest extends QuorumPeerTestBase {
         }
     }
 
+    static interface NodeSerializeListener {
+        public void nodeSerialized(String path);
+    }
+
     static class CustomizedQPMain extends TestQPMain {
         @Override
         protected QuorumPeer getQuorumPeer() throws SaslException {