Browse Source

ZOOKEEPER-3306: Fixing node not accessible issue due the inconsistent ACL reference map after SNAP sync

Please check the Jira for more details of the bug.

Need more opinion on this fix to see if there is other corner cases or side effect we didn't handle properly.

Author: Fangmin Lyu <fangmin@apache.org>

Reviewers: andor@apache.org

Closes #848 from lvfangmin/ZOOKEEPER-3306
Fangmin Lyu 6 years ago
parent
commit
46b2018dbe

+ 22 - 2
zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java

@@ -452,6 +452,19 @@ public class DataTree {
             throw new KeeperException.NoNodeException();
         }
         synchronized (parent) {
+            // Add the ACL to ACL cache first, to avoid the ACL not being
+            // created race condition during fuzzy snapshot sync.
+            //
+            // This is the simplest fix, which may add ACL reference count
+            // again if it's already counted in in the ACL map of fuzzy
+            // snapshot, which might also happen for deleteNode txn, but
+            // at least it won't cause the ACL not exist issue.
+            //
+            // Later we can audit and delete all non-referenced ACLs from
+            // ACL map when loading the snapshot/txns from disk, like what
+            // we did for the global sessions.
+            Long longval = aclCache.convertAcls(acl);
+
             Set<String> children = parent.getChildren();
             if (children.contains(childName)) {
                 throw new KeeperException.NodeExistsException();
@@ -470,7 +483,6 @@ public class DataTree {
                 parent.stat.setCversion(parentCVersion);
                 parent.stat.setPzxid(zxid);
             }
-            Long longval = aclCache.convertAcls(acl);
             DataNode child = new DataNode(data, longval, stat);
             parent.addChild(childName);
             nodeDataSize.addAndGet(getNodeSize(path, child.data));
@@ -1249,8 +1261,11 @@ public class DataTree {
         oa.writeRecord(node, "node");
     }
 
-    public void serialize(OutputArchive oa, String tag) throws IOException {
+    public void serializeAcls(OutputArchive oa) throws IOException { 
         aclCache.serialize(oa);
+    }
+
+    public void serializeNodes(OutputArchive oa) throws IOException { 
         serializeNode(oa, new StringBuilder(""));
         // / marks end of stream
         // we need to check if clear had been called in between the snapshot.
@@ -1259,6 +1274,11 @@ public class DataTree {
         }
     }
 
+    public void serialize(OutputArchive oa, String tag) throws IOException {
+        serializeAcls(oa);
+        serializeNodes(oa);
+    }
+
     public void deserialize(InputArchive ia, String tag) throws IOException {
         aclCache.deserialize(ia);
         nodes.clear();

+ 67 - 0
zookeeper-server/src/test/java/org/apache/zookeeper/server/persistence/FileTxnSnapLogTest.java

@@ -18,12 +18,18 @@
 
 package org.apache.zookeeper.server.persistence;
 
+import org.apache.jute.BinaryInputArchive;
+import org.apache.jute.BinaryOutputArchive;
+import org.apache.jute.InputArchive;
+import org.apache.jute.OutputArchive;
 import org.apache.jute.Record;
 import org.apache.zookeeper.ZooDefs;
+import org.apache.zookeeper.server.DataNode;
 import org.apache.zookeeper.server.DataTree;
 import org.apache.zookeeper.server.Request;
 import org.apache.zookeeper.test.ClientBase;
 import org.apache.zookeeper.test.TestUtils;
+import org.apache.zookeeper.txn.CreateTxn;
 import org.apache.zookeeper.txn.SetDataTxn;
 import org.apache.zookeeper.txn.TxnHeader;
 import org.junit.After;
@@ -32,6 +38,8 @@ import org.junit.Assert;
 import org.junit.Test;
 
 import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
@@ -271,4 +279,63 @@ public class FileTxnSnapLogTest {
 
         createFileTxnSnapLogWithNoAutoCreateDataDir(logDir, snapDir);
     }
+
+    /**
+     * Make sure the ACL is exist in the ACL map after SNAP syncing.
+     *
+     * ZooKeeper uses ACL reference id and count to save the space in snapshot.
+     * During fuzzy snapshot sync, the reference count may not be updated
+     * correctly in case like the znode is already exist.
+     *
+     * When ACL reference count reaches 0, it will be deleted from the cache,
+     * but actually there might be other nodes still using it. When visiting
+     * a node with the deleted ACL id, it will be rejected because it doesn't
+     * exist anymore.
+     *
+     * Here is the detailed flow for one of the scenario here:
+     *   1. Server A starts to have snap sync with leader
+     *   2. After serializing the ACL map to Server A, there is a txn T1 to
+     *      create a node N1 with new ACL_1 which was not exist in ACL map
+     *   3. On leader, after this txn, the ACL map will be ID1 -> (ACL_1, COUNT: 1),
+     *      and data tree N1 -> ID1
+     *   4. On server A, it will be empty ACL map, and N1 -> ID1 in fuzzy snapshot
+     *   5. When replaying the txn T1, it will skip at the beginning since the
+     *      node is already exist, which leaves an empty ACL map, and N1 is
+     *      referencing to a non-exist ACL ID1
+     *   6. Node N1 will be not accessible because the ACL not exist, and if it
+     *      became leader later then all the write requests will be rejected as
+     *      well with marshalling error.
+     */
+    @Test
+    public void testACLCreatedDuringFuzzySnapshotSync() throws IOException {
+        DataTree leaderDataTree = new DataTree();
+
+        // Start the simulated snap-sync by serializing ACL cache.
+        File file = File.createTempFile("snapshot", "zk");
+        FileOutputStream os = new FileOutputStream(file);
+        OutputArchive oa = BinaryOutputArchive.getArchive(os);
+        leaderDataTree.serializeAcls(oa);
+
+        // Add couple of transaction in-between.
+        TxnHeader hdr1 = new TxnHeader(1, 2, 2, 2, ZooDefs.OpCode.create);
+        Record txn1 = new CreateTxn("/a1", "foo".getBytes(), ZooDefs.Ids.CREATOR_ALL_ACL, false, -1);
+        leaderDataTree.processTxn(hdr1, txn1);
+
+        // Finish the snapshot.
+        leaderDataTree.serializeNodes(oa);
+        os.close();
+
+        // Simulate restore on follower and replay.
+        FileInputStream is = new FileInputStream(file);
+        InputArchive ia = BinaryInputArchive.getArchive(is);
+        DataTree followerDataTree = new DataTree();
+        followerDataTree.deserialize(ia, "tree");
+        followerDataTree.processTxn(hdr1, txn1);
+
+        DataNode a1 = leaderDataTree.getNode("/a1");
+        Assert.assertNotNull(a1);
+        Assert.assertEquals(ZooDefs.Ids.CREATOR_ALL_ACL, leaderDataTree.getACL(a1));
+
+        Assert.assertEquals(ZooDefs.Ids.CREATOR_ALL_ACL, followerDataTree.getACL(a1));
+    }
 }