소스 검색

ZOOKEEPER-3546 - Allow optional deletion of never used Container Nodes

Edge cases can cause Container Nodes to never be deleted. i.e. if the container node is created and then the client that create the node crashes the container will not get deleted unless another client creates a node inside of it. This is because the initial implementation does not delete container nodes with a cversion of 0. This PR adds a new system property, "znode.container.maxNeverUsedIntervalMs", that can be set to delete containers with a cversion of 0 that have been retained for a period of time. This is a backward compatible change as the default value for this is Long.MAX_VALUE - i.e. never.

Author: randgalt <jordan@jordanzimmerman.com>

Reviewers: Enrico Olivelli <eolivelli@apache.org>, Fangmin Lyu <fangmin@apache.org>

Closes #1138 from Randgalt/ZOOKEEPER-3546-allow-delete-of-never-used-containers
randgalt 5 년 전
부모
커밋
4132b64b36

+ 9 - 0
zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md

@@ -1611,6 +1611,15 @@ Both subsystems need to have sufficient amount of threads to achieve peak read t
     minute. This prevents herding during container deletion.
     Default is "10000".
 
+* *znode.container.maxNeverUsedIntervalMs* :
+    (Java system property only)
+    **New in 3.6.0:** The
+    maximum interval in milliseconds that a container that has never had
+    any children is retained. Should be long enough for your client to
+    create the container, do any needed work and then create children.
+    Default is "0" which is used to indicate that containers
+    that have never had any children are never deleted.
+
 <a name="sc_debug_observability_config"></a>
 
 #### Debug Observability Configurations

+ 43 - 9
zookeeper-server/src/main/java/org/apache/zookeeper/server/ContainerManager.java

@@ -46,6 +46,7 @@ public class ContainerManager {
     private final RequestProcessor requestProcessor;
     private final int checkIntervalMs;
     private final int maxPerMinute;
+    private final long maxNeverUsedIntervalMs;
     private final Timer timer;
     private final AtomicReference<TimerTask> task = new AtomicReference<TimerTask>(null);
 
@@ -58,13 +59,28 @@ public class ContainerManager {
      *                     herding of container deletions
      */
     public ContainerManager(ZKDatabase zkDb, RequestProcessor requestProcessor, int checkIntervalMs, int maxPerMinute) {
+        this(zkDb, requestProcessor, checkIntervalMs, maxPerMinute, Long.MAX_VALUE);
+    }
+
+    /**
+     * @param zkDb the ZK database
+     * @param requestProcessor request processer - used to inject delete
+     *                         container requests
+     * @param checkIntervalMs how often to check containers in milliseconds
+     * @param maxPerMinute the max containers to delete per second - avoids
+     *                     herding of container deletions
+     * @param maxNeverUsedIntervalMs the max time in milliseconds that a container that has never had
+     *                                  any children is retained
+     */
+    public ContainerManager(ZKDatabase zkDb, RequestProcessor requestProcessor, int checkIntervalMs, int maxPerMinute, long maxNeverUsedIntervalMs) {
         this.zkDb = zkDb;
         this.requestProcessor = requestProcessor;
         this.checkIntervalMs = checkIntervalMs;
         this.maxPerMinute = maxPerMinute;
+        this.maxNeverUsedIntervalMs = maxNeverUsedIntervalMs;
         timer = new Timer("ContainerManagerTask", true);
 
-        LOG.info("Using checkIntervalMs={} maxPerMinute={}", checkIntervalMs, maxPerMinute);
+        LOG.info("Using checkIntervalMs={} maxPerMinute={} maxNeverUsedIntervalMs={}", checkIntervalMs, maxPerMinute, maxNeverUsedIntervalMs);
     }
 
     /**
@@ -116,7 +132,7 @@ public class ContainerManager {
             Request request = new Request(null, 0, 0, ZooDefs.OpCode.deleteContainer, path, null);
             try {
                 LOG.info("Attempting to delete candidate container: {}", containerPath);
-                requestProcessor.processRequest(request);
+                postDeleteRequest(request);
             } catch (Exception e) {
                 LOG.error("Could not delete container: {}", containerPath, e);
             }
@@ -129,6 +145,11 @@ public class ContainerManager {
         }
     }
 
+    // VisibleForTesting
+    protected void postDeleteRequest(Request request) throws RequestProcessor.RequestProcessorException {
+        requestProcessor.processRequest(request);
+    }
+
     // VisibleForTesting
     protected long getMinIntervalMs() {
         return TimeUnit.MINUTES.toMillis(1) / maxPerMinute;
@@ -139,12 +160,26 @@ public class ContainerManager {
         Set<String> candidates = new HashSet<String>();
         for (String containerPath : zkDb.getDataTree().getContainers()) {
             DataNode node = zkDb.getDataTree().getNode(containerPath);
-            /*
-                cversion > 0: keep newly created containers from being deleted
-                before any children have been added. If you were to create the
-                container just before a container cleaning period the container
-                would be immediately be deleted.
-             */
+            if ((node != null) && node.getChildren().isEmpty()) {
+                /*
+                    cversion > 0: keep newly created containers from being deleted
+                    before any children have been added. If you were to create the
+                    container just before a container cleaning period the container
+                    would be immediately be deleted.
+                 */
+                if (node.stat.getCversion() > 0) {
+                    candidates.add(containerPath);
+                } else {
+                    /*
+                        Users may not want unused containers to live indefinitely. Allow a system
+                        property to be set that sets the max time for a cversion-0 container
+                        to stay before being deleted
+                     */
+                    if ((maxNeverUsedIntervalMs != 0) && (getElapsed(node) > maxNeverUsedIntervalMs)) {
+                        candidates.add(containerPath);
+                    }
+                }
+            }
             if ((node != null) && (node.stat.getCversion() > 0) && (node.getChildren().isEmpty())) {
                 candidates.add(containerPath);
             }
@@ -155,7 +190,6 @@ public class ContainerManager {
                 Set<String> children = node.getChildren();
                 if (children.isEmpty()) {
                     if (EphemeralType.get(node.stat.getEphemeralOwner()) == EphemeralType.TTL) {
-                        long elapsed = getElapsed(node);
                         long ttl = EphemeralType.TTL.getValue(node.stat.getEphemeralOwner());
                         if ((ttl != 0) && (getElapsed(node) > ttl)) {
                             candidates.add(ttlPath);

+ 3 - 1
zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServerMain.java

@@ -169,7 +169,9 @@ public class ZooKeeperServerMain {
                 zkServer.getZKDatabase(),
                 zkServer.firstProcessor,
                 Integer.getInteger("znode.container.checkIntervalMs", (int) TimeUnit.MINUTES.toMillis(1)),
-                Integer.getInteger("znode.container.maxPerMinute", 10000));
+                Integer.getInteger("znode.container.maxPerMinute", 10000),
+                Long.getLong("znode.container.maxNeverUsedIntervalMs", 0)
+            );
             containerManager.start();
             ZKAuditProvider.addZKStartStopAuditLog();
 

+ 3 - 1
zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/LeaderZooKeeperServer.java

@@ -81,7 +81,9 @@ public class LeaderZooKeeperServer extends QuorumZooKeeperServer {
             getZKDatabase(),
             prepRequestProcessor,
             Integer.getInteger("znode.container.checkIntervalMs", (int) TimeUnit.MINUTES.toMillis(1)),
-            Integer.getInteger("znode.container.maxPerMinute", 10000));
+            Integer.getInteger("znode.container.maxPerMinute", 10000),
+            Long.getLong("znode.container.maxNeverUsedIntervalMs", 0)
+        );
     }
 
     @Override

+ 71 - 11
zookeeper-server/src/test/java/org/apache/zookeeper/server/CreateContainerTest.java

@@ -31,7 +31,10 @@ import java.util.concurrent.Callable;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.Executors;
 import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.Semaphore;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
 import org.apache.zookeeper.AsyncCallback;
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;
@@ -45,11 +48,24 @@ import org.junit.Test;
 public class CreateContainerTest extends ClientBase {
 
     private ZooKeeper zk;
+    private Semaphore completedContainerDeletions;
 
     @Override
     public void setUp() throws Exception {
         super.setUp();
         zk = createClient();
+
+        completedContainerDeletions = new Semaphore(0);
+        ZKDatabase testDatabase = new ZKDatabase(serverFactory.zkServer.getZKDatabase().snapLog) {
+            @Override
+            public void addCommittedProposal(Request request) {
+                super.addCommittedProposal(request);
+                if (request.type == ZooDefs.OpCode.deleteContainer) {
+                    completedContainerDeletions.release();
+                }
+            }
+        };
+        serverFactory.zkServer.setZKDatabase(testDatabase);
     }
 
     @Override
@@ -95,8 +111,7 @@ public class CreateContainerTest extends ClientBase {
         ContainerManager containerManager = new ContainerManager(serverFactory.getZooKeeperServer().getZKDatabase(), serverFactory.getZooKeeperServer().firstProcessor, 1, 100);
         containerManager.checkContainers();
 
-        Thread.sleep(1000);
-
+        assertTrue(completedContainerDeletions.tryAcquire(1, TimeUnit.SECONDS));
         assertNull("Container should have been deleted", zk.exists("/foo", false));
     }
 
@@ -123,8 +138,7 @@ public class CreateContainerTest extends ClientBase {
         ContainerManager containerManager = new ContainerManager(serverFactory.getZooKeeperServer().getZKDatabase(), serverFactory.getZooKeeperServer().firstProcessor, 1, 100);
         containerManager.checkContainers();
 
-        Thread.sleep(1000);
-
+        assertTrue(completedContainerDeletions.tryAcquire(1, TimeUnit.SECONDS));
         assertNull("Container should have been deleted", zk.exists("/foo", false));
 
         createContainer = Op.create("/foo", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.CONTAINER);
@@ -134,8 +148,7 @@ public class CreateContainerTest extends ClientBase {
 
         containerManager.checkContainers();
 
-        Thread.sleep(1000);
-
+        assertTrue(completedContainerDeletions.tryAcquire(1, TimeUnit.SECONDS));
         assertNull("Container should have been deleted", zk.exists("/foo", false));
     }
 
@@ -157,8 +170,7 @@ public class CreateContainerTest extends ClientBase {
         ContainerManager containerManager = new ContainerManager(serverFactory.getZooKeeperServer().getZKDatabase(), serverFactory.getZooKeeperServer().firstProcessor, 1, 100);
         containerManager.checkContainers();
 
-        Thread.sleep(1000);
-
+        assertTrue(completedContainerDeletions.tryAcquire(1, TimeUnit.SECONDS));
         assertNull("Container should have been deleted", zk.exists("/foo", false));
     }
 
@@ -171,9 +183,9 @@ public class CreateContainerTest extends ClientBase {
 
         ContainerManager containerManager = new ContainerManager(serverFactory.getZooKeeperServer().getZKDatabase(), serverFactory.getZooKeeperServer().firstProcessor, 1, 100);
         containerManager.checkContainers();
-        Thread.sleep(1000);
+        assertTrue(completedContainerDeletions.tryAcquire(1, TimeUnit.SECONDS));
         containerManager.checkContainers();
-        Thread.sleep(1000);
+        assertTrue(completedContainerDeletions.tryAcquire(1, TimeUnit.SECONDS));
 
         assertNull("Container should have been deleted", zk.exists("/foo/bar", false));
         assertNull("Container should have been deleted", zk.exists("/foo", false));
@@ -191,8 +203,8 @@ public class CreateContainerTest extends ClientBase {
             }
         };
         containerManager.checkContainers();
-        Thread.sleep(1000);
 
+        assertTrue(completedContainerDeletions.tryAcquire(1, TimeUnit.SECONDS));
         assertNotNull("Container should have not been deleted", zk.exists("/foo", false));
     }
 
@@ -237,6 +249,54 @@ public class CreateContainerTest extends ClientBase {
         assertEquals(queue.poll(5, TimeUnit.SECONDS), "/four");
     }
 
+    @Test(timeout = 30000)
+    public void testMaxNeverUsedInterval() throws KeeperException, InterruptedException {
+        zk.create("/foo", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.CONTAINER);
+        AtomicLong elapsed = new AtomicLong(0);
+        AtomicInteger deletesQty = new AtomicInteger(0);
+        ContainerManager containerManager = new ContainerManager(serverFactory.getZooKeeperServer().getZKDatabase(), serverFactory.getZooKeeperServer().firstProcessor, 1, 100, 1000) {
+            @Override
+            protected void postDeleteRequest(Request request) throws RequestProcessor.RequestProcessorException {
+                deletesQty.incrementAndGet();
+                super.postDeleteRequest(request);
+            }
+
+            @Override
+            protected long getElapsed(DataNode node) {
+                return elapsed.get();
+            }
+        };
+        containerManager.checkContainers(); // elapsed time will appear to be 0 - container will not get deleted
+        assertEquals(deletesQty.get(), 0);
+        assertNotNull("Container should not have been deleted", zk.exists("/foo", false));
+
+        elapsed.set(10000);
+        containerManager.checkContainers(); // elapsed time will appear to be 10000 - container should get deleted
+        assertTrue(completedContainerDeletions.tryAcquire(1, TimeUnit.SECONDS));
+        assertNull("Container should have been deleted", zk.exists("/foo", false));
+    }
+
+    @Test(timeout = 30000)
+    public void testZeroMaxNeverUsedInterval() throws KeeperException, InterruptedException {
+        zk.create("/foo", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.CONTAINER);
+        AtomicInteger deletesQty = new AtomicInteger(0);
+        ContainerManager containerManager = new ContainerManager(serverFactory.getZooKeeperServer().getZKDatabase(), serverFactory.getZooKeeperServer().firstProcessor, 1, 100, 0) {
+            @Override
+            protected void postDeleteRequest(Request request) throws RequestProcessor.RequestProcessorException {
+                deletesQty.incrementAndGet();
+                super.postDeleteRequest(request);
+            }
+
+            @Override
+            protected long getElapsed(DataNode node) {
+                return 10000;   // some number greater than 0
+            }
+        };
+        containerManager.checkContainers(); // elapsed time will appear to be 0 - container will not get deleted
+        assertEquals(deletesQty.get(), 0);
+        assertNotNull("Container should not have been deleted", zk.exists("/foo", false));
+    }
+
     private void createNoStatVerifyResult(String newName) throws KeeperException, InterruptedException {
         assertNull("Node existed before created", zk.exists(newName, false));
         zk.create(newName, newName.getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.CONTAINER);