|
@@ -0,0 +1,1575 @@
|
|
|
+<!--
|
|
|
+Copyright 2002-2004 The Apache Software Foundation
|
|
|
+
|
|
|
+Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
+you may not use this file except in compliance with the License.
|
|
|
+You may obtain a copy of the License at
|
|
|
+
|
|
|
+http://www.apache.org/licenses/LICENSE-2.0
|
|
|
+
|
|
|
+Unless required by applicable law or agreed to in writing, software
|
|
|
+distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
+See the License for the specific language governing permissions and
|
|
|
+limitations under the License.
|
|
|
+//-->
|
|
|
+
|
|
|
+# ZooKeeper Administrator's Guide
|
|
|
+
|
|
|
+### A Guide to Deployment and Administration
|
|
|
+
|
|
|
+* [Deployment](#ch_deployment)
|
|
|
+ * [System Requirements](#sc_systemReq)
|
|
|
+ * [Supported Platforms](#sc_supportedPlatforms)
|
|
|
+ * [Required Software](#sc_requiredSoftware)
|
|
|
+ * [Clustered (Multi-Server) Setup](#sc_zkMulitServerSetup)
|
|
|
+ * [Single Server and Developer Setup](#sc_singleAndDevSetup)
|
|
|
+* [Administration](#ch_administration)
|
|
|
+ * [Designing a ZooKeeper Deployment](#sc_designing)
|
|
|
+ * [Cross Machine Requirements](#sc_CrossMachineRequirements)
|
|
|
+ * [Single Machine Requirements](#Single+Machine+Requirements)
|
|
|
+ * [Provisioning](#sc_provisioning)
|
|
|
+ * [Things to Consider: ZooKeeper Strengths and Limitations](#sc_strengthsAndLimitations)
|
|
|
+ * [Administering](#sc_administering)
|
|
|
+ * [Maintenance](#sc_maintenance)
|
|
|
+ * [Ongoing Data Directory Cleanup](#Ongoing+Data+Directory+Cleanup)
|
|
|
+ * [Debug Log Cleanup (log4j)](#Debug+Log+Cleanup+%28log4j%29)
|
|
|
+ * [Supervision](#sc_supervision)
|
|
|
+ * [Monitoring](#sc_monitoring)
|
|
|
+ * [Logging](#sc_logging)
|
|
|
+ * [Troubleshooting](#sc_troubleshooting)
|
|
|
+ * [Configuration Parameters](#sc_configuration)
|
|
|
+ * [Minimum Configuration](#sc_minimumConfiguration)
|
|
|
+ * [Advanced Configuration](#sc_advancedConfiguration)
|
|
|
+ * [Cluster Options](#sc_clusterOptions)
|
|
|
+ * [Encryption, Authentication, Authorization Options](#sc_authOptions)
|
|
|
+ * [Experimental Options/Features](#Experimental+Options%2FFeatures)
|
|
|
+ * [Unsafe Options](#Unsafe+Options)
|
|
|
+ * [Disabling data directory autocreation](#Disabling+data+directory+autocreation)
|
|
|
+ * [Enabling db existence validation](#sc_db_existence_validation)
|
|
|
+ * [Performance Tuning Options](#sc_performance_options)
|
|
|
+ * [Communication using the Netty framework](#Communication+using+the+Netty+framework)
|
|
|
+ * [AdminServer configuration](#sc_adminserver_config)
|
|
|
+ * [ZooKeeper Commands](#sc_zkCommands)
|
|
|
+ * [The Four Letter Words](#sc_4lw)
|
|
|
+ * [The AdminServer](#sc_adminserver)
|
|
|
+ * [Data File Management](#sc_dataFileManagement)
|
|
|
+ * [The Data Directory](#The+Data+Directory)
|
|
|
+ * [The Log Directory](#The+Log+Directory)
|
|
|
+ * [File Management](#sc_filemanagement)
|
|
|
+ * [Recovery - TxnLogToolkit](#Recovery+-+TxnLogToolkit)
|
|
|
+ * [Things to Avoid](#sc_commonProblems)
|
|
|
+ * [Best Practices](#sc_bestPractices)
|
|
|
+
|
|
|
+<a name="ch_deployment"></a>
|
|
|
+
|
|
|
+## Deployment
|
|
|
+
|
|
|
+This section contains information about deploying ZooKeeper and
|
|
|
+covers these topics:
|
|
|
+
|
|
|
+* [System Requirements](#sc_systemReq)
|
|
|
+* [Clustered (Multi-Server) Setup](#sc_zkMulitServerSetup)
|
|
|
+* [Single Server and Developer Setup](#sc_singleAndDevSetup)
|
|
|
+
|
|
|
+The first two sections assume you are interested in installing
|
|
|
+ZooKeeper in a production environment such as a datacenter. The final
|
|
|
+section covers situations in which you are setting up ZooKeeper on a
|
|
|
+limited basis - for evaluation, testing, or development - but not in a
|
|
|
+production environment.
|
|
|
+
|
|
|
+<a name="sc_systemReq"></a>
|
|
|
+
|
|
|
+### System Requirements
|
|
|
+
|
|
|
+<a name="sc_supportedPlatforms"></a>
|
|
|
+
|
|
|
+#### Supported Platforms
|
|
|
+
|
|
|
+ZooKeeper consists of multiple components. Some components are
|
|
|
+supported broadly, and other components are supported only on a smaller
|
|
|
+set of platforms.
|
|
|
+
|
|
|
+* **Client** is the Java client
|
|
|
+ library, used by applications to connect to a ZooKeeper ensemble.
|
|
|
+* **Server** is the Java server
|
|
|
+ that runs on the ZooKeeper ensemble nodes.
|
|
|
+* **Native Client** is a client
|
|
|
+ implemented in C, similar to the Java client, used by applications
|
|
|
+ to connect to a ZooKeeper ensemble.
|
|
|
+* **Contrib** refers to multiple
|
|
|
+ optional add-on components.
|
|
|
+
|
|
|
+The following matrix describes the level of support committed for
|
|
|
+running each component on different operating system platforms.
|
|
|
+
|
|
|
+##### Support Matrix
|
|
|
+
|
|
|
+| Operating System | Client | Server | Native Client | Contrib |
|
|
|
+|------------------|--------|--------|---------------|---------|
|
|
|
+| GNU/Linux | Development and Production | Development and Production | Development and Production | Development and Production |
|
|
|
+| Solaris | Development and Production | Development and Production | Not Supported | Not Supported |
|
|
|
+| FreeBSD | Development and Production | Development and Production | Not Supported | Not Supported |
|
|
|
+| Windows | Development and Production | Development and Production | Not Supported | Not Supported |
|
|
|
+| Mac OS X | Development Only | Development Only | Not Supported | Not Supported |
|
|
|
+
|
|
|
+For any operating system not explicitly mentioned as supported in
|
|
|
+the matrix, components may or may not work. The ZooKeeper community
|
|
|
+will fix obvious bugs that are reported for other platforms, but there
|
|
|
+is no full support.
|
|
|
+
|
|
|
+<a name="sc_requiredSoftware"></a>
|
|
|
+
|
|
|
+#### Required Software
|
|
|
+
|
|
|
+ZooKeeper runs in Java, release 1.7 or greater (JDK 7 or
|
|
|
+greater, FreeBSD support requires openjdk7). It runs as an
|
|
|
+_ensemble_ of ZooKeeper servers. Three
|
|
|
+ZooKeeper servers is the minimum recommended size for an
|
|
|
+ensemble, and we also recommend that they run on separate
|
|
|
+machines. At Yahoo!, ZooKeeper is usually deployed on
|
|
|
+dedicated RHEL boxes, with dual-core processors, 2GB of RAM,
|
|
|
+and 80GB IDE hard drives.
|
|
|
+
|
|
|
+<a name="sc_zkMulitServerSetup"></a>
|
|
|
+
|
|
|
+### Clustered (Multi-Server) Setup
|
|
|
+
|
|
|
+For reliable ZooKeeper service, you should deploy ZooKeeper in a
|
|
|
+cluster known as an _ensemble_. As long as a majority
|
|
|
+of the ensemble are up, the service will be available. Because ZooKeeper
|
|
|
+requires a majority, it is best to use an
|
|
|
+odd number of machines. For example, with four machines ZooKeeper can
|
|
|
+only handle the failure of a single machine; if two machines fail, the
|
|
|
+remaining two machines do not constitute a majority. However, with five
|
|
|
+machines ZooKeeper can handle the failure of two machines.
|
|
|
+
|
|
|
+######Note
|
|
|
+>As mentioned in the
|
|
|
+[ZooKeeper Getting Started Guide](zookeeperStarted.html)
|
|
|
+, a minimum of three servers is required for a fault-tolerant
|
|
|
+clustered setup, and it is strongly recommended that you have an
|
|
|
+odd number of servers.
|
|
|
+
|
|
|
+>Usually three servers is more than enough for a production
|
|
|
+install, but for maximum reliability during maintenance, you may
|
|
|
+wish to install five servers. With three servers, if you perform
|
|
|
+maintenance on one of them, you are vulnerable to a failure on one
|
|
|
+of the other two servers during that maintenance. If you have five
|
|
|
+of them running, you can take one down for maintenance, and know
|
|
|
+that you're still OK if one of the other four suddenly fails.
|
|
|
+
|
|
|
+>Your redundancy considerations should include all aspects of
|
|
|
+your environment. If you have three ZooKeeper servers, but their
|
|
|
+network cables are all plugged into the same network switch, then
|
|
|
+the failure of that switch will take down your entire ensemble.
|
|
|
+
|
|
|
+Here are the steps for setting up a server that will be part of an
|
|
|
+ensemble. These steps should be performed on every host in the
|
|
|
+ensemble:
|
|
|
+
|
|
|
+1. Install the Java JDK. You can use the native packaging system
|
|
|
+ for your system, or download the JDK from:
|
|
|
+ [http://java.sun.com/javase/downloads/index.jsp](http://java.sun.com/javase/downloads/index.jsp)
|
|
|
+
|
|
|
+2. Set the Java heap size. This is very important to avoid
|
|
|
+ swapping, which will seriously degrade ZooKeeper performance. To
|
|
|
+ determine the correct value, use load tests, and make sure you are
|
|
|
+ well below the usage limit that would cause you to swap. Be
|
|
|
+ conservative - use a maximum heap size of 3GB for a 4GB
|
|
|
+ machine.
|
|
|
+
|
|
|
+3. Install the ZooKeeper Server Package. It can be downloaded
|
|
|
+ from:
|
|
|
+ [http://zookeeper.apache.org/releases.html](http://zookeeper.apache.org/releases.html)
|
|
|
+
|
|
|
+4. Create a configuration file. This file can be called anything.
|
|
|
+ Use the following settings as a starting point:
|
|
|
+
|
|
|
+ tickTime=2000
|
|
|
+ dataDir=/var/lib/zookeeper/
|
|
|
+ clientPort=2181
|
|
|
+ initLimit=5
|
|
|
+ syncLimit=2
|
|
|
+ server.1=zoo1:2888:3888
|
|
|
+ server.2=zoo2:2888:3888
|
|
|
+ server.3=zoo3:2888:3888
|
|
|
+
|
|
|
+ You can find the meanings of these and other configuration
|
|
|
+ settings in the section [Configuration Parameters](#sc_configuration). A word
|
|
|
+ though about a few here:
|
|
|
+ Every machine that is part of the ZooKeeper ensemble should know
|
|
|
+ about every other machine in the ensemble. You accomplish this with
|
|
|
+ the series of lines of the form **server.id=host:port:port**. The parameters **host** and **port** are straightforward. You attribute the
|
|
|
+ server id to each machine by creating a file named
|
|
|
+ *myid*, one for each server, which resides in
|
|
|
+ that server's data directory, as specified by the configuration file
|
|
|
+ parameter **dataDir**.
|
|
|
+
|
|
|
+5. The myid file
|
|
|
+ consists of a single line containing only the text of that machine's
|
|
|
+ id. So *myid* of server 1 would contain the text
|
|
|
+ "1" and nothing else. The id must be unique within the
|
|
|
+ ensemble and should have a value between 1 and 255.
|
|
|
+ **IMPORTANT:** if you enable extended features such
|
|
|
+ as TTL Nodes (see below) the id must be between 1
|
|
|
+ and 254 due to internal limitations.
|
|
|
+
|
|
|
+6. Create an initialization marker file *initialize*
|
|
|
+ in the same directory as *myid*. This file indicates
|
|
|
+   that an empty data directory is expected. When present, an empty database
|
|
|
+ is created and the marker file deleted. When not present, an empty data
|
|
|
+ directory will mean this peer will not have voting rights and it will not
|
|
|
+ populate the data directory until it communicates with an active leader.
|
|
|
+ Intended use is to only create this file when bringing up a new
|
|
|
+ ensemble.
|
|
|
+
|
|
|
+7. If your configuration file is set up, you can start a
|
|
|
+ ZooKeeper server:
|
|
|
+
|
|
|
+ $ java -cp zookeeper.jar:lib/slf4j-api-1.7.5.jar:lib/slf4j-log4j12-1.7.5.jar:lib/log4j-1.2.17.jar:conf \\
|
|
|
+ org.apache.zookeeper.server.quorum.QuorumPeerMain zoo.cfg
|
|
|
+
|
|
|
+ QuorumPeerMain starts a ZooKeeper server,
|
|
|
+ [JMX](http://java.sun.com/javase/technologies/core/mntr-mgmt/javamanagement/)
|
|
|
+ management beans are also registered which allows
|
|
|
+ management through a JMX management console.
|
|
|
+ The [ZooKeeper JMX
|
|
|
+ document](zookeeperJMX.html) contains details on managing ZooKeeper with JMX.
|
|
|
+ See the script _bin/zkServer.sh_,
|
|
|
+ which is included in the release, for an example
|
|
|
+   of starting server instances. A consolidated example of steps 5-7 follows this list.
|
|
|
+8. Test your deployment by connecting to the hosts:
|
|
|
+ In Java, you can run the following command to execute
|
|
|
+ simple operations:
|
|
|
+
|
|
|
+ $ bin/zkCli.sh -server 127.0.0.1:2181
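+
+As a consolidated sketch of steps 5-7 above, the commands below create the *myid* and
+*initialize* files and start the server on the first host (run the equivalent on zoo2 and
+zoo3 with ids 2 and 3; the paths match the example configuration and are illustrative):
+
+    $ mkdir -p /var/lib/zookeeper
+    $ echo 1 > /var/lib/zookeeper/myid
+    $ touch /var/lib/zookeeper/initialize    # only when bootstrapping a brand new ensemble
+    $ bin/zkServer.sh start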
|
|
|
+
|
|
|
+<a name="sc_singleAndDevSetup"></a>
|
|
|
+
|
|
|
+### Single Server and Developer Setup
|
|
|
+
|
|
|
+If you want to setup ZooKeeper for development purposes, you will
|
|
|
+probably want to setup a single server instance of ZooKeeper, and then
|
|
|
+install either the Java or C client-side libraries and bindings on your
|
|
|
+development machine.
|
|
|
+
|
|
|
+The steps for setting up a single server instance are similar
|
|
|
+to the above, except the configuration file is simpler. You can find the
|
|
|
+complete instructions in the [Installing and
|
|
|
+Running ZooKeeper in Single Server Mode](zookeeperStarted.html#sc_InstallingSingleMode) section of the [ZooKeeper Getting Started
|
|
|
+Guide](zookeeperStarted.html).
|
|
|
+
|
|
|
+For information on installing the client side libraries, refer to
|
|
|
+the [Bindings](zookeeperProgrammers.html#ch_bindings)
|
|
|
+section of the [ZooKeeper
|
|
|
+Programmer's Guide](zookeeperProgrammers.html).
|
|
|
+
|
|
|
+<a name="ch_administration"></a>
|
|
|
+
|
|
|
+## Administration
|
|
|
+
|
|
|
+This section contains information about running and maintaining
|
|
|
+ZooKeeper and covers these topics:
|
|
|
+
|
|
|
+* [Designing a ZooKeeper Deployment](#sc_designing)
|
|
|
+* [Provisioning](#sc_provisioning)
|
|
|
+* [Things to Consider: ZooKeeper Strengths and Limitations](#sc_strengthsAndLimitations)
|
|
|
+* [Administering](#sc_administering)
|
|
|
+* [Maintenance](#sc_maintenance)
|
|
|
+* [Supervision](#sc_supervision)
|
|
|
+* [Monitoring](#sc_monitoring)
|
|
|
+* [Logging](#sc_logging)
|
|
|
+* [Troubleshooting](#sc_troubleshooting)
|
|
|
+* [Configuration Parameters](#sc_configuration)
|
|
|
+* [ZooKeeper Commands](#sc_zkCommands)
|
|
|
+* [Data File Management](#sc_dataFileManagement)
|
|
|
+* [Things to Avoid](#sc_commonProblems)
|
|
|
+* [Best Practices](#sc_bestPractices)
|
|
|
+
|
|
|
+<a name="sc_designing"></a>
|
|
|
+
|
|
|
+### Designing a ZooKeeper Deployment
|
|
|
+
|
|
|
+The reliability of ZooKeeper rests on two basic assumptions.
|
|
|
+
|
|
|
+1. Only a minority of servers in a deployment
|
|
|
+ will fail. _Failure_ in this context
|
|
|
+ means a machine crash, or some error in the network that
|
|
|
+ partitions a server off from the majority.
|
|
|
+1. Deployed machines operate correctly. To
|
|
|
+ operate correctly means to execute code correctly, to have
|
|
|
+ clocks that work properly, and to have storage and network
|
|
|
+ components that perform consistently.
|
|
|
+
|
|
|
+The sections below contain considerations for ZooKeeper
|
|
|
+administrators to maximize the probability for these assumptions
|
|
|
+to hold true. Some of these are cross-machine considerations,
|
|
|
+and others are things you should consider for each and every
|
|
|
+machine in your deployment.
|
|
|
+
|
|
|
+<a name="sc_CrossMachineRequirements"></a>
|
|
|
+
|
|
|
+#### Cross Machine Requirements
|
|
|
+
|
|
|
+For the ZooKeeper service to be active, there must be a
|
|
|
+majority of non-failing machines that can communicate with
|
|
|
+each other. To create a deployment that can tolerate the
|
|
|
+failure of F machines, you should count on deploying 2xF+1
|
|
|
+machines. Thus, a deployment that consists of three machines
|
|
|
+can handle one failure, and a deployment of five machines can
|
|
|
+handle two failures. Note that a deployment of six machines
|
|
|
+can only handle two failures since three machines is not a
|
|
|
+majority. For this reason, ZooKeeper deployments are usually
|
|
|
+made up of an odd number of machines.
|
|
|
+
|
|
|
+To achieve the highest probability of tolerating a failure
|
|
|
+you should try to make machine failures independent. For
|
|
|
+example, if most of the machines share the same switch,
|
|
|
+failure of that switch could cause a correlated failure and
|
|
|
+bring down the service. The same holds true of shared power
|
|
|
+circuits, cooling systems, etc.
|
|
|
+
|
|
|
+<a name="Single+Machine+Requirements"></a>
|
|
|
+
|
|
|
+#### Single Machine Requirements
|
|
|
+
|
|
|
+If ZooKeeper has to contend with other applications for
|
|
|
+access to resources like storage media, CPU, network, or
|
|
|
+memory, its performance will suffer markedly. ZooKeeper has
|
|
|
+strong durability guarantees, which means it uses storage
|
|
|
+media to log changes before the operation responsible for the
|
|
|
+change is allowed to complete. You should be aware of this
|
|
|
+dependency then, and take great care if you want to ensure
|
|
|
+that ZooKeeper operations aren’t held up by your media. Here
|
|
|
+are some things you can do to minimize that sort of
|
|
|
+degradation:
|
|
|
+
|
|
|
+* ZooKeeper's transaction log must be on a dedicated
|
|
|
+ device. (A dedicated partition is not enough.) ZooKeeper
|
|
|
+  writes the log sequentially, without seeking. Sharing your
|
|
|
+ log device with other processes can cause seeks and
|
|
|
+ contention, which in turn can cause multi-second
|
|
|
+ delays.
|
|
|
+* Do not put ZooKeeper in a situation that can cause a
|
|
|
+ swap. In order for ZooKeeper to function with any sort of
|
|
|
+ timeliness, it simply cannot be allowed to swap.
|
|
|
+ Therefore, make certain that the maximum heap size given
|
|
|
+ to ZooKeeper is not bigger than the amount of real memory
|
|
|
+ available to ZooKeeper. For more on this, see
|
|
|
+ [Things to Avoid](#sc_commonProblems)
|
|
|
+ below.
|
|
|
+
|
|
|
+<a name="sc_provisioning"></a>
|
|
|
+
|
|
|
+### Provisioning
|
|
|
+
|
|
|
+<a name="sc_strengthsAndLimitations"></a>
|
|
|
+
|
|
|
+### Things to Consider: ZooKeeper Strengths and Limitations
|
|
|
+
|
|
|
+<a name="sc_administering"></a>
|
|
|
+
|
|
|
+### Administering
|
|
|
+
|
|
|
+<a name="sc_maintenance"></a>
|
|
|
+
|
|
|
+### Maintenance
|
|
|
+
|
|
|
+Little long term maintenance is required for a ZooKeeper
|
|
|
+cluster; however, you must be aware of the following:
|
|
|
+
|
|
|
+<a name="Ongoing+Data+Directory+Cleanup"></a>
|
|
|
+
|
|
|
+#### Ongoing Data Directory Cleanup
|
|
|
+
|
|
|
+The ZooKeeper [Data
|
|
|
+Directory](#var_datadir) contains files which are a persistent copy
|
|
|
+of the znodes stored by a particular serving ensemble. These
|
|
|
+are the snapshot and transactional log files. As changes are
|
|
|
+made to the znodes these changes are appended to a
|
|
|
+transaction log. Occasionally, when a log grows large, a
|
|
|
+snapshot of the current state of all znodes will be written
|
|
|
+to the filesystem and a new transaction log file is created
|
|
|
+for future transactions. During snapshotting, ZooKeeper may
|
|
|
+continue appending incoming transactions to the old log file.
|
|
|
+Therefore, some transactions which are newer than a snapshot
|
|
|
+may be found in the last transaction log preceding the
|
|
|
+snapshot.
|
|
|
+
|
|
|
+A ZooKeeper server **will not remove
|
|
|
+old snapshots and log files** when using the default
|
|
|
+configuration (see autopurge below); this is the
|
|
|
+responsibility of the operator. Every serving environment is
|
|
|
+different and therefore the requirements of managing these
|
|
|
+files may differ from install to install (backup for example).
|
|
|
+
|
|
|
+The PurgeTxnLog utility implements a simple retention
|
|
|
+policy that administrators can use. The [API docs](index.html) contain details on
|
|
|
+calling conventions (arguments, etc...).
|
|
|
+
|
|
|
+In the following example the last <count> snapshots and
|
|
|
+their corresponding logs are retained and the others are
|
|
|
+deleted. The value of <count> should typically be
|
|
|
+greater than 3 (although not required, this provides 3 backups
|
|
|
+in the unlikely event a recent log has become corrupted). This
|
|
|
+can be run as a cron job on the ZooKeeper server machines to
|
|
|
+clean up the logs daily.
|
|
|
+
|
|
|
+ java -cp zookeeper.jar:lib/slf4j-api-1.7.5.jar:lib/slf4j-log4j12-1.7.5.jar:lib/log4j-1.2.17.jar:conf org.apache.zookeeper.server.PurgeTxnLog <dataDir> <snapDir> -n <count>
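+
+For instance, a nightly cron entry along the following lines applies that retention
+policy; the installation path, the service user and the retention count of 3 are
+illustrative:
+
+    # /etc/cron.d/zookeeper-purge
+    0 3 * * * zookeeper cd /opt/zookeeper && java -cp 'zookeeper.jar:lib/*:conf' org.apache.zookeeper.server.PurgeTxnLog /var/lib/zookeeper /var/lib/zookeeper -n 3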
|
|
|
+
|
|
|
+
|
|
|
+Automatic purging of the snapshots and corresponding
|
|
|
+transaction logs was introduced in version 3.4.0 and can be
|
|
|
+enabled via the following configuration parameters **autopurge.snapRetainCount** and **autopurge.purgeInterval**. For more on
|
|
|
+this, see [Advanced Configuration](#sc_advancedConfiguration)
|
|
|
+below.
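+
+For example, to retain the three most recent snapshots and purge once a day, the
+following lines could be added to the configuration file (values are illustrative):
+
+    autopurge.snapRetainCount=3
+    autopurge.purgeInterval=24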
|
|
|
+
|
|
|
+<a name="Debug+Log+Cleanup+%28log4j%29"></a>
|
|
|
+
|
|
|
+#### Debug Log Cleanup (log4j)
|
|
|
+
|
|
|
+See the section on [logging](#sc_logging) in this document. It is
|
|
|
+expected that you will set up a rolling file appender using the
|
|
|
+in-built log4j feature. The sample configuration file in the
|
|
|
+release tar's conf/log4j.properties provides an example of
|
|
|
+this.
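+
+As a rough sketch modeled on that sample, a rolling file appender can be configured
+along these lines (file location, sizes and the pattern are illustrative; see the
+shipped conf/log4j.properties for the authoritative version):
+
+    log4j.rootLogger=INFO, ROLLINGFILE
+    log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender
+    log4j.appender.ROLLINGFILE.File=/var/log/zookeeper/zookeeper.log
+    log4j.appender.ROLLINGFILE.MaxFileSize=10MB
+    log4j.appender.ROLLINGFILE.MaxBackupIndex=10
+    log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout
+    log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n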
|
|
|
+
|
|
|
+<a name="sc_supervision"></a>
|
|
|
+
|
|
|
+### Supervision
|
|
|
+
|
|
|
+You will want to have a supervisory process that manages
|
|
|
+each of your ZooKeeper server processes (JVM). The ZK server is
|
|
|
+designed to be "fail fast", meaning that it will shut down
|
|
|
+(process exit) if an error occurs that it cannot recover
|
|
|
+from. As a ZooKeeper serving cluster is highly reliable, this
|
|
|
+means that while the server may go down the cluster as a whole
|
|
|
+is still active and serving requests. Additionally, as the
|
|
|
+cluster is "self healing" the failed server once restarted will
|
|
|
+automatically rejoin the ensemble without any manual
|
|
|
+interaction.
|
|
|
+
|
|
|
+Having a supervisory process such as [daemontools](http://cr.yp.to/daemontools.html) or
|
|
|
+[SMF](http://en.wikipedia.org/wiki/Service\_Management\_Facility)
|
|
|
+(other options for supervisory process are also available, it's
|
|
|
+up to you which one you would like to use, these are just two
|
|
|
+examples) managing your ZooKeeper server ensures that if the
|
|
|
+process does exit abnormally it will automatically be restarted
|
|
|
+and will quickly rejoin the cluster.
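+
+As one illustration (daemontools, SMF or any other supervisor works equally well), a
+minimal systemd unit that restarts the server on abnormal exit could look like the
+following sketch; the installation path and service account are assumptions, and
+start-foreground requires a zkServer.sh version that supports it:
+
+    # /etc/systemd/system/zookeeper.service
+    [Unit]
+    Description=Apache ZooKeeper server
+    After=network.target
+
+    [Service]
+    User=zookeeper
+    ExecStart=/opt/zookeeper/bin/zkServer.sh start-foreground
+    Restart=always
+    RestartSec=5
+
+    [Install]
+    WantedBy=multi-user.target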
|
|
|
+
|
|
|
+It is also recommended to configure the ZooKeeper server process to
|
|
|
+terminate and dump its heap if an **OutOfMemoryError** occurs. This is achieved
|
|
|
+by launching the JVM with the following arguments on Linux and Windows
|
|
|
+respectively. The *zkServer.sh* and
|
|
|
+*zkServer.cmd* scripts that ship with ZooKeeper set
|
|
|
+these options.
|
|
|
+
|
|
|
+ -XX:+HeapDumpOnOutOfMemoryError -XX:OnOutOfMemoryError='kill -9 %p'
|
|
|
+
|
|
|
+ "-XX:+HeapDumpOnOutOfMemoryError" "-XX:OnOutOfMemoryError=cmd /c taskkill /pid %%%%p /t /f"
|
|
|
+
|
|
|
+<a name="sc_monitoring"></a>
|
|
|
+
|
|
|
+### Monitoring
|
|
|
+
|
|
|
+The ZooKeeper service can be monitored in one of two
|
|
|
+primary ways: 1) the command port through the use of [4 letter words](#sc_zkCommands) and 2) [JMX](zookeeperJMX.html). See the appropriate section for
|
|
|
+your environment/requirements.
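+
+For example, a quick liveness probe over the command port (assuming the four letter
+words you need are whitelisted, see [4lw.commands.whitelist](#sc_clusterOptions))
+looks like this:
+
+    $ echo ruok | nc 127.0.0.1 2181
+    imok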
|
|
|
+
|
|
|
+<a name="sc_logging"></a>
|
|
|
+
|
|
|
+### Logging
|
|
|
+
|
|
|
+ZooKeeper uses **[SLF4J](http://www.slf4j.org)**
|
|
|
+version 1.7.5 as its logging infrastructure. For backward compatibility it is bound to
|
|
|
+**LOG4J** but you can use
|
|
|
+**[LOGBack](http://logback.qos.ch/)**
|
|
|
+or any other supported logging framework of your choice.
|
|
|
+
|
|
|
+The ZooKeeper default *log4j.properties*
|
|
|
+file resides in the *conf* directory. Log4j requires that
|
|
|
+*log4j.properties* either be in the working directory
|
|
|
+(the directory from which ZooKeeper is run) or be accessible from the classpath.
|
|
|
+
|
|
|
+For more information about SLF4J, see
|
|
|
+[its manual](http://www.slf4j.org/manual.html).
|
|
|
+
|
|
|
+For more information about LOG4J, see
|
|
|
+[Log4j Default Initialization Procedure](http://logging.apache.org/log4j/1.2/manual.html#defaultInit)
|
|
|
+of the log4j manual.
|
|
|
+
|
|
|
+<a name="sc_troubleshooting"></a>
|
|
|
+
|
|
|
+### Troubleshooting
|
|
|
+
|
|
|
+* *Server not coming up because of file corruption* :
|
|
|
+ A server might not be able to read its database and fail to come up because of
|
|
|
+ some file corruption in the transaction logs of the ZooKeeper server. You will
|
|
|
+  see an IOException when loading the ZooKeeper database. In such a case,
|
|
|
+ make sure all the other servers in your ensemble are up and working. Use "stat"
|
|
|
+ command on the command port to see if they are in good health. After you have verified that
|
|
|
+ all the other servers of the ensemble are up, you can go ahead and clean the database
|
|
|
+ of the corrupt server. Delete all the files in datadir/version-2 and datalogdir/version-2/.
|
|
|
+ Restart the server.
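+
+A minimal sketch of that recovery procedure (host names and directories are examples;
+use the dataDir/dataLogDir values from your own configuration):
+
+    $ echo stat | nc zoo2 2181          # confirm the rest of the ensemble is healthy
+    $ echo stat | nc zoo3 2181
+    $ rm -rf /var/lib/zookeeper/version-2/* /var/lib/zookeeper-txnlog/version-2/*
+    $ bin/zkServer.sh restart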
|
|
|
+
|
|
|
+<a name="sc_configuration"></a>
|
|
|
+
|
|
|
+### Configuration Parameters
|
|
|
+
|
|
|
+ZooKeeper's behavior is governed by the ZooKeeper configuration
|
|
|
+file. This file is designed so that the exact same file can be used by
|
|
|
+all the servers that make up a ZooKeeper ensemble, assuming the disk
|
|
|
+layouts are the same. If servers use different configuration files, care
|
|
|
+must be taken to ensure that the list of servers in all of the different
|
|
|
+configuration files match.
|
|
|
+
|
|
|
+######Note
|
|
|
+>In 3.5.0 and later, some of these parameters should be placed in
|
|
|
+a dynamic configuration file. If they are placed in the static
|
|
|
+configuration file, ZooKeeper will automatically move them over to the
|
|
|
+dynamic configuration file. See [Dynamic Reconfiguration](zookeeperReconfig.html) for more information.
|
|
|
+
|
|
|
+<a name="sc_minimumConfiguration"></a>
|
|
|
+
|
|
|
+#### Minimum Configuration
|
|
|
+
|
|
|
+Here are the minimum configuration keywords that must be defined
|
|
|
+in the configuration file:
|
|
|
+
|
|
|
+* *clientPort* :
|
|
|
+    the port to listen on for client connections; that is, the
|
|
|
+ port that clients attempt to connect to.
|
|
|
+
|
|
|
+* *secureClientPort* :
|
|
|
+ the port to listen on for secure client connections using SSL.
|
|
|
+ **clientPort** specifies
|
|
|
+ the port for plaintext connections while **secureClientPort** specifies the port for SSL
|
|
|
+ connections. Specifying both enables mixed-mode while omitting
|
|
|
+ either will disable that mode.
|
|
|
+    Note that the SSL feature is enabled when the user sets
|
|
|
+    zookeeper.serverCnxnFactory and zookeeper.clientCnxnSocket to their Netty implementations.
|
|
|
+
|
|
|
+* *dataDir* :
|
|
|
+ the location where ZooKeeper will store the in-memory
|
|
|
+ database snapshots and, unless specified otherwise, the
|
|
|
+ transaction log of updates to the database.
|
|
|
+ ######Note
|
|
|
+ >Be careful where you put the transaction log. A
|
|
|
+ dedicated transaction log device is key to consistent good
|
|
|
+ performance. Putting the log on a busy device will adversely
|
|
|
+    affect performance.
|
|
|
+
|
|
|
+* *tickTime* :
|
|
|
+ the length of a single tick, which is the basic time unit
|
|
|
+ used by ZooKeeper, as measured in milliseconds. It is used to
|
|
|
+ regulate heartbeats, and timeouts. For example, the minimum
|
|
|
+ session timeout will be two ticks.
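+
+Putting these keywords together, a minimal standalone configuration file might look
+like the following (values are illustrative):
+
+    tickTime=2000
+    dataDir=/var/lib/zookeeper
+    clientPort=2181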
|
|
|
+
|
|
|
+<a name="sc_advancedConfiguration"></a>
|
|
|
+
|
|
|
+#### Advanced Configuration
|
|
|
+
|
|
|
+The configuration settings in this section are optional. You can
|
|
|
+use them to further fine tune the behaviour of your ZooKeeper servers.
|
|
|
+Some can also be set using Java system properties, generally of the
|
|
|
+form _zookeeper.keyword_. The exact system
|
|
|
+property, when available, is noted below.
|
|
|
+
|
|
|
+* *dataLogDir* :
|
|
|
+ (No Java system property)
|
|
|
+ This option will direct the machine to write the
|
|
|
+ transaction log to the **dataLogDir** rather than the **dataDir**. This allows a dedicated log
|
|
|
+ device to be used, and helps avoid competition between logging
|
|
|
+ and snapshots.
|
|
|
+ ######Note
|
|
|
+ >Having a dedicated log device has a large impact on
|
|
|
+ throughput and stable latencies. It is highly recommended to
|
|
|
+ dedicate a log device and set **dataLogDir** to point to a directory on
|
|
|
+ that device, and then make sure to point **dataDir** to a directory
|
|
|
+ _not_ residing on that device.
|
|
|
+
|
|
|
+* *globalOutstandingLimit* :
|
|
|
+ (Java system property: **zookeeper.globalOutstandingLimit.**)
|
|
|
+ Clients can submit requests faster than ZooKeeper can
|
|
|
+ process them, especially if there are a lot of clients. To
|
|
|
+ prevent ZooKeeper from running out of memory due to queued
|
|
|
+    requests, ZooKeeper will throttle clients so that there are no
|
|
|
+ more than globalOutstandingLimit outstanding requests in the
|
|
|
+ system. The default limit is 1,000.
|
|
|
+
|
|
|
+* *preAllocSize* :
|
|
|
+ (Java system property: **zookeeper.preAllocSize**)
|
|
|
+ To avoid seeks ZooKeeper allocates space in the
|
|
|
+ transaction log file in blocks of preAllocSize kilobytes. The
|
|
|
+ default block size is 64M. One reason for changing the size of
|
|
|
+ the blocks is to reduce the block size if snapshots are taken
|
|
|
+ more often. (Also, see **snapCount**).
|
|
|
+
|
|
|
+* *snapCount* :
|
|
|
+ (Java system property: **zookeeper.snapCount**)
|
|
|
+ ZooKeeper records its transactions using snapshots and
|
|
|
+    a transaction log (think write-ahead log). The number of
|
|
|
+ transactions recorded in the transaction log before a snapshot
|
|
|
+ can be taken (and the transaction log rolled) is determined
|
|
|
+ by snapCount. In order to prevent all of the machines in the quorum
|
|
|
+ from taking a snapshot at the same time, each ZooKeeper server
|
|
|
+ will take a snapshot when the number of transactions in the transaction log
|
|
|
+ reaches a runtime generated random value in the \[snapCount/2+1, snapCount]
|
|
|
+    range. The default snapCount is 100,000.
|
|
|
+
|
|
|
+* *maxClientCnxns* :
|
|
|
+ (No Java system property)
|
|
|
+ Limits the number of concurrent connections (at the socket
|
|
|
+ level) that a single client, identified by IP address, may make
|
|
|
+ to a single member of the ZooKeeper ensemble. This is used to
|
|
|
+ prevent certain classes of DoS attacks, including file
|
|
|
+ descriptor exhaustion. The default is 60. Setting this to 0
|
|
|
+ entirely removes the limit on concurrent connections.
|
|
|
+
|
|
|
+* *clientPortAddress* :
|
|
|
+ **New in 3.3.0:** the
|
|
|
+ address (ipv4, ipv6 or hostname) to listen for client
|
|
|
+ connections; that is, the address that clients attempt
|
|
|
+    to connect to. This is optional; by default we bind in
|
|
|
+ such a way that any connection to the **clientPort** for any
|
|
|
+ address/interface/nic on the server will be
|
|
|
+ accepted.
|
|
|
+
|
|
|
+* *minSessionTimeout* :
|
|
|
+ (No Java system property)
|
|
|
+ **New in 3.3.0:** the
|
|
|
+ minimum session timeout in milliseconds that the server
|
|
|
+ will allow the client to negotiate. Defaults to 2 times
|
|
|
+ the **tickTime**.
|
|
|
+
|
|
|
+* *maxSessionTimeout* :
|
|
|
+ (No Java system property)
|
|
|
+ **New in 3.3.0:** the
|
|
|
+ maximum session timeout in milliseconds that the server
|
|
|
+ will allow the client to negotiate. Defaults to 20 times
|
|
|
+ the **tickTime**.
|
|
|
+
|
|
|
+* *fsync.warningthresholdms* :
|
|
|
+ (Java system property: **zookeeper.fsync.warningthresholdms**)
|
|
|
+ **New in 3.3.4:** A
|
|
|
+ warning message will be output to the log whenever an
|
|
|
+ fsync in the Transactional Log (WAL) takes longer than
|
|
|
+    this value. The value is specified in milliseconds and
|
|
|
+ defaults to 1000. This value can only be set as a
|
|
|
+ system property.
|
|
|
+
|
|
|
+* *autopurge.snapRetainCount* :
|
|
|
+ (No Java system property)
|
|
|
+ **New in 3.4.0:**
|
|
|
+ When enabled, ZooKeeper auto purge feature retains
|
|
|
+ the **autopurge.snapRetainCount** most
|
|
|
+ recent snapshots and the corresponding transaction logs in the
|
|
|
+ **dataDir** and **dataLogDir** respectively and deletes the rest.
|
|
|
+ Defaults to 3. Minimum value is 3.
|
|
|
+
|
|
|
+* *autopurge.purgeInterval* :
|
|
|
+ (No Java system property)
|
|
|
+ **New in 3.4.0:** The
|
|
|
+    time interval in hours at which the purge task will
|
|
|
+ be triggered. Set to a positive integer (1 and above)
|
|
|
+ to enable the auto purging. Defaults to 0.
|
|
|
+
|
|
|
+* *syncEnabled* :
|
|
|
+ (Java system property: **zookeeper.observer.syncEnabled**)
|
|
|
+ **New in 3.4.6, 3.5.0:**
|
|
|
+    The observers now log transactions and write snapshots to disk
|
|
|
+ by default like the participants. This reduces the recovery time
|
|
|
+ of the observers on restart. Set to "false" to disable this
|
|
|
+ feature. Default is "true"
|
|
|
+
|
|
|
+<a name="sc_clusterOptions"></a>
|
|
|
+
|
|
|
+#### Cluster Options
|
|
|
+
|
|
|
+The options in this section are designed for use with an ensemble
|
|
|
+of servers -- that is, when deploying clusters of servers.
|
|
|
+
|
|
|
+* *electionAlg* :
|
|
|
+ (No Java system property)
|
|
|
+ Election implementation to use. A value of "1" corresponds to the
|
|
|
+ non-authenticated UDP-based version of fast leader election, "2"
|
|
|
+ corresponds to the authenticated UDP-based version of fast
|
|
|
+ leader election, and "3" corresponds to TCP-based version of
|
|
|
+ fast leader election. Currently, algorithm 3 is the default.
|
|
|
+ ######Note
|
|
|
+ >The implementations of leader election 1, and 2 are now
|
|
|
+ **deprecated**. We have the intention
|
|
|
+ of removing them in the next release, at which point only the
|
|
|
+ FastLeaderElection will be available.
|
|
|
+
|
|
|
+* *initLimit* :
|
|
|
+ (No Java system property)
|
|
|
+ Amount of time, in ticks (see [tickTime](#id_tickTime)), to allow followers to
|
|
|
+    connect and sync to a leader. Increase this value as needed, if
|
|
|
+ the amount of data managed by ZooKeeper is large.
|
|
|
+
|
|
|
+* *leaderServes* :
|
|
|
+ (Java system property: zookeeper.**leaderServes**)
|
|
|
+ Leader accepts client connections. Default value is "yes".
|
|
|
+ The leader machine coordinates updates. For higher update
|
|
|
+ throughput at the slight expense of read throughput the leader
|
|
|
+ can be configured to not accept clients and focus on
|
|
|
+ coordination. The default to this option is yes, which means
|
|
|
+ that a leader will accept client connections.
|
|
|
+ ######Note
|
|
|
+ >Turning on leader selection is highly recommended when
|
|
|
+ you have more than three ZooKeeper servers in an ensemble.
|
|
|
+
|
|
|
+* *server.x=[hostname]:nnnnn[:nnnnn], etc* :
|
|
|
+ (No Java system property)
|
|
|
+ servers making up the ZooKeeper ensemble. When the server
|
|
|
+ starts up, it determines which server it is by looking for the
|
|
|
+ file *myid* in the data directory. That file
|
|
|
+ contains the server number, in ASCII, and it should match
|
|
|
+ **x** in **server.x** in the left hand side of this
|
|
|
+ setting.
|
|
|
+    The list of ZooKeeper servers that is
|
|
|
+ used by the clients must match the list of ZooKeeper servers
|
|
|
+ that each ZooKeeper server has.
|
|
|
+ There are two port numbers **nnnnn**.
|
|
|
+    The first is used by followers to connect to the leader, and the second is for
|
|
|
+ leader election. If you want to test multiple servers on a single machine, then
|
|
|
+ different ports can be used for each server.
|
|
|
+
|
|
|
+* *syncLimit* :
|
|
|
+ (No Java system property)
|
|
|
+ Amount of time, in ticks (see [tickTime](#id_tickTime)), to allow followers to sync
|
|
|
+ with ZooKeeper. If followers fall too far behind a leader, they
|
|
|
+ will be dropped.
|
|
|
+
|
|
|
+* *group.x=nnnnn[:nnnnn]* :
|
|
|
+ (No Java system property)
|
|
|
+    Enables a hierarchical quorum construction. "x" is a group identifier
|
|
|
+ and the numbers following the "=" sign correspond to server identifiers.
|
|
|
+    The right-hand side of the assignment is a colon-separated list of server
|
|
|
+ identifiers. Note that groups must be disjoint and the union of all groups
|
|
|
+ must be the ZooKeeper ensemble.
|
|
|
+ You will find an example [here](zookeeperHierarchicalQuorums.html)
|
|
|
+
|
|
|
+* *weight.x=nnnnn* :
|
|
|
+ (No Java system property)
|
|
|
+ Used along with "group", it assigns a weight to a server when
|
|
|
+ forming quorums. Such a value corresponds to the weight of a server
|
|
|
+ when voting. There are a few parts of ZooKeeper that require voting
|
|
|
+ such as leader election and the atomic broadcast protocol. By default
|
|
|
+    the weight of a server is 1. If the configuration defines groups, but not
|
|
|
+ weights, then a value of 1 will be assigned to all servers.
|
|
|
+ You will find an example [here](zookeeperHierarchicalQuorums.html)
|
|
|
+
|
|
|
+* *cnxTimeout* :
|
|
|
+ (Java system property: zookeeper.**cnxTimeout**)
|
|
|
+ Sets the timeout value for opening connections for leader election notifications.
|
|
|
+ Only applicable if you are using electionAlg 3.
|
|
|
+ ######Note
|
|
|
+ >Default value is 5 seconds.
|
|
|
+
|
|
|
+* *standaloneEnabled* :
|
|
|
+ (No Java system property)
|
|
|
+ **New in 3.5.0:**
|
|
|
+ When set to false, a single server can be started in replicated
|
|
|
+ mode, a lone participant can run with observers, and a cluster
|
|
|
+ can reconfigure down to one node, and up from one node. The
|
|
|
+ default is true for backwards compatibility. It can be set
|
|
|
+ using QuorumPeerConfig's setStandaloneEnabled method or by
|
|
|
+ adding "standaloneEnabled=false" or "standaloneEnabled=true"
|
|
|
+ to a server's config file.
|
|
|
+
|
|
|
+* *reconfigEnabled* :
|
|
|
+ (No Java system property)
|
|
|
+ **New in 3.5.3:**
|
|
|
+ This controls the enabling or disabling of
|
|
|
+ [Dynamic Reconfiguration](zookeeperReconfig.html) feature. When the feature
|
|
|
+ is enabled, users can perform reconfigure operations through
|
|
|
+ the ZooKeeper client API or through ZooKeeper command line tools
|
|
|
+ assuming users are authorized to perform such operations.
|
|
|
+ When the feature is disabled, no user, including the super user,
|
|
|
+ can perform a reconfiguration. Any attempt to reconfigure will return an error.
|
|
|
+ **"reconfigEnabled"** option can be set as
|
|
|
+ **"reconfigEnabled=false"** or
|
|
|
+ **"reconfigEnabled=true"**
|
|
|
+ to a server's config file, or using QuorumPeerConfig's
|
|
|
+ setReconfigEnabled method. The default value is false.
|
|
|
+ If present, the value should be consistent across every server in
|
|
|
+ the entire ensemble. Setting the value as true on some servers and false
|
|
|
+ on other servers will cause inconsistent behavior depending on which server
|
|
|
+ is elected as leader. If the leader has a setting of
|
|
|
+ **"reconfigEnabled=true"**, then the ensemble
|
|
|
+ will have reconfig feature enabled. If the leader has a setting of
|
|
|
+ **"reconfigEnabled=false"**, then the ensemble
|
|
|
+ will have reconfig feature disabled. It is thus recommended to have a consistent
|
|
|
+ value for **"reconfigEnabled"** across servers
|
|
|
+ in the ensemble.
|
|
|
+
|
|
|
+* *4lw.commands.whitelist* :
|
|
|
+ (Java system property: **zookeeper.4lw.commands.whitelist**)
|
|
|
+ **New in 3.5.3:**
|
|
|
+ A list of comma separated [Four Letter Words](#sc_4lw)
|
|
|
+    commands that the user wants to use. A valid Four Letter Words
|
|
|
+    command must be put in this list, otherwise the ZooKeeper server will
|
|
|
+ not enable the command.
|
|
|
+ By default the whitelist only contains "srvr" command
|
|
|
+ which zkServer.sh uses. The rest of four letter word commands are disabled
|
|
|
+ by default.
|
|
|
+ Here's an example of the configuration that enables stat, ruok, conf, and isro
|
|
|
+    commands while disabling the rest of the Four Letter Words commands:
|
|
|
+
|
|
|
+ 4lw.commands.whitelist=stat, ruok, conf, isro
|
|
|
+
|
|
|
+
|
|
|
+If you really need to enable all four letter word commands by default, you can use
|
|
|
+the asterisk option so you don't have to include every command one by one in the list.
|
|
|
+As an example, this will enable all four letter word commands:
|
|
|
+
|
|
|
+
|
|
|
+ 4lw.commands.whitelist=*
|
|
|
+
|
|
|
+
|
|
|
+* *tcpKeepAlive* :
|
|
|
+ (Java system property: **zookeeper.tcpKeepAlive**)
|
|
|
+ **New in 3.5.4:**
|
|
|
+ Setting this to true sets the TCP keepAlive flag on the
|
|
|
+ sockets used by quorum members to perform elections.
|
|
|
+ This will allow for connections between quorum members to
|
|
|
+ remain up when there is network infrastructure that may
|
|
|
+ otherwise break them. Some NATs and firewalls may terminate
|
|
|
+ or lose state for long running or idle connections.
|
|
|
+ Enabling this option relies on OS level settings to work
|
|
|
+ properly, check your operating system's options regarding TCP
|
|
|
+ keepalive for more information. Defaults to
|
|
|
+ **false**.
|
|
|
+
|
|
|
+<a name="sc_authOptions"></a>
|
|
|
+
|
|
|
+#### Encryption, Authentication, Authorization Options
|
|
|
+
|
|
|
+The options in this section allow control over
|
|
|
+encryption/authentication/authorization performed by the service.
|
|
|
+
|
|
|
+* *DigestAuthenticationProvider.superDigest* :
|
|
|
+ (Java system property: **zookeeper.DigestAuthenticationProvider.superDigest**)
|
|
|
+ By default this feature is **disabled**
|
|
|
+ **New in 3.2:**
|
|
|
+ Enables a ZooKeeper ensemble administrator to access the
|
|
|
+ znode hierarchy as a "super" user. In particular no ACL
|
|
|
+ checking occurs for a user authenticated as
|
|
|
+ super.
|
|
|
+ org.apache.zookeeper.server.auth.DigestAuthenticationProvider
|
|
|
+ can be used to generate the superDigest, call it with
|
|
|
+ one parameter of "super:<password>". Provide the
|
|
|
+ generated "super:<data>" as the system property value
|
|
|
+    when starting each server of the ensemble (see the example following this list).
|
|
|
+ When authenticating to a ZooKeeper server (from a
|
|
|
+ ZooKeeper client) pass a scheme of "digest" and authdata
|
|
|
+ of "super:<password>". Note that digest auth passes
|
|
|
+    the authdata in plaintext to the server, so it would be
|
|
|
+ prudent to use this authentication method only on
|
|
|
+ localhost (not over the network) or over an encrypted
|
|
|
+ connection.
|
|
|
+
|
|
|
+* *X509AuthenticationProvider.superUser* :
|
|
|
+ (Java system property: **zookeeper.X509AuthenticationProvider.superUser**)
|
|
|
+ The SSL-backed way to enable a ZooKeeper ensemble
|
|
|
+ administrator to access the znode hierarchy as a "super" user.
|
|
|
+ When this parameter is set to an X500 principal name, only an
|
|
|
+ authenticated client with that principal will be able to bypass
|
|
|
+ ACL checking and have full privileges to all znodes.
|
|
|
+
|
|
|
+* *zookeeper.superUser* :
|
|
|
+ (Java system property: **zookeeper.superUser**)
|
|
|
+ Similar to **zookeeper.X509AuthenticationProvider.superUser**
|
|
|
+ but is generic for SASL based logins. It stores the name of
|
|
|
+ a user that can access the znode hierarchy as a "super" user.
|
|
|
+
|
|
|
+* *ssl.keyStore.location and ssl.keyStore.password* :
|
|
|
+ (Java system properties: **zookeeper.ssl.keyStore.location** and **zookeeper.ssl.keyStore.password**)
|
|
|
+ Specifies the file path to a JKS containing the local
|
|
|
+ credentials to be used for SSL connections, and the
|
|
|
+ password to unlock the file.
|
|
|
+
|
|
|
+* *ssl.trustStore.location and ssl.trustStore.password* :
|
|
|
+ (Java system properties: **zookeeper.ssl.trustStore.location** and **zookeeper.ssl.trustStore.password**)
|
|
|
+ Specifies the file path to a JKS containing the remote
|
|
|
+ credentials to be used for SSL connections, and the
|
|
|
+ password to unlock the file.
|
|
|
+
|
|
|
+* *ssl.authProvider* :
|
|
|
+ (Java system property: **zookeeper.ssl.authProvider**)
|
|
|
+ Specifies a subclass of **org.apache.zookeeper.auth.X509AuthenticationProvider**
|
|
|
+ to use for secure client authentication. This is useful in
|
|
|
+ certificate key infrastructures that do not use JKS. It may be
|
|
|
+ necessary to extend **javax.net.ssl.X509KeyManager** and **javax.net.ssl.X509TrustManager**
|
|
|
+ to get the desired behavior from the SSL stack. To configure the
|
|
|
+ ZooKeeper server to use the custom provider for authentication,
|
|
|
+ choose a scheme name for the custom AuthenticationProvider and
|
|
|
+ set the property **zookeeper.authProvider.[scheme]** to the fully-qualified class name of the custom
|
|
|
+ implementation. This will load the provider into the ProviderRegistry.
|
|
|
+ Then set this property **zookeeper.ssl.authProvider=[scheme]** and that provider
|
|
|
+ will be used for secure authentication.
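+
+As an illustrative sketch of the superDigest workflow described above (the password is
+a placeholder, the classpath and the use of SERVER_JVMFLAGS are assumptions to adapt to
+your own start scripts, and the exact output format may vary by version):
+
+    $ java -cp 'zookeeper.jar:lib/*' \
+        org.apache.zookeeper.server.auth.DigestAuthenticationProvider super:mypassword
+    super:mypassword->super:<base64 digest>
+
+    # supply the generated value as a system property when starting each server
+    SERVER_JVMFLAGS="-Dzookeeper.DigestAuthenticationProvider.superDigest=super:<base64 digest>"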
|
|
|
+
|
|
|
+<a name="Experimental+Options%2FFeatures"></a>
|
|
|
+
|
|
|
+#### Experimental Options/Features
|
|
|
+
|
|
|
+New features that are currently considered experimental.
|
|
|
+
|
|
|
+* *Read Only Mode Server* :
|
|
|
+ (Java system property: **readonlymode.enabled**)
|
|
|
+ **New in 3.4.0:**
|
|
|
+ Setting this value to true enables Read Only Mode server
|
|
|
+    support (disabled by default). ROM allows client
|
|
|
+ sessions which requested ROM support to connect to the
|
|
|
+ server even when the server might be partitioned from
|
|
|
+ the quorum. In this mode ROM clients can still read
|
|
|
+ values from the ZK service, but will be unable to write
|
|
|
+ values and see changes from other clients. See
|
|
|
+ ZOOKEEPER-784 for more details.
|
|
|
+
|
|
|
+<a name="Unsafe+Options"></a>
|
|
|
+
|
|
|
+#### Unsafe Options
|
|
|
+
|
|
|
+The following options can be useful, but be careful when you use
|
|
|
+them. The risk of each is explained along with the explanation of what
|
|
|
+the variable does.
|
|
|
+
|
|
|
+* *forceSync* :
|
|
|
+ (Java system property: **zookeeper.forceSync**)
|
|
|
+ Requires updates to be synced to media of the transaction
|
|
|
+ log before finishing processing the update. If this option is
|
|
|
+ set to no, ZooKeeper will not require updates to be synced to
|
|
|
+ the media.
|
|
|
+
|
|
|
+* *jute.maxbuffer:* :
|
|
|
+ (Java system property:**jute.maxbuffer**)
|
|
|
+ This option can only be set as a Java system property.
|
|
|
+ There is no zookeeper prefix on it. It specifies the maximum
|
|
|
+ size of the data that can be stored in a znode. The default is
|
|
|
+ 0xfffff, or just under 1M. If this option is changed, the system
|
|
|
+ property must be set on all servers and clients otherwise
|
|
|
+ problems will arise. This is really a sanity check. ZooKeeper is
|
|
|
+ designed to store data on the order of kilobytes in size.
|
|
|
+
|
|
|
+* *skipACL* :
|
|
|
+ (Java system property: **zookeeper.skipACL**)
|
|
|
+ Skips ACL checks. This results in a boost in throughput,
|
|
|
+ but opens up full access to the data tree to everyone.
|
|
|
+
|
|
|
+* *quorumListenOnAllIPs* :
|
|
|
+ When set to true the ZooKeeper server will listen
|
|
|
+ for connections from its peers on all available IP addresses,
|
|
|
+ and not only the address configured in the server list of the
|
|
|
+ configuration file. It affects the connections handling the
|
|
|
+ ZAB protocol and the Fast Leader Election protocol. Default
|
|
|
+ value is **false**.
|
|
|
+
|
|
|
+<a name="Disabling+data+directory+autocreation"></a>
|
|
|
+
|
|
|
+#### Disabling data directory autocreation
|
|
|
+
|
|
|
+**New in 3.5:** The default
|
|
|
+behavior of a ZooKeeper server is to automatically create the
|
|
|
+data directory (specified in the configuration file) when
|
|
|
+started if that directory does not already exist. This can be
|
|
|
+inconvenient and even dangerous in some cases. Take the case
|
|
|
+where a configuration change is made to a running server,
|
|
|
+wherein the **dataDir** parameter
|
|
|
+is accidentally changed. When the ZooKeeper server is
|
|
|
+restarted it will create this non-existent directory and begin
|
|
|
+serving - with an empty znode namespace. This scenario can
|
|
|
+result in an effective "split brain" situation (i.e. data in
|
|
|
+both the new invalid directory and the original valid data
|
|
|
+store). As such it would be good to have an option to turn off
|
|
|
+this autocreate behavior. In general for production
|
|
|
+environments this should be done; unfortunately, the
|
|
|
+default legacy behavior cannot be changed at this point and
|
|
|
+therefore this must be done on a case by case basis. This is
|
|
|
+left to users and to packagers of ZooKeeper distributions.
|
|
|
+
|
|
|
+When running **zkServer.sh** autocreate can be disabled
|
|
|
+by setting the environment variable **ZOO_DATADIR_AUTOCREATE_DISABLE** to 1.
|
|
|
+When running ZooKeeper servers directly from class files this
|
|
|
+can be accomplished by setting **zookeeper.datadir.autocreate=false** on
|
|
|
+the java command line, i.e. **-Dzookeeper.datadir.autocreate=false**
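+
+For example (a sketch; the classpath mirrors the earlier examples and the paths are
+assumptions):
+
+    # via the bundled scripts, using the environment variable described above
+    $ ZOO_DATADIR_AUTOCREATE_DISABLE=1 bin/zkServer.sh start
+
+    # when launching the server class directly
+    $ java -Dzookeeper.datadir.autocreate=false \
+        -cp zookeeper.jar:lib/slf4j-api-1.7.5.jar:lib/slf4j-log4j12-1.7.5.jar:lib/log4j-1.2.17.jar:conf \
+        org.apache.zookeeper.server.quorum.QuorumPeerMain zoo.cfg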
|
|
|
+
|
|
|
+When this feature is disabled, and the ZooKeeper server
|
|
|
+determines that the required directories do not exist it will
|
|
|
+generate an error and refuse to start.
|
|
|
+
|
|
|
+A new script **zkServer-initialize.sh** is provided to
|
|
|
+support this new feature. If autocreate is disabled it is
|
|
|
+necessary for the user to first install ZooKeeper, then create
|
|
|
+the data directory (and potentially txnlog directory), and
|
|
|
+then start the server. Otherwise as mentioned in the previous
|
|
|
+paragraph the server will not start. Running **zkServer-initialize.sh** will create the
|
|
|
+required directories, and optionally setup the myid file
|
|
|
+(optional command line parameter). This script can be used
|
|
|
+even if the autocreate feature itself is not used, and will
|
|
|
+likely be of use to users as this (setup, including creation
|
|
|
+of the myid file) has been an issue for users in the past.
|
|
|
+Note that this script only ensures that the data directories exist;
|
|
|
+it does not create a config file, but rather requires a config
|
|
|
+file to be available in order to execute.
|
|
|
+
|
|
|
+<a name="sc_db_existence_validation"></a>
|
|
|
+
|
|
|
+#### Enabling db existence validation
|
|
|
+
|
|
|
+**New in 3.6.0:** The default
|
|
|
+behavior of a ZooKeeper server on startup when no data tree
|
|
|
+is found is to set zxid to zero and join the quorum as a
|
|
|
+voting member. This can be dangerous if some event (e.g. a
|
|
|
+rogue 'rm -rf') has removed the data directory while the
|
|
|
+server was down since this server may help elect a leader
|
|
|
+that is missing transactions. Enabling db existence validation
|
|
|
+will change the behavior on startup when no data tree is
|
|
|
+found: the server joins the ensemble as a non-voting participant
|
|
|
+until it is able to sync with the leader and acquire an up-to-date
|
|
|
+version of the ensemble data. To indicate an empty data tree is
|
|
|
+expected (ensemble creation), the user should place a file
|
|
|
+'initialize' in the same directory as 'myid'. This file will
|
|
|
+be detected and deleted by the server on startup.
|
|
|
+
|
|
|
+Initialization validation can be enabled when running
|
|
|
+ZooKeeper servers directly from class files by setting
|
|
|
+**zookeeper.db.autocreate=false**
|
|
|
+on the java command line, i.e.
|
|
|
+**-Dzookeeper.db.autocreate=false**.
|
|
|
+Running **zkServer-initialize.sh**
|
|
|
+will create the required initialization file.
|
|
|
+
|
|
|
+<a name="sc_performance_options"></a>
|
|
|
+
|
|
|
+#### Performance Tuning Options
|
|
|
+
|
|
|
+**New in 3.5.0:** Several subsystems have been reworked
|
|
|
+to improve read throughput. This includes multi-threading of the NIO communication subsystem and
|
|
|
+request processing pipeline (Commit Processor). NIO is the default client/server communication
|
|
|
+subsystem. Its threading model comprises 1 acceptor thread, 1-N selector threads and 0-M
|
|
|
+socket I/O worker threads. In the request processing pipeline the system can be configured
|
|
|
+to process multiple read requests at once while maintaining the same consistency guarantee
|
|
|
+(same-session read-after-write). The Commit Processor threading model comprises 1 main
|
|
|
+thread and 0-N worker threads.
|
|
|
+
|
|
|
+The default values are aimed at maximizing read throughput on a dedicated ZooKeeper machine.
|
|
|
+Both subsystems need to have a sufficient number of threads to achieve peak read throughput.
|
|
|
+
|
|
|
+* *zookeeper.nio.numSelectorThreads* :
|
|
|
+ (Java system property only: **zookeeper.nio.numSelectorThreads**)
|
|
|
+ **New in 3.5.0:**
|
|
|
+ Number of NIO selector threads. At least 1 selector thread required.
|
|
|
+ It is recommended to use more than one selector for large numbers
|
|
|
+ of client connections. The default value is sqrt( number of cpu cores / 2 ).
|
|
|
+
|
|
|
+* *zookeeper.nio.numWorkerThreads* :
|
|
|
+ (Java system property only: **zookeeper.nio.numWorkerThreads**)
|
|
|
+ **New in 3.5.0:**
|
|
|
+ Number of NIO worker threads. If configured with 0 worker threads, the selector threads
|
|
|
+ do the socket I/O directly. The default value is 2 times the number of cpu cores.
|
|
|
+
|
|
|
+* *zookeeper.commitProcessor.numWorkerThreads* :
|
|
|
+ (Java system property only: **zookeeper.commitProcessor.numWorkerThreads**)
|
|
|
+ **New in 3.5.0:**
|
|
|
+ Number of Commit Processor worker threads. If configured with 0 worker threads, the main thread
|
|
|
+ will process the request directly. The default value is the number of cpu cores.
|
|
|
+
|
|
|
+* *znode.container.checkIntervalMs* :
|
|
|
+ (Java system property only)
|
|
|
+ **New in 3.6.0:** The
|
|
|
+ time interval in milliseconds for each check of candidate container
|
|
|
+ and ttl nodes. Default is "60000".
|
|
|
+
|
|
|
+* *znode.container.maxPerMinute* :
|
|
|
+ (Java system property only)
|
|
|
+ **New in 3.6.0:** The
|
|
|
+ maximum number of container and ttl nodes that can be deleted per
|
|
|
+ minute. This prevents herding during container deletion.
|
|
|
+ Default is "10000".
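+
+The thread counts above are plain JVM system properties; as a sketch, they could be
+passed to the server JVM like this (the specific values, and the use of conf/java.env
+with SERVER_JVMFLAGS, are assumptions to adapt to your own start scripts):
+
+    # conf/java.env
+    SERVER_JVMFLAGS="-Dzookeeper.nio.numSelectorThreads=4 \
+      -Dzookeeper.nio.numWorkerThreads=16 \
+      -Dzookeeper.commitProcessor.numWorkerThreads=8"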
|
|
|
+
|
|
|
+<a name="Communication+using+the+Netty+framework"></a>
|
|
|
+
|
|
|
+#### Communication using the Netty framework
|
|
|
+
|
|
|
+[Netty](http://netty.io)
|
|
|
+is an NIO based client/server communication framework; it
|
|
|
+simplifies (over NIO being used directly) many of the
|
|
|
+complexities of network level communication for java
|
|
|
+applications. Additionally the Netty framework has built
|
|
|
+in support for encryption (SSL) and authentication
|
|
|
+(certificates). These are optional features and can be
|
|
|
+turned on or off individually.
|
|
|
+
|
|
|
+In versions 3.5+, a ZooKeeper server can use Netty
|
|
|
+instead of NIO (default option) by setting the Java system
|
|
|
+property **zookeeper.serverCnxnFactory**
|
|
|
+to **org.apache.zookeeper.server.NettyServerCnxnFactory**;
|
|
|
+for the client, set **zookeeper.clientCnxnSocket**
|
|
|
+to **org.apache.zookeeper.ClientCnxnSocketNetty**.
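+
+For example (a sketch; the classpath is an assumption and must include the Netty jar
+shipped under lib), a server can be started with Netty enabled like this:
+
+    $ java -Dzookeeper.serverCnxnFactory=org.apache.zookeeper.server.NettyServerCnxnFactory \
+        -cp 'zookeeper.jar:lib/*:conf' \
+        org.apache.zookeeper.server.quorum.QuorumPeerMain zoo.cfg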
|
|
|
+
|
|
|
+TBD - tuning options for netty - currently there are none that are netty specific but we should add some. Esp around max bound on the number of reader worker threads netty creates.
|
|
|
+
|
|
|
+TBD - how to manage encryption
|
|
|
+
|
|
|
+TBD - how to manage certificates
|
|
|
+
|
|
|
+<a name="sc_adminserver_config"></a>
|
|
|
+
|
|
|
+#### AdminServer configuration
|
|
|
+
|
|
|
+**New in 3.5.0:** The following
|
|
|
+options are used to configure the [AdminServer](#sc_adminserver).
|
|
|
+
|
|
|
+* *admin.enableServer* :
|
|
|
+ (Java system property: **zookeeper.admin.enableServer**)
|
|
|
+ Set to "false" to disable the AdminServer. By default the
|
|
|
+ AdminServer is enabled.
|
|
|
+
|
|
|
+* *admin.serverAddress* :
|
|
|
+ (Java system property: **zookeeper.admin.serverAddress**)
|
|
|
+ The address the embedded Jetty server listens on. Defaults to 0.0.0.0.
|
|
|
+
|
|
|
+* *admin.serverPort* :
|
|
|
+ (Java system property: **zookeeper.admin.serverPort**)
|
|
|
+ The port the embedded Jetty server listens on. Defaults to 8080.
|
|
|
+
|
|
|
+* *admin.idleTimeout* :
|
|
|
+ (Java system property: **zookeeper.admin.idleTimeout**)
|
|
|
+ Set the maximum idle time in milliseconds that a connection can wait
|
|
|
+ before sending or receiving data. Defaults to 30000 ms.
|
|
|
+
|
|
|
+* *admin.commandURL* :
|
|
|
+ (Java system property: **zookeeper.admin.commandURL**)
|
|
|
+ The URL for listing and issuing commands relative to the
|
|
|
+ root URL. Defaults to "/commands".
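+
+With the defaults above, the command index and individual commands can then be fetched
+over HTTP; for example (host and port reflect the default settings):
+
+    $ curl http://localhost:8080/commands
+    $ curl http://localhost:8080/commands/stat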
|
|
|
+
|
|
|
<a name="sc_zkCommands"></a>

### ZooKeeper Commands

<a name="sc_4lw"></a>

#### The Four Letter Words

ZooKeeper responds to a small set of commands. Each command is
composed of four letters. You issue the commands to ZooKeeper via telnet
or nc, at the client port.

Three of the more interesting commands: "stat" gives some
general information about the server and connected clients,
while "srvr" and "cons" give extended details on the server and
connections respectively.

**New in 3.5.3:**
Four Letter Words need to be explicitly whitelisted before use.
Please refer to **4lw.commands.whitelist**,
described in the [cluster configuration section](#sc_clusterOptions), for details.
Going forward, the Four Letter Words will be deprecated; please use the
[AdminServer](#sc_adminserver) instead.

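For example, a whitelist entry in zoo.cfg and a quick check with nc might look like the following; the whitelist value and the port are illustrative, so adjust them to your deployment:

    # zoo.cfg (example whitelist; "*" would allow every command)
    4lw.commands.whitelist=stat,ruok,conf,isro

    # Issue a whitelisted command against the client port
    $ echo stat | nc localhost 2181
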
* *conf* :
    **New in 3.3.0:** Print
    details about serving configuration.

* *cons* :
    **New in 3.3.0:** List
    full connection/session details for all clients connected
    to this server. Includes information on numbers of packets
    received/sent, session id, operation latencies, last
    operation performed, etc.

* *crst* :
    **New in 3.3.0:** Reset
    connection/session statistics for all connections.

* *dump* :
    Lists the outstanding sessions and ephemeral nodes. This
    only works on the leader.

* *envi* :
    Print details about the serving environment.

* *ruok* :
    Tests if the server is running in a non-error state. The server
    will respond with "imok" if it is running. Otherwise it will not
    respond at all.
    A response of "imok" does not necessarily indicate that the
    server has joined the quorum, just that the server process is active
    and bound to the specified client port. Use "stat" for details on
    state with respect to the quorum and for client connection information.

* *srst* :
    Reset server statistics.

* *srvr* :
    **New in 3.3.0:** Lists
    full details for the server.

* *stat* :
    Lists brief details for the server and connected
    clients.

* *wchs* :
    **New in 3.3.0:** Lists
    brief information on watches for the server.

* *wchc* :
    **New in 3.3.0:** Lists
    detailed information on watches for the server, by
    session. This outputs a list of sessions (connections)
    with associated watches (paths). Note, depending on the
    number of watches this operation may be expensive (i.e.,
    impact server performance), so use it carefully.

* *dirs* :
    **New in 3.5.1:**
    Shows the total size of snapshot and log files in bytes.

* *wchp* :
    **New in 3.3.0:** Lists
    detailed information on watches for the server, by path.
    This outputs a list of paths (znodes) with associated
    sessions. Note, depending on the number of watches this
    operation may be expensive (i.e., impact server performance),
    so use it carefully.

* *mntr* :
    **New in 3.4.0:** Outputs a list
    of variables that can be used for monitoring the health of the cluster.

        $ echo mntr | nc localhost 2185
        zk_version 3.4.0
        zk_avg_latency 0
        zk_max_latency 0
        zk_min_latency 0
        zk_packets_received 70
        zk_packets_sent 69
        zk_outstanding_requests 0
        zk_server_state leader
        zk_znode_count 4
        zk_watch_count 0
        zk_ephemerals_count 0
        zk_approximate_data_size 27
        zk_followers 4 - only exposed by the Leader
        zk_synced_followers 4 - only exposed by the Leader
        zk_pending_syncs 0 - only exposed by the Leader
        zk_open_file_descriptor_count 23 - only available on Unix platforms
        zk_max_file_descriptor_count 1024 - only available on Unix platforms

The output is compatible with the Java properties format and the content
may change over time (new keys may be added). Your scripts should expect changes.
ATTENTION: some of the keys are platform specific and some of the keys are only exported by the Leader.
The output contains multiple lines with the following format:

    key \t value

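Because each line is a tab-separated key/value pair, the output is easy to post-process. A small sketch that extracts a single metric; the host, port, and key are examples only:

    # Print just the outstanding request count (example host/port/key)
    $ echo mntr | nc localhost 2181 | awk -F '\t' '$1 == "zk_outstanding_requests" { print $2 }'
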
* *isro* :
    **New in 3.4.0:** Tests if the
    server is running in read-only mode. The server will respond with
    "ro" if in read-only mode or "rw" if not in read-only mode.

* *gtmk* :
    Gets the current trace mask as a 64-bit signed long value in
    decimal format. See `stmk` for an explanation of
    the possible values.
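    For example, querying a server that still has the default trace mask
    described under `stmk` would look roughly like this (the host and port are
    illustrative; 306 is simply the decimal form of the default mask 0b0100110010):

        $ echo gtmk | nc localhost 2181
        306
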
* *stmk* :
    Sets the current trace mask. The trace mask is 64 bits,
    where each bit enables or disables a specific category of trace
    logging on the server. Log4J must be configured to enable the
    `TRACE` level first in order to see trace logging
    messages. The bits of the trace mask correspond to the following
    trace logging categories.

    | Trace Mask Bit Values | Category |
    |-----------------------|---------------------|
    | 0b0000000000 | Unused, reserved for future use. |
    | 0b0000000010 | Logs client requests, excluding ping requests. |
    | 0b0000000100 | Unused, reserved for future use. |
    | 0b0000001000 | Logs client ping requests. |
    | 0b0000010000 | Logs packets received from the quorum peer that is the current leader, excluding ping requests. |
    | 0b0000100000 | Logs addition, removal and validation of client sessions. |
    | 0b0001000000 | Logs delivery of watch events to client sessions. |
    | 0b0010000000 | Logs ping packets received from the quorum peer that is the current leader. |
    | 0b0100000000 | Unused, reserved for future use. |
    | 0b1000000000 | Unused, reserved for future use. |

    All remaining bits in the 64-bit value are unused and
    reserved for future use. Multiple trace logging categories are
    specified by calculating the bitwise OR of the documented values.
    The default trace mask is 0b0100110010. Thus, by default, trace
    logging includes client requests, packets received from the
    leader, and sessions.
    To set a different trace mask, send a request containing the
    `stmk` four-letter word followed by the trace
    mask represented as a 64-bit signed long value. This example uses
    the Perl `pack` function to construct a trace
    mask that enables all trace logging categories described above and
    converts it to a 64-bit signed long value with big-endian byte
    order. The result is appended to `stmk` and sent
    to the server using netcat. The server responds with the new
    trace mask in decimal format.

        $ perl -e "print 'stmk', pack('q>', 0b0011111010)" | nc localhost 2181
        250

Here's an example of the **ruok**
command:

    $ echo ruok | nc 127.0.0.1 5111
    imok

<a name="sc_adminserver"></a>

#### The AdminServer

**New in 3.5.0:** The AdminServer is
an embedded Jetty server that provides an HTTP interface to the four
letter word commands. By default, the server is started on port 8080,
and commands are issued by going to the URL "/commands/\[command name]",
e.g., http://localhost:8080/commands/stat. The command response is
returned as JSON. Unlike the original protocol, commands are not
restricted to four-letter names, and commands can have multiple names;
for instance, "stmk" can also be referred to as "set_trace_mask". To
view a list of all available commands, point a browser to the URL
/commands (e.g., http://localhost:8080/commands). See the [AdminServer configuration options](#sc_adminserver_config)
for how to change the port and URLs.

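From the command line, the same endpoints can be fetched with any HTTP client; a quick sketch with curl against the default port (the host, port, and command name are examples):

    # List the available commands
    $ curl http://localhost:8080/commands

    # Fetch one command's output as JSON
    $ curl http://localhost:8080/commands/stat
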
The AdminServer is enabled by default, but can be disabled by either:

* Setting the zookeeper.admin.enableServer system
  property to false.
* Removing Jetty from the classpath. (This option is
  useful if you would like to override ZooKeeper's Jetty
  dependency.)

Note that the TCP four letter word interface is still available if
the AdminServer is disabled.

<a name="sc_dataFileManagement"></a>

### Data File Management

ZooKeeper stores its data in a data directory and its transaction
log in a transaction log directory. By default these two directories are
the same. The server can (and should) be configured to store the
transaction log files in a separate directory from the data files.
Throughput increases and latency decreases when the transaction logs reside
on a dedicated log device.

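A minimal zoo.cfg sketch of the split layout; the paths are placeholders for wherever your dedicated devices are mounted:

    # zoo.cfg (example paths only)
    # snapshots and the myid file
    dataDir=/var/lib/zookeeper/data
    # transaction logs, ideally on a dedicated device
    dataLogDir=/var/lib/zookeeper/txnlog
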
<a name="The+Data+Directory"></a>

#### The Data Directory

This directory has two or three files in it:

* *myid* - contains a single integer in
  human readable ASCII text that represents the server id.
* *initialize* - its presence indicates that the absence of a
  data tree is expected; it is cleaned up once the data tree is created.
* *snapshot.<zxid>* - holds the fuzzy
  snapshot of a data tree.

Each ZooKeeper server has a unique id. This id is used in two
places: the *myid* file and the configuration file.
The *myid* file identifies the server that
corresponds to the given data directory. The configuration file lists
the contact information for each server identified by its server id.
When a ZooKeeper server instance starts, it reads its id from the
*myid* file and then, using that id, reads from the
configuration file, looking up the port on which it should
listen.

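As a sketch of how the two fit together, a server whose myid file contains 2 picks out the server.2 line from its configuration; the path, hostnames, and ports below are placeholders:

    # Data directory of server 2 (example path)
    $ cat /var/lib/zookeeper/data/myid
    2

    # Matching entries in zoo.cfg (example hosts and quorum/election ports)
    server.1=zk1.example.com:2888:3888
    server.2=zk2.example.com:2888:3888
    server.3=zk3.example.com:2888:3888
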
The *snapshot* files stored in the data
directory are fuzzy snapshots in the sense that during the time the
ZooKeeper server is taking the snapshot, updates are occurring to the
data tree. The suffix of the *snapshot* file names
is the _zxid_, the ZooKeeper transaction id, of the
last committed transaction at the start of the snapshot. Thus, the
snapshot includes a subset of the updates to the data tree that
occurred while the snapshot was in process. The snapshot, then, may
not correspond to any data tree that actually existed, and for this
reason we refer to it as a fuzzy snapshot. Still, ZooKeeper can
recover using this snapshot because it takes advantage of the
idempotent nature of its updates. By replaying the transaction log
against fuzzy snapshots ZooKeeper gets the state of the system at the
end of the log.

<a name="The+Log+Directory"></a>

#### The Log Directory

The Log Directory contains the ZooKeeper transaction logs.
Before any update takes place, ZooKeeper ensures that the transaction
that represents the update is written to non-volatile storage. A new
log file is started when the number of transactions written to the
current log file reaches a (variable) threshold. The threshold is
computed using the same parameter which influences the frequency of
snapshotting (see snapCount above). The log file's suffix is the first
zxid written to that log.

<a name="sc_filemanagement"></a>

#### File Management

The format of snapshot and log files does not change between
standalone ZooKeeper servers and different configurations of
replicated ZooKeeper servers. Therefore, you can pull these files from
a running replicated ZooKeeper server to a development machine with a
standalone ZooKeeper server for troubleshooting.

Using older log and snapshot files, you can look at the previous
state of ZooKeeper servers and even restore that state. The
LogFormatter class allows an administrator to look at the transactions
in a log.

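A sketch of invoking LogFormatter directly; the fully qualified class name `org.apache.zookeeper.server.LogFormatter` and the classpath layout are assumptions that may vary between releases, and the log path is a placeholder (newer releases also ship `zkTxnLogToolkit.sh`, described below, for similar purposes):

    # Run from the ZooKeeper installation directory; adjust the classpath for your release
    $ java -cp "lib/*" org.apache.zookeeper.server.LogFormatter \
        /var/lib/zookeeper/txnlog/version-2/log.100000001
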
The ZooKeeper server creates snapshot and log files, but
never deletes them. The retention policy of the data and log
files is implemented outside of the ZooKeeper server. The
server itself only needs the latest complete fuzzy snapshot, all log
files following it, and the last log file preceding it. The latter
requirement is necessary to include updates which happened after this
snapshot was started but went into the existing log file at that time.
This is possible because snapshotting and rolling over of logs
proceed somewhat independently in ZooKeeper. See the
[maintenance](#sc_maintenance) section in
this document for more details on setting a retention policy
and maintenance of ZooKeeper storage.

###### Note
>The data stored in these files is not encrypted. In the case of
storing sensitive data in ZooKeeper, necessary measures need to be
taken to prevent unauthorized access. Such measures are external to
ZooKeeper (e.g., control access to the files) and depend on the
individual settings in which it is being deployed.

<a name="Recovery+-+TxnLogToolkit"></a>

#### Recovery - TxnLogToolkit

TxnLogToolkit is a command line tool shipped with ZooKeeper which
is capable of recovering transaction log entries with a broken CRC.

Running it without any command line parameters or with the `-h,--help` argument, it outputs the following help page:

    $ bin/zkTxnLogToolkit.sh
    usage: TxnLogToolkit [-dhrv] txn_log_file_name
    -d,--dump      Dump mode. Dump all entries of the log file. (this is the default)
    -h,--help      Print help message
    -r,--recover   Recovery mode. Re-calculate CRC for broken entries.
    -v,--verbose   Be verbose in recovery mode: print all entries, not just fixed ones.
    -y,--yes       Non-interactive mode: repair all CRC errors without asking

The default behaviour is safe: it dumps the entries of the given
transaction log file to the screen (the same as using the `-d,--dump` parameter):

    $ bin/zkTxnLogToolkit.sh log.100000001
    ZooKeeper Transactional Log File with dbid 0 txnlog format version 2
    4/5/18 2:15:58 PM CEST session 0x16295bafcc40000 cxid 0x0 zxid 0x100000001 createSession 30000
    CRC ERROR - 4/5/18 2:16:05 PM CEST session 0x16295bafcc40000 cxid 0x1 zxid 0x100000002 closeSession null
    4/5/18 2:16:05 PM CEST session 0x16295bafcc40000 cxid 0x1 zxid 0x100000002 closeSession null
    4/5/18 2:16:12 PM CEST session 0x26295bafcc90000 cxid 0x0 zxid 0x100000003 createSession 30000
    4/5/18 2:17:34 PM CEST session 0x26295bafcc90000 cxid 0x0 zxid 0x200000001 closeSession null
    4/5/18 2:17:34 PM CEST session 0x16295bd23720000 cxid 0x0 zxid 0x200000002 createSession 30000
    4/5/18 2:18:02 PM CEST session 0x16295bd23720000 cxid 0x2 zxid 0x200000003 create '/andor,#626262,v{s{31,s{'world,'anyone}}},F,1
    EOF reached after 6 txns.

There's a CRC error in the 2nd entry of the above transaction log file. In **dump**
mode, the toolkit only prints this information to the screen without touching the original file. In
**recovery** mode (`-r,--recover` flag) the original file still remains
untouched and all transactions are copied over to a new txn log file with a ".fixed" suffix. The tool recalculates
the CRC values and writes the calculated value if it doesn't match the original txn entry.
By default, the tool works interactively: it asks for confirmation whenever a CRC error is encountered.

    $ bin/zkTxnLogToolkit.sh -r log.100000001
    ZooKeeper Transactional Log File with dbid 0 txnlog format version 2
    CRC ERROR - 4/5/18 2:16:05 PM CEST session 0x16295bafcc40000 cxid 0x1 zxid 0x100000002 closeSession null
    Would you like to fix it (Yes/No/Abort) ?

Answering **Yes** means the newly calculated CRC value will be written
to the new file. **No** means that the original CRC value will be copied over.
**Abort** aborts the entire operation and exits.
(In this case the ".fixed" file is not deleted and is left in a half-complete state: it contains only the entries which
have already been processed, or only the header if the operation was aborted at the first entry.)

    $ bin/zkTxnLogToolkit.sh -r log.100000001
    ZooKeeper Transactional Log File with dbid 0 txnlog format version 2
    CRC ERROR - 4/5/18 2:16:05 PM CEST session 0x16295bafcc40000 cxid 0x1 zxid 0x100000002 closeSession null
    Would you like to fix it (Yes/No/Abort) ? y
    EOF reached after 6 txns.
    Recovery file log.100000001.fixed has been written with 1 fixed CRC error(s)

The default behaviour of recovery is to be silent: only entries with a CRC error get printed to the screen.
One can turn on verbose mode with the `-v,--verbose` parameter to see all records.
Interactive mode can be turned off with the `-y,--yes` parameter. In this case all CRC errors will be fixed
in the new transaction file.

<a name="sc_commonProblems"></a>

### Things to Avoid

Here are some common problems you can avoid by configuring
ZooKeeper correctly:

* *inconsistent lists of servers* :
    The list of ZooKeeper servers used by the clients must match
    the list of ZooKeeper servers that each ZooKeeper server has.
    Things work okay if the client list is a subset of the real list,
    but things will really act strange if clients have a list of
    ZooKeeper servers that are in different ZooKeeper clusters. Also,
    the server lists in each ZooKeeper server configuration file
    should be consistent with one another.

* *incorrect placement of transaction log* :
    The most performance-critical part of ZooKeeper is the
    transaction log. ZooKeeper syncs transactions to media before it
    returns a response. A dedicated transaction log device is key to
    consistent good performance. Putting the log on a busy device will
    adversely affect performance. If you only have one storage device,
    put trace files on NFS and increase the snapshotCount; it doesn't
    eliminate the problem, but it should mitigate it.

* *incorrect Java heap size* :
    You should take special care to set your Java max heap size
    correctly. In particular, you should not create a situation in
    which ZooKeeper swaps to disk. The disk is death to ZooKeeper.
    Everything is ordered, so if processing one request swaps to
    disk, all other queued requests will probably do the same. DON'T SWAP.
    Be conservative in your estimates: if you have 4G of RAM, do
    not set the Java max heap size to 6G or even 4G. For example, it
    is more likely you would use a 3G heap for a 4G machine, as the
    operating system and the cache also need memory. The best and only
    recommended practice for estimating the heap size your system needs
    is to run load tests, and then make sure you are well below the
    usage limit that would cause the system to swap. (A sketch of
    capping the heap follows this list.)

* *Publicly accessible deployment* :
    A ZooKeeper ensemble is expected to operate in a trusted computing environment.
    It is thus recommended to deploy ZooKeeper behind a firewall.

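One way to cap the heap, assuming the stock `zkServer.sh` launcher and a `conf/java.env` file that it sources; the file name and the 3G figure are illustrative, so size the heap from your own load tests:

    # conf/java.env (example value only; derive the real figure from load testing)
    export SERVER_JVMFLAGS="-Xmx3g $SERVER_JVMFLAGS"
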
<a name="sc_bestPractices"></a>

### Best Practices

For best results, take note of the following good
ZooKeeper practices:

For multi-tenant installations see the [section](zookeeperProgrammers.html#ch_zkSessions)
detailing ZooKeeper "chroot" support; this can be very useful
when deploying many applications/services interfacing to a
single ZooKeeper cluster.

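For example, a client can be confined to one application's subtree simply by appending the chroot path to its connection string; the hostnames and path below are placeholders:

    # All paths used by this client are resolved under /apps/app1
    $ bin/zkCli.sh -server zk1.example.com:2181,zk2.example.com:2181,zk3.example.com:2181/apps/app1
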