Browse Source

ZOOKEEPER-3153: Create MarkDown files and build process for them

In this sub-task we have transformed the Forrest XML documents into MarkDown (.md) files and provided a (Maven-based) solution to create HTML documentation from them.
PDF support has been dropped, since it is not really used and overcomplicates the build.

The generated HTML content looks similar to, but not identical with, the pages generated from the Forrest XMLs.

Change-Id: Id35984eca5d37b9e3074eab939be5c9b4cc80257

Author: Tamas Penzes <tamaas@cloudera.com>

Reviewers: andor@apache.org

Closes #643 from tamaashu/ZOOKEEPER-3153
Tamas Penzes 6 years ago
parent
commit
d6a12a80ba
49 changed files with 9966 additions and 0 deletions
  1. pom.xml (+236 -0)
  2. zookeeper-docs/pom.xml (+61 -0)
  3. zookeeper-docs/src/main/resources/markdown/html/footer.html (+18 -0)
  4. zookeeper-docs/src/main/resources/markdown/html/header.html (+128 -0)
  5. zookeeper-docs/src/main/resources/markdown/images/2pc.jpg (BIN)
  6. zookeeper-docs/src/main/resources/markdown/images/bk-overview.jpg (BIN)
  7. zookeeper-docs/src/main/resources/markdown/images/favicon.ico (BIN)
  8. zookeeper-docs/src/main/resources/markdown/images/hadoop-logo.jpg (BIN)
  9. zookeeper-docs/src/main/resources/markdown/images/state_dia.dia (BIN)
  10. zookeeper-docs/src/main/resources/markdown/images/state_dia.jpg (BIN)
  11. zookeeper-docs/src/main/resources/markdown/images/zkarch.jpg (BIN)
  12. zookeeper-docs/src/main/resources/markdown/images/zkcomponents.jpg (BIN)
  13. zookeeper-docs/src/main/resources/markdown/images/zknamespace.jpg (BIN)
  14. zookeeper-docs/src/main/resources/markdown/images/zkperfRW-3.2.jpg (BIN)
  15. zookeeper-docs/src/main/resources/markdown/images/zkperfRW.jpg (BIN)
  16. zookeeper-docs/src/main/resources/markdown/images/zkperfreliability.jpg (BIN)
  17. zookeeper-docs/src/main/resources/markdown/images/zkservice.jpg (BIN)
  18. zookeeper-docs/src/main/resources/markdown/images/zookeeper_small.gif (BIN)
  19. zookeeper-docs/src/main/resources/markdown/index.md (+58 -0)
  20. zookeeper-docs/src/main/resources/markdown/javaExample.md (+627 -0)
  21. zookeeper-docs/src/main/resources/markdown/recipes.md (+416 -0)
  22. zookeeper-docs/src/main/resources/markdown/skin/basic.css (+167 -0)
  23. zookeeper-docs/src/main/resources/markdown/skin/chapter.gif (BIN)
  24. zookeeper-docs/src/main/resources/markdown/skin/chapter_open.gif (BIN)
  25. zookeeper-docs/src/main/resources/markdown/skin/current.gif (BIN)
  26. zookeeper-docs/src/main/resources/markdown/skin/getBlank.js (+40 -0)
  27. zookeeper-docs/src/main/resources/markdown/skin/getMenu.js (+45 -0)
  28. zookeeper-docs/src/main/resources/markdown/skin/header_white_line.gif (BIN)
  29. zookeeper-docs/src/main/resources/markdown/skin/init.js (+57 -0)
  30. zookeeper-docs/src/main/resources/markdown/skin/instruction_arrow.png (BIN)
  31. zookeeper-docs/src/main/resources/markdown/skin/menu.js (+48 -0)
  32. zookeeper-docs/src/main/resources/markdown/skin/page.gif (BIN)
  33. zookeeper-docs/src/main/resources/markdown/skin/print.css (+54 -0)
  34. zookeeper-docs/src/main/resources/markdown/skin/printer.gif (BIN)
  35. zookeeper-docs/src/main/resources/markdown/skin/profile.css (+159 -0)
  36. zookeeper-docs/src/main/resources/markdown/skin/prototype.js (+1257 -0)
  37. zookeeper-docs/src/main/resources/markdown/skin/screen.css (+531 -0)
  38. zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md (+1575 -0)
  39. zookeeper-docs/src/main/resources/markdown/zookeeperHierarchicalQuorums.md (+47 -0)
  40. zookeeper-docs/src/main/resources/markdown/zookeeperInternals.md (+370 -0)
  41. zookeeper-docs/src/main/resources/markdown/zookeeperJMX.md (+118 -0)
  42. zookeeper-docs/src/main/resources/markdown/zookeeperObservers.md (+106 -0)
  43. zookeeper-docs/src/main/resources/markdown/zookeeperOtherInfo.md (+22 -0)
  44. zookeeper-docs/src/main/resources/markdown/zookeeperOver.md (+343 -0)
  45. zookeeper-docs/src/main/resources/markdown/zookeeperProgrammers.md (+1519 -0)
  46. zookeeper-docs/src/main/resources/markdown/zookeeperQuotas.md (+61 -0)
  47. zookeeper-docs/src/main/resources/markdown/zookeeperReconfig.md (+873 -0)
  48. zookeeper-docs/src/main/resources/markdown/zookeeperStarted.md (+364 -0)
  49. zookeeper-docs/src/main/resources/markdown/zookeeperTutorial.md (+666 -0)

+ 236 - 0
pom.xml

@@ -0,0 +1,236 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache</groupId>
+    <artifactId>apache</artifactId>
+    <version>18</version>
+    <relativePath/>
+    <!-- no parent resolution -->
+  </parent>
+  <groupId>org.apache.zookeeper</groupId>
+  <artifactId>zookeeper</artifactId>
+  <packaging>pom</packaging>
+  <version>2.6.0-SNAPSHOT</version>
+  <name>Apache ZooKeeper</name>
+  <description>
+    ZooKeeper is a centralized service for maintaining configuration information, naming,
+    providing distributed synchronization, and providing group services. All of these kinds
+    of services are used in some form or another by distributed applications. Each time they
+    are implemented there is a lot of work that goes into fixing the bugs and race conditions
+    that are inevitable. Because of the difficulty of implementing these kinds of services,
+    applications initially usually skimp on them, which makes them brittle in the presence of
+    change and difficult to manage. Even when done correctly, different implementations of
+    these services lead to management complexity when the applications are deployed.
+  </description>
+  <url>http://zookeeper.apache.org</url>
+  <inceptionYear>2008</inceptionYear>
+  <!-- Set here so we can consistently use the correct name, even on branches with
+       an ASF parent pom older than v15. Also uses the url from v18.
+    -->
+  <licenses>
+    <license>
+      <name>Apache License, Version 2.0</name>
+      <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
+      <distribution>repo</distribution>
+    </license>
+  </licenses>
+
+  <modules>
+    <module>zookeeper-docs</module>
+  </modules>
+  <scm>
+    <connection>scm:git:git://git.apache.org/zookeeper.git</connection>
+    <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/zookeeper.git</developerConnection>
+    <url>https://git-wip-us.apache.org/repos/asf?p=zookeeper.git</url>
+  </scm>
+  <issueManagement>
+    <system>JIRA</system>
+    <url>http://issues.apache.org/jira/browse/ZOOKEEPER</url>
+  </issueManagement>
+  <ciManagement>
+    <system>hudson</system>
+    <url>http://hudson.zones.apache.org/hudson/view/ZooKeeper/job/ZooKeeper-TRUNK/</url>
+  </ciManagement>
+  <mailingLists>
+    <mailingList>
+      <name>User List</name>
+      <subscribe>user-subscribe@zookeeper.apache.org</subscribe>
+      <unsubscribe>user-unsubscribe@zookeeper.apache.org</unsubscribe>
+      <post>user@zookeeper.apache.org</post>
+      <archive>http://mail-archives.apache.org/mod_mbox/zookeeper-user/</archive>
+    </mailingList>
+    <mailingList>
+      <name>Developer List</name>
+      <subscribe>dev-subscribe@zookeeper.apache.org</subscribe>
+      <unsubscribe>dev-unsubscribe@zookeeper.apache.org</unsubscribe>
+      <post>dev@zookeeper.apache.org</post>
+      <archive>http://mail-archives.apache.org/mod_mbox/zookeeper-dev/</archive>
+    </mailingList>
+    <mailingList>
+      <name>Commits List</name>
+      <subscribe>commits-subscribe@zookeeper.apache.org</subscribe>
+      <unsubscribe>commits-unsubscribe@zookeeper.apache.org</unsubscribe>
+      <archive>http://mail-archives.apache.org/mod_mbox/zookeeper-commits/</archive>
+    </mailingList>
+    <mailingList>
+      <name>Issues List</name>
+      <subscribe>issues-subscribe@zookeeper.apache.org</subscribe>
+      <unsubscribe>issues-unsubscribe@zookeeper.apache.org</unsubscribe>
+      <archive>http://mail-archives.apache.org/mod_mbox/zookeeper-issues/</archive>
+    </mailingList>
+    <mailingList>
+      <name>Builds List</name>
+      <subscribe>builds-subscribe@zookeeper.apache.org</subscribe>
+      <unsubscribe>builds-unsubscribe@zookeeper.apache.org</unsubscribe>
+      <archive>http://mail-archives.apache.org/mod_mbox/zookeeper-builds/</archive>
+    </mailingList>
+  </mailingLists>
+  <developers>
+    <developer>
+      <id>tdunning</id>
+      <name>Ted Dunning</name>
+      <email>tdunning@apache.org</email>
+      <timezone>-8</timezone>
+    </developer>
+    <developer>
+      <id>camille</id>
+      <name>Camille Fournier</name>
+      <email>camille@apache.org</email>
+      <timezone>-5</timezone>
+    </developer>
+    <developer>
+      <id>phunt</id>
+      <name>Patrick Hunt</name>
+      <email>phunt@apache.org</email>
+      <timezone>-8</timezone>
+    </developer>
+    <developer>
+      <id>fpj</id>
+      <name>Flavio Junqueira</name>
+      <email>fpj@apache.org</email>
+      <timezone>+0</timezone>
+    </developer>
+    <developer>
+      <id>ivank</id>
+      <name>Ivan Kelly</name>
+      <email>ivank@apache.org</email>
+      <timezone>+2</timezone>
+    </developer>
+    <developer>
+      <id>mahadev</id>
+      <name>Mahadev Konar</name>
+      <email>mahadev@apache.org</email>
+      <timezone>-8</timezone>
+    </developer>
+    <developer>
+      <id>michim</id>
+      <name>Michi Mutsuzaki</name>
+      <email>michim@apache.org</email>
+      <timezone>-8</timezone>
+    </developer>
+    <developer>
+      <id>cnauroth</id>
+      <name>Chris Nauroth</name>
+      <email>cnauroth@apache.org</email>
+      <timezone>-8</timezone>
+    </developer>
+    <developer>
+      <id>breed</id>
+      <name>Benjamin Reed</name>
+      <email>breed@apache.org</email>
+      <timezone>-8</timezone>
+    </developer>
+    <developer>
+      <id>henry</id>
+      <name>Henry Robinson</name>
+      <email>henry@apache.org</email>
+      <timezone>-8</timezone>
+    </developer>
+    <developer>
+      <id>rgs</id>
+      <name>Raul Gutierrez Segales</name>
+      <email>rgs@apache.org</email>
+      <timezone>-8</timezone>
+    </developer>
+    <developer>
+      <id>rakeshr</id>
+      <name>Rakesh Radhakrishnan</name>
+      <email>rakeshr@apache.org</email>
+      <timezone>+5:30</timezone>
+    </developer>
+    <developer>
+      <id>hanm</id>
+      <name>Michael Han</name>
+      <email>hanm@apache.org</email>
+      <timezone>-8</timezone>
+    </developer>
+    <developer>
+      <id>gkesavan</id>
+      <name>Giridharan Kesavan</name>
+      <email>gkesavan@apache.org</email>
+      <timezone>-8</timezone>
+    </developer>
+    <developer>
+      <id>akornev</id>
+      <name>Andrew Kornev</name>
+      <email>akornev@apache.org</email>
+    </developer>
+    <developer>
+      <id>shralex</id>
+      <name>Alex Shraer</name>
+      <email>shralex@apache.org</email>
+      <timezone>-8</timezone>
+    </developer>
+    <developer>
+      <id>thawan</id>
+      <name>Thawan Kooburat</name>
+      <email>thawan@apache.org</email>
+      <timezone>-8</timezone>
+    </developer>
+    <developer>
+      <id>hdeng</id>
+      <name>Hongchao Deng</name>
+      <email>hdeng@apache.org</email>
+      <timezone>-8</timezone>
+    </developer>
+    <developer>
+      <id>arshad</id>
+      <name>Mohammad Arshad</name>
+      <email>arshad@apache.org</email>
+      <timezone>+5:30</timezone>
+    </developer>
+    <developer>
+      <id>afine</id>
+      <name>Abraham Fine</name>
+      <email>afine@apache.org</email>
+      <timezone>-8</timezone>
+    </developer>
+    <developer>
+      <id>andor</id>
+      <name>Andor Molnar</name>
+      <email>andor@apache.org</email>
+      <timezone>+1</timezone>
+    </developer>
+  </developers>
+
+</project>

+ 61 - 0
zookeeper-docs/pom.xml

@@ -0,0 +1,61 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <!--
+    /**
+     * Licensed to the Apache Software Foundation (ASF) under one
+     * or more contributor license agreements.  See the NOTICE file
+     * distributed with this work for additional information
+     * regarding copyright ownership.  The ASF licenses this file
+     * to you under the Apache License, Version 2.0 (the
+     * "License"); you may not use this file except in compliance
+     * with the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    -->
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.zookeeper</groupId>
+        <artifactId>zookeeper</artifactId>
+        <version>2.6.0-SNAPSHOT</version>
+        <relativePath>..</relativePath>
+    </parent>
+
+    <groupId>org.apache.zookeeper</groupId>
+    <artifactId>zookeeper-docs</artifactId>
+    <version>2.6.0-SNAPSHOT</version>
+    <name>Apache ZooKeeper - Documentation</name>
+    <description>Documentation</description>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>com.ruleoftech</groupId>
+                <artifactId>markdown-page-generator-plugin</artifactId>
+                <version>0.10</version>
+                <executions>
+                    <execution>
+                        <phase>process-sources</phase>
+                        <goals>
+                            <goal>generate</goal>
+                        </goals>
+                    </execution>
+                </executions>
+                <configuration>
+                    <headerHtmlFile>${project.basedir}/src/main/resources/markdown/html/header.html</headerHtmlFile>
+                    <footerHtmlFile>${project.basedir}/src/main/resources/markdown/html/footer.html</footerHtmlFile>
+                    <copyDirectories>images,skin</copyDirectories>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>

+ 18 - 0
zookeeper-docs/src/main/resources/markdown/html/footer.html

@@ -0,0 +1,18 @@
+</div>
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+    <div class="lastmodified">
+        <script type="text/javascript">
+        <!--
+            document.write("Last Published: " + document.lastModified);
+        //  -->
+        </script>
+    </div>
+    <div class="copyright">
+        Copyright &copy; <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+    </div>
+    <div id="logos"></div>
+</div>
+</body>
+</html>

+ 128 - 0
zookeeper-docs/src/main/resources/markdown/html/header.html

@@ -0,0 +1,128 @@
+
+<!DOCTYPE html>
+<html>
+<head>
+    <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+    <title>ZooKeeper: Because Coordinating Distributed Systems is a Zoo</title>
+    <link type="text/css" href="skin/basic.css" rel="stylesheet">
+    <link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+    <link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+    <link type="text/css" href="skin/profile.css" rel="stylesheet">
+    <script src="skin/getBlank.js" language="javascript" type="text/javascript"></script>
+    <script src="skin/getMenu.js" language="javascript" type="text/javascript"></script>
+    <script src="skin/init.js" language="javascript" type="text/javascript"></script>
+    <link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init();">
+<div id="top">
+    <div class="breadtrail">
+        <a href="http://www.apache.org/">Apache</a> &gt; <a href="http://zookeeper.apache.org/">ZooKeeper</a>
+    </div>
+    <div class="header">
+        <div class="grouplogo">
+            <a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+        </div>
+        <div class="projectlogo">
+            <a href="http://zookeeper.apache.org/"><img class="logoImage" alt="ZooKeeper" src="images/zookeeper_small.gif" title="ZooKeeper: distributed coordination"></a>
+        </div>
+        <div class="searchbox">
+            <form action="http://www.google.com/search" method="get">
+                <input value="zookeeper.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp;
+                <input name="Search" value="Search" type="submit">
+            </form>
+        </div>
+        <ul id="tabs">
+            <li>
+                <a class="unselected" href="http://zookeeper.apache.org/">Project</a>
+            </li>
+            <li>
+                <a class="unselected" href="https://cwiki.apache.org/confluence/display/ZOOKEEPER/">Wiki</a>
+            </li>
+            <li class="current">
+                <a class="selected" href="index.html">ZooKeeper 3.6 Documentation</a>
+            </li>
+        </ul>
+    </div>
+</div>
+<div id="main">
+    <div id="publishedStrip">
+        <div id="level2tabs"></div>
+        <script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+    </div>
+    <div class="breadtrail">
+        &nbsp;
+    </div>
+    <div id="menu">
+        <div onclick="SwitchMenu('menu_1', 'skin/')" id="menu_1Title" class="menutitle">Overview</div>
+        <div id="menu_1" class="menuitemgroup">
+            <div class="menuitem">
+                <a href="index.html">Welcome</a>
+            </div>
+            <div class="menuitem">
+                <a href="zookeeperOver.html">Overview</a>
+            </div>
+            <div class="menuitem">
+                <a href="zookeeperStarted.html">Getting Started</a>
+            </div>
+            <div class="menuitem">
+                <a href="releasenotes.html">Release Notes</a>
+            </div>
+        </div>
+        <div onclick="SwitchMenu('menu_2', 'skin/')" id="menu_2Title" class="menutitle">Developer</div>
+        <div id="menu_2" class="menuitemgroup">
+            <div class="menuitem">
+                <a href="api/index.html">API Docs</a>
+            </div>
+            <div class="menuitem">
+                <a href="zookeeperProgrammers.html">Programmer's Guide</a>
+            </div>
+            <div class="menuitem">
+                <a href="javaExample.html">Java Example</a>
+            </div>
+            <div class="menuitem">
+                <a href="zookeeperTutorial.html">Barrier and Queue Tutorial</a>
+            </div>
+            <div class="menuitem">
+                <a href="recipes.html">Recipes</a>
+            </div>
+        </div>
+        <div onclick="SwitchMenu('menu_3', 'skin/')" id="menu_3Title" class="menutitle">Admin &amp; Ops</div>
+        <div id="menu_3" class="menuitemgroup">
+            <div class="menuitem">
+                <a href="zookeeperAdmin.html">Administrator's Guide</a>
+            </div>
+            <div class="menuitem">
+                <a href="zookeeperQuotas.html">Quota Guide</a>
+            </div>
+            <div class="menuitem">
+                <a href="zookeeperJMX.html">JMX</a>
+            </div>
+            <div class="menuitem">
+                <a href="zookeeperObservers.html">Observers Guide</a>
+            </div>
+            <div class="menuitem">
+                <a href="zookeeperReconfig.html">Dynamic Reconfiguration</a>
+            </div>
+        </div>
+        <div onclick="SwitchMenu('menu_4', 'skin/')" id="menu_4Title" class="menutitle">Contributor</div>
+        <div id="menu_4" class="menuitemgroup">
+            <div class="menuitem">
+                <a href="zookeeperInternals.html">ZooKeeper Internals</a>
+            </div>
+        </div>
+        <div onclick="SwitchMenu('menu_5', 'skin/')" id="menu_5Title" class="menutitle">Miscellaneous</div>
+        <div id="menu_5" class="menuitemgroup">
+            <div class="menuitem">
+                <a href="https://cwiki.apache.org/confluence/display/ZOOKEEPER">Wiki</a>
+            </div>
+            <div class="menuitem">
+                <a href="https://cwiki.apache.org/confluence/display/ZOOKEEPER/FAQ">FAQ</a>
+            </div>
+            <div class="menuitem">
+                <a href="http://zookeeper.apache.org/mailing_lists.html">Mailing Lists</a>
+            </div>
+        </div>
+    </div>
+    <div id="content">

BIN
zookeeper-docs/src/main/resources/markdown/images/2pc.jpg


BIN
zookeeper-docs/src/main/resources/markdown/images/bk-overview.jpg


BIN
zookeeper-docs/src/main/resources/markdown/images/favicon.ico


BIN
zookeeper-docs/src/main/resources/markdown/images/hadoop-logo.jpg


BIN
zookeeper-docs/src/main/resources/markdown/images/state_dia.dia


BIN
zookeeper-docs/src/main/resources/markdown/images/state_dia.jpg


BIN
zookeeper-docs/src/main/resources/markdown/images/zkarch.jpg


BIN
zookeeper-docs/src/main/resources/markdown/images/zkcomponents.jpg


BIN
zookeeper-docs/src/main/resources/markdown/images/zknamespace.jpg


BIN
zookeeper-docs/src/main/resources/markdown/images/zkperfRW-3.2.jpg


BIN
zookeeper-docs/src/main/resources/markdown/images/zkperfRW.jpg


BIN
zookeeper-docs/src/main/resources/markdown/images/zkperfreliability.jpg


BIN
zookeeper-docs/src/main/resources/markdown/images/zkservice.jpg


BIN
zookeeper-docs/src/main/resources/markdown/images/zookeeper_small.gif


+ 58 - 0
zookeeper-docs/src/main/resources/markdown/index.md

@@ -0,0 +1,58 @@
+<!--
+Copyright 2002-2004 The Apache Software Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+//-->
+
+## ZooKeeper: Because Coordinating Distributed Systems is a Zoo
+
+ZooKeeper is a high-performance coordination service for
+distributed applications.  It exposes common services - such as
+naming, configuration management, synchronization, and group
+services - in a simple interface so you don't have to write them
+from scratch.  You can use it off-the-shelf to implement
+consensus, group management, leader election, and presence
+protocols. And you can build on it for your own, specific needs.
+
+The following documents describe concepts and procedures to get
+you started using ZooKeeper. If you have more questions, please
+ask the [mailing list](http://zookeeper.apache.org/mailing_lists.html) or browse the
+archives.
+
++ **ZooKeeper Overview**
+    Technical Overview Documents for Client Developers, Administrators, and Contributors
+    + [Overview](zookeeperOver.html) - a bird's eye view of ZooKeeper, including design concepts and architecture
+    + [Getting Started](zookeeperStarted.html) - a tutorial-style guide for developers to install, run, and program to ZooKeeper
+    + [Release Notes](releasenotes.html) - new developer and user facing features, improvements, and incompatibilities
++ **Developers**
+    Documents for Developers using the ZooKeeper Client API
+    + [API Docs](api/index.html) - the technical reference to ZooKeeper Client APIs
+    + [Programmer's Guide](zookeeperProgrammers.html) - a client application developer's guide to ZooKeeper
+    + [ZooKeeper Java Example](javaExample.html) - a simple ZooKeeper client application, written in Java
+    + [Barrier and Queue Tutorial](zookeeperTutorial.html) - sample implementations of barriers and queues
+    + [ZooKeeper Recipes](recipes.html) - higher level solutions to common problems in distributed applications
++ **Administrators & Operators**
+    Documents for Administrators and Operations Engineers of ZooKeeper Deployments
+    + [Administrator's Guide](zookeeperAdmin.html) - a guide for system administrators and anyone else who might deploy ZooKeeper
+    + [Quota Guide](zookeeperQuotas.html) - a guide for system administrators on Quotas in ZooKeeper.
+    + [JMX](zookeeperJMX.html) - how to enable JMX in ZooKeeper
+    + [Hierarchical quorums](zookeeperHierarchicalQuorums.html)
+    + [Observers](zookeeperObservers.html) - non-voting ensemble members that easily improve ZooKeeper's scalability
+    + [Dynamic Reconfiguration](zookeeperReconfig.html) - a guide on how to use dynamic reconfiguration in ZooKeeper
++ **Contributors**
+    Documents for Developers Contributing to the ZooKeeper Open Source Project
+    + [ZooKeeper Internals](zookeeperInternals.html) - assorted topics on the inner workings of ZooKeeper
++ **Miscellaneous ZooKeeper Documentation**
+    + [Wiki](https://cwiki.apache.org/confluence/display/ZOOKEEPER)
+    + [FAQ](https://cwiki.apache.org/confluence/display/ZOOKEEPER/FAQ)
+

+ 627 - 0
zookeeper-docs/src/main/resources/markdown/javaExample.md

@@ -0,0 +1,627 @@
+<!--
+Copyright 2002-2004 The Apache Software Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+//-->
+
+# ZooKeeper Java Example
+
+* [A Simple Watch Client](#ch_Introduction)
+    * [Requirements](#sc_requirements)
+    * [Program Design](#sc_design)
+* [The Executor Class](#sc_executor)
+* [The DataMonitor Class](#sc_DataMonitor)
+* [Complete Source Listings](#sc_completeSourceCode)
+
+<a name="ch_Introduction"></a>
+
+## A Simple Watch Client
+
+To introduce you to the ZooKeeper Java API, we develop here a very simple
+watch client. This ZooKeeper client watches a ZooKeeper node for changes
+and responds by starting or stopping a program.
+
+<a name="sc_requirements"></a>
+
+### Requirements
+
+The client has four requirements:
+
+* It takes as parameters:
+  * the address of the ZooKeeper service
+  * the name of a znode - the one to be watched
+  * the name of a file to write the output to
+  * an executable with arguments.
+* It fetches the data associated with the znode and starts the executable.
+* If the znode changes, the client refetches the contents and restarts the executable.
+* If the znode disappears, the client kills the executable.
+
+<a name="sc_design"></a>
+
+### Program Design
+
+Conventionally, ZooKeeper applications are broken into two units, one which maintains the connection,
+and the other which monitors data.  In this application, the class called the **Executor**
+maintains the ZooKeeper connection, and the class called the  **DataMonitor** monitors the data
+in the ZooKeeper tree. Also, the Executor contains the main thread and the execution logic.
+It is responsible for what little user interaction there is, as well as interaction with the executable program you
+pass in as an argument and which the sample (per the requirements) shuts down and restarts, according to the
+state of the znode.
+
+<a name="sc_executor"></a>
+
+## The Executor Class
+
+The Executor object is the primary container of the sample application. It contains
+both the **ZooKeeper** object and the **DataMonitor**, as described above in
+[Program Design](#sc_design).
+
+
+    // from the Executor class...
+
+    public static void main(String[] args) {
+        if (args.length < 4) {
+            System.err
+                    .println("USAGE: Executor hostPort znode filename program [args ...]");
+            System.exit(2);
+        }
+        String hostPort = args[0];
+        String znode = args[1];
+        String filename = args[2];
+        String exec[] = new String[args.length - 3];
+        System.arraycopy(args, 3, exec, 0, exec.length);
+        try {
+            new Executor(hostPort, znode, filename, exec).run();
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    public Executor(String hostPort, String znode, String filename,
+            String exec[]) throws KeeperException, IOException {
+        this.filename = filename;
+        this.exec = exec;
+        zk = new ZooKeeper(hostPort, 3000, this);
+        dm = new DataMonitor(zk, znode, null, this);
+    }
+
+    public void run() {
+        try {
+            synchronized (this) {
+                while (!dm.dead) {
+                    wait();
+                }
+            }
+        } catch (InterruptedException e) {
+        }
+    }
+
+
+Recall that the Executor's job is to start and stop the executable whose name you pass in on the command line.
+It does this in response to events fired by the ZooKeeper object. As you can see in the code above, the Executor passes
+a reference to itself as the Watcher argument in the ZooKeeper constructor. It also passes a reference to itself
+as the DataMonitorListener argument to the DataMonitor constructor. Per the Executor's definition, it implements both these
+interfaces:
+
+    public class Executor implements Watcher, Runnable, DataMonitor.DataMonitorListener {
+    ...
+
+
+The **Watcher** interface is defined by the ZooKeeper Java API.
+ZooKeeper uses it to communicate back to its container. It supports only one method, `process()`, which ZooKeeper uses
+to communicate generic events that the main thread would be interested in, such as the state of the ZooKeeper connection or the ZooKeeper session. The Executor
+in this example simply forwards those events down to the DataMonitor to decide what to do with them. It does this simply to illustrate
+the point that, by convention, the Executor or some Executor-like object "owns" the ZooKeeper connection, but it is free to delegate the events
+to other objects. It also uses this as the default channel on which to fire watch events. (More on this later.)
+
+
+    public void process(WatchedEvent event) {
+        dm.process(event);
+    }
+
+
+The **DataMonitorListener**
+interface, on the other hand, is not part of the ZooKeeper API. It is a completely custom interface,
+designed for this sample application. The DataMonitor object uses it to communicate back to its container, which
+is also the Executor object. The DataMonitorListener interface looks like this:
+
+
+    public interface DataMonitorListener {
+        /**
+        * The existence status of the node has changed.
+        */
+        void exists(byte data[]);
+
+        /**
+        * The ZooKeeper session is no longer valid.
+        *
+        * @param rc
+        * the ZooKeeper reason code
+        */
+        void closing(int rc);
+    }
+
+
+This interface is defined in the DataMonitor class and implemented in the Executor class.
+When `Executor.exists()` is invoked,
+the Executor decides whether to start up or shut down per the requirements. Recall that the requirements say to kill the executable when the
+znode ceases to _exist_.
+
+When `Executor.closing()`
+is invoked, the Executor decides whether or not to shut itself down in response to the ZooKeeper connection permanently disappearing.
+
+As you might have guessed, DataMonitor is the object that invokes
+these methods, in response to changes in ZooKeeper's state.
+
+Here are Executor's implementation of
+`DataMonitorListener.exists()` and `DataMonitorListener.closing`:
+
+
+    public void exists( byte[] data ) {
+        if (data == null) {
+            if (child != null) {
+                System.out.println("Killing process");
+                child.destroy();
+                try {
+                    child.waitFor();
+                } catch (InterruptedException e) {
+               }
+            }
+            child = null;
+        } else {
+            if (child != null) {
+                System.out.println("Stopping child");
+                child.destroy();
+                try {
+                   child.waitFor();
+                } catch (InterruptedException e) {
+                e.printStackTrace();
+                }
+            }
+            try {
+                FileOutputStream fos = new FileOutputStream(filename);
+                fos.write(data);
+                fos.close();
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+            try {
+                System.out.println("Starting child");
+                child = Runtime.getRuntime().exec(exec);
+                new StreamWriter(child.getInputStream(), System.out);
+                new StreamWriter(child.getErrorStream(), System.err);
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+        }
+    }
+
+    public void closing(int rc) {
+        synchronized (this) {
+            notifyAll();
+        }
+    }
+
+
+<a name="sc_DataMonitor"></a>
+
+## The DataMonitor Class
+
+The DataMonitor class has the meat of the ZooKeeper logic. It is mostly
+asynchronous and event driven. DataMonitor kicks things off in the constructor with:
+
+
+    public DataMonitor(ZooKeeper zk, String znode, Watcher chainedWatcher,
+            DataMonitorListener listener) {
+        this.zk = zk;
+        this.znode = znode;
+        this.chainedWatcher = chainedWatcher;
+        this.listener = listener;
+
+        // Get things started by checking if the node exists. We are going
+        // to be completely event driven
+        zk.exists(znode, true, this, null);
+    }
+
+
+The call to `ZooKeeper.exists()` checks for the existence of the znode,
+sets a watch, and passes a reference to itself (`this`)
+as the completion callback object. In this sense, it kicks things off, since the
+real processing happens when the watch is triggered.
+
+######Note
+
+>Don't confuse the completion callback with the watch callback. The `ZooKeeper.exists()`
+completion callback, which happens to be the method `StatCallback.processResult()` implemented
+in the DataMonitor object, is invoked when the asynchronous _setting of the watch_ operation
+(by `ZooKeeper.exists()`) completes on the server.
+
+>The triggering of the watch, on the other hand, sends an event to the _Executor_ object, since
+the Executor registered as the Watcher of the ZooKeeper object.
+
+>As an aside, you might note that the DataMonitor could also register itself as the Watcher
+for this particular watch event. This is new to ZooKeeper 3.0.0 (the support of multiple Watchers). In this
+example, however, DataMonitor does not register as the Watcher.
+
+When the `ZooKeeper.exists()` operation completes on the server, the ZooKeeper API invokes this completion callback on
+the client:
+
+
+    public void processResult(int rc, String path, Object ctx, Stat stat) {
+        boolean exists;
+        switch (rc) {
+        case Code.Ok:
+            exists = true;
+            break;
+        case Code.NoNode:
+            exists = false;
+            break;
+        case Code.SessionExpired:
+        case Code.NoAuth:
+            dead = true;
+            listener.closing(rc);
+            return;
+        default:
+            // Retry errors
+            zk.exists(znode, true, this, null);
+            return;
+        }
+
+        byte b[] = null;
+        if (exists) {
+            try {
+                b = zk.getData(znode, false, null);
+            } catch (KeeperException e) {
+                // We don't need to worry about recovering now. The watch
+                // callbacks will kick off any exception handling
+                e.printStackTrace();
+            } catch (InterruptedException e) {
+                return;
+            }
+        }     
+        if ((b == null && b != prevData)
+            || (b != null && !Arrays.equals(prevData, b))) {
+            listener.exists(b);
+            prevData = b;
+        }
+    }
+
+
+The code first checks the error codes for znode existence, fatal errors, and
+recoverable errors. If the file (or znode) exists, it gets the data from the znode, and
+then invokes the exists() callback of the Executor if the state has changed. Note that
+it doesn't have to do any exception handling for the getData call because it
+has watches pending for anything that could cause an error: if the node is deleted
+before it calls `ZooKeeper.getData()`, the watch event set by
+the `ZooKeeper.exists()` triggers a callback;
+if there is a communication error, a connection watch event fires when
+the connection comes back up.
+
+Finally, notice how DataMonitor processes watch events:
+
+
+    public void process(WatchedEvent event) {
+        String path = event.getPath();
+        if (event.getType() == Event.EventType.None) {
+            // We are being told that the state of the
+            // connection has changed
+            switch (event.getState()) {
+            case SyncConnected:
+                // In this particular example we don't need to do anything
+                // here - watches are automatically re-registered with
+                // server and any watches triggered while the client was
+                // disconnected will be delivered (in order of course)
+                break;
+            case Expired:
+                // It's all over
+                dead = true;
+                listener.closing(KeeperException.Code.SessionExpired);
+                break;
+            }
+        } else {
+            if (path != null && path.equals(znode)) {
+                // Something has changed on the node, let's find out
+                zk.exists(znode, true, this, null);
+            }
+        }
+        if (chainedWatcher != null) {
+            chainedWatcher.process(event);
+        }
+    }
+
+
+If the client-side ZooKeeper libraries can re-establish the
+communication channel (SyncConnected event) to ZooKeeper before
+session expiration (Expired event) all of the session's watches will
+automatically be re-established with the server (auto-reset of watches
+is new in ZooKeeper 3.0.0). See [ZooKeeper Watches](zookeeperProgrammers.html#ch_zkWatches)
+in the programmer guide for more on this. A bit lower down in this
+function, when DataMonitor gets an event for a znode, it calls `ZooKeeper.exists()` to find out what has changed.
+
+<a name="sc_completeSourceCode"></a>
+
+## Complete Source Listings
+
+### Executor.java
+
+
+    /**
+     * A simple example program to use DataMonitor to start and
+     * stop executables based on a znode. The program watches the
+     * specified znode and saves the data that corresponds to the
+     * znode in the filesystem. It also starts the specified program
+     * with the specified arguments when the znode exists and kills
+     * the program if the znode goes away.
+     */
+    import java.io.FileOutputStream;
+    import java.io.IOException;
+    import java.io.InputStream;
+    import java.io.OutputStream;
+
+    import org.apache.zookeeper.KeeperException;
+    import org.apache.zookeeper.WatchedEvent;
+    import org.apache.zookeeper.Watcher;
+    import org.apache.zookeeper.ZooKeeper;
+
+    public class Executor
+        implements Watcher, Runnable, DataMonitor.DataMonitorListener
+    {
+        String znode;
+        DataMonitor dm;
+        ZooKeeper zk;
+        String filename;
+        String exec[];
+        Process child;
+
+        public Executor(String hostPort, String znode, String filename,
+                String exec[]) throws KeeperException, IOException {
+            this.filename = filename;
+            this.exec = exec;
+            zk = new ZooKeeper(hostPort, 3000, this);
+            dm = new DataMonitor(zk, znode, null, this);
+        }
+
+        /**
+         * @param args
+         */
+        public static void main(String[] args) {
+            if (args.length < 4) {
+                System.err
+                        .println("USAGE: Executor hostPort znode filename program [args ...]");
+                System.exit(2);
+            }
+            String hostPort = args[0];
+            String znode = args[1];
+            String filename = args[2];
+            String exec[] = new String[args.length - 3];
+            System.arraycopy(args, 3, exec, 0, exec.length);
+            try {
+                new Executor(hostPort, znode, filename, exec).run();
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+        }
+
+        /***************************************************************************
+         * We do not process any events ourselves, we just need to forward them on.
+         *
+         * @see org.apache.zookeeper.Watcher#process(org.apache.zookeeper.proto.WatcherEvent)
+         */
+        public void process(WatchedEvent event) {
+            dm.process(event);
+        }
+
+        public void run() {
+            try {
+                synchronized (this) {
+                    while (!dm.dead) {
+                        wait();
+                    }
+                }
+            } catch (InterruptedException e) {
+            }
+        }
+
+        public void closing(int rc) {
+            synchronized (this) {
+                notifyAll();
+            }
+        }
+
+        static class StreamWriter extends Thread {
+            OutputStream os;
+
+            InputStream is;
+
+            StreamWriter(InputStream is, OutputStream os) {
+                this.is = is;
+                this.os = os;
+                start();
+            }
+
+            public void run() {
+                byte b[] = new byte[80];
+                int rc;
+                try {
+                    while ((rc = is.read(b)) > 0) {
+                        os.write(b, 0, rc);
+                    }
+                } catch (IOException e) {
+                }
+
+            }
+        }
+
+        public void exists(byte[] data) {
+            if (data == null) {
+                if (child != null) {
+                    System.out.println("Killing process");
+                    child.destroy();
+                    try {
+                        child.waitFor();
+                    } catch (InterruptedException e) {
+                    }
+                }
+                child = null;
+            } else {
+                if (child != null) {
+                    System.out.println("Stopping child");
+                    child.destroy();
+                    try {
+                        child.waitFor();
+                    } catch (InterruptedException e) {
+                        e.printStackTrace();
+                    }
+                }
+                try {
+                    FileOutputStream fos = new FileOutputStream(filename);
+                    fos.write(data);
+                    fos.close();
+                } catch (IOException e) {
+                    e.printStackTrace();
+                }
+                try {
+                    System.out.println("Starting child");
+                    child = Runtime.getRuntime().exec(exec);
+                    new StreamWriter(child.getInputStream(), System.out);
+                    new StreamWriter(child.getErrorStream(), System.err);
+                } catch (IOException e) {
+                    e.printStackTrace();
+                }
+            }
+        }
+    }
+
+
+### DataMonitor.java
+
+
+    /**
+     * A simple class that monitors the data and existence of a ZooKeeper
+     * node. It uses asynchronous ZooKeeper APIs.
+     */
+    import java.util.Arrays;
+
+    import org.apache.zookeeper.KeeperException;
+    import org.apache.zookeeper.WatchedEvent;
+    import org.apache.zookeeper.Watcher;
+    import org.apache.zookeeper.ZooKeeper;
+    import org.apache.zookeeper.AsyncCallback.StatCallback;
+    import org.apache.zookeeper.KeeperException.Code;
+    import org.apache.zookeeper.data.Stat;
+
+    public class DataMonitor implements Watcher, StatCallback {
+
+        ZooKeeper zk;
+        String znode;
+        Watcher chainedWatcher;
+        boolean dead;
+        DataMonitorListener listener;
+        byte prevData[];
+
+        public DataMonitor(ZooKeeper zk, String znode, Watcher chainedWatcher,
+                DataMonitorListener listener) {
+            this.zk = zk;
+            this.znode = znode;
+            this.chainedWatcher = chainedWatcher;
+            this.listener = listener;
+            // Get things started by checking if the node exists. We are going
+            // to be completely event driven
+            zk.exists(znode, true, this, null);
+        }
+
+        /**
+         * Other classes use the DataMonitor by implementing this method
+         */
+        public interface DataMonitorListener {
+            /**
+             * The existence status of the node has changed.
+             */
+            void exists(byte data[]);
+
+            /**
+             * The ZooKeeper session is no longer valid.
+             *
+             * @param rc
+             *                the ZooKeeper reason code
+             */
+            void closing(int rc);
+        }
+
+        public void process(WatchedEvent event) {
+            String path = event.getPath();
+            if (event.getType() == Event.EventType.None) {
+            // We are being told that the state of the
+                // connection has changed
+                switch (event.getState()) {
+                case SyncConnected:
+                    // In this particular example we don't need to do anything
+                    // here - watches are automatically re-registered with
+                    // server and any watches triggered while the client was
+                    // disconnected will be delivered (in order of course)
+                    break;
+                case Expired:
+                    // It's all over
+                    dead = true;
+                    listener.closing(KeeperException.Code.SessionExpired);
+                    break;
+                }
+            } else {
+                if (path != null && path.equals(znode)) {
+                    // Something has changed on the node, let's find out
+                    zk.exists(znode, true, this, null);
+                }
+            }
+            if (chainedWatcher != null) {
+                chainedWatcher.process(event);
+            }
+        }
+
+        public void processResult(int rc, String path, Object ctx, Stat stat) {
+            boolean exists;
+            switch (rc) {
+            case Code.Ok:
+                exists = true;
+                break;
+            case Code.NoNode:
+                exists = false;
+                break;
+            case Code.SessionExpired:
+            case Code.NoAuth:
+                dead = true;
+                listener.closing(rc);
+                return;
+            default:
+                // Retry errors
+                zk.exists(znode, true, this, null);
+                return;
+            }
+
+            byte b[] = null;
+            if (exists) {
+                try {
+                    b = zk.getData(znode, false, null);
+                } catch (KeeperException e) {
+                    // We don't need to worry about recovering now. The watch
+                    // callbacks will kick off any exception handling
+                    e.printStackTrace();
+                } catch (InterruptedException e) {
+                    return;
+                }
+            }
+            if ((b == null && b != prevData)
+                    || (b != null && !Arrays.equals(prevData, b))) {
+                listener.exists(b);
+                prevData = b;
+            }
+        }
+    }
+

+ 416 - 0
zookeeper-docs/src/main/resources/markdown/recipes.md

@@ -0,0 +1,416 @@
+<!--
+Copyright 2002-2004 The Apache Software Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+//-->
+
+# ZooKeeper Recipes and Solutions
+
+* [A Guide to Creating Higher-level Constructs with ZooKeeper](#ch_recipes)
+    * [Important Note About Error Handling](#sc_recipes_errorHandlingNote)
+    * [Out of the Box Applications: Name Service, Configuration, Group Membership](#sc_outOfTheBox)
+    * [Barriers](#sc_recipes_eventHandles)
+        * [Double Barriers](#sc_doubleBarriers)
+    * [Queues](#sc_recipes_Queues)
+        * [Priority Queues](#sc_recipes_priorityQueues)
+    * [Locks](#sc_recipes_Locks)
+        * [Recoverable Errors and the GUID](#sc_recipes_GuidNote)
+        * [Shared Locks](#Shared+Locks)
+        * [Revocable Shared Locks](#sc_revocableSharedLocks)
+    * [Two-phased Commit](#sc_recipes_twoPhasedCommit)
+    * [Leader Election](#sc_leaderElection)
+
+<a name="ch_recipes"></a>
+
+## A Guide to Creating Higher-level Constructs with ZooKeeper
+
+In this article, you'll find guidelines for using
+ZooKeeper to implement higher order functions. All of them are conventions
+implemented at the client and do not require special support from
+ZooKeeper. Hopefully the community will capture these conventions in client-side libraries
+to ease their use and to encourage standardization.
+
+One of the most interesting things about ZooKeeper is that even
+though ZooKeeper uses _asynchronous_ notifications, you
+can use it to build _synchronous_ consistency
+primitives, such as queues and locks. As you will see, this is possible
+because ZooKeeper imposes an overall order on updates, and has mechanisms
+to expose this ordering.
+
+Note that the recipes below attempt to employ best practices. In
+particular, they avoid polling, timers or anything else that would result
+in a "herd effect", causing bursts of traffic and limiting
+scalability.
+
+There are many useful functions that can be imagined that aren't
+included here - revocable read-write priority locks, as just one example.
+And some of the constructs mentioned here - locks, in particular -
+illustrate certain points, even though you may find other constructs, such
+as event handles or queues, a more practical means of performing the same
+function. In general, the examples in this section are designed to
+stimulate thought.
+
+<a name="sc_recipes_errorHandlingNote"></a>
+
+### Important Note About Error Handling
+
+When implementing the recipes you must handle recoverable exceptions
+(see the [FAQ](https://cwiki.apache.org/confluence/display/ZOOKEEPER/FAQ)). In
+particular, several of the recipes employ sequential ephemeral
+nodes. When creating a sequential ephemeral node there is an error case in
+which the create() succeeds on the server but the server crashes before
+returning the name of the node to the client. When the client reconnects its
+session is still valid and, thus, the node is not removed. The implication is
+that it is difficult for the client to know if its node was created or not. The
+recipes below include measures to handle this.
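+
+As an illustration only, the sketch below shows one way to apply this idea in Java:
+embed a client-chosen GUID in the node name so that, after a lost create() reply, the
+client can search the children for its own node. It assumes an existing ZooKeeper handle
+and a pre-created parent node; the class, the method names, and the "x-" prefix are
+invented for this example.
+
+    import org.apache.zookeeper.CreateMode;
+    import org.apache.zookeeper.KeeperException;
+    import org.apache.zookeeper.ZooDefs;
+    import org.apache.zookeeper.ZooKeeper;
+
+    public class GuidCreate {
+
+        /** Create a sequential ephemeral node whose name embeds a client-chosen GUID. */
+        public static String createWithGuid(ZooKeeper zk, String parent, String guid,
+                byte[] data) throws KeeperException, InterruptedException {
+            // returns e.g. "<parent>/x-<guid>-0000000007"
+            return zk.create(parent + "/x-" + guid + "-", data,
+                    ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL);
+        }
+
+        /** After a lost create() reply, look for the node (if any) carrying our GUID. */
+        public static String findByGuid(ZooKeeper zk, String parent, String guid)
+                throws KeeperException, InterruptedException {
+            for (String child : zk.getChildren(parent, false)) {
+                if (child.contains(guid)) {
+                    return parent + "/" + child;   // the create did succeed
+                }
+            }
+            return null;                           // not found; safe to retry the create
+        }
+    }
+
+A caller would generate the GUID up front (for example with `UUID.randomUUID().toString()`),
+pass it to `createWithGuid`, and fall back to `findByGuid` once the session is usable again.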
+
+<a name="sc_outOfTheBox"></a>
+
+### Out of the Box Applications: Name Service, Configuration, Group Membership
+
+Name service and configuration are two of the primary applications
+of ZooKeeper. These two functions are provided directly by the ZooKeeper
+API.
+
+Another function directly provided by ZooKeeper is _group
+membership_. The group is represented by a node. Members of the
+group create ephemeral nodes under the group node. Nodes of the members
+that fail abnormally will be removed automatically when ZooKeeper detects
+the failure.
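+
+As an illustration only, a minimal group-membership sketch in Java might look like the
+following. It assumes an existing ZooKeeper handle and a pre-created, persistent group
+node such as "/mygroup"; the class and method names are invented for this example.
+
+    import java.util.List;
+
+    import org.apache.zookeeper.CreateMode;
+    import org.apache.zookeeper.KeeperException;
+    import org.apache.zookeeper.ZooDefs;
+    import org.apache.zookeeper.ZooKeeper;
+
+    public class GroupMember {
+
+        private final ZooKeeper zk;
+        private final String groupPath;
+
+        public GroupMember(ZooKeeper zk, String groupPath) {
+            this.zk = zk;
+            this.groupPath = groupPath;
+        }
+
+        /** Join the group: the ephemeral child disappears if this session dies. */
+        public String join(String memberName) throws KeeperException, InterruptedException {
+            return zk.create(groupPath + "/" + memberName, new byte[0],
+                    ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
+        }
+
+        /** List the current members; pass true to leave a watch for membership changes. */
+        public List<String> members(boolean watch) throws KeeperException, InterruptedException {
+            return zk.getChildren(groupPath, watch);
+        }
+    }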
+
+<a name="sc_recipes_eventHandles"></a>
+
+### Barriers
+
+Distributed systems use _barriers_
+to block processing of a set of nodes until a condition is met,
+at which time all the nodes are allowed to proceed. Barriers are
+implemented in ZooKeeper by designating a barrier node. The
+barrier is in place if the barrier node exists. Here's the
+pseudo code:
+
+1. Client calls the ZooKeeper API's **exists()** function on the barrier node, with
+  _watch_ set to true.
+1. If **exists()** returns false, the
+  barrier is gone and the client proceeds
+1. Else, if **exists()** returns true,
+  the clients wait for a watch event from ZooKeeper for the barrier
+  node.
+1. When the watch event is triggered, the client reissues the
+  **exists( )** call, again waiting until
+  the barrier node is removed.
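+
+For illustration, here is a minimal Java sketch of the pseudo code above. It assumes an
+existing ZooKeeper handle; the class name and the barrier path passed in are invented for
+this example.
+
+    import java.util.concurrent.CountDownLatch;
+
+    import org.apache.zookeeper.KeeperException;
+    import org.apache.zookeeper.WatchedEvent;
+    import org.apache.zookeeper.Watcher;
+    import org.apache.zookeeper.ZooKeeper;
+
+    public class BarrierWait {
+
+        private final ZooKeeper zk;
+        private final String barrierPath;
+
+        public BarrierWait(ZooKeeper zk, String barrierPath) {
+            this.zk = zk;
+            this.barrierPath = barrierPath;
+        }
+
+        /** Block until the barrier node no longer exists. */
+        public void waitForBarrier() throws KeeperException, InterruptedException {
+            while (true) {
+                final CountDownLatch changed = new CountDownLatch(1);
+                Watcher watcher = new Watcher() {
+                    public void process(WatchedEvent event) {
+                        changed.countDown();   // any event on the node triggers a re-check
+                    }
+                };
+                // exists() sets the watch; if the node is already gone we can proceed
+                if (zk.exists(barrierPath, watcher) == null) {
+                    return;
+                }
+                changed.await();               // wait for the watch event, then loop
+            }
+        }
+    }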
+
+<a name="sc_doubleBarriers"></a>
+
+#### Double Barriers
+
+Double barriers enable clients to synchronize the beginning and
+the end of a computation. When enough processes have joined the barrier,
+processes start their computation and leave the barrier once they have
+finished. This recipe shows how to use a ZooKeeper node as a
+barrier.
+
+The pseudo code in this recipe represents the barrier node as
+_b_. Every client process _p_
+registers with the barrier node on entry and unregisters when it is
+ready to leave. A process registers with the barrier node via the **Enter** procedure below and waits until
+_x_ client processes register before proceeding with
+the computation. (The _x_ here is up to you to
+determine for your system.)
+
+| **Enter**                         | **Leave**                     |
+|-----------------------------------|-------------------------------|
+| 1. Create a name _n_ = _b_ + “/” + _p_ | 1. **L = getChildren(b, false)** |
+| 2. Set watch: **exists(_b_ + ‘‘/ready’’, true)** | 2. if no children, exit |
+| 3. Create child: **create(_n_, EPHEMERAL)**  | 3. if _p_ is only process node in L, delete(_n_) and exit |
+| 4. **L = getChildren(b, false)**  | 4. if _p_ is the lowest process node in L, wait on highest process node in L |
+| 5. if fewer children in L than _x_, wait for watch event  | 5. else **delete(_n_)** if still exists and wait on lowest process node in L |
+| 6. else **create(b + ‘‘/ready’’, REGULAR)** | 6. goto 1 |
+
+On entering, all processes watch on a ready node and
+create an ephemeral node as a child of the barrier node. Each process
+but the last enters the barrier and waits for the ready node to appear
+at line 5. The process that creates the xth node, the last process, will
+see x nodes in the list of children and create the ready node, waking up
+the other processes. Note that waiting processes wake up only when it is
+time to exit, so waiting is efficient.
+
+On exit, you can't use a flag such as _ready_
+because you are watching for process nodes to go away. By using
+ephemeral nodes, processes that fail after the barrier has been entered
+do not prevent correct processes from finishing. When processes are
+ready to leave, they need to delete their process nodes and wait for all
+other processes to do the same.
+
+Processes exit when there are no process nodes left as children of
+_b_. However, as an optimization, you can use the
+lowest process node as the ready flag. All other processes that are
+ready to exit watch for the lowest existing process node to go away, and
+the owner of the lowest process watches for any other process node
+(picking the highest for simplicity) to go away. This means that only a
+single process wakes up on each node deletion except for the last node,
+which wakes up everyone when it is removed.
+
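+As a concrete illustration, here is a hedged Java sketch of the **Enter** procedure from
+the table above. It assumes a connected `ZooKeeper` handle and a barrier znode _b_ that
+already exists and is empty when the barrier is set up, and it leaves out the retry and
+NodeExists handling a production implementation would need:
+
+```java
+import java.util.concurrent.CountDownLatch;
+
+import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.ZooDefs;
+import org.apache.zookeeper.ZooKeeper;
+
+public class DoubleBarrier {
+
+    public static void enter(ZooKeeper zk, String b, String p, int x)
+            throws KeeperException, InterruptedException {
+        String n = b + "/" + p;                               // 1. our node name
+        CountDownLatch ready = new CountDownLatch(1);
+        zk.exists(b + "/ready", event -> ready.countDown());  // 2. watch the ready node
+        zk.create(n, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE,
+                CreateMode.EPHEMERAL);                        // 3. register our process
+        if (zk.getChildren(b, false).size() < x) {            // 4.-5. not everyone is here
+            ready.await();                                    //       wait for "ready"
+        } else {                                              // 6. we are the last to arrive
+            zk.create(b + "/ready", new byte[0],
+                    ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
+        }
+    }
+}
+```
+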
+<a name="sc_recipes_Queues"></a>
+
+### Queues
+
+Distributed queues are a common data structure. To implement a
+distributed queue in ZooKeeper, first designate a znode to hold the queue,
+the queue node. The distributed clients put something into the queue by
+calling create() with a pathname ending in "queue-", with the
+_sequence_ and _ephemeral_ flags in
+the create() call set to true. Because the _sequence_
+flag is set, the new pathnames will have the form
+_path-to-queue-node_/queue-X, where X is a monotonically increasing number. A
+client that wants to remove an element from the queue calls ZooKeeper's **getChildren( )** function, with
+_watch_ set to true on the queue node, and begins
+processing nodes with the lowest number. The client does not need to issue
+another **getChildren( )** until it exhausts
+the list obtained from the first **getChildren(
+)** call. If there are no children in the queue node, the
+reader waits for a watch notification to check the queue again.
+
+###### Note
+>There now exists a Queue implementation in the ZooKeeper
+recipes directory. This is distributed with the release --
+zookeeper-recipes/zookeeper-recipes-queue directory of the release artifact.
+
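+For illustration, a bare-bones Java sketch of the enqueue and dequeue steps described
+above, assuming a connected `ZooKeeper` handle and an existing queue znode; error
+handling beyond the dequeue race is omitted:
+
+```java
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.ZooDefs;
+import org.apache.zookeeper.ZooKeeper;
+
+public class SimpleQueue {
+
+    // Producer: create a sequential child; ZooKeeper appends the sequence number.
+    public static String enqueue(ZooKeeper zk, String queue, byte[] payload)
+            throws KeeperException, InterruptedException {
+        return zk.create(queue + "/queue-", payload,
+                ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL);
+    }
+
+    // Consumer: take the lowest-numbered child; returns null if the queue is empty.
+    public static byte[] dequeue(ZooKeeper zk, String queue)
+            throws KeeperException, InterruptedException {
+        List<String> children = zk.getChildren(queue, false);
+        Collections.sort(children);                    // "queue-0000000001", ...
+        for (String child : children) {
+            String path = queue + "/" + child;
+            try {
+                byte[] data = zk.getData(path, false, null);
+                zk.delete(path, -1);                   // -1 = any version
+                return data;
+            } catch (KeeperException.NoNodeException raced) {
+                // another consumer claimed it first; try the next element
+            }
+        }
+        return null;
+    }
+}
+```
+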
+<a name="sc_recipes_priorityQueues"></a>
+
+#### Priority Queues
+
+To implement a priority queue, you need only make two simple
+changes to the generic [queue
+recipe](#sc_recipes_Queues). First, to add to a queue, the pathname ends with
+"queue-YY" where YY is the priority of the element with lower numbers
+representing higher priority (just like UNIX). Second, when removing
+from the queue, a client uses an up-to-date children list meaning that
+the client will invalidate previously obtained children lists if a watch
+notification triggers for the queue node.
+
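+The enqueue side of that change might look like the following sketch; the zero-padding
+width and naming are illustrative, they just make the lexicographic order of children
+match numeric priority:
+
+```java
+import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.ZooDefs;
+import org.apache.zookeeper.ZooKeeper;
+
+public class PriorityQueueEnqueue {
+
+    // Lower priority values sort first, so a consumer taking the lowest-sorting
+    // child automatically takes the highest-priority element.
+    public static String enqueue(ZooKeeper zk, String queue, int priority, byte[] payload)
+            throws KeeperException, InterruptedException {
+        String prefix = String.format("%s/queue-%02d-", queue, priority);
+        return zk.create(prefix, payload,
+                ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL);
+    }
+}
+```
+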
+<a name="sc_recipes_Locks"></a>
+
+### Locks
+
+ZooKeeper can be used to implement fully distributed locks that are
+globally synchronous, meaning that at any snapshot in time no two clients
+think they hold the same lock. As with priority queues, first define
+a lock node.
+
+###### Note
+>There now exists a Lock implementation in the ZooKeeper
+recipes directory. This is distributed with the release --
+zookeeper-recipes/zookeeper-recipes-lock directory of the release artifact.
+
+Clients wishing to obtain a lock do the following:
+
+1. Call **create( )** with a pathname
+  of "_locknode_/guid-lock-" and the _sequence_ and
+  _ephemeral_ flags set. The _guid_
+  is needed in case the create() result is missed. See the note below.
+1. Call **getChildren( )** on the lock
+  node _without_ setting the watch flag (this is
+  important to avoid the herd effect).
+1. If the pathname created in step **1** has the lowest sequence number suffix, the
+  client has the lock and the client exits the protocol.
+1. The client calls **exists( )** with
+  the watch flag set on the path in the lock directory with the next
+  lowest sequence number.
+1. If **exists( )** returns false, go
+  to step **2**. Otherwise, wait for a
+  notification for the pathname from the previous step before going to
+  step **2**.
+
+The unlock protocol is very simple: clients wishing to release a
+lock simply delete the node they created in step 1.
+
+Here are a few things to notice:
+
+* The removal of a node will only cause one client to wake up
+  since each node is watched by exactly one client. In this way, you
+  avoid the herd effect.
+
+* There is no polling or timeouts.
+
+* Because of the way you implement locking, it is easy to see the
+  amount of lock contention, break locks, debug locking problems,
+  etc.
+
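+Putting the locking steps above into code, here is a hedged Java sketch. It assumes a
+connected `ZooKeeper` handle and an existing lock znode, and it omits the guid-based
+recovery from the following note as well as most error handling:
+
+```java
+import java.util.Comparator;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+
+import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.ZooDefs;
+import org.apache.zookeeper.ZooKeeper;
+
+public class SimpleLock {
+
+    // Blocks until the lock is held; returns the path of our lock node.
+    public static String lock(ZooKeeper zk, String lockNode, String guid)
+            throws KeeperException, InterruptedException {
+        // 1. ephemeral + sequence child; ZooKeeper appends the sequence number
+        String path = zk.create(lockNode + "/" + guid + "-lock-", new byte[0],
+                ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL);
+        String name = path.substring(path.lastIndexOf('/') + 1);
+        while (true) {
+            // 2. list the children without a watch (avoids the herd effect)
+            List<String> children = zk.getChildren(lockNode, false);
+            children.sort(Comparator.comparing(SimpleLock::sequenceOf));
+            int mine = children.indexOf(name);
+            if (mine == 0) {
+                return path;                        // 3. lowest sequence number: lock held
+            }
+            // 4. watch only the child with the next lowest sequence number
+            String previous = lockNode + "/" + children.get(mine - 1);
+            CountDownLatch gone = new CountDownLatch(1);
+            if (zk.exists(previous, event -> gone.countDown()) == null) {
+                continue;                           // 5. already vanished: re-check
+            }
+            gone.await();                           //    otherwise wait, then re-check
+        }
+    }
+
+    // Releasing the lock is just deleting the node created in step 1.
+    public static void unlock(ZooKeeper zk, String lockPath)
+            throws KeeperException, InterruptedException {
+        zk.delete(lockPath, -1);
+    }
+
+    // ZooKeeper appends a fixed-width, zero-padded sequence suffix.
+    private static String sequenceOf(String child) {
+        return child.substring(child.length() - 10);
+    }
+}
+```
+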
+<a name="sc_recipes_GuidNote"></a>
+
+#### Recoverable Errors and the GUID
+
+* If a recoverable error occurs calling **create()** the
+  client should call **getChildren()** and check for a node
+  containing the _guid_ used in the path name.
+  This handles the case (noted [above](#sc_recipes_errorHandlingNote)) of
+  the create() succeeding on the server but the server crashing before returning the name
+  of the new node.
+
+<a name="Shared+Locks"></a>
+
+#### Shared Locks
+
+You can implement shared locks with a few changes to the lock
+protocol:
+
+| **Obtaining a read lock:** | **Obtaining a write lock:** |
+|----------------------------|-----------------------------|
+| 1. Call **create( )** to create a node with pathname "*guid-/read-*". This is the lock node used later in the protocol. Make sure to set both the _sequence_ and _ephemeral_ flags. | 1. Call **create( )** to create a node with pathname "*guid-/write-*". This is the lock node spoken of later in the protocol. Make sure to set both _sequence_ and _ephemeral_ flags. |
+| 2. Call **getChildren( )** on the lock node _without_ setting the _watch_ flag - this is important, as it avoids the herd effect. | 2. Call **getChildren( )** on the lock node _without_ setting the _watch_ flag - this is important, as it avoids the herd effect. |
+| 3. If there are no children with a pathname starting with "*write-*" and having a lower sequence number than the node created in step **1**, the client has the lock and can exit the protocol. | 3. If there are no children with a lower sequence number than the node created in step **1**, the client has the lock and the client exits the protocol. |
+| 4. Otherwise, call **exists( )**, with _watch_ flag set, on the node in the lock directory with the pathname starting with "*write-*" that has the next lowest sequence number. | 4. Call **exists( )**, with _watch_ flag set, on the node with the pathname that has the next lowest sequence number. |
+| 5. If **exists( )** returns _false_, goto step **2**. | 5. If **exists( )** returns _false_, goto step **2**. Otherwise, wait for a notification for the pathname from the previous step before going to step **2**. |
+| 6. Otherwise, wait for a notification for the pathname from the previous step before going to step **2** |  |
+
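+For example, the read-lock admission test from step 3 of the table might be written as
+the following sketch; it assumes the 10-character, zero-padded sequence suffix that
+ZooKeeper appends to sequential nodes, and the node names are illustrative:
+
+```java
+import java.util.List;
+
+public class SharedLockCheck {
+
+    // A reader may proceed when no "write-" child has a lower sequence
+    // number than its own node (myName is the name of the node from step 1).
+    public static boolean readLockGranted(List<String> children, String myName) {
+        String mySeq = myName.substring(myName.length() - 10);
+        for (String child : children) {
+            String seq = child.substring(child.length() - 10);
+            if (child.contains("write-") && seq.compareTo(mySeq) < 0) {
+                return false;   // an earlier writer holds or is waiting for the lock
+            }
+        }
+        return true;
+    }
+}
+```
+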
+Notes:
+
+* It might appear that this recipe creates a herd effect:
+  when there is a large group of clients waiting for a read
+  lock, and all getting notified more or less simultaneously
+  when the "*write-*" node with the lowest
+  sequence number is deleted. In fact, that's valid behavior:
+  all those waiting reader clients should be released, since
+  they have the lock. The herd effect refers to releasing a
+  "herd" when in fact only a single or a small number of
+  machines can proceed.
+
+* See the [note for Locks](#sc_recipes_GuidNote) on how to use the guid in the node.
+
+<a name="sc_revocableSharedLocks"></a>
+
+#### Revocable Shared Locks
+
+With minor modifications to the Shared Lock protocol, you can make
+shared locks revocable:
+
+In step **1**, of both obtain reader
+and writer lock protocols, call **getData(
+)** with _watch_ set, immediately after the
+call to **create( )**. If the client
+subsequently receives notification for the node it created in step
+**1**, it does another **getData( )** on that node, with
+_watch_ set and looks for the string "unlock", which
+signals to the client that it must release the lock. This is because,
+according to this shared lock protocol, you can ask the client holding
+the lock to give it up by calling **setData()** on the lock node, writing "unlock" to that node.
+
+Note that this protocol requires the lock holder to consent to
+releasing the lock. Such consent is important, especially if the lock
+holder needs to do some processing before releasing the lock. Of course
+you can always implement _Revocable Shared Locks with Freaking
+Laser Beams_ by stipulating in your protocol that the revoker
+is allowed to delete the lock node if after some length of time the lock
+isn't deleted by the lock holder.
+
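+A hedged sketch of the lock holder's side of this protocol follows; the `releaseLock`
+callback is a placeholder, and a real implementation would handle connection events and
+errors more carefully:
+
+```java
+import java.nio.charset.StandardCharsets;
+
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.ZooKeeper;
+
+public class RevocationWatch {
+
+    // Read our own lock node with a watch; if its data is "unlock",
+    // the revoker has asked us to give the lock up.
+    public static void watchForRevocation(ZooKeeper zk, String lockPath, Runnable releaseLock)
+            throws KeeperException, InterruptedException {
+        byte[] data = zk.getData(lockPath, event -> {
+            try {
+                // re-read (and re-arm the watch) after every event on the node
+                watchForRevocation(zk, lockPath, releaseLock);
+            } catch (KeeperException | InterruptedException e) {
+                // a production implementation would log and recover here
+            }
+        }, null);
+        if ("unlock".equals(new String(data, StandardCharsets.UTF_8))) {
+            releaseLock.run();
+        }
+    }
+}
+```
+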
+<a name="sc_recipes_twoPhasedCommit"></a>
+
+### Two-phased Commit
+
+A two-phase commit protocol is an algorithm that lets all clients in
+a distributed system agree either to commit a transaction or abort.
+
+In ZooKeeper, you can implement a two-phased commit by having a
+coordinator create a transaction node, say "/app/Tx", and one child node
+per participating site, say "/app/Tx/s_i". When the coordinator creates the
+child node, it leaves the content undefined. Once each site involved in
+the transaction receives the transaction from the coordinator, the site
+reads each child node and sets a watch. Each site then processes the query
+and votes "commit" or "abort" by writing to its respective node. Once the
+write completes, the other sites are notified, and as soon as all sites
+have all votes, they can decide either "abort" or "commit". Note that a
+site can decide "abort" earlier if some site votes for "abort".
+
+An interesting aspect of this implementation is that the only role
+of the coordinator is to decide upon the group of sites, to create the
+ZooKeeper nodes, and to propagate the transaction to the corresponding
+sites. In fact, even propagating the transaction can be done through
+ZooKeeper by writing it in the transaction node.
+
+There are two important drawbacks of the approach described above.
+One is the message complexity, which is O(n²). The second is the
+impossibility of detecting failures of sites through ephemeral nodes. To
+detect the failure of a site using ephemeral nodes, it is necessary that
+the site create the node.
+
+To solve the first problem, you can have only the coordinator
+notified of changes to the transaction nodes, and then notify the sites
+once the coordinator reaches a decision. Note that this approach is scalable,
+but it is slower too, as it requires all communication to go through the
+coordinator.
+
+To address the second problem, you can have the coordinator
+propagate the transaction to the sites, and have each site create its
+own ephemeral node.
+
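+As an illustration of the coordinator-driven variant, a one-shot tally of the votes
+(run once every site has written its vote) could look like the following sketch; the
+paths and the "commit"/"abort" strings are illustrative:
+
+```java
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.ZooKeeper;
+
+public class TwoPhaseCommitTally {
+
+    // Returns "commit" only if every site wrote a "commit" vote to its znode.
+    public static String decide(ZooKeeper zk, String txPath)
+            throws KeeperException, InterruptedException {
+        List<String> sites = zk.getChildren(txPath, false);
+        for (String site : sites) {
+            byte[] vote = zk.getData(txPath + "/" + site, false, null);
+            String text = vote == null ? "" : new String(vote, StandardCharsets.UTF_8);
+            if (!"commit".equals(text)) {
+                return "abort";        // any missing or negative vote aborts
+            }
+        }
+        return "commit";
+    }
+}
+```
+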
+<a name="sc_leaderElection"></a>
+
+### Leader Election
+
+A simple way of doing leader election with ZooKeeper is to use the
+**SEQUENCE|EPHEMERAL** flags when creating
+znodes that represent "proposals" of clients. The idea is to have a znode,
+say "/election", such that each znode creates a child znode "/election/guid-n_"
+with both flags SEQUENCE|EPHEMERAL. With the sequence flag, ZooKeeper
+automatically appends a sequence number that is greater than any one
+previously appended to a child of "/election". The process that created
+the znode with the smallest appended sequence number is the leader.
+
+That's not all, though. It is important to watch for failures of the
+leader, so that a new client arises as the new leader in the case the
+current leader fails. A trivial solution is to have all application
+processes watch the current smallest znode, and check whether they
+are the new leader when the smallest znode goes away (note that the
+smallest znode will go away if the leader fails because the node is
+ephemeral). But this causes a herd effect: upon a failure of the current
+leader, all other processes receive a notification, and execute
+getChildren on "/election" to obtain the current list of children of
+"/election". If the number of clients is large, it causes a spike on the
+number of operations that ZooKeeper servers have to process. To avoid the
+herd effect, it is sufficient to watch for the next znode down on the
+sequence of znodes. If a client receives a notification that the znode it
+is watching is gone, then it becomes the new leader in the case that there
+is no smaller znode. Note that this avoids the herd effect by not having
+all clients watching the same znode.
+
+Here's the pseudo code:
+
+Let ELECTION be a path of choice of the application. To volunteer to
+be a leader:
+
+1. Create znode z with path "ELECTION/guid-n_" with both SEQUENCE and
+  EPHEMERAL flags;
+1. Let C be the children of "ELECTION", and i be the sequence
+  number of z;
+1. Watch for changes on "ELECTION/guid-n_j", where j is the largest
+  sequence number such that j < i and n_j is a znode in C;
+
+Upon receiving a notification of znode deletion:
+
+1. Let C be the new set of children of ELECTION;
+1. If z is the smallest node in C, then execute leader
+  procedure;
+1. Otherwise, watch for changes on "ELECTION/guid-n_j", where j is the
+  largest sequence number such that j < i and n_j is a znode in C;
+
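+The pseudo code above translates into roughly the following Java sketch; `leaderProcedure`
+is a placeholder for the application's leader logic, the ELECTION znode is assumed to
+exist already, and error handling is kept minimal:
+
+```java
+import java.util.Comparator;
+import java.util.List;
+import java.util.Optional;
+
+import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.ZooDefs;
+import org.apache.zookeeper.ZooKeeper;
+
+public class LeaderElection {
+
+    // Volunteer: create our ephemeral sequential proposal znode, then evaluate.
+    public static void volunteer(ZooKeeper zk, String election, String guid,
+                                 Runnable leaderProcedure)
+            throws KeeperException, InterruptedException {
+        String z = zk.create(election + "/" + guid + "-n_", new byte[0],
+                ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL);
+        check(zk, election, z, leaderProcedure);
+    }
+
+    private static void check(ZooKeeper zk, String election, String z,
+                              Runnable leaderProcedure)
+            throws KeeperException, InterruptedException {
+        String mySeq = z.substring(z.length() - 10);           // zero-padded suffix
+        List<String> children = zk.getChildren(election, false);
+        // the znode with the largest sequence number that is still smaller than ours
+        Optional<String> predecessor = children.stream()
+                .filter(c -> c.substring(c.length() - 10).compareTo(mySeq) < 0)
+                .max(Comparator.comparing((String c) -> c.substring(c.length() - 10)));
+        if (!predecessor.isPresent()) {
+            leaderProcedure.run();                             // no smaller znode: we lead
+            return;
+        }
+        // watch only the predecessor, which avoids the herd effect
+        if (zk.exists(election + "/" + predecessor.get(), event -> {
+            try {
+                check(zk, election, z, leaderProcedure);       // re-evaluate on deletion
+            } catch (KeeperException | InterruptedException e) {
+                // a production implementation would log and retry here
+            }
+        }) == null) {
+            check(zk, election, z, leaderProcedure);           // it vanished already
+        }
+    }
+}
+```
+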
+Notes:
+
+* Note that the znode having no preceding znode on the list of
+  children does not imply that the creator of this znode is aware that it is
+  the current leader. Applications may consider creating a separate znode
+  to acknowledge that the leader has executed the leader procedure.
+
+* See the [note for Locks](#sc_recipes_GuidNote) on how to use the guid in the node.
+
+

+ 167 - 0
zookeeper-docs/src/main/resources/markdown/skin/basic.css

@@ -0,0 +1,167 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+/**
+ * General
+ */
+
+img { border: 0; }
+
+#content table {
+  border: 0;
+  width: 100%;
+}
+/*Hack to get IE to render the table at 100%*/
+* html #content table { margin-left: -3px; }
+
+#content th,
+#content td {
+  margin: 0;
+  padding: 0;
+  vertical-align: top;
+}
+
+.clearboth {
+  clear: both;
+}
+
+.note, .warning, .fixme {
+  clear:right;
+  border: solid black 1px;
+  margin: 1em 3em;
+}
+
+.note .label {
+  background: #369;
+  color: white;
+  font-weight: bold;
+  padding: 5px 10px;
+}
+.note .content {
+  background: #F0F0FF;
+  color: black;
+  line-height: 120%;
+  font-size: 90%;
+  padding: 5px 10px;
+}
+.warning .label {
+  background: #C00;
+  color: white;
+  font-weight: bold;
+  padding: 5px 10px;
+}
+.warning .content {
+  background: #FFF0F0;
+  color: black;
+  line-height: 120%;
+  font-size: 90%;
+  padding: 5px 10px;
+}
+.fixme .label {
+  background: #C6C600;
+  color: black;
+  font-weight: bold;
+  padding: 5px 10px;
+}
+.fixme .content {
+  padding: 5px 10px;
+}
+
+/**
+ * Typography
+ */
+
+body {
+  font-family: verdana, "Trebuchet MS", arial, helvetica, sans-serif;
+  font-size: 100%;
+}
+
+#content {
+  font-family: Georgia, Palatino, Times, serif;
+  font-size: 95%;
+}
+#tabs {
+  font-size: 70%;
+}
+#menu {
+  font-size: 80%;
+}
+#footer {
+  font-size: 70%;
+}
+
+h1, h2, h3, h4, h5, h6 {
+  font-family: "Trebuchet MS", verdana, arial, helvetica, sans-serif;
+  font-weight: bold;
+  margin-top: 1em;
+  margin-bottom: .5em;
+}
+
+h1 {
+    margin-top: 0;
+    margin-bottom: 1em;
+  font-size: 1.4em;
+}
+#content h1 {
+  font-size: 160%;
+  margin-bottom: .5em;
+}
+#menu h1 {
+  margin: 0;
+  padding: 10px;
+  background: #336699;
+  color: white;
+}
+h2 { font-size: 120%; }
+h3 { font-size: 100%; }
+h4 { font-size: 90%; }
+h5 { font-size: 80%; }
+h6 { font-size: 75%; }
+
+p {
+  line-height: 120%;
+  text-align: left;
+  margin-top: .5em;
+  margin-bottom: 1em;
+}
+
+#content li,
+#content th,
+#content td,
+#content li ul,
+#content li ol{
+  margin-top: .5em;
+  margin-bottom: .5em;
+}
+
+
+#content li li,
+#minitoc-area li{
+  margin-top: 0em;
+  margin-bottom: 0em;
+}
+
+#content .attribution {
+  text-align: right;
+  font-style: italic;
+  font-size: 85%;
+  margin-top: 1em;
+}
+
+.codefrag {
+  font-family: "Courier New", Courier, monospace;
+  font-size: 110%;
+}

BIN
zookeeper-docs/src/main/resources/markdown/skin/chapter.gif


BIN
zookeeper-docs/src/main/resources/markdown/skin/chapter_open.gif


BIN
zookeeper-docs/src/main/resources/markdown/skin/current.gif


+ 40 - 0
zookeeper-docs/src/main/resources/markdown/skin/getBlank.js

@@ -0,0 +1,40 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+/**
+ * getBlank script - when included in a html file and called from a form text field, will set the value of this field to ""
+ * if the text value is still the standard value.
+ * getPrompt script - when included in a html file and called from a form text field, will set the value of this field to the prompt
+ * if the text value is empty.
+ *
+ * Typical usage:
+ * <script type="text/javascript" language="JavaScript" src="getBlank.js"></script>
+ * <input type="text" id="query" value="Search the site:" onFocus="getBlank (this, 'Search the site:');" onBlur="getBlank (this, 'Search the site:');"/>
+ */
+<!--
+function getBlank (form, stdValue){
+if (form.value == stdValue){
+	form.value = '';
+	}
+return true;
+}
+function getPrompt (form, stdValue){
+if (form.value == ''){
+	form.value = stdValue;
+	}
+return true;
+}
+//-->

+ 45 - 0
zookeeper-docs/src/main/resources/markdown/skin/getMenu.js

@@ -0,0 +1,45 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+/**
+ * This script, when included in a html file, can be used to make collapsible menus
+ *
+ * Typical usage:
+ * <script type="text/javascript" language="JavaScript" src="menu.js"></script>
+ */
+
+if (document.getElementById){ 
+  document.write('<style type="text/css">.menuitemgroup{display: none;}</style>')
+}
+
+
+function SwitchMenu(obj, thePath)
+{
+var open = 'url("'+thePath + 'chapter_open.gif")';
+var close = 'url("'+thePath + 'chapter.gif")';
+  if(document.getElementById)  {
+    var el = document.getElementById(obj);
+    var title = document.getElementById(obj+'Title');
+
+    if(el.style.display != "block"){ 
+      title.style.backgroundImage = open;
+      el.style.display = "block";
+    }else{
+      title.style.backgroundImage = close;
+      el.style.display = "none";
+    }
+  }// end -  if(document.getElementById) 
+}//end - function SwitchMenu(obj)

BIN
zookeeper-docs/src/main/resources/markdown/skin/header_white_line.gif


+ 57 - 0
zookeeper-docs/src/main/resources/markdown/skin/init.js

@@ -0,0 +1,57 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+/**
+ * This script, when included in a html file, can be used to make collapsible menus
+ *
+ * Typical usage:
+ * <script type="text/javascript" language="JavaScript" src="menu.js"></script>
+ */
+
+function getFileName(url){
+    var fileName = url.substring(url.lastIndexOf('/')+1);
+    return fileName;
+}
+
+function init(){
+    var url = window .location.pathname;
+    var fileName = getFileName(url);
+
+    var menuItemGroup = document.getElementById("menu").children;
+
+    for (i = 0; i < menuItemGroup.length; i++) {
+        if("menutitle" === menuItemGroup[i].className){
+            continue;
+        }
+        var menuItem = menuItemGroup[i].children;
+        if(menuItem.length>0){
+            for (j = 0; j < menuItem.length; j++) {
+                if(menuItem[j].firstElementChild != null){
+                    var linkItem = menuItem[j].firstElementChild;
+                    if('a' === linkItem.localName){
+                        var linkFile = getFileName(linkItem.href);
+                        if(fileName === linkFile && linkItem.href.lastIndexOf("api/index.html")<0){
+                            linkItem.className = "selected";
+                            linkItem.parentNode.parentNode.className = "selectedmenuitemgroup";
+                            var title = document.getElementById(linkItem.parentNode.parentNode.id+"Title");
+                            title.className="menutitle selected";
+                        }
+                    }
+                }
+            }
+        }
+    }
+}

BIN
zookeeper-docs/src/main/resources/markdown/skin/instruction_arrow.png


+ 48 - 0
zookeeper-docs/src/main/resources/markdown/skin/menu.js

@@ -0,0 +1,48 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+/**
+ * This script, when included in a html file, can be used to make collapsible menus
+ *
+ * Typical usage:
+ * <script type="text/javascript" language="JavaScript" src="menu.js"></script>
+ */
+
+if (document.getElementById){ 
+  document.write('<style type="text/css">.menuitemgroup{display: none;}</style>')
+}
+
+function SwitchMenu(obj)
+{
+  if(document.getElementById)  {
+    var el = document.getElementById(obj);
+    var title = document.getElementById(obj+'Title');
+
+    if(obj.indexOf("_selected_")==0&&el.style.display == ""){
+      el.style.display = "block";
+      title.className = "pagegroupselected";
+    }
+
+    if(el.style.display != "block"){
+      el.style.display = "block";
+      title.className = "pagegroupopen";
+    }
+    else{
+      el.style.display = "none";
+      title.className = "pagegroup";
+    }
+  }// end -  if(document.getElementById) 
+}//end - function SwitchMenu(obj)

BIN
zookeeper-docs/src/main/resources/markdown/skin/page.gif


+ 54 - 0
zookeeper-docs/src/main/resources/markdown/skin/print.css

@@ -0,0 +1,54 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+body {
+  font-family: Georgia, Palatino, serif;
+  font-size: 12pt;
+  background: white;
+}
+
+#tabs,
+#menu,
+#content .toc {
+  display: none;
+}
+
+#content {
+  width: auto;
+  padding: 0;
+  float: none !important;
+  color: black;
+  background: inherit;
+}
+
+a:link, a:visited {
+  color: #336699;
+  background: inherit;
+  text-decoration: underline;
+}
+
+#top .logo {
+  padding: 0;
+  margin: 0 0 2em 0;
+}
+
+#footer {
+  margin-top: 4em;
+}
+
+acronym {
+  border: 0;
+}

BIN
zookeeper-docs/src/main/resources/markdown/skin/printer.gif


+ 159 - 0
zookeeper-docs/src/main/resources/markdown/skin/profile.css

@@ -0,0 +1,159 @@
+
+
+/* ==================== aural ============================ */
+
+@media aural {
+  h1, h2, h3, h4, h5, h6 { voice-family: paul, male; stress: 20; richness: 90 }
+  h1 { pitch: x-low; pitch-range: 90 }
+  h2 { pitch: x-low; pitch-range: 80 }
+  h3 { pitch: low; pitch-range: 70 }
+  h4 { pitch: medium; pitch-range: 60 }
+  h5 { pitch: medium; pitch-range: 50 }
+  h6 { pitch: medium; pitch-range: 40 }
+  li, dt, dd { pitch: medium; richness: 60 }
+  dt { stress: 80 }
+  pre, code, tt { pitch: medium; pitch-range: 0; stress: 0; richness: 80 }
+  em { pitch: medium; pitch-range: 60; stress: 60; richness: 50 }
+  strong { pitch: medium; pitch-range: 60; stress: 90; richness: 90 }
+  dfn { pitch: high; pitch-range: 60; stress: 60 }
+  s, strike { richness: 0 }
+  i { pitch: medium; pitch-range: 60; stress: 60; richness: 50 }
+  b { pitch: medium; pitch-range: 60; stress: 90; richness: 90 }
+  u { richness: 0 }
+  
+  :link { voice-family: harry, male }
+  :visited { voice-family: betty, female }
+  :active { voice-family: betty, female; pitch-range: 80; pitch: x-high }
+}
+  
+#top          { background-color: #FFFFFF;}  
+ 
+#top .header .current { background-color: #4C6C8F;} 
+#top .header .current a:link {  color: #ffffff;  }
+#top .header .current a:visited { color: #ffffff; }
+#top .header .current a:hover { color: #ffffff; }
+ 
+#tabs li      { background-color: #E5E4D9 ;} 
+#tabs li a:link {  color: #000000;  }
+#tabs li a:visited { color: #000000; }
+#tabs li a:hover { color: #000000; }
+
+#level2tabs a.selected      { background-color: #4C6C8F ;} 
+#level2tabs a:link {  color: #ffffff;  }
+#level2tabs a:visited { color: #ffffff; }
+#level2tabs a:hover { color: #ffffff; }
+
+#level2tabs { background-color: #E5E4D9;}
+#level2tabs a.unselected:link {  color: #000000;  }
+#level2tabs a.unselected:visited { color: #000000; }
+#level2tabs a.unselected:hover { color: #000000; }
+
+.heading { background-color: #E5E4D9;} 
+
+.boxed { background-color: #E5E4D9;} 
+.underlined_5 	{border-bottom: solid 5px #E5E4D9;}
+.underlined_10 	{border-bottom: solid 10px #E5E4D9;}
+table caption { 
+background-color: #E5E4D9; 
+color: #000000;
+}
+    
+#feedback {
+color: #FFFFFF;
+background: #4C6C8F;
+text-align: center;
+}
+#feedback #feedbackto {
+color: #FFFFFF;
+}   
+
+#publishedStrip { 
+color: #FFFFFF;
+background: #4C6C8F; 
+}
+
+#publishedStrip { 
+color: #000000;
+background: #E5E4D9; 
+}
+
+#menu a.selected  { background-color: #CFDCED;
+  border-color: #999999;
+  color: #000000;}
+#menu a.selected:visited {  color: #000000;}
+
+#menu           { border-color: #999999;}
+#menu .menupageitemgroup  { border-color: #999999;}
+
+#menu      { background-color: #4C6C8F;} 
+#menu  {  color: #ffffff;} 
+#menu a:link {  color: #ffffff;} 
+#menu a:visited {  color: #ffffff;} 
+#menu a:hover {  
+background-color: #4C6C8F;
+color: #ffffff;} 
+
+#menu h1 {
+color: #000000;
+background-color: #cfdced;
+}   
+ 
+#top .searchbox { 
+background-color: #E5E4D9 ;
+color: #000000; 
+} 
+ 
+#menu .menupageitemgroup     { 
+background-color: #E5E4D9;
+}
+#menu .menupageitem {
+color: #000000;
+} 
+#menu .menupageitem a:link {  color: #000000;} 
+#menu .menupageitem a:visited {  color: #000000;} 
+#menu .menupageitem a:hover {  
+background-color: #E5E4D9;
+color: #000000;
+}
+
+body{ 
+background-color: #ffffff;
+color: #000000;
+} 
+a:link { color:#0000ff} 
+a:visited { color:#009999} 
+a:hover { color:#6587ff} 
+
+ 
+.ForrestTable      { background-color: #ccc;} 
+ 
+.ForrestTable td   { background-color: #ffffff;} 
+ 
+.highlight        { background-color: #ffff00;} 
+ 
+.fixme        { border-color: #c60;} 
+ 
+.note         { border-color: #069;} 
+ 
+.warning         { border-color: #900;}
+ 
+#footer       { background-color: #E5E4D9;} 
+/* extra-css */
+    
+    p.quote {
+      margin-left: 2em;
+      padding: .5em;
+      background-color: #f0f0f0;
+      font-family: monospace;
+    }
+
+    pre {
+      margin-left: 0em;
+      padding: 0.5em;
+      background-color: #f0f0f0;
+      font-family: monospace;
+    }
+
+
+
+  

+ 1257 - 0
zookeeper-docs/src/main/resources/markdown/skin/prototype.js

@@ -0,0 +1,1257 @@
+/*  Prototype JavaScript framework, version 1.4.0_pre4
+ *  (c) 2005 Sam Stephenson <sam@conio.net>
+ *
+ *  THIS FILE IS AUTOMATICALLY GENERATED. When sending patches, please diff
+ *  against the source tree, available from the Prototype darcs repository. 
+ *
+ *  Prototype is freely distributable under the terms of an MIT-style license.
+ *
+ *  For details, see the Prototype web site: http://prototype.conio.net/
+ *
+/*--------------------------------------------------------------------------*/
+
+var Prototype = {
+  Version: '1.4.0_pre4',
+  
+  emptyFunction: function() {},
+  K: function(x) {return x}
+}
+
+var Class = {
+  create: function() {
+    return function() { 
+      this.initialize.apply(this, arguments);
+    }
+  }
+}
+
+var Abstract = new Object();
+
+Object.extend = function(destination, source) {
+  for (property in source) {
+    destination[property] = source[property];
+  }
+  return destination;
+}
+
+Function.prototype.bind = function(object) {
+  var __method = this;
+  return function() {
+    return __method.apply(object, arguments);
+  }
+}
+
+Function.prototype.bindAsEventListener = function(object) {
+  var __method = this;
+  return function(event) {
+    return __method.call(object, event || window.event);
+  }
+}
+
+Number.prototype.toColorPart = function() {
+  var digits = this.toString(16);
+  if (this < 16) return '0' + digits;
+  return digits;
+}
+
+var Try = {
+  these: function() {
+    var returnValue;
+
+    for (var i = 0; i < arguments.length; i++) {
+      var lambda = arguments[i];
+      try {
+        returnValue = lambda();
+        break;
+      } catch (e) {}
+    }
+
+    return returnValue;
+  }
+}
+
+/*--------------------------------------------------------------------------*/
+
+var PeriodicalExecuter = Class.create();
+PeriodicalExecuter.prototype = {
+  initialize: function(callback, frequency) {
+    this.callback = callback;
+    this.frequency = frequency;
+    this.currentlyExecuting = false;
+
+    this.registerCallback();
+  },
+
+  registerCallback: function() {
+    setInterval(this.onTimerEvent.bind(this), this.frequency * 1000);
+  },
+
+  onTimerEvent: function() {
+    if (!this.currentlyExecuting) {
+      try { 
+        this.currentlyExecuting = true;
+        this.callback(); 
+      } finally { 
+        this.currentlyExecuting = false;
+      }
+    }
+  }
+}
+
+/*--------------------------------------------------------------------------*/
+
+function $() {
+  var elements = new Array();
+
+  for (var i = 0; i < arguments.length; i++) {
+    var element = arguments[i];
+    if (typeof element == 'string')
+      element = document.getElementById(element);
+
+    if (arguments.length == 1) 
+      return element;
+
+    elements.push(element);
+  }
+
+  return elements;
+}
+
+if (!Array.prototype.push) {
+  Array.prototype.push = function() {
+		var startLength = this.length;
+		for (var i = 0; i < arguments.length; i++)
+      this[startLength + i] = arguments[i];
+	  return this.length;
+  }
+}
+
+if (!Function.prototype.apply) {
+  // Based on code from http://www.youngpup.net/
+  Function.prototype.apply = function(object, parameters) {
+    var parameterStrings = new Array();
+    if (!object)     object = window;
+    if (!parameters) parameters = new Array();
+    
+    for (var i = 0; i < parameters.length; i++)
+      parameterStrings[i] = 'parameters[' + i + ']';
+    
+    object.__apply__ = this;
+    var result = eval('object.__apply__(' + 
+      parameterStrings.join(', ') + ')');
+    object.__apply__ = null;
+    
+    return result;
+  }
+}
+
+Object.extend(String.prototype, {
+  stripTags: function() {
+    return this.replace(/<\/?[^>]+>/gi, '');
+  },
+
+  escapeHTML: function() {
+    var div = document.createElement('div');
+    var text = document.createTextNode(this);
+    div.appendChild(text);
+    return div.innerHTML;
+  },
+
+  unescapeHTML: function() {
+    var div = document.createElement('div');
+    div.innerHTML = this.stripTags();
+    return div.childNodes[0].nodeValue;
+  },
+  
+  parseQuery: function() {
+    var str = this;
+    if (str.substring(0,1) == '?') {
+      str = this.substring(1);
+    }
+    var result = {};
+    var pairs = str.split('&');
+    for (var i = 0; i < pairs.length; i++) {
+      var pair = pairs[i].split('=');
+      result[pair[0]] = pair[1];
+    }
+    return result;
+  }
+});
+
+
+var _break    = new Object();
+var _continue = new Object();
+
+var Enumerable = {
+  each: function(iterator) {
+    var index = 0;
+    try {
+      this._each(function(value) {
+        try {
+          iterator(value, index++);
+        } catch (e) {
+          if (e != _continue) throw e;
+        }
+      });
+    } catch (e) {
+      if (e != _break) throw e;
+    }
+  },
+  
+  all: function(iterator) {
+    var result = true;
+    this.each(function(value, index) {
+      if (!(result &= (iterator || Prototype.K)(value, index))) 
+        throw _break;
+    });
+    return result;
+  },
+  
+  any: function(iterator) {
+    var result = true;
+    this.each(function(value, index) {
+      if (result &= (iterator || Prototype.K)(value, index)) 
+        throw _break;
+    });
+    return result;
+  },
+  
+  collect: function(iterator) {
+    var results = [];
+    this.each(function(value, index) {
+      results.push(iterator(value, index));
+    });
+    return results;
+  },
+  
+  detect: function (iterator) {
+    var result;
+    this.each(function(value, index) {
+      if (iterator(value, index)) {
+        result = value;
+        throw _break;
+      }
+    });
+    return result;
+  },
+  
+  findAll: function(iterator) {
+    var results = [];
+    this.each(function(value, index) {
+      if (iterator(value, index))
+        results.push(value);
+    });
+    return results;
+  },
+  
+  grep: function(pattern, iterator) {
+    var results = [];
+    this.each(function(value, index) {
+      var stringValue = value.toString();
+      if (stringValue.match(pattern))
+        results.push((iterator || Prototype.K)(value, index));
+    })
+    return results;
+  },
+  
+  include: function(object) {
+    var found = false;
+    this.each(function(value) {
+      if (value == object) {
+        found = true;
+        throw _break;
+      }
+    });
+    return found;
+  },
+  
+  inject: function(memo, iterator) {
+    this.each(function(value, index) {
+      memo = iterator(memo, value, index);
+    });
+    return memo;
+  },
+  
+  invoke: function(method) {
+    var args = $A(arguments).slice(1);
+    return this.collect(function(value) {
+      return value[method].apply(value, args);
+    });
+  },
+  
+  max: function(iterator) {
+    var result;
+    this.each(function(value, index) {
+      value = (iterator || Prototype.K)(value, index);
+      if (value >= (result || value))
+        result = value;
+    });
+    return result;
+  },
+  
+  min: function(iterator) {
+    var result;
+    this.each(function(value, index) {
+      value = (iterator || Prototype.K)(value, index);
+      if (value <= (result || value))
+        result = value;
+    });
+    return result;
+  },
+  
+  partition: function(iterator) {
+    var trues = [], falses = [];
+    this.each(function(value, index) {
+      ((iterator || Prototype.K)(value, index) ? 
+        trues : falses).push(value);
+    });
+    return [trues, falses];
+  },
+  
+  pluck: function(property) {
+    var results = [];
+    this.each(function(value, index) {
+      results.push(value[property]);
+    });
+    return results;
+  },
+  
+  reject: function(iterator) {
+    var results = [];
+    this.each(function(value, index) {
+      if (!iterator(value, index))
+        results.push(value);
+    });
+    return results;
+  },
+  
+  sortBy: function(iterator) {
+    return this.collect(function(value, index) {
+      return {value: value, criteria: iterator(value, index)};
+    }).sort(function(left, right) {
+      var a = left.criteria, b = right.criteria;
+      return a < b ? -1 : a > b ? 1 : 0;
+    }).pluck('value');
+  },
+  
+  toArray: function() {
+    return this.collect(Prototype.K);
+  },
+  
+  zip: function() {
+    var iterator = Prototype.K, args = $A(arguments);
+    if (typeof args.last() == 'function')
+      iterator = args.pop();
+
+    var collections = [this].concat(args).map($A);
+    return this.map(function(value, index) {
+      iterator(value = collections.pluck(index));
+      return value;
+    });
+  }
+}
+
+Object.extend(Enumerable, {
+  map:     Enumerable.collect,
+  find:    Enumerable.detect,
+  select:  Enumerable.findAll,
+  member:  Enumerable.include,
+  entries: Enumerable.toArray
+});
+
+$A = Array.from = function(iterable) {
+  var results = [];
+  for (var i = 0; i < iterable.length; i++)
+    results.push(iterable[i]);
+  return results;
+}
+
+Object.extend(Array.prototype, {
+  _each: function(iterator) {
+    for (var i = 0; i < this.length; i++)
+      iterator(this[i]);
+  },
+  
+  first: function() {
+    return this[0];
+  },
+  
+  last: function() {
+    return this[this.length - 1];
+  }
+});
+
+Object.extend(Array.prototype, Enumerable);
+
+
+var Ajax = {
+  getTransport: function() {
+    return Try.these(
+      function() {return new ActiveXObject('Msxml2.XMLHTTP')},
+      function() {return new ActiveXObject('Microsoft.XMLHTTP')},
+      function() {return new XMLHttpRequest()}
+    ) || false;
+  }
+}
+
+Ajax.Base = function() {};
+Ajax.Base.prototype = {
+  setOptions: function(options) {
+    this.options = {
+      method:       'post',
+      asynchronous: true,
+      parameters:   ''
+    }
+    Object.extend(this.options, options || {});
+  },
+
+  responseIsSuccess: function() {
+    return this.transport.status == undefined
+        || this.transport.status == 0 
+        || (this.transport.status >= 200 && this.transport.status < 300);
+  },
+
+  responseIsFailure: function() {
+    return !this.responseIsSuccess();
+  }
+}
+
+Ajax.Request = Class.create();
+Ajax.Request.Events = 
+  ['Uninitialized', 'Loading', 'Loaded', 'Interactive', 'Complete'];
+
+Ajax.Request.prototype = Object.extend(new Ajax.Base(), {
+  initialize: function(url, options) {
+    this.transport = Ajax.getTransport();
+    this.setOptions(options);
+    this.request(url);
+  },
+
+  request: function(url) {
+    var parameters = this.options.parameters || '';
+    if (parameters.length > 0) parameters += '&_=';
+
+    try {
+      if (this.options.method == 'get')
+        url += '?' + parameters;
+
+      this.transport.open(this.options.method, url,
+        this.options.asynchronous);
+
+      if (this.options.asynchronous) {
+        this.transport.onreadystatechange = this.onStateChange.bind(this);
+        setTimeout((function() {this.respondToReadyState(1)}).bind(this), 10);
+      }
+
+      this.setRequestHeaders();
+
+      var body = this.options.postBody ? this.options.postBody : parameters;
+      this.transport.send(this.options.method == 'post' ? body : null);
+
+    } catch (e) {
+    }
+  },
+
+  setRequestHeaders: function() {
+    var requestHeaders = 
+      ['X-Requested-With', 'XMLHttpRequest',
+       'X-Prototype-Version', Prototype.Version];
+
+    if (this.options.method == 'post') {
+      requestHeaders.push('Content-type', 
+        'application/x-www-form-urlencoded');
+
+      /* Force "Connection: close" for Mozilla browsers to work around
+       * a bug where XMLHttpReqeuest sends an incorrect Content-length
+       * header. See Mozilla Bugzilla #246651. 
+       */
+      if (this.transport.overrideMimeType)
+        requestHeaders.push('Connection', 'close');
+    }
+
+    if (this.options.requestHeaders)
+      requestHeaders.push.apply(requestHeaders, this.options.requestHeaders);
+
+    for (var i = 0; i < requestHeaders.length; i += 2)
+      this.transport.setRequestHeader(requestHeaders[i], requestHeaders[i+1]);
+  },
+
+  onStateChange: function() {
+    var readyState = this.transport.readyState;
+    if (readyState != 1)
+      this.respondToReadyState(this.transport.readyState);
+  },
+
+  respondToReadyState: function(readyState) {
+    var event = Ajax.Request.Events[readyState];
+
+    if (event == 'Complete')
+      (this.options['on' + this.transport.status]
+       || this.options['on' + (this.responseIsSuccess() ? 'Success' : 'Failure')]
+       || Prototype.emptyFunction)(this.transport);
+
+    (this.options['on' + event] || Prototype.emptyFunction)(this.transport);
+
+    /* Avoid memory leak in MSIE: clean up the oncomplete event handler */
+    if (event == 'Complete')
+      this.transport.onreadystatechange = Prototype.emptyFunction;
+  }
+});
+
+Ajax.Updater = Class.create();
+Ajax.Updater.ScriptFragment = '(?:<script.*?>)((\n|.)*?)(?:<\/script>)';
+
+Object.extend(Object.extend(Ajax.Updater.prototype, Ajax.Request.prototype), {
+  initialize: function(container, url, options) {
+    this.containers = {
+      success: container.success ? $(container.success) : $(container),
+      failure: container.failure ? $(container.failure) :
+        (container.success ? null : $(container))
+    }
+
+    this.transport = Ajax.getTransport();
+    this.setOptions(options);
+
+    var onComplete = this.options.onComplete || Prototype.emptyFunction;
+    this.options.onComplete = (function() {
+      this.updateContent();
+      onComplete(this.transport);
+    }).bind(this);
+
+    this.request(url);
+  },
+
+  updateContent: function() {
+    var receiver = this.responseIsSuccess() ?
+      this.containers.success : this.containers.failure;
+
+    var match    = new RegExp(Ajax.Updater.ScriptFragment, 'img');
+    var response = this.transport.responseText.replace(match, '');
+    var scripts  = this.transport.responseText.match(match);
+
+    if (receiver) {
+      if (this.options.insertion) {
+        new this.options.insertion(receiver, response);
+      } else {
+        receiver.innerHTML = response;
+      }
+    }
+
+    if (this.responseIsSuccess()) {
+      if (this.onComplete)
+        setTimeout((function() {this.onComplete(
+          this.transport)}).bind(this), 10);
+    }
+
+    if (this.options.evalScripts && scripts) {
+      match = new RegExp(Ajax.Updater.ScriptFragment, 'im');
+      setTimeout((function() {
+        for (var i = 0; i < scripts.length; i++)
+          eval(scripts[i].match(match)[1]);
+      }).bind(this), 10);
+    }
+  }
+});
+
+Ajax.PeriodicalUpdater = Class.create();
+Ajax.PeriodicalUpdater.prototype = Object.extend(new Ajax.Base(), {
+  initialize: function(container, url, options) {
+    this.setOptions(options);
+    this.onComplete = this.options.onComplete;
+
+    this.frequency = (this.options.frequency || 2);
+    this.decay = 1;
+
+    this.updater = {};
+    this.container = container;
+    this.url = url;
+
+    this.start();
+  },
+
+  start: function() {
+    this.options.onComplete = this.updateComplete.bind(this);
+    this.onTimerEvent();
+  },
+
+  stop: function() {
+    this.updater.onComplete = undefined;
+    clearTimeout(this.timer);
+    (this.onComplete || Ajax.emptyFunction).apply(this, arguments);
+  },
+
+  updateComplete: function(request) {
+    if (this.options.decay) {
+      this.decay = (request.responseText == this.lastText ? 
+        this.decay * this.options.decay : 1);
+
+      this.lastText = request.responseText;
+    }
+    this.timer = setTimeout(this.onTimerEvent.bind(this), 
+      this.decay * this.frequency * 1000);
+  },
+
+  onTimerEvent: function() {
+    this.updater = new Ajax.Updater(this.container, this.url, this.options);
+  }
+});
+
+document.getElementsByClassName = function(className) {
+  var children = document.getElementsByTagName('*') || document.all;
+  var elements = new Array();
+  
+  for (var i = 0; i < children.length; i++) {
+    var child = children[i];
+    var classNames = child.className.split(' ');
+    for (var j = 0; j < classNames.length; j++) {
+      if (classNames[j] == className) {
+        elements.push(child);
+        break;
+      }
+    }
+  }
+  
+  return elements;
+}
+
+/*--------------------------------------------------------------------------*/
+
+if (!window.Element) {
+  var Element = new Object();
+}
+
+Object.extend(Element, {
+  toggle: function() {
+    for (var i = 0; i < arguments.length; i++) {
+      var element = $(arguments[i]);
+      element.style.display = 
+        (element.style.display == 'none' ? '' : 'none');
+    }
+  },
+
+  hide: function() {
+    for (var i = 0; i < arguments.length; i++) {
+      var element = $(arguments[i]);
+      element.style.display = 'none';
+    }
+  },
+
+  show: function() {
+    for (var i = 0; i < arguments.length; i++) {
+      var element = $(arguments[i]);
+      element.style.display = '';
+    }
+  },
+
+  remove: function(element) {
+    element = $(element);
+    element.parentNode.removeChild(element);
+  },
+   
+  getHeight: function(element) {
+    element = $(element);
+    return element.offsetHeight; 
+  },
+
+  hasClassName: function(element, className) {
+    element = $(element);
+    if (!element)
+      return;
+    var a = element.className.split(' ');
+    for (var i = 0; i < a.length; i++) {
+      if (a[i] == className)
+        return true;
+    }
+    return false;
+  },
+
+  addClassName: function(element, className) {
+    element = $(element);
+    Element.removeClassName(element, className);
+    element.className += ' ' + className;
+  },
+
+  removeClassName: function(element, className) {
+    element = $(element);
+    if (!element)
+      return;
+    var newClassName = '';
+    var a = element.className.split(' ');
+    for (var i = 0; i < a.length; i++) {
+      if (a[i] != className) {
+        if (i > 0)
+          newClassName += ' ';
+        newClassName += a[i];
+      }
+    }
+    element.className = newClassName;
+  },
+  
+  // removes whitespace-only text node children
+  cleanWhitespace: function(element) {
+    var element = $(element);
+    for (var i = 0; i < element.childNodes.length; i++) {
+      var node = element.childNodes[i];
+      if (node.nodeType == 3 && !/\S/.test(node.nodeValue)) 
+        Element.remove(node);
+    }
+  }
+});
+
+var Toggle = new Object();
+Toggle.display = Element.toggle;
+
+/*--------------------------------------------------------------------------*/
+
+Abstract.Insertion = function(adjacency) {
+  this.adjacency = adjacency;
+}
+
+Abstract.Insertion.prototype = {
+  initialize: function(element, content) {
+    this.element = $(element);
+    this.content = content;
+    
+    if (this.adjacency && this.element.insertAdjacentHTML) {
+      this.element.insertAdjacentHTML(this.adjacency, this.content);
+    } else {
+      this.range = this.element.ownerDocument.createRange();
+      if (this.initializeRange) this.initializeRange();
+      this.fragment = this.range.createContextualFragment(this.content);
+      this.insertContent();
+    }
+  }
+}
+
+var Insertion = new Object();
+
+Insertion.Before = Class.create();
+Insertion.Before.prototype = Object.extend(new Abstract.Insertion('beforeBegin'), {
+  initializeRange: function() {
+    this.range.setStartBefore(this.element);
+  },
+  
+  insertContent: function() {
+    this.element.parentNode.insertBefore(this.fragment, this.element);
+  }
+});
+
+Insertion.Top = Class.create();
+Insertion.Top.prototype = Object.extend(new Abstract.Insertion('afterBegin'), {
+  initializeRange: function() {
+    this.range.selectNodeContents(this.element);
+    this.range.collapse(true);
+  },
+  
+  insertContent: function() {  
+    this.element.insertBefore(this.fragment, this.element.firstChild);
+  }
+});
+
+Insertion.Bottom = Class.create();
+Insertion.Bottom.prototype = Object.extend(new Abstract.Insertion('beforeEnd'), {
+  initializeRange: function() {
+    this.range.selectNodeContents(this.element);
+    this.range.collapse(this.element);
+  },
+  
+  insertContent: function() {
+    this.element.appendChild(this.fragment);
+  }
+});
+
+Insertion.After = Class.create();
+Insertion.After.prototype = Object.extend(new Abstract.Insertion('afterEnd'), {
+  initializeRange: function() {
+    this.range.setStartAfter(this.element);
+  },
+  
+  insertContent: function() {
+    this.element.parentNode.insertBefore(this.fragment, 
+      this.element.nextSibling);
+  }
+});
+
+var Field = {
+  clear: function() {
+    for (var i = 0; i < arguments.length; i++)
+      $(arguments[i]).value = '';
+  },
+
+  focus: function(element) {
+    $(element).focus();
+  },
+  
+  present: function() {
+    for (var i = 0; i < arguments.length; i++)
+      if ($(arguments[i]).value == '') return false;
+    return true;
+  },
+  
+  select: function(element) {
+    $(element).select();
+  },
+   
+  activate: function(element) {
+    $(element).focus();
+    $(element).select();
+  }
+}
+
+/*--------------------------------------------------------------------------*/
+
+var Form = {
+  serialize: function(form) {
+    var elements = Form.getElements($(form));
+    var queryComponents = new Array();
+    
+    for (var i = 0; i < elements.length; i++) {
+      var queryComponent = Form.Element.serialize(elements[i]);
+      if (queryComponent)
+        queryComponents.push(queryComponent);
+    }
+    
+    return queryComponents.join('&');
+  },
+  
+  getElements: function(form) {
+    var form = $(form);
+    var elements = new Array();
+
+    for (tagName in Form.Element.Serializers) {
+      var tagElements = form.getElementsByTagName(tagName);
+      for (var j = 0; j < tagElements.length; j++)
+        elements.push(tagElements[j]);
+    }
+    return elements;
+  },
+  
+  getInputs: function(form, typeName, name) {
+    var form = $(form);
+    var inputs = form.getElementsByTagName('input');
+    
+    if (!typeName && !name)
+      return inputs;
+      
+    var matchingInputs = new Array();
+    for (var i = 0; i < inputs.length; i++) {
+      var input = inputs[i];
+      if ((typeName && input.type != typeName) ||
+          (name && input.name != name)) 
+        continue;
+      matchingInputs.push(input);
+    }
+
+    return matchingInputs;
+  },
+
+  disable: function(form) {
+    var elements = Form.getElements(form);
+    for (var i = 0; i < elements.length; i++) {
+      var element = elements[i];
+      element.blur();
+      element.disabled = 'true';
+    }
+  },
+
+  enable: function(form) {
+    var elements = Form.getElements(form);
+    for (var i = 0; i < elements.length; i++) {
+      var element = elements[i];
+      element.disabled = '';
+    }
+  },
+
+  focusFirstElement: function(form) {
+    var form = $(form);
+    var elements = Form.getElements(form);
+    for (var i = 0; i < elements.length; i++) {
+      var element = elements[i];
+      if (element.type != 'hidden' && !element.disabled) {
+        Field.activate(element);
+        break;
+      }
+    }
+  },
+
+  reset: function(form) {
+    $(form).reset();
+  }
+}
+
+Form.Element = {
+  serialize: function(element) {
+    var element = $(element);
+    var method = element.tagName.toLowerCase();
+    var parameter = Form.Element.Serializers[method](element);
+    
+    if (parameter)
+      return encodeURIComponent(parameter[0]) + '=' + 
+        encodeURIComponent(parameter[1]);                   
+  },
+  
+  getValue: function(element) {
+    var element = $(element);
+    var method = element.tagName.toLowerCase();
+    var parameter = Form.Element.Serializers[method](element);
+    
+    if (parameter) 
+      return parameter[1];
+  }
+}
+
+Form.Element.Serializers = {
+  input: function(element) {
+    switch (element.type.toLowerCase()) {
+      case 'submit':
+      case 'hidden':
+      case 'password':
+      case 'text':
+        return Form.Element.Serializers.textarea(element);
+      case 'checkbox':  
+      case 'radio':
+        return Form.Element.Serializers.inputSelector(element);
+    }
+    return false;
+  },
+
+  inputSelector: function(element) {
+    if (element.checked)
+      return [element.name, element.value];
+  },
+
+  textarea: function(element) {
+    return [element.name, element.value];
+  },
+
+  select: function(element) {
+    var value = '';
+    if (element.type == 'select-one') {
+      var index = element.selectedIndex;
+      if (index >= 0)
+        value = element.options[index].value || element.options[index].text;
+    } else {
+      value = new Array();
+      for (var i = 0; i < element.length; i++) {
+        var opt = element.options[i];
+        if (opt.selected)
+          value.push(opt.value || opt.text);
+      }
+    }
+    return [element.name, value];
+  }
+}
+
+/*--------------------------------------------------------------------------*/
+
+var $F = Form.Element.getValue;
+
+/*--------------------------------------------------------------------------*/
+
+Abstract.TimedObserver = function() {}
+Abstract.TimedObserver.prototype = {
+  initialize: function(element, frequency, callback) {
+    this.frequency = frequency;
+    this.element   = $(element);
+    this.callback  = callback;
+    
+    this.lastValue = this.getValue();
+    this.registerCallback();
+  },
+  
+  registerCallback: function() {
+    setInterval(this.onTimerEvent.bind(this), this.frequency * 1000);
+  },
+  
+  onTimerEvent: function() {
+    var value = this.getValue();
+    if (this.lastValue != value) {
+      this.callback(this.element, value);
+      this.lastValue = value;
+    }
+  }
+}
+
+Form.Element.Observer = Class.create();
+Form.Element.Observer.prototype = Object.extend(new Abstract.TimedObserver(), {
+  getValue: function() {
+    return Form.Element.getValue(this.element);
+  }
+});
+
+Form.Observer = Class.create();
+Form.Observer.prototype = Object.extend(new Abstract.TimedObserver(), {
+  getValue: function() {
+    return Form.serialize(this.element);
+  }
+});
+
+/*--------------------------------------------------------------------------*/
+
+Abstract.EventObserver = function() {}
+Abstract.EventObserver.prototype = {
+  initialize: function(element, callback) {
+    this.element  = $(element);
+    this.callback = callback;
+    
+    this.lastValue = this.getValue();
+    if (this.element.tagName.toLowerCase() == 'form')
+      this.registerFormCallbacks();
+    else
+      this.registerCallback(this.element);
+  },
+  
+  onElementEvent: function() {
+    var value = this.getValue();
+    if (this.lastValue != value) {
+      this.callback(this.element, value);
+      this.lastValue = value;
+    }
+  },
+  
+  registerFormCallbacks: function() {
+    var elements = Form.getElements(this.element);
+    for (var i = 0; i < elements.length; i++)
+      this.registerCallback(elements[i]);
+  },
+  
+  registerCallback: function(element) {
+    if (element.type) {
+      switch (element.type.toLowerCase()) {
+        case 'checkbox':  
+        case 'radio':
+          element.target = this;
+          element.prev_onclick = element.onclick || Prototype.emptyFunction;
+          element.onclick = function() {
+            this.prev_onclick(); 
+            this.target.onElementEvent();
+          }
+          break;
+        case 'password':
+        case 'text':
+        case 'textarea':
+        case 'select-one':
+        case 'select-multiple':
+          element.target = this;
+          element.prev_onchange = element.onchange || Prototype.emptyFunction;
+          element.onchange = function() {
+            this.prev_onchange(); 
+            this.target.onElementEvent();
+          }
+          break;
+      }
+    }    
+  }
+}
+
+Form.Element.EventObserver = Class.create();
+Form.Element.EventObserver.prototype = Object.extend(new Abstract.EventObserver(), {
+  getValue: function() {
+    return Form.Element.getValue(this.element);
+  }
+});
+
+Form.EventObserver = Class.create();
+Form.EventObserver.prototype = Object.extend(new Abstract.EventObserver(), {
+  getValue: function() {
+    return Form.serialize(this.element);
+  }
+});
+
+
+if (!window.Event) {
+  var Event = new Object();
+}
+
+Object.extend(Event, {
+  KEY_BACKSPACE: 8,
+  KEY_TAB:       9,
+  KEY_RETURN:   13,
+  KEY_ESC:      27,
+  KEY_LEFT:     37,
+  KEY_UP:       38,
+  KEY_RIGHT:    39,
+  KEY_DOWN:     40,
+  KEY_DELETE:   46,
+
+  element: function(event) {
+    return event.target || event.srcElement;
+  },
+
+  isLeftClick: function(event) {
+    return (((event.which) && (event.which == 1)) ||
+            ((event.button) && (event.button == 1)));
+  },
+
+  pointerX: function(event) {
+    return event.pageX || (event.clientX + 
+      (document.documentElement.scrollLeft || document.body.scrollLeft));
+  },
+
+  pointerY: function(event) {
+    return event.pageY || (event.clientY + 
+      (document.documentElement.scrollTop || document.body.scrollTop));
+  },
+
+  stop: function(event) {
+    if (event.preventDefault) { 
+      event.preventDefault(); 
+      event.stopPropagation(); 
+    } else {
+      event.returnValue = false;
+    }
+  },
+
+  // find the first node with the given tagName, starting from the
+  // node the event was triggered on; traverses the DOM upwards
+  findElement: function(event, tagName) {
+    var element = Event.element(event);
+    while (element.parentNode && (!element.tagName ||
+        (element.tagName.toUpperCase() != tagName.toUpperCase())))
+      element = element.parentNode;
+    return element;
+  },
+
+  observers: false,
+  
+  _observeAndCache: function(element, name, observer, useCapture) {
+    if (!this.observers) this.observers = [];
+    if (element.addEventListener) {
+      this.observers.push([element, name, observer, useCapture]);
+      element.addEventListener(name, observer, useCapture);
+    } else if (element.attachEvent) {
+      this.observers.push([element, name, observer, useCapture]);
+      element.attachEvent('on' + name, observer);
+    }
+  },
+  
+  unloadCache: function() {
+    if (!Event.observers) return;
+    for (var i = 0; i < Event.observers.length; i++) {
+      Event.stopObserving.apply(this, Event.observers[i]);
+      Event.observers[i][0] = null;
+    }
+    Event.observers = false;
+  },
+
+  observe: function(element, name, observer, useCapture) {
+    var element = $(element);
+    useCapture = useCapture || false;
+    
+    if (name == 'keypress' &&
+        ((/Konqueror|Safari|KHTML/.test(navigator.userAgent)) 
+        || element.attachEvent))
+      name = 'keydown';
+    
+    this._observeAndCache(element, name, observer, useCapture);
+  },
+
+  stopObserving: function(element, name, observer, useCapture) {
+    var element = $(element);
+    useCapture = useCapture || false;
+    
+    if (name == 'keypress' &&
+        ((/Konqueror|Safari|KHTML/.test(navigator.userAgent)) 
+        || element.detachEvent))
+      name = 'keydown';
+    
+    if (element.removeEventListener) {
+      element.removeEventListener(name, observer, useCapture);
+    } else if (element.detachEvent) {
+      element.detachEvent('on' + name, observer);
+    }
+  }
+});
+
+/* prevent memory leaks in IE */
+Event.observe(window, 'unload', Event.unloadCache, false);
+
+var Position = {
+
+  // set to true if needed, warning: firefox performance problems
+  // NOT needed for page scrolling, only if draggable contained in
+  // scrollable elements
+  includeScrollOffsets: false, 
+
+  // must be called before calling withinIncludingScrolloffset, every time the
+  // page is scrolled
+  prepare: function() {
+    this.deltaX =  window.pageXOffset 
+                || document.documentElement.scrollLeft 
+                || document.body.scrollLeft 
+                || 0;
+    this.deltaY =  window.pageYOffset 
+                || document.documentElement.scrollTop 
+                || document.body.scrollTop 
+                || 0;
+  },
+
+  realOffset: function(element) {
+    var valueT = 0, valueL = 0;
+    do {
+      valueT += element.scrollTop  || 0;
+      valueL += element.scrollLeft || 0; 
+      element = element.parentNode;
+    } while (element);
+    return [valueL, valueT];
+  },
+
+  cumulativeOffset: function(element) {
+    var valueT = 0, valueL = 0;
+    do {
+      valueT += element.offsetTop  || 0;
+      valueL += element.offsetLeft || 0;
+      element = element.offsetParent;
+    } while (element);
+    return [valueL, valueT];
+  },
+
+  // caches x/y coordinate pair to use with overlap
+  within: function(element, x, y) {
+    if (this.includeScrollOffsets)
+      return this.withinIncludingScrolloffsets(element, x, y);
+    this.xcomp = x;
+    this.ycomp = y;
+    this.offset = this.cumulativeOffset(element);
+
+    return (y >= this.offset[1] &&
+            y <  this.offset[1] + element.offsetHeight &&
+            x >= this.offset[0] && 
+            x <  this.offset[0] + element.offsetWidth);
+  },
+
+  withinIncludingScrolloffsets: function(element, x, y) {
+    var offsetcache = this.realOffset(element);
+
+    this.xcomp = x + offsetcache[0] - this.deltaX;
+    this.ycomp = y + offsetcache[1] - this.deltaY;
+    this.offset = this.cumulativeOffset(element);
+
+    return (this.ycomp >= this.offset[1] &&
+            this.ycomp <  this.offset[1] + element.offsetHeight &&
+            this.xcomp >= this.offset[0] && 
+            this.xcomp <  this.offset[0] + element.offsetWidth);
+  },
+
+  // within must be called directly before
+  overlap: function(mode, element) {  
+    if (!mode) return 0;  
+    if (mode == 'vertical') 
+      return ((this.offset[1] + element.offsetHeight) - this.ycomp) / 
+        element.offsetHeight;
+    if (mode == 'horizontal')
+      return ((this.offset[0] + element.offsetWidth) - this.xcomp) / 
+        element.offsetWidth;
+  },
+
+  clone: function(source, target) {
+    source = $(source);
+    target = $(target);
+    target.style.position = 'absolute';
+    var offsets = this.cumulativeOffset(source);
+    target.style.top    = offsets[1] + 'px';
+    target.style.left   = offsets[0] + 'px';
+    target.style.width  = source.offsetWidth + 'px';
+    target.style.height = source.offsetHeight + 'px';
+  }
+}

+ 531 - 0
zookeeper-docs/src/main/resources/markdown/skin/screen.css

@@ -0,0 +1,531 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+body {  margin: 0px 0px 0px 0px; font-family: Verdana, Helvetica, sans-serif; }
+
+h1     { font-size : 160%; margin: 0px 0px 0px 0px;  padding: 0px; }
+h2     { font-size : 140%; margin: 1em 0px 0.8em 0px; padding: 0px; font-weight : bold;}
+h3     { font-size : 130%; margin: 0.8em 0px 0px 0px; padding: 0px; font-weight : bold; }
+.h3 { margin: 22px 0px 3px 0px; }
+h4     { font-size : 120%; margin: 0.7em 0px 0px 0px; padding: 0px; font-weight : normal; text-align: left; }
+.h4 { margin: 18px 0px 0px 0px; }
+h4.faq { font-size : 120%; margin: 18px 0px 0px 0px; padding: 0px; font-weight : bold;   text-align: left; }
+h5     { font-size : 100%; margin: 14px 0px 0px 0px; padding: 0px; font-weight : normal; text-align: left; }
+
+/**
+* table
+*/
+table .title { background-color: #000000; }
+.ForrestTable         {
+    color: #ffffff;
+    background-color: #7099C5;
+    width: 100%;
+    font-size : 100%;
+    empty-cells: show;
+}
+table caption {
+    padding-left: 5px;
+    color: white;
+    text-align: left;
+    font-weight: bold;
+    background-color: #000000;
+}
+.ForrestTable td {
+    color: black;
+    background-color: #f0f0ff;
+}
+.ForrestTable th { text-align: center; }
+/**
+ * Page Header
+ */
+
+#top {
+    position: relative;
+    float: left;
+    width: 100%;
+    background: #294563; /* if you want a background in the header, put it here */
+}
+
+#top .breadtrail {
+    background: #CFDCED;
+    color: black;
+    border-bottom: solid 1px white;
+    padding: 3px 10px;
+    font-size: 75%;
+}
+#top .breadtrail a { color: black; }
+
+#top .header {
+    float: left;
+    width: 100%;
+    background: url("header_white_line.gif") repeat-x bottom;
+}
+
+#top .grouplogo {
+    padding: 7px 0 10px 10px;
+    float: left;
+    text-align: left;
+}
+#top .projectlogo {
+    padding: 7px 0 10px 10px;
+    float: left;
+    width: 33%;
+    text-align: right;
+}
+#top .projectlogoA1 {
+    padding: 7px 0 10px 10px;
+    float: right;
+}
+html>body #top .searchbox {
+    bottom: 0px;
+}
+#top .searchbox {
+    position: absolute;
+    right: 10px;
+    height: 42px;
+    font-size: 70%;
+    white-space: nowrap;
+    bottom: -1px; /* compensate for IE rendering issue */
+    border-radius: 5px 5px 0px 0px;
+}
+
+#top .searchbox form {
+    padding: 5px 10px;
+    margin: 0;
+}
+#top .searchbox p {
+    padding: 0 0 2px 0;
+    margin: 0;
+}
+#top .searchbox input {
+    font-size: 100%;
+}
+
+#tabs {
+    clear: both;
+    padding-left: 10px;
+    margin: 0;
+    list-style: none;
+}
+
+#tabs li {
+    float: left;
+    margin: 0 3px 0 0;
+    padding: 0;
+    border-radius: 5px 5px 0px 0px;
+}
+
+/*background: url("tab-left.gif") no-repeat left top;*/
+#tabs li a {
+    float: left;
+    display: block;
+    font-family: verdana, arial, sans-serif;
+    text-decoration: none;
+    color: black;
+    white-space: nowrap;
+    padding: 5px 15px 4px;
+    width: .1em; /* IE/Win fix */
+}
+
+#tabs li a:hover {
+   
+    cursor: pointer;
+    text-decoration:underline;
+}
+
+#tabs > li a { width: auto; } /* Rest of IE/Win fix */
+
+/* Commented Backslash Hack hides rule from IE5-Mac \*/
+#tabs a { float: none; }
+/* End IE5-Mac hack */
+
+#top .header .current {
+    background-color: #4C6C8F;
+}
+#top .header .current a {
+    font-weight: bold;
+    padding-bottom: 5px;
+    color: white;
+}
+#publishedStrip {
+    padding-right: 10px;
+    padding-left: 20px;
+    padding-top: 3px;
+    padding-bottom:3px;
+    color: #ffffff;
+    font-size : 60%;
+    font-weight: bold;
+    background-color: #4C6C8F;
+    text-align:right;
+}
+
+#level2tabs {
+margin: 0;
+float:left;
+position:relative;
+
+}
+
+
+
+#level2tabs  a:hover {
+   
+    cursor: pointer;
+    text-decoration:underline;
+    
+}
+
+#level2tabs  a{
+   
+    cursor: pointer;
+    text-decoration:none;
+    background-image: url('chapter.gif');
+    background-repeat: no-repeat;
+    background-position: center left;
+    padding-left: 6px;
+    margin-left: 6px;
+}
+
+/*
+*    border-top: solid #4C6C8F 15px;
+*/
+#main {
+    position: relative;
+    background: white;
+    clear:both;
+}
+#main .breadtrail {
+    clear:both;
+    position: relative;
+    background: #CFDCED;
+    color: black;
+    border-bottom: solid 1px black;
+    border-top: solid 1px black;
+    padding: 0px 180px;
+    font-size: 75%;
+    z-index:10;
+}
+
+img.corner {
+   width: 15px;
+   height: 15px;
+   border: none;
+   display: block !important;
+}
+
+img.cornersmall {
+   width: 5px;
+   height: 5px;
+   border: none;
+   display: block !important;
+}
+/**
+ * Side menu
+ */
+#menu a {  font-weight: normal; text-decoration: none;}
+#menu a:visited {  font-weight: normal; }
+#menu a:active {  font-weight: normal; }
+#menu a:hover {  font-weight: normal;  text-decoration:underline;}
+
+#menuarea { width:10em;}
+#menu {
+    position: relative;
+    float: left;
+    width: 160px;
+    padding-top: 0px;
+    padding-bottom: 15px;
+    top:-18px;
+    left:10px;
+    z-index: 20;
+    background-color: #f90;
+    font-size : 70%;
+    border-radius: 0px 0px 15px 15px;
+}
+
+.menutitle {
+        cursor:pointer;
+        padding: 3px 12px;
+        margin-left: 10px;
+        background-image: url('chapter.gif');
+        background-repeat: no-repeat;
+        background-position: center left;
+        font-weight : bold;
+}
+
+.menutitle.selected {
+        background-image: url('chapter_open.gif');
+}
+
+.menutitle:hover{text-decoration:underline;cursor: pointer;}
+
+#menu .menuitemgroup {
+        margin: 0px 0px 6px 8px;
+        padding: 0px;
+        font-weight : bold; }
+
+#menu .selectedmenuitemgroup{
+        margin: 0px 0px 0px 8px;
+        padding: 0px;
+        font-weight : normal; 
+       
+        }
+
+#menu .menuitem {
+        padding: 2px 0px 1px 13px;
+        background-image: url('page.gif');
+        background-repeat: no-repeat;
+        background-position: center left;
+        font-weight : normal;
+        margin-left: 10px;
+}
+
+#menu .selected {
+        font-style : normal;
+        margin-right: 10px;
+         
+}
+.menuitem .selected {
+        border-style: solid;
+        border-width: 1px;
+}
+#menu .menupageitemgroup {
+        padding: 3px 0px 4px 6px;
+        font-style : normal;
+        border-bottom: 1px solid ;
+        border-left: 1px solid ;
+        border-right: 1px solid ;
+        margin-right: 10px;
+}
+#menu .menupageitem {
+        font-style : normal;
+        font-weight : normal;
+        border-width: 0px;
+        font-size : 90%;
+}
+#menu .searchbox {
+    text-align: center;
+}
+#menu .searchbox form {
+    padding: 3px 3px;
+    margin: 0;
+}
+#menu .searchbox input {
+    font-size: 100%;
+}
+
+#content {
+    padding: 20px 20px 20px 180px;
+    margin: 0;
+    font : small Verdana, Helvetica, sans-serif;
+    font-size : 80%;
+}
+
+#content ul {
+    margin: 0;
+    padding: 0 25px;
+}
+#content li {
+    padding: 0 5px;
+}
+#feedback {
+    color: black;
+    background: #CFDCED;
+    text-align:center;
+    margin-top: 5px;
+}
+#feedback #feedbackto {
+    font-size: 90%;
+    color: black;
+}
+#footer {
+    clear: both;
+    position: relative; /* IE bugfix (http://www.dracos.co.uk/web/css/ie6floatbug/) */
+    width: 100%;
+    background: #CFDCED;
+    border-top: solid 1px #4C6C8F;
+    color: black;
+}
+#footer .copyright {
+    position: relative; /* IE bugfix cont'd */
+    padding: 5px;
+    margin: 0;
+    width: 60%;
+}
+#footer .lastmodified {
+    position: relative; /* IE bugfix cont'd */
+    float: right;
+    width: 30%;
+    padding: 5px;
+    margin: 0;
+    text-align: right;
+}
+#footer a { color: white; }
+
+#footer #logos {
+    text-align: left;
+}
+
+
+/**
+ * Misc Styles
+ */
+
+acronym { cursor: help; }
+.boxed      { background-color: #a5b6c6;}
+.underlined_5     {border-bottom: solid 5px #4C6C8F;}
+.underlined_10     {border-bottom: solid 10px #4C6C8F;}
+/* ==================== snail trail ============================ */
+
+.trail {
+  position: relative; /* IE bugfix cont'd */
+  font-size: 70%;
+  text-align: right;
+  float: right;
+  margin: -10px 5px 0px 5px;
+  padding: 0;
+}
+
+#motd-area {
+    position:relative;
+    float:right;
+    width: 35%;
+    background-color: #f0f0ff;
+    border: solid 1px #4C6C8F;
+    margin: 0px 0px 10px 10px;
+    padding: 5px;
+}
+
+#minitoc-area {
+    border-top: solid 1px #4C6C8F;
+    border-bottom: solid 1px #4C6C8F;
+    margin: 15px 10% 5px 15px;
+   /* margin-bottom: 15px;
+    margin-left: 15px;
+    margin-right: 10%;*/
+    padding-bottom: 7px;
+    padding-top: 5px;
+}
+.minitoc {
+    list-style-image: url('current.gif');
+    font-weight: normal;
+}
+
+.abstract{
+    text-align:justify;
+    }
+
+li p {
+    margin: 0;
+    padding: 0;
+}
+
+.pdflink {
+    position: relative; /* IE bugfix cont'd */
+    float: right;
+    margin: 0px 5px;
+    padding: 0;
+}
+.pdflink br {
+    margin-top: -10px;
+    padding-left: 1px;
+}
+.pdflink a {
+    display: block;
+    font-size: 70%;
+    text-align: center;
+    margin: 0;
+    padding: 0;
+}
+
+.pdflink img {
+    display: block;
+    height: 16px;
+    width: 16px;
+}
+.xmllink {
+    position: relative; /* IE bugfix cont'd */
+    float: right;
+    margin: 0px 5px;
+    padding: 0;
+}
+.xmllink br {
+    margin-top: -10px;
+    padding-left: 1px;
+}
+.xmllink a {
+    display: block;
+    font-size: 70%;
+    text-align: center;
+    margin: 0;
+    padding: 0;
+}
+
+.xmllink img {
+    display: block;
+    height: 16px;
+    width: 16px;
+}
+.podlink {
+    position: relative; /* IE bugfix cont'd */
+    float: right;
+    margin: 0px 5px;
+    padding: 0;
+}
+.podlink br {
+    margin-top: -10px;
+    padding-left: 1px;
+}
+.podlink a {
+    display: block;
+    font-size: 70%;
+    text-align: center;
+    margin: 0;
+    padding: 0;
+}
+
+.podlink img {
+    display: block;
+    height: 16px;
+    width: 16px;
+}
+
+.printlink {
+    position: relative; /* IE bugfix cont'd */
+    float: right;
+}
+.printlink br {
+    margin-top: -10px;
+    padding-left: 1px;
+}
+.printlink a {
+    display: block;
+    font-size: 70%;
+    text-align: center;
+    margin: 0;
+    padding: 0;
+}
+.printlink img {
+    display: block;
+    height: 16px;
+    width: 16px;
+}
+
+p.instruction {
+  display: list-item;
+  list-style-image: url('../instruction_arrow.png');
+  list-style-position: outside;
+  margin-left: 2em;
+} 

+ 1575 - 0
zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md

@@ -0,0 +1,1575 @@
+<!--
+Copyright 2002-2004 The Apache Software Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+//-->
+
+# ZooKeeper Administrator's Guide
+
+### A Guide to Deployment and Administration
+
+* [Deployment](#ch_deployment)
+    * [System Requirements](#sc_systemReq)
+        * [Supported Platforms](#sc_supportedPlatforms)
+        * [Required Software](#sc_requiredSoftware)
+    * [Clustered (Multi-Server) Setup](#sc_zkMulitServerSetup)
+    * [Single Server and Developer Setup](#sc_singleAndDevSetup)
+* [Administration](#ch_administration)
+    * [Designing a ZooKeeper Deployment](#sc_designing)
+        * [Cross Machine Requirements](#sc_CrossMachineRequirements)
+        * [Single Machine Requirements](#Single+Machine+Requirements)
+    * [Provisioning](#sc_provisioning)
+    * [Things to Consider: ZooKeeper Strengths and Limitations](#sc_strengthsAndLimitations)
+    * [Administering](#sc_administering)
+    * [Maintenance](#sc_maintenance)
+        * [Ongoing Data Directory Cleanup](#Ongoing+Data+Directory+Cleanup)
+        * [Debug Log Cleanup (log4j)](#Debug+Log+Cleanup+%28log4j%29)
+    * [Supervision](#sc_supervision)
+    * [Monitoring](#sc_monitoring)
+    * [Logging](#sc_logging)
+    * [Troubleshooting](#sc_troubleshooting)
+    * [Configuration Parameters](#sc_configuration)
+        * [Minimum Configuration](#sc_minimumConfiguration)
+        * [Advanced Configuration](#sc_advancedConfiguration)
+        * [Cluster Options](#sc_clusterOptions)
+        * [Encryption, Authentication, Authorization Options](#sc_authOptions)
+        * [Experimental Options/Features](#Experimental+Options%2FFeatures)
+        * [Unsafe Options](#Unsafe+Options)
+        * [Disabling data directory autocreation](#Disabling+data+directory+autocreation)
+        * [Enabling db existence validation](#sc_db_existence_validation)
+        * [Performance Tuning Options](#sc_performance_options)
+        * [Communication using the Netty framework](#Communication+using+the+Netty+framework)
+        * [AdminServer configuration](#sc_adminserver_config)
+    * [ZooKeeper Commands](#sc_zkCommands)
+        * [The Four Letter Words](#sc_4lw)
+        * [The AdminServer](#sc_adminserver)
+    * [Data File Management](#sc_dataFileManagement)
+        * [The Data Directory](#The+Data+Directory)
+        * [The Log Directory](#The+Log+Directory)
+        * [File Management](#sc_filemanagement)
+        * [Recovery - TxnLogToolkit](#Recovery+-+TxnLogToolkit)
+    * [Things to Avoid](#sc_commonProblems)
+    * [Best Practices](#sc_bestPractices)
+
+<a name="ch_deployment"></a>
+
+## Deployment
+
+This section contains information about deploying ZooKeeper and
+covers these topics:
+
+* [System Requirements](#sc_systemReq)
+* [Clustered (Multi-Server) Setup](#sc_zkMulitServerSetup)
+* [Single Server and Developer Setup](#sc_singleAndDevSetup)
+
+The first two sections assume you are interested in installing
+ZooKeeper in a production environment such as a datacenter. The final
+section covers situations in which you are setting up ZooKeeper on a
+limited basis - for evaluation, testing, or development - but not in a
+production environment.
+
+<a name="sc_systemReq"></a>
+
+### System Requirements
+
+<a name="sc_supportedPlatforms"></a>
+
+#### Supported Platforms
+
+ZooKeeper consists of multiple components.  Some components are
+supported broadly, and other components are supported only on a smaller
+set of platforms.
+
+* **Client** is the Java client
+  library, used by applications to connect to a ZooKeeper ensemble.
+* **Server** is the Java server
+  that runs on the ZooKeeper ensemble nodes.
+* **Native Client** is a client
+  implemented in C, similar to the Java client, used by applications
+  to connect to a ZooKeeper ensemble.
+* **Contrib** refers to multiple
+  optional add-on components.
+
+The following matrix describes the level of support committed for
+running each component on different operating system platforms.
+
+##### Support Matrix
+
+| Operating System | Client | Server | Native Client | Contrib |
+|------------------|--------|--------|---------------|---------|
+| GNU/Linux | Development and Production | Development and Production | Development and Production | Development and Production |
+| Solaris | Development and Production | Development and Production | Not Supported | Not Supported |
+| FreeBSD | Development and Production | Development and Production | Not Supported | Not Supported |
+| Windows | Development and Production | Development and Production | Not Supported | Not Supported |
+| Mac OS X | Development Only | Development Only | Not Supported | Not Supported |
+
+For any operating system not explicitly mentioned as supported in
+the matrix, components may or may not work.  The ZooKeeper community
+will fix obvious bugs that are reported for other platforms, but there
+is no full support.
+
+<a name="sc_requiredSoftware"></a>
+
+#### Required Software
+
+ZooKeeper runs in Java, release 1.7 or greater (JDK 7 or
+greater, FreeBSD support requires openjdk7).  It runs as an
+_ensemble_ of ZooKeeper servers. Three
+ZooKeeper servers is the minimum recommended size for an
+ensemble, and we also recommend that they run on separate
+machines. At Yahoo!, ZooKeeper is usually deployed on
+dedicated RHEL boxes, with dual-core processors, 2GB of RAM,
+and 80GB IDE hard drives.
+
+<a name="sc_zkMulitServerSetup"></a>
+
+### Clustered (Multi-Server) Setup
+
+For reliable ZooKeeper service, you should deploy ZooKeeper in a
+cluster known as an _ensemble_. As long as a majority
+of the ensemble are up, the service will be available. Because ZooKeeper
+requires a majority, it is best to use an
+odd number of machines. For example, with four machines ZooKeeper can
+only handle the failure of a single machine; if two machines fail, the
+remaining two machines do not constitute a majority. However, with five
+machines ZooKeeper can handle the failure of two machines.
+
+######Note
+>As mentioned in the
+[ZooKeeper Getting Started Guide](zookeeperStarted.html)
+, a minimum of three servers are required for a fault tolerant
+clustered setup, and it is strongly recommended that you have an
+odd number of servers.
+
+>Usually three servers is more than enough for a production
+install, but for maximum reliability during maintenance, you may
+wish to install five servers. With three servers, if you perform
+maintenance on one of them, you are vulnerable to a failure on one
+of the other two servers during that maintenance. If you have five
+of them running, you can take one down for maintenance, and know
+that you're still OK if one of the other four suddenly fails.
+
+>Your redundancy considerations should include all aspects of
+your environment. If you have three ZooKeeper servers, but their
+network cables are all plugged into the same network switch, then
+the failure of that switch will take down your entire ensemble.
+
+Here are the steps for setting up a server that will be part of an
+ensemble. These steps should be performed on every host in the
+ensemble:
+
+1. Install the Java JDK. You can use the native packaging system
+  for your system, or download the JDK from:
+  [http://java.sun.com/javase/downloads/index.jsp](http://java.sun.com/javase/downloads/index.jsp)
+  
+2. Set the Java heap size. This is very important to avoid
+  swapping, which will seriously degrade ZooKeeper performance. To
+  determine the correct value, use load tests, and make sure you are
+  well below the usage limit that would cause you to swap. Be
+  conservative - use a maximum heap size of 3GB for a 4GB
+  machine.
+  
+3. Install the ZooKeeper Server Package. It can be downloaded
+  from:
+  [http://zookeeper.apache.org/releases.html](http://zookeeper.apache.org/releases.html)
+  
+4. Create a configuration file. This file can be called anything.
+  Use the following settings as a starting point:
+
+        tickTime=2000
+        dataDir=/var/lib/zookeeper/
+        clientPort=2181
+        initLimit=5
+        syncLimit=2
+        server.1=zoo1:2888:3888
+        server.2=zoo2:2888:3888
+        server.3=zoo3:2888:3888
+
+     You can find the meanings of these and other configuration
+  settings in the section [Configuration Parameters](#sc_configuration). A word,
+  though, about a few of them here:
+  Every machine that is part of the ZooKeeper ensemble should know
+  about every other machine in the ensemble. You accomplish this with
+  the series of lines of the form **server.id=host:port:port**. The parameters **host** and **port** are straightforward. You assign the
+  server id to each machine by creating a file named
+  *myid*, one for each server, which resides in
+  that server's data directory, as specified by the configuration file
+  parameter **dataDir**.
+  
+5. The myid file
+  consists of a single line containing only the text of that machine's
+  id. So *myid* of server 1 would contain the text
+  "1" and nothing else. The id must be unique within the
+  ensemble and should have a value between 1 and 255.
+  **IMPORTANT:** if you enable extended features such
+   as TTL Nodes (see below) the id must be between 1 
+   and 254 due to internal limitations.
+  
+6. Create an initialization marker file *initialize*
+  in the same directory as *myid*. This file indicates
+  that an empty data directory is expected. When present, an empty database
+  is created and the marker file deleted. When not present, an empty data
+  directory will mean this peer will not have voting rights and it will not
+  populate the data directory until it communicates with an active leader.
+  Intended use is to only create this file when bringing up a new
+  ensemble (see the shell sketch after this list).
+  
+7. If your configuration file is set up, you can start a
+  ZooKeeper server:
+  
+        $ java -cp zookeeper.jar:lib/slf4j-api-1.7.5.jar:lib/slf4j-log4j12-1.7.5.jar:lib/log4j-1.2.17.jar:conf \\
+        org.apache.zookeeper.server.quorum.QuorumPeerMain zoo.cfg
+       
+  QuorumPeerMain starts a ZooKeeper server;
+  [JMX](http://java.sun.com/javase/technologies/core/mntr-mgmt/javamanagement/)
+  management beans are also registered, which allows
+  management through a JMX management console.
+  The [ZooKeeper JMX
+  document](zookeeperJMX.html) contains details on managing ZooKeeper with JMX.
+  See the script _bin/zkServer.sh_,
+  which is included in the release, for an example
+  of starting server instances.
+8. Test your deployment by connecting to the hosts:
+  In Java, you can run the following command to execute
+  simple operations:
+  
+        $ bin/zkCli.sh -server 127.0.0.1:2181
+
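+As a concrete illustration of steps 5 and 6 above, the following shell
+sketch creates the *myid* and *initialize* files on the machine configured
+as **server.1**; the data directory path simply follows the sample
+configuration and should be adjusted to your own **dataDir**:
+
+    # on the host configured as server.1 in the sample configuration
+    echo "1" > /var/lib/zookeeper/myid
+    # only when bringing up a brand new ensemble
+    touch /var/lib/zookeeper/initialize
+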
+<a name="sc_singleAndDevSetup"></a>
+
+### Single Server and Developer Setup
+
+If you want to setup ZooKeeper for development purposes, you will
+probably want to setup a single server instance of ZooKeeper, and then
+install either the Java or C client-side libraries and bindings on your
+development machine.
+
+The steps for setting up a single server instance are similar
+to the above, except that the configuration file is simpler. You can find the
+complete instructions in the [Installing and
+Running ZooKeeper in Single Server Mode](zookeeperStarted.html#sc_InstallingSingleMode) section of the [ZooKeeper Getting Started
+Guide](zookeeperStarted.html).
+
+For information on installing the client side libraries, refer to
+the [Bindings](zookeeperProgrammers.html#ch_bindings)
+section of the [ZooKeeper
+Programmer's Guide](zookeeperProgrammers.html).
+
+<a name="ch_administration"></a>
+
+## Administration
+
+This section contains information about running and maintaining
+ZooKeeper and covers these topics:
+
+* [Designing a ZooKeeper Deployment](#sc_designing)
+* [Provisioning](#sc_provisioning)
+* [Things to Consider: ZooKeeper Strengths and Limitations](#sc_strengthsAndLimitations)
+* [Administering](#sc_administering)
+* [Maintenance](#sc_maintenance)
+* [Supervision](#sc_supervision)
+* [Monitoring](#sc_monitoring)
+* [Logging](#sc_logging)
+* [Troubleshooting](#sc_troubleshooting)
+* [Configuration Parameters](#sc_configuration)
+* [ZooKeeper Commands](#sc_zkCommands)
+* [Data File Management](#sc_dataFileManagement)
+* [Things to Avoid](#sc_commonProblems)
+* [Best Practices](#sc_bestPractices)
+
+<a name="sc_designing"></a>
+
+### Designing a ZooKeeper Deployment
+
+The reliability of ZooKeeper rests on two basic assumptions.
+
+1. Only a minority of servers in a deployment
+  will fail. _Failure_ in this context
+  means a machine crash, or some error in the network that
+  partitions a server off from the majority.
+1. Deployed machines operate correctly. To
+  operate correctly means to execute code correctly, to have
+  clocks that work properly, and to have storage and network
+  components that perform consistently.
+
+The sections below contain considerations for ZooKeeper
+administrators to maximize the probability for these assumptions
+to hold true. Some of these are cross-machine considerations,
+and others are things you should consider for each and every
+machine in your deployment.
+
+<a name="sc_CrossMachineRequirements"></a>
+
+#### Cross Machine Requirements
+
+For the ZooKeeper service to be active, there must be a
+majority of non-failing machines that can communicate with
+each other. To create a deployment that can tolerate the
+failure of F machines, you should count on deploying 2xF+1
+machines.  Thus, a deployment that consists of three machines
+can handle one failure, and a deployment of five machines can
+handle two failures. Note that a deployment of six machines
+can only handle two failures since three machines is not a
+majority.  For this reason, ZooKeeper deployments are usually
+made up of an odd number of machines.
+
+To achieve the highest probability of tolerating a failure
+you should try to make machine failures independent. For
+example, if most of the machines share the same switch,
+failure of that switch could cause a correlated failure and
+bring down the service. The same holds true of shared power
+circuits, cooling systems, etc.
+
+<a name="Single+Machine+Requirements"></a>
+
+#### Single Machine Requirements
+
+If ZooKeeper has to contend with other applications for
+access to resources like storage media, CPU, network, or
+memory, its performance will suffer markedly.  ZooKeeper has
+strong durability guarantees, which means it uses storage
+media to log changes before the operation responsible for the
+change is allowed to complete. You should be aware of this
+dependency then, and take great care if you want to ensure
+that ZooKeeper operations aren’t held up by your media. Here
+are some things you can do to minimize that sort of
+degradation:
+
+* ZooKeeper's transaction log must be on a dedicated
+  device. (A dedicated partition is not enough.) ZooKeeper
+  writes the log sequentially, without seeking. Sharing your
+  log device with other processes can cause seeks and
+  contention, which in turn can cause multi-second
+  delays.
+* Do not put ZooKeeper in a situation that can cause a
+  swap. In order for ZooKeeper to function with any sort of
+  timeliness, it simply cannot be allowed to swap.
+  Therefore, make certain that the maximum heap size given
+  to ZooKeeper is not bigger than the amount of real memory
+  available to ZooKeeper.  For more on this, see
+  [Things to Avoid](#sc_commonProblems)
+  below.
+
+<a name="sc_provisioning"></a>
+
+### Provisioning
+
+<a name="sc_strengthsAndLimitations"></a>
+
+### Things to Consider: ZooKeeper Strengths and Limitations
+
+<a name="sc_administering"></a>
+
+### Administering
+
+<a name="sc_maintenance"></a>
+
+### Maintenance
+
+Little long-term maintenance is required for a ZooKeeper
+cluster; however, you must be aware of the following:
+
+<a name="Ongoing+Data+Directory+Cleanup"></a>
+
+#### Ongoing Data Directory Cleanup
+
+The ZooKeeper [Data
+Directory](#var_datadir) contains files which are a persistent copy
+of the znodes stored by a particular serving ensemble. These
+are the snapshot and transactional log files. As changes are
+made to the znodes these changes are appended to a
+transaction log. Occasionally, when a log grows large, a
+snapshot of the current state of all znodes will be written
+to the filesystem and a new transaction log file is created
+for future transactions. During snapshotting, ZooKeeper may
+continue appending incoming transactions to the old log file.
+Therefore, some transactions which are newer than a snapshot
+may be found in the last transaction log preceding the
+snapshot.
+
+A ZooKeeper server **will not remove
+old snapshots and log files** when using the default
+configuration (see autopurge below); this is the
+responsibility of the operator. Every serving environment is
+different and therefore the requirements of managing these
+files may differ from install to install (backup for example).
+
+The PurgeTxnLog utility implements a simple retention
+policy that administrators can use. The [API docs](index.html) contain details on
+calling conventions (arguments, etc...).
+
+In the following example the last <count> snapshots and
+their corresponding logs are retained and the others are
+deleted.  The value of <count> should typically be
+greater than 3 (although not required, this provides 3 backups
+in the unlikely event a recent log has become corrupted). This
+can be run as a cron job on the ZooKeeper server machines to
+clean up the logs daily.
+
+    java -cp zookeeper.jar:lib/slf4j-api-1.7.5.jar:lib/slf4j-log4j12-1.7.5.jar:lib/log4j-1.2.17.jar:conf org.apache.zookeeper.server.PurgeTxnLog <dataDir> <snapDir> -n <count>
+
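+For example, a crontab entry along the following lines would run the purge
+nightly; the schedule, installation directory and data paths are
+illustrative only and should be adapted to your deployment:
+
+    # purge all but the 3 most recent snapshots every night at 03:00
+    0 3 * * * cd /opt/zookeeper && java -cp zookeeper.jar:lib/slf4j-api-1.7.5.jar:lib/slf4j-log4j12-1.7.5.jar:lib/log4j-1.2.17.jar:conf org.apache.zookeeper.server.PurgeTxnLog /var/lib/zookeeper /var/lib/zookeeper -n 3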
+
+Automatic purging of the snapshots and corresponding
+transaction logs was introduced in version 3.4.0 and can be
+enabled via the following configuration parameters **autopurge.snapRetainCount** and **autopurge.purgeInterval**. For more on
+this, see [Advanced Configuration](#sc_advancedConfiguration)
+below.
+
+<a name="Debug+Log+Cleanup+%28log4j%29"></a>
+
+#### Debug Log Cleanup (log4j)
+
+See the section on [logging](#sc_logging) in this document. It is
+expected that you will setup a rolling file appender using the
+in-built log4j feature. The sample configuration file in the
+release tar's conf/log4j.properties provides an example of
+this.
+
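+As a rough sketch, a size-bound rolling file appender could be configured
+along these lines. The property names follow standard log4j 1.2 conventions,
+but the appender name, file location, size limits and pattern are examples
+rather than the values shipped in *conf/log4j.properties*:
+
+    # route all server output to a rolling, size-bound log file
+    log4j.rootLogger=INFO, ROLLINGFILE
+    log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender
+    log4j.appender.ROLLINGFILE.File=/var/log/zookeeper/zookeeper.log
+    log4j.appender.ROLLINGFILE.MaxFileSize=10MB
+    log4j.appender.ROLLINGFILE.MaxBackupIndex=10
+    log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout
+    log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{1} - %m%n
+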
+<a name="sc_supervision"></a>
+
+### Supervision
+
+You will want to have a supervisory process that manages
+each of your ZooKeeper server processes (JVM). The ZK server is
+designed to be "fail fast", meaning that it will shut down
+(process exit) if an error occurs that it cannot recover
+from. As a ZooKeeper serving cluster is highly reliable, this
+means that while the server may go down the cluster as a whole
+is still active and serving requests. Additionally, as the
+cluster is "self healing" the failed server once restarted will
+automatically rejoin the ensemble without any manual
+interaction.
+
+Having a supervisory process such as [daemontools](http://cr.yp.to/daemontools.html) or
+[SMF](http://en.wikipedia.org/wiki/Service\_Management\_Facility)
+(other options for supervisory process are also available, it's
+up to you which one you would like to use, these are just two
+examples) managing your ZooKeeper server ensures that if the
+process does exit abnormally it will automatically be restarted
+and will quickly rejoin the cluster.
+
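+As a minimal sketch of the daemontools approach (the installation path is an
+example, and it assumes your *zkServer.sh* supports the foreground mode), a
+service *run* script could look like this:
+
+    #!/bin/sh
+    # daemontools restarts the server whenever this process exits
+    exec /opt/zookeeper/bin/zkServer.sh start-foreground
+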
+It is also recommended to configure the ZooKeeper server process to
+terminate and dump its heap if an **OutOfMemoryError** occurs.  This is achieved
+by launching the JVM with the following arguments on Linux and Windows
+respectively.  The *zkServer.sh* and
+*zkServer.cmd* scripts that ship with ZooKeeper set
+these options.
+
+    -XX:+HeapDumpOnOutOfMemoryError -XX:OnOutOfMemoryError='kill -9 %p'
+
+    "-XX:+HeapDumpOnOutOfMemoryError" "-XX:OnOutOfMemoryError=cmd /c taskkill /pid %%%%p /t /f"
+
+<a name="sc_monitoring"></a>
+
+### Monitoring
+
+The ZooKeeper service can be monitored in one of two
+primary ways: 1) the command port through the use of [4 letter words](#sc_zkCommands) and 2) [JMX](zookeeperJMX.html). See the appropriate section for
+your environment/requirements.
+
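+For instance, a quick health probe over the command port can be done with a
+whitelisted four letter word; the host, port and the availability of *nc*
+are assumptions of this sketch (see the whitelist discussion under
+[Cluster Options](#sc_clusterOptions)):
+
+    # "srvr" is whitelisted by default; other words must be added to 4lw.commands.whitelist
+    echo srvr | nc 127.0.0.1 2181
+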
+<a name="sc_logging"></a>
+
+### Logging
+
+ZooKeeper uses **[SLF4J](http://www.slf4j.org)**
+version 1.7.5 as its logging infrastructure. For backward compatibility it is bound to
+**LOG4J** but you can use
+**[LOGBack](http://logback.qos.ch/)**
+or any other supported logging framework of your choice.
+
+The ZooKeeper default *log4j.properties*
+file resides in the *conf* directory. Log4j requires that
+*log4j.properties* either be in the working directory
+(the directory from which ZooKeeper is run) or be accessible from the classpath.
+
+For more information about SLF4J, see
+[its manual](http://www.slf4j.org/manual.html).
+
+For more information about LOG4J, see
+[Log4j Default Initialization Procedure](http://logging.apache.org/log4j/1.2/manual.html#defaultInit)
+of the log4j manual.
+
+<a name="sc_troubleshooting"></a>
+
+### Troubleshooting
+
+* *Server not coming up because of file corruption* :
+    A server might not be able to read its database and fail to come up because of
+    some file corruption in the transaction logs of the ZooKeeper server. You will
+    see an IOException while loading the ZooKeeper database. In such a case,
+    make sure all the other servers in your ensemble are up and working. Use the "stat"
+    command on the command port to see if they are in good health. After you have verified that
+    all the other servers of the ensemble are up, you can go ahead and clean the database
+    of the corrupt server. Delete all the files in datadir/version-2 and datalogdir/version-2/,
+    then restart the server (a shell sketch of these steps follows).
+
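+The shell sketch below illustrates those steps. The hostnames are examples,
+the placeholders stand for your configured directories, and the "stat" word
+must be whitelisted (see [Cluster Options](#sc_clusterOptions)):
+
+    # verify the health of the remaining ensemble members
+    echo stat | nc zoo2 2181
+    echo stat | nc zoo3 2181
+    # on the corrupt server only, with the server process stopped
+    rm -rf <dataDir>/version-2/* <dataLogDir>/version-2/*
+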
+<a name="sc_configuration"></a>
+
+### Configuration Parameters
+
+ZooKeeper's behavior is governed by the ZooKeeper configuration
+file. This file is designed so that the exact same file can be used by
+all the servers that make up a ZooKeeper ensemble, assuming the disk
+layouts are the same. If servers use different configuration files, care
+must be taken to ensure that the list of servers in all of the different
+configuration files match.
+
+######Note
+>In 3.5.0 and later, some of these parameters should be placed in
+a dynamic configuration file. If they are placed in the static
+configuration file, ZooKeeper will automatically move them over to the
+dynamic configuration file. See [Dynamic Reconfiguration](zookeeperReconfig.html) for more information.
+
+<a name="sc_minimumConfiguration"></a>
+
+#### Minimum Configuration
+
+Here are the minimum configuration keywords that must be defined
+in the configuration file; a sample file combining them follows the list:
+
+* *clientPort* :
+    the port to listen for client connections; that is, the
+    port that clients attempt to connect to.
+
+* *secureClientPort* :
+    the port to listen on for secure client connections using SSL.
+    **clientPort** specifies
+    the port for plaintext connections while **secureClientPort** specifies the port for SSL
+    connections. Specifying both enables mixed-mode while omitting
+    either will disable that mode.
+    Note that the SSL feature will be enabled when the user sets both
+    **zookeeper.serverCnxnFactory** and **zookeeper.clientCnxnSocket** to their Netty implementations.
+
+* *dataDir* :
+    the location where ZooKeeper will store the in-memory
+    database snapshots and, unless specified otherwise, the
+    transaction log of updates to the database.
+    ######Note
+    >Be careful where you put the transaction log. A
+    dedicated transaction log device is key to consistent good
+    performance. Putting the log on a busy device will adversely
+    affect performance.
+
+* *tickTime* :
+    the length of a single tick, which is the basic time unit
+    used by ZooKeeper, as measured in milliseconds. It is used to
+    regulate heartbeats and timeouts. For example, the minimum
+    session timeout will be two ticks.
+
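+Put together, a minimal standalone configuration file using only these
+keywords could look like the following; the path and port are examples only:
+
+    # the basic time unit in milliseconds
+    tickTime=2000
+    # where snapshots (and, by default, transaction logs) are stored
+    dataDir=/var/lib/zookeeper
+    # the port clients connect to
+    clientPort=2181
+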
+<a name="sc_advancedConfiguration"></a>
+
+#### Advanced Configuration
+
+The configuration settings in this section are optional. You can
+use them to further fine tune the behaviour of your ZooKeeper servers.
+Some can also be set using Java system properties, generally of the
+form _zookeeper.keyword_. The exact system
+property, when available, is noted below.
+
+* *dataLogDir* :
+    (No Java system property)
+    This option will direct the machine to write the
+    transaction log to the **dataLogDir** rather than the **dataDir**. This allows a dedicated log
+    device to be used, and helps avoid competition between logging
+    and snapshots.
+    ######Note
+    >Having a dedicated log device has a large impact on
+    throughput and stable latencies. It is highly recommended to
+    dedicate a log device and set **dataLogDir** to point to a directory on
+    that device, and then make sure to point **dataDir** to a directory
+    _not_ residing on that device.
+
+* *globalOutstandingLimit* :
+    (Java system property: **zookeeper.globalOutstandingLimit.**)
+    Clients can submit requests faster than ZooKeeper can
+    process them, especially if there are a lot of clients. To
+    prevent ZooKeeper from running out of memory due to queued
+    requests, ZooKeeper will throttle clients so that there is no
+    more than globalOutstandingLimit outstanding requests in the
+    system. The default limit is 1,000.
+
+* *preAllocSize* :
+    (Java system property: **zookeeper.preAllocSize**)
+    To avoid seeks ZooKeeper allocates space in the
+    transaction log file in blocks of preAllocSize kilobytes. The
+    default block size is 64M. One reason for changing the size of
+    the blocks is to reduce the block size if snapshots are taken
+    more often. (Also, see **snapCount**).
+
+* *snapCount* :
+    (Java system property: **zookeeper.snapCount**)
+    ZooKeeper records its transactions using snapshots and
+    a transaction log (think write-ahead log). The number of
+    transactions recorded in the transaction log before a snapshot
+    can be taken (and the transaction log rolled) is determined
+    by snapCount. In order to prevent all of the machines in the quorum
+    from taking a snapshot at the same time, each ZooKeeper server
+    will take a snapshot when the number of transactions in the transaction log
+    reaches a runtime generated random value in the \[snapCount/2+1, snapCount]
+    range. The default snapCount is 100,000.
+
+* *maxClientCnxns* :
+    (No Java system property)
+    Limits the number of concurrent connections (at the socket
+    level) that a single client, identified by IP address, may make
+    to a single member of the ZooKeeper ensemble. This is used to
+    prevent certain classes of DoS attacks, including file
+    descriptor exhaustion. The default is 60. Setting this to 0
+    entirely removes the limit on concurrent connections.
+
+* *clientPortAddress* :
+    **New in 3.3.0:** the
+    address (ipv4, ipv6 or hostname) to listen for client
+    connections; that is, the address that clients attempt
+    to connect to. This is optional, by default we bind in
+    such a way that any connection to the **clientPort** for any
+    address/interface/nic on the server will be
+    accepted.
+
+* *minSessionTimeout* :
+    (No Java system property)
+    **New in 3.3.0:** the
+    minimum session timeout in milliseconds that the server
+    will allow the client to negotiate. Defaults to 2 times
+    the **tickTime**.
+
+* *maxSessionTimeout* :
+    (No Java system property)
+    **New in 3.3.0:** the
+    maximum session timeout in milliseconds that the server
+    will allow the client to negotiate. Defaults to 20 times
+    the **tickTime**.
+
+* *fsync.warningthresholdms* :
+    (Java system property: **zookeeper.fsync.warningthresholdms**)
+    **New in 3.3.4:** A
+    warning message will be output to the log whenever an
+    fsync in the Transactional Log (WAL) takes longer than
+    this value. The value is specified in milliseconds and
+    defaults to 1000. This value can only be set as a
+    system property.
+
+* *autopurge.snapRetainCount* :
+    (No Java system property)
+    **New in 3.4.0:**
+    When enabled, the ZooKeeper auto purge feature retains
+    the **autopurge.snapRetainCount** most
+    recent snapshots and the corresponding transaction logs in the
+    **dataDir** and **dataLogDir** respectively and deletes the rest.
+    Defaults to 3. Minimum value is 3.
+
+* *autopurge.purgeInterval* :
+    (No Java system property)
+    **New in 3.4.0:** The
+    time interval in hours at which the purge task is
+    triggered. Set to a positive integer (1 and above)
+    to enable auto purging. Defaults to 0 (disabled).
+    A combined example with **dataLogDir** appears after this list.
+
+* *syncEnabled* :
+    (Java system property: **zookeeper.observer.syncEnabled**)
+    **New in 3.4.6, 3.5.0:**
+    The observers now log transactions and write snapshots to disk
+    by default, like the participants. This reduces the recovery time
+    of the observers on restart. Set to "false" to disable this
+    feature. Default is "true".
+
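+As referenced above, the sketch below combines a dedicated transaction log
+device with automatic purging; the mount point and retention values are
+examples only:
+
+    dataDir=/var/lib/zookeeper
+    # point dataLogDir at a directory on a separate, dedicated device
+    dataLogDir=/zookeeper-txnlog
+    # keep the 3 most recent snapshots and purge once a day
+    autopurge.snapRetainCount=3
+    autopurge.purgeInterval=24
+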
+<a name="sc_clusterOptions"></a>
+
+#### Cluster Options
+
+The options in this section are designed for use with an ensemble
+of servers -- that is, when deploying clusters of servers.
+
+* *electionAlg* :
+    (No Java system property)
+    Election implementation to use. A value of "1" corresponds to the
+    non-authenticated UDP-based version of fast leader election, "2"
+    corresponds to the authenticated UDP-based version of fast
+    leader election, and "3" corresponds to the TCP-based version of
+    fast leader election. Currently, algorithm 3 is the default.
+    ######Note
+    >The implementations of leader election 1, and 2 are now
+    **deprecated**. We have the intention
+    of removing them in the next release, at which point only the
+    FastLeaderElection will be available.
+
+* *initLimit* :
+    (No Java system property)
+    Amount of time, in ticks (see [tickTime](#id_tickTime)), to allow followers to
+    connect and sync to a leader. Increase this value as needed, if
+    the amount of data managed by ZooKeeper is large.
+
+* *leaderServes* :
+    (Java system property: zookeeper.**leaderServes**)
+    Leader accepts client connections. Default value is "yes".
+    The leader machine coordinates updates. For higher update
+    throughput at the slight expense of read throughput the leader
+    can be configured to not accept clients and focus on
+    coordination. The default for this option is yes, which means
+    that a leader will accept client connections.
+    ######Note
+    >Turning on leader selection is highly recommended when
+    you have more than three ZooKeeper servers in an ensemble.
+
+* *server.x=[hostname]:nnnnn[:nnnnn], etc* :
+    (No Java system property)
+    servers making up the ZooKeeper ensemble. When the server
+    starts up, it determines which server it is by looking for the
+    file *myid* in the data directory. That file
+    contains the server number, in ASCII, and it should match
+    **x** in **server.x** in the left hand side of this
+    setting.
+    The list of ZooKeeper servers that is
+    used by the clients must match the list of ZooKeeper servers
+    that each ZooKeeper server has.
+    There are two port numbers **nnnnn**.
+    Followers use the first to connect to the leader, and the second is used for
+    leader election. If you want to test multiple servers on a single machine, then
+    different ports can be used for each server.
+
+* *syncLimit* :
+    (No Java system property)
+    Amount of time, in ticks (see [tickTime](#id_tickTime)), to allow followers to sync
+    with ZooKeeper. If followers fall too far behind a leader, they
+    will be dropped.
+
+* *group.x=nnnnn[:nnnnn]* :
+    (No Java system property)
+    Enables a hierarchical quorum construction. "x" is a group identifier
+    and the numbers following the "=" sign correspond to server identifiers.
+    The right-hand side of the assignment is a colon-separated list of server
+    identifiers. Note that groups must be disjoint and the union of all groups
+    must be the ZooKeeper ensemble.
+    You will find an example [here](zookeeperHierarchicalQuorums.html)
+    and a short sketch after this list.
+
+* *weight.x=nnnnn* :
+    (No Java system property)
+    Used along with "group", it assigns a weight to a server when
+    forming quorums. Such a value corresponds to the weight of a server
+    when voting. There are a few parts of ZooKeeper that require voting
+    such as leader election and the atomic broadcast protocol. By default
+    the weight of a server is 1. If the configuration defines groups, but not
+    weights, then a value of 1 will be assigned to all servers.
+    You will find an example [here](zookeeperHierarchicalQuorums.html)
+
+* *cnxTimeout* :
+    (Java system property: zookeeper.**cnxTimeout**)
+    Sets the timeout value for opening connections for leader election notifications.
+    Only applicable if you are using electionAlg 3.
+    ######Note
+    >Default value is 5 seconds.
+
+* *standaloneEnabled* :
+    (No Java system property)
+    **New in 3.5.0:**
+    When set to false, a single server can be started in replicated
+    mode, a lone participant can run with observers, and a cluster
+    can reconfigure down to one node, and up from one node. The
+    default is true for backwards compatibility. It can be set
+    using QuorumPeerConfig's setStandaloneEnabled method or by
+    adding "standaloneEnabled=false" or "standaloneEnabled=true"
+    to a server's config file.
+
+* *reconfigEnabled* :
+    (No Java system property)
+    **New in 3.5.3:**
+    This controls the enabling or disabling of
+    the [Dynamic Reconfiguration](zookeeperReconfig.html) feature. When the feature
+    is enabled, users can perform reconfigure operations through
+    the ZooKeeper client API or through ZooKeeper command line tools
+    assuming users are authorized to perform such operations.
+    When the feature is disabled, no user, including the super user,
+    can perform a reconfiguration. Any attempt to reconfigure will return an error.
+    **"reconfigEnabled"** option can be set as
+    **"reconfigEnabled=false"** or
+    **"reconfigEnabled=true"**
+    to a server's config file, or using QuorumPeerConfig's
+    setReconfigEnabled method. The default value is false.
+    If present, the value should be consistent across every server in
+    the entire ensemble. Setting the value as true on some servers and false
+    on other servers will cause inconsistent behavior depending on which server
+    is elected as leader. If the leader has a setting of
+    **"reconfigEnabled=true"**, then the ensemble
+    will have reconfig feature enabled. If the leader has a setting of
+    **"reconfigEnabled=false"**, then the ensemble
+    will have reconfig feature disabled. It is thus recommended to have a consistent
+    value for **"reconfigEnabled"** across servers
+    in the ensemble.
+
+* *4lw.commands.whitelist* :
+    (Java system property: **zookeeper.4lw.commands.whitelist**)
+    **New in 3.5.3:**
+    A list of comma separated [Four Letter Words](#sc_4lw)
+    commands that the user wants to use. A valid Four Letter Words
+    command must be put in this list, otherwise the ZooKeeper server will
+    not enable the command.
+    By default the whitelist only contains the "srvr" command,
+    which zkServer.sh uses. The rest of the four letter word commands are disabled
+    by default.
+    Here's an example of the configuration that enables stat, ruok, conf, and isro
+    command while disabling the rest of Four Letter Words command:
+
+        4lw.commands.whitelist=stat, ruok, conf, isro
+
+
+If you really need to enable all four letter word commands by default, you can use
+the asterisk option so you don't have to include every command one by one in the list.
+As an example, this will enable all four letter word commands:
+
+
+    4lw.commands.whitelist=*
+
+
+* *tcpKeepAlive* :
+    (Java system property: **zookeeper.tcpKeepAlive**)
+    **New in 3.5.4:**
+    Setting this to true sets the TCP keepAlive flag on the
+    sockets used by quorum members to perform elections.
+    This will allow for connections between quorum members to
+    remain up when there is network infrastructure that may
+    otherwise break them. Some NATs and firewalls may terminate
+    or lose state for long running or idle connections.
+    Enabling this option relies on OS level settings to work
+    properly, check your operating system's options regarding TCP
+    keepalive for more information.  Defaults to
+    **false**.
+
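+As referenced in the **group** and **weight** entries above, a sketch of a
+hierarchical quorum for a nine-server ensemble split into three groups of
+equal weight might look like the following; see
+[ZooKeeper Hierarchical Quorums](zookeeperHierarchicalQuorums.html) for the
+authoritative example:
+
+    group.1=1:2:3
+    group.2=4:5:6
+    group.3=7:8:9
+    # give every server the same voting weight
+    weight.1=1
+    weight.2=1
+    weight.3=1
+    weight.4=1
+    weight.5=1
+    weight.6=1
+    weight.7=1
+    weight.8=1
+    weight.9=1
+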
+<a name="sc_authOptions"></a>
+
+#### Encryption, Authentication, Authorization Options
+
+The options in this section allow control over
+encryption/authentication/authorization performed by the service.
+
+* *DigestAuthenticationProvider.superDigest* :
+    (Java system property: **zookeeper.DigestAuthenticationProvider.superDigest**)
+    By default this feature is **disabled**.
+    **New in 3.2:**
+    Enables a ZooKeeper ensemble administrator to access the
+    znode hierarchy as a "super" user. In particular no ACL
+    checking occurs for a user authenticated as
+    super.
+    org.apache.zookeeper.server.auth.DigestAuthenticationProvider
+    can be used to generate the superDigest; call it with
+    one parameter of "super:<password>". Provide the
+    generated "super:<data>" as the system property value
+    when starting each server of the ensemble.
+    When authenticating to a ZooKeeper server (from a
+    ZooKeeper client) pass a scheme of "digest" and authdata
+    of "super:<password>". Note that digest auth passes
+    the authdata in plaintext to the server, it would be
+    prudent to use this authentication method only on
+    localhost (not over the network) or over an encrypted
+    connection.
+
+* *X509AuthenticationProvider.superUser* :
+    (Java system property: **zookeeper.X509AuthenticationProvider.superUser**)
+    The SSL-backed way to enable a ZooKeeper ensemble
+    administrator to access the znode hierarchy as a "super" user.
+    When this parameter is set to an X500 principal name, only an
+    authenticated client with that principal will be able to bypass
+    ACL checking and have full privileges to all znodes.
+
+* *zookeeper.superUser* :
+    (Java system property: **zookeeper.superUser**)
+    Similar to **zookeeper.X509AuthenticationProvider.superUser**
+    but is generic for SASL based logins. It stores the name of
+    a user that can access the znode hierarchy as a "super" user.
+
+* *ssl.keyStore.location and ssl.keyStore.password* :
+    (Java system properties: **zookeeper.ssl.keyStore.location** and **zookeeper.ssl.keyStore.password**)
+    Specifies the file path to a JKS containing the local
+    credentials to be used for SSL connections, and the
+    password to unlock the file.
+
+* *ssl.trustStore.location and ssl.trustStore.password* :
+    (Java system properties: **zookeeper.ssl.trustStore.location** and **zookeeper.ssl.trustStore.password**)
+    Specifies the file path to a JKS containing the remote
+    credentials to be used for SSL connections, and the
+    password to unlock the file.
+
+* *ssl.authProvider* :
+    (Java system property: **zookeeper.ssl.authProvider**)
+    Specifies a subclass of **org.apache.zookeeper.auth.X509AuthenticationProvider**
+    to use for secure client authentication. This is useful in
+    certificate key infrastructures that do not use JKS. It may be
+    necessary to extend **javax.net.ssl.X509KeyManager** and **javax.net.ssl.X509TrustManager**
+    to get the desired behavior from the SSL stack. To configure the
+    ZooKeeper server to use the custom provider for authentication,
+    choose a scheme name for the custom AuthenticationProvider and
+    set the property **zookeeper.authProvider.[scheme]** to the fully-qualified class name of the custom
+    implementation. This will load the provider into the ProviderRegistry.
+    Then set this property **zookeeper.ssl.authProvider=[scheme]** and that provider
+    will be used for secure authentication.
+
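+As an illustration of a few of the options above (the classpath, paths, password and generated digest below are placeholders, not values to copy): a super digest can be produced with the provider's main method and then passed to every server as a system property, alongside the SSL store settings:
+
+    # generate the digest for the built-in "super" user
+    $ java -cp <zookeeper-classpath> \
+        org.apache.zookeeper.server.auth.DigestAuthenticationProvider super:mypassword
+    super:mypassword->super:<generated-digest>
+
+    # JVM flags passed to each server (e.g. via SERVER_JVMFLAGS)
+    -Dzookeeper.DigestAuthenticationProvider.superDigest=super:<generated-digest>
+    -Dzookeeper.ssl.keyStore.location=/path/to/keystore.jks
+    -Dzookeeper.ssl.keyStore.password=<keystore-password>
+    -Dzookeeper.ssl.trustStore.location=/path/to/truststore.jks
+    -Dzookeeper.ssl.trustStore.password=<truststore-password>
+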
+<a name="Experimental+Options%2FFeatures"></a>
+
+#### Experimental Options/Features
+
+New features that are currently considered experimental.
+
+* *Read Only Mode Server* :
+    (Java system property: **readonlymode.enabled**)
+    **New in 3.4.0:**
+    Setting this value to true enables Read Only Mode server
+    support (disabled by default). Read Only Mode allows client
+    sessions which requested it to connect to the
+    server even when the server might be partitioned from
+    the quorum. In this mode read-only clients can still read
+    values from the ZK service, but will be unable to write
+    values or see changes from other clients. See
+    ZOOKEEPER-784 for more details.
+
+<a name="Unsafe+Options"></a>
+
+#### Unsafe Options
+
+The following options can be useful, but be careful when you use
+them. The risk of each is explained along with the explanation of what
+the variable does.
+
+* *forceSync* :
+    (Java system property: **zookeeper.forceSync**)
+    Requires updates to be synced to the media of the transaction
+    log before finishing processing the update. If this option is
+    set to "no", ZooKeeper will not require updates to be synced to
+    the media. The risk is that acknowledged updates may be lost if
+    the machine crashes before the data reaches the disk.
+
+* *jute.maxbuffer* :
+    (Java system property: **jute.maxbuffer**)
+    This option can only be set as a Java system property.
+    There is no zookeeper prefix on it. It specifies the maximum
+    size of the data that can be stored in a znode. The default is
+    0xfffff, or just under 1M. If this option is changed, the system
+    property must be set on all servers and clients otherwise
+    problems will arise. This is really a sanity check. ZooKeeper is
+    designed to store data on the order of kilobytes in size.
+
+* *skipACL* :
+    (Java system property: **zookeeper.skipACL**)
+    Skips ACL checks. This results in a boost in throughput,
+    but opens up full access to the data tree to everyone.
+
+* *quorumListenOnAllIPs* :
+    When set to true the ZooKeeper server will listen
+    for connections from its peers on all available IP addresses,
+    and not only the address configured in the server list of the
+    configuration file. It affects the connections handling the
+    ZAB protocol and the Fast Leader Election protocol. Default
+    value is **false**.
+
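+A hedged sketch of how the jute.maxbuffer override is typically applied; remember that the same value must reach every server *and* every client JVM:
+
+    # e.g. raise the limit to 4 MB on all servers and clients alike
+    -Djute.maxbuffer=4194304
+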
+<a name="Disabling+data+directory+autocreation"></a>
+
+#### Disabling data directory autocreation
+
+**New in 3.5:** The default
+behavior of a ZooKeeper server is to automatically create the
+data directory (specified in the configuration file) when
+started if that directory does not already exist. This can be
+inconvenient and even dangerous in some cases. Take the case
+where a configuration change is made to a running server,
+wherein the **dataDir** parameter
+is accidentally changed. When the ZooKeeper server is
+restarted it will create this non-existent directory and begin
+serving - with an empty znode namespace. This scenario can
+result in an effective "split brain" situation (i.e. data in
+both the new invalid directory and the original valid data
+store). As such it is useful to have an option to turn off
+this autocreate behavior. In general this should be done for production
+environments; unfortunately the
+default legacy behavior cannot be changed at this point, and
+therefore it must be done on a case by case basis. This is
+left to users and to packagers of ZooKeeper distributions.
+
+When running **zkServer.sh** autocreate can be disabled
+by setting the environment variable **ZOO_DATADIR_AUTOCREATE_DISABLE** to 1.
+When running ZooKeeper servers directly from class files this
+can be accomplished by setting **zookeeper.datadir.autocreate=false** on
+the java command line, i.e. **-Dzookeeper.datadir.autocreate=false**
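+
+For example (a sketch; the exact layout of your installation may differ):
+
+    # when using the shipped scripts
+    ZOO_DATADIR_AUTOCREATE_DISABLE=1 bin/zkServer.sh start
+
+    # when launching the server class directly
+    java -cp <zookeeper-classpath> -Dzookeeper.datadir.autocreate=false \
+        org.apache.zookeeper.server.quorum.QuorumPeerMain conf/zoo.cfg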
+
+When this feature is disabled and the ZooKeeper server
+determines that the required directories do not exist, it will
+generate an error and refuse to start.
+
+A new script **zkServer-initialize.sh** is provided to
+support this new feature. If autocreate is disabled it is
+necessary for the user to first install ZooKeeper, then create
+the data directory (and potentially txnlog directory), and
+then start the server. Otherwise as mentioned in the previous
+paragraph the server will not start. Running **zkServer-initialize.sh** will create the
+required directories, and optionally set up the myid file
+(via an optional command line parameter). This script can be used
+even if the autocreate feature itself is not used, and will
+likely be of use to users as this setup, including creation
+of the myid file, has been an issue for users in the past.
+Note that this script only ensures that the data directories exist;
+it does not create a config file, but rather requires a config
+file to be available in order to execute.
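+
+A hedged usage sketch (option names can vary between releases; check the script itself for the exact options supported by your version):
+
+    bin/zkServer-initialize.sh --myid=1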
+
+<a name="sc_db_existence_validation"></a>
+
+#### Enabling db existence validation
+
+**New in 3.6.0:** The default
+behavior of a ZooKeeper server on startup when no data tree
+is found is to set zxid to zero and join the quorum as a
+voting member. This can be dangerous if some event (e.g. a
+rogue 'rm -rf') has removed the data directory while the
+server was down since this server may help elect a leader
+that is missing transactions. Enabling db existence validation
+will change the behavior on startup when no data tree is
+found: the server joins the ensemble as a non-voting participant
+until it is able to sync with the leader and acquire an up-to-date
+version of the ensemble data. To indicate an empty data tree is
+expected (ensemble creation), the user should place a file
+'initialize' in the same directory as 'myid'. This file will
+be detected and deleted by the server on startup.
+
+Initialization validation can be enabled when running
+ZooKeeper servers directly from class files by setting
+**zookeeper.db.autocreate=false**
+on the java command line, i.e.
+**-Dzookeeper.db.autocreate=false**.
+Running **zkServer-initialize.sh**
+will create the required initialization file.
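+
+For example, when standing up a fresh ensemble member with validation enabled (paths and classpath are illustrative):
+
+    # mark that an empty data tree is expected; place the file next to myid
+    touch /var/lib/zookeeper/initialize
+
+    java -cp <zookeeper-classpath> -Dzookeeper.db.autocreate=false \
+        org.apache.zookeeper.server.quorum.QuorumPeerMain conf/zoo.cfg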
+
+<a name="sc_performance_options"></a>
+
+#### Performance Tuning Options
+
+**New in 3.5.0:** Several subsystems have been reworked
+to improve read throughput. This includes multi-threading of the NIO communication subsystem and
+request processing pipeline (Commit Processor). NIO is the default client/server communication
+subsystem. Its threading model comprises 1 acceptor thread, 1-N selector threads and 0-M
+socket I/O worker threads. In the request processing pipeline the system can be configured
+to process multiple read requests at once while maintaining the same consistency guarantee
+(same-session read-after-write). The Commit Processor threading model comprises 1 main
+thread and 0-N worker threads.
+
+The default values are aimed at maximizing read throughput on a dedicated ZooKeeper machine.
+Both subsystems need a sufficient number of threads to achieve peak read throughput.
+
+* *zookeeper.nio.numSelectorThreads* :
+    (Java system property only: **zookeeper.nio.numSelectorThreads**)
+    **New in 3.5.0:**
+    Number of NIO selector threads. At least one selector thread is required.
+    It is recommended to use more than one selector for large numbers
+    of client connections. The default value is sqrt( number of cpu cores / 2 ).
+
+* *zookeeper.nio.numWorkerThreads* :
+    (Java system property only: **zookeeper.nio.numWorkerThreads**)
+    **New in 3.5.0:**
+    Number of NIO worker threads. If configured with 0 worker threads, the selector threads
+    do the socket I/O directly. The default value is 2 times the number of cpu cores.
+
+* *zookeeper.commitProcessor.numWorkerThreads* :
+    (Java system property only: **zookeeper.commitProcessor.numWorkerThreads**)
+    **New in 3.5.0:**
+    Number of Commit Processor worker threads. If configured with 0 worker threads, the main thread
+    will process the request directly. The default value is the number of cpu cores.
+
+* *znode.container.checkIntervalMs* :
+    (Java system property only)
+    **New in 3.6.0:** The
+    time interval in milliseconds for each check of candidate container
+    and ttl nodes. Default is "60000".
+
+* *znode.container.maxPerMinute* :
+    (Java system property only)
+    **New in 3.6.0:** The
+    maximum number of container and ttl nodes that can be deleted per
+    minute. This prevents herding during container deletion.
+    Default is "10000".
+
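+As a hedged illustration, on a dedicated 8-core machine the defaults described above correspond roughly to the following JVM flags (derive your own numbers from load testing rather than copying these):
+
+    -Dzookeeper.nio.numSelectorThreads=2 \
+    -Dzookeeper.nio.numWorkerThreads=16 \
+    -Dzookeeper.commitProcessor.numWorkerThreads=8
+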
+<a name="Communication+using+the+Netty+framework"></a>
+
+#### Communication using the Netty framework
+
+[Netty](http://netty.io)
+is an NIO based client/server communication framework that
+simplifies (compared to using NIO directly) many of the
+complexities of network level communication for Java
+applications. Additionally, the Netty framework has built-in
+support for encryption (SSL) and authentication
+(certificates). These are optional features and can be
+turned on or off individually.
+
+In versions 3.5+, a ZooKeeper server can use Netty
+instead of NIO (the default option) by setting the Java system
+property **zookeeper.serverCnxnFactory**
+to **org.apache.zookeeper.server.NettyServerCnxnFactory**;
+for the client, set **zookeeper.clientCnxnSocket**
+to **org.apache.zookeeper.ClientCnxnSocketNetty**.
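+
+For instance (a sketch; pass the flags through whatever mechanism you use for JVM options, such as SERVER_JVMFLAGS and CLIENT_JVMFLAGS):
+
+    # server side
+    -Dzookeeper.serverCnxnFactory=org.apache.zookeeper.server.NettyServerCnxnFactory
+
+    # client side
+    -Dzookeeper.clientCnxnSocket=org.apache.zookeeper.ClientCnxnSocketNetty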
+
+TBD - tuning options for netty - currently there are none that are netty specific but we should add some. Esp around max bound on the number of reader worker threads netty creates.
+
+TBD - how to manage encryption
+
+TBD - how to manage certificates
+
+<a name="sc_adminserver_config"></a>
+
+#### AdminServer configuration
+
+**New in 3.5.0:** The following
+options are used to configure the [AdminServer](#sc_adminserver).
+
+* *admin.enableServer* :
+    (Java system property: **zookeeper.admin.enableServer**)
+    Set to "false" to disable the AdminServer.  By default the
+    AdminServer is enabled.
+
+* *admin.serverAddress* :
+    (Java system property: **zookeeper.admin.serverAddress**)
+    The address the embedded Jetty server listens on. Defaults to 0.0.0.0.
+
+* *admin.serverPort* :
+    (Java system property: **zookeeper.admin.serverPort**)
+    The port the embedded Jetty server listens on.  Defaults to 8080.
+
+* *admin.idleTimeout* :
+    (Java system property: **zookeeper.admin.idleTimeout**)
+    Set the maximum idle time in milliseconds that a connection can wait
+    before sending or receiving data. Defaults to 30000 ms.
+
+* *admin.commandURL* :
+    (Java system property: **zookeeper.admin.commandURL**)
+    The URL for listing and issuing commands relative to the
+    root URL.  Defaults to "/commands".
+
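+Putting these together, a hedged zoo.cfg fragment that keeps the AdminServer enabled but moves it to port 9090 could look like:
+
+    admin.enableServer=true
+    admin.serverPort=9090
+    admin.commandURL=/commands
+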
+<a name="sc_zkCommands"></a>
+
+### ZooKeeper Commands
+
+<a name="sc_4lw"></a>
+
+#### The Four Letter Words
+
+ZooKeeper responds to a small set of commands. Each command is
+composed of four letters. You issue the commands to ZooKeeper via telnet
+or nc, at the client port.
+
+Three of the more interesting commands: "stat" gives some
+general information about the server and connected clients,
+while "srvr" and "cons" give extended details on server and
+connections respectively.
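+
+For example, assuming a server listening on the default client port on localhost and a command that has been whitelisted (see the note below):
+
+    $ echo stat | nc localhost 2181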
+
+**New in 3.5.3:**
+Four Letter Words need to be explicitly whitelisted before use.
+Please refer to **4lw.commands.whitelist**
+described in [cluster configuration section](#sc_clusterOptions) for details.
+Moving forward, Four Letter Words will be deprecated, please use
+[AdminServer](#sc_adminserver) instead.
+
+* *conf* :
+    **New in 3.3.0:** Print
+    details about serving configuration.
+
+* *cons* :
+    **New in 3.3.0:** List
+    full connection/session details for all clients connected
+    to this server. Includes information on numbers of packets
+    received/sent, session id, operation latencies, last
+    operation performed, etc...
+
+* *crst* :
+    **New in 3.3.0:** Reset
+    connection/session statistics for all connections.
+
+* *dump* :
+    Lists the outstanding sessions and ephemeral nodes. This
+    only works on the leader.
+
+* *envi* :
+    Print details about the serving environment.
+
+* *ruok* :
+    Tests if server is running in a non-error state. The server
+    will respond with imok if it is running. Otherwise it will not
+    respond at all.
+    A response of "imok" does not necessarily indicate that the
+    server has joined the quorum, just that the server process is active
+    and bound to the specified client port. Use "stat" for details on
+    state with respect to quorum and client connection information.
+
+* *srst* :
+    Reset server statistics.
+
+* *srvr* :
+    **New in 3.3.0:** Lists
+    full details for the server.
+
+* *stat* :
+    Lists brief details for the server and connected
+    clients.
+
+* *wchs* :
+    **New in 3.3.0:** Lists
+    brief information on watches for the server.
+
+* *wchc* :
+    **New in 3.3.0:** Lists
+    detailed information on watches for the server, by
+    session. This outputs a list of sessions (connections)
+    with associated watches (paths). Note, depending on the
+    number of watches this operation may be expensive (i.e.
+    impact server performance); use it carefully.
+
+* *dirs* :
+    **New in 3.5.1:**
+    Shows the total size of snapshot and log files in bytes.
+
+* *wchp* :
+    **New in 3.3.0:** Lists
+    detailed information on watches for the server, by path.
+    This outputs a list of paths (znodes) with associated
+    sessions. Note, depending on the number of watches this
+    operation may be expensive (i.e. impact server performance);
+    use it carefully.
+
+* *mntr* :
+    **New in 3.4.0:** Outputs a list
+    of variables that could be used for monitoring the health of the cluster.
+
+
+    $ echo mntr | nc localhost 2185
+                  zk_version  3.4.0
+                  zk_avg_latency  0
+                  zk_max_latency  0
+                  zk_min_latency  0
+                  zk_packets_received 70
+                  zk_packets_sent 69
+                  zk_outstanding_requests 0
+                  zk_server_state leader
+                  zk_znode_count   4
+                  zk_watch_count  0
+                  zk_ephemerals_count 0
+                  zk_approximate_data_size    27
+                  zk_followers    4                   - only exposed by the Leader
+                  zk_synced_followers 4               - only exposed by the Leader
+                  zk_pending_syncs    0               - only exposed by the Leader
+                  zk_open_file_descriptor_count 23    - only available on Unix platforms
+                  zk_max_file_descriptor_count 1024   - only available on Unix platforms
+
+
+The output is compatible with the Java properties format and the content
+may change over time (new keys added). Your scripts should expect changes.
+ATTENTION: Some of the keys are platform specific and some of the keys are only exported by the Leader.
+The output contains multiple lines with the following format:
+
+
+    key \t value
+
+
+* *isro* :
+    **New in 3.4.0:** Tests if
+    server is running in read-only mode.  The server will respond with
+    "ro" if in read-only mode or "rw" if not in read-only mode.
+
+* *gtmk* :
+    Gets the current trace mask as a 64-bit signed long value in
+    decimal format.  See `stmk` for an explanation of
+    the possible values.
+
+* *stmk* :
+    Sets the current trace mask.  The trace mask is 64 bits,
+    where each bit enables or disables a specific category of trace
+    logging on the server.  Log4J must be configured to enable
+    `TRACE` level first in order to see trace logging
+    messages.  The bits of the trace mask correspond to the following
+    trace logging categories.
+    
+    | Trace Mask Bit Values |                     |
+    |-----------------------|---------------------|
+    | 0b0000000000 | Unused, reserved for future use. |
+    | 0b0000000010 | Logs client requests, excluding ping requests. |
+    | 0b0000000100 | Unused, reserved for future use. |
+    | 0b0000001000 | Logs client ping requests. |
+    | 0b0000010000 | Logs packets received from the quorum peer that is the current leader, excluding ping requests. |
+    | 0b0000100000 | Logs addition, removal and validation of client sessions. |
+    | 0b0001000000 | Logs delivery of watch events to client sessions. |
+    | 0b0010000000 | Logs ping packets received from the quorum peer that is the current leader. |
+    | 0b0100000000 | Unused, reserved for future use. |
+    | 0b1000000000 | Unused, reserved for future use. |
+
+    All remaining bits in the 64-bit value are unused and
+    reserved for future use.  Multiple trace logging categories are
+    specified by calculating the bitwise OR of the documented values.
+    The default trace mask is 0b0100110010.  Thus, by default, trace
+    logging includes client requests, packets received from the
+    leader and sessions.
+    To set a different trace mask, send a request containing the
+    `stmk` four-letter word followed by the trace
+    mask represented as a 64-bit signed long value.  This example uses
+    the Perl `pack` function to construct a trace
+    mask that enables all trace logging categories described above and
+    convert it to a 64-bit signed long value with big-endian byte
+    order.  The result is appended to `stmk` and sent
+    to the server using netcat.  The server responds with the new
+    trace mask in decimal format.
+
+
+    $ perl -e "print 'stmk', pack('q>', 0b0011111010)" | nc localhost 2181
+    250
+
+
+Here's an example of the **ruok**
+command:
+
+
+    $ echo ruok | nc 127.0.0.1 5111
+    imok
+
+
+<a name="sc_adminserver"></a>
+
+#### The AdminServer
+
+**New in 3.5.0:** The AdminServer is
+an embedded Jetty server that provides an HTTP interface to the four
+letter word commands.  By default, the server is started on port 8080,
+and commands are issued by going to the URL "/commands/\[command name]",
+e.g., http://localhost:8080/commands/stat.  The command response is
+returned as JSON.  Unlike the original protocol, commands are not
+restricted to four-letter names, and commands can have multiple names;
+for instance, "stmk" can also be referred to as "set_trace_mask".  To
+view a list of all available commands, point a browser to the URL
+/commands (e.g., http://localhost:8080/commands).  See the [AdminServer configuration options](#sc_adminserver_config)
+for how to change the port and URLs.
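+
+For example (assuming the default port and a server running locally):
+
+    $ curl http://localhost:8080/commands/stat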
+
+The AdminServer is enabled by default, but can be disabled by either:
+
+* Setting the zookeeper.admin.enableServer system
+  property to false.
+* Removing Jetty from the classpath.  (This option is
+  useful if you would like to override ZooKeeper's jetty
+  dependency.)
+
+Note that the TCP four letter word interface is still available if
+the AdminServer is disabled.
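+
+For example, a hedged way to disable it via the system property (add the flag to your server's JVM options):
+
+    -Dzookeeper.admin.enableServer=false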
+
+<a name="sc_dataFileManagement"></a>
+
+### Data File Management
+
+ZooKeeper stores its data in a data directory and its transaction
+log in a transaction log directory. By default these two directories are
+the same. The server can (and should) be configured to store the
+transaction log files in a directory separate from the data files.
+Throughput increases and latency decreases when transaction logs reside
+on a dedicated log device.
+
+<a name="The+Data+Directory"></a>
+
+#### The Data Directory
+
+This directory has two or three files in it:
+
+* *myid* - contains a single integer in
+  human readable ASCII text that represents the server id.
+* *initialize* - its presence indicates that an empty
+  data tree is expected. It is cleaned up once the data tree is created.
+* *snapshot.<zxid>* - holds the fuzzy
+  snapshot of a data tree.
+
+Each ZooKeeper server has a unique id. This id is used in two
+places: the *myid* file and the configuration file.
+The *myid* file identifies the server that
+corresponds to the given data directory. The configuration file lists
+the contact information for each server identified by its server id.
+When a ZooKeeper server instance starts, it reads its id from the
+*myid* file and then, using that id, reads from the
+configuration file, looking up the port on which it should
+listen.
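+
+As an illustrative sketch (hostnames and ports are placeholders), a three-server ensemble lists every member in each config file, while each server's *myid* file holds only its own id:
+
+    # zoo.cfg (identical on every server)
+    server.1=zoo1.example.com:2888:3888
+    server.2=zoo2.example.com:2888:3888
+    server.3=zoo3.example.com:2888:3888
+
+    # <dataDir>/myid on server 1 contains the single line:
+    1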
+
+The *snapshot* files stored in the data
+directory are fuzzy snapshots in the sense that during the time the
+ZooKeeper server is taking the snapshot, updates are occurring to the
+data tree. The suffix of the *snapshot* file names
+is the _zxid_, the ZooKeeper transaction id, of the
+last committed transaction at the start of the snapshot. Thus, the
+snapshot includes a subset of the updates to the data tree that
+occurred while the snapshot was in process. The snapshot, then, may
+not correspond to any data tree that actually existed, and for this
+reason we refer to it as a fuzzy snapshot. Still, ZooKeeper can
+recover using this snapshot because it takes advantage of the
+idempotent nature of its updates. By replaying the transaction log
+against fuzzy snapshots ZooKeeper gets the state of the system at the
+end of the log.
+
+<a name="The+Log+Directory"></a>
+
+#### The Log Directory
+
+The Log Directory contains the ZooKeeper transaction logs.
+Before any update takes place, ZooKeeper ensures that the transaction
+that represents the update is written to non-volatile storage. A new
+log file is started when the number of transactions written to the
+current log file reaches a (variable) threshold. The threshold is
+computed using the same parameter which influences the frequency of
+snapshotting (see snapCount above). The log file's suffix is the first
+zxid written to that log.
+
+<a name="sc_filemanagement"></a>
+
+#### File Management
+
+The format of snapshot and log files does not change between
+standalone ZooKeeper servers and different configurations of
+replicated ZooKeeper servers. Therefore, you can pull these files from
+a running replicated ZooKeeper server to a development machine with a
+stand-alone ZooKeeper server for troubleshooting.
+
+Using older log and snapshot files, you can look at the previous
+state of ZooKeeper servers and even restore that state. The
+LogFormatter class allows an administrator to look at the transactions
+in a log.
+
+The ZooKeeper server creates snapshot and log files, but
+never deletes them. The retention policy of the data and log
+files is implemented outside of the ZooKeeper server. The
+server itself only needs the latest complete fuzzy snapshot, all log
+files following it, and the last log file preceding it.  The latter
+requirement is necessary to include updates which happened after this
+snapshot was started but went into the existing log file at that time.
+This is possible because snapshotting and rolling over of logs
+proceed somewhat independently in ZooKeeper. See the
+[maintenance](#sc_maintenance) section in
+this document for more details on setting a retention policy
+and maintenance of ZooKeeper storage.
+
+###### Note
+>The data stored in these files is not encrypted. In the case of
+storing sensitive data in ZooKeeper, necessary measures need to be
+taken to prevent unauthorized access. Such measures are external to
+ZooKeeper (e.g., control access to the files) and depend on the
+individual settings in which it is being deployed.
+
+<a name="Recovery+-+TxnLogToolkit"></a>
+
+#### Recovery - TxnLogToolkit
+
+TxnLogToolkit is a command line tool shipped with ZooKeeper which
+is capable of recovering transaction log entries with broken CRC.
+
+Running it without any command line parameters or with the `-h,--help` argument, it outputs the following help page:
+
+    $ bin/zkTxnLogToolkit.sh
+    usage: TxnLogToolkit [-dhrv] txn_log_file_name
+    -d,--dump      Dump mode. Dump all entries of the log file. (this is the default)
+    -h,--help      Print help message
+    -r,--recover   Recovery mode. Re-calculate CRC for broken entries.
+    -v,--verbose   Be verbose in recovery mode: print all entries, not just fixed ones.
+    -y,--yes       Non-interactive mode: repair all CRC errors without asking
+    
+The default behaviour is safe: it dumps the entries of the given
+transaction log file to the screen (same as using the `-d,--dump` parameter):
+
+    $ bin/zkTxnLogToolkit.sh log.100000001
+    ZooKeeper Transactional Log File with dbid 0 txnlog format version 2
+    4/5/18 2:15:58 PM CEST session 0x16295bafcc40000 cxid 0x0 zxid 0x100000001 createSession 30000
+    CRC ERROR - 4/5/18 2:16:05 PM CEST session 0x16295bafcc40000 cxid 0x1 zxid 0x100000002 closeSession null
+    4/5/18 2:16:05 PM CEST session 0x16295bafcc40000 cxid 0x1 zxid 0x100000002 closeSession null
+    4/5/18 2:16:12 PM CEST session 0x26295bafcc90000 cxid 0x0 zxid 0x100000003 createSession 30000
+    4/5/18 2:17:34 PM CEST session 0x26295bafcc90000 cxid 0x0 zxid 0x200000001 closeSession null
+    4/5/18 2:17:34 PM CEST session 0x16295bd23720000 cxid 0x0 zxid 0x200000002 createSession 30000
+    4/5/18 2:18:02 PM CEST session 0x16295bd23720000 cxid 0x2 zxid 0x200000003 create '/andor,#626262,v{s{31,s{'world,'anyone}}},F,1
+    EOF reached after 6 txns.
+
+There's a CRC error in the 2nd entry of the above transaction log file. In **dump**
+mode, the toolkit only prints this information to the screen without touching the original file. In
+**recovery** mode (`-r,--recover` flag) the original file still remains
+untouched and all transactions are copied over to a new txn log file with a ".fixed" suffix. The tool recalculates
+CRC values and writes the corrected value whenever it doesn't match the original txn entry.
+By default, the tool works interactively: it asks for confirmation whenever a CRC error is encountered.
+
+    $ bin/zkTxnLogToolkit.sh -r log.100000001
+    ZooKeeper Transactional Log File with dbid 0 txnlog format version 2
+    CRC ERROR - 4/5/18 2:16:05 PM CEST session 0x16295bafcc40000 cxid 0x1 zxid 0x100000002 closeSession null
+    Would you like to fix it (Yes/No/Abort) ?
+
+Answering **Yes** means the newly calculated CRC value will be written
+to the new file. **No** means that the original CRC value will be copied over.
+**Abort** will abort the entire operation and exit.
+(In this case the ".fixed" file is not deleted and is left in a half-complete state: it contains only entries which
+have already been processed, or only the header if the operation was aborted at the first entry.)
+
+    $ bin/zkTxnLogToolkit.sh -r log.100000001
+    ZooKeeper Transactional Log File with dbid 0 txnlog format version 2
+    CRC ERROR - 4/5/18 2:16:05 PM CEST session 0x16295bafcc40000 cxid 0x1 zxid 0x100000002 closeSession null
+    Would you like to fix it (Yes/No/Abort) ? y
+    EOF reached after 6 txns.
+    Recovery file log.100000001.fixed has been written with 1 fixed CRC error(s)
+
+The default behaviour of recovery is to be silent: only entries with CRC errors get printed to the screen.
+One can turn on verbose mode with the `-v,--verbose` parameter to see all records.
+Interactive mode can be turned off with the `-y,--yes` parameter. In this case all CRC errors will be fixed
+in the new transaction file.
+
+<a name="sc_commonProblems"></a>
+
+### Things to Avoid
+
+Here are some common problems you can avoid by configuring
+ZooKeeper correctly:
+
+* *inconsistent lists of servers* :
+    The list of ZooKeeper servers used by the clients must match
+    the list of ZooKeeper servers that each ZooKeeper server has.
+    Things work okay if the client list is a subset of the real list,
+    but things will really act strange if clients have a list of
+    ZooKeeper servers that are in different ZooKeeper clusters. Also,
+    the server lists in each ZooKeeper server configuration file
+    should be consistent with one another.
+
+* *incorrect placement of transaction log* :
+    The most performance critical part of ZooKeeper is the
+    transaction log. ZooKeeper syncs transactions to media before it
+    returns a response. A dedicated transaction log device is key to
+    consistent good performance. Putting the log on a busy device will
+    adversely affect performance. If you only have one storage device,
+    put trace files on NFS and increase the snapshotCount; it doesn't
+    eliminate the problem, but it should mitigate it.
+
+* *incorrect Java heap size* :
+    You should take special care to set your Java max heap size
+    correctly. In particular, you should not create a situation in
+    which ZooKeeper swaps to disk. The disk is death to ZooKeeper.
+    Everything is ordered, so if processing one request swaps to
+    disk, all other queued requests will probably do the same.
+    DON'T SWAP.
+    Be conservative in your estimates: if you have 4G of RAM, do
+    not set the Java max heap size to 6G or even 4G. For example, it
+    is more likely you would use a 3G heap for a 4G machine, as the
+    operating system and the cache also need memory. The best and only
+    recommended practice for estimating the heap size your system needs
+    is to run load tests, and then make sure you are well below the
+    usage limit that would cause the system to swap.
+
+* *Publicly accessible deployment* :
+    A ZooKeeper ensemble is expected to operate in a trusted computing environment.
+    It is thus recommended to deploy ZooKeeper behind a firewall.
+
+<a name="sc_bestPractices"></a>
+
+### Best Practices
+
+For best results, take note of the following list of good
+ZooKeeper practices:
+
+For multi-tenant installations see the [section](zookeeperProgrammers.html#ch_zkSessions)
+detailing ZooKeeper "chroot" support; this can be very useful
+when deploying many applications/services interfacing to a
+single ZooKeeper cluster.
+
+

+ 47 - 0
zookeeper-docs/src/main/resources/markdown/zookeeperHierarchicalQuorums.md

@@ -0,0 +1,47 @@
+<!--
+Copyright 2002-2004 The Apache Software Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+//-->
+
+# Introduction to hierarchical quorums
+
+This document gives an example of how to use hierarchical quorums. The basic idea is
+very simple. First, we split servers into groups, and add a line for each group listing
+the servers that form this group. Next we have to assign a weight to each server.
+
+The following example shows how to configure a system with three groups of three servers
+each, and we assign a weight of 1 to each server:
+
+
+    group.1=1:2:3
+    group.2=4:5:6
+    group.3=7:8:9
+
+    weight.1=1
+    weight.2=1
+    weight.3=1
+    weight.4=1
+    weight.5=1
+    weight.6=1
+    weight.7=1
+    weight.8=1
+    weight.9=1
+
+
+When running the system, we are able to form a quorum once we have a majority of votes from
+a majority of non-zero-weight groups. Groups that have zero weight are discarded and not
+considered when forming quorums. Looking at the example, we are able to form a quorum once
+we have votes from at least two servers from each of two different groups.
+
+

+ 370 - 0
zookeeper-docs/src/main/resources/markdown/zookeeperInternals.md

@@ -0,0 +1,370 @@
+<!--
+Copyright 2002-2004 The Apache Software Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+//-->
+
+# ZooKeeper Internals
+
+* [Introduction](#ch_Introduction)
+* [Atomic Broadcast](#sc_atomicBroadcast)
+    * [Guarantees, Properties, and Definitions](#sc_guaranteesPropertiesDefinitions)
+    * [Leader Activation](#sc_leaderElection)
+    * [Active Messaging](#sc_activeMessaging)
+    * [Summary](#sc_summary)
+    * [Comparisons](#sc_comparisons)
+* [Quorums](#sc_quorum)
+* [Logging](#sc_logging)
+    * [Developer Guidelines](#sc_developerGuidelines)
+        * [Logging at the Right Level](#sc_rightLevel)
+        * [Use of Standard slf4j Idioms](#sc_slf4jIdioms)
+
+<a name="ch_Introduction"></a>
+
+## Introduction
+
+This document contains information on the inner workings of ZooKeeper.
+So far, it discusses these topics:
+
+* [Atomic Broadcast](#sc_atomicBroadcast)
+* [Logging](#sc_logging)
+
+<a name="sc_atomicBroadcast"></a>
+
+## Atomic Broadcast
+
+At the heart of ZooKeeper is an atomic messaging system that keeps all of the servers in sync.
+
+<a name="sc_guaranteesPropertiesDefinitions"></a>
+
+### Guarantees, Properties, and Definitions
+
+The specific guarantees provided by the messaging system used by ZooKeeper are the following:
+
+* *_Reliable delivery_* :
+    If a message, m, is delivered
+    by one server, it will be eventually delivered by all servers.
+
+* *_Total order_* :
+    If a message a is
+    delivered before message b by one server, a will be delivered before b by all
+    servers. If a and b are delivered messages, either a will be delivered before b
+    or b will be delivered before a.
+
+* *_Causal order_* :
+    If a message b is sent after a message a has been delivered by the sender of b,
+    a must be ordered before b. If a sender sends c after sending b, c must be ordered after b.
+
+The ZooKeeper messaging system also needs to be efficient, reliable, and easy to
+implement and maintain. We make heavy use of messaging, so we need the system to
+be able to handle thousands of requests per second. Although we can require at
+least k+1 correct servers to send new messages, we must be able to recover from
+correlated failures such as power outages. When we implemented the system we had
+little time and few engineering resources, so we needed a protocol that is
+accessible to engineers and is easy to implement. We found that our protocol
+satisfied all of these goals.
+
+Our protocol assumes that we can construct point-to-point FIFO channels between
+the servers. While similar services usually assume message delivery that can
+lose or reorder messages, our assumption of FIFO channels is very practical
+given that we use TCP for communication. Specifically we rely on the following property of TCP:
+
+* *_Ordered delivery_* :
+    Data is delivered in the same order it is sent and a message m is
+    delivered only after all messages sent before m have been delivered.
+    (The corollary to this is that if message m is lost all messages after m will be lost.)
+
+* *_No message after close_* :
+    Once a FIFO channel is closed, no messages will be received from it.
+
+FLP proved that consensus cannot be achieved in asynchronous distributed systems
+if failures are possible. To ensure we achieve consensus in the presence of failures
+we use timeouts. However, we rely on timeouts for liveness, not for correctness. So,
+if timeouts stop working (clocks malfunction for example) the messaging system may
+hang, but it will not violate its guarantees.
+
+When describing the ZooKeeper messaging protocol we will talk of packets,
+proposals, and messages:
+
+* *_Packet_* :
+    a sequence of bytes sent through a FIFO channel
+
+* *_Proposal_* :
+    a unit of agreement. Proposals are agreed upon by exchanging packets
+    with a quorum of ZooKeeper servers. Most proposals contain messages, however the
+    NEW_LEADER proposal is an example of a proposal that does not correspond to a message.
+
+* *_Message_* :
+    a sequence of bytes to be atomically broadcast to all ZooKeeper
+    servers. A message is put into a proposal and agreed upon before it is delivered.
+
+As stated above, ZooKeeper guarantees a total order of messages, and it also
+guarantees a total order of proposals. ZooKeeper exposes the total ordering using
+a ZooKeeper transaction id (_zxid_). Each proposal is stamped with a zxid when
+it is proposed, and the zxid exactly reflects the total ordering. Proposals are sent to all
+ZooKeeper servers and committed when a quorum of them acknowledge the proposal.
+If a proposal contains a message, the message will be delivered when the proposal
+is committed. Acknowledgement means the server has recorded the proposal to persistent storage.
+Our quorums have the requirement that any pair of quorums must have at least one server
+in common. We ensure this by requiring that all quorums have size (_n/2+1_) where
+n is the number of servers that make up a ZooKeeper service.
+
+The zxid has two parts: the epoch and a counter. In our implementation the zxid
+is a 64-bit number. We use the high order 32-bits for the epoch and the low order
+32-bits for the counter. Because it has two parts, we represent the zxid both as a
+number and as a pair of integers, (_epoch, count_). The epoch number represents a
+change in leadership. Each time a new leader comes into power it will have its
+own epoch number. We have a simple algorithm to assign a unique zxid to a proposal:
+the leader simply increments the zxid to obtain a unique zxid for each proposal. _Leadership activation will ensure that only one leader uses a given epoch, so our
+simple algorithm guarantees that every proposal will have a unique id._
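+
+A quick illustration of the packing (a shell sketch, not ZooKeeper source): with epoch 5 and counter 10 the zxid is (5 << 32) | 10:
+
+    $ printf '0x%x\n' $(( (5 << 32) | 10 ))
+    0x50000000a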
+
+ZooKeeper messaging consists of two phases:
+
+* *_Leader activation_* :
+    In this phase a leader establishes the correct state of the system
+    and gets ready to start making proposals.
+
+* *_Active messaging_* :
+    In this phase a leader accepts messages to propose and coordinates message delivery.
+
+ZooKeeper is a holistic protocol. We do not focus on individual proposals, rather
+look at the stream of proposals as a whole. Our strict ordering allows us to do this
+efficiently and greatly simplifies our protocol. Leadership activation embodies
+this holistic concept. A leader becomes active only when a quorum of followers
+(the leader counts as a follower as well; you can always vote for yourself) has synced
+up with the leader, i.e. they have the same state. This state consists of all of the
+proposals that the leader believes have been committed and the proposal to follow
+the leader, the NEW_LEADER proposal. (Hopefully you are thinking to
+yourself, _Does the set of proposals that the leader believes have been committed
+include all the proposals that really have been committed?_ The answer is _yes_.
+Below, we make clear why.)
+
+<a name="sc_leaderElection"></a>
+
+### Leader Activation
+
+Leader activation includes leader election. We currently have two leader election
+algorithms in ZooKeeper: LeaderElection and FastLeaderElection (AuthFastLeaderElection
+is a variant of FastLeaderElection that uses UDP and allows servers to perform a simple
+form of authentication to avoid IP spoofing). ZooKeeper messaging doesn't care about the
+exact method of electing a leader as long as the following holds:
+
+* The leader has seen the highest zxid of all the followers.
+* A quorum of servers have committed to following the leader.
+
+Of these two requirements only the first, the highest zxid among the followers,
+needs to hold for correct operation. The second requirement, a quorum of followers,
+just needs to hold with high probability. We are going to recheck the second requirement,
+so if a failure happens during or after the leader election and quorum is lost,
+we will recover by abandoning leader activation and running another election.
+
+After leader election a single server will be designated as a leader and start
+waiting for followers to connect. The rest of the servers will try to connect to
+the leader. The leader will sync up with followers by sending any proposals they
+are missing, or if a follower is missing too many proposals, it will send a full
+snapshot of the state to the follower.
+
+There is a corner case in which a follower that has proposals, U, not seen
+by a leader arrives. Proposals are seen in order, so the proposals of U will have zxids
+higher than the zxids seen by the leader. The follower must have arrived after the
+leader election, otherwise the follower would have been elected leader given that
+it has seen a higher zxid. Since committed proposals must be seen by a quorum of
+servers, and a quorum of servers that elected the leader did not see U, the proposals
+of U have not been committed, so they can be discarded. When the follower connects
+to the leader, the leader will tell the follower to discard U.
+
+A new leader establishes a zxid to start using for new proposals by getting the
+epoch, e, of the highest zxid it has seen and setting the next zxid to use to be
+(e+1, 0). After the leader syncs with a follower, it will propose a NEW_LEADER
+proposal. Once the NEW_LEADER proposal has been committed, the leader will activate
+and start receiving and issuing proposals.
+
+It all sounds complicated but here are the basic rules of operation during leader
+activation:
+
+* A follower will ACK the NEW_LEADER proposal after it has synced with the leader.
+* A follower will only ACK a NEW_LEADER proposal with a given zxid from a single server.
+* A new leader will COMMIT the NEW_LEADER proposal when a quorum of followers have ACKed it.
+* A follower will commit any state it received from the leader when the NEW_LEADER proposal is COMMITTED.
+* A new leader will not accept new proposals until the NEW_LEADER proposal has been COMMITTED.
+
+If leader election terminates erroneously, we don't have a problem since the
+NEW_LEADER proposal will not be committed, as the leader will not have quorum.
+When this happens, the leader and any remaining followers will timeout and go back
+to leader election.
+
+<a name="sc_activeMessaging"></a>
+
+### Active Messaging
+
+Leader Activation does all the heavy lifting. Once the leader is coronated it can
+start blasting out proposals. As long as it remains the leader no other leader can
+emerge since no other leader will be able to get a quorum of followers. If a new
+leader does emerge,
+it means that the leader has lost quorum, and the new leader will clean up any
+mess left over during its leadership activation.
+
+ZooKeeper messaging operates similar to a classic two-phase commit.
+
+![Two phase commit](images/2pc.jpg)
+
+All communication channels are FIFO, so everything is done in order. Specifically
+the following operating constraints are observed:
+
+* The leader sends proposals to all followers using
+  the same order. Moreover, this order follows the order in which requests have been
+  received. Because we use FIFO channels this means that followers also receive proposals in order.
+* Followers process messages in the order they are received. This
+  means that messages will be ACKed in order and the leader will receive ACKs from
+  followers in order, due to the FIFO channels. It also means that if message _m_
+  has been written to non-volatile storage, all messages that were proposed before
+  _m_ have been written to non-volatile storage.
+* The leader will issue a COMMIT to all followers as soon as a
+  quorum of followers have ACKed a message. Since messages are ACKed in order,
+  COMMITs will be sent by the leader, and received by the followers, in order.
+* COMMITs are processed in order. Followers deliver a proposal's
+  message when that proposal is committed.
+
+<a name="sc_summary"></a>
+
+### Summary
+
+So there you go. Why does it work? Specifically, why does a set of proposals
+believed by a new leader always contain any proposal that has actually been committed?
+First, all proposals have a unique zxid, so unlike other protocols, we never have
+to worry about two different values being proposed for the same zxid; followers
+(a leader is also a follower) see and record proposals in order; proposals are
+committed in order; there is only one active leader at a time since followers only
+follow a single leader at a time; a new leader has seen all committed proposals
+from the previous epoch since it has seen the highest zxid from a quorum of servers;
+any uncommitted proposals from a previous epoch seen by a new leader will be committed
+by that leader before it becomes active.
+
+<a name="sc_comparisons"></a>
+
+### Comparisons
+
+Isn't this just Multi-Paxos? No, Multi-Paxos requires some way of assuring that
+there is only a single coordinator. We do not count on such assurances. Instead
+we use the leader activation to recover from leadership change or old leaders
+believing they are still active.
+
+Isn't this just Paxos? Your active messaging phase looks just like phase 2 of Paxos?
+Actually, to us active messaging looks just like 2 phase commit without the need to
+handle aborts. Active messaging is different from both in the sense that it has
+cross proposal ordering requirements. If we do not maintain strict FIFO ordering of
+all packets, it all falls apart. Also, our leader activation phase is different from
+both of them. In particular, our use of epochs allows us to skip blocks of uncommitted
+proposals and to not worry about duplicate proposals for a given zxid.
+
+<a name="sc_quorum"></a>
+
+## Quorums
+
+Atomic broadcast and leader election use the notion of quorum to guarantee a consistent
+view of the system. By default, ZooKeeper uses majority quorums, which means that every
+vote that happens in one of these protocols requires a majority to agree on it. One example is
+acknowledging a leader proposal: the leader can only commit once it receives an
+acknowledgement from a quorum of servers.
+
+If we extract the properties that we really need from our use of majorities, we have that we only
+need to guarantee that groups of processes used to validate an operation by voting (e.g., acknowledging
+a leader proposal) pairwise intersect in at least one server. Using majorities guarantees such a property.
+However, there are other ways of constructing quorums different from majorities. For example, we can assign
+weights to the votes of servers, and say that the votes of some servers are more important. To obtain a quorum,
+we get enough votes so that the sum of weights of all votes is larger than half of the total sum of all weights.
+
+A different construction that uses weights and is useful in wide-area deployments (co-locations) is a hierarchical
+one. With this construction, we split the servers into disjoint groups and assign weights to processes. To form
+a quorum, we have to get a hold of enough servers from a majority of groups G, such that for each group g in G,
+the sum of votes from g is larger than half of the sum of weights in g. Interestingly, this construction enables
+smaller quorums. If we have, for example, 9 servers, we split them into 3 groups, and assign a weight of 1 to each
+server, then we are able to form quorums of size 4. Note that two subsets of processes composed each of a majority
+of servers from each of a majority of groups necessarily have a non-empty intersection. It is reasonable to expect
+that a majority of co-locations will have a majority of servers available with high probability.
+
+With ZooKeeper, we provide a user with the ability of configuring servers to use majority quorums, weights, or a
+hierarchy of groups.
+
+<a name="sc_logging"></a>
+
+## Logging
+
+ZooKeeper uses [slf4j](http://www.slf4j.org/index.html) as an abstraction layer for logging. [log4j](http://logging.apache.org/log4j) in version 1.2 is chosen as the final logging implementation for now.
+For better embedding support, it is planned in the future to leave the decision of choosing the final logging implementation to the end user.
+Therefore, always use the slf4j api to write log statements in the code, but configure log4j for how to log at runtime.
+Note that slf4j has no FATAL level, former messages at FATAL level have been moved to ERROR level.
+For information on configuring log4j for
+ZooKeeper, see the [Logging](zookeeperAdmin.html#sc_logging) section
+of the [ZooKeeper Administrator's Guide.](zookeeperAdmin.html)
+
+<a name="sc_developerGuidelines"></a>
+
+### Developer Guidelines
+
+Please follow the [slf4j manual](http://www.slf4j.org/manual.html) when creating log statements within code.
+Also read the [FAQ on performance](http://www.slf4j.org/faq.html#logging_performance)
+when creating log statements. Patch reviewers will look for the following:
+
+<a name="sc_rightLevel"></a>
+
+#### Logging at the Right Level
+
+There are several levels of logging in slf4j.
+
+It's important to pick the right one. In order from higher to lower severity:
+
+1. ERROR level designates error events that might still allow the application to continue running.
+1. WARN level designates potentially harmful situations.
+1. INFO level designates informational messages that highlight the progress of the application at coarse-grained level.
+1. DEBUG Level designates fine-grained informational events that are most useful to debug an application.
+1. TRACE Level designates finer-grained informational events than the DEBUG.
+
+ZooKeeper is typically run in production such that log messages of INFO level
+severity and higher (more severe) are output to the log.
+
+<a name="sc_slf4jIdioms"></a>
+
+#### Use of Standard slf4j Idioms
+
+_Static Message Logging_
+
+    LOG.debug("process completed successfully!");
+
+However, when parameterized messages are required, use formatting anchors.
+
+    LOG.debug("got {} messages in {} minutes", count, time);
+
+_Naming_
+
+Loggers should be named after the class in which they are used.
+
+    import org.slf4j.Logger;
+    import org.slf4j.LoggerFactory;
+
+    public class Foo {
+        private static final Logger LOG = LoggerFactory.getLogger(Foo.class);
+        ....
+        public Foo() {
+            LOG.info("constructing Foo");
+        }
+    }
+
+_Exception handling_
+
+    try {
+        // code
+    } catch (XYZException e) {
+        // do this
+        LOG.error("Something bad happened", e);
+        // don't do this (generally)
+        // LOG.error(e);
+        // why? because "don't do" case hides the stack trace
+
+        // continue process here as you need... recover or (re)throw
+    }

+ 118 - 0
zookeeper-docs/src/main/resources/markdown/zookeeperJMX.md

@@ -0,0 +1,118 @@
+<!--
+Copyright 2002-2004 The Apache Software Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+//-->
+
+# ZooKeeper JMX
+
+* [JMX](#ch_jmx)
+* [Starting ZooKeeper with JMX enabled](#ch_starting)
+* [Run a JMX console](#ch_console)
+* [ZooKeeper MBean Reference](#ch_reference)
+
+<a name="ch_jmx"></a>
+
+## JMX
+
+Apache ZooKeeper has extensive support for JMX, allowing you
+to view and manage a ZooKeeper serving ensemble.
+
+This document assumes that you have basic knowledge of
+JMX. See [Sun JMX Technology](http://java.sun.com/javase/technologies/core/mntr-mgmt/javamanagement/) page to get started with JMX.
+
+See the [JMX Management Guide](http://java.sun.com/javase/6/docs/technotes/guides/management/agent.html) for details on setting up local and
+remote management of VM instances. By default the included
+_zkServer.sh_ supports only local management -
+review the linked document to enable support for remote management
+(beyond the scope of this document).
+
+<a name="ch_starting"></a>
+
+## Starting ZooKeeper with JMX enabled
+
+The class
+_org.apache.zookeeper.server.quorum.QuorumPeerMain_
+will start a JMX manageable ZooKeeper server. This class
+registers the proper MBeans during initialization to support JMX
+monitoring and management of the
+instance. See _bin/zkServer.sh_ for one
+example of starting ZooKeeper using QuorumPeerMain.
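+
+As a hedged sketch (zkServer.sh takes care of this for you; the flags below are the standard JVM JMX switches, and the classpath placeholder must be filled in for your installation):
+
+    java -cp <zookeeper-classpath> \
+        -Dcom.sun.management.jmxremote \
+        -Dcom.sun.management.jmxremote.local.only=true \
+        org.apache.zookeeper.server.quorum.QuorumPeerMain conf/zoo.cfg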
+
+<a name="ch_console"></a>
+
+## Run a JMX console
+
+There are a number of JMX consoles available which can connect
+to the running server. For this example we will use Sun's
+_jconsole_.
+
+The Java JDK ships with a simple JMX console
+named [jconsole](http://java.sun.com/developer/technicalArticles/J2SE/jconsole.html)
+which can be used to connect to ZooKeeper and inspect a running
+server. Once you've started ZooKeeper using QuorumPeerMain
+start _jconsole_, which typically resides in
+_JDK_HOME/bin/jconsole_.
+
+When the "new connection" window is displayed, either connect
+to the local process (if jconsole is started on the same host as the server) or
+use the remote process connection.
+
+By default the "overview" tab for the VM is displayed (this
+is, by the way, a great way to get insight into the VM). Select
+the "MBeans" tab.
+
+You should now see _org.apache.ZooKeeperService_
+on the left hand side. Expand this item and, depending on how you've
+started the server, you will be able to monitor and manage various
+service related features.
+
+Also note that ZooKeeper will register log4j MBeans as
+well. In the same section along the left hand side you will see
+"log4j". Expand that to manage log4j through JMX. Of particular
+interest is the ability to dynamically change the logging levels
+used by editing the appender and root thresholds. Log4j MBean
+registration can be disabled by passing
+_-Dzookeeper.jmx.log4j.disable=true_ to the JVM
+when starting ZooKeeper.
+
+<a name="ch_reference"></a>
+
+## ZooKeeper MBean Reference
+
+This table details JMX for a server participating in a
+replicated ZooKeeper ensemble (i.e. not standalone). This is the
+typical case for a production environment.
+
+### MBeans, their names and description
+
+| MBean | MBean Object Name | Description                               |
+|-----------|-------------------|-------------------------------------------------|
+| Quorum | ReplicatedServer_id<#> | Represents the Quorum, or Ensemble - parent of all cluster members. Note that the object name includes the "myid" of the server (name suffix) that your JMX agent has connected to. |
+| LocalPeer/RemotePeer | replica.<#> | Represents a local or remote peer (i.e. a server participating in the ensemble). Note that the object name includes the "myid" of the server (name suffix). |
+| LeaderElection | LeaderElection | Represents a ZooKeeper cluster leader election which is in progress. Provides information about the election, such as when it started. |
+| Leader | Leader | Indicates that the parent replica is the leader and provides attributes/operations for that server. Note that Leader is a subclass of ZooKeeperServer, so it provides all of the information normally associated with a ZooKeeperServer node. |
+| Follower | Follower | Indicates that the parent replica is a follower and provides attributes/operations for that server. Note that Follower is a subclass of ZooKeeperServer, so it provides all of the information normally associated with a ZooKeeperServer node. |
+| DataTree | InMemoryDataTree | Statistics on the in memory znode database, also operations to access finer (and more computationally intensive) statistics on the data (such as ephemeral count). InMemoryDataTrees are children of ZooKeeperServer nodes. |
+| ServerCnxn | <session_id> | Statistics on each client connection, also operations on those connections (such as termination). Note the object name is the session id of the connection in hex form. |
+
+This table details JMX for a standalone server. Typically
+standalone is only used in development situations.
+
+### MBeans, their names and description
+
+| MBean | MBean Object Name | Description            |
+|-------|-------------------|------------------------|
+| ZooKeeperServer | StandaloneServer_port<#> | Statistics on the running server, also operations to reset these attributes. Note that the object name includes the client port of the server (name suffix). |
+| DataTree | InMemoryDataTree | Statistics on the in memory znode database, also operations to access finer (and more computationally intensive) statistics on the data (such as ephemeral count). |
+| ServerCnxn | <session_id> | Statistics on each client connection, also operations on those connections (such as termination). Note the object name is the session id of the connection in hex form. |

+ 106 - 0
zookeeper-docs/src/main/resources/markdown/zookeeperObservers.md

@@ -0,0 +1,106 @@
+<!--
+Copyright 2002-2004 The Apache Software Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+//-->
+
+# ZooKeeper Observers
+
+* [Observers: Scaling ZooKeeper Without Hurting Write Performance](#ch_Introduction)
+* [How to use Observers](#sc_UsingObservers)
+* [Example use cases](#ch_UseCases)
+
+<a name="ch_Introduction"></a>
+
+## Observers: Scaling ZooKeeper Without Hurting Write Performance
+
+Although ZooKeeper performs very well by having clients connect directly
+to voting members of the ensemble, this architecture makes it hard to
+scale out to huge numbers of clients. The problem is that as we add more
+voting members, the write performance drops. This is due to the fact that
+a write operation requires the agreement of (in general) at least half the
+nodes in an ensemble and therefore the cost of a vote can increase
+significantly as more voters are added.
+
+We have introduced a new type of ZooKeeper node called
+an _Observer_ which helps address this problem and
+further improves ZooKeeper's scalability. Observers are non-voting members
+of an ensemble which only hear the results of votes, not the agreement
+protocol that leads up to them. Other than this simple distinction,
+Observers function exactly the same as Followers - clients may connect to
+them and send read and write requests to them. Observers forward these
+requests to the Leader like Followers do, but they then simply wait to
+hear the result of the vote. Because of this, we can increase the number
+of Observers as much as we like without harming the performance of votes.
+
+Observers have other advantages. Because they do not vote, they are not a
+critical part of the ZooKeeper ensemble. Therefore they can fail, or be
+disconnected from the cluster, without harming the availability of the
+ZooKeeper service. The benefit to the user is that Observers may connect
+over less reliable network links than Followers. In fact, Observers may be
+used to talk to a ZooKeeper server from another data center. Clients of
+the Observer will see fast reads, as all reads are served locally, and
+writes result in minimal network traffic as the number of messages
+required in the absence of the vote protocol is smaller.
+
+<a name="sc_UsingObservers"></a>
+
+## How to use Observers
+
+Setting up a ZooKeeper ensemble that uses Observers is very simple,
+and requires just two changes to your config files. Firstly, in the config
+file of every node that is to be an Observer, you must place this line:
+
+    peerType=observer
+
+This line tells ZooKeeper that the server is to be an Observer. Secondly,
+in every server config file, you must add :observer to the server
+definition line of each Observer. For example:
+
+    server.1:localhost:2181:3181:observer
+
+This tells every other server that server.1 is an Observer, and that they
+should not expect it to vote. This is all the configuration you need to do
+to add an Observer to your ZooKeeper cluster. Now you can connect to it as
+though it were an ordinary Follower. Try it out, by running:
+
+    $ bin/zkCli.sh -server localhost:2181
+
+where localhost:2181 is the hostname and port number of the Observer as
+specified in every config file. You should see a command line prompt
+through which you can issue commands like _ls_ to query
+the ZooKeeper service.
+
+<a name="ch_UseCases"></a>
+
+## Example use cases
+
+Two example use cases for Observers are listed below. In fact, wherever
+you wish to scale the number of clients of your ZooKeeper ensemble, or
+where you wish to insulate the critical part of an ensemble from the load
+of dealing with client requests, Observers are a good architectural
+choice.
+
+* As a datacenter bridge: Forming a ZK ensemble between two
+  datacenters is a problematic endeavour as the high variance in latency
+  between the datacenters could lead to false positive failure detection
+  and partitioning. However if the ensemble runs entirely in one
+  datacenter, and the second datacenter runs only Observers, partitions
+  aren't problematic as the ensemble remains connected. Clients of the
+  Observers may still see and issue proposals.
+* As a link to a message bus: Some companies have expressed an
+  interest in using ZK as a component of a persistent reliable message
+  bus. Observers would give a natural integration point for this work: a
+  plug-in mechanism could be used to attach the stream of proposals an
+  Observer sees to a publish-subscribe system, again without loading the
+  core ensemble.

+ 22 - 0
zookeeper-docs/src/main/resources/markdown/zookeeperOtherInfo.md

@@ -0,0 +1,22 @@
+<!--
+Copyright 2002-2004 The Apache Software Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+//-->
+
+# ZooKeeper
+
+## Other Info
+
+currently empty
+

+ 343 - 0
zookeeper-docs/src/main/resources/markdown/zookeeperOver.md

@@ -0,0 +1,343 @@
+<!--
+Copyright 2002-2004 The Apache Software Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+//-->
+
+# ZooKeeper
+
+* [ZooKeeper: A Distributed Coordination Service for Distributed Applications](#ch_DesignOverview)
+    * [Design Goals](#sc_designGoals)
+    * [Data model and the hierarchical namespace](#sc_dataModelNameSpace)
+    * [Nodes and ephemeral nodes](#Nodes+and+ephemeral+nodes)
+    * [Conditional updates and watches](#Conditional+updates+and+watches)
+    * [Guarantees](#Guarantees)
+    * [Simple API](#Simple+API)
+    * [Implementation](#Implementation)
+    * [Uses](#Uses)
+    * [Performance](#Performance)
+    * [Reliability](#Reliability)
+    * [The ZooKeeper Project](#The+ZooKeeper+Project)
+
+<a name="ch_DesignOverview"></a>
+
+## ZooKeeper: A Distributed Coordination Service for Distributed Applications
+
+ZooKeeper is a distributed, open-source coordination service for
+distributed applications. It exposes a simple set of primitives that
+distributed applications can build upon to implement higher level services
+for synchronization, configuration maintenance, and groups and naming. It
+is designed to be easy to program to, and uses a data model styled after
+the familiar directory tree structure of file systems. It runs in Java and
+has bindings for both Java and C.
+
+Coordination services are notoriously hard to get right. They are
+especially prone to errors such as race conditions and deadlock. The
+motivation behind ZooKeeper is to relieve distributed applications of the
+responsibility of implementing coordination services from scratch.
+
+<a name="sc_designGoals"></a>
+
+### Design Goals
+
+**ZooKeeper is simple.** ZooKeeper
+allows distributed processes to coordinate with each other through a
+shared hierarchal namespace which is organized similarly to a standard
+file system. The name space consists of data registers - called znodes,
+in ZooKeeper parlance - and these are similar to files and directories.
+Unlike a typical file system, which is designed for storage, ZooKeeper
+data is kept in-memory, which means ZooKeeper can achieve high
+throughput and low latency numbers.
+
+The ZooKeeper implementation puts a premium on high performance,
+highly available, strictly ordered access. The performance aspects of
+ZooKeeper means it can be used in large, distributed systems. The
+reliability aspects keep it from being a single point of failure. The
+strict ordering means that sophisticated synchronization primitives can
+be implemented at the client.
+
+**ZooKeeper is replicated.** Like the
+distributed processes it coordinates, ZooKeeper itself is intended to be
+replicated over a set of hosts called an ensemble.
+
+![ZooKeeper Service](images/zkservice.jpg)
+
+The servers that make up the ZooKeeper service must all know about
+each other. They maintain an in-memory image of state, along with
+transaction logs and snapshots in a persistent store. As long as a
+majority of the servers are available, the ZooKeeper service will be
+available.
+
+Clients connect to a single ZooKeeper server. The client maintains
+a TCP connection through which it sends requests, gets responses, gets
+watch events, and sends heart beats. If the TCP connection to the server
+breaks, the client will connect to a different server.
+
+**ZooKeeper is ordered.** ZooKeeper
+stamps each update with a number that reflects the order of all
+ZooKeeper transactions. Subsequent operations can use the order to
+implement higher-level abstractions, such as synchronization
+primitives.
+
+**ZooKeeper is fast.** It is
+especially fast in "read-dominant" workloads. ZooKeeper applications run
+on thousands of machines, and it performs best where reads are more
+common than writes, at ratios of around 10:1.
+
+<a name="sc_dataModelNameSpace"></a>
+
+### Data model and the hierarchical namespace
+
+The name space provided by ZooKeeper is much like that of a
+standard file system. A name is a sequence of path elements separated by
+a slash (/). Every node in ZooKeeper's name space is identified by a
+path.
+
+#### ZooKeeper's Hierarchical Namespace
+
+![ZooKeeper's Hierarchical Namespace](images/zknamespace.jpg)
+
+<a name="Nodes+and+ephemeral+nodes"></a>
+
+### Nodes and ephemeral nodes
+
+Unlike standard file systems, each node in a ZooKeeper
+namespace can have data associated with it as well as children. It is
+like having a file-system that allows a file to also be a directory.
+(ZooKeeper was designed to store coordination data: status information,
+configuration, location information, etc., so the data stored at each
+node is usually small, in the byte to kilobyte range.) We use the term
+_znode_ to make it clear that we are talking about
+ZooKeeper data nodes.
+
+Znodes maintain a stat structure that includes version numbers for
+data changes, ACL changes, and timestamps, to allow cache validations
+and coordinated updates. Each time a znode's data changes, the version
+number increases. For instance, whenever a client retrieves data it also
+receives the version of the data.
+
+The data stored at each znode in a namespace is read and written
+atomically. Reads get all the data bytes associated with a znode and a
+write replaces all the data. Each node has an Access Control List (ACL)
+that restricts who can do what.
+
+ZooKeeper also has the notion of ephemeral nodes. These znodes
+exist as long as the session that created the znode is active. When the
+session ends the znode is deleted. Ephemeral nodes are useful when you
+want to implement _[tbd]_.
+
+<a name="Conditional+updates+and+watches"></a>
+
+### Conditional updates and watches
+
+ZooKeeper supports the concept of _watches_.
+Clients can set a watch on a znode. A watch will be triggered and
+removed when the znode changes. When a watch is triggered, the client
+receives a packet saying that the znode has changed. If the
+connection between the client and one of the ZooKeeper servers is
+broken, the client will receive a local notification. These can be used
+to _[tbd]_.
+
+<a name="Guarantees"></a>
+
+### Guarantees
+
+ZooKeeper is very fast and very simple. Since its goal, though, is
+to be a basis for the construction of more complicated services, such as
+synchronization, it provides a set of guarantees. These are:
+
+* Sequential Consistency - Updates from a client will be applied
+  in the order that they were sent.
+* Atomicity - Updates either succeed or fail. No partial
+  results.
+* Single System Image - A client will see the same view of the
+  service regardless of the server that it connects to.
+
+* Reliability - Once an update has been applied, it will persist
+  from that time forward until a client overwrites the update.
+
+* Timeliness - The clients view of the system is guaranteed to
+  be up-to-date within a certain time bound.
+
+For more information on these, and how they can be used, see
+_[tbd]_
+
+<a name="Simple+API"></a>
+
+### Simple API
+
+One of the design goals of ZooKeeper is to provide a very simple
+programming interface. As a result, it supports only these
+operations:
+
+* *create* :
+    creates a node at a location in the tree
+
+* *delete* :
+    deletes a node
+
+* *exists* :
+    tests if a node exists at a location
+
+* *get data* :
+    reads the data from a node
+
+* *set data* :
+    writes data to a node
+
+* *get children* :
+    retrieves a list of children of a node
+
+* *sync* :
+    waits for data to be propagated
+
+For a more in-depth discussion on these, and how they can be used
+to implement higher level operations, please refer to
+_[tbd]_
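+
+As a brief illustration of these operations, here is a minimal Java sketch; the
+connect string, paths and data are placeholders, and a real application would wait
+for the connection to be established and handle errors properly:
+
+    import org.apache.zookeeper.CreateMode;
+    import org.apache.zookeeper.ZooDefs;
+    import org.apache.zookeeper.ZooKeeper;
+
+    public class SimpleApiSketch {
+        public static void main(String[] args) throws Exception {
+            // Placeholder connect string and session timeout.
+            ZooKeeper zk = new ZooKeeper("127.0.0.1:2181", 3000, event -> {});
+
+            // create: add a node with some initial data
+            zk.create("/demo", "hello".getBytes(),
+                      ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
+
+            // get data: read the node back
+            byte[] data = zk.getData("/demo", false, null);
+
+            // set data: overwrite the data (-1 means "any version")
+            zk.setData("/demo", "world".getBytes(), -1);
+
+            // get children: list the children of the root node
+            System.out.println(zk.getChildren("/", false));
+
+            // delete: remove the node (-1 means "any version")
+            zk.delete("/demo", -1);
+            zk.close();
+        }
+    }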
+
+<a name="Implementation"></a>
+
+### Implementation
+
+[ZooKeeper Components](#zkComponents) shows the high-level components
+of the ZooKeeper service. With the exception of the request processor,
+each of
+the servers that make up the ZooKeeper service replicates its own copy
+of each of the components.
+
+<a name="zkComponents"></a>
+
+![ZooKeeper Components](images/zkcomponents.jpg)
+
+The replicated database is an in-memory database containing the
+entire data tree. Updates are logged to disk for recoverability, and
+writes are serialized to disk before they are applied to the in-memory
+database.
+
+Every ZooKeeper server services clients. Clients connect to
+exactly one server to submit requests. Read requests are serviced from
+the local replica of each server database. Requests that change the
+state of the service, write requests, are processed by an agreement
+protocol.
+
+As part of the agreement protocol all write requests from clients
+are forwarded to a single server, called the
+_leader_. The rest of the ZooKeeper servers, called
+_followers_, receive message proposals from the
+leader and agree upon message delivery. The messaging layer takes care
+of replacing leaders on failures and syncing followers with
+leaders.
+
+ZooKeeper uses a custom atomic messaging protocol. Since the
+messaging layer is atomic, ZooKeeper can guarantee that the local
+replicas never diverge. When the leader receives a write request, it
+calculates what the state of the system is when the write is to be
+applied and transforms this into a transaction that captures this new
+state.
+
+<a name="Uses"></a>
+
+### Uses
+
+The programming interface to ZooKeeper is deliberately simple.
+With it, however, you can implement higher order operations, such as
+synchronization primitives, group membership, ownership, etc. Some
+distributed applications have used it to: _[tbd: add uses from
+white paper and video presentation.]_ For more information, see
+_[tbd]_
+
+<a name="Performance"></a>
+
+### Performance
+
+ZooKeeper is designed to be highly performant. But is it? The
+results from ZooKeeper's development team at Yahoo! Research indicate
+that it is. (See [ZooKeeper Throughput as the Read-Write Ratio Varies](#zkPerfRW).) It is especially high
+performance in applications where reads outnumber writes, since writes
+involve synchronizing the state of all servers. (Reads outnumbering
+writes is typically the case for a coordination service.)
+
+<a name="zkPerfRW"></a>
+
+![ZooKeeper Throughput as the Read-Write Ratio Varies](images/zkperfRW-3.2.jpg)
+
+The [ZooKeeper Throughput as the Read-Write Ratio Varies](#zkPerfRW) is a throughput
+graph of ZooKeeper release 3.2 running on servers with dual 2GHz
+Xeon and two SATA 15K RPM drives.  One drive was used as a
+dedicated ZooKeeper log device. The snapshots were written to
+the OS drive. Write requests were 1K writes and the reads were
+1K reads.  "Servers" indicate the size of the ZooKeeper
+ensemble, the number of servers that make up the
+service. Approximately 30 other servers were used to simulate
+the clients. The ZooKeeper ensemble was configured such that
+leaders do not allow connections from clients.
+
+######Note
+>In version 3.2 r/w performance improved by ~2x compared to
+ the [previous 3.1 release](http://zookeeper.apache.org/docs/r3.1.1/zookeeperOver.html#Performance).
+
+Benchmarks indicate that it is reliable, too.
+[Reliability in the Presence of Errors](#zkPerfReliability) shows how a deployment responds to
+various failures. The events marked in the figure are the following:
+
+1. Failure and recovery of a follower
+1. Failure and recovery of a different follower
+1. Failure of the leader
+1. Failure and recovery of two followers
+1. Failure of another leader
+
+<a name="Reliability"></a>
+
+### Reliability
+
+To show the behavior of the system over time as
+failures are injected we ran a ZooKeeper service made up of
+7 machines. We ran the same saturation benchmark as before,
+but this time we kept the write percentage at a constant
+30%, which is a conservative ratio of our expected
+workloads.
+
+<a name="zkPerfReliability"></a>
+
+![Reliability in the Presence of Errors](images/zkperfreliability.jpg)
+
+There are a few important observations from this graph. First, if
+followers fail and recover quickly, ZooKeeper is able to sustain a
+high throughput despite the failure. Second, and maybe more importantly, the
+leader election algorithm allows for the system to recover fast enough
+to prevent throughput from dropping substantially. In our observations,
+ZooKeeper takes less than 200ms to elect a new leader. Third, as
+followers recover, ZooKeeper is able to raise throughput again once they
+start processing requests.
+
+<a name="The+ZooKeeper+Project"></a>
+
+### The ZooKeeper Project
+
+ZooKeeper has been
+[successfully used](https://cwiki.apache.org/confluence/display/ZOOKEEPER/PoweredBy)
+in many industrial applications.  It is used at Yahoo! as the
+coordination and failure recovery service for Yahoo! Message
+Broker, which is a highly scalable publish-subscribe system
+managing thousands of topics for replication and data
+delivery.  It is used by the Fetching Service for Yahoo!
+crawler, where it also manages failure recovery. A number of
+Yahoo! advertising systems also use ZooKeeper to implement
+reliable services.
+
+All users and developers are encouraged to join the
+community and contribute their expertise. See the
+[ZooKeeper Project on Apache](http://zookeeper.apache.org/)
+for more information.
+
+

+ 1519 - 0
zookeeper-docs/src/main/resources/markdown/zookeeperProgrammers.md

@@ -0,0 +1,1519 @@
+<!--
+Copyright 2002-2004 The Apache Software Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+//-->
+
+# ZooKeeper Programmer's Guide
+
+### Developing Distributed Applications that use ZooKeeper
+
+* [Introduction](#_introduction)
+* [The ZooKeeper Data Model](#ch_zkDataModel)
+    * [ZNodes](#sc_zkDataModel_znodes)
+        * [Watches](#sc_zkDataMode_watches)
+        * [Data Access](#Data+Access)
+        * [Ephemeral Nodes](#Ephemeral+Nodes)
+        * [Sequence Nodes -- Unique Naming](#Sequence+Nodes+--+Unique+Naming)
+        * [Container Nodes](#Container+Nodes)
+        * [TTL Nodes](#TTL+Nodes)
+    * [Time in ZooKeeper](#sc_timeInZk)
+    * [ZooKeeper Stat Structure](#sc_zkStatStructure)
+* [ZooKeeper Sessions](#ch_zkSessions)
+* [ZooKeeper Watches](#ch_zkWatches)
+    * [Semantics of Watches](#sc_WatchSemantics)
+    * [Remove Watches](#sc_WatchRemoval)
+    * [What ZooKeeper Guarantees about Watches](#sc_WatchGuarantees)
+    * [Things to Remember about Watches](#sc_WatchRememberThese)
+* [ZooKeeper access control using ACLs](#sc_ZooKeeperAccessControl)
+    * [ACL Permissions](#sc_ACLPermissions)
+        * [Builtin ACL Schemes](#sc_BuiltinACLSchemes)
+        * [ZooKeeper C client API](#ZooKeeper+C+client+API)
+* [Pluggable ZooKeeper authentication](#sc_ZooKeeperPluggableAuthentication)
+* [Consistency Guarantees](#ch_zkGuarantees)
+* [Bindings](#ch_bindings)
+    * [Java Binding](#Java+Binding)
+        * [Client Configuration Parameters](#sc_java_client_configuration)
+    * [C Binding](#C+Binding)
+        * [Installation](#Installation)
+        * [Building Your Own C Client](#Building+Your+Own+C+Client)
+* [Building Blocks: A Guide to ZooKeeper Operations](#ch_guideToZkOperations)
+    * [Handling Errors](#sc_errorsZk)
+    * [Connecting to ZooKeeper](#sc_connectingToZk)
+    * [Read Operations](#sc_readOps)
+    * [Write Operations](#sc_writeOps)
+    * [Handling Watches](#sc_handlingWatches)
+    * [Miscellaneous ZooKeeper Operations](#sc_miscOps)
+* [Program Structure, with Simple Example](#ch_programStructureWithExample)
+* [Gotchas: Common Problems and Troubleshooting](#ch_gotchas)
+
+<a name="_introduction"></a>
+
+## Introduction
+
+This document is a guide for developers wishing to create
+distributed applications that take advantage of ZooKeeper's coordination
+services. It contains conceptual and practical information.
+
+The first four sections of this guide present higher level
+discussions of various ZooKeeper concepts. These are necessary both for an
+understanding of how ZooKeeper works as well as how to work with it. It does
+not contain source code, but it does assume a familiarity with the
+problems associated with distributed computing. The sections in this first
+group are:
+
+* [The ZooKeeper Data Model](#ch_zkDataModel)
+* [ZooKeeper Sessions](#ch_zkSessions)
+* [ZooKeeper Watches](#ch_zkWatches)
+* [Consistency Guarantees](#ch_zkGuarantees)
+
+The next four sections provide practical programming
+information. These are:
+
+* [Building Blocks: A Guide to ZooKeeper Operations](#ch_guideToZkOperations)
+* [Bindings](#ch_bindings)
+* [Program Structure, with Simple Example](#ch_programStructureWithExample)
+  _[tbd]_
+* [Gotchas: Common Problems and Troubleshooting](#ch_gotchas)
+
+The book concludes with an [appendix](#apx_linksToOtherInfo) containing links to other
+useful, ZooKeeper-related information.
+
+Most of the information in this document is written to be accessible as
+stand-alone reference material. However, before starting your first
+ZooKeeper application, you should probably at least read the chapters on
+the [ZooKeeper Data Model](#ch_zkDataModel) and [ZooKeeper Basic Operations](#ch_guideToZkOperations). Also,
+the [Simple Programming
+Example](#ch_programStructureWithExample) _[tbd]_ is helpful for understanding the basic
+structure of a ZooKeeper client application.
+
+<a name="ch_zkDataModel"></a>
+
+## The ZooKeeper Data Model
+
+ZooKeeper has a hierarchal name space, much like a distributed file
+system. The only difference is that each node in the namespace can have
+data associated with it as well as children. It is like having a file
+system that allows a file to also be a directory. Paths to nodes are
+always expressed as canonical, absolute, slash-separated paths; there are
+no relative references. Any Unicode character can be used in a path subject
+to the following constraints:
+
+* The null character (\\u0000) cannot be part of a path name. (This
+  causes problems with the C binding.)
+* The following characters can't be used because they don't
+  display well, or render in confusing ways: \\u0001 - \\u001F and \\u007F
+  - \\u009F.
+* The following characters are not allowed: \\uD800 - \\uF8FF,
+  \\uFFF0 - \\uFFFF.
+* The "." character can be used as part of another name, but "."
+  and ".." cannot alone be used to indicate a node along a path,
+  because ZooKeeper doesn't use relative paths. The following would be
+  invalid: "/a/b/./c" or "/a/b/../c".
+* The token "zookeeper" is reserved.
+
+<a name="sc_zkDataModel_znodes"></a>
+
+### ZNodes
+
+Every node in a ZooKeeper tree is referred to as a
+_znode_. Znodes maintain a stat structure that
+includes version numbers for data changes and ACL changes. The stat
+structure also has timestamps. The version number, together with the
+timestamp, allows ZooKeeper to validate the cache and to coordinate
+updates. Each time a znode's data changes, the version number increases.
+For instance, whenever a client retrieves data, it also receives the
+version of the data. And when a client performs an update or a delete,
+it must supply the version of the data of the znode it is changing. If
+the version it supplies doesn't match the actual version of the data,
+the update will fail. (This behavior can be overridden. For more
+information see... )_[tbd...]_
+
+######Note
+
+>In distributed application engineering, the word
+_node_ can refer to a generic host machine, a
+server, a member of an ensemble, a client process, etc. In the ZooKeeper
+documentation, _znodes_ refer to the data nodes.
+_Servers_ refer to machines that make up the
+ZooKeeper service; _quorum peers_ refer to the
+servers that make up an ensemble; client refers to any host or process
+which uses a ZooKeeper service.
+
+Znodes are the main entity that a programmer accesses. They have
+several characteristics that are worth mentioning here.
+
+<a name="sc_zkDataMode_watches"></a>
+
+#### Watches
+
+Clients can set watches on znodes. Changes to that znode trigger
+the watch and then clear the watch. When a watch triggers, ZooKeeper
+sends the client a notification. More information about watches can be
+found in the section
+[ZooKeeper Watches](#ch_zkWatches).
+
+<a name="Data+Access"></a>
+
+#### Data Access
+
+The data stored at each znode in a namespace is read and written
+atomically. Reads get all the data bytes associated with a znode and a
+write replaces all the data. Each node has an Access Control List
+(ACL) that restricts who can do what.
+
+ZooKeeper was not designed to be a general database or large
+object store. Instead, it manages coordination data. This data can
+come in the form of configuration, status information, rendezvous, etc.
+A common property of the various forms of coordination data is that
+they are relatively small: measured in kilobytes.
+The ZooKeeper client and the server implementations have sanity checks
+to ensure that znodes have less than 1M of data, but the data should
+be much less than that on average. Operating on relatively large data
+sizes will cause some operations to take much more time than others and
+will affect the latencies of some operations because of the extra time
+needed to move more data over the network and onto storage media. If
+large data storage is needed, the usual pattern for dealing with such
+data is to store it on a bulk storage system, such as NFS or HDFS, and
+store pointers to the storage locations in ZooKeeper.
+
+<a name="Ephemeral+Nodes"></a>
+
+#### Ephemeral Nodes
+
+ZooKeeper also has the notion of ephemeral nodes. These znodes
+exist as long as the session that created the znode is active. When
+the session ends the znode is deleted. Because of this behavior
+ephemeral znodes are not allowed to have children.
+
+<a name="Sequence+Nodes+--+Unique+Naming"></a>
+
+#### Sequence Nodes -- Unique Naming
+
+When creating a znode you can also request that
+ZooKeeper append a monotonically increasing counter to the end
+of the path. This counter is unique to the parent znode. The
+counter has a format of %010d -- that is 10 digits with 0
+(zero) padding (the counter is formatted in this way to
+simplify sorting), i.e. "<path>0000000001". See
+[Queue
+Recipe](recipes.html#sc_recipes_Queues) for an example use of this feature. Note: the
+counter used to store the next sequence number is a signed int
+(4 bytes) maintained by the parent node; the counter will
+overflow when incremented beyond 2147483647 (resulting in a
+name "<path>-2147483648").
+
+<a name="Container+Nodes"></a>
+
+#### Container Nodes
+
+**Added in 3.6.0**
+
+ZooKeeper has the notion of container znodes. Container znodes are
+special purpose znodes useful for recipes such as leader, lock, etc.
+When the last child of a container is deleted, the container becomes
+a candidate to be deleted by the server at some point in the future.
+
+Given this property, you should be prepared to get
+KeeperException.NoNodeException when creating children inside of
+container znodes. That is, when creating child znodes inside of container znodes,
+always check for KeeperException.NoNodeException and recreate the container
+znode when it occurs.
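+
+A hedged Java sketch of that retry pattern (the method name is illustrative; it takes
+a connected handle as a parameter and assumes the parent is a container znode):
+
+    // Create a child under a container parent, recreating the parent if the
+    // server has already garbage-collected the empty container.
+    void createInContainer(ZooKeeper zk, String parent, String child, byte[] data)
+            throws KeeperException, InterruptedException {
+        while (true) {
+            try {
+                zk.create(parent + "/" + child, data,
+                          ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
+                return;
+            } catch (KeeperException.NoNodeException e) {
+                // The container vanished: recreate it, then retry the child.
+                try {
+                    zk.create(parent, new byte[0],
+                              ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.CONTAINER);
+                } catch (KeeperException.NodeExistsException ignore) {
+                    // Another client recreated it first; that is fine.
+                }
+            }
+        }
+    }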
+
+<a name="TTL+Nodes"></a>
+
+#### TTL Nodes
+
+**Added in 3.6.0**
+
+When creating PERSISTENT or PERSISTENT_SEQUENTIAL znodes,
+you can optionally set a TTL in milliseconds for the znode. If the znode
+is not modified within the TTL and has no children it will become a candidate
+to be deleted by the server at some point in the future.
+
+Note: TTL Nodes must be enabled via System property as they
+are disabled by default. See the [Administrator's Guide](zookeeperAdmin.html#sc_configuration) for
+details. If you attempt to create TTL Nodes without the
+proper System property set the server will throw
+KeeperException.UnimplementedException.
+
+<a name="sc_timeInZk"></a>
+
+### Time in ZooKeeper
+
+ZooKeeper tracks time multiple ways:
+
+* **Zxid**
+  Every change to the ZooKeeper state receives a stamp in the
+  form of a _zxid_ (ZooKeeper Transaction Id).
+  This exposes the total ordering of all changes to ZooKeeper. Each
+  change will have a unique zxid and if zxid1 is smaller than zxid2
+  then zxid1 happened before zxid2.
+* **Version numbers**
+  Every change to a node will cause an increase to one of the
+  version numbers of that node. The three version numbers are version
+  (number of changes to the data of a znode), cversion (number of
+  changes to the children of a znode), and aversion (number of changes
+  to the ACL of a znode).
+* **Ticks**
+  When using multi-server ZooKeeper, servers use ticks to define
+  timing of events such as status uploads, session timeouts,
+  connection timeouts between peers, etc. The tick time is only
+  indirectly exposed through the minimum session timeout (2 times the
+  tick time); if a client requests a session timeout less than the
+  minimum session timeout, the server will tell the client that the
+  session timeout is actually the minimum session timeout.
+* **Real time**
+  ZooKeeper doesn't use real time, or clock time, at all except
+  to put timestamps into the stat structure on znode creation and
+  znode modification.
+
+<a name="sc_zkStatStructure"></a>
+
+### ZooKeeper Stat Structure
+
+The Stat structure for each znode in ZooKeeper is made up of the
+following fields:
+
+* **czxid**
+  The zxid of the change that caused this znode to be
+  created.
+* **mzxid**
+  The zxid of the change that last modified this znode.
+* **pzxid**
+  The zxid of the change that last modified children of this znode.
+* **ctime**
+  The time in milliseconds from epoch when this znode was
+  created.
+* **mtime**
+  The time in milliseconds from epoch when this znode was last
+  modified.
+* **version**
+  The number of changes to the data of this znode.
+* **cversion**
+  The number of changes to the children of this znode.
+* **aversion**
+  The number of changes to the ACL of this znode.
+* **ephemeralOwner**
+  The session id of the owner of this znode if the znode is an
+  ephemeral node. If it is not an ephemeral node, it will be
+  zero.
+* **dataLength**
+  The length of the data field of this znode.
+* **numChildren**
+  The number of children of this znode.
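+
+For reference, a short illustrative Java sketch (assuming a connected handle _zk_ and
+an existing znode _/demo_) that reads a znode's Stat and prints a few of the fields
+listed above:
+
+    // Pass a Stat instance to getData(); the server fills it in.
+    Stat stat = new Stat();
+    byte[] data = zk.getData("/demo", false, stat);
+    System.out.println("version        = " + stat.getVersion());
+    System.out.println("cversion       = " + stat.getCversion());
+    System.out.println("aversion       = " + stat.getAversion());
+    System.out.println("ephemeralOwner = " + stat.getEphemeralOwner());
+    System.out.println("dataLength     = " + stat.getDataLength());
+    System.out.println("numChildren    = " + stat.getNumChildren());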
+
+<a name="ch_zkSessions"></a>
+
+## ZooKeeper Sessions
+
+A ZooKeeper client establishes a session with the ZooKeeper
+service by creating a handle to the service using a language
+binding. Once created, the handle starts off in the CONNECTING state
+and the client library tries to connect to one of the servers that
+make up the ZooKeeper service, at which point it switches to the
+CONNECTED state. During normal operation the handle will be in one of these
+two states. If an unrecoverable error occurs, such as session
+expiration or authentication failure, or if the application explicitly
+closes the handle, the handle will move to the CLOSED state.
+The following figure shows the possible state transitions of a
+ZooKeeper client:
+
+![State transitions](images/state_dia.jpg)
+
+To create a client session the application code must provide
+a connection string containing a comma separated list of host:port pairs,
+each corresponding to a ZooKeeper server (e.g. "127.0.0.1:4545" or
+"127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002"). The ZooKeeper
+client library will pick an arbitrary server and try to connect to
+it. If this connection fails, or if the client becomes
+disconnected from the server for any reason, the client will
+automatically try the next server in the list, until a connection
+is (re-)established.
+
+**Added in 3.2.0**: An
+optional "chroot" suffix may also be appended to the connection
+string. This will run the client commands while interpreting all
+paths relative to this root (similar to the unix chroot
+command). If used the example would look like:
+"127.0.0.1:4545/app/a" or
+"127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002/app/a" where the
+client would be rooted at "/app/a" and all paths would be relative
+to this root - i.e. getting/setting/etc...  "/foo/bar" would result
+in operations being run on "/app/a/foo/bar" (from the server
+perspective). This feature is particularly useful in multi-tenant
+environments where each user of a particular ZooKeeper service
+could be rooted differently. This makes re-use much simpler as
+each user can code his/her application as if it were rooted at
+"/", while actual location (say /app/a) could be determined at
+deployment time.
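+
+For instance, in the Java binding the chroot is simply part of the connect string
+passed to the constructor (an illustrative sketch with placeholder addresses; the
+chroot path _/app/a_ must already exist on the server):
+
+    // Every path used through this handle is interpreted relative to /app/a,
+    // so creating "/config" below actually creates "/app/a/config".
+    ZooKeeper zk = new ZooKeeper("127.0.0.1:2181,127.0.0.1:2182/app/a",
+                                 3000, event -> {});
+    zk.create("/config", "v1".getBytes(),
+              ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);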
+
+When a client gets a handle to the ZooKeeper service,
+ZooKeeper creates a ZooKeeper session, represented as a 64-bit
+number, that it assigns to the client. If the client connects to a
+different ZooKeeper server, it will send the session id as a part
+of the connection handshake.  As a security measure, the server
+creates a password for the session id that any ZooKeeper server
+can validate. The password is sent to the client with the session
+id when the client establishes the session. The client sends this
+password with the session id whenever it reestablishes the session
+with a new server.
+
+One of the parameters to the ZooKeeper client library call
+to create a ZooKeeper session is the session timeout in
+milliseconds. The client sends a requested timeout and the server
+responds with the timeout that it can give the client. The current
+implementation requires that the timeout be a minimum of 2 times
+the tickTime (as set in the server configuration) and a maximum of
+20 times the tickTime. For example, with a tickTime of 2000 ms the
+negotiated timeout will fall between 4 and 40 seconds. The ZooKeeper
+client API allows access to the negotiated timeout.
+
+When a client (session) becomes partitioned from the ZK
+serving cluster it will begin searching the list of servers that
+were specified during session creation. Eventually, when
+connectivity between the client and at least one of the servers is
+re-established, the session will either again transition to the
+"connected" state (if reconnected within the session timeout
+value) or it will transition to the "expired" state (if
+reconnected after the session timeout). It is not advisable to
+create a new session object (a new ZooKeeper.class or zookeeper
+handle in the C binding) on disconnection. The ZK client library
+will handle reconnect for you. In particular we have heuristics
+built into the client library to handle things like "herd effect",
+etc... Only create a new session when you are notified of session
+expiration (mandatory).
+
+Session expiration is managed by the ZooKeeper cluster
+itself, not by the client. When the ZK client establishes a
+session with the cluster it provides a "timeout" value detailed
+above. This value is used by the cluster to determine when the
+client's session expires. Expiration happens when the cluster
+does not hear from the client within the specified session timeout
+period (i.e. no heartbeat). At session expiration the cluster will
+delete any/all ephemeral nodes owned by that session and
+immediately notify any/all connected clients of the change (anyone
+watching those znodes). At this point the client of the expired
+session is still disconnected from the cluster; it will not be
+notified of the session expiration until/unless it is able to
+re-establish a connection to the cluster. The client will stay in
+disconnected state until the TCP connection is re-established with
+the cluster, at which point the watcher of the expired session
+will receive the "session expired" notification.
+
+Example state transitions for an expired session as seen by
+the expired session's watcher:
+
+1. 'connected' : session is established and client
+  is communicating with cluster (client/server communication is
+  operating properly)
+1. .... client is partitioned from the
+  cluster
+1. 'disconnected' : client has lost connectivity
+  with the cluster
+1. .... time elapses, after 'timeout' period the
+  cluster expires the session, nothing is seen by client as it is
+  disconnected from cluster
+1. .... time elapses, the client regains network
+  level connectivity with the cluster
+1. 'expired' : eventually the client reconnects to
+  the cluster, it is then notified of the
+  expiration
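+
+A common way to follow this rule ("only create a new session when you are notified of
+session expiration") is a default watcher along the lines of the sketch below. This is
+illustrative only; a real application would also re-create any ephemeral nodes and
+re-register any watches it depends on after reconnecting:
+
+    import org.apache.zookeeper.WatchedEvent;
+    import org.apache.zookeeper.Watcher;
+    import org.apache.zookeeper.ZooKeeper;
+
+    public class SessionWatcher implements Watcher {
+        private volatile ZooKeeper zk;
+
+        public synchronized void connect() throws Exception {
+            zk = new ZooKeeper("127.0.0.1:2181", 3000, this);
+        }
+
+        @Override
+        public void process(WatchedEvent event) {
+            switch (event.getState()) {
+                case Disconnected:
+                    // The client library reconnects on its own; optionally go
+                    // into a conservative, read-only mode in the meantime.
+                    break;
+                case SyncConnected:
+                    // (Re)connected within the session timeout; nothing to do.
+                    break;
+                case Expired:
+                    // The old handle is dead: only now create a new session.
+                    try {
+                        connect();
+                    } catch (Exception e) {
+                        e.printStackTrace();
+                    }
+                    break;
+                default:
+                    break;
+            }
+        }
+    }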
+
+Another parameter to the ZooKeeper session establishment
+call is the default watcher. Watchers are notified when any state
+change occurs in the client. For example if the client loses
+connectivity to the server the client will be notified, or if the
+client's session expires, etc... This watcher should consider the
+initial state to be disconnected (i.e. before any state changes
+events are sent to the watcher by the client lib). In the case of
+a new connection, the first event sent to the watcher is typically
+the session connection event.
+
+The session is kept alive by requests sent by the client. If
+the session is idle for a period of time that would timeout the
+session, the client will send a PING request to keep the session
+alive. This PING request not only allows the ZooKeeper server to
+know that the client is still active, but it also allows the
+client to verify that its connection to the ZooKeeper server is
+still active. The timing of the PING is conservative enough to
+ensure reasonable time to detect a dead connection and reconnect
+to a new server.
+
+Once a connection to the server is successfully established
+(connected) there are basically two cases where the client lib generates
+connectionloss (the result code in c binding, exception in Java -- see
+the API documentation for binding specific details) when either a synchronous or
+asynchronous operation is performed and one of the following holds:
+
+1. The application calls an operation on a session that is no
+  longer alive/valid
+1. The ZooKeeper client disconnects from a server when there
+  are pending operations to that server, i.e., there is a pending asynchronous call.
+
+**Added in 3.2.0 -- SessionMovedException**. There is an internal
+exception that is generally not seen by clients called the SessionMovedException.
+This exception occurs because a request was received on a connection for a session
+which has been reestablished on a different server. The normal cause of this error is
+a client that sends a request to a server, but the network packet gets delayed, so
+the client times out and connects to a new server. When the delayed packet arrives at
+the first server, the old server detects that the session has moved, and closes the
+client connection. Clients normally do not see this error since they do not read
+from those old connections. (Old connections are usually closed.) One situation in which this
+condition can be seen is when two clients try to reestablish the same connection using
+a saved session id and password. One of the clients will reestablish the connection
+and the second client will be disconnected (causing the pair to attempt to re-establish
+its connection/session indefinitely).
+
+**Updating the list of servers**.  We allow a client to
+update the connection string by providing a new comma separated list of host:port pairs,
+each corresponding to a ZooKeeper server. The function invokes a probabilistic load-balancing
+algorithm which may cause the client to disconnect from its current host with the goal
+to achieve expected uniform number of connections per server in the new list.
+In case the current host to which the client is connected is not in the new list
+this call will always cause the connection to be dropped. Otherwise, the decision
+is based on whether the number of servers has increased or decreased and by how much.
+
+For example, if the previous connection string contained 3 hosts and now the list contains
+these 3 hosts and 2 more hosts, 40% of clients connected to each of the 3 hosts will
+move to one of the new hosts in order to balance the load. The algorithm will cause the client
+to drop its connection to the current host to which it is connected with probability 0.4 and in this
+case cause the client to connect to one of the 2 new hosts, chosen at random.
+
+Another example -- suppose we have 5 hosts and now update the list to remove 2 of the hosts,
+the clients connected to the 3 remaining hosts will stay connected, whereas all clients connected
+to the 2 removed hosts will need to move to one of the 3 hosts, chosen at random. If the connection
+is dropped, the client moves to a special mode where it chooses a new server to connect to using the
+probabilistic algorithm, and not just round robin.
+
+In the first example, each client decides to disconnect with probability 0.4 but once the decision is
+made, it will try to connect to a random new server and only if it cannot connect to any of the new
+servers will it try to connect to the old ones. After finding a server, or trying all servers in the
+new list and failing to connect, the client moves back to the normal mode of operation where it picks
+an arbitrary server from the connectString and attempts to connect to it. If that fails, it will continue
+trying different random servers in round robin (see above for the algorithm used to initially choose a server).
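+
+In the Java binding this corresponds to the _updateServerList_ call on the ZooKeeper
+handle (a hedged sketch with placeholder host names; check that your client release
+actually provides this method before relying on it):
+
+    // Assumes a connected handle "zk". The client may drop its current
+    // connection (with the probability described above) and reconnect to
+    // one of the newly added servers.
+    zk.updateServerList("zk1.example.com:2181,zk2.example.com:2181,"
+        + "zk3.example.com:2181,zk4.example.com:2181,zk5.example.com:2181");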
+
+<a name="ch_zkWatches"></a>
+
+## ZooKeeper Watches
+
+All of the read operations in ZooKeeper - **getData()**, **getChildren()**, and **exists()** - have the option of setting a watch as a
+side effect. Here is ZooKeeper's definition of a watch: a watch event is
+a one-time trigger, sent to the client that set the watch, which occurs when
+the data for which the watch was set changes. There are three key points
+to consider in this definition of a watch:
+
+* **One-time trigger**
+  One watch event will be sent to the client when the data has changed.
+  For example, if a client does a getData("/znode1", true) and later the
+  data for /znode1 is changed or deleted, the client will get a watch
+  event for /znode1. If /znode1 changes again, no watch event will be
+  sent unless the client has done another read that sets a new
+  watch.
+* **Sent to the client**
+  This implies that an event is on the way to the client, but may
+  not reach the client before the successful return code to the change
+  operation reaches the client that initiated the change. Watches are
+  sent asynchronously to watchers. ZooKeeper provides an ordering
+  guarantee: a client will never see a change for which it has set a
+  watch until it first sees the watch event. Network delays or other
+  factors may cause different clients to see watches and return codes
+  from updates at different times. The key point is that everything seen
+  by the different clients will have a consistent order.
+* **The data for which the watch was
+  set**
+  This refers to the different ways a node can change.  It
+  helps to think of ZooKeeper as maintaining two lists of
+  watches: data watches and child watches.  getData() and
+  exists() set data watches. getChildren() sets child
+  watches. Alternatively, it may help to think of watches being
+  set according to the kind of data returned. getData() and
+  exists() return information about the data of the node,
+  whereas getChildren() returns a list of children.  Thus,
+  setData() will trigger data watches for the znode being set
+  (assuming the set is successful). A successful create() will
+  trigger a data watch for the znode being created and a child
+  watch for the parent znode. A successful delete() will trigger
+  both a data watch and a child watch (since there can be no
+  more children) for a znode being deleted as well as a child
+  watch for the parent znode.
+
+Watches are maintained locally at the ZooKeeper server to which the
+client is connected. This allows watches to be lightweight to set,
+maintain, and dispatch. When a client connects to a new server, the watch
+will be triggered for any session events. Watches will not be received
+while disconnected from a server. When a client reconnects, any previously
+registered watches will be reregistered and triggered if needed. In
+general this all occurs transparently. There is one case where a watch
+may be missed: a watch for the existence of a znode not yet created will
+be missed if the znode is created and deleted while disconnected.
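+
+As an illustrative Java sketch of the one-time-trigger behavior (assuming a connected
+handle _zk_ that is visible to the watcher, for example as a field), the watcher below
+re-reads the data and registers a new watch every time it fires:
+
+    Watcher dataWatcher = new Watcher() {
+        @Override
+        public void process(WatchedEvent event) {
+            if (event.getType() == Event.EventType.NodeDataChanged) {
+                try {
+                    // The watch that fired is gone; re-read the data and set a
+                    // new watch in the same getData() call by passing "this".
+                    byte[] data = zk.getData(event.getPath(), this, null);
+                    System.out.println("new data: " + new String(data));
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
+            }
+        }
+    };
+    // Initial read that registers the first watch.
+    zk.getData("/demo", dataWatcher, null);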
+
+<a name="sc_WatchSemantics"></a>
+
+### Semantics of Watches
+
+We can set watches with the three calls that read the state of
+ZooKeeper: exists, getData, and getChildren. The following list details
+the events that a watch can trigger and the calls that enable them:
+
+* **Created event:**
+  Enabled with a call to exists.
+* **Deleted event:**
+  Enabled with a call to exists, getData, and getChildren.
+* **Changed event:**
+  Enabled with a call to exists and getData.
+* **Child event:**
+  Enabled with a call to getChildren.
+
+<a name="sc_WatchRemoval"></a>
+
+### Remove Watches
+
+We can remove the watches registered on a znode with a call to
+removeWatches. Also, a ZooKeeper client can remove watches locally even
+if there is no server connection by setting the local flag to true. The
+following list details the events which will be triggered after the
+successful watch removal.
+
+* **Child Remove event:**
+  Watcher which was added with a call to getChildren.
+* **Data Remove event:**
+  Watcher which was added with a call to exists or getData.
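+
+In the Java binding this maps to the removeWatches and removeAllWatches calls; a
+hedged sketch, assuming a connected handle _zk_ and a previously registered
+_dataWatcher_ (such as the one in the watch sketch above):
+
+    // Remove one specific data watcher from /demo. The last argument ("local")
+    // set to true would remove it even without a server connection.
+    zk.removeWatches("/demo", dataWatcher, Watcher.WatcherType.Data, false);
+
+    // Or drop every watch of any kind this client has registered on /demo.
+    zk.removeAllWatches("/demo", Watcher.WatcherType.Any, false);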
+
+<a name="sc_WatchGuarantees"></a>
+
+### What ZooKeeper Guarantees about Watches
+
+With regard to watches, ZooKeeper maintains these
+guarantees:
+
+* Watches are ordered with respect to other events, other
+  watches, and asynchronous replies. The ZooKeeper client libraries
+  ensure that everything is dispatched in order.
+
+* A client will see a watch event for a znode it is watching
+  before seeing the new data that corresponds to that znode.
+
+* The order of watch events from ZooKeeper corresponds to the
+  order of the updates as seen by the ZooKeeper service.
+
+<a name="sc_WatchRememberThese"></a>
+
+### Things to Remember about Watches
+
+* Watches are one time triggers; if you get a watch event and
+  you want to get notified of future changes, you must set another
+  watch.
+
+* Because watches are one time triggers and there is latency
+  between getting the event and sending a new request to get a watch,
+  you cannot reliably see every change that happens to a node in
+  ZooKeeper. Be prepared to handle the case where the znode changes
+  multiple times between getting the event and setting the watch
+  again. (You may not care, but at least realize it may
+  happen.)
+
+* A watch object, or function/context pair, will only be
+  triggered once for a given notification. For example, if the same
+  watch object is registered for an exists and a getData call for the
+  same file and that file is then deleted, the watch object would
+  only be invoked once with the deletion notification for the file.
+
+* When you disconnect from a server (for example, when the
+  server fails), you will not get any watches until the connection
+  is reestablished. For this reason session events are sent to all
+  outstanding watch handlers. Use session events to go into a safe
+  mode: you will not be receiving events while disconnected, so your
+  process should act conservatively in that mode.
+
+<a name="sc_ZooKeeperAccessControl"></a>
+
+## ZooKeeper access control using ACLs
+
+ZooKeeper uses ACLs to control access to its znodes (the
+data nodes of a ZooKeeper data tree). The ACL implementation is
+quite similar to UNIX file access permissions: it employs
+permission bits to allow/disallow various operations against a
+node and the scope to which the bits apply. Unlike standard UNIX
+permissions, a ZooKeeper node is not limited by the three standard
+scopes for user (owner of the file), group, and world
+(other). ZooKeeper does not have a notion of an owner of a
+znode. Instead, an ACL specifies sets of ids and permissions that
+are associated with those ids.
+
+Note also that an ACL pertains only to a specific znode. In
+particular it does not apply to children. For example, if
+_/app_ is only readable by ip:172.16.16.1 and
+_/app/status_ is world readable, anyone will
+be able to read _/app/status_; ACLs are not
+recursive.
+
+ZooKeeper supports pluggable authentication schemes. Ids are
+specified using the form _scheme:expression_,
+where _scheme_ is the authentication scheme
+that the id corresponds to. The set of valid expressions are defined
+by the scheme. For example, _ip:172.16.16.1_ is
+an id for a host with the address _172.16.16.1_
+using the _ip_ scheme, whereas _digest:bob:password_
+is an id for the user with the name of _bob_ using
+the _digest_ scheme.
+
+When a client connects to ZooKeeper and authenticates
+itself, ZooKeeper associates all the ids that correspond to a
+client with the client's connection. These ids are checked against
+the ACLs of znodes when a client tries to access a node. ACLs are
+made up of pairs of _(scheme:expression,
+perms)_. The format of
+the _expression_ is specific to the scheme. For
+example, the pair _(ip:19.22.0.0/16, READ)_
+gives the _READ_ permission to any clients with
+an IP address that starts with 19.22.
+
+<a name="sc_ACLPermissions"></a>
+
+### ACL Permissions
+
+ZooKeeper supports the following permissions:
+
+* **CREATE**: you can create a child node
+* **READ**: you can get data from a node and list its children.
+* **WRITE**: you can set data for a node
+* **DELETE**: you can delete a child node
+* **ADMIN**: you can set permissions
+
+The _CREATE_
+and _DELETE_ permissions have been broken out
+of the _WRITE_ permission for finer grained
+access controls. The cases for _CREATE_
+and _DELETE_ are the following:
+
+You want A to be able to do a set on a ZooKeeper node, but
+not be able to _CREATE_
+or _DELETE_ children.
+
+_CREATE_
+without _DELETE_: clients create requests by
+creating ZooKeeper nodes in a parent directory. You want all
+clients to be able to add, but only the request processor can
+delete. (This is kind of like the APPEND permission for
+files.)
+
+Also, the _ADMIN_ permission is there
+since ZooKeeper doesn’t have a notion of file owner. In some
+sense the _ADMIN_ permission designates the
+entity as the owner. ZooKeeper doesn’t support the LOOKUP
+permission (execute permission bit on directories to allow you
+to LOOKUP even though you can't list the directory). Everyone
+implicitly has LOOKUP permission. This allows you to stat a
+node, but nothing more. (The problem is, if you want to call
+zoo_exists() on a node that doesn't exist, there is no
+permission to check.)
+
+<a name="sc_BuiltinACLSchemes"></a>
+
+#### Builtin ACL Schemes
+
+ZooKeeper has the following built-in schemes:
+
+* **world** has a
+  single id, _anyone_, that represents
+  anyone.
+* **auth** is a special
+  scheme which ignores any provided expression and instead uses the current user,
+  credentials, and scheme. Any expression (whether _user_ like with SASL
+  authentication or _user:password_ like with DIGEST authentication) provided is ignored
+  by the ZooKeeper server when persisting the ACL. However, the expression must still be
+  provided in the ACL because the ACL must match the form _scheme:expression:perms_.
+  This scheme is provided as a convenience as it is a common use-case for
+  a user to create a znode and then restrict access to that znode to only that user.
+  If there is no authenticated user, setting an ACL with the auth scheme will fail.
+* **digest** uses
+  a _username:password_ string to generate
+  an MD5 hash which is then used as an ACL ID
+  identity. Authentication is done by sending
+  the _username:password_ in clear text. When
+  used in the ACL the expression will be
+  the _username:base64_
+  encoded _SHA1_
+  password _digest_.
+* **ip** uses the
+  client host IP as an ACL ID identity. The ACL expression is of
+  the form _addr/bits_ where the most
+  significant _bits_
+  of _addr_ are matched against the most
+  significant _bits_ of the client host
+  IP.
+* **x509** uses the client
+  X500 Principal as an ACL ID identity. The ACL expression is the exact
+  X500 Principal name of a client. When using the secure port, clients
+  are automatically authenticated and their auth info for the x509 scheme
+  is set.
+
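+To make the _auth_ and _digest_ descriptions above concrete, here is a hedged Java sketch
+(connection string and credentials are placeholders; the digest derivation mirrors the
+username:base64-encoded SHA1 form described above):
+
+
+    import java.security.MessageDigest;
+    import java.util.Base64;
+
+    import org.apache.zookeeper.CreateMode;
+    import org.apache.zookeeper.ZooDefs;
+    import org.apache.zookeeper.ZooKeeper;
+
+    public class DigestAclExample {
+        // The ACL id stored for the digest scheme has the form
+        // "user:base64(sha1(user:password))".
+        static String toDigestId(String userColonPassword) throws Exception {
+            String user = userColonPassword.split(":", 2)[0];
+            byte[] sha1 = MessageDigest.getInstance("SHA-1")
+                    .digest(userColonPassword.getBytes());
+            return user + ":" + Base64.getEncoder().encodeToString(sha1);
+        }
+
+        public static void main(String[] args) throws Exception {
+            ZooKeeper zk = new ZooKeeper("127.0.0.1:2181", 10000, event -> {});
+
+            // Authenticate with the digest scheme (username:password sent in clear text),
+            // then use the "auth" convenience ACL so only this identity can access the node.
+            zk.addAuthInfo("digest", "bob:secret".getBytes());
+            zk.create("/digest-demo", new byte[0], ZooDefs.Ids.CREATOR_ALL_ACL,
+                    CreateMode.PERSISTENT);
+
+            System.out.println("stored ACL id: " + toDigestId("bob:secret"));
+            zk.close();
+        }
+    }
+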
+<a name="ZooKeeper+C+client+API"></a>
+
+#### ZooKeeper C client API
+
+The following constants are provided by the ZooKeeper C
+library:
+
+* _const_ _int_ ZOO_PERM_READ; // can read node's value and list its children
+* _const_ _int_ ZOO_PERM_WRITE; // can set the node's value
+* _const_ _int_ ZOO_PERM_CREATE; // can create children
+* _const_ _int_ ZOO_PERM_DELETE; // can delete children
+* _const_ _int_ ZOO_PERM_ADMIN; // can execute set_acl()
+* _const_ _int_ ZOO_PERM_ALL; // all of the above flags OR'd together
+
+The following are the standard ACL IDs:
+
+* _struct_ Id ZOO_ANYONE_ID_UNSAFE; // ('world','anyone')
+* _struct_ Id ZOO_AUTH_IDS; // ('auth','')
+
+The empty identity string in ZOO_AUTH_IDS should be interpreted as "the identity of the creator".
+
+ZooKeeper client comes with three standard ACLs:
+
+* _struct_ ACL_vector ZOO_OPEN_ACL_UNSAFE; // (ZOO_PERM_ALL, ZOO_ANYONE_ID_UNSAFE)
+* _struct_ ACL_vector ZOO_READ_ACL_UNSAFE; // (ZOO_PERM_READ, ZOO_ANYONE_ID_UNSAFE)
+* _struct_ ACL_vector ZOO_CREATOR_ALL_ACL; // (ZOO_PERM_ALL, ZOO_AUTH_IDS)
+
+ZOO_OPEN_ACL_UNSAFE is a completely open, free-for-all
+ACL: any application can execute any operation on the node and
+can create, list and delete its children. The
+ZOO_READ_ACL_UNSAFE is read-only access for any
+application. ZOO_CREATOR_ALL_ACL grants all permissions to the
+creator of the node. The creator must have been authenticated by
+the server (for example, using the “_digest_”
+scheme) before it can create nodes with this ACL.
+
+The following ZooKeeper operations deal with ACLs:
+
+    int zoo_add_auth(zhandle_t *zh, const char *scheme, const char *cert,
+                     int certLen, void_completion_t completion, const void *data);
+
+The application uses the zoo_add_auth function to
+authenticate itself to the server. The function can be called
+multiple times if the application wants to authenticate using
+different schemes and/or identities.
+
+    int zoo_create(zhandle_t *zh, const char *path, const char *value, int valuelen,
+                   const struct ACL_vector *acl, int flags,
+                   char *realpath, int max_realpath_len);
+
+zoo_create(...) operation creates a new node. The acl
+parameter is a list of ACLs associated with the node. The parent
+node must have the CREATE permission bit set.
+
+    int zoo_get_acl(zhandle_t *zh, const char *path,
+                    struct ACL_vector *acl, struct Stat *stat);
+
+This operation returns a node’s ACL info.
+
+    int zoo_set_acl(zhandle_t *zh, const char *path, int version,
+                    const struct ACL_vector *acl);
+
+This function replaces node’s ACL list with a new one. The
+node must have the ADMIN permission set.
+
+Here is a sample code that makes use of the above APIs to
+authenticate itself using the “_foo_” scheme
+and create an ephemeral node “/xyz” with create-only
+permissions.
+
+######Note
+>This is a very simple example which is intended to show
+how to interact with ZooKeeper ACLs
+specifically. See *.../trunk/zookeeper-client/zookeeper-client-c/src/cli.c*
+for an example of a C client implementation.
+
+
+
+    #include <errno.h>
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <string.h>
+    
+    #include "zookeeper.h"
+    
+    static zhandle_t *zh;
+    
+    /**
+     * In this example this method gets the cert for your
+     *   environment -- you must provide
+     */
+    char *foo_get_cert_once(char* id) { return 0; }
+
+    /** Watcher function -- empty for this example, not something you should
+     * do in real code */
+    void watcher(zhandle_t *zzh, int type, int state, const char *path,
+             void *watcherCtx) {}
+    
+    int main(int argc, char *argv[]) {
+      char buffer[512];
+      char p[2048];
+      char *cert=0;
+      char appId[64];
+
+      strcpy(appId, "example.foo_test");
+      cert = foo_get_cert_once(appId);
+      if(cert!=0) {
+        fprintf(stderr,
+            "Certificate for appid [%s] is [%s]\n",appId,cert);
+        strncpy(p,cert, sizeof(p)-1);
+        free(cert);
+      } else {
+        fprintf(stderr, "Certificate for appid [%s] not found\n",appId);
+        strcpy(p, "dummy");
+      }
+    
+      zoo_set_debug_level(ZOO_LOG_LEVEL_DEBUG);
+    
+      zh = zookeeper_init("localhost:3181", watcher, 10000, 0, 0, 0);
+      if (!zh) {
+        return errno;
+      }
+      if(zoo_add_auth(zh,"foo",p,strlen(p),0,0)!=ZOK)
+        return 2;
+    
+      struct ACL CREATE_ONLY_ACL[] = {{ZOO_PERM_CREATE, ZOO_AUTH_IDS}};
+      struct ACL_vector CREATE_ONLY = {1, CREATE_ONLY_ACL};
+      int rc = zoo_create(zh,"/xyz","value", 5, &CREATE_ONLY, ZOO_EPHEMERAL,
+                      buffer, sizeof(buffer)-1);
+    
+      /** this operation will fail with a ZNOAUTH error */
+      int buflen= sizeof(buffer);
+      struct Stat stat;
+      rc = zoo_get(zh, "/xyz", 0, buffer, &buflen, &stat);
+      if (rc) {
+        fprintf(stderr, "Error %d for %s\n", rc, __LINE__);
+      }
+    
+      zookeeper_close(zh);
+      return 0;
+    }
+
+
+<a name="sc_ZooKeeperPluggableAuthentication"></a>
+
+## Pluggable ZooKeeper authentication
+
+ZooKeeper runs in a variety of different environments with
+various different authentication schemes, so it has a completely
+pluggable authentication framework. Even the builtin authentication
+schemes use the pluggable authentication framework.
+
+To understand how the authentication framework works, first you must
+understand the two main authentication operations. The framework
+first must authenticate the client. This is usually done as soon as
+the client connects to a server and consists of validating information
+sent from or gathered about a client and associating it with the connection.
+The second operation handled by the framework is finding the entries in an
+ACL that correspond to client. ACL entries are <_idspec,
+permissions_> pairs. The _idspec_ may be
+a simple string match against the authentication information associated
+with the connection or it may be an expression that is evaluated against that
+information. It is up to the implementation of the authentication plugin
+to do the match. Here is the interface that an authentication plugin must
+implement:
+
+
+    public interface AuthenticationProvider {
+        String getScheme();
+        KeeperException.Code handleAuthentication(ServerCnxn cnxn, byte authData[]);
+        boolean isValid(String id);
+        boolean matches(String id, String aclExpr);
+        boolean isAuthenticated();
+    }
+
+
+The first method _getScheme_ returns the string
+that identifies the plugin. Because we support multiple methods of authentication,
+an authentication credential or an _idspec_ will always be
+prefixed with _scheme:_. The ZooKeeper server uses the scheme
+returned by the authentication plugin to determine which ids the scheme
+applies to.
+
+_handleAuthentication_ is called when a client
+sends authentication information to be associated with a connection. The
+client specifies the scheme to which the information corresponds. The
+ZooKeeper server passes the information to the authentication plugin whose
+_getScheme_ matches the scheme passed by the client. The
+implementor of _handleAuthentication_ will usually return
+an error if it determines that the information is bad, or it will associate information
+with the connection using _cnxn.getAuthInfo().add(new Id(getScheme(), data))_.
+
+The authentication plugin is involved in both setting and using ACLs. When an
+ACL is set for a znode, the ZooKeeper server will pass the id part of the entry to
+the _isValid(String id)_ method. It is up to the plugin to verify
+that the id has a correct form. For example, _ip:172.16.0.0/16_
+is a valid id, but _ip:host.com_ is not. If the new ACL includes
+an "auth" entry, _isAuthenticated_ is used to see if the
+authentication information for this scheme that is associated with the connection
+should be added to the ACL. Some schemes
+should not be included in auth. For example, the IP address of the client is not
+considered as an id that should be added to the ACL if auth is specified.
+
+ZooKeeper invokes _matches(String id, String aclExpr)_ when checking an ACL. It
+needs to match authentication information of the client against the relevant ACL
+entries. To find the entries which apply to the client, the ZooKeeper server will
+find the scheme of each entry and if there is authentication information
+from that client for that scheme, _matches(String id, String aclExpr)_
+will be called with _id_ set to the authentication information
+that was previously added to the connection by _handleAuthentication_ and
+_aclExpr_ set to the id of the ACL entry. The authentication plugin
+uses its own logic and matching scheme to determine if _id_ is included
+in _aclExpr_.
+
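+As a hedged illustration only (the _foo_ scheme and its exact-match semantics are invented for
+this sketch, not an official plugin), a minimal implementation of the interface above could look
+like this, following the _cnxn.getAuthInfo().add(...)_ convention mentioned earlier:
+
+
+    import org.apache.zookeeper.KeeperException;
+    import org.apache.zookeeper.data.Id;
+    import org.apache.zookeeper.server.ServerCnxn;
+    import org.apache.zookeeper.server.auth.AuthenticationProvider;
+
+    public class FooAuthenticationProvider implements AuthenticationProvider {
+
+        public String getScheme() {
+            return "foo";                       // ACL ids look like foo:<expression>
+        }
+
+        public KeeperException.Code handleAuthentication(ServerCnxn cnxn, byte[] authData) {
+            String id = new String(authData);
+            if (!isValid(id)) {
+                return KeeperException.Code.AUTHFAILED;
+            }
+            // Associate the id with the connection, as described above.
+            cnxn.getAuthInfo().add(new Id(getScheme(), id));
+            return KeeperException.Code.OK;
+        }
+
+        public boolean isValid(String id) {
+            return id != null && !id.isEmpty(); // accept any non-empty id
+        }
+
+        public boolean matches(String id, String aclExpr) {
+            return id.equals(aclExpr);          // simple exact-match semantics
+        }
+
+        public boolean isAuthenticated() {
+            return true;                        // include foo ids when "auth" ACLs are set
+        }
+    }
+
+Registering such a plugin then only requires the authProvider system property or configuration
+entry described below.
+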
+There are two built-in authentication plugins: _ip_ and
+_digest_. Additional plugins can be added using system properties. At
+startup the ZooKeeper server will look for system properties that start with
+"zookeeper.authProvider." and interpret the value of those properties as the class name
+of an authentication plugin. These properties can be set using
+_-Dzookeeper.authProvider.X=com.f.MyAuth_ or by adding entries such as
+the following in the server configuration file:
+
+
+    authProvider.1=com.f.MyAuth
+    authProvider.2=com.f.MyAuth2
+
+
+Care should be taken to ensure that the suffix on the property is unique. If there are
+duplicates such as _-Dzookeeper.authProvider.X=com.f.MyAuth -Dzookeeper.authProvider.X=com.f.MyAuth2_,
+only one will be used. Also, all servers must have the same plugins defined, otherwise clients using
+the authentication schemes provided by the plugins will have problems connecting to some servers.
+
+**Added in 3.6.0**: An alternate abstraction is available for pluggable
+authentication. It provides additional arguments.
+
+
+    public abstract class ServerAuthenticationProvider implements AuthenticationProvider {
+        public abstract KeeperException.Code handleAuthentication(ServerObjs serverObjs, byte authData[]);
+        public abstract boolean matches(ServerObjs serverObjs, MatchValues matchValues);
+    }
+
+
+Instead of implementing AuthenticationProvider you extend ServerAuthenticationProvider. Your handleAuthentication()
+and matches() methods will then receive the additional parameters (via ServerObjs and MatchValues).
+
+* **ZooKeeperServer** :
+  The ZooKeeperServer instance
+* **ServerCnxn** :
+  The current connection
+* **path** :
+  The ZNode path being operated on (or null if not used)
+* **perm** :
+  The operation value or 0
+* **setAcls** :
+  When the setAcl() method is being operated on, the list of ACLs that are being set
+
+<a name="ch_zkGuarantees"></a>
+
+## Consistency Guarantees
+
+ZooKeeper is a high performance, scalable service. Both reads and
+write operations are designed to be fast, though reads are faster than
+writes. The reason for this is that in the case of reads, ZooKeeper can
+serve older data, which in turn is due to ZooKeeper's consistency
+guarantees:
+
+* *Sequential Consistency* :
+    Updates from a client will be applied in the order that they
+    were sent.
+
+* *Atomicity* :
+    Updates either succeed or fail -- there are no partial
+    results.
+
+* *Single System Image* :
+    A client will see the same view of the service regardless of
+    the server that it connects to.
+
+* *Reliability* :
+    Once an update has been applied, it will persist from that
+    time forward until a client overwrites the update. This guarantee
+    has two corollaries:
+    1. If a client gets a successful return code, the update will
+      have been applied. On some failures (communication errors,
+      timeouts, etc) the client will not know if the update has
+      applied or not. We take steps to minimize the failures, but the
+      guarantee is only present with successful return codes.
+      (This is called the _monotonicity condition_ in Paxos.)
+    1. Any updates that are seen by the client, through a read
+      request or successful update, will never be rolled back when
+      recovering from server failures.
+
+* *Timeliness* :
+    The client's view of the system is guaranteed to be up-to-date
+    within a certain time bound (on the order of tens of seconds).
+    Either system changes will be seen by a client within this bound, or
+    the client will detect a service outage.
+
+Using these consistency guarantees it is easy to build higher level
+functions such as leader election, barriers, queues, and read/write
+revocable locks solely at the ZooKeeper client (no additions needed to
+ZooKeeper). See [Recipes and Solutions](recipes.html)
+for more details.
+
+######Note
+
+>Sometimes developers mistakenly assume one other guarantee that
+ZooKeeper does _not_ in fact make. This is:
+> * *Simultaneously Consistent Cross-Client Views* :
+    ZooKeeper does not guarantee that at every instance in
+    time, two different clients will have identical views of
+    ZooKeeper data. Due to factors like network delays, one client
+    may perform an update before another client gets notified of the
+    change. Consider the scenario of two clients, A and B. If client
+    A sets the value of a znode /a from 0 to 1, then tells client B
+    to read /a, client B may read the old value of 0, depending on
+    which server it is connected to. If it
+    is important that Client A and Client B read the same value,
+    Client B should call the **sync()** method from the ZooKeeper API
+    before it performs its read.
+    So, ZooKeeper by itself doesn't guarantee that changes occur
+    synchronously across all servers, but ZooKeeper
+    primitives can be used to construct higher level functions that
+    provide useful client synchronization. (For more information,
+    see the [ZooKeeper Recipes](recipes.html)
+    _[tbd:..]_.)
+
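+A hedged Java sketch of the pattern described in the note (class, method and path names are
+placeholders): client B forces the server it is connected to to catch up with the leader before
+reading _/a_.
+
+
+    import java.util.concurrent.CountDownLatch;
+
+    import org.apache.zookeeper.ZooKeeper;
+    import org.apache.zookeeper.data.Stat;
+
+    public class SyncBeforeRead {
+        // Wait for sync() to complete, then read the (now up-to-date) value.
+        static byte[] readLatest(ZooKeeper zk, String path) throws Exception {
+            CountDownLatch synced = new CountDownLatch(1);
+            zk.sync(path, (rc, p, ctx) -> synced.countDown(), null);
+            synced.await();
+            return zk.getData(path, false, new Stat());
+        }
+    }
+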
+<a name="ch_bindings"></a>
+
+## Bindings
+
+The ZooKeeper client libraries come in two languages: Java and C.
+The following sections describe these.
+
+<a name="Java+Binding"></a>
+
+### Java Binding
+
+There are two packages that make up the ZooKeeper Java binding:
+**org.apache.zookeeper** and **org.apache.zookeeper.data**. The rest of the
+packages that make up ZooKeeper are used internally or are part of the
+server implementation. The **org.apache.zookeeper.data** package is made up of
+generated classes that are used simply as containers.
+
+The main class used by a ZooKeeper Java client is the **ZooKeeper** class. Its two constructors differ only
+by an optional session id and password. ZooKeeper supports session
+recovery across instances of a process. A Java program may save its
+session id and password to stable storage, restart, and recover the
+session that was used by the earlier instance of the program.
+
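+For example, a hedged sketch of saving and reusing the session id and password (connection
+string and timeout are placeholders):
+
+
+    import org.apache.zookeeper.ZooKeeper;
+
+    public class SessionRecovery {
+        public static void main(String[] args) throws Exception {
+            // First instance of the program (placeholder connection string).
+            ZooKeeper zk = new ZooKeeper("127.0.0.1:2181", 10000, event -> {});
+            long sessionId = zk.getSessionId();
+            byte[] sessionPasswd = zk.getSessionPasswd();
+            // ... persist sessionId and sessionPasswd to stable storage,
+            // then the process restarts ...
+
+            // A later instance reattaches to the same session.
+            ZooKeeper recovered = new ZooKeeper("127.0.0.1:2181", 10000,
+                    event -> {}, sessionId, sessionPasswd);
+            recovered.close();
+        }
+    }
+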
+When a ZooKeeper object is created, two threads are created as
+well: an IO thread and an event thread. All IO happens on the IO thread
+(using Java NIO). All event callbacks happen on the event thread.
+Session maintenance such as reconnecting to ZooKeeper servers and
+maintaining heartbeat is done on the IO thread. Responses for
+synchronous methods are also processed in the IO thread. All responses
+to asynchronous methods and watch events are processed on the event
+thread. There are a few things to notice that result from this
+design:
+
+* All completions for asynchronous calls and watcher callbacks
+  will be made in order, one at a time. The caller can do any
+  processing they wish, but no other callbacks will be processed
+  during that time.
+* Callbacks do not block the processing of the IO thread or the
+  processing of the synchronous calls.
+* Synchronous calls may not return in the correct order. For
+  example, assume a client does the following processing: issues an
+  asynchronous read of node **/a** with
+  _watch_ set to true, and then in the completion
+  callback of the read it does a synchronous read of **/a**. (Maybe not good practice, but not illegal
+  either, and it makes for a simple example.)
+  Note that if there is a change to **/a** between the asynchronous read and the
+  synchronous read, the client library will receive the watch event
+  saying **/a** changed before the
+  response for the synchronous read, but because the completion
+  callback is blocking the event queue, the synchronous read will
+  return with the new value of **/a**
+  before the watch event is processed.
+
+Finally, the rules associated with shutdown are straightforward:
+once a ZooKeeper object is closed or receives a fatal event
+(SESSION_EXPIRED and AUTH_FAILED), the ZooKeeper object becomes invalid.
+On a close, the two threads shut down and any further access to the ZooKeeper
+handle is undefined behavior and should be avoided.
+
+<a name="sc_java_client_configuration"></a>
+
+#### Client Configuration Parameters
+
+The following list contains configuration properties for the Java client. You can set any
+of these properties using Java system properties. For server properties, please check the
+[Server configuration section](zookeeperAdmin.html#sc_configuration).
+
+* *zookeeper.sasl.client* :
+    Set the value to **false** to disable
+    SASL authentication. Default is **true**.
+
+* *zookeeper.sasl.clientconfig* :
+    Specifies the context key in the JAAS login file. Default is "Client".
+
+* *zookeeper.sasl.client.username* :
+    Traditionally, a principal is divided into three parts: the primary, the instance, and the realm.
+    The format of a typical Kerberos V5 principal is primary/instance@REALM.
+    zookeeper.sasl.client.username specifies the primary part of the server principal. Default
+    is "zookeeper". Instance part is derived from the server IP. Finally server's principal is
+    username/IP@realm, where username is the value of zookeeper.sasl.client.username, IP is
+    the server IP, and realm is the value of zookeeper.server.realm.
+
+* *zookeeper.server.realm* :
+    Realm part of the server principal. By default it is the client principal realm.
+
+* *zookeeper.disableAutoWatchReset* :
+    This switch controls whether automatic watch resetting is enabled. Clients automatically
+    reset watches during session reconnect by default; this option allows the client to turn off
+    this behavior by setting zookeeper.disableAutoWatchReset to **true**.
+
+* *zookeeper.client.secure* :
+    If you want to connect to the server's secure client port, you need to set this property to
+    **true**
+    on the client. This will connect to the server using SSL with the specified credentials. Note that
+    it requires the Netty client.
+
+* *zookeeper.clientCnxnSocket* :
+    Specifies which ClientCnxnSocket to be used. Possible values are
+    **org.apache.zookeeper.ClientCnxnSocketNIO**
+    and
+    **org.apache.zookeeper.ClientCnxnSocketNetty**
+    . Default is
+    **org.apache.zookeeper.ClientCnxnSocketNIO**
+    . If you want to connect to the server's secure client port, you need to set this property to
+    **org.apache.zookeeper.ClientCnxnSocketNetty**
+    on client.
+
+* *zookeeper.ssl.keyStore.location and zookeeper.ssl.keyStore.password* :
+    Specifies the file path to a JKS containing the local credentials to be used for SSL connections,
+    and the password to unlock the file.
+
+* *zookeeper.ssl.trustStore.location and zookeeper.ssl.trustStore.password* :
+    Specifies the file path to a JKS containing the remote credentials to be used for SSL connections,
+    and the password to unlock the file.
+
+* *jute.maxbuffer* :
+    Specifies the maximum size of the incoming data from the server. The default value is 4194304
+    bytes, or just 4 MB. This is really a sanity check. The ZooKeeper server is designed to store and send
+    data on the order of kilobytes. If the incoming data length is more than this value, an IOException
+    is raised.
+
+* *zookeeper.kinit* :
+    Specifies path to kinit binary. Default is "/usr/bin/kinit".
+
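+As a hedged example (paths and passwords are placeholders), the SSL-related properties above can
+be set programmatically before the client object is created, or equivalently passed as -D JVM
+arguments:
+
+
+    public class SecureClientSettings {
+        public static void configure() {
+            // Must be set before the ZooKeeper client object is created.
+            System.setProperty("zookeeper.client.secure", "true");
+            System.setProperty("zookeeper.clientCnxnSocket",
+                    "org.apache.zookeeper.ClientCnxnSocketNetty");
+            // Placeholder key/trust store locations and passwords.
+            System.setProperty("zookeeper.ssl.keyStore.location", "/path/to/keystore.jks");
+            System.setProperty("zookeeper.ssl.keyStore.password", "keystore-password");
+            System.setProperty("zookeeper.ssl.trustStore.location", "/path/to/truststore.jks");
+            System.setProperty("zookeeper.ssl.trustStore.password", "truststore-password");
+        }
+    }
+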
+<a name="C+Binding"></a>
+
+### C Binding
+
+The C binding has a single-threaded and multi-threaded library.
+The multi-threaded library is easiest to use and is most similar to the
+Java API. This library will create an IO thread and an event dispatch
+thread for handling connection maintenance and callbacks. The
+single-threaded library allows ZooKeeper to be used in event driven
+applications by exposing the event loop used in the multi-threaded
+library.
+
+The package includes two shared libraries: zookeeper_st and
+zookeeper_mt. The former only provides the asynchronous APIs and
+callbacks for integrating into the application's event loop. The only
+reason this library exists is to support the platforms where a
+_pthread_ library is not available or is unstable
+(i.e. FreeBSD 4.x). In all other cases, application developers should
+link with zookeeper_mt, as it includes support for both the Sync and Async
+APIs.
+
+<a name="Installation"></a>
+
+#### Installation
+
+If you're building the client from a check-out from the Apache
+repository, follow the steps outlined below. If you're building from a
+project source package downloaded from apache, skip to step **3**.
+
+1. Run `ant compile_jute` from the ZooKeeper
+  top level directory (*.../trunk*).
+  This will create a directory named "generated" under
+  *.../trunk/zookeeper-client/zookeeper-client-c*.
+1. Change directory to the *.../trunk/zookeeper-client/zookeeper-client-c*
+  and run `autoreconf -if` to bootstrap **autoconf**, **automake** and **libtool**. Make sure you have **autoconf version 2.59** or greater installed.
+  Skip to step **4**.
+1. If you are building from a project source package,
+  unzip/untar the source tarball and cd to the
+  *zookeeper-x.x.x/zookeeper-client/zookeeper-client-c* directory.
+1. Run `./configure <your-options>` to
+  generate the makefile. Here are some of options the **configure** utility supports that can be
+  useful in this step:
+  * `--enable-debug`
+    Enables optimization and enables debug info compiler
+    options. (Disabled by default.)
+  * `--without-syncapi`
+    Disables Sync API support; zookeeper_mt library won't be
+    built. (Enabled by default.)
+  * `--disable-static`
+    Do not build static libraries. (Enabled by
+    default.)
+  * `--disable-shared`
+    Do not build shared libraries. (Enabled by
+    default.)
+######Note
+>See INSTALL for general information about running **configure**.
+1. Run `make` or `make
+  install` to build the libraries and install them.
+1. To generate doxygen documentation for the ZooKeeper API, run
+  `make doxygen-doc`. All documentation will be
+  placed in a new subfolder named docs. By default, this command
+  only generates HTML. For information on other document formats,
+  run `./configure --help`
+
+<a name="Building+Your+Own+C+Client"></a>
+
+#### Building Your Own C Client
+
+In order to be able to use the ZooKeeper C API in your application
+you have to remember to
+
+1. Include ZooKeeper header: `#include <zookeeper/zookeeper.h>`
+1. If you are building a multithreaded client, compile with
+  `-DTHREADED` compiler flag to enable the multi-threaded version of
+  the library, and then link against the
+  _zookeeper_mt_ library. If you are building a
+  single-threaded client, do not compile with `-DTHREADED`, and be
+  sure to link against the _zookeeper_st_ library.
+
+######Note
+>See *.../trunk/zookeeper-client/zookeeper-client-c/src/cli.c*
+for an example of a C client implementation.
+
+<a name="ch_guideToZkOperations"></a>
+
+## Building Blocks: A Guide to ZooKeeper Operations
+
+This section surveys all the operations a developer can perform
+against a ZooKeeper server. It is lower level information than the earlier
+concepts chapters in this manual, but higher level than the ZooKeeper API
+Reference. It covers these topics:
+
+* [Connecting to ZooKeeper](#sc_connectingToZk)
+
+<a name="sc_errorsZk"></a>
+
+### Handling Errors
+
+Both the Java and C client bindings may report errors. The Java client binding does so by throwing KeeperException; calling code() on the exception will return the specific error code. The C client binding returns an error code as defined in the enum ZOO_ERRORS. API callbacks indicate the result code for both language bindings. See the API documentation (javadoc for Java, doxygen for C) for full details on the possible errors and their meaning.
+
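+For instance, a hedged Java sketch (the class name, path and fallback behavior are placeholders)
+of checking the specific error code:
+
+
+    import org.apache.zookeeper.KeeperException;
+    import org.apache.zookeeper.ZooKeeper;
+    import org.apache.zookeeper.data.Stat;
+
+    public class ErrorHandlingExample {
+        // Returns the node data, or null if the node does not exist.
+        static byte[] readOrNull(ZooKeeper zk, String path) throws InterruptedException {
+            try {
+                return zk.getData(path, false, new Stat());
+            } catch (KeeperException e) {
+                // code() identifies the specific error, e.g. NONODE or NOAUTH.
+                if (e.code() == KeeperException.Code.NONODE) {
+                    return null;
+                }
+                throw new RuntimeException("unexpected ZooKeeper error: " + e.code(), e);
+            }
+        }
+    }
+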
+<a name="sc_connectingToZk"></a>
+
+### Connecting to ZooKeeper
+
+Before we begin, you will have to set up a running ZooKeeper server so that we can start developing the client. For the C client binding, we will be using the multithreaded library (zookeeper_mt) with a simple example written in C. To establish a connection with the ZooKeeper server, we use the C API _zookeeper_init_ with the following signature:
+
+    int zookeeper_init(const char *host, watcher_fn fn, int recv_timeout, const clientid_t *clientid, void *context, int flags);
+
+* *host* :
+    Connection string to the ZooKeeper server in the format of host:port. If there are multiple servers, use a comma as separator after specifying the host:port pairs. Eg: "127.0.0.1:2181,127.0.0.1:3001,127.0.0.1:3002"
+
+* *fn* :
+    Watcher function to process events when a notification is triggered.
+
+* *recv_timeout* :
+    Session expiration time in milliseconds.
+
+* *clientid* :
+    We can specify 0 for a new session. If a session has already been established previously, we could provide that client ID and it would reconnect to that previous session.
+
+* *context* :
+    Context object that can be associated with the zhandle_t handler. If it is not used, we can set it to 0.
+
+* *flags* :
+    For this introduction, we can leave it as 0.
+
+We will demonstrate a client that outputs "Connected to ZooKeeper" after a successful connection, or an error message otherwise. Let's call the following code _zkClient.cc_ :
+
+
+    #include <stdio.h>
+    #include <zookeeper/zookeeper.h>
+    #include <errno.h>
+    using namespace std;
+
+    // Keeping track of the connection state
+    static int connected = 0;
+    static int expired   = 0;
+
+    // *zkHandler handles the connection with Zookeeper
+    static zhandle_t *zkHandler;
+
+    // watcher function would process events
+    void watcher(zhandle_t *zkH, int type, int state, const char *path, void *watcherCtx)
+    {
+        if (type == ZOO_SESSION_EVENT) {
+
+            // state refers to states of zookeeper connection.
+            // To keep it simple, we would demonstrate these 3: ZOO_EXPIRED_SESSION_STATE, ZOO_CONNECTED_STATE, ZOO_NOTCONNECTED_STATE
+            // If you are using ACL, you should be aware of an authentication failure state - ZOO_AUTH_FAILED_STATE
+            if (state == ZOO_CONNECTED_STATE) {
+                connected = 1;
+            } else if (state == ZOO_NOTCONNECTED_STATE ) {
+                connected = 0;
+            } else if (state == ZOO_EXPIRED_SESSION_STATE) {
+                expired = 1;
+                connected = 0;
+                zookeeper_close(zkH);
+            }
+        }
+    }
+
+    int main(){
+        zoo_set_debug_level(ZOO_LOG_LEVEL_DEBUG);
+
+        // zookeeper_init returns the handler upon a successful connection, null otherwise
+        zkHandler = zookeeper_init("localhost:2181", watcher, 10000, 0, 0, 0);
+
+        if (!zkHandler) {
+            return errno;
+        }else{
+            printf("Connection established with Zookeeper. \n");
+        }
+
+        // Close Zookeeper connection
+        zookeeper_close(zkHandler);
+
+        return 0;
+    }
+
+
+Compile the code with the multithreaded library mentioned before.
+
+`> g++ -Iinclude/ zkClient.cc -lzookeeper_mt -o Client`
+
+Run the client.
+
+`> ./Client`
+
+From the output, you should see "Connected to ZooKeeper" along with ZooKeeper's DEBUG messages if the connection is successful.
+
+<a name="sc_readOps"></a>
+
+### Read Operations
+
+<a name="sc_writeOps"></a>
+
+### Write Operations
+
+<a name="sc_handlingWatches"></a>
+
+### Handling Watches
+
+<a name="sc_miscOps"></a>
+
+### Miscellaneous ZooKeeper Operations
+
+<a name="ch_programStructureWithExample"></a>
+
+## Program Structure, with Simple Example
+
+_[tbd]_
+
+<a name="ch_gotchas"></a>
+
+## Gotchas: Common Problems and Troubleshooting
+
+So now you know ZooKeeper. It's fast, simple, your application
+works, but wait ... something's wrong. Here are some pitfalls that
+ZooKeeper users fall into:
+
+1. If you are using watches, you must look for the connected watch
+  event. When a ZooKeeper client disconnects from a server, you will
+  not receive notification of changes until reconnected. If you are
+  watching for a znode to come into existence, you will miss the event
+  if the znode is created and deleted while you are disconnected.
+1. You must test ZooKeeper server failures. The ZooKeeper service
+  can survive failures as long as a majority of servers are active. The
+  question to ask is: can your application handle it? In the real world
+  a client's connection to ZooKeeper can break. (ZooKeeper server
+  failures and network partitions are common reasons for connection
+  loss.) The ZooKeeper client library takes care of recovering your
+  connection and letting you know what happened, but you must make sure
+  that you recover your state and any outstanding requests that failed.
+  Find out if you got it right in the test lab, not in production - test
+  with a ZooKeeper service made up of several servers and subject
+  them to reboots.
+1. The list of ZooKeeper servers used by the client must match the
+  list of ZooKeeper servers that each ZooKeeper server has. Things can
+  work, although not optimally, if the client list is a subset of the
+  real list of ZooKeeper servers, but not if the client lists ZooKeeper
+  servers not in the ZooKeeper cluster.
+1. Be careful where you put that transaction log. The most
+  performance-critical part of ZooKeeper is the transaction log.
+  ZooKeeper must sync transactions to media before it returns a
+  response. A dedicated transaction log device is key to consistent good
+  performance. Putting the log on a busy device will adversely affect
+  performance. If you only have one storage device, put trace files on
+  NFS and increase the snapshotCount; it doesn't eliminate the problem,
+  but it can mitigate it.
+1. Set your Java max heap size correctly. It is very important to
+  _avoid swapping._ Going to disk unnecessarily will
+  almost certainly degrade your performance unacceptably. Remember, in
+  ZooKeeper, everything is ordered, so if one request hits the disk, all
+  other queued requests hit the disk.
+  To avoid swapping, try to set the heapsize to the amount of
+  physical memory you have, minus the amount needed by the OS and cache.
+  The best way to determine an optimal heap size for your configurations
+  is to _run load tests_. If for some reason you
+  can't, be conservative in your estimates and choose a number well
+  below the limit that would cause your machine to swap. For example, on
+  a 4G machine, a 3G heap is a conservative estimate to start
+  with.
+
+## Links to Other Information
+
+Outside the formal documentation, there are several other sources of
+information for ZooKeeper developers.
+
+* *ZooKeeper Whitepaper _[tbd: find url]_* :
+    The definitive discussion of ZooKeeper design and performance,
+    by Yahoo! Research
+
+* *API Reference _[tbd: find url]_* :
+    The complete reference to the ZooKeeper API
+
+* *[ZooKeeper Talk at the Hadoop Summit 2008](http://us.dl1.yimg.com/download.yahoo.com/dl/ydn/zookeeper.m4v)* :
+    A video introduction to ZooKeeper, by Benjamin Reed of Yahoo!
+    Research
+
+* *[Barrier and Queue Tutorial](https://cwiki.apache.org/confluence/display/ZOOKEEPER/Tutorial)* :
+    The excellent Java tutorial by Flavio Junqueira, implementing
+    simple barriers and producer-consumer queues using ZooKeeper.
+
+* *[ZooKeeper - A Reliable, Scalable Distributed Coordination System](https://cwiki.apache.org/confluence/display/ZOOKEEPER/ZooKeeperArticles)* :
+    An article by Todd Hoff (07/15/2008)
+
+* *[ZooKeeper Recipes](recipes.html)* :
+    Pseudocode-level discussion of the implementation of various
+    synchronization solutions with ZooKeeper: Event Handles, Queues,
+    Locks, and Two-phase Commits.
+
+* *_[tbd]_* :
+    Any other good sources anyone can think of...
+

+ 61 - 0
zookeeper-docs/src/main/resources/markdown/zookeeperQuotas.md

@@ -0,0 +1,61 @@
+<!--
+Copyright 2002-2004 The Apache Software Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+//-->
+
+# ZooKeeper Quotas Guide
+
+### A Guide to Deployment and Administration
+
+* [Quotas](#zookeeper_quotas)
+    * [Setting Quotas](#Setting+Quotas)
+    * [Listing Quotas](#Listing+Quotas)
+    * [Deleting Quotas](#Deleting+Quotas)
+
+<a name="zookeeper_quotas"></a>
+
+## Quotas
+
+ZooKeeper has both namespace and bytes quotas. You can use the ZooKeeperMain class to set up quotas.
+ZooKeeper prints _WARN_ messages if users exceed the quota assigned to them. The messages
+are printed in the ZooKeeper log.
+
+    $ bin/zkCli.sh -server host:port
+
+The above command gives you a command line session from which you can manage quotas.
+
+<a name="Setting+Quotas"></a>
+
+### Setting Quotas
+
+You can use _setquota_ to set a quota on a ZooKeeper node. It has an option of setting quota with
+`-n` (for namespace)
+and `-b` (for bytes).
+
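+For example (a hedged sketch; _/myapp_ and the limit are placeholders), from the CLI session
+started above you could limit a subtree to 1000 znodes with:
+
+
+    [zk: host:port(CONNECTED) 0] setquota -n 1000 /myapp
+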
+ZooKeeper quotas are stored in ZooKeeper itself under /zookeeper/quota. To prevent other users from
+changing the quotas, set the ACL for /zookeeper/quota such that only admins are able to read and write to it.
+
+<a name="Listing+Quotas"></a>
+
+### Listing Quotas
+
+You can use _listquota_ to list a quota on a ZooKeeper node.
+
+<a name="Deleting+Quotas"></a>
+
+### Deleting Quotas
+
+You can use _delquota_ to delete a quota on a ZooKeeper node.
+
+

+ 873 - 0
zookeeper-docs/src/main/resources/markdown/zookeeperReconfig.md

@@ -0,0 +1,873 @@
+<!--
+Copyright 2002-2004 The Apache Software Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+//-->
+
+# ZooKeeper Dynamic Reconfiguration
+
+* [Overview](#ch_reconfig_intro)
+* [Changes to Configuration Format](#ch_reconfig_format)
+    * [Specifying the client port](#sc_reconfig_clientport)
+    * [The standaloneEnabled flag](#sc_reconfig_standaloneEnabled)
+    * [The reconfigEnabled flag](#sc_reconfig_reconfigEnabled)
+    * [Dynamic configuration file](#sc_reconfig_file)
+    * [Backward compatibility](#sc_reconfig_backward)
+* [Upgrading to 3.5.0](#ch_reconfig_upgrade)
+* [Dynamic Reconfiguration of the ZooKeeper Ensemble](#ch_reconfig_dyn)
+    * [API](#ch_reconfig_api)
+    * [Security](#sc_reconfig_access_control)
+    * [Retrieving the current dynamic configuration](#sc_reconfig_retrieving)
+    * [Modifying the current dynamic configuration](#sc_reconfig_modifying)
+        * [General](#sc_reconfig_general)
+        * [Incremental mode](#sc_reconfig_incremental)
+        * [Non-incremental mode](#sc_reconfig_nonincremental)
+        * [Conditional reconfig](#sc_reconfig_conditional)
+        * [Error conditions](#sc_reconfig_errors)
+        * [Additional comments](#sc_reconfig_additional)
+* [Rebalancing Client Connections](#ch_reconfig_rebalancing)
+
+<a name="ch_reconfig_intro"></a>
+
+## Overview
+
+Prior to the 3.5.0 release, the membership and all other configuration
+parameters of Zookeeper were static - loaded during boot and immutable at
+runtime. Operators resorted to "rolling restarts" - a manually intensive
+and error-prone method of changing the configuration that has caused data
+loss and inconsistency in production.
+
+Starting with 3.5.0, “rolling restarts” are no longer needed!
+ZooKeeper comes with full support for automated configuration changes: the
+set of Zookeeper servers, their roles (participant / observer), all ports,
+and even the quorum system can be changed dynamically, without service
+interruption and while maintaining data consistency. Reconfigurations are
+performed immediately, just like other operations in ZooKeeper. Multiple
+changes can be done using a single reconfiguration command. The dynamic
+reconfiguration functionality does not limit operation concurrency, does
+not require client operations to be stopped during reconfigurations, has a
+very simple interface for administrators and no added complexity to other
+client operations.
+
+New client-side features allow clients to find out about configuration
+changes and to update the connection string (list of servers and their
+client ports) stored in their ZooKeeper handle. A probabilistic algorithm
+is used to rebalance clients across the new configuration servers while
+keeping the extent of client migrations proportional to the change in
+ensemble membership.
+
+This document provides the administrator manual for reconfiguration.
+For a detailed description of the reconfiguration algorithms, performance
+measurements, and more, please see our paper:
+
+* *Shraer, A., Reed, B., Malkhi, D., Junqueira, F. Dynamic
+Reconfiguration of Primary/Backup Clusters. In _USENIX Annual
+Technical Conference (ATC)_(2012), 425-437* :
+    Links: [paper (pdf)](https://www.usenix.org/system/files/conference/atc12/atc12-final74.pdf), [slides (pdf)](https://www.usenix.org/sites/default/files/conference/protected-files/shraer\_atc12\_slides.pdf), [video](https://www.usenix.org/conference/atc12/technical-sessions/presentation/shraer), [hadoop summit slides](http://www.slideshare.net/Hadoop\_Summit/dynamic-reconfiguration-of-zookeeper)
+
+**Note:** Starting with 3.5.3, the dynamic reconfiguration
+feature is disabled by default, and has to be explicitly turned on via
+[reconfigEnabled](zookeeperAdmin.html#sc_advancedConfiguration) configuration option.
+
+<a name="ch_reconfig_format"></a>
+
+## Changes to Configuration Format
+
+<a name="sc_reconfig_clientport"></a>
+
+### Specifying the client port
+
+A client port of a server is the port on which the server accepts
+client connection requests. Starting with 3.5.0 the
+_clientPort_ and _clientPortAddress_ configuration parameters should no longer be used. Instead,
+this information is now part of the server keyword specification, which
+becomes as follows:
+
+    server.<positive id> = <address1>:<port1>:<port2>[:role];[<client port address>:]<client port>
+
+The client port specification is to the right of the semicolon. The
+client port address is optional, and if not specified it defaults to
+"0.0.0.0". As usual, role is also optional, it can be
+_participant_ or _observer_
+(_participant_ by default).
+
+Examples of legal server statements:
+
+    server.5 = 125.23.63.23:1234:1235;1236
+    server.5 = 125.23.63.23:1234:1235:participant;1236
+    server.5 = 125.23.63.23:1234:1235:observer;1236
+    server.5 = 125.23.63.23:1234:1235;125.23.63.24:1236
+    server.5 = 125.23.63.23:1234:1235:participant;125.23.63.23:1236
+
+<a name="sc_reconfig_standaloneEnabled"></a>
+
+### The _standaloneEnabled_ flag
+
+Prior to 3.5.0, one could run ZooKeeper in Standalone mode or in a
+Distributed mode. These are separate implementation stacks, and
+switching between them during run time is not possible. By default (for
+backward compatibility) _standaloneEnabled_ is set to
+_true_. The consequence of using this default is that
+if started with a single server the ensemble will not be allowed to
+grow, and if started with more than one server it will not be allowed to
+shrink to contain fewer than two participants.
+
+Setting the flag to _false_ instructs the system
+to run the Distributed software stack even if there is only a single
+participant in the ensemble. To achieve this the (static) configuration
+file should contain:
+
+    standaloneEnabled=false
+
+With this setting it is possible to start a ZooKeeper ensemble
+containing a single participant and to dynamically grow it by adding
+more servers. Similarly, it is possible to shrink an ensemble so that
+just a single participant remains, by removing servers.
+
+Since running the Distributed mode allows more flexibility, we
+recommend setting the flag to _false_. We expect that
+the legacy Standalone mode will be deprecated in the future.
+
+<a name="sc_reconfig_reconfigEnabled"></a>
+
+### The _reconfigEnabled_ flag
+
+Starting with 3.5.0 and prior to 3.5.3, there is no way to disable the
+dynamic reconfiguration feature. We would like to offer the option of
+disabling the reconfiguration feature because with reconfiguration enabled,
+we have a security concern that a malicious actor can make arbitrary changes
+to the configuration of a ZooKeeper ensemble, including adding a compromised
+server to the ensemble. We prefer to leave it to the discretion of the user to
+decide whether to enable it or not and to make sure that the appropriate security
+measures are in place. So in 3.5.3 the [reconfigEnabled](zookeeperAdmin.html#sc_advancedConfiguration) configuration option is introduced
+such that the reconfiguration feature can be completely disabled and any attempts
+to reconfigure a cluster through reconfig API with or without authentication
+will fail by default, unless **reconfigEnabled** is set to
+**true**.
+
+To set the option to true, the configuration file (zoo.cfg) should contain:
+
+    reconfigEnabled=true
+
+<a name="sc_reconfig_file"></a>
+
+### Dynamic configuration file
+
+Starting with 3.5.0 we're distinguishing between dynamic
+configuration parameters, which can be changed during runtime, and
+static configuration parameters, which are read from a configuration
+file when a server boots and don't change during its execution. For now,
+the following configuration keywords are considered part of the dynamic
+configuration: _server_, _group_
+and _weight_.
+
+Dynamic configuration parameters are stored in a separate file on
+the server (which we call the dynamic configuration file). This file is
+linked from the static config file using the new
+_dynamicConfigFile_ keyword.
+
+**Example**
+
+#### zoo_replicated1.cfg
+
+
+    tickTime=2000
+    dataDir=/zookeeper/data/zookeeper1
+    initLimit=5
+    syncLimit=2
+    dynamicConfigFile=/zookeeper/conf/zoo_replicated1.cfg.dynamic
+
+
+#### zoo_replicated1.cfg.dynamic
+
+
+    server.1=125.23.63.23:2780:2783:participant;2791
+    server.2=125.23.63.24:2781:2784:participant;2792
+    server.3=125.23.63.25:2782:2785:participant;2793
+
+
+When the ensemble configuration changes, the static configuration
+parameters remain the same. The dynamic parameters are pushed by
+ZooKeeper and overwrite the dynamic configuration files on all servers.
+Thus, the dynamic configuration files on the different servers are
+usually identical (they can only differ momentarily when a
+reconfiguration is in progress, or if a new configuration hasn't
+propagated yet to some of the servers). Once created, the dynamic
+configuration file should not be manually altered. Changes are only made
+through the new reconfiguration commands outlined below. Note that
+changing the config of an offline cluster could result in an
+inconsistency with respect to configuration information stored in the
+ZooKeeper log (and the special configuration znode, populated from the
+log) and is therefore highly discouraged.
+
+**Example 2**
+
+Users may prefer to initially specify a single configuration file.
+The following is thus also legal:
+
+#### zoo_replicated1.cfg
+
+
+    tickTime=2000
+    dataDir=/zookeeper/data/zookeeper1
+    initLimit=5
+    syncLimit=2
+    clientPort=2791
+    server.1=125.23.63.23:2780:2783:participant;2791
+    server.2=125.23.63.24:2781:2784:participant;2792
+    server.3=125.23.63.25:2782:2785:participant;2793
+
+
+The configuration files on each server will be automatically split
+into dynamic and static files, if they are not already in this format.
+So the configuration file above will be automatically transformed into
+the two files in Example 1. Note that the clientPort and
+clientPortAddress lines (if specified) will be automatically removed
+during this process, if they are redundant (as in the example above).
+The original static configuration file is backed up (in a .bak
+file).
+
+<a name="sc_reconfig_backward"></a>
+
+### Backward compatibility
+
+We still support the old configuration format. For example, the
+following configuration file is acceptable (but not recommended):
+
+#### zoo_replicated1.cfg
+
+    tickTime=2000
+    dataDir=/zookeeper/data/zookeeper1
+    initLimit=5
+    syncLimit=2
+    clientPort=2791
+    server.1=125.23.63.23:2780:2783:participant
+    server.2=125.23.63.24:2781:2784:participant
+    server.3=125.23.63.25:2782:2785:participant
+
+
+During boot, a dynamic configuration file is created and contains
+the dynamic part of the configuration as explained earlier. In this
+case, however, the line "clientPort=2791" will remain in the static
+configuration file of server 1 since it is not redundant -- it was not
+specified as part of the "server.1=..." using the format explained in
+the section [Changes to Configuration Format](#ch_reconfig_format). If a reconfiguration
+is invoked that sets the client port of server 1, we remove
+"clientPort=2791" from the static configuration file (the dynamic file
+now contains this information as part of the specification of server
+1).
+
+<a name="ch_reconfig_upgrade"></a>
+
+## Upgrading to 3.5.0
+
+Upgrading a running ZooKeeper ensemble to 3.5.0 should be done only
+after upgrading your ensemble to the 3.4.6 release. Note that this is only
+necessary for rolling upgrades (if you're fine with shutting down the
+system completely, you don't have to go through 3.4.6). If you attempt a
+rolling upgrade without going through 3.4.6 (for example from 3.4.5), you
+may get the following error:
+
+    2013-01-30 11:32:10,663 [myid:2] - INFO [localhost/127.0.0.1:2784:QuorumCnxManager$Listener@498] - Received connection request /127.0.0.1:60876
+    2013-01-30 11:32:10,663 [myid:2] - WARN [localhost/127.0.0.1:2784:QuorumCnxManager@349] - Invalid server id: -65536
+
+During a rolling upgrade, each server is taken down in turn and
+rebooted with the new 3.5.0 binaries. Before starting the server with
+3.5.0 binaries, we highly recommend updating the configuration file so
+that all server statements "server.x=..." contain client ports (see the
+section [Specifying the client port](#sc_reconfig_clientport)). As explained earlier
+you may leave the configuration in a single file, as well as leave the
+clientPort/clientPortAddress statements (although if you specify client
+ports in the new format, these statements are now redundant).
+
+<a name="ch_reconfig_dyn"></a>
+
+## Dynamic Reconfiguration of the ZooKeeper Ensemble
+
+The ZooKeeper Java and C API were extended with getConfig and reconfig
+commands that facilitate reconfiguration. Both commands have a synchronous
+(blocking) variant and an asynchronous one. We demonstrate these commands
+here using the Java CLI, but note that you can similarly use the C CLI or
+invoke the commands directly from a program just like any other ZooKeeper
+command.
+
+<a name="ch_reconfig_api"></a>
+
+### API
+
+There are two sets of APIs for both Java and C client.
+
+* ***Reconfiguration API*** :
+    Reconfiguration API is used to reconfigure the ZooKeeper cluster.
+    Starting with 3.5.3, reconfiguration Java APIs are moved into ZooKeeperAdmin class
+    from ZooKeeper class, and use of this API requires ACL setup and user
+    authentication (see [Security](#sc_reconfig_access_control) for more information.).
+
+* ***Get Configuration API*** :
+    Get configuration APIs are used to retrieve ZooKeeper cluster configuration information
+    stored in /zookeeper/config znode. Use of this API does not require specific setup or authentication,
+    because /zookeeper/config is readable to any users.
+
+<a name="sc_reconfig_access_control"></a>
+
+### Security
+
+Prior to **3.5.3**, there is no enforced security mechanism
+over reconfig so any ZooKeeper clients that can connect to ZooKeeper server ensemble
+will have the ability to change the state of a ZooKeeper cluster via reconfig.
+It is thus possible for a malicious client to,
+e.g., add a compromised server to an ensemble, or remove legitimate servers.
+Cases like these could be security vulnerabilities on a case by case basis.
+
+To address this security concern, we introduced access control over reconfig
+starting from **3.5.3** such that only a specific set of users
+can use reconfig commands or APIs, and these users need to be configured explicitly. In addition,
+the setup of ZooKeeper cluster must enable authentication so ZooKeeper clients can be authenticated.
+
+We also provide an escape hatch for users who operate and interact with a ZooKeeper ensemble in a secured
+environment (i.e. behind a company firewall). For those users who want to use the reconfiguration feature but
+don't want the overhead of configuring an explicit list of authorized users for reconfig access checks,
+they can set ["skipACL"](zookeeperAdmin.html#sc_authOptions) to "yes" which will
+skip the ACL check and allow any user to reconfigure the cluster.
+
+Overall, ZooKeeper provides flexible configuration options for the reconfiguration feature
+that allow a user to choose based on the user's security requirements.
+We leave it to the discretion of the user to decide whether the appropriate security measures are in place.
+
+* ***Access Control*** :
+    The dynamic configuration is stored in a special znode
+    ZooDefs.CONFIG_NODE = /zookeeper/config. This node by default is read only
+    for all users, except the super user and users that are explicitly configured for write
+    access.
+    Clients that need to use reconfig commands or reconfig API should be configured as users
+    that have write access to CONFIG_NODE. By default, only the super user has full control including
+    write access to CONFIG_NODE. Additional users can be granted write access through superuser
+    by setting an ACL that has write permission associated with specified user.
+    A few examples of how to setup ACLs and use reconfiguration API with authentication can be found in
+    ReconfigExceptionTest.java and TestReconfigServer.cc.
+
+* ***Authentication*** :
+    Authentication of users is orthogonal to the access control and is delegated to
+    existing authentication mechanisms supported by ZooKeeper's pluggable authentication schemes.
+    See [ZooKeeper and SASL](https://cwiki.apache.org/confluence/display/ZOOKEEPER/Zookeeper+and+SASL) for more details on this topic.
+
+* ***Disable ACL check*** :
+    ZooKeeper supports ["skipACL"](zookeeperAdmin.html#sc_authOptions) option such that ACL
+    check will be completely skipped, if skipACL is set to "yes". In such cases any unauthenticated
+    users can use reconfig API.
+
+<a name="sc_reconfig_retrieving"></a>
+
+### Retrieving the current dynamic configuration
+
+The dynamic configuration is stored in a special znode
+ZooDefs.CONFIG_NODE = /zookeeper/config. The new
+`config` CLI command reads this znode (currently it is
+simply a wrapper to `get /zookeeper/config`). As with
+normal reads, to retrieve the latest committed value you should do a
+`sync` first.
+
+    [zk: 127.0.0.1:2791(CONNECTED) 3] config
+    server.1=localhost:2780:2783:participant;localhost:2791
+    server.2=localhost:2781:2784:participant;localhost:2792
+    server.3=localhost:2782:2785:participant;localhost:2793
+    version=400000003
+
+Notice the last line of the output. This is the configuration
+version. The version equals the zxid of the reconfiguration command
+which created this configuration. The version of the first established
+configuration equals the zxid of the NEWLEADER message sent by the
+first successfully established leader. When a configuration is written
+to a dynamic configuration file, the version automatically becomes part
+of the filename and the static configuration file is updated with the
+path to the new dynamic configuration file. Configuration files
+corresponding to earlier versions are retained for backup
+purposes.
+
+During boot time the version (if it exists) is extracted from the
+filename. The version should never be altered manually by users or the
+system administrator. It is used by the system to know which
+configuration is most up-to-date. Manipulating it manually can result in
+data loss and inconsistency.
+
+Just like a `get` command, the
+`config` CLI command accepts the _-w_
+flag for setting a watch on the znode, and _-s_ flag for
+displaying the Stats of the znode. It additionally accepts a new flag
+_-c_ which outputs only the version and the client
+connection string corresponding to the current configuration. For
+example, for the configuration above we would get:
+
+    [zk: 127.0.0.1:2791(CONNECTED) 17] config -c
+    400000003 localhost:2791,localhost:2793,localhost:2792
+
+Note that when using the API directly, this command is called
+`getConfig`.
+
+As with any read command, it returns the configuration known to the
+follower to which your client is connected, which may be slightly
+out-of-date. One can use the `sync` command for
+stronger guarantees. For example using the Java API:
+
+    zk.sync(ZooDefs.CONFIG_NODE, void_callback, context);
+    zk.getConfig(watcher, callback, context);
+
+Note: in 3.5.0 it doesn't really matter which path is passed to the
+`sync()` command as all the server's state is brought
+up to date with the leader (so one could use a different path instead of
+ZooDefs.CONFIG_NODE). However, this may change in the future.
+
+<a name="sc_reconfig_modifying"></a>
+
+### Modifying the current dynamic configuration
+
+Modifying the configuration is done through the
+`reconfig` command. There are two modes of
+reconfiguration: incremental and non-incremental (bulk). The
+non-incremental simply specifies the new dynamic configuration of the
+system. The incremental specifies changes to the current configuration.
+The `reconfig` command returns the new
+configuration.
+
+A few examples are in: *ReconfigTest.java*,
+*ReconfigRecoveryTest.java* and
+*TestReconfigServer.cc*.
+
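+For programmatic use, here is a hedged Java sketch of an incremental reconfiguration through the
+ZooKeeperAdmin API introduced above (the connection string, credentials, server specification and
+the exact _reconfigure_ signature are assumptions for illustration, not a definitive reference):
+
+
+    import org.apache.zookeeper.admin.ZooKeeperAdmin;
+
+    public class ReconfigExample {
+        public static void main(String[] args) throws Exception {
+            ZooKeeperAdmin admin = new ZooKeeperAdmin("127.0.0.1:2791", 10000, event -> {});
+            // The caller must be authorized to write /zookeeper/config (see Security above).
+            admin.addAuthInfo("digest", "super:adminsecret".getBytes());
+
+            // Incremental mode: add server 5 and remove server 3, against any config version (-1).
+            byte[] newConfig = admin.reconfigure(
+                    "server.5=125.23.63.27:1234:1235;1236",  // joining servers
+                    "3",                                     // leaving servers
+                    null,                                    // new members (non-incremental mode only)
+                    -1,                                      // expected config version, -1 = any
+                    null);                                   // optional Stat
+            System.out.println(new String(newConfig));
+            admin.close();
+        }
+    }
+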
+<a name="sc_reconfig_general"></a>
+
+#### General
+
+**Removing servers:** Any server can
+be removed, including the leader (although removing the leader will
+result in a short unavailability, see Figures 6 and 8 in the [paper](https://www.usenix.org/conference/usenixfederatedconferencesweek/dynamic-recon%EF%AC%81guration-primarybackup-clusters)). The server will not be shut down automatically.
+Instead, it becomes a "non-voting follower". This is somewhat similar
+to an observer in that its votes don't count towards the Quorum of
+votes necessary to commit operations. However, unlike a non-voting
+follower, an observer doesn't actually see any operation proposals and
+does not ACK them. Thus a non-voting follower has a more significant
+negative effect on system throughput compared to an observer.
+Non-voting follower mode should only be used as a temporary mode,
+before shutting the server down, or adding it as a follower or as an
+observer to the ensemble. We do not shut the server down automatically
+for two main reasons. The first reason is that we do not want all the
+clients connected to this server to be immediately disconnected,
+causing a flood of connection requests to other servers. Instead, it
+is better if each client decides when to migrate independently. The
+second reason is that removing a server may sometimes (rarely) be
+necessary in order to change it from "observer" to "participant" (this
+is explained in the section [Additional comments](#sc_reconfig_additional)).
+
+Note that the new configuration should have some minimal number of
+participants in order to be considered legal. If the proposed change
+would leave the cluster with less than 2 participants and standalone
+mode is enabled (standaloneEnabled=true, see the section [The _standaloneEnabled_ flag](#sc_reconfig_standaloneEnabled)), the reconfig will not be
+processed (BadArgumentsException). If standalone mode is disabled
+(standaloneEnabled=false) then it's legal to remain with 1 or more
+participants.
+
+**Adding servers:** Before a
+reconfiguration is invoked, the administrator must make sure that a
+quorum (majority) of participants from the new configuration are
+already connected and synced with the current leader. To achieve this
+we need to connect a new joining server to the leader before it is
+officially part of the ensemble. This is done by starting the joining
+server using an initial list of servers which is technically not a
+legal configuration of the system but (a) contains the joiner, and (b)
+gives sufficient information to the joiner in order for it to find and
+connect to the current leader. We list a few different options of
+doing this safely.
+
+1. Initial configuration of joiners is comprised of servers in
+  the last committed configuration and one or more joiners, where
+  **joiners are listed as observers.**
+  For example, if servers D and E are added at the same time to (A,
+  B, C) and server C is being removed, the initial configuration of
+  D could be (A, B, C, D) or (A, B, C, D, E), where D and E are
+  listed as observers. Similarly, the configuration of E could be
+  (A, B, C, E) or (A, B, C, D, E), where D and E are listed as
+  observers. **Note that listing the joiners as
+  observers will not actually make them observers - it will only
+  prevent them from accidentally forming a quorum with other
+  joiners.** Instead, they will contact the servers in the
+  current configuration and adopt the last committed configuration
+  (A, B, C), where the joiners are absent. Configuration files of
+  joiners are backed up and replaced automatically as this happens.
+  After connecting to the current leader, joiners become non-voting
+  followers until the system is reconfigured and they are added to
+  the ensemble (as participant or observer, as appropriate).
+1. Initial configuration of each joiner is comprised of servers
+  in the last committed configuration + **the
+  joiner itself, listed as a participant.** For example, to
+  add a new server D to a configuration consisting of servers (A, B,
+  C), the administrator can start D using an initial configuration
+  file consisting of servers (A, B, C, D). If both D and E are added
+  at the same time to (A, B, C), the initial configuration of D
+  could be (A, B, C, D) and the configuration of E could be (A, B,
+  C, E). Similarly, if D is added and C is removed at the same time,
+  the initial configuration of D could be (A, B, C, D). Never list
+  more than one joiner as participant in the initial configuration
+  (see warning below).
+1. Whether listing the joiner as an observer or as participant,
+  it is also fine not to list all the current configuration servers,
+  as long as the current leader is in the list. For example, when
+  adding D we could start D with a configuration file consisting of
+  just (A, D) if A is the current leader. However, this is more
+  fragile since if A fails before D officially joins the ensemble, D
+  doesn’t know anyone else and therefore the administrator will have
+  to intervene and restart D with another server list.
+
+######Note
+>##### Warning
+
+>Never specify more than one joining server in the same initial
+configuration as participants. Currently, the joining servers don’t
+know that they are joining an existing ensemble; if multiple joiners
+are listed as participants they may form an independent quorum
+creating a split-brain situation such as processing operations
+independently from your main ensemble. It is OK to list multiple
+joiners as observers in an initial config.
+
+If the configuration of existing servers changes or they become unavailable
+before the joiner succeeds in connecting and learning about configuration changes, the
+joiner may need to be restarted with an updated configuration file in order to be
+able to connect.
+
+Finally, note that once connected to the leader, a joiner adopts
+the last committed configuration, in which it is absent (the initial
+config of the joiner is backed up before being rewritten). If the
+joiner restarts in this state, it will not be able to boot since it is
+absent from its configuration file. In order to start it you’ll once
+again have to specify an initial configuration.
+
+**Modifying server parameters:** One
+can modify any of the ports of a server, or its role
+(participant/observer) by adding it to the ensemble with different
+parameters. This works in both the incremental and the bulk
+reconfiguration modes. It is not necessary to remove the server and
+then add it back; just specify the new parameters as if the server is
+not yet in the system. The server will detect the configuration change
+and perform the necessary adjustments. See an example in the section
+[Incremental mode](#sc_reconfig_incremental) and an exception to this
+rule in the section [Additional comments](#sc_reconfig_additional).
+
+It is also possible to change the Quorum System used by the
+ensemble (for example, change the Majority Quorum System to a
+Hierarchical Quorum System on the fly). This, however, is only allowed
+using the bulk (non-incremental) reconfiguration mode. In general,
+incremental reconfiguration only works with the Majority Quorum
+System. Bulk reconfiguration works with both Hierarchical and Majority
+Quorum Systems.
+
+**Performance Impact:** There is
+practically no performance impact when removing a follower, since it
+is not being automatically shut down (the effect of removal is that
+the server's votes are no longer being counted). When adding a server,
+there is no leader change and no noticeable performance disruption.
+For details and graphs please see Figures 6, 7 and 8 in the [paper](https://www.usenix.org/conference/usenixfederatedconferencesweek/dynamic-recon%EF%AC%81guration-primarybackup-clusters).
+
+The most significant disruption will happen when a leader change
+is caused, in one of the following cases:
+
+1. Leader is removed from the ensemble.
+1. Leader's role is changed from participant to observer.
+1. The port used by the leader to send transactions to others
+  (quorum port) is modified.
+
+In these cases we perform a leader hand-off where the old leader
+nominates a new leader. The resulting unavailability is usually
+shorter than when a leader crashes since detecting leader failure is
+unnecessary and electing a new leader can usually be avoided during a
+hand-off (see Figures 6 and 8 in the [paper](https://www.usenix.org/conference/usenixfederatedconferencesweek/dynamic-recon%EF%AC%81guration-primarybackup-clusters)).
+
+When the client port of a server is modified, it does not drop
+existing client connections. New connections to the server will have
+to use the new client port.
+
+**Progress guarantees:** Up to the
+invocation of the reconfig operation, a quorum of the old
+configuration is required to be available and connected for ZooKeeper
+to be able to make progress. Once reconfig is invoked, a quorum of
+both the old and of the new configurations must be available. The
+final transition happens once (a) the new configuration is activated,
+and (b) all operations scheduled before the new configuration is
+activated by the leader are committed. Once (a) and (b) happen, only a
+quorum of the new configuration is required. Note, however, that
+neither (a) nor (b) are visible to a client. Specifically, when a
+reconfiguration operation commits, it only means that an activation
+message was sent out by the leader. It does not necessarily mean that
+a quorum of the new configuration got this message (which is required
+in order to activate it) or that (b) has happened. If one wants to
+make sure that both (a) and (b) has already occurred (for example, in
+order to know that it is safe to shut down old servers that were
+removed), one can simply invoke an update
+(`set-data`, or some other quorum operation, but not
+a `sync`) and wait for it to commit. An alternative
+way to achieve this was to introduce another round to the
+reconfiguration protocol (which, for simplicity and compatibility with
+Zab, we decided to avoid).
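+
+As a sketch of that last point (the probe path below is purely illustrative,
+not part of the reconfig API): issuing any update after the reconfig and
+waiting for it to commit guarantees that both (a) and (b) have happened, so
+the removed servers can then be shut down safely.
+
+    // Reconfigure, then issue a quorum write and wait for it to commit.
+    zk.reconfig(joiningServers, leavingServers, null, -1, new Stat());
+    zk.create("/reconfig-probe", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE,
+              CreateMode.EPHEMERAL_SEQUENTIAL);   // returns once committed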
+
+<a name="sc_reconfig_incremental"></a>
+
+#### Incremental mode
+
+The incremental mode allows adding and removing servers to the
+current configuration. Multiple changes are allowed. For
+example:
+
+    > reconfig -remove 3 -add server.5=125.23.63.23:1234:1235;1236
+
+Both the add and the remove options get a list of comma separated
+arguments (no spaces):
+
+    > reconfig -remove 3,4 -add server.5=localhost:2111:2112;2113,6=localhost:2114:2115:observer;2116
+
+The format of the server statement is exactly the same as
+described in the section [Specifying the client port](#sc_reconfig_clientport) and
+includes the client port. Notice that here instead of "server.5=" you
+can just say "5=". In the example above, if server 5 is already in the
+system, but has different ports or is not an observer, it is updated
+and once the configuration commits becomes an observer and starts
+using these new ports. This is an easy way to turn participants into
+observers and vice versa or change any of their ports, without
+rebooting the server.
+
+ZooKeeper supports two types of Quorum Systems – the simple
+Majority system (where the leader commits operations after receiving
+ACKs from a majority of voters) and a more complex Hierarchical
+system, where votes of different servers have different weights and
+servers are divided into voting groups. Currently, incremental
+reconfiguration is allowed only if the last proposed configuration
+known to the leader uses a Majority Quorum System
+(BadArgumentsException is thrown otherwise).
+
+Incremental mode - examples using the Java API:
+
+    List<String> leavingServers = new ArrayList<String>();
+    leavingServers.add("1");
+    leavingServers.add("2");
+    byte[] config = zk.reconfig(null, leavingServers, null, -1, new Stat());
+
+    List<String> leavingServers = new ArrayList<String>();
+    List<String> joiningServers = new ArrayList<String>();
+    leavingServers.add("1");
+    joiningServers.add("server.4=localhost:1234:1235;1236");
+    byte[] config = zk.reconfig(joiningServers, leavingServers, null, -1, new Stat());
+
+    String configStr = new String(config);
+    System.out.println(configStr);
+
+There is also an asynchronous API, and an API accepting comma-separated
+Strings instead of `List<String>`. See
+src/java/main/org/apache/zookeeper/ZooKeeper.java.
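+
+For instance, the same kind of change expressed with the comma-separated
+String form (a sketch; the String-based overload is assumed to mirror the
+List-based one used above):
+
+    // Remove servers 1 and 2 and add server 4 in a single incremental reconfig.
+    byte[] config = zk.reconfig("server.4=localhost:1234:1235;1236",  // joining
+                                "1,2",                                // leaving
+                                null, -1, new Stat());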
+
+<a name="sc_reconfig_nonincremental"></a>
+
+#### Non-incremental mode
+
+The second mode of reconfiguration is non-incremental, whereby a
+client gives a complete specification of the new dynamic system
+configuration. The new configuration can either be given in place or
+read from a file:
+
+    > reconfig -file newconfig.cfg
+
+where *newconfig.cfg* is a dynamic configuration file (see [Dynamic configuration file](#sc_reconfig_file)). Alternatively, the new membership can be given inline:
+
+    > reconfig -members server.1=125.23.63.23:2780:2783:participant;2791,server.2=125.23.63.24:2781:2784:participant;2792,server.3=125.23.63.25:2782:2785:participant;2793
+
+The new configuration may use a different Quorum System. For
+example, you may specify a Hierarchical Quorum System even if the
+current ensemble uses a Majority Quorum System.
+
+Bulk mode - example using the Java API:
+
+    List<String> newMembers = new ArrayList<String>();
+    newMembers.add("server.1=1111:1234:1235;1236");
+    newMembers.add("server.2=1112:1237:1238;1239");
+    newMembers.add("server.3=1114:1240:1241:observer;1242");
+
+    byte[] config = zk.reconfig(null, null, newMembers, -1, new Stat());
+
+    String configStr = new String(config);
+    System.out.println(configStr);
+
+There is also an asynchronous API, and an API accepting a comma-separated
+String containing the new members instead of
+`List<String>`. See
+src/java/main/org/apache/zookeeper/ZooKeeper.java.
+
+<a name="sc_reconfig_conditional"></a>
+
+#### Conditional reconfig
+
+Sometimes (especially in non-incremental mode) a new proposed
+configuration depends on what the client "believes" to be the current
+configuration, and should be applied only to that configuration.
+Specifically, the `reconfig` succeeds only if the
+last configuration at the leader has the specified version.
+
+    > reconfig -file <filename> -v <version>
+
+In the previously listed Java examples, instead of -1 one could
+specify a configuration version to condition the
+reconfiguration.
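+
+For example, a sketch with the Java API, where the version value is taken from
+the hex version reported by `config -c` earlier (the joining/leaving lists are
+whatever change you intend to make):
+
+    // Conditional reconfig: fails with BadVersionException if the leader's
+    // latest configuration no longer has this version.
+    long version = Long.parseLong("400000003", 16);   // value shown by "config -c"
+    byte[] config = zk.reconfig(joiningServers, leavingServers, null,
+                                version, new Stat());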
+
+<a name="sc_reconfig_errors"></a>
+
+#### Error conditions
+
+In addition to normal ZooKeeper error conditions, a
+reconfiguration may fail for the following reasons:
+
+1. another reconfig is currently in progress
+  (ReconfigInProgress)
+1. the proposed change would leave the cluster with fewer than 2
+  participants while standalone mode is enabled; if standalone mode
+  is disabled it is legal to remain with 1 or more participants
+  (BadArgumentsException)
+1. no quorum of the new configuration was connected and
+  up-to-date with the leader when the reconfiguration processing
+  began (NewConfigNoQuorum)
+1. `-v x` was specified, but the version
+`y` of the latest configuration is not
+`x` (BadVersionException)
+1. an incremental reconfiguration was requested but the last
+  configuration at the leader uses a Quorum System which is
+  different from the Majority system (BadArgumentsException)
+1. syntax error (BadArgumentsException)
+1. I/O exception when reading the configuration from a file
+  (BadArgumentsException)
+
+Most of these are illustrated by test-cases in
+*ReconfigFailureCases.java*.
+
+<a name="sc_reconfig_additional"></a>
+
+#### Additional comments
+
+**Liveness:** To better understand
+the difference between incremental and non-incremental
+reconfiguration, suppose that client C1 adds server D to the system
+while a different client C2 adds server E. With the non-incremental
+mode, each client would first invoke `config` to find
+out the current configuration, and then locally create a new list of
+servers by adding its own suggested server. The new configuration can
+then be submitted using the non-incremental
+`reconfig` command. After both reconfigurations
+complete, only one of E or D will be added (not both), depending on
+which client's request arrives second to the leader, overwriting the
+previous configuration. The other client can repeat the process until
+its change takes effect. This method guarantees system-wide progress
+(i.e., for one of the clients), but does not ensure that every client
+succeeds. To have more control C2 may request to only execute the
+reconfiguration in case the version of the current configuration
+hasn't changed, as explained in the section [Conditional reconfig](#sc_reconfig_conditional). In this way it may avoid blindly
+overwriting the configuration of C1 if C1's configuration reached the
+leader first.
+
+With incremental reconfiguration, both changes will take effect as
+they are simply applied by the leader one after the other to the
+current configuration, whatever that is (assuming that the second
+reconfig request reaches the leader after it sends a commit message
+for the first reconfig request -- currently the leader will refuse to
+propose a reconfiguration if another one is already pending). Since
+both clients are guaranteed to make progress, this method guarantees
+stronger liveness. In practice, multiple concurrent reconfigurations
+are probably rare. Non-incremental reconfiguration is currently the
+only way to dynamically change the Quorum System. Incremental
+configuration is currently only allowed with the Majority Quorum
+System.
+
+**Changing an observer into a
+follower:** Clearly, changing a server that participates in
+voting into an observer may fail if error (2) occurs, i.e., if fewer
+than the minimal allowed number of participants would remain. However,
+converting an observer into a participant may sometimes fail for a
+more subtle reason: Suppose, for example, that the current
+configuration is (A, B, C, D), where A is the leader, B and C are
+followers and D is an observer. In addition, suppose that B has
+crashed. If a reconfiguration is submitted where D is said to become a
+follower, it will fail with error (3) since in this configuration, a
+majority of voters in the new configuration (any 3 voters) must be
+connected and up-to-date with the leader. An observer cannot
+acknowledge the history prefix sent during reconfiguration, and
+therefore it does not count towards these 3 required servers and the
+reconfiguration will be aborted. In case this happens, a client can
+achieve the same task by two reconfig commands: first invoke a
+reconfig to remove D from the configuration and then invoke a second
+command to add it back as a participant (follower). During the
+intermediate state D is a non-voting follower and can ACK the state
+transfer performed during the second reconfig command.
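+
+Sketched with the Java API (the server id "4" and its address are illustrative;
+`Arrays.asList` comes from `java.util.Arrays`), the two commands could look
+like:
+
+    // Step 1: remove D (id "4"); it keeps running as a non-voting follower.
+    zk.reconfig(null, Arrays.asList("4"), null, -1, new Stat());
+    // Step 2: add it back as a participant; as a non-voting follower it can
+    // ACK the state transfer performed during this second reconfig.
+    zk.reconfig(Arrays.asList("server.4=localhost:2111:2112;2113"),
+                null, null, -1, new Stat());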
+
+<a name="ch_reconfig_rebalancing"></a>
+
+## Rebalancing Client Connections
+
+When a ZooKeeper cluster is started, if each client is given the same
+connection string (list of servers), the client will randomly choose a
+server in the list to connect to, which makes the expected number of
+client connections per server the same for each of the servers. We
+implemented a method that preserves this property when the set of servers
+changes through reconfiguration. See Sections 4 and 5.1 in the [paper](https://www.usenix.org/conference/usenixfederatedconferencesweek/dynamic-recon%EF%AC%81guration-primarybackup-clusters).
+
+In order for the method to work, all clients must subscribe to
+configuration changes (by setting a watch on /zookeeper/config either
+directly or through the `getConfig` API command). When
+the watch is triggered, the client should read the new configuration by
+invoking `sync` and `getConfig` and if
+the configuration is indeed new invoke the
+`updateServerList` API command. To avoid mass client
+migration at the same time, it is better to have each client sleep a
+random short period of time before invoking
+`updateServerList`.
+
+A few examples can be found in:
+*StaticHostProviderTest.java* and
+*TestReconfig.cc*
+
+Example (this is not a recipe, but a simplified example just to
+explain the general idea):
+
+    public void process(WatchedEvent event) {
+        synchronized (this) {
+            if (event.getType() == EventType.None) {
+                connected = (event.getState() == KeeperState.SyncConnected);
+                notifyAll();
+            } else if (event.getPath()!=null &&  event.getPath().equals(ZooDefs.CONFIG_NODE)) {
+                // in prod code never block the event thread!
+                zk.sync(ZooDefs.CONFIG_NODE, this, null);
+                zk.getConfig(this, this, null);
+            }
+        }
+    }
+    
+    public void processResult(int rc, String path, Object ctx, byte[] data, Stat stat) {
+        if (path!=null &&  path.equals(ZooDefs.CONFIG_NODE)) {
+            String config[] = ConfigUtils.getClientConfigStr(new String(data)).split(" ");   // similar to config -c
+            long version = Long.parseLong(config[0], 16);
+            if (this.configVersion == null){
+                 this.configVersion = version;
+            } else if (version > this.configVersion) {
+                hostList = config[1];
+                try {
+                    // the following command is not blocking but may cause the client to close the socket and
+                    // migrate to a different server. In practice its better to wait a short period of time, chosen
+                    // randomly, so that different clients migrate at different times
+                    zk.updateServerList(hostList);
+                } catch (IOException e) {
+                    System.err.println("Error updating server list");
+                    e.printStackTrace();
+                }
+                this.configVersion = version;
+            }
+        }
+    }

+ 364 - 0
zookeeper-docs/src/main/resources/markdown/zookeeperStarted.md

@@ -0,0 +1,364 @@
+<!--
+Copyright 2002-2004 The Apache Software Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+//-->
+
+# ZooKeeper Getting Started Guide
+
+* [Getting Started: Coordinating Distributed Applications with ZooKeeper](#getting-started-coordinating-distributed-applications-with-zooKeeper)
+    * [Pre-requisites](#sc_Prerequisites)
+    * [Download](#sc_Download)
+    * [Standalone Operation](#sc_InstallingSingleMode)
+    * [Managing ZooKeeper Storage](#sc_FileManagement)
+    * [Connecting to ZooKeeper](#sc_ConnectingToZooKeeper)
+    * [Programming to ZooKeeper](#sc_ProgrammingToZooKeeper)
+    * [Running Replicated ZooKeeper](#sc_RunningReplicatedZooKeeper)
+    * [Other Optimizations](#other-optimizations)
+
+<a name="getting-started-coordinating-distributed-applications-with-zooKeeper"></a>
+
+## Getting Started: Coordinating Distributed Applications with ZooKeeper
+
+This document contains information to get you started quickly with
+ZooKeeper. It is aimed primarily at developers hoping to try it out, and
+contains simple installation instructions for a single ZooKeeper server, a
+few commands to verify that it is running, and a simple programming
+example. Finally, as a convenience, there are a few sections regarding
+more complicated installations, for example running replicated
+deployments, and optimizing the transaction log. However for the complete
+instructions for commercial deployments, please refer to the [ZooKeeper
+Administrator's Guide](zookeeperAdmin.html).
+
+<a name="sc_Prerequisites"></a>
+
+### Pre-requisites
+
+See [System Requirements](zookeeperAdmin.html#sc_systemReq) in the Admin guide.
+
+<a name="sc_Download"></a>
+
+### Download
+
+To get a ZooKeeper distribution, download a recent
+[stable](http://zookeeper.apache.org/releases.html) release from one of the Apache Download
+Mirrors.
+
+<a name="sc_InstallingSingleMode"></a>
+
+### Standalone Operation
+
+Setting up a ZooKeeper server in standalone mode is
+straightforward. The server is contained in a single JAR file,
+so installation consists of creating a configuration.
+
+Once you've downloaded a stable ZooKeeper release, unpack
+it and cd to the root.
+
+To start ZooKeeper you need a configuration file. Here is a sample,
+create it in **conf/zoo.cfg**:
+
+
+    tickTime=2000
+    dataDir=/var/lib/zookeeper
+    clientPort=2181
+
+
+This file can be called anything, but for the sake of this
+discussion call
+it **conf/zoo.cfg**. Change the
+value of **dataDir** to specify an
+existing (empty to start with) directory.  Here are the meanings
+for each of the fields:
+
+* ***tickTime*** :
+    the basic time unit in milliseconds used by ZooKeeper. It is
+    used to do heartbeats and the minimum session timeout will be
+    twice the tickTime.
+
+* ***dataDir*** :
+    the location to store the in-memory database snapshots and,
+    unless specified otherwise, the transaction log of updates to the
+    database.
+
+* ***clientPort*** :
+    the port to listen for client connections
+
+Now that you created the configuration file, you can start
+ZooKeeper:
+
+
+    bin/zkServer.sh start
+
+
+ZooKeeper logs messages using log4j -- more detail
+available in the
+[Logging](zookeeperProgrammers.html#Logging)
+section of the Programmer's Guide. You will see log messages
+coming to the console (default) and/or a log file depending on
+the log4j configuration.
+
+The steps outlined here run ZooKeeper in standalone mode. There is
+no replication, so if the ZooKeeper process fails, the service will go down.
+This is fine for most development situations, but to run ZooKeeper in
+replicated mode, please see [Running Replicated
+ZooKeeper](#sc_RunningReplicatedZooKeeper).
+
+<a name="sc_FileManagement"></a>
+
+### Managing ZooKeeper Storage
+
+For long running production systems ZooKeeper storage must
+be managed externally (dataDir and logs). See the section on
+[maintenance](zookeeperAdmin.html#sc_maintenance) for
+more details.
+
+<a name="sc_ConnectingToZooKeeper"></a>
+
+### Connecting to ZooKeeper
+
+
+    $ bin/zkCli.sh -server 127.0.0.1:2181
+
+
+This lets you perform simple, file-like operations.
+
+Once you have connected, you should see something like:
+
+
+    Connecting to localhost:2181
+    log4j:WARN No appenders could be found for logger (org.apache.zookeeper.ZooKeeper).
+    log4j:WARN Please initialize the log4j system properly.
+    Welcome to ZooKeeper!
+    JLine support is enabled
+    [zkshell: 0]
+
+From the shell, type `help` to get a listing of commands that can be executed from the client, as in:
+
+
+    [zkshell: 0] help
+    ZooKeeper host:port cmd args
+        get path [watch]
+        ls path [watch]
+        set path data [version]
+        delquota [-n|-b] path
+        quit
+        printwatches on|off
+        create path data acl
+        stat path [watch]
+        listquota path
+        history
+        setAcl path acl
+        getAcl path
+        sync path
+        redo cmdno
+        addauth scheme auth
+        delete path [version]
+        deleteall path
+        setquota -n|-b val path
+
+
+From here, you can try a few simple commands to get a feel for this simple command line interface.  First, start by issuing the list command, as
+in `ls`, yielding:
+
+
+    [zkshell: 8] ls /
+    [zookeeper]
+
+
+Next, create a new znode by running `create /zk_test my_data`. This creates a new znode and associates the string "my_data" with the node.
+You should see:
+
+
+    [zkshell: 9] create /zk_test my_data
+    Created /zk_test
+
+
+Issue another `ls /` command to see what the directory looks like:
+
+
+    [zkshell: 11] ls /
+    [zookeeper, zk_test]
+
+
+Notice that the zk_test directory has now been created.
+
+Next, verify that the data was associated with the znode by running the `get` command, as in:
+
+
+    [zkshell: 12] get /zk_test
+    my_data
+    cZxid = 5
+    ctime = Fri Jun 05 13:57:06 PDT 2009
+    mZxid = 5
+    mtime = Fri Jun 05 13:57:06 PDT 2009
+    pZxid = 5
+    cversion = 0
+    dataVersion = 0
+    aclVersion = 0
+    ephemeralOwner = 0
+    dataLength = 7
+    numChildren = 0
+
+
+We can change the data associated with zk_test by issuing the `set` command, as in:
+
+
+    [zkshell: 14] set /zk_test junk
+    cZxid = 5
+    ctime = Fri Jun 05 13:57:06 PDT 2009
+    mZxid = 6
+    mtime = Fri Jun 05 14:01:52 PDT 2009
+    pZxid = 5
+    cversion = 0
+    dataVersion = 1
+    aclVersion = 0
+    ephemeralOwner = 0
+    dataLength = 4
+    numChildren = 0
+    [zkshell: 15] get /zk_test
+    junk
+    cZxid = 5
+    ctime = Fri Jun 05 13:57:06 PDT 2009
+    mZxid = 6
+    mtime = Fri Jun 05 14:01:52 PDT 2009
+    pZxid = 5
+    cversion = 0
+    dataVersion = 1
+    aclVersion = 0
+    ephemeralOwner = 0
+    dataLength = 4
+    numChildren = 0
+
+
+(Notice that we did a `get` after setting the data and it did, indeed, change.)
+
+Finally, let's `delete` the node by issuing:
+
+
+    [zkshell: 16] delete /zk_test
+    [zkshell: 17] ls /
+    [zookeeper]
+    [zkshell: 18]
+
+
+That's it for now.  To explore more, continue with the rest of this document and see the [Programmer's Guide](zookeeperProgrammers.html).
+
+<a name="sc_ProgrammingToZooKeeper"></a>
+
+### Programming to ZooKeeper
+
+ZooKeeper has Java bindings and C bindings. They are
+functionally equivalent. The C bindings exist in two variants: single
+threaded and multi-threaded. These differ only in how the messaging loop
+is done. For more information and sample code using the different APIs, see the [Programming
+Examples in the ZooKeeper Programmer's Guide](zookeeperProgrammers.html#ch_programStructureWithExample).
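+
+As a first taste of the Java API, here is a minimal, self-contained sketch
+(the connection string, znode path and data are illustrative) that mirrors the
+zkCli session shown earlier: it connects, creates a znode, reads it back and
+disconnects.
+
+    import java.util.concurrent.CountDownLatch;
+
+    import org.apache.zookeeper.CreateMode;
+    import org.apache.zookeeper.Watcher;
+    import org.apache.zookeeper.ZooDefs.Ids;
+    import org.apache.zookeeper.ZooKeeper;
+
+    public class QuickStart {
+        public static void main(String[] args) throws Exception {
+            final CountDownLatch connected = new CountDownLatch(1);
+            // The watcher is notified when the session reaches SyncConnected.
+            ZooKeeper zk = new ZooKeeper("127.0.0.1:2181", 3000, event -> {
+                if (event.getState() == Watcher.Event.KeeperState.SyncConnected) {
+                    connected.countDown();
+                }
+            });
+            connected.await();
+
+            zk.create("/zk_test", "my_data".getBytes(), Ids.OPEN_ACL_UNSAFE,
+                    CreateMode.PERSISTENT);          // fails if the znode already exists
+            byte[] data = zk.getData("/zk_test", false, null);
+            System.out.println(new String(data));    // prints "my_data"
+            zk.close();
+        }
+    }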
+
+<a name="sc_RunningReplicatedZooKeeper"></a>
+
+### Running Replicated ZooKeeper
+
+Running ZooKeeper in standalone mode is convenient for evaluation,
+some development, and testing. But in production, you should run
+ZooKeeper in replicated mode. A replicated group of servers in the same
+application is called a _quorum_, and in replicated
+mode, all servers in the quorum have copies of the same configuration
+file.
+
+######Note
+>For replicated mode, a minimum of three servers is required,
+and it is strongly recommended that you have an odd number of
+servers. If you only have two servers, then you are in a
+situation where if one of them fails, there are not enough
+machines to form a majority quorum. Two servers is inherently
+**less** stable than a single server, because there are two single
+points of failure.
+
+The required
+**conf/zoo.cfg**
+file for replicated mode is similar to the one used in standalone
+mode, but with a few differences. Here is an example:
+
+    tickTime=2000
+    dataDir=/var/lib/zookeeper
+    clientPort=2181
+    initLimit=5
+    syncLimit=2
+    server.1=zoo1:2888:3888
+    server.2=zoo2:2888:3888
+    server.3=zoo3:2888:3888
+
+The new entry, **initLimit**, is a
+timeout ZooKeeper uses to limit the length of time the ZooKeeper
+servers in the quorum have to connect to a leader. The entry **syncLimit** limits how far out of date a server can
+be from a leader.
+
+With both of these timeouts, you specify the unit of time using
+**tickTime**. In this example, the timeout
+for initLimit is 5 ticks at 2000 milliseconds a tick, or 10
+seconds.
+
+The entries of the form _server.X_ list the
+servers that make up the ZooKeeper service. When the server starts up,
+it knows which server it is by looking for the file
+_myid_ in the data directory. That file
+contains the server number, in ASCII.
+
+Finally, note the two port numbers after each server
+name: " 2888" and "3888". Peers use the former port to connect
+to other peers. Such a connection is necessary so that peers
+can communicate, for example, to agree upon the order of
+updates. More specifically, a ZooKeeper server uses this port
+to connect followers to the leader. When a new leader arises, a
+follower opens a TCP connection to the leader using this
+port. Because the default leader election also uses TCP, we
+currently require another port for leader election. This is the
+second port in the server entry.
+
+######Note
+>If you want to test multiple servers on a single
+machine, specify the server name
+as _localhost_ with unique quorum &
+leader election ports (i.e. 2888:3888, 2889:3889, 2890:3890 in
+the example above) for each server.X in that server's config
+file. Of course separate _dataDir_s and
+distinct _clientPort_s are also necessary
+(in the above replicated example, running on a
+single _localhost_, you would still have
+three config files).
+
+>Please be aware that setting up multiple servers on a single
+machine will not create any redundancy. If something were to
+happen which caused the machine to die, all of the zookeeper
+servers would be offline. Full redundancy requires that each
+server have its own machine. It must be a completely separate
+physical server. Multiple virtual machines on the same physical
+host are still vulnerable to the complete failure of that host.
+
+<a name="other-optimizations"></a>
+
+### Other Optimizations
+
+There are a couple of other configuration parameters that can
+greatly increase performance:
+
+* To get low latencies on updates it is important to
+  have a dedicated transaction log directory. By default
+  transaction logs are put in the same directory as the data
+  snapshots and _myid_ file. The dataLogDir
+  parameter specifies a different directory to use for the
+  transaction logs.
+* _[tbd: what is the other config param?]_
+
+

+ 666 - 0
zookeeper-docs/src/main/resources/markdown/zookeeperTutorial.md

@@ -0,0 +1,666 @@
+<!--
+Copyright 2002-2004 The Apache Software Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+//-->
+
+# Programming with ZooKeeper - A basic tutorial
+
+* [Introduction](#ch_Introduction)
+* [Barriers](#sc_barriers)
+* [Producer-Consumer Queues](#sc_producerConsumerQueues)
+* [Complete example](#Complete+example)
+    * [Queue test](#Queue+test)
+    * [Barrier test](#Barrier+test)
+    * [Source Listing](#sc_sourceListing)
+
+<a name="ch_Introduction"></a>
+
+## Introduction
+
+In this tutorial, we show simple implementations of barriers and
+producer-consumer queues using ZooKeeper. We call the respective classes Barrier and Queue.
+These examples assume that you have at least one ZooKeeper server running.
+
+Both primitives use the following common excerpt of code:
+
+    static ZooKeeper zk = null;
+    static Integer mutex;
+
+    String root;
+
+    SyncPrimitive(String address) {
+        if(zk == null){
+            try {
+                System.out.println("Starting ZK:");
+                zk = new ZooKeeper(address, 3000, this);
+                mutex = new Integer(-1);
+                System.out.println("Finished starting ZK: " + zk);
+            } catch (IOException e) {
+                System.out.println(e.toString());
+                zk = null;
+            }
+        }
+    }
+
+    synchronized public void process(WatchedEvent event) {
+        synchronized (mutex) {
+            mutex.notify();
+        }
+    }
+
+
+
+Both classes extend SyncPrimitive. In this way, we execute steps that are
+common to all primitives in the constructor of SyncPrimitive. To keep the examples
+simple, we create a ZooKeeper object the first time we instantiate either a barrier
+object or a queue object, and we declare a static variable that is a reference
+to this object. The subsequent instances of Barrier and Queue check whether a
+ZooKeeper object exists. Alternatively, we could have the application create a
+ZooKeeper object and pass it to the constructor of Barrier and Queue.
+
+We use the process() method to process notifications triggered due to watches.
+In the following discussion, we present code that sets watches. A watch is an internal
+structure that enables ZooKeeper to notify a client of a change to a node. For example,
+if a client is waiting for other clients to leave a barrier, then it can set a watch and
+wait for modifications to a particular node, which can indicate that it is the end of the wait.
+This point becomes clear once we go over the examples.
+
+<a name="sc_barriers"></a>
+
+## Barriers
+
+A barrier is a primitive that enables a group of processes to synchronize the
+beginning and the end of a computation. The general idea of this implementation
+is to have a barrier node that serves the purpose of being a parent for individual
+process nodes. Suppose that we call the barrier node "/b1". Each process "p" then
+creates a node "/b1/p". Once enough processes have created their corresponding
+nodes, the joined processes can start the computation.
+
+In this example, each process instantiates a Barrier object, and its constructor takes as parameters:
+
+* the address of a ZooKeeper server (e.g., "zoo1.foo.com:2181")
+* the path of the barrier node on ZooKeeper (e.g., "/b1")
+* the size of the group of processes
+
+The constructor of Barrier passes the address of the ZooKeeper server to the
+constructor of the parent class. The parent class creates a ZooKeeper instance if
+one does not exist. The constructor of Barrier then creates a
+barrier node on ZooKeeper, which is the parent node of all process nodes, and
+which we call root (**Note:** This is not the ZooKeeper root "/").
+
+    /**
+     * Barrier constructor
+     *
+     * @param address
+     * @param root
+     * @param size
+     */
+    Barrier(String address, String root, int size) {
+        super(address);
+        this.root = root;
+        this.size = size;
+        // Create barrier node
+        if (zk != null) {
+            try {
+                Stat s = zk.exists(root, false);
+                if (s == null) {
+                    zk.create(root, new byte[0], Ids.OPEN_ACL_UNSAFE,
+                            CreateMode.PERSISTENT);
+                }
+            } catch (KeeperException e) {
+                System.out
+                        .println("Keeper exception when instantiating queue: "
+                                + e.toString());
+            } catch (InterruptedException e) {
+                System.out.println("Interrupted exception");
+            }
+        }
+
+        // My node name
+        try {
+            name = new String(InetAddress.getLocalHost().getCanonicalHostName().toString());
+        } catch (UnknownHostException e) {
+            System.out.println(e.toString());
+        }
+    }
+
+
+To enter the barrier, a process calls enter(). The process creates a node under
+the root to represent it, using its host name to form the node name. It then waits
+until enough processes have entered the barrier. A process does this by checking
+the number of children the root node has with "getChildren()", and waiting for
+notifications if there are not yet enough. To receive a notification when
+there is a change to the root node, a process has to set a watch, and it does so
+through the call to "getChildren()". In the code, "getChildren()"
+takes two parameters. The first one states the node to read from, and the second is
+a boolean flag that enables the process to set a watch. In the code the flag is true.
+
+    /**
+     * Join barrier
+     *
+     * @return
+     * @throws KeeperException
+     * @throws InterruptedException
+     */
+
+    boolean enter() throws KeeperException, InterruptedException{
+        zk.create(root + "/" + name, new byte[0], Ids.OPEN_ACL_UNSAFE,
+                CreateMode.EPHEMERAL_SEQUENTIAL);
+        while (true) {
+            synchronized (mutex) {
+                List<String> list = zk.getChildren(root, true);
+
+                if (list.size() < size) {
+                    mutex.wait();
+                } else {
+                    return true;
+                }
+            }
+        }
+    }
+
+
+Note that enter() throws both KeeperException and InterruptedException, so it is
+the responsibility of the application to catch and handle such exceptions.
+
+Once the computation is finished, a process calls leave() to leave the barrier.
+First it deletes its corresponding node, and then it gets the children of the root
+node. If there is at least one child, then it waits for a notification (note
+that the second parameter of the call to getChildren() is true, meaning that
+ZooKeeper has to set a watch on the root node). Upon receiving a notification,
+it checks once more whether the root node has any children.
+
+    /**
+     * Wait until all reach barrier
+     *
+     * @return
+     * @throws KeeperException
+     * @throws InterruptedException
+     */
+
+    boolean leave() throws KeeperException, InterruptedException {
+        zk.delete(root + "/" + name, 0);
+        while (true) {
+            synchronized (mutex) {
+                List<String> list = zk.getChildren(root, true);
+                if (list.size() > 0) {
+                    mutex.wait();
+                } else {
+                    return true;
+                }
+            }
+        }
+    }
+
+
+<a name="sc_producerConsumerQueues"></a>
+
+## Producer-Consumer Queues
+
+A producer-consumer queue is a distributed data structure that groups of processes
+use to generate and consume items. Producer processes create new elements and add
+them to the queue. Consumer processes remove elements from the list, and process them.
+In this implementation, the elements are simple integers. The queue is represented
+by a root node, and to add an element to the queue, a producer process creates a new node,
+a child of the root node.
+
+The following excerpt of code corresponds to the constructor of the object. As
+with Barrier objects, it first calls the constructor of the parent class, SyncPrimitive,
+that creates a ZooKeeper object if one doesn't exist. It then verifies whether the root
+node of the queue exists, and creates it if it doesn't.
+
+    /**
+     * Constructor of producer-consumer queue
+     *
+     * @param address
+     * @param name
+     */
+    Queue(String address, String name) {
+        super(address);
+        this.root = name;
+        // Create ZK node name
+        if (zk != null) {
+            try {
+                Stat s = zk.exists(root, false);
+                if (s == null) {
+                    zk.create(root, new byte[0], Ids.OPEN_ACL_UNSAFE,
+                            CreateMode.PERSISTENT);
+                }
+            } catch (KeeperException e) {
+                System.out
+                        .println("Keeper exception when instantiating queue: "
+                                + e.toString());
+            } catch (InterruptedException e) {
+                System.out.println("Interrupted exception");
+            }
+        }
+    }
+
+
+A producer process calls "produce()" to add an element to the queue, and passes
+an integer as an argument. To add an element to the queue, the method creates a
+new node using "create()", and uses the SEQUENCE flag to instruct ZooKeeper to
+append the value of the sequence counter associated with the root node. In this way,
+we impose a total order on the elements of the queue, thus guaranteeing that the
+oldest element of the queue is the next one consumed.
+
+    /**
+     * Add element to the queue.
+     *
+     * @param i
+     * @return
+     */
+
+    boolean produce(int i) throws KeeperException, InterruptedException{
+        ByteBuffer b = ByteBuffer.allocate(4);
+        byte[] value;
+
+        // Add child with value i
+        b.putInt(i);
+        value = b.array();
+        zk.create(root + "/element", value, Ids.OPEN_ACL_UNSAFE,
+                    CreateMode.PERSISTENT_SEQUENTIAL);
+
+        return true;
+    }
+
+
+To consume an element, a consumer process obtains the children of the root node,
+reads the node with smallest counter value, and returns the element. Note that
+if there is a conflict, then one of the two contending processes won't be able to
+delete the node and the delete operation will throw an exception.
+
+A call to getChildren() returns the list of children in lexicographic order.
+As lexicographic order does not necessarily follow the numerical order of the counter
+values, we need to decide which element is the smallest. To decide which one has
+the smallest counter value, we traverse the list, and remove the prefix "element"
+from each one.
+
+    /**
+     * Remove first element from the queue.
+     *
+     * @return
+     * @throws KeeperException
+     * @throws InterruptedException
+     */
+    int consume() throws KeeperException, InterruptedException{
+        int retvalue = -1;
+        Stat stat = null;
+
+        // Get the first element available
+        while (true) {
+            synchronized (mutex) {
+                List<String> list = zk.getChildren(root, true);
+                if (list.size() == 0) {
+                    System.out.println("Going to wait");
+                    mutex.wait();
+                } else {
+                    // Find the child with the smallest sequence counter. Keep the
+                    // full node name, since sequence numbers are zero-padded.
+                    Integer min = new Integer(list.get(0).substring(7));
+                    String minNode = list.get(0);
+                    for(String s : list){
+                        Integer tempValue = new Integer(s.substring(7));
+                        //System.out.println("Temporary value: " + tempValue);
+                        if(tempValue < min) {
+                            min = tempValue;
+                            minNode = s;
+                        }
+                    }
+                    System.out.println("Temporary value: " + root + "/" + minNode);
+                    byte[] b = zk.getData(root + "/" + minNode, false, stat);
+                    zk.delete(root + "/" + minNode, 0);
+                    ByteBuffer buffer = ByteBuffer.wrap(b);
+                    retvalue = buffer.getInt();
+
+                    return retvalue;
+                }
+            }
+        }
+    }
+
+
+<a name="Complete+example"></a>
+
+## Complete example
+
+In the following section you can find a complete command line application that demonstrates the
+above-mentioned recipes. Use the following commands to run it.
+
+    ZOOBINDIR="[path_to_distro]/bin"
+    . "$ZOOBINDIR"/zkEnv.sh
+    java SyncPrimitive [Test Type] [ZK server] [No of elements] [Client type]
+
+<a name="Queue+test"></a>
+
+### Queue test
+
+Start a producer to create 100 elements
+
+    java SyncPrimitive qTest localhost 100 p
+
+
+Start a consumer to consume 100 elements
+
+    java SyncPrimitive qTest localhost 100 c
+
+<a name="Barrier+test"></a>
+
+### Barrier test
+
+Start a barrier with 2 participants (run the command once for each participant you'd like to enter the barrier)
+
+    java SyncPrimitive bTest localhost 2
+
+<a name="sc_sourceListing"></a>
+
+### Source Listing
+
+#### SyncPrimitive.Java
+
+    import java.io.IOException;
+    import java.net.InetAddress;
+    import java.net.UnknownHostException;
+    import java.nio.ByteBuffer;
+    import java.util.List;
+    import java.util.Random;
+
+    import org.apache.zookeeper.CreateMode;
+    import org.apache.zookeeper.KeeperException;
+    import org.apache.zookeeper.WatchedEvent;
+    import org.apache.zookeeper.Watcher;
+    import org.apache.zookeeper.ZooKeeper;
+    import org.apache.zookeeper.ZooDefs.Ids;
+    import org.apache.zookeeper.data.Stat;
+
+    public class SyncPrimitive implements Watcher {
+
+        static ZooKeeper zk = null;
+        static Integer mutex;
+        String root;
+
+        SyncPrimitive(String address) {
+            if(zk == null){
+                try {
+                    System.out.println("Starting ZK:");
+                    zk = new ZooKeeper(address, 3000, this);
+                    mutex = new Integer(-1);
+                    System.out.println("Finished starting ZK: " + zk);
+                } catch (IOException e) {
+                    System.out.println(e.toString());
+                    zk = null;
+                }
+            }
+            //else mutex = new Integer(-1);
+        }
+
+        synchronized public void process(WatchedEvent event) {
+            synchronized (mutex) {
+                //System.out.println("Process: " + event.getType());
+                mutex.notify();
+            }
+        }
+
+        /**
+         * Barrier
+         */
+        static public class Barrier extends SyncPrimitive {
+            int size;
+            String name;
+
+            /**
+             * Barrier constructor
+             *
+             * @param address
+             * @param root
+             * @param size
+             */
+            Barrier(String address, String root, int size) {
+                super(address);
+                this.root = root;
+                this.size = size;
+
+                // Create barrier node
+                if (zk != null) {
+                    try {
+                        Stat s = zk.exists(root, false);
+                        if (s == null) {
+                            zk.create(root, new byte[0], Ids.OPEN_ACL_UNSAFE,
+                                    CreateMode.PERSISTENT);
+                        }
+                    } catch (KeeperException e) {
+                        System.out
+                                .println("Keeper exception when instantiating queue: "
+                                        + e.toString());
+                    } catch (InterruptedException e) {
+                        System.out.println("Interrupted exception");
+                    }
+                }
+
+                // My node name
+                try {
+                    name = new String(InetAddress.getLocalHost().getCanonicalHostName().toString());
+                } catch (UnknownHostException e) {
+                    System.out.println(e.toString());
+                }
+
+            }
+
+            /**
+             * Join barrier
+             *
+             * @return
+             * @throws KeeperException
+             * @throws InterruptedException
+             */
+
+            boolean enter() throws KeeperException, InterruptedException{
+                zk.create(root + "/" + name, new byte[0], Ids.OPEN_ACL_UNSAFE,
+                        CreateMode.EPHEMERAL_SEQUENTIAL);
+                while (true) {
+                    synchronized (mutex) {
+                        List<String> list = zk.getChildren(root, true);
+
+                        if (list.size() < size) {
+                            mutex.wait();
+                        } else {
+                            return true;
+                        }
+                    }
+                }
+            }
+
+            /**
+             * Wait until all reach barrier
+             *
+             * @return
+             * @throws KeeperException
+             * @throws InterruptedException
+             */
+            boolean leave() throws KeeperException, InterruptedException{
+                zk.delete(root + "/" + name, 0);
+                while (true) {
+                    synchronized (mutex) {
+                        List<String> list = zk.getChildren(root, true);
+                        if (list.size() > 0) {
+                            mutex.wait();
+                        } else {
+                            return true;
+                        }
+                    }
+                }
+            }
+        }
+
+        /**
+         * Producer-Consumer queue
+         */
+        static public class Queue extends SyncPrimitive {
+
+            /**
+             * Constructor of producer-consumer queue
+             *
+             * @param address
+             * @param name
+             */
+            Queue(String address, String name) {
+                super(address);
+                this.root = name;
+                // Create ZK node name
+                if (zk != null) {
+                    try {
+                        Stat s = zk.exists(root, false);
+                        if (s == null) {
+                            zk.create(root, new byte[0], Ids.OPEN_ACL_UNSAFE,
+                                    CreateMode.PERSISTENT);
+                        }
+                    } catch (KeeperException e) {
+                        System.out
+                                .println("Keeper exception when instantiating queue: "
+                                        + e.toString());
+                    } catch (InterruptedException e) {
+                        System.out.println("Interrupted exception");
+                    }
+                }
+            }
+
+            /**
+             * Add element to the queue.
+             *
+             * @param i
+             * @return
+             */
+
+            boolean produce(int i) throws KeeperException, InterruptedException{
+                ByteBuffer b = ByteBuffer.allocate(4);
+                byte[] value;
+
+                // Add child with value i
+                b.putInt(i);
+                value = b.array();
+                zk.create(root + "/element", value, Ids.OPEN_ACL_UNSAFE,
+                            CreateMode.PERSISTENT_SEQUENTIAL);
+
+                return true;
+            }
+
+            /**
+             * Remove first element from the queue.
+             *
+             * @return
+             * @throws KeeperException
+             * @throws InterruptedException
+             */
+            int consume() throws KeeperException, InterruptedException{
+                int retvalue = -1;
+                Stat stat = null;
+
+                // Get the first element available
+                while (true) {
+                    synchronized (mutex) {
+                        List<String> list = zk.getChildren(root, true);
+                        if (list.size() == 0) {
+                            System.out.println("Going to wait");
+                            mutex.wait();
+                        } else {
+                            Integer min = new Integer(list.get(0).substring(7));
+                            String minNode = list.get(0);
+                            for(String s : list){
+                                Integer tempValue = new Integer(s.substring(7));
+                                //System.out.println("Temporary value: " + tempValue);
+                                if(tempValue < min) {
+                                    min = tempValue;
+                                    minNode = s;
+                                }
+                            }
+                            System.out.println("Temporary value: " + root + "/" + minNode);
+                            byte[] b = zk.getData(root + "/" + minNode,
+                            false, stat);
+                            zk.delete(root + "/" + minNode, 0);
+                            ByteBuffer buffer = ByteBuffer.wrap(b);
+                            retvalue = buffer.getInt();
+
+                            return retvalue;
+                        }
+                    }
+                }
+            }
+        }
+
+        public static void main(String args[]) {
+            if (args[0].equals("qTest"))
+                queueTest(args);
+            else
+                barrierTest(args);
+        }
+
+        public static void queueTest(String args[]) {
+            Queue q = new Queue(args[1], "/app1");
+
+            System.out.println("Input: " + args[1]);
+            int i;
+            Integer max = new Integer(args[2]);
+
+            if (args[3].equals("p")) {
+                System.out.println("Producer");
+                for (i = 0; i < max; i++)
+                    try{
+                        q.produce(10 + i);
+                    } catch (KeeperException e){
+
+                    } catch (InterruptedException e){
+
+                    }
+            } else {
+                System.out.println("Consumer");
+
+                for (i = 0; i < max; i++) {
+                    try{
+                        int r = q.consume();
+                        System.out.println("Item: " + r);
+                    } catch (KeeperException e){
+                        i--;
+                    } catch (InterruptedException e){
+                    }
+                }
+            }
+        }
+
+        public static void barrierTest(String args[]) {
+            Barrier b = new Barrier(args[1], "/b1", new Integer(args[2]));
+            try{
+                boolean flag = b.enter();
+                System.out.println("Entered barrier: " + args[2]);
+                if(!flag) System.out.println("Error when entering the barrier");
+            } catch (KeeperException e){
+            } catch (InterruptedException e){
+            }
+
+            // Generate random integer
+            Random rand = new Random();
+            int r = rand.nextInt(100);
+            // Loop for rand iterations
+            for (int i = 0; i < r; i++) {
+                try {
+                    Thread.sleep(100);
+                } catch (InterruptedException e) {
+                }
+            }
+            try{
+                b.leave();
+            } catch (KeeperException e){
+
+            } catch (InterruptedException e){
+
+            }
+            System.out.println("Left barrier");
+        }
+    }
+