فهرست منبع

ZOOKEEPER-25. Fuse module for Zookeeper. (Swee Lim, Bart, Patrick Hunt and Andrew Kornev via Mahadev)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/zookeeper/trunk@685624 13f79535-47bb-0310-9956-ffa450edef68
Mahadev Konar 17 سال پیش
والد
کامیت
6810b0a75b

+ 4 - 0
src/contrib/zkfuse/Makefile.am

@@ -0,0 +1,4 @@
+## Process this file with automake to produce Makefile.in
+
+## Recurse into src/, whose Makefile.am builds the zkfuse program.
+SUBDIRS = src
+

+ 62 - 0
src/contrib/zkfuse/README.txt

@@ -0,0 +1,62 @@
+Original authors of zkfuse are Swee Lim & Bartlomiej M Niechwiej of Yahoo.
+
+ZooKeeper FUSE (File System in Userspace)
+=========================================
+
+Pre-requisites
+--------------
+1. Linux system with 2.6.X kernel.
+2. Fuse (Filesystem in Userspace) must be installed on the build node. 
+3. Development build libraries:
+  a. fuse
+  b. log4cxx
+  c. pthread
+
+Build instructions
+------------------
+1. cd into this directory
+2. autoreconf -if
+3. ./configure
+4. make
+5. zkfuse binary is under the src directory
+
+Testing Zkfuse
+--------------
+1. Depending on permission on /dev/fuse, you may need to sudo -u root.
+   * If /dev/fuse has permissions 0600, then you have to run Zkfuse as root.
+   * If /dev/fuse has permissions 0666, then you can run Zkfuse as any user.
+2. Create or find a mount point on which you have "rwx" permission.
+   * e.g. mkdir -p /tmp/zkfuse
+3. Run Zkfuse as follows:
+   zkfuse -z <hostspec> -m /tmp/zkfuse -d
+   -z specifies ZooKeeper address(es) <host>:<port>
+   -m specifies the mount point
+   -d specifies the debug mode.
+   For additional command line options, try "zkfuse -h".
+
+FAQ
+---
+Q. How to fix "warning: macro `AM_PATH_CPPUNIT' not found in library"?
+A. * install cppunit (src or pkg) on build machine
+
+Q. Why can't Zkfuse write to the current directory?
+A. * If Zkfuse is running as root on a NFS mounted file system, it will not
+     have root permissions because root user is mapped to another user by
+     NFS admin.
+   * If you run Zkfuse as root, it is a good idea to run Zkfuse from a
+     directory that you have write access to. This will allow core files
+     to be saved.
+
+Q. Why can't Zkfuse mount?
+A. * Check that the mount point exists and you have "rwx" permissions.
+   * Check that previous mounts have been umounted. If Zkfuse does not 
+     exit cleanly, its mount point may have to be umounted manually. 
+     If you cannot umount manually, make sure that no files are open 
+     within the mount point.
+
+Q. Why does Zkfuse complain about logging at startup?
+A. * Zkfuse uses log4cxx for logging. It is looking for log4cxx.properties
+     file to obtain its logging configuration.
+   * There is an example log4cxx.properties file in the Zkfuse source 
+     directory.
+

+ 133 - 0
src/contrib/zkfuse/configure.ac

@@ -0,0 +1,133 @@
+#                                               -*- Autoconf -*-
+# Process this file with autoconf to produce a configure script.
+
+AC_PREREQ(2.59)
+
+AC_INIT([zkfuse], [2.2.0])
+AM_INIT_AUTOMAKE(foreign)
+
+AC_CONFIG_SRCDIR([src/zkadapter.h])
+AM_CONFIG_HEADER([config.h])
+
+# NOTE(review): the shell assignments below set VERSION to 1.0 although
+# AC_INIT above declares 2.2.0 -- confirm which version is intended.
+PACKAGE=zkfuse
+VERSION=1.0
+
+AC_SUBST(PACKAGE)
+AC_SUBST(VERSION)
+
+# Directory configure is run from; used below to locate the ZooKeeper C client.
+BUILD_PATH="`pwd`"
+
+# Checks for programs.
+AC_LANG_CPLUSPLUS
+AC_PROG_CXX
+
+# Checks for libraries.
+AC_CHECK_LIB([fuse], [main])
+AC_CHECK_LIB([log4cxx], [main])
+AC_CHECK_LIB([thread], [thr_create])
+AC_CHECK_LIB([pthread], [pthread_create])
+AC_CHECK_LIB([rt], [clock_gettime])
+AC_CHECK_LIB([socket], [socket])
+AC_CHECK_LIB([nsl], [gethostbyname])
+AC_CHECK_LIB([ulockmgr], [ulockmgr_op])
+
+# The ZooKeeper C client tree is looked up two directories above the build
+# directory, under c/, with its built libraries in the .libs subdirectory.
+# ZOOKEEPER_LD is only assigned when the zookeeper_mt link test succeeds; the
+# last argument adds the .libs directory to that link test.
+ZOOKEEPER_PATH=${BUILD_PATH}/../../c
+AC_CHECK_LIB(zookeeper_mt, main, [ZOOKEEPER_LD="-L${ZOOKEEPER_PATH}/.libs -lzookeeper_mt"],,["-L${ZOOKEEPER_PATH}/.libs"])
+
+# Both values are substituted into the generated Makefiles.
+AC_SUBST(ZOOKEEPER_PATH)
+AC_SUBST(ZOOKEEPER_LD)
+
+# Checks for header files.
+AC_HEADER_DIRENT
+AC_HEADER_STDC
+AC_CHECK_HEADERS([fcntl.h stdlib.h string.h sys/time.h unistd.h])
+
+# Checks for typedefs, structures, and compiler characteristics.
+AC_HEADER_STDBOOL
+AC_C_CONST
+AC_TYPE_UID_T
+AC_C_INLINE
+AC_TYPE_INT32_T
+AC_TYPE_INT64_T
+AC_TYPE_MODE_T
+AC_TYPE_OFF_T
+AC_TYPE_SIZE_T
+AC_CHECK_MEMBERS([struct stat.st_blksize])
+AC_STRUCT_ST_BLOCKS
+AC_HEADER_TIME
+AC_TYPE_UINT32_T
+AC_TYPE_UINT64_T
+AC_TYPE_UINT8_T
+AC_C_VOLATILE
+
+# Checks for library functions.
+AC_FUNC_UTIME_NULL
+AC_CHECK_FUNCS([gettimeofday memset mkdir rmdir strdup strerror strstr strtol strtoul strtoull utime])
+
+AC_CONFIG_FILES([Makefile])
+AC_CONFIG_FILES([src/Makefile])
+AC_OUTPUT
+# NOTE(review): everything from this line to the end of the file repeats the
+# configure script that precedes it, except that the AC_TYPE_INT32_T,
+# AC_TYPE_INT64_T, AC_TYPE_UINT32_T, AC_TYPE_UINT64_T and AC_TYPE_UINT8_T
+# checks are missing here. It looks like the file was pasted twice -- confirm
+# and keep a single copy.
+#                                               -*- Autoconf -*-
+# Process this file with autoconf to produce a configure script.
+
+AC_PREREQ(2.59)
+
+AC_INIT([zkfuse], [2.2.0])
+AM_INIT_AUTOMAKE(foreign)
+
+AC_CONFIG_SRCDIR([src/zkadapter.h])
+AM_CONFIG_HEADER([config.h])
+
+PACKAGE=zkfuse
+VERSION=1.0
+
+AC_SUBST(PACKAGE)
+AC_SUBST(VERSION)
+
+BUILD_PATH="`pwd`"
+
+# Checks for programs.
+AC_LANG_CPLUSPLUS
+AC_PROG_CXX
+
+# Checks for libraries.
+AC_CHECK_LIB([fuse], [main])
+AC_CHECK_LIB([log4cxx], [main])
+AC_CHECK_LIB([thread], [thr_create])
+AC_CHECK_LIB([pthread], [pthread_create])
+AC_CHECK_LIB([rt], [clock_gettime])
+AC_CHECK_LIB([socket], [socket])
+AC_CHECK_LIB([nsl], [gethostbyname])
+AC_CHECK_LIB([ulockmgr], [ulockmgr_op])
+
+ZOOKEEPER_PATH=${BUILD_PATH}/../../c
+AC_CHECK_LIB(zookeeper_mt, main, [ZOOKEEPER_LD="-L${ZOOKEEPER_PATH}/.libs -lzookeeper_mt"],,["-L${ZOOKEEPER_PATH}/.libs"])
+
+AC_SUBST(ZOOKEEPER_PATH)
+AC_SUBST(ZOOKEEPER_LD)
+
+# Checks for header files.
+AC_HEADER_DIRENT
+AC_HEADER_STDC
+AC_CHECK_HEADERS([fcntl.h stdlib.h string.h sys/time.h unistd.h])
+
+# Checks for typedefs, structures, and compiler characteristics.
+AC_HEADER_STDBOOL
+AC_C_CONST
+AC_TYPE_UID_T
+AC_C_INLINE
+AC_TYPE_MODE_T
+AC_TYPE_OFF_T
+AC_TYPE_SIZE_T
+AC_CHECK_MEMBERS([struct stat.st_blksize])
+AC_STRUCT_ST_BLOCKS
+AC_HEADER_TIME
+AC_C_VOLATILE
+
+# Checks for library functions.
+AC_FUNC_UTIME_NULL
+AC_CHECK_FUNCS([gettimeofday memset mkdir rmdir strdup strerror strstr strtol strtoul strtoull utime])
+
+AC_CONFIG_FILES([Makefile])
+AC_CONFIG_FILES([src/Makefile])
+AC_OUTPUT

+ 7 - 0
src/contrib/zkfuse/src/Makefile.am

@@ -0,0 +1,7 @@
+## Include paths for the ZooKeeper C client headers (ZOOKEEPER_PATH is
+## substituted by configure) plus large-file and reentrancy defines.
+## NOTE(review): the explicit -I/usr/include looks redundant with the
+## compiler's default search path -- confirm it is needed.
+AM_CXXFLAGS = -I${ZOOKEEPER_PATH}/include -I${ZOOKEEPER_PATH}/generated \
+  -I$(top_srcdir)/include -I/usr/include -D_FILE_OFFSET_BITS=64 -D_REENTRANT
+
+## noinst_ prefix: the zkfuse binary is built but not installed.
+noinst_PROGRAMS = zkfuse
+
+zkfuse_SOURCES = zkfuse.cc zkadapter.cc thread.cc log.cc
+## Linker flags for the multi-threaded ZooKeeper client, substituted by configure.
+zkfuse_LDADD = ${ZOOKEEPER_LD}

+ 154 - 0
src/contrib/zkfuse/src/blockingqueue.h

@@ -0,0 +1,154 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ 
+#ifndef __BLOCKINGQUEUE_H__
+#define __BLOCKINGQUEUE_H__
+ 
+#include <deque>
+
+#include "mutex.h"
+ 
+using namespace std;
+USING_ZKFUSE_NAMESPACE
+
+namespace zk {
+ 
+/**
+ * \brief An unbounded blocking queue of elements of type E.
+ * 
+ * <p>
+ * Every member function acquires the internal mutex before touching the
+ * underlying deque, so a single instance can be shared between threads.
+ */
+template <class E>
+class BlockingQueue {
+    public:
+        
+        /**
+         * \brief Appends the specified element to the tail of this queue
+         * and signals the condition variable.
+         * 
+         * The queue is unbounded, so this call never waits for space.
+         * 
+         * @param e the element to be added
+         */
+        void put(E e);
+        
+        /**
+         * \brief Retrieves and removes the head of this queue, waiting
+         * while no elements are present in this queue.
+         * 
+         * @param timeout how long to wait until an element becomes available, 
+         *                in milliseconds; if <code>0</code> or negative then wait forever
+         * @param timedOut if not NULL, set to true when the wait timed out
+         *                 and to false when an element was returned
+         * @return the element from the queue, or a default-constructed E
+         *         when the wait timed out
+         */
+        E take(int32_t timeout = 0, bool *timedOut = NULL);
+        
+        /**
+         * \brief Returns the current size of this blocking queue.
+         * 
+         * @return the number of elements in this queue
+         */
+        int size() const;
+        
+        /**
+         * \brief Returns whether this queue is empty or not.
+         * 
+         * @return true if this queue has no elements; false otherwise
+         */
+        bool empty() const;
+        
+    private:
+        
+        /**
+         * The queue of elements. Deque is used to provide O(1) time 
+         * for head elements removal.
+         */
+        deque<E> m_queue;
+        
+        /**
+         * The mutex used for queue synchronization. Declared mutable so
+         * that the const accessors can lock it.
+         */
+        mutable zkfuse::Mutex m_mutex;
+        
+        /**
+         * The condition variable associated with the mutex above;
+         * signalled by put() and waited on by take().
+         */
+        mutable Cond m_cond;
+        
+};
+
+/**
+ * \brief Returns the number of elements currently held by the queue.
+ *
+ * The count is read while holding the queue mutex and converted to int.
+ */
+template<class E>
+int BlockingQueue<E>::size() const {
+    m_mutex.Acquire();
+    const int count = m_queue.size();
+    m_mutex.Release();
+    return count;
+}
+
+/**
+ * \brief Tells whether the queue holds no elements.
+ *
+ * The check is made while holding the queue mutex.
+ *
+ * @return true if the queue has no elements; false otherwise
+ */
+template<class E>
+bool BlockingQueue<E>::empty() const {
+    m_mutex.Acquire();
+    const bool nothingQueued = m_queue.empty();
+    m_mutex.Release();
+    return nothingQueued;
+}
+
+/**
+ * \brief Appends an element to the tail of the queue and signals the
+ * condition variable.
+ *
+ * The element is copied into the deque while the mutex is held. If that
+ * copy or the deque's allocation throws, the mutex is released before the
+ * exception propagates, so other users of the queue are not left blocked
+ * on a mutex that is never released.
+ *
+ * @param e the element to be added
+ */
+template<class E>
+void BlockingQueue<E>::put(E e) {
+    m_mutex.Acquire();
+    try {
+        m_queue.push_back( e );
+    } catch (...) {
+        // Never leave the queue locked when the insertion fails.
+        m_mutex.Release();
+        throw;
+    }
+    // Signal while still holding the mutex, as the original code did.
+    m_cond.Signal();
+    m_mutex.Release();
+}
+
+/**
+ * \brief Removes and returns the head of the queue, waiting while the
+ * queue is empty.
+ *
+ * With a timeout of zero or less the call waits on the condition variable
+ * until an element is present. With a positive timeout, a false return from
+ * the timed wait is treated as a time-out and ends the call immediately,
+ * without re-checking the queue. Each wake-up that finds the queue still
+ * empty starts another wait with the full timeout value.
+ *
+ * @param timeout  wait limit passed to the timed wait; zero or negative
+ *                 means wait without a limit
+ * @param timedOut if not NULL, receives true on time-out and false when an
+ *                 element was returned
+ * @return the former head element, or a default-constructed E on time-out
+ */
+template<class E>
+E BlockingQueue<E>::take(int32_t timeout, bool *timedOut) {
+    const bool waitForever = (timeout <= 0);
+    bool expired = false;
+
+    m_mutex.Acquire();
+    while (!expired && m_queue.empty()) {
+        if (waitForever) {
+            m_cond.Wait( m_mutex );
+        } else {
+            expired = !m_cond.Wait( m_mutex, timeout );
+        }
+    }
+
+    if (expired) {
+        // Timed out: hand back a default value and report it if asked to.
+        m_mutex.Release();
+        if (timedOut) {
+            *timedOut = true;
+        }
+        return E();
+    }
+
+    // Copy the head out before removing it, all under the mutex.
+    E head = m_queue.front();
+    m_queue.pop_front();
+    m_mutex.Release();
+    if (timedOut) {
+        *timedOut = false;
+    }
+    return head;
+}
+
+}
+
+#endif  /* __BLOCKINGQUEUE_H__ */
+

+ 1226 - 0
src/contrib/zkfuse/src/doxygen.cfg

@@ -0,0 +1,1226 @@
+# Doxyfile 1.4.3
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+#       TAG = value [value, ...]
+# For lists items can also be appended using:
+#       TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded 
+# by quotes) that should identify the project.
+
+PROJECT_NAME           = ZkFuse
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number. 
+# This could be handy for archiving the generated documentation or 
+# if some version control system is used.
+
+PROJECT_NUMBER         = 
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) 
+# base path where the generated documentation will be put. 
+# If a relative path is entered, it will be relative to the location 
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = doc
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 
+# 4096 sub-directories (in 2 levels) under the output directory of each output 
+# format and will distribute the generated files over these directories. 
+# Enabling this option can be useful when feeding doxygen a huge amount of 
+# source files, where putting all generated files in the same directory would 
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS         = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all 
+# documentation generated by doxygen is written. Doxygen will use this 
+# information to generate all constant output in the proper language. 
+# The default language is English, other supported languages are: 
+# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, 
+# Dutch, Finnish, French, German, Greek, Hungarian, Italian, Japanese, 
+# Japanese-en (Japanese with English messages), Korean, Korean-en, Norwegian, 
+# Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, 
+# Swedish, and Ukrainian.
+
+OUTPUT_LANGUAGE        = English
+
+# This tag can be used to specify the encoding used in the generated output. 
+# The encoding is not always determined by the language that is chosen, 
+# but also whether or not the output is meant for Windows or non-Windows users. 
+# In case there is a difference, setting the USE_WINDOWS_ENCODING tag to YES 
+# forces the Windows encoding (this is the default for the Windows binary), 
+# whereas setting the tag to NO uses a Unix-style encoding (the default for 
+# all platforms other than Windows).
+
+USE_WINDOWS_ENCODING   = NO
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will 
+# include brief member descriptions after the members that are listed in 
+# the file and class documentation (similar to JavaDoc). 
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend 
+# the brief description of a member or function before the detailed description. 
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the 
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator 
+# that is used to form the text in various listings. Each string 
+# in this list, if found as the leading text of the brief description, will be 
+# stripped from the text and the result after processing the whole list, is 
+# used as the annotated text. Otherwise, the brief description is used as-is. 
+# If left blank, the following values are used ("$name" is automatically 
+# replaced with the name of the entity): "The $name class" "The $name widget" 
+# "The $name file" "is" "provides" "specifies" "contains" 
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF       = 
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then 
+# Doxygen will generate a detailed section even if there is only a brief 
+# description.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all 
+# inherited members of a class in the documentation of that class as if those 
+# members were ordinary class members. Constructors, destructors and assignment 
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full 
+# path before files name in the file list and in the header files. If set 
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES        = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag 
+# can be used to strip a user-defined part of the path. Stripping is 
+# only done if one of the specified strings matches the left-hand part of 
+# the path. The tag can be used to show relative paths in the file list. 
+# If left blank the directory from which doxygen is run is used as the 
+# path to strip.
+
+STRIP_FROM_PATH        = 
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of 
+# the path mentioned in the documentation of a class, which tells 
+# the reader which header file to include in order to use a class. 
+# If left blank only the name of the header file containing the class 
+# definition is used. Otherwise one should specify the include paths that 
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH    = 
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter 
+# (but less readable) file names. This can be useful if your file system 
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen 
+# will interpret the first line (until the first dot) of a JavaDoc-style 
+# comment as the brief description. If set to NO, the JavaDoc 
+# comments will behave just like the Qt-style comments (thus requiring an 
+# explicit @brief command for a brief description).
+
+JAVADOC_AUTOBRIEF      = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen 
+# treat a multi-line C++ special comment block (i.e. a block of //! or /// 
+# comments) as a brief description. This used to be the default behaviour. 
+# The new default is to treat a multi-line C++ comment block as a detailed 
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the DETAILS_AT_TOP tag is set to YES then Doxygen 
+# will output the detailed description near the top, like JavaDoc.
+# If set to NO, the detailed description appears after the member 
+# documentation.
+
+DETAILS_AT_TOP         = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented 
+# member inherits the documentation from any documented member that it 
+# re-implements.
+
+INHERIT_DOCS           = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC 
+# tag is set to YES, then doxygen will reuse the documentation of the first 
+# member in the group (if any) for the other members of the group. By default 
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce 
+# a new page for each member. If set to NO, the documentation of a member will 
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab. 
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE               = 8
+
+# This tag can be used to specify a number of aliases that acts 
+# as commands in the documentation. An alias has the form "name=value". 
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to 
+# put the command \sideeffect (or @sideeffect) in the documentation, which 
+# will result in a user-defined paragraph with heading "Side Effects:". 
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                = 
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C 
+# sources only. Doxygen will then generate output that is more tailored for C. 
+# For instance, some of the names that are used will be different. The list 
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java sources 
+# only. Doxygen will then generate output that is more tailored for Java. 
+# For instance, namespaces will be presented as packages, qualified scopes 
+# will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of 
+# the same type (for instance a group of public functions) to be put as a 
+# subgroup of that type (e.g. under the Public Functions section). Set it to 
+# NO to prevent subgrouping. Alternatively, this can be done per class using 
+# the \nosubgrouping command.
+
+SUBGROUPING            = YES
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in 
+# documentation are documented, even if no documentation was available. 
+# Private class members and static file members will be hidden unless 
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL            = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class 
+# will be included in the documentation.
+
+EXTRACT_PRIVATE        = YES
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file 
+# will be included in the documentation.
+
+EXTRACT_STATIC         = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) 
+# defined locally in source files will be included in the documentation. 
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. When set to YES local 
+# methods, which are defined in the implementation section but not in 
+# the interface are included in the documentation. 
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all 
+# undocumented members of documented classes, files or namespaces. 
+# If set to NO (the default) these members will be included in the 
+# various overviews, but no documentation section is generated. 
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all 
+# undocumented classes that are normally visible in the class hierarchy. 
+# If set to NO (the default) these classes will be included in the various 
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all 
+# friend (class|struct|union) declarations. 
+# If set to NO (the default) these declarations will be included in the 
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any 
+# documentation blocks found inside the body of a function. 
+# If set to NO (the default) these blocks will be appended to the 
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation 
+# that is typed after a \internal command is included. If the tag is set 
+# to NO (the default) then the documentation will be excluded. 
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate 
+# file names in lower-case letters. If set to YES upper-case letters are also 
+# allowed. This is useful if you have classes or files whose names only differ 
+# in case and if your file system supports case sensitive file names. Windows 
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES       = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen 
+# will show members with their full class and namespace scopes in the 
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen 
+# will put a list of the files that are included by a file in the documentation 
+# of that file.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] 
+# is inserted in the documentation for inline members.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen 
+# will sort the (detailed) documentation of file and class members 
+# alphabetically by member name. If set to NO the members will appear in 
+# declaration order.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the 
+# brief documentation of file, namespace and class members alphabetically 
+# by member name. If set to NO (the default) the members will appear in 
+# declaration order.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be 
+# sorted by fully-qualified names, including namespaces. If set to 
+# NO (the default), the class list will be sorted only by class name, 
+# not including the namespace part. 
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the 
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or 
+# disable (NO) the todo list. This list is created by putting \todo 
+# commands in the documentation.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or 
+# disable (NO) the test list. This list is created by putting \test 
+# commands in the documentation.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or 
+# disable (NO) the bug list. This list is created by putting \bug 
+# commands in the documentation.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or 
+# disable (NO) the deprecated list. This list is created by putting 
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional 
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       = 
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines 
+# the initial value of a variable or define consists of for it to appear in 
+# the documentation. If the initializer consists of more lines than specified 
+# here it will be hidden. Use a value of 0 to hide initializers completely. 
+# The appearance of the initializer of individual variables and defines in the 
+# documentation can be controlled using \showinitializer or \hideinitializer 
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated 
+# at the bottom of the documentation of classes and structs. If set to YES the 
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES        = YES
+
+# If the sources in your project are distributed over multiple directories 
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy 
+# in the documentation.
+
+SHOW_DIRECTORIES       = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that 
+# doxygen should invoke to get the current version for each file (typically from the 
+# version control system). Doxygen will invoke the program by executing (via 
+# popen()) the command <command> <input-file>, where <command> is the value of 
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file 
+# provided by doxygen. Whatever the program writes to standard output 
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER    = 
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated 
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are 
+# generated by doxygen. Possible values are YES and NO. If left blank 
+# NO is used.
+
+WARNINGS               = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings 
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will 
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for 
+# potential errors in the documentation, such as not documenting some 
+# parameters in a documented function, or documenting parameters that 
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR      = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for 
+# functions that are documented, but have no documentation for their parameters 
+# or return value. If set to NO (the default) doxygen will only warn about 
+# wrong or incomplete parameter documentation, but not about the absence of 
+# documentation.
+
+WARN_NO_PARAMDOC       = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that 
+# doxygen can produce. The string should contain the $file, $line, and $text 
+# tags, which will be replaced by the file and line number from which the 
+# warning originated and the warning text. Optionally the format may contain 
+# $version, which will be replaced by the version of the file (if it could 
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning 
+# and error messages should be written. If left blank the output is written 
+# to stderr.
+
+WARN_LOGFILE           = 
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain 
+# documented source files. You may enter file names like "myfile.cpp" or 
+# directories like "/usr/src/myproject". Separate the files or directories 
+# with spaces.
+
+INPUT                  = 
+
+# If the value of the INPUT tag contains directories, you can use the 
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp 
+# and *.h) to filter out the source-files in the directories. If left 
+# blank the following patterns are tested: 
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx 
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm
+
+FILE_PATTERNS          = 
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories 
+# should be searched for input files as well. Possible values are YES and NO. 
+# If left blank NO is used.
+
+RECURSIVE              = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should 
+# be excluded from the INPUT source files. This way you can easily exclude a 
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                = 
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or 
+# directories that are symbolic links (a Unix filesystem feature) are excluded 
+# from the input.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the 
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude 
+# certain files from those directories.
+
+EXCLUDE_PATTERNS       = 
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or 
+# directories that contain example code fragments that are included (see 
+# the \include command).
+
+EXAMPLE_PATH           = 
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the 
+# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp 
+# and *.h) to filter out the source-files in the directories. If left 
+# blank all files are included.
+
+EXAMPLE_PATTERNS       = 
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be 
+# searched for input files to be used with the \include or \dontinclude 
+# commands irrespective of the value of the RECURSIVE tag. 
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or 
+# directories that contain images that are included in the documentation (see 
+# the \image command).
+
+IMAGE_PATH             = 
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should 
+# invoke to filter for each input file. Doxygen will invoke the filter program 
+# by executing (via popen()) the command <filter> <input-file>, where <filter> 
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an 
+# input file. Doxygen will then use the output that the filter program writes 
+# to standard output.  If FILTER_PATTERNS is specified, this tag will be 
+# ignored.
+
+INPUT_FILTER           = 
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern 
+# basis.  Doxygen will compare the file name with each pattern and apply the 
+# filter if there is a match.  The filters are a list of the form: 
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further 
+# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER 
+# is applied to all files.
+
+FILTER_PATTERNS        = 
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using 
+# INPUT_FILTER) will be used to filter the input files when producing source 
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES    = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will 
+# be generated. Documented entities will be cross-referenced with these sources. 
+# Note: To get rid of all source code in the generated output, make sure also 
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER         = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body 
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct 
+# doxygen to hide any special comment blocks from generated source code 
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES (the default) 
+# then for each documented function all documented 
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES (the default) 
+# then for each documented function all documented entities 
+# called/used by that function will be listed.
+
+REFERENCES_RELATION    = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code 
+# will point to the HTML generated by the htags(1) tool instead of doxygen 
+# built-in source browser. The htags tool is part of GNU's global source 
+# tagging system (see http://www.gnu.org/software/global/global.html). You 
+# will need version 4.8.6 or higher.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen 
+# will generate a verbatim copy of the header file for each class for 
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS       = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index 
+# of all compounds will be generated. Enable this if the project 
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX     = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then 
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns 
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all 
+# classes will be put under the same header in the alphabetical index. 
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that 
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX          = 
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will 
+# generate HTML output.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for 
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank 
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for 
+# each generated HTML page. If it is left blank doxygen will generate a 
+# standard header.
+
+HTML_HEADER            = 
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for 
+# each generated HTML page. If it is left blank doxygen will generate a 
+# standard footer.
+
+HTML_FOOTER            = 
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading 
+# style sheet that is used by each HTML page. It can be used to 
+# fine-tune the look of the HTML output. If the tag is left blank doxygen 
+# will generate a default style sheet. Note that doxygen will try to copy 
+# the style sheet file to the HTML output directory, so don't put your own 
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET        = 
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, 
+# files or namespaces will be aligned in HTML using tables. If set to 
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS     = YES
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files 
+# will be generated that can be used as input for tools like the 
+# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) 
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP      = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can 
+# be used to specify the file name of the resulting .chm file. You 
+# can add a path in front of the file if the result should not be 
+# written to the html output directory.
+
+CHM_FILE               = 
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can 
+# be used to specify the location (absolute path including file name) of 
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run 
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION           = 
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag 
+# controls if a separate .chi index file is generated (YES) or that 
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI           = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag 
+# controls whether a binary table of contents is generated (YES) or a 
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members 
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND             = NO
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at 
+# top of each HTML page. The value NO (the default) enables the index and 
+# the value YES disables it.
+
+DISABLE_INDEX          = NO
+
+# This tag can be used to set the number of enum values (range [1..20]) 
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be
+# generated containing a tree-like index structure (just like the one that 
+# is generated for HTML Help). For this to work a browser that supports 
+# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, 
+# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are 
+# probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW      = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be 
+# used to set the initial width (in pixels) of the frame in which the tree 
+# is shown.
+
+TREEVIEW_WIDTH         = 250
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will 
+# generate Latex output.
+
+GENERATE_LATEX         = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be 
+# invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to 
+# generate index for LaTeX. If left blank `makeindex' will be used as the 
+# default command name.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact 
+# LaTeX documents. This may be useful for small projects and may help to 
+# save some trees in general.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used 
+# by the printer. Possible values are: a4, a4wide, letter, legal and 
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE             = a4wide
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX 
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES         = 
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for 
+# the generated latex document. The header should contain everything until 
+# the first chapter. If it is left blank doxygen will generate a 
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER           = 
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated 
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will 
+# contain links (just like the HTML output) instead of page references. 
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS         = NO
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of 
+# plain latex in the generated Makefile. Set this option to YES to get a 
+# higher quality PDF documentation.
+
+USE_PDFLATEX           = NO
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode 
+# command to the generated LaTeX files. This will instruct LaTeX to keep 
+# running if errors occur, instead of asking the user for help. 
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE        = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not 
+# include the index chapters (such as File Index, Compound Index, etc.) 
+# in the output.
+
+LATEX_HIDE_INDICES     = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output. 
+# The RTF output is optimized for Word 97 and may not look very pretty with 
+# other RTF readers or editors.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact 
+# RTF documents. This may be useful for small projects and may help to 
+# save some trees in general.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated 
+# will contain hyperlink fields. The RTF file will 
+# contain links (just like the HTML output) instead of page references. 
+# This makes the output suitable for online browsing using WORD or other 
+# programs which support those fields. 
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's 
+# config file, i.e. a series of assignments. You only have to provide 
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE    = 
+
+# Set optional variables used in the generation of an rtf document. 
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE    = 
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will 
+# generate man pages.
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to 
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION          = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output, 
+# then it will generate one additional man file for each entity 
+# documented in the real man page(s). These additional files 
+# only source the real man page, but without them the man command 
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will 
+# generate an XML file that captures the structure of 
+# the code including all documentation.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT             = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema, 
+# which can be used by a validating XML parser to check the 
+# syntax of the XML files.
+
+XML_SCHEMA             = 
+
+# The XML_DTD tag can be used to specify an XML DTD, 
+# which can be used by a validating XML parser to check the 
+# syntax of the XML files.
+
+XML_DTD                = 
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will 
+# dump the program listings (including syntax highlighting 
+# and cross-referencing information) to the XML output. Note that 
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING     = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will 
+# generate an AutoGen Definitions (see autogen.sf.net) file 
+# that captures the structure of the code including all 
+# documentation. Note that this feature is still experimental 
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will 
+# generate a Perl module file that captures the structure of 
+# the code including all documentation. Note that this 
+# feature is still experimental and incomplete at the 
+# moment.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate 
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able 
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be 
+# nicely formatted so it can be parsed by a human reader.  This is useful 
+# if you want to understand what is going on.  On the other hand, if this 
+# tag is set to NO the size of the Perl module output will be much smaller 
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file 
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. 
+# This is useful so different doxyrules.make files included by the same 
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor   
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will 
+# evaluate all C-preprocessor directives found in the sources and include 
+# files.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro 
+# names in the source code. If set to NO (the default) only conditional 
+# compilation will be performed. Macro expansion can be done in a controlled 
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION        = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES 
+# then the macro expansion is limited to the macros specified with the 
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF     = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files 
+# in the INCLUDE_PATH (see below) will be searched if a #include is found.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that 
+# contain include files that are not input files but should be processed by 
+# the preprocessor.
+
+INCLUDE_PATH           = 
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard 
+# patterns (like *.h and *.hpp) to filter out the header-files in the 
+# directories. If left blank, the patterns specified with FILE_PATTERNS will 
+# be used.
+
+INCLUDE_FILE_PATTERNS  = 
+
+# The PREDEFINED tag can be used to specify one or more macro names that 
+# are defined before the preprocessor is started (similar to the -D option of 
+# gcc). The argument of the tag is a list of macros of the form: name 
+# or name=definition (no spaces). If the definition and the = are 
+# omitted =1 is assumed. To prevent a macro definition from being 
+# undefined via #undef or recursively expanded use the := operator 
+# instead of the = operator.
+
+PREDEFINED             = 
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then 
+# this tag can be used to specify a list of macro names that should be expanded. 
+# The macro definition that is found in the sources will be used. 
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      = 
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then 
+# doxygen's preprocessor will remove all function-like macros that are alone 
+# on a line, have an all uppercase name, and do not end with a semicolon. Such 
+# function macros are typically used for boiler-plate code, and will confuse 
+# the parser if not removed.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to external references   
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles. 
+# Optionally an initial location of the external documentation 
+# can be added for each tagfile. The format of a tag file without 
+# this location is as follows: 
+#   TAGFILES = file1 file2 ... 
+# Adding location for the tag files is done as follows: 
+#   TAGFILES = file1=loc1 "file2 = loc2" ... 
+# where "loc1" and "loc2" can be relative or absolute paths or 
+# URLs. If a location is present for each tag, the installdox tool 
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen 
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES               = 
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create 
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE       = 
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed 
+# in the class index. If set to NO only the inherited external classes 
+# will be listed.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed 
+# in the modules index. If set to NO, only the current project's groups will 
+# be listed.
+
+EXTERNAL_GROUPS        = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script 
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH              = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool   
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will 
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base 
+# or super classes. Setting the tag to NO turns the diagrams off. Note that 
+# this option is superseded by the HAVE_DOT option below. This is only a 
+# fallback. It is recommended to install and use dot, since it yields more 
+# powerful graphs.
+
+CLASS_DIAGRAMS         = YES
+
+# If set to YES, the inheritance and collaboration graphs will hide 
+# inheritance and usage relations if the target is undocumented 
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is 
+# available from the path. This tool is part of Graphviz, a graph visualization 
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section 
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT               = NO
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graph for each documented class showing the direct and 
+# indirect inheritance relations. Setting this tag to YES will force the 
+# CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graph for each documented class showing the direct and 
+# indirect implementation dependencies (inheritance, containment, and 
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graph for groups, showing the direct group dependencies.
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and 
+# collaboration diagrams in a style similar to the OMG's Unified Modeling 
+# Language.
+
+UML_LOOK               = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the 
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT 
+# tags are set to YES then doxygen will generate a graph for each documented 
+# file showing the direct and indirect include dependencies of the file with 
+# other documented files.
+
+INCLUDE_GRAPH          = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and 
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each 
+# documented header file showing the documented files that directly or 
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH and HAVE_DOT tags are set to YES then doxygen will 
+# generate a call dependency graph for every global function or class method. 
+# Note that enabling this option will significantly increase the time of a run. 
+# So in most cases it will be better to enable call graphs for selected 
+# functions only using the \callgraph command.
+
+CALL_GRAPH             = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES 
+# then doxygen will show the dependencies a directory has on other directories 
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images 
+# generated by dot. Possible values are png, jpg, or gif.
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT       = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be 
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH               = 
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that 
+# contain dot files that are included in the documentation (see the 
+# \dotfile command).
+
+DOTFILE_DIRS           = 
+
+# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width 
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than 
+# this value, doxygen will try to truncate the graph, so that it fits within 
+# the specified constraint. Beware that most browsers cannot cope with very 
+# large images.
+
+MAX_DOT_GRAPH_WIDTH    = 1024
+
+# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height 
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than 
+# this value, doxygen will try to truncate the graph, so that it fits within 
+# the specified constraint. Beware that most browsers cannot cope with very 
+# large images.
+
+MAX_DOT_GRAPH_HEIGHT   = 1024
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the 
+# graphs generated by dot. A depth value of 3 means that only nodes reachable 
+# from the root by following a path via at most 3 edges will be shown. Nodes 
+# that lie further from the root node will be omitted. Note that setting this 
+# option to 1 or 2 may greatly reduce the computation time needed for large 
+# code bases. Also note that a graph may be further truncated if the graph's 
+# image dimensions are not sufficient to fit the graph (see MAX_DOT_GRAPH_WIDTH 
+# and MAX_DOT_GRAPH_HEIGHT). If 0 is used for the depth value (the default), 
+# the graph is not depth-constrained.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent 
+# background. This is disabled by default, which results in a white background. 
+# Warning: Depending on the platform used, enabling this option may lead to 
+# badly anti-aliased labels on the edges of a graph (i.e. they become hard to 
+# read).
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output 
+# files in one run (i.e. multiple -o and -T options on the command line). This 
+# makes dot run faster, but since only newer versions of dot (>1.8.10) 
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will 
+# generate a legend page explaining the meaning of the various boxes and 
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will 
+# remove the intermediate dot files that are used to generate 
+# the various graphs.
+
+DOT_CLEANUP            = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the search engine   
+#---------------------------------------------------------------------------
+
+# The SEARCHENGINE tag specifies whether or not a search engine should be 
+# used. If set to NO the values of all tags below this one will be ignored.
+
+SEARCHENGINE           = NO

+ 29 - 0
src/contrib/zkfuse/src/event.cc

@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "event.h"
+
+#define LOG_LEVEL LOG_FATAL
+#define MODULE_NAME "Event"
+
+using namespace std;
+
+namespace zkfuse {
+
+}       /* end of 'namespace zkfuse' (empty in this file) */
+

+ 553 - 0
src/contrib/zkfuse/src/event.h

@@ -0,0 +1,553 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __EVENT_H__
+#define __EVENT_H__
+
+#include <string>
+#include <set>
+#include <deque>
+#include <algorithm>
+#ifdef GCC4
+#   include <tr1/memory>
+using namespace std::tr1;
+#else
+#   include <boost/shared_ptr.hpp>
+using namespace boost;
+#endif
+
+#include "log.h"
+#include "blockingqueue.h"
+#include "mutex.h"
+#include "thread.h"
+
+using namespace std;
+using namespace zk;
+
+namespace zkfuse {
+
+//forward declaration of EventSource
+template<typename E>
+class EventSource;
+
+/**
+ * \brief Interface implemented by an observer of a particular
+ * {@link EventSource}.
+ *
+ * Listeners are registered with a source by raw pointer, so the
+ * interface is polymorphic and provides a virtual destructor.
+ */
+template<typename E>
+class EventListener {
+    public:
+
+        /**
+         * \brief Destructor.
+         *
+         * Virtual so that an implementation can be destroyed safely
+         * through a pointer to this interface.
+         */
+        virtual ~EventListener() {}
+
+        /**
+         * \brief Invoked whenever an event has been received by the
+         * event source being observed.
+         *
+         * @param source the source that triggered the event
+         * @param e      the actual event being triggered
+         */
+        virtual void eventReceived(const EventSource<E> &source, const E &e) = 0;
+};
+
+/**
+ * \brief This class represents a source of events.
+ * 
+ * <p>
+ * Each source can have many observers (listeners) attached to it
+ * and, in case of an event, the source propagates the event to them
+ * using the protected {@link #fireEvent} methods.
+ *
+ * <p>
+ * Listeners are held as raw pointers in a set: the source does not own
+ * them, and registering the same pointer twice has no additional effect.
+ * The listener set is read and modified without any locking in this class.
+ */
+template<typename E>           
+class EventSource {
+    public:
+        
+        /**
+         * \brief The type of the collection of registered event listeners
+         * \brief (a set of non-owning listener pointers).
+         */
+        typedef set<EventListener<E> *> EventListeners;
+        
+        /**
+         * \brief Registers a new event listener.
+         * 
+         * @param listener the listener to be added to the set of listeners
+         */
+        void addListener(EventListener<E> *listener) {
+            m_listeners.insert( listener );
+        }
+        
+        /**
+         * \brief Removes an already registered listener.
+         * 
+         * Removing a listener that is not registered is a no-op.
+         *
+         * @param listener the listener to be removed
+         */
+        void removeListener(EventListener<E> *listener) {
+            m_listeners.erase( listener );
+        }
+        
+        /**
+         * \brief Destructor.
+         */
+        virtual ~EventSource() {}
+        
+    protected:
+        
+        /**
+         * \brief Fires the given event to all registered listeners.
+         * 
+         * <p>
+         * This method iterates over all listeners and invokes
+         * {@link fireEvent(EventListener<E> *listener, const E &event)}
+         * for each element. This overload is not virtual; derived classes
+         * that want different error handling override the virtual
+         * per-listener overload instead.
+         * 
+         * @param event the event to be propagated to all listeners
+         */
+        void fireEvent(const E &event);
+        
+        /**
+         * \brief Sends an event to the given listener.
+         * 
+         * @param listener the listener to which the event is passed
+         * @param event the event to be handled
+         */
+        virtual void fireEvent(EventListener<E> *listener, const E &event);
+        
+    private:
+        
+        /**
+         * The set of registered event listeners (not owned).
+         */
+        EventListeners m_listeners;            
+    
+};
+
+/**
+ * \brief The interface of a generic event wrapper.
+ *
+ * It hides the concrete type of a wrapped event behind an untyped
+ * pointer so that events of different types can be carried uniformly.
+ */
+class AbstractEventWrapper {
+    public:
+        
+        /**
+         * \brief Destructor (virtual: wrappers are destroyed through
+         * \brief pointers to this interface).
+         */
+        virtual ~AbstractEventWrapper() {}
+        
+        /**
+         * \brief Returns the underlying wrapped data.
+         *
+         * @return an untyped pointer to the wrapped data; the caller has
+         *         to know the concrete type in order to cast it back
+         */
+        virtual void *getWrapee() = 0;
+};
+
+/**
+ * \brief A template based implementation of {@link AbstractEventWrapper}.
+ *
+ * The wrapper stores its own copy of the event passed to the constructor
+ * and {@link #getWrapee} returns the address of that copy, so the
+ * returned pointer is only valid while the wrapper itself is alive.
+ */
+template<typename E>
+class EventWrapper : public AbstractEventWrapper {
+    public:
+        EventWrapper(const E &e) : m_e(e) {   // copies the event
+        }
+        void *getWrapee() {
+            return &m_e;                      // points at the stored copy (type E)
+        }
+    private:
+        E m_e;                                // the wrapped event, held by value
+};
+
+/**
+ * \brief This class represents a generic event.
+ *
+ * A generic event pairs an integer type tag with a shared, type-erased
+ * wrapper around the actual event data. Copies of a generic event share
+ * the same wrapper.
+ */
+class GenericEvent {
+    public:
+
+        /**
+         * \brief Constructor.
+         *
+         * Creates an event of type 0 that carries no data;
+         * {@link #getEvent} returns NULL for such an event.
+         */
+        GenericEvent() : m_type(0) {}
+
+        /**
+         * \brief Constructor.
+         *
+         * @param type the type of this event
+         * @param eventWrapper the wrapper around the event's data; this
+         *                     object takes (shared) ownership of it
+         */
+        GenericEvent(int type, AbstractEventWrapper *eventWrapper) :
+            m_type(type), m_eventWrapper(eventWrapper) {
+        }
+
+        /**
+         * \brief Returns the type of this event.
+         *
+         * @return type of this event
+         */
+        int getType() const { return m_type; }
+
+        /**
+         * \brief Returns the event's data.
+         *
+         * @return an untyped pointer to the wrapped event data, or NULL
+         *         if this event carries no wrapper (for example when it
+         *         was default-constructed)
+         */
+        void *getEvent() const {
+            //guard against dereferencing an empty shared pointer
+            AbstractEventWrapper *wrapper = m_eventWrapper.get();
+            return (wrapper != NULL) ? wrapper->getWrapee() : NULL;
+        }
+
+    private:
+
+        /**
+         * The event type.
+         */
+        int m_type;
+
+        /**
+         * The event represented as abstract wrapper.
+         */
+        shared_ptr<AbstractEventWrapper> m_eventWrapper;
+
+};
+    
+/**
+ * \brief Adapter that turns typed events into {@link GenericEvent}s.
+ *
+ * The adapter listens on an {@link EventSource} of events of type E and
+ * re-fires every event it receives to its own listeners as a
+ * {@link GenericEvent} tagged with the compile-time constant
+ * <code>type</code>.
+ */
+template<typename E, const int type>
+class EventListenerAdapter : public virtual EventListener<E>,
+                             public virtual EventSource<GenericEvent>
+{
+    public:
+
+        /**
+         * \brief Constructor; registers this adapter with the source.
+         *
+         * @param eventSource the source on which to register this listener
+         */
+        EventListenerAdapter(EventSource<E> &eventSource) {
+            eventSource.addListener(this);
+        }
+
+        /**
+         * \brief Wraps a copy of the received event and forwards it.
+         *
+         * @param source the source that triggered the event (unused)
+         * @param e      the event to be wrapped and re-fired
+         */
+        void eventReceived(const EventSource<E> &source, const E &e) {
+            //the generic event takes ownership of the new wrapper
+            GenericEvent generic(type, new EventWrapper<E>(e));
+            fireEvent( generic );
+        }
+
+};
+
+/**
+ * \brief Adapter between asynchronous and synchronous event handling.
+ *
+ * <p>
+ * Every event delivered to this listener is put into an internal
+ * blocking queue; consumers pull the events one by one through
+ * {@link #getNextEvent()}.
+ */
+template<typename E>
+class SynchronousEventAdapter : public EventListener<E> {
+    public:
+
+        /**
+         * \brief Queues the received event.
+         *
+         * @param source the source that triggered the event (unused)
+         * @param e      the event to be queued
+         */
+        void eventReceived(const EventSource<E> &source, const E &e) {
+            m_queue.put( e );
+        }
+
+        /**
+         * \brief Takes the next event from the underlying queue,
+         * \brief possibly blocking if no event is available.
+         *
+         * @return the next available event
+         */
+        E getNextEvent() {
+            return m_queue.take();
+        }
+
+        /**
+         * \brief Tells whether at least one event is queued.
+         *
+         * @return true if there is at least one event and
+         *         the next call to {@link #getNextEvent} won't block
+         */
+        bool hasEvents() const {
+            return !m_queue.empty();
+        }
+
+        /**
+         * \brief Destructor.
+         */
+        virtual ~SynchronousEventAdapter() {}
+
+    private:
+
+        /**
+         * The blocking queue holding the events received so far.
+         */
+        BlockingQueue<E> m_queue;
+
+};
+
+/**
+ * This typedef defines the type of a timer Id.
+ */
+typedef int32_t TimerId;
+
+/**
+ * This class represents a timer event parametrized by the user's data type.
+ * It is a plain value holding an ID, an alarm time and a copy of the
+ * user's data.
+ */
+template<typename T>
+class TimerEvent {
+    public:
+       
+        /**
+         * \brief Constructor.
+         * 
+         * @param id the ID of this event
+         * @param alarmTime when this event is to be triggered
+         * @param userData the user data associated with this event (copied)
+         */
+        TimerEvent(TimerId id, int64_t alarmTime, const T &userData) :
+            m_id(id), m_alarmTime(alarmTime), m_userData(userData) 
+        {}     
+
+        /**
+         * \brief Constructor.
+         *
+         * Creates an event with ID -1, alarm time -1 and
+         * default-initialized user data.
+         */
+        TimerEvent() : m_id(-1), m_alarmTime(-1) {}
+                           
+        /**
+         * \brief Returns the ID.
+         * 
+         * @return the ID of this event
+         */
+        TimerId getID() const { return m_id; }
+        
+        /**
+         * \brief Returns the alarm time.
+         * 
+         * @return the alarm time
+         */
+        int64_t getAlarmTime() const { return m_alarmTime; }
+              
+        /**
+         * \brief Returns the user's data.
+         * 
+         * @return the user's data
+         */
+        T const &getUserData() const { return m_userData; }
+        
+        /**
+         * \brief Returns whether this event's alarm time is earlier than 
+         * \brief the given alarm time.
+         *
+         * @param alarmTime the time to compare this event's alarm time with
+         * @return true if this event's alarm time is strictly less than
+         *         <code>alarmTime</code>
+         */
+        bool operator<(const int64_t alarmTime) const {
+            return m_alarmTime < alarmTime;
+        }
+        
+    private:
+        
+        /**
+         * The ID of this event.
+         */
+        TimerId m_id;
+        
+        /**
+         * The time at which this event triggers.
+         */
+        int64_t m_alarmTime;    
+        
+        /**
+         * The user specific data associated with this event.
+         */
+        T m_userData;
+        
+};
+
+/**
+ * \brief A timer that delivers {@link TimerEvent}s to its listeners.
+ *
+ * <p>
+ * Scheduled events are kept in a queue ordered by alarm time. A worker
+ * thread, started by the constructor and joined by the destructor,
+ * waits for the front event to become due and then fires it to all
+ * registered listeners. Listeners are invoked on the worker thread,
+ * after the queue lock has been released.
+ */
+template<typename T>
+class Timer : public EventSource<TimerEvent<T> > {
+    public:
+
+        /**
+         * \brief Constructor; starts the worker thread.
+         */
+        Timer() : m_currentEventID(0), m_terminating(false) {
+            m_workerThread.Create( *this, &Timer<T>::sendAlarms );
+        }
+
+        /**
+         * \brief Destructor; stops and joins the worker thread.
+         */
+        ~Timer() {
+            //the flag is set and signaled while holding the lock so that
+            //the worker either sees it before it starts waiting or is
+            //already waiting and gets woken up; otherwise the wake-up
+            //could be lost and Join() would block for a whole wait period
+            m_lock.lock();
+            m_terminating = true;
+            m_lock.notify();
+            m_lock.unlock();
+            m_workerThread.Join();
+        }
+
+        /**
+         * \brief Schedules the given event <code>timeFromNow</code>
+         * \brief milliseconds from now.
+         *
+         * @param timeFromNow time from now, in milliseconds, when the event
+         *                    should be triggered
+         * @param userData the user data associated with the timer event
+         *
+         * @return the ID of the newly created timer event
+         */
+        TimerId scheduleAfter(int64_t timeFromNow, const T &userData) {
+            return scheduleAt( getCurrentTimeMillis() + timeFromNow, userData );
+        }
+
+        /**
+         * \brief Schedules an event at the given time.
+         *
+         * The event is inserted in front of the first queued event whose
+         * alarm time is not less than <code>absTime</code>, which keeps
+         * the queue ordered by alarm time.
+         *
+         * @param absTime absolute time, in milliseconds, at which the event
+         *                should be triggered; the time is measured
+         *                from Jan 1st, 1970
+         * @param userData the user data associated with the timer event
+         *
+         * @return the ID of the newly created timer event
+         */
+        TimerId scheduleAt(int64_t absTime, const T &userData) {
+            m_lock.lock();
+            typename QueueType::iterator pos =
+                    lower_bound( m_queue.begin(), m_queue.end(), absTime );
+            TimerId id = m_currentEventID++;
+            TimerEvent<T> event(id, absTime, userData);
+            m_queue.insert( pos, event );
+            //wake up the worker, the new event may be the earliest one
+            m_lock.notify();
+            m_lock.unlock();
+            return id;
+        }
+
+        /**
+         * \brief Returns the current time since Jan 1, 1970, in milliseconds.
+         *
+         * @return the current time in milliseconds
+         */
+        static int64_t getCurrentTimeMillis() {
+            struct timeval now;
+            gettimeofday( &now, NULL );
+            return now.tv_sec * 1000LL + now.tv_usec / 1000;
+        }
+
+        /**
+         * \brief Cancels the given timer event.
+         *
+         * Only events that are still queued can be canceled.
+         *
+         * @param eventID the ID of the event to be canceled
+         *
+         * @return whether the event has been canceled
+         */
+        bool cancelAlarm(TimerId eventID) {
+            bool canceled = false;
+            m_lock.lock();
+            typename QueueType::iterator i;
+            for (i = m_queue.begin(); i != m_queue.end(); ++i) {
+                if (eventID == i->getID()) {
+                    m_queue.erase( i );
+                    canceled = true;
+                    break;
+                }
+            }
+            m_lock.unlock();
+            return canceled;
+        }
+
+        /**
+         * Executes the main loop of the worker thread.
+         */
+        void sendAlarms() {
+            //iterate until terminating
+            while (true) {
+                m_lock.lock();
+                //the termination flag is tested under the lock, see ~Timer()
+                if (m_terminating) {
+                    m_lock.unlock();
+                    break;
+                }
+                //1 step - wait until there is an event in the queue
+                if (m_queue.empty()) {
+                    //wait up to 100ms to get next event
+                    m_lock.wait( 100 );
+                }
+                if (!m_queue.empty()) {
+                    //retrieve the event from the queue and send it
+                    TimerEvent<T> event = m_queue.front();
+                    bool fire = false;
+                    //check whether we can send it right away
+                    int64_t timeToWait =
+                        event.getAlarmTime() - getCurrentTimeMillis();
+                    if (timeToWait <= 0) {
+                        m_queue.pop_front();
+                        //we fire only if it's still in the queue and alarm
+                        //time has just elapsed (in case the top event
+                        //is canceled)
+                        fire = true;
+                    } else {
+                        //not due yet; the queue is re-examined after the
+                        //wait because it may have changed in the meantime
+                        m_lock.wait( timeToWait );
+                    }
+                    m_lock.unlock();
+                    if (fire) {
+                        //fireEvent is a member of a dependent base class
+                        //and therefore has to be qualified with this->
+                        this->fireEvent( event );
+                    }
+                } else {
+                    m_lock.unlock();
+                }
+            }
+        }
+
+    private:
+
+        /**
+         * The type of timer events queue.
+         */
+        typedef deque<TimerEvent<T> > QueueType;
+
+        /**
+         * The current event ID, auto-incremented each time a new event
+         * is created.
+         */
+        TimerId m_currentEventID;
+
+        /**
+         * The queue of timer events sorted by {@link TimerEvent#alarmTime}.
+         */
+        QueueType m_queue;
+
+        /**
+         * The lock used to guard {@link #m_queue} and
+         * {@link #m_terminating}.
+         */
+        Lock m_lock;
+
+        /**
+         * The thread that triggers alarms.
+         */
+        CXXThread<Timer<T> > m_workerThread;
+
+        /**
+         * Whether {@link #m_workerThread}  is terminating.
+         */
+        volatile bool m_terminating;
+
+};
+
+/**
+ * Delivers the event to every registered listener, one at a time, through
+ * the per-listener overload.
+ */
+template<typename E>
+void EventSource<E>::fireEvent(const E &event) {
+    typename EventListeners::iterator current = m_listeners.begin();
+    while (current != m_listeners.end()) {
+        fireEvent( *current, event );
+        ++current;
+    }
+}
+
+/**
+ * Delivers the event to a single listener, passing this source as the
+ * origin of the event.
+ */
+template<typename E>
+void EventSource<E>::fireEvent(EventListener<E> *listener, const E &event) {
+    EventListener<E> &target = *listener;
+    target.eventReceived( *this, event );
+}
+        
+}   /* end of 'namespace zkfuse' */
+
+#endif /* __EVENT_H__ */

+ 36 - 0
src/contrib/zkfuse/src/log.cc

@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string>
+  
+#include "log.h"
+
+using namespace std;
+  
+/**
+ * \brief Loads a log4cxx configuration when constructed.
+ */
+class LogConfiguration {
+    public:
+        /**
+         * \brief Configures log4cxx from the given properties file and
+         * \brief asks log4cxx to keep watching that file.
+         *
+         * @param file the name of the log4cxx properties file
+         */
+        LogConfiguration(const string &file) {
+            //delay handed to configureAndWatch
+            //(presumably milliseconds; confirm against the log4cxx docs)
+            const long watchDelay = 5000;
+            PropertyConfigurator::configureAndWatch( file, watchDelay );
+        }
+};
+
+//file-scope instance whose constructor performs the configuration
+//during static initialization of this translation unit
+static LogConfiguration logConfig( "log4cxx.properties" );

+ 116 - 0
src/contrib/zkfuse/src/log.h

@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOG_H__
+#define __LOG_H__
+
+#define ZKFUSE_NAMESPACE zkfuse
+#define START_ZKFUSE_NAMESPACE namespace ZKFUSE_NAMESPACE {
+#define END_ZKFUSE_NAMESPACE   }
+#define USING_ZKFUSE_NAMESPACE using namespace ZKFUSE_NAMESPACE;
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <pthread.h>
+
+#include <log4cxx/logger.h> 
+#include <log4cxx/propertyconfigurator.h> 
+#include <log4cxx/helpers/exception.h> 
+using namespace log4cxx; 
+using namespace log4cxx::helpers;
+/**
+ * Helper macros used by the logging front-ends.
+ *
+ * PRINTIP(x)  expands to the first four bytes of x's storage as four
+ *             comma-separated values; IPFMT is the matching printf format.
+ *             NOTE(review): presumably x is a 32-bit IPv4 address - confirm.
+ * DECLARE_LOGGER(varName)
+ *             declares an external LoggerPtr variable.
+ * DEFINE_LOGGER(varName, logName)
+ *             defines a file-local (static) LoggerPtr initialized with the
+ *             logger named logName.
+ * MAX_BUFFER_SIZE
+ *             size, in bytes, of the buffer a log message is formatted into.
+ * SPRINTF_LOG_MSG(buffer, fmt, args...)
+ *             declares a char array called `buffer` in the current scope
+ *             and formats the message into it with snprintf, which
+ *             truncates messages longer than MAX_BUFFER_SIZE - 1 characters.
+ */
+#define PRINTIP(x) ((uint8_t*)&x)[0], ((uint8_t*)&x)[1], \
+                   ((uint8_t*)&x)[2], ((uint8_t*)&x)[3]
+
+#define IPFMT "%u.%u.%u.%u"
+
+#define DECLARE_LOGGER(varName) \
+extern LoggerPtr varName;
+
+#define DEFINE_LOGGER(varName, logName) \
+static LoggerPtr varName = Logger::getLogger( logName );
+
+#define MAX_BUFFER_SIZE 20000
+
+#define SPRINTF_LOG_MSG(buffer, fmt, args...) \
+    char buffer[MAX_BUFFER_SIZE]; \
+    snprintf( buffer, MAX_BUFFER_SIZE, fmt, ##args );
+
+/* Printf-style logging front-ends, one per severity level.
+ *
+ * Each macro first asks the logger whether the level is enabled and only
+ * then formats the message into a local buffer named __tmp (using
+ * SPRINTF_LOG_MSG) and passes it to the matching LOG4CXX_* macro, so the
+ * formatting cost is only paid for enabled levels.
+ *
+ * Older versions of log4cxx don't support tracing: when LOG4CXX_TRACE is
+ * not defined, LOG_TRACE logs at the debug level instead.
+ *
+ * NOTE(review): every macro expands to a bare `if (...) { ... }`
+ * statement, so it cannot be used as the unbraced body of an if that has
+ * an else branch.
+ */
+#ifdef LOG4CXX_TRACE
+#define LOG_TRACE(logger, fmt, args...) \
+    if (logger->isTraceEnabled()) { \
+        SPRINTF_LOG_MSG( __tmp, fmt, ##args ); \
+        LOG4CXX_TRACE( logger, __tmp ); \
+    }
+#else
+#define LOG_TRACE(logger, fmt, args...) \
+    if (logger->isDebugEnabled()) { \
+        SPRINTF_LOG_MSG( __tmp, fmt, ##args ); \
+        LOG4CXX_DEBUG( logger, __tmp ); \
+    }
+#endif
+
+#define LOG_DEBUG(logger, fmt, args...) \
+    if (logger->isDebugEnabled()) { \
+        SPRINTF_LOG_MSG( __tmp, fmt, ##args ); \
+        LOG4CXX_DEBUG( logger, __tmp ); \
+    }
+
+#define LOG_INFO(logger, fmt, args...) \
+    if (logger->isInfoEnabled()) { \
+        SPRINTF_LOG_MSG( __tmp, fmt, ##args ); \
+        LOG4CXX_INFO( logger, __tmp ); \
+    }
+
+#define LOG_WARN(logger, fmt, args...) \
+    if (logger->isWarnEnabled()) { \
+        SPRINTF_LOG_MSG( __tmp, fmt, ##args ); \
+        LOG4CXX_WARN( logger, __tmp ); \
+    }
+
+#define LOG_ERROR(logger, fmt, args...) \
+    if (logger->isErrorEnabled()) { \
+        SPRINTF_LOG_MSG( __tmp, fmt, ##args ); \
+        LOG4CXX_ERROR( logger, __tmp ); \
+    }
+
+#define LOG_FATAL(logger, fmt, args...) \
+    if (logger->isFatalEnabled()) { \
+        SPRINTF_LOG_MSG( __tmp, fmt, ##args ); \
+        LOG4CXX_FATAL( logger, __tmp ); \
+    }
+/**
+ * TRACE(logger, x) - scope tracing helper.
+ *
+ * Unless DISABLE_TRACE is defined, the macro defines a local class named
+ * Trace and an instance of it named traceObj, constructed from the
+ * pointer x. The constructor logs an "Enter" message and the destructor
+ * (run when traceObj goes out of scope) logs an "Exit" message, both
+ * through LOG_TRACE and both including __PRETTY_FUNCTION__ and the
+ * pointer value. Because of the fixed names, the macro can be used at
+ * most once per scope.
+ *
+ * When DISABLE_TRACE is defined the macro expands to nothing.
+ */
+#ifdef DISABLE_TRACE
+#   define TRACE(logger, x)
+#else   
+#   define TRACE(logger, x) \
+class Trace { \
+ public: \
+    Trace(const void* p) : _p(p) { \
+        LOG_TRACE(logger, "%s %p Enter", __PRETTY_FUNCTION__, p); \
+    } \
+    ~Trace() { \
+        LOG_TRACE(logger, "%s %p Exit", __PRETTY_FUNCTION__, _p); \
+    } \
+    const void* _p; \
+} traceObj(x);
+#endif  /* DISABLE_TRACE */
+    
+#endif  /* __LOG_H__ */
+

+ 12 - 0
src/contrib/zkfuse/src/log4cxx.properties

@@ -0,0 +1,12 @@
+# Set root logger level to TRACE and its only appender to A1.
+log4j.rootLogger=TRACE, A1
+
+# A1 is set to be a ConsoleAppender.
+log4j.appender.A1=org.apache.log4cxx.ConsoleAppender
+
+# A1 uses PatternLayout.
+log4j.appender.A1.layout=org.apache.log4cxx.PatternLayout
+log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+log4j.category.zkfuse=TRACE
+

+ 169 - 0
src/contrib/zkfuse/src/mutex.h

@@ -0,0 +1,169 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MUTEX_H__
+#define __MUTEX_H__
+
+#include <pthread.h>
+#include <errno.h>
+#include <sys/time.h>
+
+#include "log.h"
+
+START_ZKFUSE_NAMESPACE
+
+class Cond;
+
+/**
+ * A recursive mutex built on top of a pthread mutex.
+ *
+ * The underlying mutex is created with the PTHREAD_MUTEX_RECURSIVE_NP
+ * type. Cond is a friend so that it can wait on the raw pthread mutex.
+ */
+class Mutex {
+    friend class Cond;
+  public:
+    /** Creates the mutex attributes and the recursive mutex. */
+    Mutex() {
+        pthread_mutexattr_init( &m_mutexAttr );
+        pthread_mutexattr_settype( &m_mutexAttr, PTHREAD_MUTEX_RECURSIVE_NP );
+        pthread_mutex_init( &mutex, &m_mutexAttr );
+    }
+
+    /** Destroys the mutex first and then its attributes. */
+    ~Mutex() {
+        pthread_mutex_destroy( &mutex );
+        pthread_mutexattr_destroy( &m_mutexAttr );
+    }
+
+    /** Blocks until the mutex has been acquired. */
+    void Lock() {
+        pthread_mutex_lock( &mutex );
+    }
+
+    /**
+     * Tries to acquire the mutex without blocking.
+     * Returns the result of pthread_mutex_trylock (0 on success).
+     */
+    int  TryLock() {
+        return pthread_mutex_trylock( &mutex );
+    }
+
+    /** Releases the mutex. */
+    void Unlock() {
+        pthread_mutex_unlock( &mutex );
+    }
+
+    /** Same as Lock(). */
+    void Acquire() { Lock(); }
+
+    /** Same as Unlock(). */
+    void Release() { Unlock(); }
+
+  private:
+    pthread_mutex_t mutex;
+    pthread_mutexattr_t m_mutexAttr;
+};
+
+/**
+ * Scoped lock: acquires the given mutex on construction and releases it
+ * on destruction. Only a reference to the mutex is kept.
+ */
+class AutoLock {
+  public:
+    AutoLock(Mutex& mutex)
+      : _mutex(mutex)
+    {
+        _mutex.Lock();
+    }
+
+    ~AutoLock()
+    {
+        _mutex.Unlock();
+    }
+
+  private:
+    // AutoUnlockTemp needs the mutex to release it temporarily
+    friend class AutoUnlockTemp;
+    Mutex& _mutex;
+};
+
+/**
+ * Scoped temporary unlock: releases the mutex held by the given AutoLock
+ * on construction and acquires it again on destruction.
+ */
+class AutoUnlockTemp {
+  public:
+    AutoUnlockTemp(AutoLock & autoLock)
+      : _autoLock(autoLock)
+    {
+        Mutex & held = _autoLock._mutex;
+        held.Unlock();
+    }
+
+    ~AutoUnlockTemp()
+    {
+        Mutex & held = _autoLock._mutex;
+        held.Lock();
+    }
+
+  private:
+    AutoLock & _autoLock;
+};
+
+/**
+ * A condition variable built on top of a pthread condition variable.
+ *
+ * All waits have to be performed with the associated Mutex locked by
+ * the calling thread.
+ */
+class Cond {
+  public:
+    /**
+     * Creates the condition variable with the default attributes.
+     *
+     * A NULL attribute pointer requests the defaults, so no shared
+     * attribute object has to be lazily initialized here; that lazy
+     * initialization was not safe when conditions were created from
+     * several threads at the same time.
+     */
+    Cond() {
+        pthread_cond_init(&_cond, NULL);
+    }
+
+    ~Cond() {
+        pthread_cond_destroy(&_cond);
+    }
+
+    /**
+     * Waits until the condition is signaled.
+     *
+     * @param mutex the mutex, locked by the caller, that is released
+     *              while waiting
+     */
+    void Wait(Mutex& mutex) {
+        pthread_cond_wait(&_cond, &mutex.mutex);
+    }
+
+    /**
+     * Waits until the condition is signaled or the timeout expires.
+     *
+     * @param mutex   the mutex, locked by the caller, that is released
+     *                while waiting
+     * @param timeout the maximum time to wait, in milliseconds
+     *
+     * @return false if the wait timed out, true otherwise
+     */
+    bool Wait(Mutex& mutex, long long int timeout) {
+        struct timeval now;
+        gettimeofday( &now, NULL );
+        // the deadline is computed in microseconds and then split into
+        // the seconds/nanoseconds pair expected by pthread_cond_timedwait
+        int64_t microSecs = now.tv_sec * 1000000LL + now.tv_usec;
+        microSecs += timeout * 1000;
+        struct timespec abstime;
+        abstime.tv_sec = microSecs / 1000000LL;
+        abstime.tv_nsec = (microSecs % 1000000LL) * 1000;
+        return pthread_cond_timedwait(&_cond, &mutex.mutex, &abstime)
+               != ETIMEDOUT;
+    }
+
+    /**
+     * Wakes up one of the threads waiting on this condition, if any.
+     */
+    void Signal() {
+        pthread_cond_signal(&_cond);
+    }
+
+  private:
+    pthread_cond_t            _cond;
+};
+
+/**
+ * A wrapper class for {@link Mutex} and {@link Cond}.
+ *
+ * It bundles one mutex with one condition variable: lock() and unlock()
+ * operate on the mutex, wait() blocks on the condition using that mutex
+ * (the overload with a timeout takes milliseconds and returns false when
+ * the wait timed out), and notify() signals the condition.
+ * The wait() methods are meant to be called with the lock held.
+ */
+class Lock {
+    public:
+        
+        void lock() {
+            m_mutex.Lock();
+        }
+        
+        void unlock() {
+            m_mutex.Unlock();
+        }
+        
+        void wait() {
+            m_cond.Wait( m_mutex );
+        }
+
+        bool wait(long long int timeout) {
+            return m_cond.Wait( m_mutex, timeout );
+        }
+        
+        void notify() {
+            m_cond.Signal();
+        }
+
+    private:
+        
+        /**
+         * The mutex.
+         */
+        Mutex m_mutex;
+        
+        /**
+         * The condition associated with this lock's mutex.
+         */
+        Cond m_cond;         
+};
+
+END_ZKFUSE_NAMESPACE
+        
+#endif /* __MUTEX_H__ */
+

+ 41 - 0
src/contrib/zkfuse/src/thread.cc

@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <log.h>
+
+#include "thread.h"
+
+DEFINE_LOGGER( LOG, "Thread" )
+
+START_ZKFUSE_NAMESPACE
+
+/**
+ * Starts a new thread that runs func(ctx), using the stack size this
+ * object was constructed with.
+ *
+ * On failure the error is logged and the object stays in the "not
+ * created" state, so a later Join() is a no-op instead of joining an
+ * invalid thread handle.
+ *
+ * @param ctx  the argument passed to the thread function
+ * @param func the thread entry point
+ */
+void Thread::Create(void* ctx, ThreadFunc func)
+{
+    pthread_attr_t attr;
+    pthread_attr_init(&attr);
+    pthread_attr_setstacksize(&attr, _stackSize);
+    int ret = pthread_create(&mThread, &attr, func, ctx);
+    // the attributes are only needed while the thread is being created
+    pthread_attr_destroy(&attr);
+    if(ret != 0) {
+        // pthread_create reports the error through its return value,
+        // it does not set errno
+        LOG_FATAL( LOG, "pthread_create failed: %s", strerror(ret) );
+        return;
+    }
+    _ctx = ctx;
+    _func = func;
+}
+
+END_ZKFUSE_NAMESPACE

+ 99 - 0
src/contrib/zkfuse/src/thread.h

@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __THREAD_H__
+#define __THREAD_H__
+
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <pthread.h>
+
+#include "log.h"
+
+START_ZKFUSE_NAMESPACE
+
+/**
+ * A thin wrapper around a pthread.
+ *
+ * The thread is started with Create() and waited for with Join(); the
+ * destructor neither joins nor detaches the thread.
+ */
+class Thread {
+  public:
+    /** Stack size, in bytes, used unless another one is requested. */
+    static const size_t defaultStackSize = 1024 * 1024;
+
+    /** The signature of a thread entry point. */
+    typedef void* (*ThreadFunc) (void*);
+
+    /**
+     * Prepares a thread object; no thread is started yet.
+     *
+     * @param stackSize the stack size, in bytes, for the thread
+     */
+    Thread(size_t stackSize = defaultStackSize)
+      : _ctx(NULL), _func(NULL), _stackSize(stackSize)
+    {
+        memset( &mThread, 0, sizeof(mThread) );
+    }
+
+    ~Thread() { }
+
+    void Create(void* ctx, ThreadFunc func);
+
+    /**
+     * Waits for the thread to finish. Does nothing when no thread
+     * function has been recorded, i.e. Create(...) was never called,
+     * which avoids joining an uninitialized mThread.
+     */
+    void Join() {
+        if (_func == NULL) {
+            return;
+        }
+        pthread_join(mThread, 0);
+    }
+
+  private:
+    pthread_t mThread;
+    void *_ctx;
+    ThreadFunc _func;
+    size_t _stackSize;
+};
+
+
+/**
+ * Binds an object to one of its void() member functions so that the
+ * pair can be handed to a thread as a single pointer.
+ * Only a reference to the object is kept.
+ */
+template<typename T>
+struct ThreadContext {
+    typedef void (T::*FuncPtr) (void);
+
+    ThreadContext(T& ctx, FuncPtr func)
+      : _ctx(ctx), _func(func)
+    {
+    }
+
+    /** Invokes the bound member function on the bound object. */
+    void run(void) {
+        FuncPtr method = _func;
+        (_ctx.*method)();
+    }
+
+    T& _ctx;
+    FuncPtr   _func;
+};
+
+/**
+ * Thread entry point for CXXThread: interprets the argument as a
+ * ThreadContext<T>, runs it and returns 0.
+ */
+template<typename T>
+void* ThreadExec(void *obj) {
+    ThreadContext<T>* context = static_cast<ThreadContext<T>*>(obj);
+    assert(context != 0);
+    context->run();
+    return 0;
+}
+
+/**
+ * A Thread that runs a void() member function of an object of type T.
+ */
+template <typename T>
+class CXXThread : public Thread {
+  public:
+    typedef void (T::*FuncPtr) (void);
+
+    CXXThread(size_t stackSize = Thread::defaultStackSize)
+      : Thread(stackSize), ctx(0)
+    {
+    }
+
+    /** Frees the context created by Create(), if any. */
+    ~CXXThread() {
+        // deleting a null pointer is a no-op
+        delete ctx;
+    }
+
+    /**
+     * Starts a thread that calls (obj.*func)(). May be called only once
+     * per object (asserted).
+     */
+    void Create(T& obj, FuncPtr func) {
+        assert(ctx == 0);
+        ctx = new ThreadContext<T>(obj, func);
+        Thread::Create(ctx, ThreadExec<T>);
+    }
+
+  private:
+    // the object/method pair handed to the thread, owned by this object
+    ThreadContext<T>* ctx;
+};
+
+    
+END_ZKFUSE_NAMESPACE
+
+#endif /* __THREAD_H__ */
+

+ 879 - 0
src/contrib/zkfuse/src/zkadapter.cc

@@ -0,0 +1,879 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+
+#include <algorithm>
+#include <iostream>
+
+#include "blockingqueue.h"
+#include "thread.h"
+#include "zkadapter.h"
+
+using namespace std;
+using namespace zk;
+
+DEFINE_LOGGER( LOG, "zookeeper.adapter" )
+DEFINE_LOGGER( ZK_LOG, "zookeeper.core" )
+
+/**
+ * \brief A helper class to initialize ZK logging.
+ *
+ * Constructing an instance checks the levels enabled on ZK_LOG, most verbose
+ * first, and passes the matching constant to zoo_set_debug_level().
+ */
+class InitZooKeeperLogging
+{
+  public:
+    InitZooKeeperLogging() {
+        //trace is folded into debug, when the logging library offers it
+        bool verbose = ZK_LOG->isDebugEnabled();
+#ifdef LOG4CXX_TRACE
+        verbose = verbose || ZK_LOG->isTraceEnabled();
+#endif
+        if (verbose) {
+            zoo_set_debug_level( LOG_LEVEL_DEBUG );
+            return;
+        }
+        if (ZK_LOG->isInfoEnabled()) {
+            zoo_set_debug_level( LOG_LEVEL_INFO );
+            return;
+        }
+        if (ZK_LOG->isWarnEnabled()) {
+            zoo_set_debug_level( LOG_LEVEL_WARN );
+            return;
+        }
+        //nothing more verbose is enabled
+        zoo_set_debug_level( LOG_LEVEL_ERROR );
+    }
+};
+
+using namespace std;
+
+namespace zk
+{
+
+/**
+ * \brief Decides whether a failed ZK request may be attempted again.
+ *
+ * An instance starts with two retries when the config allows automatic
+ * reconnection and with none otherwise.
+ */
+class RetryHandler
+{
+  public:
+    RetryHandler(const ZooKeeperConfig &zkConfig)
+        : m_zkConfig(zkConfig),
+          retries(zkConfig.getAutoReconnect() ? 2 : 0)
+    {
+    }
+        
+    /**
+     * \brief Tells the caller whether to retry after the given RC.
+     * 
+     * One retry is used up by every call that sees a recoverable code.
+     * 
+     * @param rc the ZK error code
+     * @return true if the code is recoverable and a retry was still 
+     *         available, false otherwise
+     */
+    bool handleRC(int rc)
+    {
+        TRACE( LOG, "handleRC" );
+
+        if (retryOnError(rc)) {
+            LOG_TRACE( LOG, "RC: %d, retries left: %d", rc, retries );
+            //test the budget first, then use it up
+            const bool budgetLeft = (retries > 0);
+            --retries;
+            return budgetLeft;
+        }
+        //the given error code is not recoverable
+        return false;
+    }
+        
+  private:
+    /**
+     * The ZK config.
+     */
+    const ZooKeeperConfig &m_zkConfig;
+        
+    /**
+     * The number of retries still available.
+     */
+    int retries;    
+        
+    /**
+     * Checks whether the given error code is one that is worth a retry:
+     * a lost connection or an operation timeout.
+     * 
+     * @param zkErrorCode one of the ZK error codes
+     */
+    static bool retryOnError(int zkErrorCode)
+    {
+        if (zkErrorCode == ZCONNECTIONLOSS) {
+            return true;
+        }
+        return zkErrorCode == ZOPERATIONTIMEOUT;
+    }
+};
+    
+    
+/**
+ * \brief The implementation of the global ZK event watcher.
+ *
+ * Logs the event and queues it on the adapter stored as the context of the
+ * given handle. The event is logged and dropped if no context is set.
+ *
+ * @param zh the ZK handle the event belongs to
+ * @param type the type of the event
+ * @param state the state reported with the event
+ * @param path the path the event refers to, may be NULL
+ */
+void zkWatcher(zhandle_t *zh, int type, int state, const char *path)
+{
+    TRACE( LOG, "zkWatcher" );
+
+    //a workaround for buggy ZK API: do not use the path of session and
+    //not-watching events. SESSION_EVENT and NOTWATCHING_EVENT are event
+    //types (see handleEvent and processEvents), so they are compared with
+    //'type' and not with 'state'
+    const bool pathUsable = (path != NULL &&
+                             type != SESSION_EVENT &&
+                             type != NOTWATCHING_EVENT);
+    const string sPath = pathUsable ? string(path) : string();
+    LOG_INFO( LOG,
+              "Received a ZK event - type: %d, state: %d, path: '%s'",
+              type, state, sPath.c_str() );
+    ZooKeeperAdapter *zka = (ZooKeeperAdapter *)zoo_get_context(zh);
+    if (zka != NULL) {
+        zka->enqueueEvent( type, state, sPath );
+    } else {
+        LOG_ERROR( LOG,
+                   "Skipping ZK event (type: %d, state: %d, path: '%s'), "
+                   "because ZK passed no context",
+                   type, state, sPath.c_str() );
+    }
+}
+
+
+
+// =======================================================================
+
+/**
+ * \brief Creates the adapter and starts both event dispatcher threads.
+ *
+ * @param config the ZK configuration, copied into the adapter
+ * @param listener an optional listener to register, may be NULL
+ * @param establishConnection whether to call reconnect() right away
+ * @throw ZooKeeperException if establishConnection is set and reconnect()
+ *        fails
+ */
+ZooKeeperAdapter::ZooKeeperAdapter(ZooKeeperConfig config, 
+                                   ZKEventListener *listener,
+                                   bool establishConnection) 
+    throw(ZooKeeperException)
+    : m_zkConfig(config),
+      mp_zkHandle(NULL), 
+      m_terminating(false),
+      m_connected(false),
+      m_state(AS_DISCONNECTED) 
+{
+    TRACE( LOG, "ZooKeeperAdapter" );
+
+    resetRemainingConnectTimeout();
+    
+    //enforce setting up appropriate ZK log level
+    static InitZooKeeperLogging INIT_ZK_LOGGING;
+    
+    if (listener != NULL) {
+        addListener(listener);
+    }
+
+    //start the event dispatcher thread
+    m_eventDispatcher.Create( *this, &ZooKeeperAdapter::processEvents );
+
+    //start the user event dispatcher thread
+    m_userEventDispatcher.Create( *this, &ZooKeeperAdapter::processUserEvents );
+    
+    //optionally establish the connection
+    if (establishConnection) {
+        try {
+            reconnect();
+        } catch (...) {
+            //the destructor does not run when the constructor throws, so
+            //stop and join both dispatcher threads here; otherwise they
+            //would keep running on an object that no longer exists
+            m_terminating = true;
+            m_userEventDispatcher.Join();
+            m_eventDispatcher.Join();
+            throw;
+        }
+    }
+}
+
+/**
+ * \brief Disconnects from ZK and waits for both dispatcher threads.
+ */
+ZooKeeperAdapter::~ZooKeeperAdapter()
+{
+    TRACE( LOG, "~ZooKeeperAdapter" );
+
+    //a failure to disconnect is only logged, so the threads below are
+    //joined either way
+    try {
+        disconnect();
+    } catch (std::exception &failure) {
+        LOG_ERROR( LOG, 
+                   "An exception while disconnecting from ZK: %s",
+                   failure.what() );
+    }
+
+    //tell both dispatcher loops to exit, then wait for them
+    m_terminating = true;
+    m_userEventDispatcher.Join();
+    m_eventDispatcher.Join();
+}
+
+/**
+ * \brief Checks that the given string is an acceptable node path.
+ *
+ * A path has to start with '/'. Any path longer than one character must
+ * neither end with '/' nor contain '//'.
+ *
+ * @param path the path to check
+ * @throw ZooKeeperException if the path breaks one of the rules above
+ */
+void
+ZooKeeperAdapter::validatePath(const string &path) throw(ZooKeeperException)
+{
+    TRACE( LOG, "validatePath" );
+    
+    //looking at the first character is enough; this also rejects ""
+    if (path.empty() || path[0] != '/') {
+        throw ZooKeeperException( string("Node path must start with '/' but "
+                                         "it was '") +
+                                  path +
+                                  "'" );
+    }
+    if (path.length() > 1) {
+        if (path[path.length() - 1] == '/') {
+            throw ZooKeeperException( string("Node path must not end with "
+                                             "'/' but it was '") +
+                                      path +
+                                      "'" );
+        }
+        if (path.find( "//" ) != string::npos) {
+            throw ZooKeeperException( string("Node path must not contain "
+                                             "'//' but it was '") +
+                                      path +
+                                      "'" ); 
+        }
+    }
+}
+
+/**
+ * \brief Closes the ZK handle, if there is one.
+ *
+ * After closing, the handle is reset to NULL and the adapter state is set
+ * to AS_DISCONNECTED. Nothing happens when there is no handle.
+ */
+void
+ZooKeeperAdapter::disconnect()
+{
+    TRACE( LOG, "disconnect" );
+    LOG_TRACE( LOG, "mp_zkHandle: %p, state %d", mp_zkHandle, m_state );
+
+    m_stateLock.lock();
+    if (mp_zkHandle == NULL) {
+        //nothing to close
+        m_stateLock.unlock();
+        return;
+    }
+    zookeeper_close( mp_zkHandle );
+    mp_zkHandle = NULL;
+    setState( AS_DISCONNECTED );
+    m_stateLock.unlock();
+}
+
+/**
+ * \brief Drops the current ZK handle and creates a new one.
+ *
+ * On success the adapter state becomes AS_CONNECTING. The remaining connect
+ * timeout is reset in either case.
+ *
+ * @throw ZooKeeperException if zookeeper_init() returns no handle
+ */
+void
+ZooKeeperAdapter::reconnect() throw(ZooKeeperException)
+{
+    TRACE( LOG, "reconnect" );
+    
+    m_stateLock.lock();
+    //get rid of the previous connection, if any
+    disconnect();
+    
+    //ask ZooKeeper for a new handle
+    mp_zkHandle = zookeeper_init( m_zkConfig.getHosts().c_str(), 
+                                  zkWatcher, 
+                                  m_zkConfig.getLeaseTimeout(),
+                                  NULL, this, 0);
+    resetRemainingConnectTimeout();
+    const bool initialized = (mp_zkHandle != NULL);
+    if (initialized) {
+        setState( AS_CONNECTING );
+    }
+    m_stateLock.unlock();
+
+    if (!initialized) {
+        throw ZooKeeperException( 
+            string("Unable to connect to ZK running at '") +
+                    m_zkConfig.getHosts() + "'" );
+    }
+    
+    LOG_DEBUG( LOG, "mp_zkHandle: %p, state %d", mp_zkHandle, m_state ); 
+}
+
+/**
+ * \brief Looks up the listener contexts of an event and dispatches it.
+ *
+ * The contexts found for the path are removed from the registry. Changed
+ * and deleted events may match two registrations (data and existence), in
+ * which case the event is dispatched twice. Session and not-watching events
+ * are dispatched without looking up any context.
+ *
+ * @param type the type of the event
+ * @param state the state reported with the event
+ * @param path the path the event refers to
+ */
+void
+ZooKeeperAdapter::handleEvent(int type, int state, const string &path)
+{
+    TRACE( LOG, "handleEvent" );
+    LOG_TRACE( LOG, 
+               "type: %d, state %d, path: %s",
+               type, state, path.c_str() );
+    Listener2Context primary, secondary;
+    //internal ZK events carry no user context
+    const bool internalEvent =
+        (type == SESSION_EVENT || type == NOTWATCHING_EVENT);
+    if (!internalEvent) {
+        m_zkContextsMutex.Acquire();
+        if (type == CHANGED_EVENT || type == DELETED_EVENT) {
+            //two kinds of registrations are interested in these events
+            primary = findAndRemoveListenerContext( GET_NODE_DATA, path );
+            secondary = findAndRemoveListenerContext( NODE_EXISTS, path );
+            if (primary.empty()) {
+                //promote the 2nd context and leave the 2nd slot empty
+                primary.swap( secondary );
+                secondary.clear();
+            }
+        } else if (type == CHILD_EVENT) {
+            primary = findAndRemoveListenerContext( GET_NODE_CHILDREN, path );
+        } else if (type == CREATED_EVENT) {
+            primary = findAndRemoveListenerContext( NODE_EXISTS, path );
+        }
+        m_zkContextsMutex.Release();
+    }
+    
+    handleEvent( type, state, path, primary );
+    if (!secondary.empty()) {
+        handleEvent( type, state, path, secondary );
+    }
+}
+
+/**
+ * \brief Fires an event once per given listener context.
+ *
+ * With no contexts at all the event is fired once without a context. An
+ * entry with a NULL listener is fired through fireEvent(event), all others
+ * through fireEvent(listener, event).
+ *
+ * @param type the type of the event
+ * @param state the state reported with the event
+ * @param path the path the event refers to
+ * @param listeners the listener to context mapping to fire the event for
+ */
+void
+ZooKeeperAdapter::handleEvent(int type,
+                              int state,
+                              const string &path,
+                              const Listener2Context &listeners)
+{
+    TRACE( LOG, "handleEvents" );
+
+    if (listeners.empty()) {
+        //nobody registered a context, propagate without one
+        ZKWatcherEvent bare(type, state, path);
+        fireEvent( bare );
+        return;
+    }
+
+    Listener2Context::const_iterator entry = listeners.begin();
+    const Listener2Context::const_iterator last = listeners.end();
+    while (entry != last) {
+        ZKWatcherEvent event(type, state, path, entry->second);
+        if (entry->first == NULL) {
+            fireEvent( event );
+        } else {
+            fireEvent( entry->first, event );
+        }
+        ++entry;
+    }
+}
+
+/**
+ * \brief Puts a new event onto the m_events queue.
+ *
+ * @param type the type of the event
+ * @param state the state reported with the event
+ * @param path the path the event refers to
+ */
+void 
+ZooKeeperAdapter::enqueueEvent(int type, int state, const string &path)
+{
+    TRACE( LOG, "enqueueEvents" );
+
+    const ZKWatcherEvent incoming( type, state, path );
+    m_events.put( incoming );
+}
+
+/**
+ * \brief The loop of the event dispatcher thread.
+ *
+ * Takes events from m_events until m_terminating is set. Session events
+ * update the adapter state first; every event is then passed on to
+ * m_userEvents.
+ */
+void
+ZooKeeperAdapter::processEvents()
+{
+    TRACE( LOG, "processEvents" );
+
+    while (!m_terminating) {
+        bool timedOut = false;
+        ZKWatcherEvent source = m_events.take( 100, &timedOut );
+        if (timedOut) {
+            //nothing arrived, check the termination flag again
+            continue;
+        }
+        if (source.getType() == SESSION_EVENT) {
+            LOG_INFO( LOG,
+                      "Received SESSION event, state: %d. Adapter state: %d",
+                      source.getState(), m_state );
+            m_stateLock.lock();
+            if (source.getState() == CONNECTED_STATE) {
+                m_connected = true;
+                resetRemainingConnectTimeout();
+                setState( AS_CONNECTED );
+            } else if (source.getState() == CONNECTING_STATE) {
+                m_connected = false;
+                setState( AS_CONNECTING );
+            } else if (source.getState() == EXPIRED_SESSION_STATE) {
+                LOG_INFO( LOG, "Received EXPIRED_SESSION event" );
+                setState( AS_SESSION_EXPIRED );
+            }
+            m_stateLock.unlock();
+        }
+        //hand the event over to the user event dispatcher
+        m_userEvents.put( source );
+    }
+}
+
+/**
+ * \brief The loop of the user event dispatcher thread.
+ *
+ * Takes events from m_userEvents until m_terminating is set and passes each
+ * one to handleEvent(). An exception derived from std::exception is logged
+ * and does not stop the loop.
+ */
+void
+ZooKeeperAdapter::processUserEvents()
+{
+    TRACE( LOG, "processUserEvents" );
+
+    while (!m_terminating) {
+        bool timedOut = false;
+        ZKWatcherEvent source = m_userEvents.take( 100, &timedOut );
+        if (timedOut) {
+            //nothing arrived, check the termination flag again
+            continue;
+        }
+        try {
+            handleEvent( source.getType(),
+                         source.getState(),
+                         source.getPath() );
+        } catch (std::exception &failure) {
+            LOG_ERROR( LOG, 
+                       "Unable to process event (type: %d, state: %d, "
+                               "path: %s), because of exception: %s",
+                       source.getType(),
+                       source.getState(),
+                       source.getPath().c_str(),
+                       failure.what() );
+        }
+    }
+}
+
+/**
+ * \brief Stores a user context for a (method, path, listener) triple.
+ *
+ * A context stored earlier for the same triple is overwritten.
+ *
+ * @param method the watchable method the context belongs to
+ * @param path the path of the node
+ * @param listener the listener the context belongs to
+ * @param context the context to store
+ */
+void 
+ZooKeeperAdapter::registerContext(WatchableMethod method,
+                                  const string &path,
+                                  ZKEventListener *listener,
+                                  ContextType context)
+{
+    TRACE( LOG, "registerContext" );
+
+    //walk down the nested containers one level at a time
+    Path2Listener2Context &byPath = m_zkContexts[method];
+    Listener2Context &byListener = byPath[path];
+    byListener[listener] = context;
+}
+
+/**
+ * \brief Removes and returns the listener contexts stored for a path.
+ *
+ * @param method the watchable method to look under
+ * @param path the path of the node
+ * @return a copy of the removed contexts, empty if none were stored
+ */
+ZooKeeperAdapter::Listener2Context
+ZooKeeperAdapter::findAndRemoveListenerContext(WatchableMethod method,
+                                               const string &path)
+{
+    TRACE( LOG, "findAndRemoveListenerContext" );
+
+    Listener2Context found;
+    Path2Listener2Context &byPath = m_zkContexts[method];
+    const Path2Listener2Context::iterator match = byPath.find( path );
+    if (match == byPath.end()) {
+        return found;
+    }
+    //copy first, erasing invalidates the iterator
+    found = match->second;
+    byPath.erase( match );
+    return found;
+}
+
+/**
+ * \brief Changes the adapter state and notifies m_stateLock.
+ *
+ * Nothing is changed or notified when the new state equals the current one.
+ *
+ * @param newState the state to switch to
+ */
+void 
+ZooKeeperAdapter::setState(AdapterState newState)
+{
+    TRACE( LOG, "setState" );    
+    if (newState == m_state) {
+        LOG_TRACE( LOG, "New state same as the current: %d", newState );
+        return;
+    }
+    LOG_INFO( LOG, "Adapter state transition: %d -> %d", m_state, newState );
+    m_state = newState;
+    m_stateLock.notify();
+}
+
+
+//TODO move this code to verifyConnection so reconnect()
+//is called from one place only
+/**
+ * \brief Waits until the adapter reaches the AS_CONNECTED state.
+ *
+ * The wait is bounded by the remaining connect timeout. An expired session
+ * seen while waiting triggers reconnect(). The time spent waiting is
+ * reported through waitedForConnect().
+ *
+ * @throw ZooKeeperException if the adapter is still not connected afterwards
+ */
+void
+ZooKeeperAdapter::waitUntilConnected() 
+  throw(ZooKeeperException)
+{
+    TRACE( LOG, "waitUntilConnected" );    
+    const long long int timeout = getRemainingConnectTimeout();
+    LOG_INFO( LOG,
+              "Waiting up to %lld ms until a connection to ZK is established",
+              timeout );
+    if (timeout > 0) {
+        long long int remaining = timeout;
+        while (m_state != AS_CONNECTED && remaining > 0) {
+            //an expired session never recovers by itself
+            if (m_state == AS_SESSION_EXPIRED) {
+                LOG_INFO( LOG,
+                        "Reconnecting because the current session has expired" );
+                reconnect();
+            }
+            //measure how long the wait really takes
+            struct timeval before;
+            gettimeofday( &before, NULL );
+            LOG_TRACE( LOG, "About to wait %lld ms", remaining );
+            m_stateLock.wait( remaining );
+            struct timeval after;
+            gettimeofday( &after, NULL );
+            const int64_t startMs =
+                before.tv_sec * 1000LL + before.tv_usec / 1000;
+            const int64_t endMs =
+                after.tv_sec * 1000LL + after.tv_usec / 1000;
+            remaining -= (endMs - startMs);
+        }
+        const long long int waited = timeout - remaining;
+        waitedForConnect( waited );
+        LOG_INFO( LOG, "Waited %lld ms", waited );
+    }
+
+    if (m_state == AS_CONNECTED) {
+        LOG_INFO( LOG, "Connected!" );
+        return;
+    }
+    if (timeout > 0) {
+        LOG_WARN( LOG, "Timed out while waiting for connection to ZK" );
+        throw ZooKeeperException("Timed out while waiting for "
+                                "connection to ZK");
+    }
+    LOG_ERROR( LOG, "Global timeout expired and still not connected to ZK" );
+    throw ZooKeeperException("Global timeout expired and still not "
+                             "connected to ZK");
+}
+
+/**
+ * \brief Makes sure the adapter is connected before a ZK call is made.
+ *
+ * A disconnected adapter is rejected right away. In any other state that is
+ * not AS_CONNECTED the method reconnects, provided no connection attempt is
+ * in progress and auto-reconnect is allowed, and then waits for the
+ * connection. m_stateLock is held for the whole check.
+ *
+ * @throw ZooKeeperException if the adapter is disconnected, may not
+ *        reconnect, or does not get connected
+ */
+void
+ZooKeeperAdapter::verifyConnection() throw(ZooKeeperException)
+{
+    TRACE( LOG, "verifyConnection" );
+
+    m_stateLock.lock();
+    try {
+        if (m_state == AS_DISCONNECTED) {
+            throw ZooKeeperException("Disconnected from ZK. "
+                "Please use reconnect() before attempting to use any ZK API");
+        }
+        if (m_state != AS_CONNECTED) {
+            LOG_TRACE( LOG, "Checking if need to reconnect..." );
+            if (m_state == AS_CONNECTING) {
+                //a connection attempt is already in progress
+                LOG_TRACE( LOG, "...no, already in CONNECTING state" );
+            } else {
+                LOG_TRACE( LOG, 
+                           "yes. Checking if allowed to auto-reconnect..." );
+                if (!m_zkConfig.getAutoReconnect()) {
+                    LOG_TRACE( LOG, "no. Sorry." );
+                    throw ZooKeeperException("ZK connection is down and "
+                                             "auto-reconnect is not allowed");
+                }
+                LOG_TRACE( LOG, "...yes. About to reconnect" );
+                reconnect();
+            }
+            //either way, wait for the connection to come up
+            waitUntilConnected(); 
+        }
+    } catch (ZooKeeperException &) {
+        //release the lock before the exception leaves
+        m_stateLock.unlock();
+        throw;
+    }
+    m_stateLock.unlock();
+}
+
+/**
+ * \brief Creates a node and reports the path it was created under.
+ *
+ * @param path the path of the node to create
+ * @param value the data to store in the node
+ * @param flags the flags passed on to zoo_create()
+ * @param createAncestors whether to create missing ancestors and try again
+ *        when the create fails with ZNONODE
+ * @param returnPath receives the path of the created node on success
+ * @return true if the node has been created, false if it already exists
+ * @throw ZooKeeperException if the path is invalid or the node, or one of
+ *        its ancestors, cannot be created
+ */
+bool
+ZooKeeperAdapter::createNode(const string &path, 
+                             const string &value, 
+                             int flags, 
+                             bool createAncestors,
+                             string &returnPath) 
+    throw(ZooKeeperException) 
+{
+    TRACE( LOG, "createNode (internal)" );
+    validatePath( path );
+    
+    const int MAX_PATH_LENGTH = 1024;
+    char realPath[MAX_PATH_LENGTH];
+    realPath[0] = 0;
+    
+    int rc;
+    RetryHandler rh(m_zkConfig);
+    for (;;) {
+        verifyConnection();
+        rc = zoo_create( mp_zkHandle, 
+                         path.c_str(), 
+                         value.c_str(),
+                         value.length(),
+                         &OPEN_ACL_UNSAFE,
+                         flags,
+                         realPath,
+                         MAX_PATH_LENGTH );
+        if (rc == ZOK || !rh.handleRC(rc)) {
+            break;
+        }
+    }
+
+    if (rc == ZOK) {
+        LOG_INFO( LOG, "%s has been created", realPath );
+        returnPath = string( realPath );
+        return true;
+    }
+    if (rc == ZNODEEXISTS) {
+        //the node already exists
+        LOG_WARN( LOG, "Error %d for %s", rc, path.c_str() );
+        return false;
+    }
+    if (rc == ZNONODE && createAncestors) {
+        LOG_WARN( LOG, "Error %d for %s", rc, path.c_str() );
+        //an ancestor is missing: walk the path from the root, one '/' at
+        //a time, and create every prefix that is not there yet
+        string::size_type pos = 1;
+        while ((pos = path.find( "/", pos )) != string::npos) {
+            try {
+                createNode( path.substr( 0, pos ), "", 0, true );
+            } catch (ZooKeeperException &) {
+                throw ZooKeeperException( string("Unable to create "
+                                                 "node ") + 
+                                          path, 
+                                          rc );
+            }
+            ++pos;
+        }
+        //all ancestors are in place, create the node itself
+        return createNode( path, value, flags, false, returnPath );
+    }
+    LOG_ERROR( LOG,"Error %d for %s", rc, path.c_str() );
+    throw ZooKeeperException( string("Unable to create node ") +
+                              path,
+                              rc );
+}
+
+/**
+ * \brief Creates a node without reporting the path it was created under.
+ *
+ * Forwards to the five-argument createNode() and drops the returned path.
+ *
+ * @return whatever the five-argument createNode() returns
+ */
+bool
+ZooKeeperAdapter::createNode(const string &path,
+                             const string &value,
+                             int flags,
+                             bool createAncestors) 
+        throw(ZooKeeperException) 
+{
+    TRACE( LOG, "createNode" );
+
+    //the created path is of no interest to this overload
+    string ignoredPath;
+    const bool created =
+        createNode( path, value, flags, createAncestors, ignoredPath );
+    return created;
+}
+
+/**
+ * \brief Creates a sequence node and returns its sequence number.
+ *
+ * The node is created with the SEQUENCE flag added to the given flags. The
+ * sequence number is whatever follows the requested path in the path of the
+ * created node.
+ *
+ * @param path the path prefix of the node to create
+ * @param value the data to store in the node
+ * @param flags the flags for the node, SEQUENCE is added to them
+ * @param createAncestors whether to create missing ancestors
+ * @return the sequence number, or -1 if createNode() returned false
+ * @throw ZooKeeperException if the node cannot be created, or the created
+ *        path is not the requested path followed by a number
+ */
+int64_t
+ZooKeeperAdapter::createSequence(const string &path,
+                                 const string &value,
+                                 int flags,
+                                 bool createAncestors) 
+    throw(ZooKeeperException)
+{
+    TRACE( LOG, "createSequence" );
+
+    string createdPath;    
+    const bool result = createNode( path,
+                                    value,
+                                    flags | SEQUENCE,
+                                    createAncestors,
+                                    createdPath );
+    if (!result) {
+        return -1;
+    }
+
+    //extract sequence number from the returned path
+    if (createdPath.compare( 0, path.length(), path ) != 0) {
+        throw ZooKeeperException( string("Expecting returned path '") +
+                                  createdPath + 
+                                  "' to start with '" +
+                                  path +
+                                  "'" );
+    }
+    const string seqSuffix = createdPath.substr( path.length() );
+    const char *digits = seqSuffix.c_str();
+    char *end = NULL;
+    //strtoll, because long may be narrower than the returned int64_t
+    const int64_t seq = strtoll( digits, &end, 10 );
+    //reject trailing garbage as well as a suffix without any digit
+    if (end == digits || *end != '\0') {
+        throw ZooKeeperException( string("Expecting a number but got ") +
+                                  seqSuffix );
+    }
+    return seq;
+}
+
+/**
+ * \brief Deletes a node, optionally together with all its descendants.
+ *
+ * @param path the path of the node to delete
+ * @param recursive whether to delete the children first when the node
+ *        turns out not to be empty
+ * @param version the version passed on to zoo_delete() for the node itself
+ * @return true if the node has been deleted, false if it does not exist
+ * @throw ZooKeeperException if the path is invalid or the delete fails
+ */
+bool
+ZooKeeperAdapter::deleteNode(const string &path,
+                             bool recursive,
+                             int version)
+    throw(ZooKeeperException)
+{
+    TRACE( LOG, "deleteNode" );
+
+    validatePath( path );
+        
+    int rc;
+    RetryHandler rh(m_zkConfig);
+    do {
+        verifyConnection();
+        rc = zoo_delete( mp_zkHandle, path.c_str(), version );
+    } while (rc != ZOK && rh.handleRC(rc));
+    if (rc != ZOK) {
+        if (rc == ZNONODE) {
+            LOG_WARN( LOG, "Error %d for %s", rc, path.c_str() );
+            return false;
+        }
+        if (rc == ZNOTEMPTY && recursive) {
+            LOG_WARN( LOG, "Error %d for %s", rc, path.c_str() );
+            //get all children and delete them recursively...
+            vector<string> nodeList;
+            //no listener is wanted here, so pass a null pointer
+            getNodeChildren( nodeList, path, NULL );
+            for (vector<string>::const_iterator i = nodeList.begin();
+                 i != nodeList.end();
+                 ++i) {
+                deleteNode( *i, true );
+            }
+            //...and finally attempt to delete the node again, still
+            //honoring the version the caller asked for
+            return deleteNode( path, false, version ); 
+        }
+        LOG_ERROR( LOG, "Error %d for %s", rc, path.c_str() );
+        throw ZooKeeperException( string("Unable to delete node ") + path,
+                                  rc );
+    } else {
+        LOG_INFO( LOG, "%s has been deleted", path.c_str() );
+        return true;
+    }
+}
+
+/**
+ * \brief Checks whether a node exists.
+ *
+ * A watch is requested when a listener is given. When a context is given
+ * as well, it is registered for the listener, provided the call returned
+ * ZOK or ZNONODE.
+ *
+ * @param path the path of the node to check
+ * @param listener the listener to watch the node for, may be NULL
+ * @param context the user context to register, may be NULL
+ * @param stat receives the stat of the node, may be NULL
+ * @return true if the node exists, false if it does not
+ * @throw ZooKeeperException if the path is invalid or the check fails
+ */
+bool
+ZooKeeperAdapter::nodeExists(const string &path,
+                             ZKEventListener *listener,
+                             void *context, Stat *stat)
+    throw(ZooKeeperException)
+{
+    TRACE( LOG, "nodeExists" );
+
+    validatePath( path );
+
+    //use a local stat when the caller does not want one
+    struct Stat tmpStat;
+    if (stat == NULL) {
+        stat = &tmpStat;
+    }
+    memset( stat, 0, sizeof(Stat) );
+
+    const int watch = (listener != NULL ? 1 : 0);
+    const bool tracked = (context != NULL);
+
+    int rc;
+    RetryHandler rh(m_zkConfig);
+    do {
+        verifyConnection();
+        //the call and the registration happen under one mutex hold
+        if (tracked) {
+            m_zkContextsMutex.Acquire();
+        }
+        rc = zoo_exists( mp_zkHandle, path.c_str(), watch, stat );
+        if (tracked) {
+            if (rc == ZOK || rc == ZNONODE) {
+                registerContext( NODE_EXISTS, path, listener, context );
+            }
+            m_zkContextsMutex.Release();
+        }
+    } while (rc != ZOK && rh.handleRC(rc));
+
+    if (rc == ZOK) {
+        return true;        
+    }
+    if (rc == ZNONODE) {
+        LOG_TRACE( LOG, "Node %s does not exist", path.c_str() );
+        return false;
+    }
+    LOG_ERROR( LOG, "Error %d for %s", rc, path.c_str() );
+    throw ZooKeeperException(
+             string("Unable to check existence of node ") + path,
+             rc );
+}
+
+/**
+ * \brief Appends the absolute paths of a node's children to a list.
+ *
+ * A watch is requested when a listener is given. When a context is given
+ * as well, it is registered for the listener after a successful call. The
+ * whole list, including entries it already had, is sorted afterwards.
+ *
+ * @param nodeList the list the children are appended to
+ * @param path the path of the parent node
+ * @param listener the listener to watch the children for, may be NULL
+ * @param context the user context to register, may be NULL
+ * @throw ZooKeeperException if the path is invalid or the children cannot
+ *        be retrieved
+ */
+void
+ZooKeeperAdapter::getNodeChildren(vector<string> &nodeList,
+                                  const string &path, 
+                                  ZKEventListener *listener,
+                                  void *context)
+    throw (ZooKeeperException)
+{
+    TRACE( LOG, "getNodeChildren" );
+
+    validatePath( path );
+    
+    String_vector children;
+    memset( &children, 0, sizeof(children) );
+
+    int rc;
+    RetryHandler rh(m_zkConfig);
+    do {
+        verifyConnection();
+        if (context != NULL) {
+            m_zkContextsMutex.Acquire();
+            rc = zoo_get_children( mp_zkHandle,
+                                   path.c_str(), 
+                                   (listener != NULL ? 1 : 0), 
+                                   &children );
+            if (rc == ZOK) {
+                registerContext( GET_NODE_CHILDREN, path, listener, context );
+            }
+            m_zkContextsMutex.Release();
+        } else {
+            rc = zoo_get_children( mp_zkHandle,
+                                   path.c_str(), 
+                                   (listener != NULL ? 1 : 0),
+                                   &children );
+        }
+    } while (rc != ZOK && rh.handleRC(rc));
+    if (rc != ZOK) {
+        LOG_ERROR( LOG, "Error %d for %s", rc, path.c_str() );
+        throw ZooKeeperException( string("Unable to get children of node ") +
+                                  path, 
+                                  rc );
+    } else {
+        for (int i = 0; i < children.count; ++i) {
+            //convert each child's path from relative to absolute 
+            string absPath(path);
+            if (path != "/") {
+                absPath.append( "/" );
+            } 
+            absPath.append( children.data[i] ); 
+            nodeList.push_back( absPath );
+        }
+        //the names have been copied, so give the vector filled in by
+        //zoo_get_children back; it would leak on every call otherwise
+        deallocate_String_vector( &children );
+        //make sure the order is always deterministic
+        sort( nodeList.begin(), nodeList.end() );
+    }
+}
+
+/**
+ * \brief Returns the data stored in a node.
+ *
+ * A watch is requested when a listener is given. When a context is given
+ * as well, it is registered for the listener after a successful call. At
+ * most MAX_DATA_LENGTH - 1 bytes are requested from zoo_get().
+ *
+ * @param path the path of the node to read
+ * @param listener the listener to watch the node for, may be NULL
+ * @param context the user context to register, may be NULL
+ * @param stat receives the stat of the node, may be NULL
+ * @return the data of the node, empty if a negative length is reported
+ * @throw ZooKeeperException if the path is invalid or the data cannot be
+ *        retrieved
+ */
+string
+ZooKeeperAdapter::getNodeData(const string &path,
+                              ZKEventListener *listener,
+                              void *context, Stat *stat)
+    throw(ZooKeeperException)
+{
+    TRACE( LOG, "getNodeData" );
+
+    validatePath( path );
+   
+    const int MAX_DATA_LENGTH = 128 * 1024;
+    char buffer[MAX_DATA_LENGTH];
+    memset( buffer, 0, MAX_DATA_LENGTH );
+    //use a local stat when the caller does not want one
+    struct Stat tmpStat;
+    if (stat == NULL) {
+        stat = &tmpStat;
+    }
+    memset( stat, 0, sizeof(Stat) );
+    
+    int rc;
+    int len;
+    RetryHandler rh(m_zkConfig);
+    do {
+        verifyConnection();
+        //len is in/out, so it has to be reset before every attempt
+        len = MAX_DATA_LENGTH - 1;
+        if (context != NULL) {
+            m_zkContextsMutex.Acquire();
+            rc = zoo_get( mp_zkHandle, 
+                          path.c_str(),
+                          (listener != NULL ? 1 : 0),
+                          buffer, &len, stat );
+            if (rc == ZOK) {
+                registerContext( GET_NODE_DATA, path, listener, context );
+            }
+            m_zkContextsMutex.Release();
+        } else {
+            rc = zoo_get( mp_zkHandle,
+                          path.c_str(),
+                          (listener != NULL ? 1 : 0),
+                          buffer, &len, stat );
+        }
+    } while (rc != ZOK && rh.handleRC(rc));
+    if (rc != ZOK) {
+        LOG_ERROR( LOG, "Error %d for %s", rc, path.c_str() );
+        throw ZooKeeperException( 
+            string("Unable to get data of node ") + path, rc 
+        );
+    } else {
+        //a negative length (reported for a node without data) would make
+        //the range below invalid, so treat it as empty data
+        if (len < 0) {
+            len = 0;
+        }
+        return string( buffer, buffer + len );
+    }
+}
+
+/**
+ * \brief Replaces the data stored in a node.
+ *
+ * @param path the path of the node to update
+ * @param value the new data of the node
+ * @param version the version passed on to zoo_set()
+ * @throw ZooKeeperException if the path is invalid or the update fails
+ */
+void
+ZooKeeperAdapter::setNodeData(const string &path,
+                              const string &value,
+                              int version)
+    throw(ZooKeeperException)
+{
+    TRACE( LOG, "setNodeData" );
+
+    validatePath( path );
+
+    int rc;
+    RetryHandler rh(m_zkConfig);
+    for (;;) {
+        verifyConnection();
+        rc = zoo_set( mp_zkHandle,
+                      path.c_str(),
+                      value.c_str(),
+                      value.length(), version );
+        //stop on success, or when no retry is granted
+        if (rc == ZOK || !rh.handleRC(rc)) {
+            break;
+        }
+    }
+    if (rc == ZOK) {
+        return;
+    }
+    LOG_ERROR( LOG, "Error %d for %s", rc, path.c_str() );
+    throw ZooKeeperException( string("Unable to set data for node ") +
+                              path,
+                              rc );
+}
+
+}   /* end of 'namespace zk' */
+

+ 718 - 0
src/contrib/zkfuse/src/zkadapter.h

@@ -0,0 +1,718 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ZKADAPTER_H__
+#define __ZKADAPTER_H__
+
+#include <string>
+#include <vector>
+#include <map>
+
+extern "C" {
+#include "zookeeper.h"
+}
+
+#include "log.h"
+#include "mutex.h"
+#include "thread.h"
+#include "blockingqueue.h"
+#include "event.h"
+
+using namespace std;
+using namespace zkfuse;
+
+namespace zk {
+    
+/**
+ * \brief A cluster related exception.
+ */
+class ZooKeeperException :
+    public std::exception
+{
+    public:
+        
+        /**
+         * \brief Constructor for failures that carry no ZK error code.
+         * The stored error code is 0.
+         * 
+         * @param msg the detailed message associated with this exception
+         */
+        ZooKeeperException(const string &msg) : 
+            m_message(msg), m_zkErrorCode(0) 
+        {}
+
+        /**
+         * \brief Constructor for failures reported by ZK.
+         * The message returned by what() is <code>msg</code> followed by
+         * " (ZK error code: N)".
+         * 
+         * @param msg the detailed message associated with this exception
+         * @param errorCode the ZK error code associated with this exception
+         */
+        ZooKeeperException(const string &msg, int errorCode) : 
+            m_message(msg), m_zkErrorCode(errorCode) 
+        {
+            // Bounded formatting: the write can never run past the buffer,
+            // whatever the width of int on the target platform.
+            char suffix[64];
+            snprintf( suffix, sizeof(suffix), " (ZK error code: %d)", errorCode );
+            m_message += suffix;
+        }
+                
+        /**
+         * \brief Destructor.
+         */
+        ~ZooKeeperException() throw() {}
+        
+        /**
+         * \brief Returns detailed description of the exception.
+         * The pointer refers to storage owned by this object.
+         */
+        const char *what() const throw() {
+            return m_message.c_str();
+        }
+        
+        /**
+         * \brief Returns the ZK error code (0 if none was supplied).
+         */
+        int getZKErrorCode() const {
+            return m_zkErrorCode;
+        }
+
+    private:
+        
+        /**
+         * The detailed message associated with this exception.
+         */
+        string m_message;
+        
+        /**
+         * The optional error code received from ZK.
+         */
+        int m_zkErrorCode;
+        
+};
+    
+/**
+ * \brief This class encapsulates configuration of a ZK client.
+ */
+class ZooKeeperConfig
+{
+    public:
+
+        /**
+         * \brief Builds a configuration whose fields never change afterwards.
+         *
+         * @param hosts comma separated list of host and port pairs of ZK nodes
+         * @param leaseTimeout the lease timeout (heartbeat)
+         * @param autoReconnect whether auto-reconnect is allowed
+         * @param connectTimeout the connect timeout, in milliseconds
+         */
+        ZooKeeperConfig(const string &hosts,
+                        int leaseTimeout,
+                        bool autoReconnect = true,
+                        long long int connectTimeout = 15000)
+            : m_hosts(hosts),
+              m_leaseTimeout(leaseTimeout),
+              m_autoReconnect(autoReconnect),
+              m_connectTimeout(connectTimeout)
+        {
+        }
+
+        /**
+         * \brief Returns a copy of the ZK host list.
+         */
+        string getHosts() const
+        {
+            return m_hosts;
+        }
+
+        /**
+         * \brief Returns the lease timeout.
+         */
+        int getLeaseTimeout() const
+        {
+            return m_leaseTimeout;
+        }
+
+        /**
+         * \brief Tells whether {@link ZooKeeperAdapter} should try to
+         * \brief reconnect automatically after a connection failure.
+         */
+        bool getAutoReconnect() const
+        {
+            return m_autoReconnect;
+        }
+
+        /**
+         * \brief Returns the connect timeout.
+         *
+         * @return the connect timeout, in milliseconds
+         */
+        long long int getConnectTimeout() const
+        {
+            return m_connectTimeout;
+        }
+
+    private:
+
+        /** Host addresses of the ZK nodes. */
+        const string m_hosts;
+
+        /** ZK lease timeout. */
+        const int m_leaseTimeout;
+
+        /**
+         * True if the adapter should attempt to reconnect automatically
+         * once the current session has been dropped.
+         */
+        const bool m_autoReconnect;
+
+        /**
+         * How long to wait, in milliseconds, for a connection to ZK
+         * to be established.
+         */
+        const long long int m_connectTimeout;
+
+};
+
+/**
+ * \brief A data value object representing a watcher event received from the ZK.
+ */
+class ZKWatcherEvent
+{
+    public:
+
+        /**
+         * \brief The type representing the user's context.
+         */
+        typedef void *ContextType;
+
+        /**
+         * \brief Default constructor.
+         *
+         * Produces a placeholder event: type and state are -1, the path
+         * is empty and the context is NULL.
+         */
+        ZKWatcherEvent()
+            : m_type(-1),
+              m_state(-1),
+              m_path(),
+              mp_context(NULL)
+        {
+        }
+
+        /**
+         * \brief Constructor.
+         *
+         * @param type the type of this event
+         * @param state the state of this event
+         * @param path the corresponding path, may be empty for some event types
+         * @param context the user specified context; possibly NULL
+         */
+        ZKWatcherEvent(int type,
+                       int state,
+                       const string &path,
+                       ContextType context = NULL)
+            : m_type(type),
+              m_state(state),
+              m_path(path),
+              mp_context(context)
+        {
+        }
+
+        /** \brief Returns the event type. */
+        int getType() const
+        {
+            return m_type;
+        }
+
+        /** \brief Returns the ZK state recorded with the event. */
+        int getState() const
+        {
+            return m_state;
+        }
+
+        /** \brief Returns the node path; may be empty. */
+        string const &getPath() const
+        {
+            return m_path;
+        }
+
+        /** \brief Returns the user context; may be NULL. */
+        ContextType getContext() const
+        {
+            return mp_context;
+        }
+
+        /**
+         * \brief Two events are equal when type, state, path and context
+         * \brief pointer all match.
+         */
+        bool operator==(const ZKWatcherEvent &we) const
+        {
+            if (m_type != we.m_type || m_state != we.m_state) {
+                return false;
+            }
+            return mp_context == we.mp_context && m_path == we.m_path;
+        }
+
+    private:
+
+        /**
+         * The type of this event. It can be either CREATED_EVENT, DELETED_EVENT,
+         * CHANGED_EVENT, CHILD_EVENT, SESSION_EVENT or NOTWATCHING_EVENT.
+         * See zookeeper.h for more details.
+         */
+        const int m_type;
+
+        /**
+         * The state of ZK at the time this event was sent.
+         * It can be either CONNECTING_STATE, ASSOCIATING_STATE,
+         * CONNECTED_STATE, EXPIRED_SESSION_STATE or AUTH_FAILED_STATE.
+         * See zookeeper.h for more details.
+         */
+        const int m_state;
+
+        /**
+         * The path of the node this event is about. It may be empty
+         * for some event types.
+         */
+        const string m_path;
+
+        /**
+         * The pointer to the user specified context, possibly NULL.
+         */
+        ContextType mp_context;
+
+};
+
+/**
+ * \brief The type definition of ZK event source.
+ */
+typedef EventSource<ZKWatcherEvent> ZKEventSource;
+
+/**
+ * \brief The type definition of ZK event listener.
+ */
+typedef EventListener<ZKWatcherEvent> ZKEventListener;
+           
+/**
+ * \brief This is a wrapper around the ZK C synchronous API.
+ */
+class ZooKeeperAdapter
+    : public ZKEventSource
+{
+    public:
+        /**
+         * \brief The global function that handles all ZK asynchronous notifications.
+         */
+        friend void zkWatcher(zhandle_t *, int, int, const char *);
+        
+        /**
+         * \brief The type representing the user's context.
+         */
+        typedef void *ContextType;
+        
+        /**
+         * \brief The map type of ZK event listener to user specified context mapping.
+         */
+        typedef map<ZKEventListener *, ContextType> Listener2Context;
+        
+        /**
+         * \brief The map type of ZK path's to listener's contexts.
+         */
+        typedef map<string, Listener2Context> Path2Listener2Context;
+                  
+        /**
+         * \brief All possible states of this client, in respect to 
+         * \brief connection to the ZK server.
+         */
+        enum AdapterState {
+            //mp_zkHandle is NULL
+            AS_DISCONNECTED = 0,
+            //mp_zkHandle is valid but this client is reconnecting
+            AS_CONNECTING,
+            //mp_zkHandle is valid and this client is connected
+            AS_CONNECTED,
+            //mp_zkHandle is valid, however no more calls can be made to ZK API
+            AS_SESSION_EXPIRED
+        };
+                
+        /**
+         * \brief Constructor.
+         * Attempts to create a ZK adapter, optionally connecting
+         * to the ZK. Note, that if the connection is to be established
+         * and the given listener is NULL, some events may be lost, 
+         * as they may arrive asynchronously before this method finishes.
+         * 
+         * @param config the ZK configuration
+         * @param listener the event listener to be used for listening 
+         *                 on incoming ZK events;
+         *                 if <code>NULL</code> not used
+         * @param establishConnection whether to establish connection to the ZK
+         * 
+         * @throw ZooKeeperException if cannot establish connection to the given ZK
+         */
+        ZooKeeperAdapter(ZooKeeperConfig config, 
+                         ZKEventListener *listener = NULL,
+                         bool establishConnection = false) 
+            throw(ZooKeeperException);
+
+        /**
+         * \brief Destructor.
+         */
+        ~ZooKeeperAdapter(); 
+                  
+        /**
+         * \brief Returns the current config.
+         */
+        const ZooKeeperConfig &getZooKeeperConfig() const {
+            return m_zkConfig;                      // the member itself, not a copy
+        }
+
+        /**
+         * \brief Reestablishes the connection to the ZK.
+         * If this adapter is already connected, the current connection 
+         * will be dropped and a new connection will be established.
+         * 
+         * @throw ZooKeeperException if cannot establish connection to the ZK
+         */
+        void reconnect() throw(ZooKeeperException);
+        
+        /**
+         * \brief Disconnects from the ZK and unregisters {@link #mp_zkHandle}.
+         */
+        void disconnect();
+        
+        /**
+         * \brief Creates a new node identified by the given path. 
+         * This method will optionally attempt to create all missing ancestors.
+         * 
+         * @param path the absolute path name of the node to be created
+         * @param value the initial value to be associated with the node
+         * @param flags the ZK flags of the node to be created
+         * @param createAncestors if true and there are some missing ancestor nodes, 
+         *        this method will attempt to create them
+         * 
+         * @return true if the node has been successfully created; false otherwise
+         * @throw ZooKeeperException if the operation has failed
+         */ 
+        bool createNode(const string &path, 
+                        const string &value = "", 
+                        int flags = 0, 
+                        bool createAncestors = true) 
+            throw(ZooKeeperException);
+                  
+        /**
+         * \brief Creates a new sequence node using the given path as the prefix.
+         * This method will optionally attempt to create all missing ancestors.
+         * 
+         * @param path the absolute path name of the node to be created; 
+         * @param value the initial value to be associated with the node
+         * @param flags the ZK flags of the sequence node to be created 
+         *              (in addition to SEQUENCE)
+         * @param createAncestors if true and there are some missing ancestor 
+         *                        nodes, this method will attempt to create them
+         * 
+         * @return the sequence number associated with the newly created node,
+         *         or -1 if it couldn't be created
+         * @throw ZooKeeperException if the operation has failed
+         */ 
+        int64_t createSequence(const string &path, 
+                               const string &value = "", 
+                               int flags = 0, 
+                               bool createAncestors = true) 
+            throw(ZooKeeperException);
+        
+        /**
+         * \brief Deletes a node identified by the given path.
+         * 
+         * @param path the absolute path name of the node to be deleted
+         * @param recursive if true this method will attempt to remove 
+         *                  all children of the given node if any exist
+         * @param version the expected version of the node. The function will 
+         *                fail if the actual version of the node does not match 
+         *                the expected version
+         * 
+         * @return true if the node has been deleted; false otherwise
+         * @throw ZooKeeperException if the operation has failed
+         */
+        bool deleteNode(const string &path, bool recursive = false, int version = -1) 
+            throw(ZooKeeperException);
+        
+        /**
+         * \brief Checks whether the given node exists or not.
+         * 
+         * @param path the absolute path name of the node to be checked
+         * @param listener the listener for ZK watcher events; 
+         *                 passing non <code>NULL</code> effectively establishes
+         *                 a ZK watch on the given node
+         * @param context the user specified context that is to be passed
+         *                in a corresponding {@link ZKWatcherEvent} at later time; 
+         *                not used if <code>listener</code> is <code>NULL</code>
+         * @param stat the optional node statistics to be filled in by ZK
+         * 
+         * @return true if the given node exists; false otherwise
+         * @throw ZooKeeperException if the operation has failed
+         */
+        bool nodeExists(const string &path, 
+                        ZKEventListener *listener = NULL, 
+                        void *context = NULL,
+                        Stat *stat = NULL) 
+            throw(ZooKeeperException);
+
+        /**
+         * \brief Retrieves list of all children of the given node.
+         * 
+         * @param path the absolute path name of the node for which to get children
+         * @param listener the listener for ZK watcher events; 
+         *                 passing non <code>NULL</code> effectively establishes
+         *                 a ZK watch on the given node
+         * @param context the user specified context that is to be passed
+         *                in a corresponding {@link ZKWatcherEvent} at later time; 
+         *                not used if <code>listener</code> is <code>NULL</code>
+         * 
+         * @param children output parameter that receives the list of absolute
+         *                 paths of child nodes, possibly empty
+         * @throw ZooKeeperException if the operation has failed
+         */
+        void getNodeChildren(vector<string> &children,
+                             const string &path, 
+                             ZKEventListener *listener = NULL, 
+                             void *context = NULL) 
+            throw(ZooKeeperException);
+                
+        /**
+         * \brief Gets the given node's data.
+         * 
+         * @param path the absolute path name of the node to get data from
+         * @param listener the listener for ZK watcher events; 
+         *                 passing non <code>NULL</code> effectively establishes
+         *                 a ZK watch on the given node
+         * @param context the user specified context that is to be passed
+         *                in a corresponding {@link ZKWatcherEvent} at later time; 
+         *                not used if <code>listener</code> is <code>NULL</code>
+         * @param stat the optional node statistics to be filled in by ZK
+         * 
+         * @return the node's data
+         * @throw ZooKeeperException if the operation has failed
+         */
+        string getNodeData(const string &path, 
+                           ZKEventListener *listener = NULL, 
+                           void *context = NULL,
+                           Stat *stat = NULL) 
+            throw(ZooKeeperException);
+        
+        /**
+         * \brief Sets the given node's data.
+         * 
+         * @param path the absolute path name of the node to set data on
+         * @param value the node's data to be set
+         * @param version the expected version of the node. The function will 
+         *                fail if the actual version of the node does not match 
+         *                the expected version
+         * 
+         * @throw ZooKeeperException if the operation has failed
+         */
+        void setNodeData(const string &path, const string &value, int version = -1) 
+            throw(ZooKeeperException);
+        
+        /**
+         * \brief Validates the given path to a node in ZK.
+         * 
+         * @param path the path to be validated
+         * 
+         * @throw ZooKeeperException if the given path is not valid
+         *        (for instance it doesn't start with "/")
+         */
+        static void validatePath(const string &path) throw(ZooKeeperException);
+
+        /**
+         * Returns the current state of this adapter.
+         * 
+         * @return the current state of this adapter
+         * @see AdapterState
+         */
+        AdapterState getState() const {
+            return m_state; // plain read; m_stateLock is not taken here
+        }          
+        
+    private:
+        
+        /**
+         * This enum defines methods from this class than can trigger an event.
+         */
+        enum WatchableMethod {
+            NODE_EXISTS = 0,
+            GET_NODE_CHILDREN,
+            GET_NODE_DATA
+        };
+                
+        /**
+         * \brief Creates a new node identified by the given path. 
+         * This method is used internally to implement {@link createNode(...)} 
+         * and {@link createSequence(...)}. On success, this method will set
+         * <code>createdPath</code>.
+         * 
+         * @param path the absolute path name of the node to be created
+         * @param value the initial value to be associated with the node
+         * @param flags the ZK flags of the node to be created
+         * @param createAncestors if true and there are some missing ancestor nodes, 
+         *        this method will attempt to create them
+         * @param createdPath the actual path of the node that has been created; 
+         *        useful for sequences
+         * 
+         * @return true if the node has been successfully created; false otherwise
+         * @throw ZooKeeperException if the operation has failed
+         */ 
+        bool createNode(const string &path, 
+                        const string &value, 
+                        int flags, 
+                        bool createAncestors,
+                        string &createdPath) 
+            throw(ZooKeeperException);
+        
+        /**
+         * Handles an asynchronous event received from the ZK.
+         */
+        void handleEvent(int type, int state, const string &path);
+        
+        /**
+         * Handles an asynchronous event received from the ZK.
+         * This method iterates over all listeners and passes the event 
+         * to each of them.
+         */
+        void handleEvent(int type, int state, const string &path, 
+                         const Listener2Context &listeners);        
+        
+        /**
+         * \brief Enqueues the given event in {@link #m_events} queue.
+         */
+        void enqueueEvent(int type, int state, const string &path);
+        
+        /**
+         * \brief Processes all ZK adapter events in a loop.
+         */
+        void processEvents();
+
+        /**
+         * \brief Processes all user events in a loop.
+         */
+        void processUserEvents();
+
+        /**
+         * \brief Registers the given context in the {@link #m_zkContexts} 
+         * \brief contexts map.
+         * 
+         * @param method the method where the given path is being used
+         * @param path the path of interest
+         * @param listener the event listener to call back later on
+         * @param context the user specified context to be passed back to user
+         */
+        void registerContext(WatchableMethod method, const string &path, 
+                             ZKEventListener *listener, ContextType context);
+        
+        /**
+         * \brief Attempts to find a listener to context map in the contexts' 
+         * \brief map, based on the specified criteria.
+         * If the context is found, it will be removed from the underlying map.
+         * 
+         * @param method the method type identify Listener2Context map
+         * @param path the path to be used to search in the Listener2Context map
+         * 
+         * @return the context map associated with the given method and path, 
+         *         or empty map if not found
+         */
+        Listener2Context findAndRemoveListenerContext(WatchableMethod method, 
+                                                      const string &path);
+
+        /**
+         * Sets the new state in case it's different than the current one.
+         * This method assumes that {@link #m_stateLock} has been already locked.
+         * 
+         * @param newState the new state to be set
+         */
+        void setState(AdapterState newState); 
+        
+        /**
+         * Waits until this client gets connected. The total wait time 
+         * is given by {@link getRemainingConnectTimeout()}.
+         * If a timeout elapses, this method will throw an exception.
+         * 
+         * @throw ZooKeeperException if unable to connect within the given timeout
+         */
+        void waitUntilConnected() 
+            throw(ZooKeeperException);
+                                      
+        /**
+         * Verifies whether the connection is established,
+         * optionally auto reconnecting.
+         * 
+         * @throw ZooKeeperException if this client is disconnected
+         *        and auto-reconnect failed or was not allowed
+         */
+        void verifyConnection() throw(ZooKeeperException);
+
+        /**
+         * Returns the remaining connect timeout. The timeout resets
+         * to {@link #m_connectTimeout} on a successful connection to the ZK.
+         * 
+         * @return the remaining connect timeout, in milliseconds
+         */
+        long long int getRemainingConnectTimeout() { 
+            return m_remainingConnectTimeout; // budget left after waitedForConnect() deductions
+        }
+        
+        /**
+         * Resets the remaining connect timeout to {@link #m_connectTimeout}.
+         */
+        void resetRemainingConnectTimeout() { 
+            m_remainingConnectTimeout = m_zkConfig.getConnectTimeout(); // back to the full configured budget
+        }
+        
+        /**
+         * Updates the remaining connect timeout to reflect the given wait time.
+         * 
+         * @param time the time for how long waited so far on connect to succeed
+         */
+        void waitedForConnect(long long time) { 
+            m_remainingConnectTimeout -= time; // no clamping: the remainder may go negative
+        }
+                
+    private:
+        
+        /**
+         * The mutex used to protect {@link #m_zkContexts}.
+         */
+        zkfuse::Mutex m_zkContextsMutex;
+        
+        /**
+         * The map of registered ZK paths that are being watched.
+         * Each entry maps a function type to another map of registered contexts.
+         * 
+         * @see WatchableMethod
+         */
+        map<int, Path2Listener2Context> m_zkContexts;
+        
+        /**
+         * The current ZK configuration.
+         */
+        const ZooKeeperConfig m_zkConfig;
+
+        /**
+         * The current ZK session.
+         */
+        zhandle_t *mp_zkHandle;
+        
+        /**
+         * The blocking queue of all events waiting to be processed by ZK adapter.
+         */
+        BlockingQueue<ZKWatcherEvent> m_events;
+        
+        /**
+         * The blocking queue of all events waiting to be processed by users
+         * of ZK adapter.
+         */
+        BlockingQueue<ZKWatcherEvent> m_userEvents;
+        
+        /**
+         * The thread that dispatches all events from {@link #m_events} queue.
+         */
+        CXXThread<ZooKeeperAdapter> m_eventDispatcher;
+
+        /**
+         * The thread that dispatches all events from {@link #m_userEvents} queue.
+         */
+        CXXThread<ZooKeeperAdapter> m_userEventDispatcher;
+                
+        /**
+         * Whether {@link #m_eventDispatcher} is terminating.
+         */
+        volatile bool m_terminating;
+        
+        /**
+         * Whether this adapter is connected to the ZK.
+         */
+        volatile bool m_connected;
+        
+        /**
+         * The state of this adapter.
+         */
+        AdapterState m_state;
+        
+        /**
+         * The lock used to synchronize access to {@link #m_state}.
+         */
+        Lock m_stateLock;
+
+        /**
+         * How much time left for the connect to succeed, in milliseconds.
+         */
+        long long int m_remainingConnectTimeout;
+                
+};
+        
+}   /* end of 'namespace zk' */
+
+#endif /* __ZKADAPTER_H__ */

+ 4492 - 0
src/contrib/zkfuse/src/zkfuse.cc

@@ -0,0 +1,4492 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define FUSE_USE_VERSION 26
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#undef _GNU_SOURCE
+#define _GNU_SOURCE
+
+extern "C" {
+#include <fuse.h>
+#include <ulockmgr.h>
+}
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <errno.h>
+#include <sys/time.h>
+#ifdef HAVE_SETXATTR
+#include <sys/xattr.h>
+#endif
+
+#include <getopt.h>
+
+#include <iostream>
+#include <sstream>
+#include <map>
+#include <string>
+#include <boost/utility.hpp>
+#include <boost/weak_ptr.hpp>
+
+#include "log.h"
+#include "mutex.h"
+#include "zkadapter.h"
+
+#define ZOOKEEPER_ROOT_CHILDREN_WATCH_BUG
+
+/**
+   Typedef for ZooKeeperAdapter::Data.
+*/
+typedef std::string Data;
+/**
+   Typedef for ZooKeeperAdapter::NodeNames.
+*/
+typedef vector<std::string> NodeNames;
+
+/* No trailing ';' so the macro can be used inside expressions and array bounds. */
+#define MAX_DATA_SIZE 1024
+
+DEFINE_LOGGER(LOG, "zkfuse");
+
+/** Converts milliseconds to whole seconds; any fractional second is dropped. */
+inline
+uint64_t millisecsToSecs(uint64_t millisecs)
+{
+    const uint64_t millisPerSec = 1000;
+    return millisecs / millisPerSec;
+}
+/** Converts seconds to milliseconds. */
+inline
+uint64_t secsToMillisecs(uint64_t secs)
+{
+    const uint64_t millisPerSec = 1000;
+    return millisPerSec * secs;
+}
+/**
+  Converts nanoseconds to whole milliseconds; any fractional millisecond
+  is dropped. One millisecond is 1,000,000 nanoseconds, so the conversion
+  divides (the previous implementation multiplied, inflating the result).
+ */
+inline
+uint64_t nanosecsToMillisecs(uint64_t nanosecs)
+{
+    return nanosecs / 1000000;
+}
+/**
+  Collapses a timespec into a single count by adding the result of
+  secsToMillisecs() for tv_sec to the result of nanosecsToMillisecs()
+  for tv_nsec.
+ */
+inline
+uint64_t timespecToMillisecs(const struct timespec & ts)
+{
+    const uint64_t fromSecs = secsToMillisecs(ts.tv_sec);
+    const uint64_t fromNanos = nanosecsToMillisecs(ts.tv_nsec);
+    return fromSecs + fromNanos;
+}
+
+typedef boost::shared_ptr<ZooKeeperAdapter> ZooKeeperAdapterSharedPtr;
+
+/**
+ * ZkFuseCommon - holds immutable configuration objects.
+ *
+ * No locks are required to access these objects.
+ * A ZkFuseCommon instance is considered to be a data object and may be copied.
+ */
+class ZkFuseCommon 
+{
+  private:
+    /**
+      References the ZooKeeperAdapter instance to be used.
+     */
+    ZooKeeperAdapterSharedPtr _zkAdapter;
+    /** 
+      Path to the ZooKeeper root node.
+     */
+    std::string _rootPathName;
+    /**
+      Name used to access data "file" when the ZK node has 
+      children.
+     */
+    std::string _dataFileName;
+    /**
+      Suffix added to path components to force interpretation of 
+      path components as directory. This is usually only required
+      for the last component. For example, ZkFuse may consider
+      a leaf node a regular file, e.g. /a/b/c/leaf. The suffix
+      can be used to create child under this node, e.g.
+      mkdir /a/b/c/leaf{forceDirSuffix}/new_leaf.
+     */
+    std::string _forceDirSuffix;
+    /**
+      Prefix common to all metadata nodes created by ZkFuse.
+     */  
+    std::string _metadataNamePrefix;
+    /**
+      Path component name that identifies a directory metadata node.
+      A directory metadata node is currently empty. It is used by ZkFuse
+      to create a child when mkdir is used. This prevents ZkFuse
+      from interpreting the new child as a regular file.
+     */
+    std::string _dirMetadataName;
+    /**
+      Path component name that identifies a regular file metadata node.
+      A regular metadata node holds metadata required to implement
+      Posix regular file semantics, such as setting mtime.
+     */
+    std::string _regMetadataName;
+    /**
+      Number of not-in-use nodes to cache.
+     */
+    unsigned _cacheSize;
+    /**
+      Assume this userid owns all nodes.
+     */
+    const uid_t _uid;
+    /**
+      Assume this groupid owns all nodes.
+     */
+    const gid_t _gid;
+    /**
+      Blocksize used to calculate number of blocks used for stat.
+     */
+    const unsigned _blkSize;
+
+  public:
+    /**
+      Constructor.
+     */
+    ZkFuseCommon()
+      : _zkAdapter(),
+        _rootPathName("/"),
+        _dataFileName(),
+        _forceDirSuffix(),
+        _metadataNamePrefix(".zkfuse."),
+        _dirMetadataName(_metadataNamePrefix + "dir"),
+        _regMetadataName(_metadataNamePrefix + "file"),
+        _cacheSize(256),
+        _uid(geteuid()),
+        _gid(getegid()),
+        _blkSize(8192)
+    {
+    }
+    /**
+      Get root path name. Always "/".
+      \see _rootPathName
+     */
+    const std::string & getRootPathName() const
+    {
+        return _rootPathName;
+    }
+    /**
+      Get dataFileName - the name for synthesized files to access
+      ZooKeeper node data.
+      \see _dataFileName
+     */
+    const std::string & getDataFileName() const
+    {
+        return _dataFileName;
+    }
+    /**
+      Set dataFileName.
+      \see getDataFileName
+      \see _dataFileName
+     */
+    void setDataFileName(const std::string & dataFileName)
+    {
+        _dataFileName = dataFileName;
+    }
+    /**
+      Accessor for metadataNamePrefix, the prefix shared by the names of
+      all metadata ZooKeeper nodes created by ZkFuse.
+
+      \return reference to _metadataNamePrefix.
+      \see _metadataNamePrefix
+     */
+    const std::string & getMetadataNamePrefix() const
+    {
+        return this->_metadataNamePrefix;
+    }
+    /**
+      Accessor for forceDirSuffix, the suffix that is appended to a path
+      component to force it to be treated like a directory.
+
+      \return reference to _forceDirSuffix.
+      \see _forceDirSuffix
+     */
+    const std::string & getForceDirSuffix() const
+    {
+        return this->_forceDirSuffix;
+    }
+    /**
+      Mutator for forceDirSuffix.
+
+      \param forceDirSuffix the new value, copied into _forceDirSuffix.
+      \see getForceDirSuffix
+      \see _forceDirSuffix
+     */
+    void setForceDirSuffix(const std::string & forceDirSuffix)
+    {
+        this->_forceDirSuffix = forceDirSuffix;
+    }
+    /**
+      Accessor for dirMetadataName, the path component name used by all
+      directory metadata ZooKeeper nodes.
+
+      \return reference to _dirMetadataName.
+      \see _dirMetadataName
+     */
+    const std::string & getDirMetadataName() const
+    {
+        return this->_dirMetadataName;
+    }
+    /**
+      Accessor for regMetadataName, the path component name used by all
+      regular file metadata ZooKeeper nodes.
+
+      \return reference to _regMetadataName.
+      \see _regMetadataName
+     */
+    const std::string & getRegMetadataName() const
+    {
+        return this->_regMetadataName;
+    }
+    /**
+      Accessor for the number of not-in-use ZkFuseFile instances to cache.
+
+      \return the current value of _cacheSize.
+      \see _cacheSize
+     */
+    unsigned getCacheSize() const { return this->_cacheSize; }
+    /**
+      Mutator for the cache size.
+
+      \param v the new number of not-in-use nodes to cache.
+      \see getCacheSize
+      \see _cacheSize
+     */
+    void setCacheSize(unsigned v) { this->_cacheSize = v; }
+    /**
+      Accessor for the userid assumed to own all nodes.
+
+      \return the value of _uid.
+      \see _uid
+     */
+    uid_t getUid() const { return this->_uid; }
+    /**
+      Accessor for the groupid assumed to own all nodes.
+
+      \return the value of _gid.
+      \see _gid
+     */
+    gid_t getGid() const { return this->_gid; }
+    /**
+      Accessor for the block size used to calculate the number of blocks
+      reported by stat.
+
+      \return the value of _blkSize.
+      \see _blkSize
+     */
+    unsigned getBlkSize() const { return this->_blkSize; }
+    /**
+      Accessor for the ZooKeeperAdapter.
+
+      \return reference to the shared pointer stored in _zkAdapter.
+      \see _zkAdapter
+     */
+    const ZooKeeperAdapterSharedPtr & getZkAdapter() const
+    {
+        return this->_zkAdapter;
+    }
+    /**
+      Mutator for the ZooKeeperAdapter.
+
+      \param zkAdapter the shared pointer to store in _zkAdapter.
+      \see getZkAdapter
+      \see _zkAdapter
+     */
+    void setZkAdapter(const ZooKeeperAdapterSharedPtr & zkAdapter)
+    {
+        this->_zkAdapter = zkAdapter;
+    }
+};
+
+/**
+  ZkFuseNameType - classifies a ZkFuse path as synthesized or not, and
+  for a synthesized path, which part of the ZooKeeper node it names.
+ */
+enum ZkFuseNameType {
+    /**
+      The ZkFuse path is not synthesized.
+      ZkFuse applies its default rules to decide how the path is
+      represented in Posix terms.
+     */
+    ZkFuseNameDefaultType = 0,
+    /**
+      The ZkFuse path is synthesized and names the data part of a
+      ZooKeeper node, i.e. Posix regular file semantics is expected.
+      Has the value 1.
+     */
+    ZkFuseNameRegType,
+    /**
+      The ZkFuse path is synthesized and names the children part of a
+      ZooKeeper node, i.e. Posix directory semantics is expected.
+      Has the value 2.
+     */
+    ZkFuseNameDirType
+};
+
+class ZkFuseFile;
+
+typedef ZkFuseFile * ZkFuseFilePtr;
+
+class ZkFuseHandleManagerFactory;
+
+/**
+  ZkFuseHandleManager - keeps track of all the ZkFuseFile instances
+  allocated by a ZkFuseHandleManager instance and provides them
+  with a handle that can be used by FUSE.
+
+  It maps a ZooKeeper path to a handle and a handle to a ZkFuseFile
+  instance. It also implements the methods that take path names as
+  arguments, such as open, mknod, rmdir, and rename.
+
+  Memory management
+  - References ZkFuseFile instances using regular pointers.
+    Smart pointers are not used because reference counts are needed to
+    determine how many times a node is opened as a regular file or
+    directory. This also avoids circular smart pointer references.
+  - Each ZkFuseFile instance holds a reference to its ZkFuseHandleManager
+    using a boost::shared_ptr. This ensures that the ZkFuseHandleManager
+    instance that has the handle for the ZkFuseFile instance does not
+    get garbage collected while the ZkFuseFile instance exists.
+
+  Concurrency control
+  - Except for the immutable ZkFuseCommon, all other member variables
+    are protected by _mutex.
+  - A method in this class can hold _mutex when it directly or
+    indirectly invokes ZkFuseFile methods. A ZkFuseFile method that holds
+    a ZkFuseFile instance _mutex cannot invoke a ZkFuseHandleManager
+    method that acquires the ZkFuseHandleManager instance's _mutex.
+    Otherwise, this may cause a deadlock.
+  - Methods with names that begin with "_" do not acquire _mutex.
+    They are usually called by public methods that acquire and hold _mutex.
+ */
+class ZkFuseHandleManager : boost::noncopyable
+{
+  private:
+    /**
+      Typedef of handle, which is an int.
+     */
+    typedef int Handle;
+    /**
+      Typedef of std::map used to map path to handle.
+     */
+    typedef std::map<std::string, Handle> Map;
+    /**
+      Typedef of std::vector used to map handle to ZkFuseFile instances.
+     */
+    typedef std::vector<ZkFuseFilePtr> Files;
+    /**
+      Typedef of std::vector used to hold unused handles.
+     */
+    typedef std::vector<Handle> FreeList;
+    /**
+      Typedef of boost::weak_ptr to the ZkFuseHandleManager instance.
+     */
+    typedef boost::weak_ptr<ZkFuseHandleManager> WeakPtr;
+
+    /* Only ZkFuseHandleManagerFactory can create instances of this class */
+    friend class ZkFuseHandleManagerFactory;
+
+    /**
+      Contains common configuration.
+      Immutable so that it can be accessed without locks.
+     */
+    const ZkFuseCommon _common;
+    /**
+      Maps a path name to a Handle.
+     */
+    Map _map;
+    /**
+      Maps a handle to a ZkFuseFile instance.
+      Also holds pointers to all known ZkFuseFile instances.
+      An element may point to an allocated ZkFuseFile instance or be NULL.
+
+      An allocated ZkFuseFile instance may be in one of the following states:
+      - in-use
+        Currently open, i.e. the ZkFuseFile instance's reference count
+        is greater than 0.
+      - in-cache
+        Not currently open, i.e. the ZkFuseFile instance's
+        reference count is 0.
+     */
+    Files _files;
+    /**
+      List of freed handles.
+     */
+    FreeList _freeList;
+    /**
+      Mutex used to protect this instance.
+     */
+    mutable zkfuse::Mutex _mutex;
+    /**
+      Count of number of in-use entries.
+      It is used to calculate the number of cached nodes.
+      Number of cached nodes is (_files.size() - _numInUse).
+     */
+    unsigned _numInUse;
+    /**
+      WeakPtr to myself.
+     */
+    WeakPtr _thisWeakPtr;
+
+    /**
+      Obtain a handle for the given path.
+      - If path is not known, then allocate a new handle and increment
+        _numInUse, and set newFile to true. The allocated
+        ZkFuseFile instance's reference count should be 1.
+      - If path is known, increase the corresponding
+        ZkFuseFile instance's reference count.
+
+      \return the allocated handle.
+      \param path the path to lookup.
+      \param newFile indicates whether a new handle has been allocated.
+     */
+    Handle allocate(const std::string & path, bool & newFile);
+
+    /**
+      Constructor.
+
+      Reserves capacity in _files and _freeList, and occupies slot 0 of
+      _files with NULL so that handle 0 is never handed out.
+
+      \param common the immutable common configuration.
+      \param reserve number of elements to pre-allocate for
+                     _files and _freeList.
+     */
+    ZkFuseHandleManager(
+            const ZkFuseCommon & common,
+            const unsigned reserve)
+      : _common(common),
+        _files(),
+        _freeList(),
+        _mutex(),
+        _numInUse(0)
+    {
+        _files.reserve(reserve);
+        /* 0 never allocated.
+           reserve() only changes capacity, the vector is still empty, so
+           the placeholder must be appended; indexing _files[0] before the
+           element exists is an out-of-bounds access. */
+        _files.push_back(static_cast<ZkFuseFilePtr>(NULL));
+        _freeList.reserve(reserve);
+    }
+
+  public:
+    /**
+      Typedef for boost::shared_ptr for this ZkFuseHandleManager class.
+     */
+    typedef boost::shared_ptr<ZkFuseHandleManager> SharedPtr;
+
+    /**
+      Destructor.
+     */
+    ~ZkFuseHandleManager()
+    {
+    }
+    /**
+      Get the ZkFuseFile instance for a handle.
+
+      The handle is used to index _files directly while holding _mutex;
+      it is not range checked here.
+
+      \return the ZkFuseFile instance identified by the handle.
+      \param handle get ZkFuseFile instance for this handle.
+     */
+    ZkFuseFilePtr getFile(Handle handle) const
+    {
+        AutoLock lock(_mutex);
+        return _files[handle];
+    }
+    /**
+      Get the immutable common configuration.
+
+      \return the common configuration instance.
+     */
+    const ZkFuseCommon & getCommon() const
+    {
+        return _common;
+    }
+    /**
+      Deallocate a previously allocated handle.
+      This decrements the reference count of the corresponding
+      ZkFuseFile instance. If the reference count becomes zero,
+      decrement _numInUse. It may also cause the ZkFuseFile instance
+      to be reclaimed if there are too many cached ZkFuseFile instances.
+
+      The ZkFuseFile instance should be reclaimed if the number of
+      unused ZkFuseFile instances exceeds the configured cache size, i.e.
+      (_files.size() - _numInUse) > _common.getCacheSize()
+      and the ZkFuseFile instance has a reference count of zero.
+
+      Reclaiming a ZkFuseFile instance involves removing the ZkFuseFile
+      instance's path to handle mapping from _map and the handle to the
+      ZkFuseFile instance mapping from _files, adding the handle to
+      the _freeList, and finally deleting the ZkFuseFile instance.
+
+      \param handle the handle that should be deallocated.
+     */
+    void deallocate(Handle handle);
+    /**
+      Handles ZooKeeper session events.
+      It invokes the known ZkFuseFile instances to let them know
+      that their watches will no longer be valid.
+
+      \param event the ZooKeeper watcher event that was received.
+     */
+    void eventReceived(const ZKWatcherEvent & event);
+    /**
+      Get data from the specified ZooKeeper path.
+
+      \return 0 if successful, otherwise return negative errno.
+      \param path the path of the ZooKeeper node.
+      \param data return data read.
+     */
+    int getData(const std::string & path, Data & data);
+    /**
+      Set data into the specified ZooKeeper path.
+
+      \return 0 if successful, otherwise return negative errno.
+      \param path the path of the ZooKeeper node.
+      \param data the data to be written.
+      \param exists set to true if this path exists.
+      \param doFlush set to true if new data should be flushed to ZooKeeper.
+     */
+    int setData(const std::string & path,
+                const Data & data,
+                bool exists,
+                bool doFlush);
+    /**
+      Create a ZooKeeper node to represent a ZkFuse file or directory.
+
+      \return handle if successful, otherwise return negative errno.
+      \param path to create.
+      \param mode should be either S_IFDIR for directory or
+                  S_IFREG for regular file.
+      \param mayExist if set and the ZooKeeper node already exists, return
+                      valid handle instead of -EEXIST.
+      \param created returns whether a new ZooKeeper node had been created.
+     */
+    int mknod(const std::string & path,
+              mode_t mode,
+              bool mayExist,
+              bool & created);
+    /**
+      Open a ZooKeeper node.
+
+      The justCreated argument is used to differentiate if the _deleted flag
+      of the ZkFuseFile instance is to be trusted (i.e. the path
+      does not exist in ZooKeeper.) The _deleted flag is trusted
+      if the ZkFuseFile instance is known to exist in ZooKeeper after
+      invoking ZooKeeper with the path.
+
+      If justCreated is true, then the ZkFuseFile instance was just created.
+      The ZkFuseFile constructor sets the _deleted flag to true because
+      path is not known to exist and hence should not be accessed.
+      The justCreated flag will force the ZkFuseFile instance to invoke
+      ZooKeeper to determine if the path exists.
+
+      \return handle if successful, otherwise return negative errno.
+      \param path the path to open.
+      \param justCreated indicates if this is newly created ZkFuseFile instance.
+     */
+    int open(const std::string & path, bool justCreated);
+    /**
+      Remove a ZkFuse directory.
+
+      If force is not set, then the ZooKeeper node will be removed only
+      if it has no data and no child nodes except ZkFuse metadata nodes.
+
+      \return 0 if successful, otherwise return negative errno.
+      \param path the path to remove.
+      \param force force removal, i.e. bypass checks.
+      */
+    int rmdir(const char * path, bool force = false);
+    /**
+      Make a ZkFuse directory.
+
+      ZkFuse represents a ZooKeeper node with no data and no children
+      as a regular file. In order to differentiate a newly created
+      directory from an empty regular file, mkdir will create a directory
+      metadata node as a child of the directory.
+
+      \return 0 if successful, otherwise return negative errno.
+      \param path the path of the directory to create.
+      \param mode create directory with this mode
+                  (mode currently not implemented).
+     */
+    int mkdir(const char * path, mode_t mode);
+    /**
+      Remove a ZkFuse regular file.
+
+      A file is the abstraction for the data part of a ZooKeeper node.
+      - If ZkFuse represents a ZooKeeper node as a directory, the data part
+        of the node is represented by synthesizing a name for this file. This
+        synthesized name is visible through readdir if the ZooKeeper node's
+        data is not empty. Removing such a file is done by truncating
+        the ZooKeeper node's data to 0 length.
+      - If ZkFuse represents a ZooKeeper node as a file, then removing the
+        file is done by removing the ZooKeeper node (and its metadata).
+
+      \return 0 if successful, otherwise return negative errno.
+      \param path the path of the file to remove.
+     */
+    int unlink(const char * path);
+    /**
+      Get attributes of a ZkFuse regular file or directory.
+
+      \return 0 if successful, otherwise return negative errno.
+      \param path get attributes for this path
+      \param stbuf store attributes here.
+     */
+    int getattr(const char * path, struct stat & stbuf);
+    /**
+      Rename a ZkFuse regular file.
+
+      It creates a new ZooKeeper node at toPath, copies data and file
+      metadata from the ZooKeeper node at fromPath to the new node,
+      and deletes the current ZooKeeper node. The current ZooKeeper
+      node is not deleted if the new ZooKeeper node cannot be created
+      or the data copy fails.
+
+      It cannot be used to rename a directory.
+
+      \return 0 if successful, otherwise return negative errno.
+      \param fromPath the current path.
+      \param toPath rename to this path.
+     */
+    int rename(const char * fromPath, const char * toPath);
+    /**
+      Add a child ZooKeeper path to the children information cache
+      of the ZkFuseFile instance that caches the parent ZooKeeper node.
+
+      This is used to add a child path after a new ZooKeeper node has
+      been created to the children information cache of the parent
+      ZooKeeper node. This is needed because waiting for the children
+      changed event to update the cache may result in inconsistent local
+      views of the changes.
+      \see removeChildFromParent
+
+      \param childPath the path of the child ZooKeeper node.
+     */
+    void addChildToParent(const std::string & childPath) const;
+    /**
+      Remove a child ZooKeeper path from the children information cache
+      of the ZkFuseFile instance that caches the parent ZooKeeper node.
+
+      For example, this should happen whenever a path is deleted.
+      This child information cache of the parent will eventually be
+      invalidated by watches. However, the delivery of the children
+      change event may come after the next access and thus provide
+      the client with an inconsistent view. One example is that
+      client deletes the last file in a directory, but the children
+      changed event is not delivered before the client invokes rmdir
+      to remove the parent. In this case, the rmdir fails because
+      the cached children information of the parent indicates the
+      "directory" is not empty.
+
+      \param childPath the path of the child ZooKeeper node.
+     */
+    void removeChildFromParent(const std::string & childPath) const;
+    /**
+      Return the path for the parent of the specified ZooKeeper path.
+
+      \return the parent path.
+      \param childPath the child path.
+     */
+    std::string getParentPath(const std::string & childPath) const;
+    /**
+      Return the ZooKeeper path from a ZkFuse path.
+
+      The ZkFuse path may be a synthesized path. For example, a synthesized
+      path is required to access the data part of a ZooKeeper node
+      when ZkFuse represents the ZooKeeper node as directory.
+      A synthesized path is also required to create a child ZooKeeper node
+      under a ZooKeeper node that is represented by a regular file.
+
+      \return the ZooKeeper path for path.
+      \param path the ZkFuse path, which may be a synthesized path.
+      \param nameType indicate whether the ZkFuse path is synthesized and
+                      whether the synthesized ZkFuse path identifies a
+                      directory or a regular file.
+     */
+    std::string getZkPath(const char * path, ZkFuseNameType & nameType) const;
+};
+
+/**
+  ZkFuseHandleManagerFactory - factory for ZkFuseHandleManager.
+
+  The ZkFuseHandleManager constructor is private, so this factory is the
+  only way to create an instance. Going through the factory makes sure
+  that _thisWeakPtr of the instance is initialized right after the
+  instance is created.
+ */
+class ZkFuseHandleManagerFactory
+{
+  public:
+    /**
+      Create an instance of ZkFuseHandleManager owned by a shared pointer
+      and point the instance's _thisWeakPtr back at that shared pointer.
+
+      \return the created ZkFuseHandleManager instance.
+      \param common the common configuration.
+      \param reserve initially reserve space for this number of handles.
+     */
+    static ZkFuseHandleManager::SharedPtr create(
+       const ZkFuseCommon & common,
+       unsigned reserve = 1000)
+    {
+        ZkFuseHandleManager * const rawManager =
+            new ZkFuseHandleManager(common, reserve);
+        /* Hand ownership to the shared pointer immediately. */
+        ZkFuseHandleManager::SharedPtr owner(rawManager);
+        owner->_thisWeakPtr = owner;
+        return owner;
+    }
+};
+
+/**
+  ZkFuseAutoHandle - scoped holder that closes its handle.
+
+  An instance remembers an opened handle and closes it (through reset)
+  when the instance is destroyed. Code that opens a handle can therefore
+  be exception safe.
+ */
+class ZkFuseAutoHandle
+{
+  private:
+    /**
+      Typedef for Handle which is an int.
+     */
+    typedef int Handle;
+    /**
+      Reference to the ZkFuseHandleManager instance that allocated
+      the handle.
+     */
+    ZkFuseHandleManager::SharedPtr _manager;
+    /**
+      The handle to close when this instance is destroyed.
+      A valid handle is equal to or greater than 0.
+      A negative value indicates an error condition, usually the value
+      is a negative errno.
+     */
+    Handle _handle;
+    /**
+      Cached pointer to the ZkFuseFile instance with this handle.
+      This is a performance optimization so that _manager->getFile(_handle)
+      is only called once, when the handle is initialized.
+     */
+    ZkFuseFilePtr _file;
+
+    /**
+      Refresh _file from _handle: look the file up in the manager for a
+      non-negative handle, otherwise store NULL.
+     */
+    void _initFile()
+    {
+        _file = (_handle >= 0)
+            ? _manager->getFile(_handle)
+            : static_cast<ZkFuseFilePtr>(NULL);
+    }
+
+  public:
+    /**
+      Constructor - adopt a previously opened handle.
+
+      \param manager the ZkFuseHandleManager instance who allocated the handle.
+      \param handle the handle.
+     */
+    ZkFuseAutoHandle(
+        const ZkFuseHandleManager::SharedPtr & manager,
+        int handle)
+      : _manager(manager),
+        _handle(handle),
+        _file()
+    {
+        _initFile();
+    }
+    /**
+      Constructor - open path (with justCreated set to false) and
+      remember the resulting handle.
+
+      \param manager the ZkFuseHandleManager instance who allocated the handle.
+      \param path open this path and remember its handle in this instance.
+     */
+    ZkFuseAutoHandle(
+        const ZkFuseHandleManager::SharedPtr & manager,
+        const std::string & path)
+      : _manager(manager),
+        _handle(manager->open(path, false)),
+        _file()
+    {
+        _initFile();
+    }
+    /**
+      Constructor - create path and remember handle.
+
+      The creation mode indicates whether the path identifies a regular file
+      or a directory.
+
+      \param manager the ZkFuseHandleManager instance who allocated the handle.
+      \param path create this path and remember its handle in this instance.
+      \param mode the creation mode for the path, should be either
+                  S_IFDIR or S_IFREG.
+      \param mayExist if set and the path already exists,
+                      then the ZkFuseAutoHandle will hold the handle
+                      for the path instead of -EEXIST.
+                      If not set and the path already exists, then the
+                      handle will be -EEXIST.
+     */
+    ZkFuseAutoHandle(
+        const ZkFuseHandleManager::SharedPtr & manager,
+        const std::string & path,
+        mode_t mode,
+        bool mayExist)
+      : _manager(manager),
+        _handle(-1),
+        _file()
+    {
+        /* mknod reports whether a node was created; not needed here. */
+        bool nodeCreated;
+        _handle = _manager->mknod(path, mode, mayExist, nodeCreated);
+        _initFile();
+    }
+    /**
+      Destructor - closes the handle by calling reset().
+     */
+    ~ZkFuseAutoHandle()
+    {
+        reset();
+    }
+    /**
+      Get the handle.
+
+      \return the remembered handle, negative if there is none.
+      \see _handle
+     */
+    int get() const { return _handle; }
+    /**
+      Get the ZkFuseFile instance of the handle.
+
+      \return the cached ZkFuseFile pointer, NULL if there is none.
+      \see _file
+     */
+    ZkFuseFilePtr getFile() const { return _file; }
+    /**
+      Forget the handle without closing it.
+     */
+    void release()
+    {
+        _file = NULL;
+        _handle = -1;
+    }
+    /**
+      Change the remembered handle.
+
+      It will close the current handle (if valid).
+
+      \param handle the new handle to remember, defaults to -1 (none).
+     */
+    void reset(int handle = -1);
+};
+
+/**
+  ZkFuseStat - C++ wrapper for ZooKeeper Stat.
+
+  This wrapper provides ZooKeeper Stat with a constructor that
+  initializes instance variables of Stat.
+ */
+class ZkFuseStat : public Stat
+{
+  public:
+    /**
+      Constructor - zero the instance variables handled by clear().
+     */
+    ZkFuseStat()
+    {
+        clear();
+    }
+    /**
+      Destructor - nothing to release.
+     */
+    ~ZkFuseStat()
+    {
+    }
+    /**
+      Reset the transaction ids, the timestamps and the version counters
+      to zero.
+     */
+    void clear()
+    {
+        /* transaction ids */
+        czxid = mzxid = 0;
+        /* creation and modification times */
+        ctime = mtime = 0;
+        /* data, children and ACL versions */
+        version = cversion = aversion = 0;
+    }
+};
+
+/**
+  ZkFuseFile - an instance encapsulates the runtime state of an allocated
+  ZooKeeper node.
+
+  Memory management
+  - Referenced by the ZkFuseHandleManager that created this instance.
+  - Uses boost::shared_ptr to reference the ZkFuseHandleManager that 
+    created this instance. This makes sure that this ZkFuseHandleManager
+    instance cannot be deleted when it has allocated ZkFuseFile instances.
+  - A ZkFuseFile instance deletes itself if it can be reclaimed.
+    It can be reclaimed if it has no watches, its reference count is zero,
+    and the ZkFuseHandleManager instance would have more than the 
+    configured number of cached ZkFuseFile instances. 
+  - A ZkFuseFile instance cannot be deleted if it has active watches on
+    its ZooKeeper node. When one of its watches fires, the ZkFuseFile
+    instance must exist because one of its methods will be invoked 
+    to process the event. If the ZkFuseFile instance has been deleted,
+    the method will access previously freed memory.
+
+  Concurrency control
+  - _mutex protects the instance variables of an instance.
+  - Callers should assume that a public method will acquire _mutex. 
+  - Methods of this class may not hold _mutex while invoking an
+    ZkFuseHandleManager instance.
+  - Methods with names that begin with "_" do not acquire _mutex.
+    They are usually called by public methods that acquire and hold _mutex.
+*/
+class ZkFuseFile : boost::noncopyable
+{
+  public:
+    /**
+      Maximum size for the data part of a ZooKeeper node.
+     */
+    static const unsigned maxDataFileSize = MAX_DATA_SIZE;
+
+  private:
+    /**
+      Mode returned by getattr for a ZkFuse directory.
+     */
+    static const mode_t dirMode = (S_IFDIR | 0777);
+    /**
+      Mode returned by getattr for a ZkFuse regular file.
+     */
+    static const mode_t regMode = (S_IFREG | 0777);
+
+    /**
+      References the ZkFuseHandleManager that created this instance.
+     */
+    ZkFuseHandleManager::SharedPtr _manager;
+    /**
+      Handle for this instance.
+     */
+    const int _handle;
+    /**
+      Path of the ZooKeeper node represented by this instance.
+     */
+    const std::string _path;
+    /**
+      Mutex that protects the instance variables of this instance.
+     */
+    mutable zkfuse::Mutex _mutex;
+    /**
+      Reference count for this instance, i.e. the number of opens 
+      minus the number of closes.
+     */
+    int _refCount;
+    /**
+      Indicates whether the ZooKeeper node exist.
+      This flag allows caching of deleted ZooKeeper node to avoid
+      repeated ZooKeeper lookups for a non-existent path, and avoid
+      using cached information. 
+      
+      Its value is true if
+      - the node is verified to not exist (by calling ZooKeeper), or
+      - its existence is unknown because ZooKeeper has not been
+        invoked to verify its path's existence.
+     */
+    bool _deleted;
+    /**
+      Count of current number directory opens minus directory closes.
+     */
+    int _openDirCount;
+    /**
+      Indicates whether cached children information is valid.
+      
+      It is true if the cached children information is valid.
+     */
+    bool _initializedChildren;
+    /**
+      Indicates whether there is an outstanding children watch.
+
+      It is true if it has an outstanding children watch.
+     */
+    bool _hasChildrenListener;
+    /**
+      Cached children information. 
+
+      The cache is valid if _initializedChildren is true.
+     */
+    NodeNames _children;
+
+    /**
+      Indicates whether the cached data is valid.
+
+      It is true if the cached data and ZooKeeper Stat are valid.
+     */
+    bool _initializedData;
+    /**
+      Indicates whether there is an outstanding data watch.
+
+      It is true if it has an outstanding data watch.
+     */
+    bool _hasDataListener;
+    /**
+      Indicates whether the cached data (_activeData) has been modified.
+
+      It is true if the cached data has been modified.
+     */
+    bool _dirtyData;
+    /**
+      Currently active data.
+
+      To maintain atomicity of updates and emulate Posix semantics, 
+      when a ZkFuse file remains open, the same data will be accessed
+      by the file's clients. The data will be flushed to ZooKeeper when
+      the flush method is called. The flush method may be called
+      explicitly by a client or implicitly when the ZkFuse file is no 
+      longer currently open.
+
+      _activeData and _activeStat stores the data and ZooKeeper Stat
+      that will be accessed by the file's clients.
+
+      If there are changes when the ZkFuse file is open, new data is
+      cached as latest data (by _latestData and _latestStat).
+     */
+    Data _activeData;
+    /**
+      Currently active ZooKeeper Stat.
+      \see _activeData
+     */
+    ZkFuseStat _activeStat;
+    /**
+      Latest data.
+      This is either the same as _activeData or it is newer. It is newer
+      if it has been updated by an event triggered by a data watch.
+     */
+    Data _latestData;
+    /**
+      Latest ZooKeeper Stat.
+      This is either the same as _activeStat or it is newer. It is newer
+      if it has been updated by an event triggered by a data watch.
+     */
+    ZkFuseStat _latestStat;
+
+    /**
+      Look up the userid in the manager's common configuration.
+
+      \return the userid.
+     */
+    uid_t _getUid() const
+    {
+        const ZkFuseCommon & common = _manager->getCommon();
+        return common.getUid();
+    }
+    /**
+      Look up the groupid in the manager's common configuration.
+
+      \return the groupid.
+     */
+    gid_t _getGid() const
+    {
+        const ZkFuseCommon & common = _manager->getCommon();
+        return common.getGid();
+    }
+    /**
+      Look up the block size in the manager's common configuration.
+
+      \return the block size.
+     */
+    unsigned _getBlkSize() const
+    {
+        const ZkFuseCommon & common = _manager->getCommon();
+        return common.getBlkSize();
+    }
+    /**
+      Count all cached children, metadata children included.
+
+      \return the size of _children.
+     */
+    unsigned _numChildrenIncludeMeta() const
+    {
+        const unsigned total = _children.size();
+        LOG_DEBUG(LOG, "numChildrenIncludeMeta() returns %u", total);
+        return total;
+    }
+    /**
+      Count the cached children that are not metadata children,
+      i.e. those for which _isMeta returns false.
+
+      \return the number of children excluding metadata children.
+     */
+    unsigned _numChildrenExcludeMeta() const
+    {
+        unsigned nonMeta = 0;
+        NodeNames::const_iterator child = _children.begin();
+        while (child != _children.end()) {
+            if (_isMeta(*child) == false) {
+                ++nonMeta;
+            }
+            ++child;
+        }
+        LOG_DEBUG(LOG, "numChildrenExcludeMeta() returns %u", nonMeta);
+        return nonMeta;
+    }
+    /**
+      Whether the ZooKeeper node has any cached children at all,
+      metadata children included.
+
+      \return true if it has children including metadata children.
+     */
+    bool _hasChildrenIncludeMeta() const
+    {
+        const unsigned total = _numChildrenIncludeMeta();
+        return total > 0;
+    }
+    /**
+      Whether the ZooKeeper node has any cached children other than
+      metadata children.
+
+      \return true if it has children excluding metadata children.
+     */
+    bool _hasChildrenExcludeMeta() const
+    {
+        const unsigned nonMeta = _numChildrenExcludeMeta();
+        return nonMeta > 0;
+    }
+    /**
+      Whether the ZooKeeper node has data, judged by the active data.
+
+      \return true if _activeData is not empty.
+     */
+    bool _hasData() const
+    {
+        return !_activeData.empty();
+    }
+    /**
+      Whether the cached children contain the specified child path.
+
+      This is a linear search of _children for an exact match.
+
+      \return true if the ZooKeeper node has a child with the specified path.
+      \param childPath the path of the child.
+     */
+    bool _hasChildPath(const std::string & childPath) const
+    {
+        const NodeNames::const_iterator pos =
+            std::find(_children.begin(), _children.end(), childPath);
+        const bool found = (pos != _children.end());
+        LOG_DEBUG(LOG, "hasChild(childPath %s) returns %d", 
+                  childPath.c_str(), found);
+        return found;
+    }
+    /**
+      Whether the given child is a ZkFuse synthesized (metadata) child.
+
+      The check skips the leading part of childName that corresponds to
+      this node's path plus the "/" separator (just the "/" when this
+      node's path has length 1, i.e. the root dir) and tests whether what
+      follows begins with the metadataNamePrefix obtained from the
+      common configuration.
+      \see _metadataNamePrefix
+
+      \return true if the path component is a ZkFuse synthesized path
+                   component.
+      \param childName the child name to check; it is compared from the
+                       offset described above.
+     */
+    bool _isMeta(const std::string & childName) const
+    {
+        const std::string & metaPrefix = 
+            _manager->getCommon().getMetadataNamePrefix();
+        /* Position in childName where the last path component starts. */
+        unsigned start = 1; /* special case for root dir */
+        if (_path.length() > 1) {
+            start = _path.length() + 1;
+        }
+        unsigned required = start + metaPrefix.length();
+        /* Too short to hold the prefix, or prefix does not match. */
+        const bool notMeta =
+            childName.length() < required ||
+            childName.compare(start, metaPrefix.length(), metaPrefix) != 0;
+        bool isMeta = !notMeta;
+        LOG_DEBUG(LOG, "isMeta(childName %s) returns %d", 
+                  childName.c_str(), isMeta);
+        return isMeta;
+    }
+    /**
+      Build a path for a specific child of the ZooKeeper node.
+ 
+      This is done by appending "/" (unless the ZooKeeper node
+      is the root node) and the name of the child.
+
+      \return the path for the specified child of the ZooKeeper node.
+      \param name the name of the child.
+     */
+    std::string _getChildPath(const std::string & name) const
+    {
+        /* Path composition is delegated to buildChildPath, using this
+           node's own path as the parent. */
+        std::string childPath = buildChildPath(_path, name);
+        return childPath;
+    }
+    /**
+      Whether the ZooKeeper node has a regular file metadata child node.
+
+      \return true if the ZooKeeper node has a regular file metadata child
+                   node.
+     */
+    bool _hasRegMetadata() const
+    {
+        /* Build the path of the regular file metadata child, then look
+           it up in the cached children. */
+        const std::string metaPath = 
+            _getChildPath(_manager->getCommon().getRegMetadataName());
+        const bool present = _hasChildPath(metaPath);
+        LOG_DEBUG(LOG, "hasRegMetadata() returns %d", present);
+        return present;
+    }
+    /**
+      Whether the ZooKeeper node has a directory metadata child node.
+
+      \return true if the ZooKeeper node has a directory metadata child
+                   node.
+     */
+    bool _hasDirMetadata() const
+    {
+        /* Build the path of the directory metadata child, then look
+           it up in the cached children. */
+        const std::string metaPath = 
+            _getChildPath(_manager->getCommon().getDirMetadataName());
+        const bool present = _hasChildPath(metaPath);
+        LOG_DEBUG(LOG, "hasDirMetadata() returns %d", present);
+        return present;
+    }
+    /** 
+      Whether ZkFuse should present the ZooKeeper node as a ZkFuse regular
+      file.
+     
+      It should be a ZkFuse regular file if it has no children or its 
+      only child is its regular file metadata child node.
+
+      \return true if the Zookeeper node should be presented as a ZkFuse
+                   regular file.
+     */
+    bool _isReg() const
+    {
+        const unsigned childCount = _numChildrenIncludeMeta();
+        bool regular;
+        if (childCount == 0) {
+            regular = true;
+        } else if (childCount == 1) {
+            /* A lone child keeps this a regular file only if that child
+               is the regular file metadata node. */
+            regular = _hasRegMetadata();
+        } else {
+            regular = false;
+        }
+        LOG_DEBUG(LOG, "isReg() returns %d", regular);
+        return regular;
+    }
+    /**
+      Whether ZkFuse should present the ZooKeeper node as a ZkFuse directory.
+     
+      It should be a ZkFuse directory if it should not be presented as
+      a ZkFuse regular file.
+      \see _isReg
+
+      \return true if the Zookeeper node should be presented as a ZkFuse
+                   directory.
+     */
+    bool _isDir() const 
+    {
+        /* A directory is whatever is not presented as a regular file. */
+        const bool regular = _isReg();
+        return !regular;
+    }
+    /**
+      Whether ZkFuse should present the ZooKeeper node as a ZkFuse regular
+      file by taking into account the specified ZkFuseNameType.
+
+      The ZkFuseNameType may override the default ZkFuse presentation of
+      a ZooKeeper node. 
+
+      \return true if ZkFuse should present the ZooKeeper node as a ZkFuse
+                   regular file.
+      \param nameType specifies the ZkFuseNameType.
+      \param doLock whether _mutex should be acquired, it should be true
+                    if the caller did not acquire _mutex.
+     */
+    bool _isRegNameType(ZkFuseNameType nameType, bool doLock = false) const
+    {
+        bool isRegular;
+        if (nameType == ZkFuseNameRegType) {
+            /* The name type forces regular file presentation. */
+            isRegular = true;
+        } else if (nameType == ZkFuseNameDirType) {
+            /* The name type forces directory presentation. */
+            isRegular = false;
+        } else if (doLock) {
+            /* Default presentation, evaluated under _mutex acquired here. */
+            AutoLock lock(_mutex);
+            isRegular = _isReg();
+        } else {
+            /* Default presentation, no locking done here. */
+            isRegular = _isReg();
+        }
+        LOG_DEBUG(LOG, "isRegNameType(nameType %d) returns %d", 
+                  int(nameType), isRegular);
+        return isRegular;
+    }
+    /**
+      Whether ZkFuse should present the ZooKeeper node as a ZkFuse 
+      directory by taking into account the specified ZkFuseNameType.
+
+      The ZkFuseNameType may override the default ZkFuse presentation of
+      a ZooKeeper node. 
+
+      \return true if ZkFuse should present the ZooKeeper node as a ZkFuse
+                   directory.
+      \param nameType specifies the ZkFuseNameType.
+      \param doLock whether _mutex should be acquired, it should be true
+                    if the caller did not acquire _mutex.
+     */
+    bool _isDirNameType(ZkFuseNameType nameType, bool doLock = false) const
+    {
+        bool isDirectory;
+        if (nameType == ZkFuseNameRegType) {
+            /* The name type forces regular file presentation. */
+            isDirectory = false;
+        } else if (nameType == ZkFuseNameDirType) {
+            /* The name type forces directory presentation. */
+            isDirectory = true;
+        } else if (doLock) {
+            /* Default presentation, evaluated under _mutex acquired here. */
+            AutoLock lock(_mutex);
+            isDirectory = _isDir();
+        } else {
+            /* Default presentation, no locking done here. */
+            isDirectory = _isDir();
+        }
+        LOG_DEBUG(LOG, "isDirNameType(nameType %d) returns %d", 
+                  int(nameType), isDirectory);
+        return isDirectory;
+    }
+    /**
+      ZkFuse regular file metadata.
+     */
+    struct Metadata {
+        /**
+          Version of the ZooKeeper node data that this metadata is good for.
+         */
+        uint32_t version;
+        /**
+          Access time in milliseconds.
+         */
+        uint64_t atime;
+        /**
+          Modified time in milliseconds.
+         */
+        uint64_t mtime;
+
+        /**
+          Constructor.
+
+          Initializes version, atime and mtime to zero.
+         */
+        Metadata() 
+          : version(0),
+            atime(0),
+            mtime(0)
+        {
+        }
+    };
+    /**
+      Encode Metadata into Data so that it can be stored in a metadata
+      ZooKeeper node.
+
+      Each Metadata attribute is encoded as "<key>: <value>" on single line
+      terminated by newline.
+
+      \param meta the input Metadata.
+      \param data the output Data after encoding.
+     */
+    void _encodeMetadata(const Metadata & meta, Data & data) const
+    {
+        LOG_DEBUG(LOG, "encodeMetadata()");
+        /* One "<key>: <value>" record per line: version, atime, mtime. */
+        std::ostringstream encoded;
+        encoded << "version: " << meta.version << endl;
+        encoded << "atime: " << meta.atime << endl;
+        encoded << "mtime: " << meta.mtime << endl;
+        data = encoded.str();
+    }
+    /**
+      Decode Data from a metadata child ZooKeeper node into Metadata. 
+
+      Data is a stream of "<key>: <value>" records separated by newline.
+
+      \param data the input Data.
+      \param meta the output Metadata after decoding.
+     */
+    void _decodeMetadata(const Data & data, Metadata & meta) const
+    {
+        LOG_DEBUG(LOG, "decodeMetadata(data %s)", data.c_str());
+        std::istringstream iss(data);
+        /* Fixed-size record buffers; the key keeps its trailing ':'. */
+        char key[128];
+        char value[1024];
+        /* Loop on good() rather than !eof(): a failed extraction (a record
+           that starts with ' ', or a value that does not fit in the
+           buffer) sets failbit without eofbit and turns every later read
+           into a no-op, so testing eof() alone would never terminate. */
+        while (iss.good()) {
+            key[0] = 0;
+            value[0] = 0;
+            /* Read the key up to, but not including, the separating ' '. */
+            iss.get(key, sizeof(key), ' ');
+            if (!iss.good()) {
+                /* End of input, or a malformed record without a key. */
+                break;
+            }
+            /* Skip the separator, then take the rest of the line as value. */
+            iss.ignore(32, ' ');
+            iss.getline(value, sizeof(value));
+            LOG_DEBUG(LOG, "key %s value %s", key, value);
+            if (strcmp(key, "version:") == 0) {
+                unsigned long long v = strtoull(value, NULL, 0);
+                LOG_DEBUG(LOG, "version: %llu", v);
+                meta.version = v;
+            }
+            else if (strcmp(key, "atime:") == 0) {
+                unsigned long long v = strtoull(value, NULL, 0);
+                LOG_DEBUG(LOG, "atime: %llu", v);
+                meta.atime = v;
+            }
+            else if (strcmp(key, "mtime:") == 0) {
+                unsigned long long v = strtoull(value, NULL, 0);
+                LOG_DEBUG(LOG, "mtime: %llu", v);
+                meta.mtime = v;
+            }
+            else {
+                /* Unknown keys are reported and otherwise ignored. */
+                LOG_WARN(LOG, "decodeMetadata: path %s unknown key %s %s\n",
+                         _path.c_str(), key, value);
+            }
+        }
+        LOG_DEBUG(LOG, "decodeMetadata done");
+    }
+    /**
+      Flush data to the ZooKeeper node.
+
+      If cached active data has been modified, flush it to the ZooKeeper node.
+      Returns -EIO if the data cannot be written because the cached active
+      data is not the expected version, i.e. ZooKeeper returns ZBADVERSION.
+      -EIO may also indicate a more general failure, such as unable to 
+      communicate with ZooKeeper.
+
+      \return 0 if successful, otherwise negative errno.
+     */
+    int _flush()
+    {
+        LOG_DEBUG(LOG, "flush() path %s", _path.c_str());
+
+        int res = 0;
+        try {
+            if (!_dirtyData) {
+                /* Nothing was modified, so there is nothing to write. */
+                LOG_DEBUG(LOG, "not dirty");
+            } 
+            else {
+                LOG_DEBUG(LOG, "is dirty, active version %d",
+                          _activeStat.version);
+                _manager->getCommon().getZkAdapter()->
+                    setNodeData(_path, _activeData, _activeStat.version);
+                /* The write succeeded: mark the node as existing and
+                   assume the version advanced by exactly one. */
+                _deleted = false;
+                _activeStat.version++;
+                _dirtyData = false;
+            }
+        } catch (const ZooKeeperException & e) {
+            /* Both failure kinds map to -EIO; only the log text differs. */
+            const bool badVersion = (e.getZKErrorCode() == ZBADVERSION);
+            if (badVersion) {
+                LOG_ERROR(LOG, "flush %s bad version, was %d",
+                          _path.c_str(), _activeStat.version);
+            } 
+            else {
+                LOG_ERROR(LOG, "flush %s exception %s", 
+                          _path.c_str(), e.what());
+            }
+            res = -EIO;
+        }
+
+        LOG_DEBUG(LOG, "flush returns %d", res);
+        return res;
+    }
+    /**
+      Truncate or expand the size of the cached active data.
+
+      This method only changes the size of the cached active data. 
+      This change is committed to ZooKeeper when the cached data 
+      is written to the ZooKeeper node by flush().
+
+      Return -EFBIG if the requested size exceeds the maximum.
+
+      \return 0 if successful, otherwise negative errno.
+      \param size the requested size.
+     */
+    int _truncate(off_t size) 
+    {
+        /* off_t is signed, so it is logged as a signed value. */
+        LOG_DEBUG(LOG, "truncate(size %lld) path %s", 
+                  static_cast<long long>(size), _path.c_str());
+        
+        int res = 0;
+
+        if (!_isInitialized()) {
+            LOG_DEBUG(LOG, "not initialized");
+            res = -EIO;
+        }
+        else if (size < 0) {
+            /* A negative length is invalid; reject it before it is
+               compared against unsigned sizes. */
+            LOG_DEBUG(LOG, "negative size");
+            res = -EINVAL;
+        }
+        else {
+            const size_t newSize = static_cast<size_t>(size);
+            if (newSize > _activeData.size()) {
+                if (size > maxDataFileSize) {
+                    LOG_DEBUG(LOG, "size > maxDataFileSize");
+                    res = -EFBIG;
+                } else {
+                    LOG_DEBUG(LOG, "increase to size");
+                    /* Extend to exactly newSize bytes, padding the new
+                       tail with zero bytes. */
+                    _activeData.resize(newSize, '\0');
+                    _dirtyData = true;
+                    res = 0;
+                }
+            }
+            else if (newSize < _activeData.size()) {
+                LOG_DEBUG(LOG, "decrease to size");
+                _activeData.resize(newSize);
+                _dirtyData = true;
+                res = 0;
+            }
+            else {
+                LOG_DEBUG(LOG, "do nothing, same size");
+            }
+        }
+
+        LOG_DEBUG(LOG, "truncate returns %d", res);
+        return res;
+    }
+    /**
+      Remove a ZkFuse directory.
+
+      If force is true, then the ZooKeeper node and its descendants
+      will be deleted.
+
+      If force is false, then this method implements the semantics
+      of removing a ZkFuse directory. It will delete the ZooKeeper node
+      only if the ZooKeeper node has no data and no non-metadata 
+      children.
+      - Return -ENOTDIR if the ZooKeeper node is not considered
+        to be a directory (after taking into consideration the specified
+        ZkFuseNameType). 
+      - Return -ENOTEMPTY if the ZooKeeper node has data or it has 
+        non-metadata children.
+      - Return -ENOENT if the ZooKeeper cannot be deleted, usually this
+        is because it does not exist.
+
+      \return 0 if successful, otherwise negative errno.
+      \param nameType the ZkFuseNameType of the path used to specify the
+                      directory to be removed. It influences whether ZkFuse
+                      considers the ZooKeeper node to be a regular file or
+                      directory. \see ZkFuseNameType
+      \param force    set to true to bypass ZkFuse rmdir semantic check.
+     */
+    int _rmdir(ZkFuseNameType nameType, bool force)
+    {
+        LOG_DEBUG(LOG, "rmdir(nameType %d, force %d) path %s", 
+                  int(nameType), force, _path.c_str());
+
+        int res = 0;
+        try {
+            /* The rmdir semantic checks are skipped when forced. */
+            int checkRes = 0;
+            if (!force) {
+                if (!_isDirNameType(nameType)) {
+                    LOG_DEBUG(LOG, "failed because not directory");
+                    checkRes = -ENOTDIR;
+                } 
+                else if (_hasData()) {
+                    /* a non-empty "data file" blocks rmdir */
+                    LOG_DEBUG(LOG, "failed because node has data");
+                    checkRes = -ENOTEMPTY;
+                } 
+                else if (_hasChildrenExcludeMeta()) {
+                    /* existing "subdirs" block rmdir */
+                    LOG_DEBUG(LOG, "failed because node has children");
+                    checkRes = -ENOTEMPTY;
+                }
+            }
+
+            if (checkRes != 0) {
+                res = checkRes;
+            } 
+            else {
+                LOG_DEBUG(LOG, "delete node");
+                const bool removed = _manager->getCommon().getZkAdapter()->
+                     deleteNode(_path, true);
+                if (!removed) {
+                    /* TODO: differentiate delete error conditions,
+                     * e.g. access permission, not exists, ... ?
+                     */
+                    LOG_DEBUG(LOG, "delete failed");
+                    res = -ENOENT;
+                } else {
+                    /* The node is gone, so its cached children are stale. */
+                    _deleted = true;
+                    _clearChildren();
+                    res = 0;
+                }
+            }
+        } catch (const std::exception & e) {
+            LOG_ERROR(LOG, "rmdir %s exception %s", _path.c_str(), e.what());
+            res = -EIO;
+        }
+
+        LOG_DEBUG(LOG, "rmdir returns %d", res);
+        return res;
+    }
+    /**
+      Remove a ZkFuse regular file.
+
+      This method implements the semantics of removing a ZkFuse regular file.
+      - If the ZkFuse regular file represents the data part of the 
+        ZooKeeper node which is presented as a ZkFuse directory, 
+        the regular file is virtually deleted by truncating the
+        ZooKeeper node's data. Readdir will not synthesize a regular 
+        file entry for the data part of a ZooKeeper node if 
+        the ZooKeeper node has no data.
+      - If the ZkFuse regular file represents the data part of the 
+        ZooKeeper node which is presented as a ZkFuse regular file,
+        the ZooKeeper node and its descendants are deleted.
+
+      Returns -EISDIR if the ZkFuse regular file cannot be deleted
+      because ZkFuse considers it to be a directory.
+
+      \return 0 if successful, otherwise negative errno.
+      \param nameType the ZkFuseNameType of the path used to specify the
+                      regular file to be removed. It influences whether ZkFuse
+                      considers the ZooKeeper node to be a regular file or
+                      directory. \see ZkFuseNameType
+    */
+    int _unlink(ZkFuseNameType nameType) 
+    {
+        LOG_DEBUG(LOG, "unlink(nameType %d) path %s", 
+                  int(nameType), _path.c_str());
+
+        int res = 0;
+        if (nameType == ZkFuseNameDirType) {
+            /* A path explicitly naming a directory cannot be unlinked. */
+            res = -EISDIR;
+        } 
+        else if (nameType == ZkFuseNameRegType) {
+            if (_isDir()) {
+                /* Data part of a directory node: only drop the data. */
+                res = _truncate(0);
+            } else {
+                /* Regular file node: forced removal of the node. */
+                res = _rmdir(nameType, true);
+            }
+        } 
+        else {
+            /* Default name type: only regular files may be unlinked. */
+            if (_isReg()) {
+                res = _rmdir(nameType, true);
+            } else {
+                res = -EISDIR;
+            }
+        }
+
+        LOG_DEBUG(LOG, "unlink returns %d", res);
+        return res;
+    }
+    /**
+      Whether cached children and data are valid.
+
+      \return true if cached children and data are valid.
+     */
+    bool _isInitialized() const
+    {
+        /* Both the children cache and the data cache must be valid. */
+        if (!_initializedChildren) {
+            return false;
+        }
+        return _initializedData;
+    }
+    /**
+      Clear and invalidate cached children information.
+     */
+    void _clearChildren()
+    {
+        /* Drop the cached child paths and mark the cache as invalid. */
+        _children.clear();
+        _initializedChildren = false;
+    }
+    /**
+      Clear and invalidate cached data.
+     */
+    void _clearData() 
+    {
+        /* Mark the data cache as invalid and without pending writes. */
+        _initializedData = false;
+        _dirtyData = false;
+
+        /* Drop the active copy and its stat. */
+        _activeData.clear();
+        _activeStat.clear();
+
+        /* Drop the latest copy and its stat. */
+        _latestData.clear();
+        _latestStat.clear();
+    }
+    /**
+      Whether the ZkFuseFile instance is a zombie.
+      
+      It is a zombie if it is not currently open, i.e. its reference count
+      is 0.
+     */
+    bool _isZombie() const 
+    {
+        /* A zombie has no remaining references. */
+        const bool unreferenced = (_refCount == 0);
+        return unreferenced;
+    }
+    /**
+      Whether the ZkFuseFile instance is currently opened as a regular file
+      only once.
+      
+      It is used to determine when the cached data can be replaced with
+      the latest data. \see _activeData.
+      
+      \return true if its currently opened as a regular file only once.
+     */
+    bool _isOnlyRegOpen() const
+    {
+        /* References that are not directory opens are counted as regular
+           file opens; exactly one of them must remain. */
+        const bool singleRegOpen = ((_refCount - _openDirCount) == 1);
+        return singleRegOpen;
+    }
+    /**
+      Get attributes without accessing metadata.
+      
+      The atime and mtime returned do not take into consideration
+      overrides present in a metadata file.
+
+      \return 0 if successful, otherwise negative errno.
+      \param stbuf return attributes here.
+      \param nameType specifies the ZkFuseNameType of the ZkFuse path used
+                      to get attributes. It influences whether the directory
+                      or regular file attributes are returned.
+     */
+    int _getattrNoMetaAccess(struct stat & stbuf, ZkFuseNameType nameType) const
+    {
+        /* Guard clauses: nothing is written to stbuf on failure. */
+        if (_deleted) {
+            LOG_DEBUG(LOG, "deleted");
+            return -ENOENT;
+        } 
+        if (!_isInitialized()) {
+            LOG_DEBUG(LOG, "not initialized");
+            return -EIO;
+        }
+        assert(_isInitialized());
+
+        if (_isRegNameType(nameType)) {
+            LOG_DEBUG(LOG, "regular");
+            stbuf.st_mode = regMode;
+            stbuf.st_nlink = 1;
+            stbuf.st_size = _activeData.size();
+        } else {
+            LOG_DEBUG(LOG, "directory");
+            stbuf.st_mode = dirMode;
+            /* One link per cached child, plus one when the node has data. */
+            stbuf.st_nlink = 
+                _children.size() + (_activeData.empty() ? 0 : 1);
+            /* A directory reports its link count as its size. */
+            stbuf.st_size = stbuf.st_nlink;
+        }
+
+        stbuf.st_uid = _getUid();
+        stbuf.st_gid = _getGid();
+
+        /* IMPORTANT:
+         * Convert millisecs to secs before assigning to st_atime,
+         * st_mtime, and st_ctime. Otherwise truncation from 64-bit to
+         * 32-bit would lose the most significant 32 bits before the
+         * conversion to secs.
+         */
+        /* atime is filled from the stat's mtime. */
+        stbuf.st_atime = millisecsToSecs(_activeStat.mtime);
+        stbuf.st_mtime = millisecsToSecs(_activeStat.mtime);
+        stbuf.st_ctime = millisecsToSecs(_activeStat.ctime);
+
+        /* Block count is the size rounded up to whole blocks. */
+        stbuf.st_blksize = _getBlkSize();
+        stbuf.st_blocks = 
+            (stbuf.st_size + stbuf.st_blksize - 1) / stbuf.st_blksize;
+        return 0;
+    }
+    /**
+      Get the context that should be registered with the data and
+      children watches.
+
+      The context is intended to be a pointer to the ZkFuseFile instance
+      cast to the desired ContextType; the current implementation
+      returns a null context instead.
+
+      \return the context.
+     */
+    ZooKeeperAdapter::ContextType _getZkContext() const
+    {
+        /* NOTE(review): this always yields a null context, while
+           DataListener and ChildrenListener assert a non-null context --
+           confirm whether events registered with this context are ever
+           routed to those listeners. */
+        return (ZooKeeperAdapter::ContextType) NULL;
+    }
+
+    /**
+      DataListener - listener that listens for ZooKeeper data events
+      and calls dataEventReceived on the ZkFuseFile instance 
+      identified by the event context.
+      \see dataEventReceived
+     */
+    class DataListener : public ZKEventListener {
+      public:
+       /**
+         Forward a data event to the ZkFuseFile instance carried in the
+         event context. The event source is not used.
+        */
+        virtual void eventReceived(const ZKEventSource & source,
+                                   const ZKWatcherEvent & event)
+        {
+            /* The context must carry the target ZkFuseFile. */
+            assert(event.getContext() != 0);
+            static_cast<ZkFuseFile *>(event.getContext())->
+                dataEventReceived(event);
+        }
+    };
+    
+    /**
+      ChildrenListener - listener that listens for ZooKeeper children events
+      and calls childrenEventReceived on the ZkFuseFile instance 
+      identified by the event context.
+      \see childrenEventReceived
+     */
+    class ChildrenListener : public ZKEventListener {
+      public:
+       /**
+         Forward a children event to the ZkFuseFile instance carried in the
+         event context. The event source is not used.
+        */
+        virtual void eventReceived(const ZKEventSource & source,
+                                   const ZKWatcherEvent & event)
+        {
+            /* The context must carry the target ZkFuseFile. */
+            assert(event.getContext() != 0);
+            static_cast<ZkFuseFile *>(event.getContext())->
+                childrenEventReceived(event);
+        }
+    };
+    
+    /**
+      Globally shared DataListener. 
+     */
+    /* Static member: a single DataListener shared by all instances. */
+    static DataListener _dataListener;
+    /**
+      Globally shared ChildrenListener. 
+     */
+    /* Static member: a single ChildrenListener shared by all instances. */
+    static ChildrenListener _childrenListener;
+
+  public:
+    /**
+      Constructor.
+
+      Sets reference count to one, i.e. it has been constructed because
+      a client is trying to open the path. \see _refCount.
+      Sets deleted to true. \see _deleted.
+      Sets number of current directory opens to zero. \see _openDirCount.
+      Invalidates the cache for children information and data. 
+
+      \param manager the ZkFuseHandleManager instance who is creating this 
+                     ZkFuseFile instance.
+      \param handle  the handle assigned by the ZkFuseHandleManager instance
+                     for this ZkFuseFile instance.
+      \param path    the ZooKeeper path represented by this ZkFuseFile instance.
+     */
+    ZkFuseFile(const ZkFuseHandleManager::SharedPtr & manager,
+               const int handle,
+               const std::string & path)
+      : _manager(manager),
+        _handle(handle),
+        _path(path),
+        _mutex(),
+        /* starts with one reference, held by the opener */
+        _refCount(1),
+        /* starts as deleted: the node is not yet known to exist */
+        _deleted(true),
+        /* children stuff */
+        _openDirCount(0),
+        _initializedChildren(false),
+        _hasChildrenListener(false),
+        _children(),
+        /* data stuff */
+        _initializedData(false),
+        _hasDataListener(false),
+        _dirtyData(false), 
+        _activeData(),
+        _activeStat(),
+        _latestData(),
+        _latestStat()
+    {
+        /* All state is set by the initializer list; only log here. */
+        LOG_DEBUG(LOG, "constructor() path %s", _path.c_str());
+    }
+    /**
+      Destructor.
+     */
+    ~ZkFuseFile()
+    {
+        LOG_DEBUG(LOG, "destructor() path %s", _path.c_str());
+
+        /* Destruction is only expected once the reference count is 0. */
+        assert(_isZombie());
+
+        /* Release both caches. */
+        _clearChildren();
+        _clearData();
+    }
+    /**
+      Whether the ZooKeeper node represented by this ZkFuseFile instance
+      has been deleted.
+      \see _deleted
+
+      \return true if it is deleted.
+     */
+    bool isDeleted() const 
+    { 
+        /* Read the flag while holding _mutex. */
+        AutoLock lock(_mutex);
+        const bool deleted = _deleted;
+        return deleted;
+    }
+    /**
+      Return the path of the ZooKeeper node represented by this ZkFuseFile
+      instance.
+      \see _path.
+
+      \return the ZooKeeper node's path.
+     */
+    const string & getPath() const 
+    {
+        /* Returned by reference, without taking _mutex. */
+        const string & path = _path;
+        return path;
+    }
+    /**
+      Add a childPath to the children information cache.
+      
+      \return 0 if successful, otherwise return negative errno.
+      \param childPath the ZooKeeper path of the child.
+     */
+    int addChild(const std::string & childPath) 
+    {
+        LOG_DEBUG(LOG, "addChild(childPath %s) path %s", 
+                  childPath.c_str(), _path.c_str());
+
+        int res = 0;
+        {
+            AutoLock lock(_mutex);
+            /* An invalid children cache is left untouched and 0 is
+               returned. */
+            if (_initializedChildren) {
+                const bool alreadyCached =
+                    std::find(_children.begin(), _children.end(), childPath)
+                    != _children.end();
+                if (alreadyCached) {
+                    LOG_DEBUG(LOG, "child found");
+                    res = -EEXIST;
+                } 
+                else {
+                    LOG_DEBUG(LOG, "child not found, adding child path");
+                    _children.push_back(childPath);
+                    res = 0;
+                }
+            }
+        }
+        
+        LOG_DEBUG(LOG, "addChild returns %d", res);
+        return res;
+    }
+    /**
+      Remove a childPath from the children information cache.
+      
+      \return 0 if successful, otherwise return negative errno.
+      \param childPath the ZooKeeper path of the child.
+     */
+    int removeChild(const std::string & childPath) 
+    {
+        LOG_DEBUG(LOG, "removeChild(childPath %s) path %s", 
+                  childPath.c_str(), _path.c_str());
+
+        int res = 0;
+        {
+            AutoLock lock(_mutex);
+            /* An invalid children cache is left untouched and 0 is
+               returned. */
+            if (_initializedChildren) {
+                NodeNames::iterator pos = 
+                    std::find(_children.begin(), _children.end(), childPath);
+                if (pos == _children.end()) {
+                    LOG_DEBUG(LOG, "child not found");
+                    res = -ENOENT;
+                } 
+                else {
+                    LOG_DEBUG(LOG, "child found");
+                    _children.erase(pos);
+                    res = 0;
+                }
+            }
+        }
+        
+        LOG_DEBUG(LOG, "removeChild returns %d", res);
+        return res;
+    }
+    /**
+      Invalidate the cached children information and cached data.
+      \see _clearChildren
+      \see _clearData
+
+      \param clearChildren set to true to invalidate children information cache.
+      \param clearData set to true to invalidate data cache.
+     */
+    void clear(bool clearChildren = true, bool clearData = true)
+    {
+        LOG_DEBUG(LOG, "clear(clearChildren %d, clearData %d) path %s", 
+                  clearChildren, clearData, _path.c_str());
+
+        /* Both caches are invalidated under a single lock acquisition. */
+        AutoLock lock(_mutex);
+        if (clearChildren) {
+            _clearChildren();
+        }
+        if (clearData) {
+            _clearData();
+        }
+    }
+    /** 
+      Whether reference count is zero.
+      \see _refCount
+
+      \return true if reference count is zero.
+     */
+    bool isZombie() const 
+    {
+        /* Locked wrapper around the unlocked _isZombie check. */
+        AutoLock lock(_mutex);
+        return _isZombie();
+    }
+    /**
+      Increment the reference count of the ZkFuseFile instance.
+
+      This method may be called by a ZkFuseFileManager instance while
+      holding the ZkFuseFileManager's _mutex. To avoid deadlocks, 
+      this method must never invoke a ZkFuseFileManager instance 
+      directly or indirectly while holding the ZkFuseFile instance's
+      _mutex.
+      \see _refCount
+
+      \return the post-increment reference count.
+      \param count value to increment the reference count by.
+     */
+    int incRefCount(int count = 1)
+    {
+        LOG_DEBUG(LOG, "incRefCount(count %d) path %s", count, _path.c_str());
+
+        int updated = 0;
+        {
+            /* Adjust and read back the count under the lock; the lock is
+               released before the final log statement. */
+            AutoLock lock(_mutex);
+            _refCount += count;
+            assert(_refCount >= 0);
+            updated = _refCount;
+        }
+
+        LOG_DEBUG(LOG, "incRefCount returns %d", updated); 
+        return updated;
+    }
+    /**
+      Decrement the reference count of the ZkFuseFile instance.
+
+      This method may be called by a ZkFuseFileManager instance while
+      holding the ZkFuseFileManager's _mutex. To avoid deadlocks, 
+      this method must never invoke a ZkFuseFileManager instance 
+      directly or indirectly while holding the ZkFuseFile instance's
+      _mutex.
+      \see _refCount
+
+      \return the post-decrement reference count.
+      \param count value to decrement the reference count by.
+     */
+    int decRefCount(int count = 1)
+    {
+        /* A decrement is an increment by the negated amount. */
+        const int delta = -count;
+        return incRefCount(delta);
+    }
+    /**
+      Increment the count of the number of times the ZkFuseFile instance
+      has been opened as a directory.
+      
+      This count is incremented by opendir and decremented by releasedir.
+      \see _openDirCount.
+
+      \return the post-increment count.
+      \param count the value to increment the count by.
+     */
+    int incOpenDirCount(int count = 1)
+    {
+        LOG_DEBUG(LOG, "incOpenDirCount(count %d) path %s", 
+                  count, _path.c_str());
+
+        int updated = 0;
+        {
+            AutoLock lock(_mutex);
+            _openDirCount += count;
+            /* The count may never go negative ... */
+            assert(_openDirCount >= 0);
+            updated = _openDirCount;
+            /* ... nor exceed the total reference count. */
+            assert(_openDirCount <= _refCount);
+        }
+
+        LOG_DEBUG(LOG, "incOpenDirCount returns %d", updated); 
+        return updated;
+    }
+    /**
+      Decrement the count of the number of times the ZkFuseFile instance
+      has been opened as a directory.
+      
+      This count is incremented by opendir and decremented by releasedir.
+      \see _openDirCount.
+
+      \return the post-decrement count.
+      \param count the value to decrement the count by.
+     */
+    int decOpenDirCount(int count = 1)
+    {
+        /* A decrement is an increment by the negated amount. */
+        const int delta = -count;
+        return incOpenDirCount(delta);
+    }
+    /**
+      Whether ZkFuse should present the ZooKeeper node as a ZkFuse 
+      directory by taking into account the specified ZkFuseNameType.
+
+      The ZkFuseNameType may override the default ZkFuse presentation of
+      a ZooKeeper node. 
+      \see _isDirNameType
+
+      \return true if ZkFuse should present the ZooKeeper node as a ZkFuse
+                   directory.
+      \param nameType specifies the ZkFuseNameType.
+     */
+    bool isDirNameType(ZkFuseNameType nameType) const
+    {
+        /* Public entry point: ask the helper to acquire _mutex. */
+        const bool acquireLock = true;
+        return _isDirNameType(nameType, acquireLock);
+    }
+    /**
+      Whether ZkFuse should present the ZooKeeper node as a ZkFuse 
+      regular file by taking into account the specified ZkFuseNameType.
+
+      The ZkFuseNameType may override the default ZkFuse presentation of
+      a ZooKeeper node. 
+      \see _isRegNameType
+
+      \return true if ZkFuse should present the ZooKeeper node as a ZkFuse
+                   regular file.
+      \param nameType specifies the ZkFuseNameType.
+     */
+    bool isRegNameType(ZkFuseNameType nameType) const
+    {
+        /* Public entry point: ask the helper to acquire _mutex. */
+        const bool acquireLock = true;
+        return _isRegNameType(nameType, acquireLock);
+    }
+    /**
+      Get the active data.
+      \see _activeData
+
+      \param data return data here.
+     */
+    void getData(Data & data) const
+    {
+        /* Copy the active data out while holding _mutex. */
+        AutoLock lock(_mutex);
+        data = _activeData;
+    }
+    /**
+      Set the active data.
+      \see _activeData
+
+      Return -EFBIG if the data to be written is bigger than the maximum
+      permitted size (and no data is written).
+
+      \return 0 if successful, otherwise return negative errno.
+      \param data set to this data.
+      \param doFlush whether to flush the data to the ZooKeeper node.
+     */
+    int setData(const Data & data, bool doFlush)
+    {
+        LOG_DEBUG(LOG, "setData(doFlush %d) path %s", doFlush, _path.c_str());
+        int res = 0;
+
+        /* The size limit is checked before the lock is taken. */
+        const bool tooBig = (data.size() > maxDataFileSize);
+        if (!tooBig) {
+            AutoLock lock(_mutex);
+            /* Replace the active data and mark it as needing a flush. */
+            _activeData = data;
+            _dirtyData = true;
+            if (doFlush) {
+                res = _flush();
+            }
+        } 
+        else {
+            res = -EFBIG;
+        }
+
+        LOG_DEBUG(LOG, "setData() returns %d", res);
+        return res;
+    }
+    /**
+      Update the children information and the data caches as needed.
+
+      This method is invoked when a ZkFuse regular file or directory 
+      implemented by this ZkFuseFile instance is opened, e.g.
+      using open or opendir. It attempts to:
+      - make sure that the cache has valid children information
+      - register for watches for changes if no previous watches have
+        been registered.
+
+      The newFile flag indicates if the ZkFuseFile instance has just
+      been constructed and that ZooKeeper has not been contacted to
+      determine if the ZooKeeper path for this file really exists.
+      When a ZkFuseFile instance is created, the _deleted flag is set to
+      true because it is safer to assume that the ZooKeeper node does
+      not exist. The newFile flag causes the _deleted flag to be
+      ignored and ZooKeeper to be contacted to update the caches.
+
+      If the newFile flag is false, then the ZkFuseFile instance is
+      currently open and has been opened before. Hence, these previous
+      opens should have contacted ZooKeeper and would likely have learned
+      from ZooKeeper whether the ZooKeeper path exists. Therefore, 
+      the _deleted flag should be trustworthy, i.e. it has accurate 
+      information on whether the ZooKeeper path actually exists.
+
+      Error mapping: a ZooKeeperException with error code ZNONODE marks
+      the instance as deleted, clears the children cache and yields
+      -ENOENT; any other ZooKeeperException yields -EIO.
+
+      \return 0 if successful, otherwise return negative errno.
+      \param newFile set to true if the ZkFuseFile instance is newly created.
+     */
+    int update(bool newFile)
+    {
+        LOG_DEBUG(LOG, "update(newFile %d) path %s", newFile, _path.c_str());
+
+        int res = 0;
+        {
+            AutoLock lock(_mutex);
+
+            /* At this point, cannot be zombie.
+             */
+            assert(!_isZombie());
+            if (!newFile && _deleted) {
+                /* Deleted file, don't bother to update caches */
+                LOG_DEBUG(LOG, "deleted, not new file"); 
+                res = -ENOENT;
+            }
+            else {
+                try {
+                    LOG_DEBUG(LOG, "initialized children %d, data %d",
+                              _initializedChildren, _initializedData);
+                    LOG_DEBUG(LOG, "has children watch %d, data watch %d",
+                              _hasChildrenListener, _hasDataListener);
+                    /*
+                     * Children handling starts here.
+                     * If don't have children listener,
+                     *    then must establish listener.
+                     * If don't have cached children information, 
+                     *    then must get children information. 
+                     * It just happens, that the same ZooKeeper API 
+                     * is used for both.
+                     */
+                    if (_initializedChildren == false ||
+                        _hasChildrenListener == false
+#ifdef ZOOKEEPER_ROOT_CHILDREN_WATCH_BUG
+                        /* HACK for root node because changes to children
+                         * on a root node do not cause children watches to
+                         * fire. A path of length 1 is treated as the root
+                         * node, so its children are always re-fetched.
+                         */
+                        || _path.length() == 1
+#endif // ZOOKEEPER_ROOT_CHILDREN_WATCH_BUG
+                    ) {
+                        LOG_DEBUG(LOG, "update children");
+                        /* Fetch into a local first so that _children is
+                         * left untouched if getNodeChildren throws.
+                         */
+                        NodeNames children;
+                        _manager->getCommon().getZkAdapter()->
+                          getNodeChildren( children, _path, 
+                                          &_childrenListener, _getZkContext());
+                        _hasChildrenListener = true;
+                        LOG_DEBUG(LOG, "update children done"); 
+                        _children.swap(children);
+                        _initializedChildren = true;
+                        /* Since getNodeChildren is successful, the
+                         * path must exist */
+                        _deleted = false;
+                    }
+                    else {
+                        /* Children information is fresh since 
+                         * it is initialized and has been 
+                         * updated by listener.
+                         */
+                    }
+                    /*
+                     * Data handling starts here.
+                     */
+                    /* A newly created instance must satisfy
+                     * _isOnlyRegOpen(), otherwise the assertion fails.
+                     */
+                    assert(newFile == false || _isOnlyRegOpen());
+                    if (!_isOnlyRegOpen()) {
+                        /* If is already currently opened by someone,
+                         * then don't update data with latest from ZooKeeper,
+                         * use current active data (which may be initialized 
+                         * or not).
+                         * \see _activeData
+                         */
+                        LOG_DEBUG(LOG, "node currently in-use, no data update");
+                    } 
+                    else {
+                        /* If not opened/reopened by someone else, 
+                         *    then perform more comprehensive checks
+                         *    to make sure data and listener are set up
+                         *    correctly.
+                         * If don't have data listener,
+                         *    then must establish listener.
+                         * If don't have cached data, 
+                         *    then must get data.
+                         * It just happens, that the same ZooKeeper API 
+                         * is used for both.  
+                         */
+                        LOG_DEBUG(LOG, "node first use or reuse");
+                        if (_initializedData == false ||
+                            _hasDataListener == false) {
+                            /* Don't have any data for now or need to register
+                             * for callback */
+                            LOG_DEBUG(LOG, "update data");
+                            _latestData = 
+                                _manager->getCommon().getZkAdapter()->
+                                getNodeData(_path, &_dataListener, 
+                                            _getZkContext(), 
+                                            &_latestStat);
+                            _hasDataListener = true;
+                            LOG_DEBUG(LOG, 
+                                      "update data done, latest version %d",
+                                      _latestStat.version);
+                            /* Since getNodeData is successful, the
+                             * path must exist. */
+                            _deleted = false;
+                        } 
+                        else {
+                            /* Data is fresh since it is initialized
+                             * and has been updated by listener.
+                             */
+                        }
+                        /* Update active data to the same as the most 
+                         * recently acquired data, and mark it clean.
+                         */
+                        _activeData = _latestData;
+                        _activeStat = _latestStat;
+                        _initializedData = true;
+                        _dirtyData = false;
+                        LOG_DEBUG(LOG, "update set active version %d",
+                                  _activeStat.version);
+                    } 
+                    res = 0;
+                } catch (const ZooKeeperException & e) {
+                    /* May have ZNONODE exception if path does not exist. */
+                    if (e.getZKErrorCode() == ZNONODE) {
+                        LOG_DEBUG(LOG, "update %s exception %s", 
+                                  _path.c_str(), e.what());
+                        /* Path does not exist, set _deleted, 
+                         * clear children information cache 
+                         */
+                        _deleted = true;
+                        _clearChildren();
+                        res = -ENOENT;
+                    } else {
+                        /* Any other ZooKeeper failure is reported as a
+                         * generic I/O error.
+                         */
+                        LOG_ERROR(LOG, "update %s exception %s", 
+                                  _path.c_str(), e.what());
+                        res = -EIO;
+                    }
+                }
+            }
+        }
+    
+        LOG_DEBUG(LOG, "update returns %d", res);
+        return res;
+    }
+    /**
+      Handle a data watch event for the ZooKeeper node.
+
+      Depending on the state of this instance, this method may:
+      - Invalidate the data cache.
+      - Ask ZooKeeper for the latest data and register a new data watch,
+        so that the data cache stays in-sync with the ZooKeeper node.
+
+      The active data is never touched here. It is moved to a later
+      version by update() at the appropriate time.
+      \see update.
+
+      \param event the watcher event that was delivered.
+     */
+    void dataEventReceived(const ZKWatcherEvent & event) 
+    {
+        const bool reclaim = false;
+        const int type = event.getType();
+        const int state = event.getState();
+
+        /*
+          IMPORTANT: 
+
+          A DELETED_EVENT alone is not proof that the node is gone, so
+          the ZkFuseFile instance must not be marked as deleted without
+          asking ZooKeeper. Consider this sequence:
+
+          1. Create node.
+          2. Set data and watch.
+          3. Delete node.
+          4. Create node.
+          5. Deleted event received.
+
+          The node exists after step 5, so marking the instance as
+          deleted at that point would be a bug.
+
+          Hence this method contacts ZooKeeper whenever it needs to keep
+          the data cache (and deleted status) up-to-date.
+         */
+        LOG_DEBUG(LOG, "dataEventReceived() path %s, type %d, state %d",
+                  _path.c_str(), type, state);
+        {
+            AutoLock guard(_mutex);
+
+            /* The watch that delivered this event is no longer
+             * considered registered.
+             */
+            _hasDataListener = false;
+            const bool connected = (state == CONNECTED_STATE);
+
+            if (_isZombie() && _initializedData) {
+                /* Zombie: drop the cached data. Per the original design
+                 * this clears _initializedData, so the refresh below is
+                 * skipped and no data watch is re-registered.
+                 */
+                LOG_DEBUG(LOG, "invalidate data");
+                _clearData();
+            }
+            else if ((_refCount - _openDirCount) > 0) {
+                /* Keep the cached data: clients that currently have the
+                 * file open do not expect data to change under them.
+                 * If they act on stale data they will get an error when
+                 * ZkFuse tries to flush dirty data; clients that do not
+                 * modify the stale data receive no notification.
+                 *
+                 * Clearing the cache here would also suppress the
+                 * refresh below and could leave the cached data
+                 * out-of-sync with ZooKeeper.
+                 */
+                LOG_WARN(LOG, 
+                         "%s data has changed while in-use, "
+                         "type %d, state %d, refCount %d",
+                         _path.c_str(), type, state, _refCount);
+            }
+
+            /* Cache still valid and session connected: fetch the latest
+             * data and re-register the data watch to stay in-sync.
+             */ 
+            if (_initializedData && connected) {
+                try {
+                    LOG_DEBUG(LOG, "register data watcher");
+                    _latestData = 
+                        _manager->getCommon().getZkAdapter()->
+                        getNodeData(_path, &_dataListener, _getZkContext(), 
+                                    &_latestStat);
+                    _hasDataListener = true;
+                    LOG_DEBUG(LOG, 
+                              "get data done, version %u, cversion %u done",
+                              _latestStat.version, _latestStat.cversion);
+                    /* getNodeData succeeded, so the path exists. */
+                    _deleted = false;
+                } catch (const ZooKeeperException & e) {
+                    const bool noNode = (e.getZKErrorCode() == ZNONODE);
+                    if (noNode) {
+                        /* Path is gone: record that and drop the
+                         * children cache.
+                         */
+                        _deleted = true;
+                        _clearChildren();
+                    }
+                    LOG_ERROR(LOG, "dataEventReceived %s exception %s", 
+                              _path.c_str(), e.what());
+                }
+            }
+        }
+        LOG_DEBUG(LOG, "dataEventReceived return %d", reclaim);
+    }
+    /**
+      Handle a children watch event for the ZooKeeper node.
+
+      Depending on the state of this instance, this method may:
+      - Invalidate the children information cache.
+      - Ask ZooKeeper for the current children and register a new
+        children watch, so that the cache stays in-sync with the
+        ZooKeeper node's children information.
+
+      \param event the watcher event that was delivered.
+     */
+    void childrenEventReceived(const ZKWatcherEvent & event) 
+    {
+        const bool reclaim = false;
+        const int type = event.getType();
+        const int state = event.getState();
+
+        LOG_DEBUG(LOG, "childrenEventReceived() path %s, type %d, state %d",
+                  _path.c_str(), type, state);
+        {
+            AutoLock guard(_mutex);
+
+            /* The watch that delivered this event is no longer
+             * considered registered.
+             */
+            _hasChildrenListener = false;
+            const bool connected = (state == CONNECTED_STATE);
+
+            if (_initializedChildren) {
+                if (_isZombie() || !connected) {
+                    /* Zombie or disconnected: drop the cached children
+                     * information. Per the original design this clears
+                     * _initializedChildren, so the refresh below is
+                     * skipped and no children watch is re-registered.
+                     */
+                    LOG_DEBUG(LOG, "invalidate children");
+                    _clearChildren();
+                }
+                else {
+                    /* Keep the cached children information so something
+                     * is available should the refresh below fail. After
+                     * a failure, the next open makes update() retry
+                     * because _hasChildrenListener is false.
+                     *
+                     * Clearing the cache here would suppress the refresh
+                     * below and could leave the cached children
+                     * information out-of-sync with ZooKeeper.
+                     *
+                     * The cache is cleared below if the children cannot
+                     * be fetched and the watch cannot be re-established.
+                     */
+                    LOG_WARN(LOG, 
+                             "%s children has changed while in-use, "
+                             "type %d, state %d, refCount %d",
+                             _path.c_str(), type, state, _refCount);
+                }
+            }
+
+            /* Cache still valid and session connected: fetch the current
+             * children and re-register the children watch to stay in-sync.
+             */ 
+            if (_initializedChildren && connected) {
+                try {
+                    LOG_DEBUG(LOG, "update children");
+                    /* Fetch into a local so _children is only replaced
+                     * once the call has succeeded.
+                     */
+                    NodeNames latest;
+                    _manager->getCommon().getZkAdapter()->
+                      getNodeChildren(latest, _path, 
+                                      &_childrenListener, _getZkContext());
+                    _hasChildrenListener = true;
+                    LOG_DEBUG(LOG, "update children done");
+                    _children.swap(latest);
+                    /* getNodeChildren succeeded, so the path exists. */
+                    _deleted = false;
+                } catch (const ZooKeeperException & e) {
+                    const bool noNode = (e.getZKErrorCode() == ZNONODE);
+                    if (noNode) {
+                        _deleted = true;
+                        _clearChildren();
+                    }
+                    LOG_ERROR(LOG, "childrenEventReceived %s exception %s", 
+                              _path.c_str(), e.what());
+                    /* Any failure leaves the children list empty. */
+                    _children.clear();
+                }
+            }
+        }
+        LOG_DEBUG(LOG, "childrenEventReceived returns %d", reclaim);
+    }
+    /**
+      Shrink or grow the cached active data to the requested size.
+
+      Only the size of the cached active data is changed here. The
+      change reaches ZooKeeper when the cached data is written to the
+      ZooKeeper node by flush().
+
+      Returns -EFBIG if the requested size exceeds the maximum.
+
+      \return 0 if successful, otherwise negative errno.
+      \param size the requested size.
+     */
+    int truncate(off_t size) 
+    {
+        /* Take _mutex and delegate to the internal implementation. */
+        AutoLock guard(_mutex); 
+        return _truncate(size);
+    }
+    /**
+      Copy range of active data into specified output buffer.
+
+      The copy is performed while holding _mutex. A read that starts at
+      or beyond the end of the active data copies nothing and returns 0.
+      A read that extends past the end is shortened to the available
+      bytes.
+
+      Returns -EIO if the data cache has not been initialized.
+      Returns -EINVAL if offset is negative.
+
+      \return if successful, return number of bytes copied, otherwise
+              return negative errno.
+      \param buf  address of the output buffer.
+      \param size size of the output buffer and desired number of bytes to copy.
+      \param offset offset into active data to start copying from.
+     */
+    int read(char *buf, size_t size, off_t offset) const
+    {
+        /* off_t is signed and may be wider than size_t, so print it as
+         * long long rather than with %zu.
+         */
+        LOG_DEBUG(LOG, "read(size %zu, off_t %lld) path %s", 
+                  size, static_cast<long long>(offset), _path.c_str());
+
+        int res = 0;
+
+        {
+            AutoLock lock(_mutex);
+            if (!_initializedData) {
+                LOG_DEBUG(LOG, "not initialized");
+                res = -EIO;
+            }
+            else if (offset < 0) {
+                /* A negative offset would index before the start of the
+                 * active data.
+                 */
+                LOG_DEBUG(LOG, "negative offset");
+                res = -EINVAL;
+            }
+            else {
+                const size_t fileSize = _activeData.size();
+                const unsigned long long requested = 
+                    static_cast<unsigned long long>(offset);
+                if (requested >= fileSize) {
+                    LOG_DEBUG(LOG, "offset >= fileSize %zu", fileSize);
+                    res = 0;
+                } 
+                else {
+                    /* requested < fileSize, so it fits in size_t. */
+                    const size_t start = static_cast<size_t>(requested);
+                    /* Compare against the remaining bytes instead of
+                     * computing offset + size, which could wrap around.
+                     */
+                    const size_t available = fileSize - start;
+                    if (size > available) {
+                        size = available;
+                        LOG_DEBUG(LOG, 
+                                  "reducing read size to %zu for fileSize %zu",
+                                  size, fileSize);
+                    }
+                    std::copy(_activeData.begin() + start,
+                              _activeData.begin() + start + size,
+                              buf);
+                    res = static_cast<int>(size);
+                }
+            }
+        }
+
+        LOG_DEBUG(LOG, "read returns %d", res);
+        return res; 
+    }
+    /**
+      Copy buffer content to active data.
+
+      The copy is performed while holding _mutex and marks the active
+      data as dirty. The active data is grown if the write extends past
+      its current end. A write that would extend past maxDataFileSize is
+      shortened so that it ends at maxDataFileSize.
+
+      Returns -EIO if the data cache has not been initialized.
+      Returns -EINVAL if offset is negative.
+      Returns -ENOSPC if offset is at or beyond maxDataFileSize.
+
+      \return if successful, return number of bytes copied, otherwise
+              return negative errno.
+      \param buf  address of the buffer.
+      \param size size of the input buffer and desired number of bytes to copy.
+      \param offset offset into active data to start copying to.
+     */
+    int write(const char *buf, size_t size, off_t offset)
+    {
+        /* off_t is signed and may be wider than size_t, so print it as
+         * long long rather than with %zu.
+         */
+        LOG_DEBUG(LOG, "write(size %zu, off_t %lld) path %s", 
+                  size, static_cast<long long>(offset), _path.c_str());
+
+        int res = 0;
+
+        {
+            AutoLock lock(_mutex);
+            if (!_initializedData) {
+                LOG_DEBUG(LOG, "not initialized");
+                res = -EIO;
+            }
+            else if (offset < 0) {
+                /* A negative offset would index before the start of the
+                 * active data.
+                 */
+                LOG_DEBUG(LOG, "negative offset");
+                res = -EINVAL;
+            }
+            else if (offset >= maxDataFileSize) {
+                LOG_DEBUG(LOG, "offset >= maxDataFileSize %u", maxDataFileSize);
+                res = -ENOSPC;
+            }
+            else {
+                /* 0 <= offset < maxDataFileSize, so it fits in size_t. */
+                const size_t start = static_cast<size_t>(offset);
+                /* Compare against the remaining room instead of computing
+                 * offset + size, which could wrap around.
+                 */
+                const size_t room = 
+                    static_cast<size_t>(maxDataFileSize) - start;
+                if (size > room) {
+                    size = room;
+                    /* Log after the reduction so the reduced size is
+                     * what gets reported.
+                     */
+                    LOG_DEBUG(LOG, 
+                              "reducing write size to %zu "
+                              "for maxDataFileSize %u",
+                              size, maxDataFileSize);
+                }
+                const size_t end = start + size;
+                if (end > _activeData.size()) {
+                    LOG_DEBUG(LOG, "resizing to %zu", end);
+                    _activeData.resize(end);
+                } 
+                /* Single copy of the payload into the active data. */
+                std::copy(buf, buf + size, _activeData.begin() + start);
+                _dirtyData = true;
+                res = static_cast<int>(size);
+            }
+        }
+
+        LOG_DEBUG(LOG, "write returns %d", res);
+        return res; 
+    }
+    /**
+      Write cached active data back to the ZooKeeper node.
+
+      If the cached active data has been modified, it is flushed to the
+      ZooKeeper node. -EIO is returned if the data cannot be written
+      because the cached active data is not the expected version, i.e.
+      ZooKeeper returns ZBADVERSION. -EIO may also indicate a more
+      general failure, such as being unable to communicate with
+      ZooKeeper.
+
+      \return 0 if successful, otherwise negative errno.
+     */
+    int flush()
+    {
+        /* Take _mutex and delegate to the internal implementation. */
+        AutoLock guard(_mutex);
+        return _flush();
+    }
+    /**
+      Close of the ZkFuse regular file represented by the ZkFuseFile instance.
+
+      This may: 
+      - Flush dirty data to the ZooKeeper node, and return the result of the
+        flush operation.
+      - Clear the data and children caches if the instance is marked as
+        deleted.
+      - Reclaim the ZkFuseFile instance. 
+        \see ZkFuseHandleManager::reclaimIfNecessary
+
+      \return result of flush operation - 0 if successful, 
+              otherwise negative errno.
+     */
+    int close()
+    {
+        LOG_DEBUG(LOG, "close() path %s", _path.c_str());
+        int res = 0;
+
+        {
+            AutoLock lock(_mutex);
+            res = _flush();
+            if (_deleted) {
+                _clearData();
+                _clearChildren();
+            }
+        }
+        /* Release the handle only after _mutex has been dropped.
+         * NOTE(review): deallocate may reclaim this instance, so no
+         * member should be accessed after this call - confirm against
+         * the handle manager.
+         */
+        _manager->deallocate(_handle);
+
+        LOG_DEBUG(LOG, "close returns %d", res);
+        return res;
+    }
+    /**
+      Fetch the attributes of the ZkFuse regular file or directory.
+
+      The basic attributes are gathered while holding _mutex. If a
+      matching metadata child node is known, its content is then read
+      without holding _mutex, and the access and modified times stored
+      there are used when the stored version equals the active version.
+
+      \return 0 if successful, otherwise negative errno.
+      \param stbuf return attributes here.
+      \param nameType specifies the ZkFuseNameType of the ZkFuse path used
+                      to get attributes. It influences whether the directory
+                      or regular file attributes are returned.
+     */
+    int getattr(struct stat & stbuf, ZkFuseNameType nameType) const
+    {
+        LOG_DEBUG(LOG, "getattr(nameType %d) path %s", 
+                  int(nameType), _path.c_str());
+
+        int res = 0;
+        int activeVersion = 0;
+        std::string metaPath;
+        {
+            AutoLock guard(_mutex);
+
+            res = _getattrNoMetaAccess(stbuf, nameType);
+            if (res == 0) {
+                activeVersion = _activeStat.version;
+                /* Pick the metadata node name that matches the kind of
+                 * file reported in stbuf.
+                 */
+                metaPath = _getChildPath( 
+                    ((stbuf.st_mode & S_IFMT) == S_IFREG) ? 
+                    _manager->getCommon().getRegMetadataName() :
+                    _manager->getCommon().getDirMetadataName());
+                /* An empty metaPath means there is no metadata node to
+                 * consult.
+                 */
+                if (!_hasChildPath(metaPath)) {
+                    metaPath.clear();
+                }
+            }
+        }
+
+        const bool lookupMeta = (res == 0 && !metaPath.empty());
+        if (lookupMeta) {
+            Data metaData;
+            const int metaRes = _manager->getData(metaPath, metaData);
+            LOG_DEBUG(LOG, "metaRes %d dataSize %zu",
+                      metaRes, metaData.size());
+            if (metaRes == 0 && !metaData.empty()) {
+                Metadata metadata; 
+                _decodeMetadata(metaData, metadata);
+                LOG_DEBUG(LOG, "metadata version %u active version %u",
+                          metadata.version, activeVersion);
+                if (metadata.version == activeVersion) {
+                    /* IMPORTANT: 
+                     * Convert from millisecs to secs before storing
+                     * into st_atime and st_mtime to avoid truncation
+                     * error due to 64-bit to 32-bit conversion.
+                     */
+                    stbuf.st_atime = millisecsToSecs(metadata.atime);
+                    stbuf.st_mtime = millisecsToSecs(metadata.mtime);
+                }
+            }
+        }
+    
+        LOG_DEBUG(LOG, "getattr returns %d", res);
+        return res;
+    }
+    /**
+      Read directory entries.
+      This interface is defined by FUSE.
+
+      Entries are produced for the non-metadata children starting at the
+      given offset. If the active data is not empty, an extra entry for
+      the data file is appended after the children.
+
+      Returns -EIO if this instance has not been initialized.
+
+      \return 0 if successful, otherwise negative errno.
+      \param buf output buffer to store output directory entries.
+      \param filler function used to fill the output buffer.
+      \param offset start filling from a specific offset.
+     */
+    int readdir(void *buf, fuse_fill_dir_t filler, off_t offset) const
+    {
+        LOG_DEBUG(LOG, "readdir(offset %zu) path %s", offset, _path.c_str());
+        int res = 0;
+
+        typedef std::pair<std::string, int> DirEntry;
+        typedef std::vector<DirEntry> DirEntries; 
+        DirEntries pending;
+        int dataFileIndex = -1;
+        unsigned leftTrim = 0;
+
+        /* Phase one, under _mutex: collect the names of the child nodes
+         * from offset onwards together with their index. Nothing here
+         * calls into ZkFuseHandleManager, which must not be invoked
+         * while _mutex is held.
+         */
+        {
+            AutoLock guard(_mutex);
+            if (_isInitialized()) {
+                /* Number of leading characters to skip in a child path
+                 * when producing the entry name: 1 when _path has
+                 * length 1, otherwise the length of _path plus 1.
+                 */
+                leftTrim = (_path.length() == 1 ? 1 : _path.length() + 1);
+                unsigned idx = offset;
+                while (idx < _children.size()) {
+                    const std::string & name = _children[idx];
+                    if (!_isMeta(name)) {
+                        pending.push_back(DirEntry(name, idx));
+                    }
+                    idx++;
+                }
+                /* Only offer the data file when the scan ended exactly
+                 * at the end of the children and there is data.
+                 */
+                if (idx == _children.size() && !_activeData.empty()) {
+                    dataFileIndex = idx + 1;
+                }
+            }
+            else {
+                LOG_DEBUG(LOG, "not initialized");
+                res = -EIO;
+            }
+        }
+        
+        /* Phase two, WITHOUT _mutex: ask ZkFuseHandleManager for the
+         * attributes of each collected entry and hand it to the filler.
+         */ 
+        if (res == 0) {
+            bool bufferFull = false;
+            DirEntries::const_iterator entry = pending.begin();
+            for (; entry != pending.end(); ++entry) {
+                ZkFuseAutoHandle childHandle(_manager, entry->first);
+                if (childHandle.get() < 0) {
+                    /* Child could not be obtained, leave it out. */
+                    continue;
+                }
+                struct stat childStat; 
+                const int attrRes = childHandle.getFile()->
+                    getattr(childStat, ZkFuseNameDefaultType);
+                if (attrRes != 0) {
+                    continue;
+                }
+                /* A non-zero result from filler means the output buffer
+                 * cannot take more entries.
+                 */
+                if (filler(buf, entry->first.c_str() + leftTrim, 
+                           &childStat, entry->second + 1)) {
+                    LOG_DEBUG(LOG, "filler full");
+                    bufferFull = true;
+                    break;
+                } 
+            } 
+            if (!bufferFull && dataFileIndex != -1) { 
+                LOG_DEBUG(LOG, "include data file name");
+                struct stat dataStat; 
+                const int attrRes = getattr(dataStat, ZkFuseNameRegType); 
+                if (attrRes == 0) {
+                    filler(buf, 
+                           _manager->getCommon().getDataFileName().c_str(), 
+                           &dataStat, dataFileIndex + 1);
+                }
+            }
+        }
+    
+        LOG_DEBUG(LOG, "readdir returns %d", res);
+        return res;
+    }
+    /**
+      Set the access time and modified time.
+
+      Sets the access and modified times on the ZkFuse regular file
+      or directory represented by this ZkFuseFile instance.
+
+      ZooKeeper offers no interface to change these times on a node, so
+      ZkFuse simulates it with a metadata node, which is a child node of
+      the ZooKeeper node. The current version together with the given
+      access and modified times are written to that metadata node.
+
+      When attributes are requested later, get attributes looks for the
+      metadata node and, if the stored version matches the current data
+      version, reports the access and modified times stored in the
+      metadata node.
+
+      Returns -EIO if this instance has not been initialized.
+
+      \return 0 if successful, otherwise negative errno.
+      \param atime access time in milliseconds.
+      \param mtime modified time in milliseconds.
+      \param nameType specifies the ZkFuseNameType of the ZkFuse path used
+                      to set access and modified times. It influences 
+                      whether the directory or regular file access and
+                      modified times are set.
+     */
+    int utime(uint64_t atime, uint64_t mtime, ZkFuseNameType nameType) 
+    {
+        LOG_DEBUG(LOG, 
+                  "utime(atime %llu, mtime %llu, nameType %d) path %s",
+                  (unsigned long long) atime, 
+                  (unsigned long long) mtime, 
+                  (int) nameType, _path.c_str());
+
+        int res = 0;
+        Data encoded;
+        std::string metaPath;
+        bool metaExists = false;
+        {
+            /* Build the metadata payload and locate the metadata node
+             * while holding _mutex.
+             */
+            AutoLock guard(_mutex);
+    
+            if (_isInitialized()) {
+                const bool regular = _isRegNameType(nameType);
+                Metadata metadata;
+                metadata.version = _activeStat.version;
+                metadata.atime = atime;
+                metadata.mtime = mtime;
+                metaPath = _getChildPath( 
+                    regular ?  
+                    _manager->getCommon().getRegMetadataName() :
+                    _manager->getCommon().getDirMetadataName());
+                metaExists = _hasChildPath(metaPath);
+                _encodeMetadata(metadata, encoded);
+            }
+            else {
+                LOG_DEBUG(LOG, "not initialized");
+                res = -EIO;
+            }
+        }
+        /* Write the metadata node through the manager once _mutex has
+         * been released.
+         */
+        if (res == 0 && !metaPath.empty()) { 
+            res = _manager->setData(metaPath, encoded, metaExists, true);
+        }
+
+        LOG_DEBUG(LOG, "utime returns %d", res);
+        return res;
+    }
+    /**
+      Remove a ZkFuse directory.
+
+      With force set to true, the ZooKeeper node and its descendants
+      are deleted.
+
+      With force set to false, the semantics of removing a ZkFuse
+      directory apply: the ZooKeeper node is deleted only if it has no
+      data and no non-metadata children.
+      - Return -ENOTDIR if the ZooKeeper node is not considered
+        to be a directory (after taking into consideration the specified
+        ZkFuseNameType). 
+      - Return -ENOTEMPTY if the ZooKeeper node has data or it has 
+        non-metadata children.
+      - Return -ENOENT if the ZooKeeper node cannot be deleted, usually
+        because it does not exist.
+
+      On success the manager is asked to remove this path from its
+      parent.
+
+      \return 0 if successful, otherwise negative errno.
+      \param nameType the ZkFuseNameType of the path used to specify the
+                      directory to be removed. It influences whether ZkFuse
+                      considers the ZooKeeper node to be a regular file or
+                      directory. \see ZkFuseNameType
+      \param force    set to true to bypass ZkFuse rmdir semantic check.
+     */
+    int rmdir(ZkFuseNameType nameType, bool force)
+    {
+        int status = 0;
+
+        {
+            AutoLock guard(_mutex);
+            status = _rmdir(nameType, force);
+        }
+        if (status != 0) {
+            return status;
+        }
+        /* The manager is called only after _mutex has been released. */
+        _manager->removeChildFromParent(_path);
+        return status;
+    }
+    /**
+      Remove the ZkFuse regular file represented by this instance.
+
+      This method implements ZkFuse regular file removal semantics.
+      - If the regular file is the data part of a ZooKeeper node that is
+        presented as a ZkFuse directory, the file is virtually deleted
+        by truncating the ZooKeeper node's data. Readdir does not
+        synthesize a regular file entry for the data part of a
+        ZooKeeper node that has no data.
+      - If the regular file is the data part of a ZooKeeper node that is
+        presented as a ZkFuse regular file, the ZooKeeper node and its
+        descendants are deleted.
+
+      -EISDIR is returned if the file cannot be deleted because ZkFuse
+      considers it to be a directory.
+
+      The removal itself is delegated to _unlink() while _mutex is held.
+      On success, and only after _mutex has been released, the manager
+      is asked to remove this path from its parent.
+
+      \return 0 if successful, otherwise negative errno.
+      \param nameType the ZkFuseNameType of the path used to specify the
+                      regular file to be removed. It influences whether
+                      ZkFuse considers the ZooKeeper node to be a regular
+                      file or directory. \see ZkFuseNameType
+    */
+    int unlink(ZkFuseNameType nameType) 
+    {
+        int status;
+        {
+            // _mutex is held only for the duration of _unlink().
+            AutoLock guard(_mutex);
+            status = _unlink(nameType);
+        }
+        if (status != 0) {
+            return status;
+        }
+        // Notify the manager without holding _mutex.
+        _manager->removeChildFromParent(_path);
+        return status;
+    }
+    /**
+      Utility function that joins a parent ZooKeeper path and a child
+      name into the child's full ZooKeeper path.
+
+      A parent of length 0 or 1 (e.g. the root directory "/") is not
+      copied into the result, so the child of the root is "/" + child
+      rather than "//" + child.
+
+      \return the full path of the child.
+      \param  parent the parent's full path.
+      \param  child  the name component appended after the separator.
+     */
+    static std::string buildChildPath(const std::string & parent,
+                                      const std::string & child)
+    {
+        const bool parentIsRoot = (parent.length() <= 1);
+
+        std::string childPath;
+        childPath.reserve(parent.length() + child.length() + 32);
+        if (!parentIsRoot) {
+            childPath.append(parent);
+        }
+        childPath.append("/");
+        childPath.append(child);
+        return childPath;
+    }
+};
+
+/* Out-of-class definitions of ZkFuseFile's static listener members. */
+ZkFuseFile::DataListener ZkFuseFile::_dataListener;
+ZkFuseFile::ChildrenListener ZkFuseFile::_childrenListener;
+
+/**
+  Replace the handle managed by this ZkFuseAutoHandle.
+
+  The new handle is installed and _initFile() is run first; only then is
+  the file that belonged to the previous handle closed, and only if the
+  previous handle was valid (non-negative).
+
+  \param handle the handle to manage from now on.
+ */
+void ZkFuseAutoHandle::reset(int handle)
+{
+    // Remember what is being replaced before overwriting the members.
+    const int previousHandle = _handle;
+    ZkFuseFilePtr previousFile = _file;
+
+    _handle = handle;
+    _initFile();
+
+    if (previousHandle < 0) {
+        return;
+    }
+    assert(previousFile != NULL);
+    previousFile->close();
+}
+
+/**
+  Obtain a handle for the ZkFuseFile instance of a ZooKeeper path.
+
+  If the path is already present in _map, its existing handle is reused
+  and the file's reference count is incremented; when that increment
+  yields 1 the entry had no users, so _numInUse is incremented as well.
+
+  Otherwise a new ZkFuseFile is created in a slot taken from _freeList,
+  or in a newly appended slot of _files if the free list is empty, and
+  the path is registered in _map.
+
+  All bookkeeping is done while holding _mutex.
+
+  \return the handle for the path.
+  \param path    the ZooKeeper path.
+  \param newFile set to true if a new ZkFuseFile instance was created,
+                 false if an existing instance was reused.
+ */
+ZkFuseHandleManager::Handle 
+ZkFuseHandleManager::allocate(const std::string & path, bool & newFile)
+{
+    LOG_DEBUG(LOG, "allocate(path %s)", path.c_str());
+
+    Handle handle;
+    {
+        AutoLock guard(_mutex);
+        Map::iterator entry = _map.find(path);
+        if (entry != _map.end()) {
+            LOG_DEBUG(LOG, "found");
+            handle = entry->second;
+            assert(_files[handle] != NULL);
+            if (_files[handle]->incRefCount() == 1) {
+                /* The cached instance had no users until now. */
+                _numInUse++;
+                LOG_DEBUG(LOG, "resurrecting zombie, numInUse %u", _numInUse);
+            }
+            newFile = false;
+        } else {
+            LOG_DEBUG(LOG, "not found");
+            if (!_freeList.empty()) {
+                handle = _freeList.back();
+                _freeList.pop_back();
+                LOG_DEBUG(LOG, "get from free list, handle %d", handle);
+            } else {
+                handle = _files.size();
+                _files.resize(handle + 1);
+                LOG_DEBUG(LOG, "free list empty, resize handle %d", handle);
+            }
+            assert(_files[handle] == NULL);
+            _files[handle] = 
+                new ZkFuseFile(SharedPtr(_thisWeakPtr), handle, path);
+            /* This method is not really supposed to invoke ZkFuseFile
+             * methods while holding _mutex. It is safe to do so here
+             * without causing deadlock because the method invoked is
+             * known not to call other methods, in particular none that
+             * call back into this ZkFuseHandleManager instance.
+             *
+             * NOTE(review): the incRefCount(0) call lives inside assert()
+             * and is therefore compiled out under NDEBUG - confirm that
+             * it has no required side effect.
+             */
+            assert(_files[handle]->incRefCount(0) == 1);
+            _map[path] = handle;
+            _numInUse++;
+            LOG_DEBUG(LOG, "numInUse %u", _numInUse);
+            newFile = true;
+        }
+    }
+
+    LOG_DEBUG(LOG, "allocate returns %d, newFile %d", handle, newFile);
+    return handle;
+}
+
+/**
+  Release one reference to the ZkFuseFile instance of a handle.
+
+  Negative handles are ignored (only logged). Otherwise the file's
+  reference count is decremented while holding _mutex. When the count
+  reaches zero, _numInUse is decremented and, if the number of slots not
+  in use then exceeds the configured cache size, the instance is removed
+  from _map and _files and its handle is pushed onto _freeList. The
+  instance itself is deleted after _mutex has been released.
+
+  \param handle the handle to release.
+ */
+void ZkFuseHandleManager::deallocate(Handle handle) 
+{
+    LOG_DEBUG(LOG, "deallocate(handle %d)", handle);
+
+    if (handle < 0) {
+        LOG_DEBUG(LOG, "handle invalid");
+        LOG_DEBUG(LOG, "deallocate done");
+        return;
+    }
+
+    /* Set only when the instance is to be reclaimed. */
+    ZkFuseFilePtr victim = NULL;
+    {
+        AutoLock guard(_mutex);
+        ZkFuseFilePtr file = _files[handle];
+        assert(file != NULL);
+        const int remaining = file->decRefCount();
+        const std::string & path = file->getPath();
+        LOG_DEBUG(LOG, "path %s ref count %d", path.c_str(), remaining);
+        if (remaining == 0) {
+            _numInUse--;
+            unsigned numCached = _files.size() - _numInUse;
+            if (numCached > _common.getCacheSize()) {
+                LOG_TRACE(LOG, 
+                          "reclaim path %s, cacheSize %u, filesSize %zu, "
+                          "numInUse %u", 
+                          path.c_str(),
+                          _common.getCacheSize(), _files.size(), _numInUse);
+                _map.erase(path); 
+                _files[handle] = NULL;
+                _freeList.push_back(handle); 
+                victim = file;
+            }
+        }
+    } 
+    /* Deleting a NULL pointer is a no-op, so no extra check is needed. */
+    delete victim;
+
+    LOG_DEBUG(LOG, "deallocate done");
+}
+
+/**
+  Dispatch a ZooKeeper watcher event to the affected ZkFuseFile instances.
+
+  - DELETED_EVENT, CHANGED_EVENT and CHILD_EVENT are delivered to the
+    ZkFuseFile registered in _map for the event's path, if any. A warning
+    is logged if the path is not found.
+  - SESSION_EVENT with CONNECTING_STATE is delivered, as both a data and
+    a children event, to every non-NULL entry of _files.
+  - SESSION_EVENT with CONNECTED_STATE is only logged.
+  - Any other event type is ignored with a warning.
+
+  While an event is delivered, the target's reference count and open
+  directory count are raised and _mutex is temporarily released; both
+  counts are dropped again (via decOpenDirCount() and deallocate())
+  before _mutex is re-acquired.
+
+  \param event the watcher event to dispatch.
+ */
+void ZkFuseHandleManager::eventReceived(const ZKWatcherEvent & event)
+{
+    int eventType = event.getType();
+    int eventState = event.getState();
+    const std::string & path = event.getPath();
+    LOG_DEBUG(LOG, "eventReceived() eventType %d, eventState %d, path %s",
+              eventType, eventState, path.c_str());
+
+    if (eventType == DELETED_EVENT ||
+        eventType == CHANGED_EVENT ||
+        eventType == CHILD_EVENT) {
+        {
+            AutoLock lock(_mutex);
+            Map::iterator it = _map.find(path);
+            if (it != _map.end()) {
+                LOG_DEBUG(LOG, "path found");
+                Handle handle = it->second;
+                ZkFuseFilePtr file = _files[handle];
+                assert(file != NULL);
+                /* Prevent the ZkFuseFile instance from being
+                 * deleted while handling the event.
+                 */
+                int refCount = file->incRefCount();
+                if (refCount == 1) {
+                    _numInUse++;
+                }
+                /* Pretend to be dir open.
+                 * The returned count is not used.
+                 */
+                int dirCount = file->incOpenDirCount();
+                {
+                    /* _mutex is unlocked in this scope */
+                    AutoUnlockTemp autoUnlockTemp(lock);
+                    if (eventType == CHILD_EVENT) {
+                        file->childrenEventReceived(event);
+                    }
+                    else if (eventType == CHANGED_EVENT) {
+                        file->dataEventReceived(event);
+                    }
+                    else {
+                        /* A deletion is delivered as a data event only;
+                         * the children notification is disabled.
+                         */
+                        assert(eventType == DELETED_EVENT);
+                        file->dataEventReceived(event);
+                        // file->childrenEventReceived(event);
+                    }
+                    /* Undo the pretend dir open and drop the reference
+                     * taken above.
+                     */
+                    file->decOpenDirCount();
+                    deallocate(handle);
+                }
+            }
+            else {
+                LOG_WARN(LOG, 
+                         "path %s not found for event type %d, event state %d",
+                          path.c_str(), eventType, eventState);
+            }
+        }
+    } 
+    else if (eventType == SESSION_EVENT) {
+        if (eventState == CONNECTING_STATE) {
+            LOG_TRACE(LOG, "*** CONNECTING ***");
+            {
+                AutoLock lock(_mutex);
+                /* NOTE(review): handle is a signed int compared against
+                 * the unsigned _files.size().
+                 */
+                for (int handle = 0; handle < _files.size(); handle++) { 
+                    ZkFuseFilePtr file = _files[handle];
+                    if (file != NULL) {
+                        /* prevent the ZkFuseFile instance from being 
+                         * deleted while handling the event. 
+                         */
+                        int refCount = file->incRefCount();
+                        if (refCount == 1) {
+                             _numInUse++;
+                        }
+                        /* Pretend to be dir open.
+                         * The returned count is not used.
+                         */ 
+                        int dirCount = file->incOpenDirCount();
+                        {
+                            /* _mutex is unlocked in this scope */
+                            AutoUnlockTemp autoUnlockTemp(lock);
+                            file->dataEventReceived(event);
+                            file->childrenEventReceived(event);
+                            file->decOpenDirCount();
+                            deallocate(handle);
+                        }
+                        /* this will eventually call decrement ref count */
+                    }
+                }
+            }
+        }
+        else if (eventState == CONNECTED_STATE) {
+            LOG_TRACE(LOG, "*** CONNECTED ***");
+        }
+    }
+    else {
+        LOG_WARN(LOG, 
+                 "eventReceived ignoring event type %d, event state %d, "
+                 "path %s", eventType, eventState, path.c_str());
+    }
+}
+
+/**
+  Read the data of the ZkFuseFile for a ZooKeeper path.
+
+  The output buffer is cleared first. A handle for the path is then
+  obtained through a ZkFuseAutoHandle; if that succeeds, the file's
+  getData() fills the buffer.
+
+  \return 0 if a handle could be obtained, otherwise the negative value
+          reported by the ZkFuseAutoHandle.
+  \param path the ZooKeeper path.
+  \param data receives the data.
+ */
+int ZkFuseHandleManager::getData(const std::string & path, 
+                                 Data & data) 
+{
+    LOG_DEBUG(LOG, "getData(path %s)", path.c_str());
+
+    data.clear();
+    ZkFuseAutoHandle autoHandle(SharedPtr(_thisWeakPtr), path);
+    int status = autoHandle.get();
+    if (status >= 0) {
+        autoHandle.getFile()->getData(data);
+        /* A valid handle is reported to the caller as plain success. */
+        status = 0;
+    }
+
+    LOG_DEBUG(LOG, "getData returns %d", status);
+    return status;
+}
+
+/**
+  Write data to the ZkFuseFile for a ZooKeeper path.
+
+  If exists is true the path is opened with open(); otherwise it is
+  created (or opened if it already exists) as a regular file via mknod().
+  On success the resulting handle is owned by a ZkFuseAutoHandle for the
+  duration of the file's setData() call.
+
+  \return the result of the file's setData(), or the negative value
+          returned by open() / mknod() if no handle could be obtained.
+  \param path    the ZooKeeper path.
+  \param data    the data to write.
+  \param exists  true if the caller believes the path already exists.
+  \param doFlush passed through to the file's setData().
+ */
+int ZkFuseHandleManager::setData(const std::string & path, 
+                                 const Data & data, 
+                                 bool exists, 
+                                 bool doFlush) 
+{
+    LOG_DEBUG(LOG, "setData(path %s, exists %d)\n%s", 
+              path.c_str(), exists, data.c_str());
+
+    /* mknod() reports whether it created the node; not needed here. */
+    bool createdIgnored = false;
+    int status = exists ? 
+        open(path, false) : 
+        mknod(path, S_IFREG, true, createdIgnored);
+    if (status >= 0) {
+        ZkFuseAutoHandle autoHandle(SharedPtr(_thisWeakPtr), status);
+        status = autoHandle.getFile()->setData(data, doFlush);
+    }
+
+    LOG_DEBUG(LOG, "setData returns %d", status);
+    return status;
+}
+
+/**
+  Create a ZooKeeper node for a ZkFuse regular file or directory and
+  open it.
+
+  - Modes that are neither regular file nor directory yield -EINVAL.
+  - If the node is created and the mode is a directory, a directory
+    metadata child is created as well (failure to do so is ignored).
+    The parent's cached entry is then told about the new child and the
+    node is opened.
+  - If the node is not created, an open of the path is attempted: if it
+    succeeds the result depends on mayExist (-EEXIST when false, the
+    open handle when true); if it fails -ENOENT is returned.
+  - A ZooKeeperException is logged and mapped to -EIO.
+
+  \return a handle (>= 0) if successful, otherwise negative errno.
+  \param path     the ZooKeeper path of the node.
+  \param mode     the file mode; only the file type is examined here.
+  \param mayExist true if it is acceptable for the node to exist already.
+  \param created  set to true if this call created the node.
+ */
+int ZkFuseHandleManager::mknod(const std::string & path, 
+                               mode_t mode, 
+                               bool mayExist,
+                               bool & created)
+{
+    LOG_DEBUG(LOG, "mknod(path %s, mode %o, mayExist %d)", 
+              path.c_str(), mode, mayExist);
+
+    int res = 0;
+    created = false;
+    try {
+        if (S_ISREG(mode) == false && S_ISDIR(mode) == false) {
+            LOG_DEBUG(LOG, "bad mode %o", mode);
+            res = -EINVAL;
+        } 
+        else {
+            /* The node is created with empty data. */
+            Data data;
+            LOG_DEBUG(LOG, "create %s", path.c_str());
+            created = 
+                _common.getZkAdapter()->createNode(path, data, 0, false);
+            if (created) {
+                LOG_DEBUG(LOG, "created");
+                if (S_ISDIR(mode)) {
+                    /* is mkdir - create directory marker */
+                    std::string dirMetaPath = ZkFuseFile::buildChildPath
+                        (path, _common.getDirMetadataName());
+                    LOG_DEBUG(LOG, "create %s", dirMetaPath.c_str());
+                    /* NOTE: this local shadows the created out-parameter,
+                     * which therefore keeps the value set above.
+                     */
+                    bool created;
+                    int metaRes = mknod(dirMetaPath, S_IFREG, true, created);
+                    if (metaRes >= 0) {
+                        /* Only the creation was wanted; release the
+                         * handle returned for the marker.
+                         */
+                        getFile(metaRes)->close();
+                    }
+                }
+                addChildToParent(path);
+                LOG_DEBUG(LOG, "open after create");
+                res = open(path, true);
+            } else {
+                LOG_DEBUG(LOG, "create failed");
+                /* Distinguish "already exists" from other failures by
+                 * trying to open the path.
+                 */
+                int openRes = open(path, false);
+                if (openRes >= 0) {
+                    if (mayExist == false) {
+                        LOG_DEBUG(LOG, "create failed because already exist");
+                        getFile(openRes)->close();
+                        res = -EEXIST;
+                    } else {
+                        res = openRes;
+                    }
+                } else {
+                    LOG_DEBUG(LOG, "create failed but does not exist");
+                    res = -ENOENT;
+                }
+            }
+        }
+    } catch (const ZooKeeperException & e) {
+        LOG_ERROR(LOG, "mknod %s exception %s", path.c_str(), e.what());
+        res = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "mknod returns %d created %d", res, created);
+    return res;
+}
+
+/**
+  Create a ZkFuse directory.
+
+  The ZkFuse path is translated to a ZooKeeper path, the file type bits
+  of mode are replaced by S_IFDIR, and the node is created through a
+  ZkFuseAutoHandle constructed with that mode and mayExist set to false.
+
+  \return 0 if successful, otherwise negative errno (-EIO if a
+          std::exception is caught).
+  \param path the ZkFuse path of the directory.
+  \param mode the requested mode; its file type bits are ignored.
+ */
+int ZkFuseHandleManager::mkdir(const char * path, mode_t mode)
+{
+    LOG_DEBUG(LOG, "mkdir(path %s, mode %o)", path, mode);
+
+    int status = 0;
+    try {
+        ZkFuseNameType nameType;
+        const std::string zkPath = getZkPath(path, nameType);
+        /* Force the file type to directory, keep the remaining bits. */
+        const mode_t dirMode = (mode & ~S_IFMT) | S_IFDIR;
+        ZkFuseAutoHandle autoHandle
+            (SharedPtr(_thisWeakPtr), zkPath, dirMode, false);
+        const int handle = autoHandle.get();
+        /* A valid handle is reported to the caller as plain success. */
+        status = (handle >= 0) ? 0 : handle;
+    } catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "mkdir %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "mkdir returns %d", status);
+    return status;
+}
+
+/**
+  Open the ZkFuseFile for a ZooKeeper path.
+
+  A handle is allocated for the path and the file's update() is called,
+  with its argument set to true when the instance is new or the node
+  was just created. If update() succeeds, ownership of the handle
+  passes to the caller; otherwise the ZkFuseAutoHandle still owns it
+  when it goes out of scope.
+
+  \return the handle (>= 0) if successful, otherwise the non-zero result
+          of update(), or -EIO if a ZooKeeperException is caught.
+  \param path        the ZooKeeper path.
+  \param justCreated true if the caller has just created the node.
+ */
+int ZkFuseHandleManager::open(const std::string & path, bool justCreated)
+{
+    LOG_DEBUG(LOG, "open(path %s, justCreated %d)", 
+              path.c_str(), justCreated);
+
+    int status = 0;
+    try {
+        bool isNewFile;
+        const Handle handle = allocate(path, isNewFile);
+        ZkFuseAutoHandle autoHandle(SharedPtr(_thisWeakPtr), handle);
+        const bool updateArg = isNewFile || justCreated;
+        status = getFile(handle)->update(updateArg);
+        if (status == 0) {
+            /* Success: hand the handle over to the caller. */
+            status = handle;
+            autoHandle.release();
+        }
+    } catch (const ZooKeeperException & ex) {
+        LOG_ERROR(LOG, "open %s exception %s", path.c_str(), ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "open returns %d", status);
+    return status;
+}
+
+/**
+  Remove a ZkFuse directory.
+
+  The ZkFuse path is translated to a ZooKeeper path, a handle for it is
+  obtained through a ZkFuseAutoHandle, and the removal is delegated to
+  the file's rmdir().
+
+  \return the result of the file's rmdir(), the negative value reported
+          by the ZkFuseAutoHandle if no handle could be obtained, or
+          -EIO if a std::exception is caught.
+  \param path  the ZkFuse path of the directory.
+  \param force passed through to the file's rmdir().
+ */
+int ZkFuseHandleManager::rmdir(const char * path, bool force)
+{
+    LOG_DEBUG(LOG, "rmdir(path %s, force %d)", path, force);
+
+    int status = 0;
+
+    try {
+        ZkFuseNameType nameType;
+        const std::string zkPath = getZkPath(path, nameType);
+        ZkFuseAutoHandle autoHandle(SharedPtr(_thisWeakPtr), zkPath);
+        const int handle = autoHandle.get();
+        status = (handle < 0) ? 
+            handle : 
+            autoHandle.getFile()->rmdir(nameType, force);
+    } catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "rmdir %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "rmdir returns %d", status);
+    return status;
+}
+
+
+/**
+  Remove a ZkFuse regular file.
+
+  The ZkFuse path is translated to a ZooKeeper path, a handle for it is
+  obtained through a ZkFuseAutoHandle, and the removal is delegated to
+  the file's unlink(). No exceptions are caught here.
+
+  \return the result of the file's unlink(), or the negative value
+          reported by the ZkFuseAutoHandle if no handle could be obtained.
+  \param path the ZkFuse path of the regular file.
+ */
+int 
+ZkFuseHandleManager::unlink(const char * path)
+{
+    LOG_DEBUG(LOG, "unlink(path %s)", path);
+
+    ZkFuseNameType nameType;
+    const std::string zkPath = getZkPath(path, nameType);
+    ZkFuseAutoHandle autoHandle(SharedPtr(_thisWeakPtr), zkPath);
+    int status = autoHandle.get();
+    if (status >= 0) {
+        status = autoHandle.getFile()->unlink(nameType);
+    }
+
+    LOG_DEBUG(LOG, "unlink returns %d", status);
+    return status;
+}
+
+/**
+  Get the attributes of a ZkFuse path.
+
+  The ZkFuse path is translated to a ZooKeeper path, a handle for it is
+  obtained through a ZkFuseAutoHandle, and the query is delegated to the
+  file's getattr().
+
+  \return the result of the file's getattr(), the negative value
+          reported by the ZkFuseAutoHandle if no handle could be
+          obtained, or -EIO if a std::exception is caught.
+  \param path  the ZkFuse path.
+  \param stbuf receives the attributes.
+ */
+int ZkFuseHandleManager::getattr(const char *path, struct stat &stbuf)
+{
+    LOG_DEBUG(LOG, "getattr(path %s)", path);
+
+    int status = 0;
+    try {
+        ZkFuseNameType nameType;
+        const std::string zkPath = getZkPath(path, nameType);
+        ZkFuseAutoHandle autoHandle(SharedPtr(_thisWeakPtr), zkPath);
+        const int handle = autoHandle.get();
+        status = (handle < 0) ? 
+            handle : 
+            autoHandle.getFile()->getattr(stbuf, nameType);
+    } catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "getattr %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "getattr returns %d", status);
+    return status;
+}
+
+/**
+  Rename a ZkFuse regular file by copying it and unlinking the source.
+
+  The destination is created (or opened if it already exists) as a
+  regular file, the source's data is copied to it, the access and
+  modification times are copied on a best-effort basis, and finally the
+  source is unlinked.
+
+  - Returns -EISDIR if either path is considered to be a directory.
+  - If copying the data fails, the error is returned and the source is
+    left untouched; a destination that was created by this call is
+    removed again.
+  - Failure to copy the metadata is only logged.
+
+  \return the result of unlinking the source if the copy succeeded,
+          otherwise negative errno.
+  \param fromPath the ZkFuse path of the existing regular file.
+  \param toPath   the ZkFuse path to rename it to.
+ */
+int 
+ZkFuseHandleManager::rename(const char * fromPath, const char * toPath)
+{
+    LOG_DEBUG(LOG, "rename(fromPath %s, toPath %s)", fromPath, toPath);
+
+    ZkFuseNameType fromNameType;
+    std::string fromZkPath = getZkPath(fromPath, fromNameType);
+    ZkFuseAutoHandle fromAutoHandle(SharedPtr(_thisWeakPtr), fromZkPath);
+    int res = fromAutoHandle.get();
+    if (res >= 0) {
+        LOG_DEBUG(LOG, "good fromPath");
+        if (fromAutoHandle.getFile()->isDirNameType(fromNameType)) {
+            LOG_DEBUG(LOG, "fromPath is directory");
+            res = -EISDIR;
+        }
+    }
+    if (res >= 0) {
+        ZkFuseNameType toNameType;
+        std::string toZkPath = getZkPath(toPath, toNameType);
+        bool created;
+        res = mknod(toZkPath, S_IFREG, true, created);
+        if (res >= 0) {
+            ZkFuseAutoHandle toAutoHandle(SharedPtr(_thisWeakPtr), res);
+            if (toAutoHandle.getFile()->isDirNameType(toNameType)) {
+                LOG_DEBUG(LOG, "toPath is directory");
+                res = -EISDIR;
+            }
+            if (res >= 0) {
+                LOG_DEBUG(LOG, "copy data");
+                Data data; 
+                fromAutoHandle.getFile()->getData(data);
+                /* Propagate a failed write: otherwise the source would
+                 * be unlinked below although its data was never copied.
+                 */
+                res = toAutoHandle.getFile()->setData(data, true);
+                if (res < 0) {
+                    LOG_DEBUG(LOG, "copy data failed");
+                }
+            }
+            if (res >= 0) {
+                /* Metadata is copied on a best-effort basis; failures
+                 * do not fail the rename.
+                 */
+                LOG_DEBUG(LOG, "copy metadata");
+                struct stat stbuf;
+                int metaRes = 
+                    fromAutoHandle.getFile()->getattr(stbuf, fromNameType);
+                if (metaRes < 0) {
+                    LOG_DEBUG(LOG, "get metadata failed");
+                } 
+                else {
+                    metaRes = toAutoHandle.getFile()->
+                        utime(secsToMillisecs(stbuf.st_atime),
+                              secsToMillisecs(stbuf.st_mtime),
+                              toNameType);
+                    if (metaRes < 0) {
+                        LOG_DEBUG(LOG, "set metadata failed");
+                    }
+                }
+            }
+            if (created && res < 0) {
+                /* Only a destination created by this call is removed. */
+                LOG_DEBUG(LOG, "undo create because copy data failed");
+                if (toAutoHandle.getFile()->rmdir(toNameType, true) < 0) {
+                    LOG_DEBUG(LOG, "undo create failed");
+                }
+            }
+        }
+    }
+    if (res >= 0) {
+        LOG_DEBUG(LOG, "copy successful, unlink fromPath");
+        res = fromAutoHandle.getFile()->unlink(fromNameType);
+    }
+
+    LOG_DEBUG(LOG, "rename returns %d", res);
+    return res;
+}
+
+/**
+  Tell the cached ZkFuseFile of a path's parent that it has a new child.
+
+  Nothing is done if the path has no parent path or if the parent is not
+  present in _map. The lookup and the addChild() call are made while
+  holding _mutex.
+
+  \param childPath the full ZooKeeper path of the child.
+ */
+void
+ZkFuseHandleManager::addChildToParent(const std::string & childPath) const
+{
+    LOG_DEBUG(LOG, "addChildToParent(childPath %s)", childPath.c_str());
+
+    const std::string parentPath = getParentPath(childPath);
+    if (parentPath.empty() == false) {
+        AutoLock guard(_mutex);
+        Map::const_iterator parentEntry = _map.find(parentPath);
+        if (parentEntry != _map.end()) {
+            const Handle parentHandle = parentEntry->second;
+            assert(_files[parentHandle] != NULL);
+            _files[parentHandle]->addChild(childPath);
+        } 
+    }
+    
+    LOG_DEBUG(LOG, "addChildToParent done");
+}
+
+/**
+  Tell the cached ZkFuseFile of a path's parent that a child is gone.
+
+  Nothing is done if the path has no parent path or if the parent is not
+  present in _map. The lookup and the removeChild() call are made while
+  holding _mutex.
+
+  \param childPath the full ZooKeeper path of the child.
+ */
+void
+ZkFuseHandleManager::removeChildFromParent(const std::string & childPath) const
+{
+    LOG_DEBUG(LOG, "removeChildFromParent(childPath %s)", childPath.c_str());
+
+    const std::string parentPath = getParentPath(childPath);
+    if (parentPath.empty() == false) {
+        AutoLock guard(_mutex);
+        Map::const_iterator parentEntry = _map.find(parentPath);
+        if (parentEntry != _map.end()) {
+            const Handle parentHandle = parentEntry->second;
+            assert(_files[parentHandle] != NULL);
+            _files[parentHandle]->removeChild(childPath);
+        } 
+    }
+    
+    LOG_DEBUG(LOG, "removeChildFromParent done");
+}
+
+/**
+  Get the parent path of a ZooKeeper path.
+
+  The parent is everything before the last '/' of childPath.
+
+  \return the parent path, or an empty string if childPath has no
+          parent: its only '/' is the leading one (e.g. "/" or "/foo"),
+          or it contains no '/' at all.
+  \param childPath the full ZooKeeper path of the child.
+ */
+std::string
+ZkFuseHandleManager::getParentPath(const std::string & childPath) const
+{
+    const std::string::size_type lastPos = childPath.rfind('/');
+    /* rfind() returns npos when there is no '/'. npos is the largest
+     * size_type value, so it must be checked explicitly; otherwise the
+     * whole childPath would be returned as its own parent.
+     */
+    if (lastPos == std::string::npos || lastPos == 0) {
+        return std::string();
+    }
+    return std::string(childPath, 0, lastPos);
+}
+
+/**
+  Translate a ZkFuse path into a ZooKeeper path and classify how the
+  path was named.
+
+  - "/" maps to the configured root path name and is always
+    ZkFuseNameDirType.
+  - A path whose last component is the data file name has that component
+    (and the preceding '/') removed and is ZkFuseNameRegType.
+  - Otherwise, a path ending with the force directory suffix has that
+    suffix removed and is ZkFuseNameDirType.
+  - Any other path is used as is and is ZkFuseNameDefaultType.
+
+  Finally, force directory suffixes that terminate intermediate path
+  components are removed from the result.
+
+  \return the ZooKeeper path.
+  \param path     the ZkFuse path.
+  \param nameType receives the ZkFuseNameType derived from the path.
+ */
+std::string 
+ZkFuseHandleManager::getZkPath(const char * path, ZkFuseNameType & nameType)
+    const
+{
+    LOG_DEBUG(LOG, "getZkPath(path %s)", path);
+
+    std::string res;
+    unsigned pathLen = strlen(path);
+    const std::string & dataFileName = _common.getDataFileName();
+    unsigned dataSuffixLen = dataFileName.length();
+    const char * dataSuffix = dataFileName.c_str();
+    /* Length of the data file name plus the '/' that precedes it. */
+    unsigned dataSuffixIncludeSlashLen = dataSuffixLen + 1;
+    const std::string & forceDirSuffix = _common.getForceDirSuffix();
+    unsigned forceDirSuffixLen = _common.getForceDirSuffix().length();
+    /* Check if path is "/". If so, it is always a directory.
+     */
+    if (pathLen == 1) {
+        assert(path[0] == '/');
+        res = _common.getRootPathName();
+        nameType = ZkFuseNameDirType;
+    }
+    /* Check if path ends with /{dataSuffix}, e.g. /foo/bar/{dataSuffix}.
+     * If so remove dataSuffix and nameType is ZkFuseNameRegType. 
+     */
+    else if (
+        (pathLen >= dataSuffixIncludeSlashLen) && 
+        (path[pathLen - dataSuffixIncludeSlashLen] == '/') &&
+        (strncmp(path + (pathLen - dataSuffixLen), 
+                 dataSuffix, dataSuffixLen) == 0) 
+       ) {
+        /* Nothing is left once the suffix is removed, i.e. the path
+         * names the data file of the root.
+         */
+        if ((pathLen - dataSuffixIncludeSlashLen) == 0) {
+            res = _common.getRootPathName();
+        } else { 
+            res.assign(path, pathLen - dataSuffixIncludeSlashLen);
+        }
+        nameType = ZkFuseNameRegType;
+    }
+    /* If not ZkFuseNameRegType, then check if path ends with 
+     * {forceDirSuffix}, e.g. /foo/bar{forceDirSuffix}.
+     * If so remove forceDirSuffix and nameType is ZkFuseNameDirType.
+     */
+    else if (forceDirSuffixLen > 0 &&
+        pathLen >= forceDirSuffixLen &&
+        strncmp(path + (pathLen - forceDirSuffixLen),
+                forceDirSuffix.c_str(), forceDirSuffixLen) == 0) {
+        res.assign(path, pathLen - forceDirSuffixLen);
+        nameType = ZkFuseNameDirType;
+    } 
+    /* If not ZkFuseNameRegType and not ZkFuseNameDirType, then
+     * it is ZkFuseNameDefaultType. ZkFuse will infer type from
+     * ZooKeeper node's content.
+     */
+    else {
+        res = path;
+        nameType = ZkFuseNameDefaultType;
+    }
+    /* Intermediate components of the path name may have 
+     * forceDirSuffix, e.g. /foo/bar{forceDirSuffix}/baz.
+     * If so, remove the intermediate {forceDirSuffix}es.
+     */
+    if (forceDirSuffixLen > 0) {
+        /* pos is an optimization to avoid always scanning from 
+         * beginning of path
+         */
+        unsigned pos = 0;
+        while ((res.length() - pos) > forceDirSuffixLen + 1) {
+            const char * found = 
+                strstr(res.c_str() + pos, forceDirSuffix.c_str());
+            if (found == NULL) {
+                break;
+            } 
+            /* Only a suffix that ends a path component is removed. */
+            if (found[forceDirSuffixLen] == '/' ||
+                found[forceDirSuffixLen] == '\0') {
+                pos = found - res.c_str();
+                res.erase(pos, forceDirSuffixLen);
+            }
+            else {
+                /* Not at the end of a component; resume the search
+                 * further along.
+                 */
+                pos += forceDirSuffixLen;
+            }
+        }
+    }
+
+    LOG_DEBUG(LOG, "getZkPath returns %s, nameType %d", 
+              res.c_str(), int(nameType));
+    return res;
+}
+
+/** File-scope ZkFuseHandleManager shared pointer used by the FUSE callbacks. */
+static ZkFuseHandleManager::SharedPtr singletonZkFuseHandleManager;
+
+/**
+  Accessor for the file-scope ZkFuseHandleManager shared pointer.
+
+  \return a const reference to singletonZkFuseHandleManager.
+ */
+inline const ZkFuseHandleManager::SharedPtr & zkFuseHandleManager()
+{
+    return singletonZkFuseHandleManager;
+}
+
+/**
+  FUSE getattr callback: forwards to ZkFuseHandleManager::getattr().
+
+  \return the manager's result, or -EIO if a std::exception is caught.
+  \param path  the ZkFuse path.
+  \param stbuf receives the attributes.
+ */
+static 
+int zkfuse_getattr(const char *path, struct stat *stbuf)
+{
+    LOG_DEBUG(LOG, "zkfuse_getattr(path %s)", path);
+
+    int status = 0;
+    try {
+        const ZkFuseHandleManager::SharedPtr & manager = 
+            zkFuseHandleManager();
+        status = manager->getattr(path, *stbuf);
+    } catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "zkfuse_getattr %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_getattr returns %d", status);
+    return status;
+}
+
+/**
+  FUSE fgetattr callback: get attributes through an open file handle.
+
+  The handle is taken from fi->fh; values that are not positive are
+  rejected with -EINVAL. The query uses ZkFuseNameDefaultType.
+
+  \return the result of the file's getattr(), -EINVAL for a rejected
+          handle, or -EIO if a std::exception is caught.
+  \param path  the ZkFuse path (used for logging only).
+  \param stbuf receives the attributes.
+  \param fi    FUSE file info carrying the handle.
+ */
+static 
+int zkfuse_fgetattr(const char *path, struct stat *stbuf,
+                    struct fuse_file_info *fi)
+{
+    LOG_DEBUG(LOG, "zkfuse_fgetattr(path %s)", path);
+
+    int status = 0;
+    const int handle = fi->fh;
+    try {
+        if (handle > 0) {
+            status = zkFuseHandleManager()->getFile(handle)->
+                getattr(*stbuf, ZkFuseNameDefaultType);
+        }
+        else {
+            status = -EINVAL;
+        }
+    } catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "zkfuse_fgetattr %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_fgetattr returns %d", status);
+    return status;
+}
+
+/**
+  FUSE access callback. Not implemented; both arguments are ignored.
+
+  \return always -1.
+ */
+static 
+int zkfuse_access(const char *path, int mask)
+{
+    /* not implemented */
+    const int notImplemented = -1;
+    return notImplemented;
+}
+
+/**
+  FUSE readlink callback. Not implemented; all arguments are ignored.
+
+  \return always -1.
+ */
+static 
+int zkfuse_readlink(const char *path, char *buf, size_t size)
+{
+    /* not implemented */
+    const int notImplemented = -1;
+    return notImplemented;
+}
+
+/**
+  FUSE opendir callback.
+
+  A path named as a regular file (ZkFuseNameRegType) is rejected with
+  -ENOENT. Otherwise a handle is obtained for the ZooKeeper path, the
+  file's open directory count is incremented, and the handle is handed
+  to FUSE in fi->fh.
+
+  \return 0 if successful, otherwise negative errno (-EIO if a
+          std::exception is caught).
+  \param path the ZkFuse path of the directory.
+  \param fi   FUSE file info that receives the handle.
+ */
+static 
+int zkfuse_opendir(const char *path, struct fuse_file_info *fi)
+{ 
+    LOG_DEBUG(LOG, "zkfuse_opendir(path %s)", path);
+
+    int status = 0;
+    try {
+        const ZkFuseHandleManager::SharedPtr & manager = 
+            zkFuseHandleManager();
+        ZkFuseNameType nameType;
+        const std::string zkPath = manager->getZkPath(path, nameType);
+        if (nameType != ZkFuseNameRegType) {
+            ZkFuseAutoHandle autoHandle(manager, zkPath);
+            status = autoHandle.get();
+            if (status >= 0) {
+                autoHandle.getFile()->incOpenDirCount();
+                /* The handle now belongs to FUSE via fi->fh. */
+                autoHandle.release();
+                fi->fh = status;
+                status = 0;
+            }
+        }
+        else {
+            status = -ENOENT;
+        }
+    } catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "zkfuse_opendir %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_opendir returns %d", status);
+    return status;
+}
+
+/**
+  FUSE readdir callback: list a directory through an open handle.
+
+  The handle is taken from fi->fh; values that are not positive are
+  rejected with -EINVAL. The listing is delegated to the file's
+  readdir().
+
+  \return the result of the file's readdir(), -EINVAL for a rejected
+          handle, or -EIO if a std::exception is caught.
+  \param path   the ZkFuse path (used for logging only).
+  \param buf    passed through to the file's readdir().
+  \param filler passed through to the file's readdir().
+  \param offset passed through to the file's readdir().
+  \param fi     FUSE file info carrying the handle.
+ */
+static int 
+zkfuse_readdir(const char *path, void *buf, fuse_fill_dir_t filler, 
+               off_t offset, struct fuse_file_info *fi)
+{
+    LOG_DEBUG(LOG, "zkfuse_readdir(path %s, offset %zu)", path, offset);
+
+    int status = 0;
+    const int handle = fi->fh;
+    try {
+        if (handle > 0) {
+            status = zkFuseHandleManager()->getFile(handle)->
+                readdir(buf, filler, offset);
+        }
+        else {
+            status = -EINVAL;
+        }
+    } catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "zkfuse_readdir %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_readdir returns %d", status);
+    return status;
+}
+
+/**
+  FUSE releasedir callback: close a directory opened by zkfuse_opendir.
+
+  The handle is taken from fi->fh as an unsigned value; zero is rejected
+  with -EINVAL. Otherwise the file's open directory count is decremented
+  and the file is closed.
+
+  \return 0 if successful, -EINVAL for a rejected handle, or -EIO if a
+          std::exception is caught.
+  \param path the ZkFuse path (used for logging only).
+  \param fi   FUSE file info carrying the handle.
+ */
+static 
+int zkfuse_releasedir(const char *path, struct fuse_file_info *fi)
+{
+    LOG_DEBUG(LOG, "zkfuse_releasedir(path %s)", path);
+
+    int status = 0;
+    const unsigned handle = fi->fh;
+    try {
+        if (handle == 0) {
+            status = -EINVAL;
+        }
+        else {
+            const ZkFuseHandleManager::SharedPtr & manager = 
+                zkFuseHandleManager();
+            manager->getFile(handle)->decOpenDirCount();
+            manager->getFile(handle)->close();
+        } 
+    } catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "zkfuse_releasedir %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_releasedir returns %d", status);
+    return status;
+}
+
+/**
+  FUSE mknod callback.
+
+  The ZkFuse path is translated to a ZooKeeper path and the node is
+  created through a ZkFuseAutoHandle constructed with the given mode and
+  mayExist set to false. The rdev argument is ignored.
+
+  \return 0 if successful, otherwise negative errno (-EIO if a
+          std::exception is caught).
+  \param path the ZkFuse path of the node.
+  \param mode the requested mode.
+  \param rdev ignored.
+ */
+static 
+int zkfuse_mknod(const char *path, mode_t mode, dev_t rdev)
+{
+    LOG_DEBUG(LOG, "zkfuse_mknod(path %s, mode %o)", path, mode);
+
+    int status = 0;
+    try {
+        const ZkFuseHandleManager::SharedPtr & manager = 
+            zkFuseHandleManager();
+        ZkFuseNameType nameType;
+        const std::string zkPath = manager->getZkPath(path, nameType);
+        ZkFuseAutoHandle autoHandle(manager, zkPath, mode, false);
+        const int handle = autoHandle.get();
+        /* A valid handle is reported to the caller as plain success. */
+        status = (handle >= 0) ? 0 : handle;
+    } catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "zkfuse_mknod %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_mknod returns %d", status);
+    return status;
+}
+
+/**
+  FUSE mkdir callback: forwards to ZkFuseHandleManager::mkdir().
+
+  \return the manager's result, or -EIO if a std::exception is caught.
+  \param path the ZkFuse path of the directory.
+  \param mode the requested mode.
+ */
+static int zkfuse_mkdir(const char *path, mode_t mode)
+{
+    LOG_DEBUG(LOG, "zkfuse_mkdir(path %s, mode %o", path, mode);
+
+    int status = 0;
+    try {
+        const ZkFuseHandleManager::SharedPtr & manager = 
+            zkFuseHandleManager();
+        status = manager->mkdir(path, mode);
+    } catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "zkfuse_mkdir %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_mkdir returns %d", status);
+    return status;
+}
+
+/**
+  FUSE unlink callback: forwards to ZkFuseHandleManager::unlink().
+
+  \return the manager's result, or -EIO if a std::exception is caught.
+  \param path the ZkFuse path of the regular file.
+ */
+static int zkfuse_unlink(const char *path)
+{
+    LOG_DEBUG(LOG, "zkfuse_unlink(path %s)", path);
+
+    int status = 0;
+    try {
+        const ZkFuseHandleManager::SharedPtr & manager = 
+            zkFuseHandleManager();
+        status = manager->unlink(path);
+    } catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "zkfuse_unlink %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_unlink returns %d", status);
+    return status;
+}
+
+/**
+  FUSE rmdir callback: forwards to ZkFuseHandleManager::rmdir() with
+  only the path argument.
+
+  \return the manager's result, or -EIO if a std::exception is caught.
+  \param path the ZkFuse path of the directory.
+ */
+static int zkfuse_rmdir(const char *path)
+{
+    LOG_DEBUG(LOG, "zkfuse_rmdir(path %s)", path);
+
+    int status = 0;
+    try {
+        const ZkFuseHandleManager::SharedPtr & manager = 
+            zkFuseHandleManager();
+        status = manager->rmdir(path);
+    } catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "zkfuse_rmdir %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_rmdir returns %d", status);
+
+    return status;
+}
+
+/**
+  FUSE symlink callback. Not implemented; both arguments are ignored.
+
+  \return always -1.
+ */
+static int zkfuse_symlink(const char *from, const char *to)
+{
+    /* not implemented */
+    const int notImplemented = -1;
+    return notImplemented;
+}
+
+/**
+  FUSE rename callback: forwards to ZkFuseHandleManager::rename().
+
+  \return the manager's result, or -EIO if a std::exception is caught.
+  \param from the ZkFuse path to rename.
+  \param to   the ZkFuse path to rename it to.
+ */
+static int zkfuse_rename(const char *from, const char *to)
+{
+    LOG_DEBUG(LOG, "zkfuse_rename(from %s, to %s)", from, to);
+
+    int status = 0;
+    try {
+        const ZkFuseHandleManager::SharedPtr & manager = 
+            zkFuseHandleManager();
+        status = manager->rename(from, to);
+    } catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "zkfuse_rename %s %s exception %s", from, to, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_rename returns %d", status);
+
+    return status;
+}
+
+/**
+  FUSE link callback. Not implemented; both arguments are ignored.
+
+  \return always -1.
+ */
+static int zkfuse_link(const char *from, const char *to)
+{
+    /* not implemented */
+    const int notImplemented = -1;
+    return notImplemented;
+}
+
+/**
+  FUSE chmod callback. The request is logged and reported as successful
+  without changing anything.
+
+  \return always 0.
+  \param path the ZkFuse path (used for logging only).
+  \param mode the requested mode (used for logging only).
+ */
+static int zkfuse_chmod(const char *path, mode_t mode)
+{
+    LOG_DEBUG(LOG, "zkfuse_chmod(path %s, mode %o)", path, mode);
+
+    const int status = 0;
+    LOG_DEBUG(LOG, "zkfuse_chmod returns %d", status);
+    return status;
+}
+
+/**
+  FUSE chown callback.
+
+  Ownership cannot actually be changed. The request succeeds only if the
+  requested uid and gid both match the uid and gid held by the manager's
+  ZkFuseCommon; any other request is refused.
+
+  \return 0 if the requested owner matches, otherwise -EPERM.
+  \param path the ZkFuse path (used for logging only).
+  \param uid  the requested user id.
+  \param gid  the requested group id.
+ */
+static int zkfuse_chown(const char *path, uid_t uid, gid_t gid)
+{
+    LOG_DEBUG(LOG, "zkfuse_chown(path %s, uid %d, gid %d)", path, uid, gid);
+
+    const bool ownerMatches = 
+        zkFuseHandleManager()->getCommon().getUid() == uid &&
+        zkFuseHandleManager()->getCommon().getGid() == gid;
+    const int res = ownerMatches ? 0 : -EPERM;
+
+    LOG_DEBUG(LOG, "zkfuse_chown returns %d", res);
+    /* Return the computed (and logged) result rather than a constant 0,
+     * so a refused request is actually reported to the caller.
+     */
+    return res;
+}
+
+/**
+  FUSE truncate callback.
+
+  The ZkFuse path is translated to a ZooKeeper path, a handle for it is
+  obtained through a ZkFuseAutoHandle, and the operation is delegated to
+  the file's truncate().
+
+  \return the result of the file's truncate(), the negative value
+          reported by the ZkFuseAutoHandle if no handle could be
+          obtained, or -EIO if a std::exception is caught.
+  \param path the ZkFuse path.
+  \param size the new size.
+ */
+static int zkfuse_truncate(const char *path, off_t size)
+{
+    LOG_DEBUG(LOG, "zkfuse_truncate(path %s, size %zu)", path, size);
+
+    int status = 0;
+    try {
+        const ZkFuseHandleManager::SharedPtr & manager = 
+            zkFuseHandleManager();
+        ZkFuseNameType nameType;
+        const std::string zkPath = manager->getZkPath(path, nameType);
+        ZkFuseAutoHandle autoHandle(manager, zkPath);
+        const int handle = autoHandle.get();
+        status = (handle < 0) ? 
+            handle : 
+            autoHandle.getFile()->truncate(size);
+    } catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "zkfuse_truncate %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_truncate returns %d", status);
+    return status;
+}
+
+/**
+ * FUSE ftruncate callback (handle based).
+ *
+ * Looks up the file for the handle stored in fi->fh and asks it to
+ * truncate itself.
+ *
+ * @param path FUSE path of the file (used for logging only)
+ * @param size requested size
+ * @param fi   FUSE file info; fi->fh carries the handle set by zkfuse_open
+ * @return result of the file's truncate(), -EINVAL if the handle is 0,
+ *         or -EIO if a std::exception was thrown
+ */
+static 
+int zkfuse_ftruncate(const char *path, off_t size, struct fuse_file_info *fi)
+{
+    /* off_t is signed and need not be as wide as size_t, so it is logged
+       through an explicit long long rather than %zu. */
+    LOG_DEBUG(LOG, "zkfuse_ftruncate(path %s, size %lld)",
+              path, static_cast<long long>(size));
+
+    int res = 0;
+    unsigned handle = fi->fh;
+    try {
+        /* handle is unsigned, so 0 is the only invalid value to test for */
+        if (handle == 0) {
+            res = -EINVAL;
+        }
+        else {
+            res = zkFuseHandleManager()->getFile(handle)->truncate(size);
+        }
+    } catch (const std::exception & e) {
+        LOG_ERROR(LOG, "zkfuse_ftruncate %s exception %s", path, e.what());
+        res = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_ftruncate returns %d", res);
+    return res;
+}
+
+/**
+ * FUSE utimens callback.
+ *
+ * Converts both timestamps to milliseconds with timespecToMillisecs(),
+ * maps the FUSE path to a ZooKeeper path, obtains a temporary handle
+ * through ZkFuseAutoHandle and calls utime() on the file.
+ *
+ * @param path FUSE path of the file
+ * @param ts   ts[0] is the access time, ts[1] the modification time
+ * @return result of the file's utime(), the negative value from
+ *         ZkFuseAutoHandle::get() if no handle could be obtained, or
+ *         -EIO if a std::exception was thrown
+ */
+static 
+int zkfuse_utimens(const char *path, const struct timespec ts[2])
+{
+    LOG_DEBUG(LOG, "zkfuse_utimens(path %s)", path);
+
+    int status = 0;
+    try {
+        uint64_t accessMs = timespecToMillisecs(ts[0]);
+        uint64_t modifyMs = timespecToMillisecs(ts[1]);
+
+        ZkFuseNameType nameType;
+        std::string zkPath = zkFuseHandleManager()->getZkPath(path, nameType);
+        ZkFuseAutoHandle autoHandle(zkFuseHandleManager(), zkPath);
+
+        status = autoHandle.get();
+        if (status >= 0) {
+            status = autoHandle.getFile()->utime(accessMs, modifyMs, nameType);
+        }
+    }
+    catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "zkfuse_utimens %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_utimens returns %d", status);
+    return status;
+}
+
+/**
+ * FUSE create callback.
+ *
+ * Calls open() directly on path with the flags from fi and the given
+ * mode, and stores the resulting descriptor in fi->fh.
+ *
+ * NOTE(review): this goes straight to open() rather than through the
+ * handle manager, and its registration in init_zkfuse_oper is commented
+ * out -- confirm whether it is meant to be used at all.
+ *
+ * @param path path to open
+ * @param mode creation mode passed to open()
+ * @param fi   FUSE file info; flags are read, fh is written on success
+ * @return 0 on success, -errno if open() fails
+ */
+static 
+int zkfuse_create(const char *path, mode_t mode, struct fuse_file_info *fi)
+{
+    const int descriptor = open(path, fi->flags, mode);
+    if (descriptor == -1) {
+        return -errno;
+    }
+
+    fi->fh = descriptor;
+    return 0;
+}
+
+/**
+ * FUSE open callback.
+ *
+ * Maps the FUSE path to a ZooKeeper path and obtains a handle for it
+ * through ZkFuseAutoHandle. If the file reports that the name type is
+ * a directory name type the open is rejected with -ENOENT. Otherwise
+ * release() is called on the auto handle and the handle is stored in
+ * fi->fh for the later handle-based callbacks.
+ *
+ * @param path FUSE path of the file
+ * @param fi   FUSE file info; flags are logged, fh is written on success
+ * @return 0 on success, -ENOENT for a directory name type, the negative
+ *         value from ZkFuseAutoHandle::get() if no handle could be
+ *         obtained, or -EIO if a std::exception was thrown
+ */
+static 
+int zkfuse_open(const char *path, struct fuse_file_info *fi)
+{
+    LOG_DEBUG(LOG, "zkfuse_open(path %s, flags %o)", path, fi->flags);
+
+    int status = 0;
+    try {
+        ZkFuseNameType nameType;
+        std::string zkPath = zkFuseHandleManager()->getZkPath(path, nameType);
+        ZkFuseAutoHandle autoHandle(zkFuseHandleManager(), zkPath);
+
+        status = autoHandle.get();
+        if (status >= 0) {
+            if (autoHandle.getFile()->isDirNameType(nameType)) {
+                status = -ENOENT;
+            }
+            else {
+                autoHandle.release();
+                fi->fh = status;
+                status = 0;
+            }
+        }
+    }
+    catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "zkfuse_open %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_open returns %d", status);
+    return status;
+}
+
+/**
+ * FUSE read callback.
+ *
+ * Looks up the file for the handle stored in fi->fh and forwards the
+ * read to it.
+ *
+ * @param path   FUSE path of the file (used for logging only)
+ * @param buf    destination buffer
+ * @param size   number of bytes requested
+ * @param offset offset to read from
+ * @param fi     FUSE file info; fi->fh carries the handle set by zkfuse_open
+ * @return result of the file's read(), -EINVAL if the handle is 0, or
+ *         -EIO if a std::exception was thrown
+ */
+static 
+int zkfuse_read(const char *path, char *buf, size_t size, off_t offset,
+		struct fuse_file_info *fi)
+{
+    /* off_t is signed and need not be as wide as size_t, so the offset is
+       logged through an explicit long long rather than %zu. */
+    LOG_DEBUG(LOG, "zkfuse_read(path %s, size %zu, offset %lld)", 
+              path, size, static_cast<long long>(offset));
+
+    int res = 0;
+    unsigned handle = fi->fh;
+    try {
+        /* handle is unsigned, so 0 is the only invalid value to test for */
+        if (handle == 0) {
+            res = -EINVAL;
+        }
+        else {
+            res = zkFuseHandleManager()->getFile(handle)-> 
+                read(buf, size, offset);
+        }
+    } catch (const std::exception & e) {
+        LOG_ERROR(LOG, "zkfuse_read %s exception %s", path, e.what());
+        res = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_read returns %d", res);
+    return res;
+}
+
+/**
+ * FUSE write callback.
+ *
+ * Looks up the file for the handle stored in fi->fh and forwards the
+ * write to it.
+ *
+ * @param path   FUSE path of the file (used for logging only)
+ * @param buf    data to write
+ * @param size   number of bytes in buf
+ * @param offset offset to write at
+ * @param fi     FUSE file info; fi->fh carries the handle set by zkfuse_open
+ * @return result of the file's write(), -EINVAL if the handle is 0, or
+ *         -EIO if a std::exception was thrown
+ */
+static 
+int zkfuse_write(const char *path, const char *buf, size_t size,
+                 off_t offset, struct fuse_file_info *fi)
+{
+    /* off_t is signed and need not be as wide as size_t, so the offset is
+       logged through an explicit long long rather than %zu. */
+    LOG_DEBUG(LOG, "zkfuse_write(path %s, size %zu, offset %lld)", 
+              path, size, static_cast<long long>(offset));
+
+    int res = 0;
+    unsigned handle = fi->fh;
+    try {
+        /* handle is unsigned, so 0 is the only invalid value to test for */
+        if (handle == 0) {
+            res = -EINVAL;
+        } 
+        else {
+            res = zkFuseHandleManager()->getFile(handle)-> 
+                write(buf, size, offset);
+        }
+    } catch (const std::exception & e) {
+        LOG_ERROR(LOG, "zkfuse_write %s exception %s", path, e.what());
+        res = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_write returns %d", res);
+    return res;
+}
+
+/**
+ * FUSE statfs callback.
+ *
+ * File system statistics are not implemented by zkfuse; stbuf is left
+ * untouched.
+ *
+ * @param path  path within the file system (unused)
+ * @param stbuf statistics buffer (unused)
+ * @return -ENOSYS, the negated errno for "function not implemented"
+ */
+static int zkfuse_statfs(const char *path, struct statvfs *stbuf)
+{
+    (void) path;
+    (void) stbuf;
+    /* FUSE callbacks return a negated errno; a bare -1 would read as -EPERM. */
+    return -ENOSYS;
+}
+
+/**
+ * FUSE flush callback.
+ *
+ * FUSE calls flush on every close of an open file, and may call it
+ * several times for the same open file, so this must not really close
+ * the file. It only looks up the file for the handle stored in fi->fh
+ * and calls flush() on it.
+ *
+ * @param path FUSE path of the file (used for logging only)
+ * @param fi   FUSE file info; fi->fh carries the handle set by zkfuse_open
+ * @return result of the file's flush(), -EINVAL if the handle is 0, or
+ *         -EIO if a std::exception was thrown
+ */
+static 
+int zkfuse_flush(const char *path, struct fuse_file_info *fi)
+{
+    LOG_DEBUG(LOG, "zkfuse_flush(path %s)", path);
+
+    int status = 0;
+    unsigned fileHandle = fi->fh;
+    try {
+        if (fileHandle > 0) {
+            status = zkFuseHandleManager()->getFile(fileHandle)->flush();
+        }
+        else {
+            status = -EINVAL;
+        }
+    }
+    catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "zkfuse_flush %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_flush returns %d", status);
+    return status;
+}
+
+/**
+ * FUSE release callback.
+ *
+ * Looks up the file for the handle stored in fi->fh and calls close()
+ * on it. The return value of close(), if any, is not used.
+ *
+ * @param path FUSE path of the file (used for logging only)
+ * @param fi   FUSE file info; fi->fh carries the handle set by zkfuse_open
+ * @return 0 on success, -EINVAL if the handle is 0, or -EIO if a
+ *         std::exception was thrown
+ */
+static 
+int zkfuse_release(const char *path, struct fuse_file_info *fi)
+{
+    LOG_DEBUG(LOG, "zkfuse_release(path %s)", path);
+
+    int status = 0;
+    unsigned fileHandle = fi->fh;
+    try {
+        if (fileHandle > 0) {
+            zkFuseHandleManager()->getFile(fileHandle)->close();
+        }
+        else {
+            status = -EINVAL;
+        }
+    }
+    catch (const std::exception & ex) {
+        LOG_ERROR(LOG, "zkfuse_release %s exception %s", path, ex.what());
+        status = -EIO;
+    }
+
+    LOG_DEBUG(LOG, "zkfuse_release returns %d", status);
+    return status;
+}
+
+/**
+ * FUSE fsync callback.
+ *
+ * Implemented as a call to zkfuse_flush; the isdatasync flag is logged
+ * and otherwise ignored.
+ *
+ * @param path       FUSE path of the file
+ * @param isdatasync logged only
+ * @param fi         FUSE file info, passed through to zkfuse_flush
+ * @return whatever zkfuse_flush returns
+ */
+static 
+int zkfuse_fsync(const char *path, int isdatasync, 
+                 struct fuse_file_info *fi)
+{
+    LOG_DEBUG(LOG, "zkfuse_fsync(path %s, isdatasync %d)", path, isdatasync);
+
+    const int status = zkfuse_flush(path, fi);
+
+    LOG_DEBUG(LOG, "zkfuse_fsync returns %d", status);
+    return status;
+}
+
+#ifdef HAVE_SETXATTR
+/* xattr operations are optional and can safely be left unimplemented */
+/**
+ * Sets an extended attribute by calling lsetxattr() directly on path.
+ *
+ * @return 0 on success, -errno if lsetxattr() fails
+ */
+static int zkfuse_setxattr(const char *path, const char *name, const char *value,
+                           size_t size, int flags)
+{
+    if (lsetxattr(path, name, value, size, flags) == -1) {
+        return -errno;
+    }
+    return 0;
+}
+
+/**
+ * Reads an extended attribute by calling lgetxattr() directly on path.
+ *
+ * @return the value returned by lgetxattr() on success, -errno on failure
+ */
+static int zkfuse_getxattr(const char *path, const char *name, char *value,
+                           size_t size)
+{
+    const int length = lgetxattr(path, name, value, size);
+    return (length == -1) ? -errno : length;
+}
+
+/**
+ * Lists extended attributes by calling llistxattr() directly on path.
+ *
+ * @return the value returned by llistxattr() on success, -errno on failure
+ */
+static int zkfuse_listxattr(const char *path, char *list, size_t size)
+{
+    const int length = llistxattr(path, list, size);
+    return (length == -1) ? -errno : length;
+}
+
+/**
+ * Removes an extended attribute by calling lremovexattr() directly on path.
+ *
+ * @return 0 on success, -errno if lremovexattr() fails
+ */
+static int zkfuse_removexattr(const char *path, const char *name)
+{
+    if (lremovexattr(path, name) == -1) {
+        return -errno;
+    }
+    return 0;
+}
+#endif /* HAVE_SETXATTR */
+
+/**
+ * FUSE lock callback.
+ *
+ * Hands the request to ulockmgr_op(), passing fi->fh together with the
+ * address and size of fi->lock_owner as the owner identity.
+ *
+ * NOTE(review): zkfuse_open in this file stores a handle-manager handle
+ * in fi->fh, not an OS file descriptor -- confirm that passing it to
+ * ulockmgr_op() is what is intended.
+ *
+ * @param path FUSE path of the file (unused)
+ * @param fi   FUSE file info supplying fh and lock_owner
+ * @param cmd  lock command, passed through
+ * @param lock lock description, passed through
+ * @return whatever ulockmgr_op() returns
+ */
+static 
+int zkfuse_lock(const char *path, struct fuse_file_info *fi, int cmd,
+                struct flock *lock)
+{ 
+    (void) path;
+
+    const int status = ulockmgr_op(fi->fh, cmd, lock,
+                                   &fi->lock_owner, sizeof(fi->lock_owner));
+    return status;
+}
+
+
+/**
+ * Fills in the FUSE operations table used by zkfuse.
+ *
+ * The table is zeroed first, so every operation that is not assigned
+ * below (including the ones left commented out) stays a null pointer.
+ *
+ * @param fo operations table to initialize
+ */
+static 
+void init_zkfuse_oper(fuse_operations & fo)
+{
+    memset(&fo, 0, sizeof(fo));
+
+    /* attributes */
+    fo.getattr = zkfuse_getattr;
+    fo.fgetattr = zkfuse_fgetattr;
+    // fo.access = zkfuse_access;
+    // fo.readlink = zkfuse_readlink;
+
+    /* directories */
+    fo.opendir = zkfuse_opendir;
+    fo.readdir = zkfuse_readdir;
+    fo.releasedir = zkfuse_releasedir;
+
+    /* name space changes */
+    fo.mknod = zkfuse_mknod;
+    fo.mkdir = zkfuse_mkdir;
+    // fo.symlink = zkfuse_symlink;
+    fo.unlink = zkfuse_unlink;
+    fo.rmdir = zkfuse_rmdir;
+    fo.rename = zkfuse_rename;
+    // fo.link = zkfuse_link;
+
+    /* metadata changes */
+    fo.chmod = zkfuse_chmod;
+    fo.chown = zkfuse_chown;
+    fo.truncate = zkfuse_truncate;
+    fo.ftruncate = zkfuse_ftruncate;
+    fo.utimens = zkfuse_utimens;
+
+    /* file I/O */
+    // fo.create = zkfuse_create;
+    fo.open = zkfuse_open;
+    fo.read = zkfuse_read;
+    fo.write = zkfuse_write;
+    fo.statfs = zkfuse_statfs;
+    fo.flush = zkfuse_flush;
+    fo.release = zkfuse_release;
+    fo.fsync = zkfuse_fsync;
+#ifdef HAVE_SETXATTR
+    // fo.setxattr = zkfuse_setxattr;
+    // fo.getxattr = zkfuse_getxattr;
+    // fo.listxattr = zkfuse_listxattr;
+    // fo.removexattr = zkfuse_removexattr;
+#endif
+    fo.lock = zkfuse_lock;
+}
+
+
+/**
+ * Listener of ZK events that forwards each received event to a
+ * ZkFuseHandleManager.
+ *
+ * eventReceived() dereferences the manager unconditionally, so
+ * setManager() has to be called before any event is delivered.
+ */
+class SessionEventListener : public ZKEventListener 
+{
+  public:
+    /**
+      Remembers the ZkFuseHandleManager instance that will be asked to
+      service events.
+     */
+    void setManager(const ZkFuseHandleManager::SharedPtr & manager) 
+    {
+        _manager = manager;
+    }
+
+    /**
+      Passes a received event on to the ZkFuseHandleManager instance.
+      The event source is not used.
+     */
+    virtual void eventReceived(const ZKEventSource & source,
+                               const ZKWatcherEvent & event)
+    {
+        (void) source;
+        _manager->eventReceived(event);
+    }
+
+  private:
+    /** 
+      The ZkFuseHandleManager instance that events are forwarded to.
+     */
+    ZkFuseHandleManager::SharedPtr _manager;
+};
+
+/**
+ * Prints the command line help to standard output.
+ *
+ * Every option accepted by main() is listed, in both its long and its
+ * short form.
+ *
+ * @param argc number of entries in argv (unused)
+ * @param argv command line; argv[0] is printed as the program name
+ */
+void 
+usage(int argc, char *argv[])
+{
+    (void) argc;
+
+    std::cout 
+        << argv[0] 
+        << " usage: " 
+        << argv[0] 
+        << " [args-and-values]+" << std::endl
+        << "\t--cachesize=<cachesize> or -c <cachesize>:" << std::endl
+        << "\t  number of ZooKeeper nodes to cache." << std::endl
+        << "\t--debug or -d: " << std::endl
+        << "\t  enable fuse debug mode." << std::endl
+        << "\t--forcedirsuffix=<suffix> or -f <suffix>: " << std::endl
+        << "\t  sets the force directory suffix (must not be empty)."
+        << std::endl
+        << "\t--help or -h: " << std::endl
+        << "\t  print this message." << std::endl
+        << "\t--mount=<mountpoint> or -m <mountpoint>: " << std::endl
+        << "\t  specifies where to mount the zkfuse filesystem." << std::endl
+        << "\t--name=<name> or -n <name>: " << std::endl
+        << "\t  name of file for accessing node data." << std::endl
+        << "\t--zookeeper=<hostspec> or -z <hostspec>: " << std::endl
+        << "\t  specifies information needed to connect to zookeeper."
+        << std::endl;
+}
+
+/**
+ * zkfuse entry point.
+ *
+ * Configures log4cxx, parses the command line, creates the ZooKeeper
+ * adapter and the singleton ZkFuseHandleManager, and then hands control
+ * to fuse_main() with a synthesized argument vector consisting of the
+ * program name, the mount point and, in debug mode, "-d".
+ *
+ * @param argc number of command line arguments
+ * @param argv command line arguments
+ * @return 0 after printing help, 99 on a command line error or when the
+ *         handle manager could not be created, otherwise the value
+ *         returned by fuse_main()
+ */
+int 
+main(int argc, char *argv[])
+{
+    /**
+     * Initialize log4cxx 
+     */
+    const String file("log4cxx.properties");
+    PropertyConfigurator::configureAndWatch( file, 5000 );
+    LOG_INFO(LOG, "Starting zkfuse");
+
+    /**
+     * Supported operations.
+     */
+    enum ZkOption {
+        ZkOptionCacheSize = 1000,
+        ZkOptionDebug = 1001,
+        ZkOptionForceDirSuffix = 1002,
+        ZkOptionHelp = 1003,
+        ZkOptionMount = 1004,
+        ZkOptionName = 1005,
+        ZkOptionZookeeper = 1006,
+        ZkOptionInvalid = -1
+    };
+    
+    static const char *shortOptions = "c:df:hm:n:z:";
+    static struct option longOptions[] = {
+        { "cachesize", 1, 0, ZkOptionCacheSize },
+        { "debug", 0, 0, ZkOptionDebug },
+        { "forcedirsuffix", 1, 0, ZkOptionForceDirSuffix },
+        { "help", 0, 0, ZkOptionHelp },
+        { "mount", 1, 0, ZkOptionMount },
+        { "name", 1, 0, ZkOptionName },
+        { "zookeeper", 1, 0, ZkOptionZookeeper },
+        { 0, 0, 0, 0 }
+    };
+    
+    /**
+     * Parse arguments 
+     */
+    bool debugFlag = false;
+    std::string mountPoint = "/tmp/zkfuse";
+    std::string nameOfFile = "_data_";
+    std::string forceDirSuffix = "._dir_";
+    std::string zkHost;
+    unsigned cacheSize = 256;
+
+    while (true) {
+        int c;
+
+        c = getopt_long(argc, argv, shortOptions, longOptions, 0);
+        if (c == -1) {
+            /* -1 only means "no more options"; it is never an error */
+            break;
+        }
+
+        switch (c) {
+          case 'c':
+          case ZkOptionCacheSize: {
+            /* Reject values that are not entirely numeric instead of
+               silently using whatever prefix strtoul could parse. */
+            char *end = NULL;
+            cacheSize = strtoul(optarg, &end, 0);
+            if (end == optarg || *end != '\0') {
+                cerr 
+                    << argv[0]
+                    << ": ERROR: cache size \""
+                    << optarg
+                    << "\" is not a number!"
+                    << endl;
+                return 99;
+            }
+            break;
+          }
+          case 'd':
+          case ZkOptionDebug:
+            debugFlag = true;
+            break;
+          case 'f':
+          case ZkOptionForceDirSuffix:
+            forceDirSuffix = optarg;
+            break;
+          case 'h':
+          case ZkOptionHelp: 
+            usage(argc, argv);
+            return 0;
+          case 'm':
+          case ZkOptionMount:
+            mountPoint = optarg;
+            break;
+          case 'n':
+          case ZkOptionName:
+            nameOfFile = optarg;
+            break;
+          case 'z':
+          case ZkOptionZookeeper:
+            zkHost = optarg;
+            break;
+          default:
+            /* getopt_long returns '?' for an unknown option or a missing
+               option argument; treat that (and anything else that is
+               not handled above) as a fatal command line error. */
+            cerr 
+                << argv[0]
+                << ": ERROR: Did not specify legal argument!"
+                << endl;
+            return 99;
+        }
+    }
+
+    /**
+     * Check that zkHost has a value, otherwise abort.
+     */
+    if (zkHost.empty()) {
+        cerr 
+            << argv[0] 
+            << ": ERROR: " 
+            << "required argument \"--zookeeper <hostspec>\" was not given!"
+            << endl;
+        return 99;
+    }
+    /**
+     * Check that forceDirSuffix is not empty, otherwise abort.
+     */
+    if (forceDirSuffix.empty()) {
+        cerr 
+            << argv[0] 
+            << ": ERROR: " 
+            << "required argument \"--forcedirsuffix <suffix>\" " 
+               "cannot be empty!"
+            << endl;
+        return 99;
+    }
+    /**
+     * Check nameOfFile has no forward slash
+     */
+    if (nameOfFile.find_first_of('/') != std::string::npos) {
+        cerr 
+            << argv[0] 
+            << ": ERROR: " 
+            << "'/' present in name which is not allowed"
+            << endl;
+        return 99;
+    }
+
+    if (debugFlag) {
+        cout
+            << "cacheSize = " 
+            << cacheSize  
+            << ", debug = "
+            << debugFlag 
+            << ", forceDirSuffix = \""
+            << forceDirSuffix
+            << "\", mount = \""
+            << mountPoint
+            << "\", name = \""
+            << nameOfFile
+            << "\", zookeeper = \""
+            << zkHost
+            << "\", optind = "
+            << optind
+            << ", argc = "
+            << argc
+            << ", current arg = \""
+            << (optind >= argc ? "NULL" : argv[optind])
+            << "\""
+            << endl;
+    }
+
+    SessionEventListener listener;
+    SynchronousEventAdapter<ZKWatcherEvent> eventAdapter;
+    /* Set once the singleton handle manager exists and the listener has
+       been pointed at it. */
+    bool managerCreated = false;
+    LOG_INFO(LOG, "Create ZK adapter");
+    try {
+        /**
+         * Create an instance of ZK adapter.
+         */
+        std::string h(zkHost);
+        ZooKeeperConfig config(h, 1000, true, 10000);
+        ZkFuseCommon zkFuseCommon;
+        ZooKeeperAdapterSharedPtr zkPtr(
+            new ZooKeeperAdapter(
+                config, 
+                &listener,
+                false
+                )
+            );
+        zkFuseCommon.setZkAdapter(zkPtr);
+        zkFuseCommon.setDataFileName(nameOfFile);
+        zkFuseCommon.setForceDirSuffix(forceDirSuffix);
+        zkFuseCommon.setCacheSize(cacheSize);
+        singletonZkFuseHandleManager =
+            ZkFuseHandleManagerFactory::create(zkFuseCommon);
+        listener.setManager(singletonZkFuseHandleManager);
+        managerCreated = true;
+        zkPtr->reconnect();
+
+    } catch (const ZooKeeperException & e) {
+        cerr 
+            << argv[0]
+            << ": ERROR: ZooKeeperException caught: "
+            << e.what() 
+            << endl;
+    } catch (const std::exception & e) {
+        cerr 
+            << argv[0]
+            << ": ERROR: std::exception caught: "
+            << e.what() 
+            << endl;
+    }
+
+    /**
+     * The FUSE callbacks in this file all go through the handle manager,
+     * so mounting without one cannot work. A failure that happens after
+     * the manager exists (i.e. in reconnect()) is still only reported,
+     * as before.
+     */
+    if (!managerCreated) {
+        cerr 
+            << argv[0]
+            << ": ERROR: could not create the handle manager, not mounting"
+            << endl;
+        return 99;
+    }
+
+#ifdef ZOOKEEPER_ROOT_CHILDREN_WATCH_BUG
+    cerr << "ZOOKEEPER_ROOT_CHILDREN_WATCH_BUG enabled" << endl;
+#endif 
+    /**
+     * Initialize fuse 
+     */
+    LOG_INFO(LOG, "Initialize fuse");
+    umask(0); 
+    fuse_operations zkfuse_oper; 
+    init_zkfuse_oper(zkfuse_oper); 
+    int fakeArgc = debugFlag ? 3 : 2;
+    /* Entries 1 and 2 are strdup()ed copies and are freed below. */
+    char * fakeArgv[] = {
+        argv[0],
+        strdup(mountPoint.c_str()),
+        debugFlag ? strdup("-d") : NULL,
+        NULL
+    };
+    int res = fuse_main(fakeArgc, fakeArgv, &zkfuse_oper, NULL);
+    for (unsigned i = 1; i <= 2; i++) {
+        if (fakeArgv[i] != NULL) {
+            free(fakeArgv[i]);
+        }
+    }
+
+    return res;
+}