12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347 |
- <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
- <html>
- <head>
- <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
- <meta content="Apache Forrest" name="Generator">
- <meta name="Forrest-version" content="0.9">
- <meta name="Forrest-skin-name" content="pelt">
- <title>ZooKeeper Programmer's Guide</title>
- <link type="text/css" href="skin/basic.css" rel="stylesheet">
- <link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
- <link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
- <link type="text/css" href="skin/profile.css" rel="stylesheet">
- <script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
- <link rel="shortcut icon" href="images/favicon.ico">
- </head>
- <body onload="init()">
- <script type="text/javascript">ndeSetTextSize();</script>
- <div id="top">
- <!--+
- |breadtrail
- +-->
- <div class="breadtrail">
- <a href="http://www.apache.org/">Apache</a> > <a href="http://zookeeper.apache.org/">ZooKeeper</a> > <a href="http://zookeeper.apache.org/">ZooKeeper</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
- </div>
- <!--+
- |header
- +-->
- <div class="header">
- <!--+
- |start group logo
- +-->
- <div class="grouplogo">
- <a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
- </div>
- <!--+
- |end group logo
- +-->
- <!--+
- |start Project Logo
- +-->
- <div class="projectlogo">
- <a href="http://zookeeper.apache.org/"><img class="logoImage" alt="ZooKeeper" src="images/zookeeper_small.gif" title="ZooKeeper: distributed coordination"></a>
- </div>
- <!--+
- |end Project Logo
- +-->
- <!--+
- |start Search
- +-->
- <div class="searchbox">
- <form action="http://www.google.com/search" method="get" class="roundtopsmall">
- <input value="zookeeper.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">
- <input name="Search" value="Search" type="submit">
- </form>
- </div>
- <!--+
- |end search
- +-->
- <!--+
- |start Tabs
- +-->
- <ul id="tabs">
- <li>
- <a class="unselected" href="http://zookeeper.apache.org/">Project</a>
- </li>
- <li>
- <a class="unselected" href="https://cwiki.apache.org/confluence/display/ZOOKEEPER/">Wiki</a>
- </li>
- <li class="current">
- <a class="selected" href="index.html">ZooKeeper 3.4 Documentation</a>
- </li>
- </ul>
- <!--+
- |end Tabs
- +-->
- </div>
- </div>
- <div id="main">
- <div id="publishedStrip">
- <!--+
- |start Subtabs
- +-->
- <div id="level2tabs"></div>
- <!--+
- |end Endtabs
- +-->
- <script type="text/javascript"><!--
- document.write("Last Published: " + document.lastModified);
- // --></script>
- </div>
- <!--+
- |breadtrail
- +-->
- <div class="breadtrail">
-
- </div>
- <!--+
- |start Menu, mainarea
- +-->
- <!--+
- |start Menu
- +-->
- <div id="menu">
- <div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Overview</div>
- <div id="menu_1.1" class="menuitemgroup">
- <div class="menuitem">
- <a href="index.html">Welcome</a>
- </div>
- <div class="menuitem">
- <a href="zookeeperOver.html">Overview</a>
- </div>
- <div class="menuitem">
- <a href="zookeeperStarted.html">Getting Started</a>
- </div>
- <div class="menuitem">
- <a href="releasenotes.html">Release Notes</a>
- </div>
- </div>
- <div onclick="SwitchMenu('menu_selected_1.2', 'skin/')" id="menu_selected_1.2Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Developer</div>
- <div id="menu_selected_1.2" class="selectedmenuitemgroup" style="display: block;">
- <div class="menuitem">
- <a href="api/index.html">API Docs</a>
- </div>
- <div class="menupage">
- <div class="menupagetitle">Programmer's Guide</div>
- </div>
- <div class="menuitem">
- <a href="javaExample.html">Java Example</a>
- </div>
- <div class="menuitem">
- <a href="zookeeperTutorial.html">Barrier and Queue Tutorial</a>
- </div>
- <div class="menuitem">
- <a href="recipes.html">Recipes</a>
- </div>
- </div>
- <div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">Admin &amp; Ops</div>
- <div id="menu_1.3" class="menuitemgroup">
- <div class="menuitem">
- <a href="zookeeperAdmin.html">Administrator's Guide</a>
- </div>
- <div class="menuitem">
- <a href="zookeeperQuotas.html">Quota Guide</a>
- </div>
- <div class="menuitem">
- <a href="zookeeperJMX.html">JMX</a>
- </div>
- <div class="menuitem">
- <a href="zookeeperObservers.html">Observers Guide</a>
- </div>
- <div class="menuitem">
- <a href="zookeeperReconfig.html">Dynamic Reconfiguration</a>
- </div>
- </div>
- <div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Contributor</div>
- <div id="menu_1.4" class="menuitemgroup">
- <div class="menuitem">
- <a href="zookeeperInternals.html">ZooKeeper Internals</a>
- </div>
- </div>
- <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Miscellaneous</div>
- <div id="menu_1.5" class="menuitemgroup">
- <div class="menuitem">
- <a href="https://cwiki.apache.org/confluence/display/ZOOKEEPER">Wiki</a>
- </div>
- <div class="menuitem">
- <a href="https://cwiki.apache.org/confluence/display/ZOOKEEPER/FAQ">FAQ</a>
- </div>
- <div class="menuitem">
- <a href="http://zookeeper.apache.org/mailing_lists.html">Mailing Lists</a>
- </div>
- </div>
- <div id="credit"></div>
- <div id="roundbottom">
- <img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
- <!--+
- |alternative credits
- +-->
- <div id="credit2"></div>
- </div>
- <!--+
- |end Menu
- +-->
- <!--+
- |start content
- +-->
- <div id="content">
- <div title="Portable Document Format" class="pdflink">
- <a class="dida" href="zookeeperProgrammers.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
- PDF</a>
- </div>
- <h1>ZooKeeper Programmer's Guide</h1>
- <h3>Developing Distributed Applications that use ZooKeeper</h3>
- <div id="front-matter">
- <div id="minitoc-area">
- <ul class="minitoc">
- <li>
- <a href="#_introduction">Introduction</a>
- </li>
- <li>
- <a href="#ch_zkDataModel">The ZooKeeper Data Model</a>
- <ul class="minitoc">
- <li>
- <a href="#sc_zkDataModel_znodes">ZNodes</a>
- <ul class="minitoc">
- <li>
- <a href="#sc_zkDataMode_watches">Watches</a>
- </li>
- <li>
- <a href="#Data+Access">Data Access</a>
- </li>
- <li>
- <a href="#Ephemeral+Nodes">Ephemeral Nodes</a>
- </li>
- <li>
- <a href="#Sequence+Nodes+--+Unique+Naming">Sequence Nodes -- Unique Naming</a>
- </li>
- </ul>
- </li>
- <li>
- <a href="#sc_timeInZk">Time in ZooKeeper</a>
- </li>
- <li>
- <a href="#sc_zkStatStructure">ZooKeeper Stat Structure</a>
- </li>
- </ul>
- </li>
- <li>
- <a href="#ch_zkSessions">ZooKeeper Sessions</a>
- </li>
- <li>
- <a href="#ch_zkWatches">ZooKeeper Watches</a>
- <ul class="minitoc">
- <li>
- <a href="#sc_WatchSemantics">Semantics of Watches</a>
- </li>
- <li>
- <a href="#sc_WatchRemoval">Remove Watches</a>
- </li>
- <li>
- <a href="#sc_WatchGuarantees">What ZooKeeper Guarantees about Watches</a>
- </li>
- <li>
- <a href="#sc_WatchRememberThese">Things to Remember about Watches</a>
- </li>
- </ul>
- </li>
- <li>
- <a href="#sc_ZooKeeperAccessControl">ZooKeeper access control using ACLs</a>
- <ul class="minitoc">
- <li>
- <a href="#sc_ACLPermissions">ACL Permissions</a>
- <ul class="minitoc">
- <li>
- <a href="#sc_BuiltinACLSchemes">Builtin ACL Schemes</a>
- </li>
- <li>
- <a href="#ZooKeeper+C+client+API">ZooKeeper C client API</a>
- </li>
- </ul>
- </li>
- </ul>
- </li>
- <li>
- <a href="#sc_ZooKeeperPluggableAuthentication">Pluggable ZooKeeper authentication</a>
- </li>
- <li>
- <a href="#ch_zkGuarantees">Consistency Guarantees</a>
- </li>
- <li>
- <a href="#ch_bindings">Bindings</a>
- <ul class="minitoc">
- <li>
- <a href="#Java+Binding">Java Binding</a>
- </li>
- <li>
- <a href="#C+Binding">C Binding</a>
- <ul class="minitoc">
- <li>
- <a href="#Installation">Installation</a>
- </li>
- <li>
- <a href="#Using+the+C+Client">Using the C Client</a>
- </li>
- </ul>
- </li>
- </ul>
- </li>
- <li>
- <a href="#ch_guideToZkOperations">Building Blocks: A Guide to ZooKeeper Operations</a>
- <ul class="minitoc">
- <li>
- <a href="#sc_errorsZk">Handling Errors</a>
- </li>
- <li>
- <a href="#sc_connectingToZk">Connecting to ZooKeeper</a>
- </li>
- <li>
- <a href="#sc_readOps">Read Operations</a>
- </li>
- <li>
- <a href="#sc_writeOps">Write Operations</a>
- </li>
- <li>
- <a href="#sc_handlingWatches">Handling Watches</a>
- </li>
- <li>
- <a href="#sc_miscOps">Miscellaneous ZooKeeper Operations</a>
- </li>
- </ul>
- </li>
- <li>
- <a href="#ch_programStructureWithExample">Program Structure, with Simple Example</a>
- </li>
- <li>
- <a href="#ch_gotchas">Gotchas: Common Problems and Troubleshooting</a>
- </li>
- </ul>
- </div>
- </div>
-
-
-
-
- <a name="_introduction"></a>
- <h2 class="h3">Introduction</h2>
- <div class="section">
- <p>This document is a guide for developers wishing to create
- distributed applications that take advantage of ZooKeeper's coordination
- services. It contains conceptual and practical information.</p>
- <p>The first four sections of this guide present higher level
- discussions of various ZooKeeper concepts. These are necessary both for an
- understanding of how ZooKeeper works and of how to work with it. It does
- not contain source code, but it does assume a familiarity with the
- problems associated with distributed computing. The sections in this first
- group are:</p>
- <ul>
-
- <li>
-
- <p>
- <a href="#ch_zkDataModel">The ZooKeeper Data Model</a>
- </p>
-
- </li>
-
- <li>
-
- <p>
- <a href="#ch_zkSessions">ZooKeeper Sessions</a>
- </p>
-
- </li>
-
- <li>
-
- <p>
- <a href="#ch_zkWatches">ZooKeeper Watches</a>
- </p>
-
- </li>
-
- <li>
-
- <p>
- <a href="#ch_zkGuarantees">Consistency Guarantees</a>
- </p>
-
- </li>
-
- </ul>
- <p>The next four sections provide practical programming
- information. These are:</p>
- <ul>
-
- <li>
-
- <p>
- <a href="#ch_guideToZkOperations">Building Blocks: A Guide to ZooKeeper Operations</a>
- </p>
-
- </li>
-
- <li>
-
- <p>
- <a href="#ch_bindings">Bindings</a>
- </p>
-
- </li>
-
- <li>
-
- <p>
- <a href="#ch_programStructureWithExample">Program Structure, with Simple Example</a>
- <em>[tbd]</em>
- </p>
-
- </li>
-
- <li>
-
- <p>
- <a href="#ch_gotchas">Gotchas: Common Problems and Troubleshooting</a>
- </p>
-
- </li>
-
- </ul>
- <p>The book concludes with an <a href="#apx_linksToOtherInfo">appendix</a> containing links to other
- useful, ZooKeeper-related information.</p>
- <p>Most of the information in this document is written to be accessible as
- stand-alone reference material. However, before starting your first
- ZooKeeper application, you should probably at least read the chapters on
- the <a href="#ch_zkDataModel">ZooKeeper Data Model</a> and <a href="#ch_guideToZkOperations">ZooKeeper Basic Operations</a>. Also,
- the <a href="#ch_programStructureWithExample">Simple Programming
- Example</a> <em>[tbd]</em> is helpful for understanding the basic
- structure of a ZooKeeper client application.</p>
- </div>
-
- <a name="ch_zkDataModel"></a>
- <h2 class="h3">The ZooKeeper Data Model</h2>
- <div class="section">
- <p>ZooKeeper has a hierarchical name space, much like a distributed file
- system. The only difference is that each node in the namespace can have
- data associated with it as well as children. It is like having a file
- system that allows a file to also be a directory. Paths to nodes are
- always expressed as canonical, absolute, slash-separated paths; there are
- no relative references. Any unicode character can be used in a path subject
- to the following constraints:</p>
- <ul>
-
- <li>
-
- <p>The null character (\u0000) cannot be part of a path name. (This
- causes problems with the C binding.)</p>
-
- </li>
-
- <li>
-
- <p>The following characters can't be used because they don't
- display well, or render in confusing ways: \u0001 - \u001F and \u007F
- - \u009F.</p>
-
- </li>
-
- <li>
-
- <p>The following characters are not allowed: \ud800 - \uF8FF,
- \uFFF0 - \uFFFF.</p>
-
- </li>
-
- <li>
-
- <p>The "." character can be used as part of another name, but "."
- and ".." cannot alone be used to indicate a node along a path,
- because ZooKeeper doesn't use relative paths. The following would be
- invalid: "/a/b/./c" or "/a/b/../c".</p>
-
- </li>
-
- <li>
-
- <p>The token "zookeeper" is reserved.</p>
-
- </li>
-
- </ul>
- <a name="sc_zkDataModel_znodes"></a>
- <h3 class="h4">ZNodes</h3>
- <p>Every node in a ZooKeeper tree is referred to as a
- <em>znode</em>. Znodes maintain a stat structure that
- includes version numbers for data changes and ACL changes. The stat
- structure also has timestamps. The version number, together with the
- timestamp, allows ZooKeeper to validate the cache and to coordinate
- updates. Each time a znode's data changes, the version number increases.
- For instance, whenever a client retrieves data, it also receives the
- version of the data. And when a client performs an update or a delete,
- it must supply the version of the data of the znode it is changing. If
- the version it supplies doesn't match the actual version of the data,
- the update will fail. (This behavior can be overridden. For more
- information see... )<em>[tbd...]</em>
- </p>
- <div class="note">
- <div class="label">Note</div>
- <div class="content">
-
- <p>In distributed application engineering, the word
- <em>node</em> can refer to a generic host machine, a
- server, a member of an ensemble, a client process, etc. In the ZooKeeper
- documentation, <em>znodes</em> refer to the data nodes.
- <em>Servers</em> refer to machines that make up the
- ZooKeeper service; <em>quorum peers</em> refer to the
- servers that make up an ensemble; client refers to any host or process
- which uses a ZooKeeper service.</p>
-
- </div>
- </div>
- <p>Znodes are the main entity that a programmer accesses. They have
- several characteristics that are worth mentioning here.</p>
- <a name="sc_zkDataMode_watches"></a>
- <h4>Watches</h4>
- <p>Clients can set watches on znodes. Changes to that znode trigger
- the watch and then clear the watch. When a watch triggers, ZooKeeper
- sends the client a notification. More information about watches can be
- found in the section
- <a href="#ch_zkWatches">ZooKeeper Watches</a>.</p>
- <a name="Data+Access"></a>
- <h4>Data Access</h4>
- <p>The data stored at each znode in a namespace is read and written
- atomically. Reads get all the data bytes associated with a znode and a
- write replaces all the data. Each node has an Access Control List
- (ACL) that restricts who can do what.</p>
- <p>ZooKeeper was not designed to be a general database or large
- object store. Instead, it manages coordination data. This data can
- come in the form of configuration, status information, rendezvous, etc.
- A common property of the various forms of coordination data is that
- they are relatively small: measured in kilobytes.
- The ZooKeeper client and the server implementations have sanity checks
- to ensure that znodes have less than 1M of data, but the data should
- be much less than that on average. Operating on relatively large data
- sizes will cause some operations to take much more time than others and
- will affect the latencies of some operations because of the extra time
- needed to move more data over the network and onto storage media. If
- large data storage is needed, the usual pattern of dealing with such
- data is to store it on a bulk storage system, such as NFS or HDFS, and
- store pointers to the storage locations in ZooKeeper.</p>
- <a name="Ephemeral+Nodes"></a>
- <h4>Ephemeral Nodes</h4>
- <p>ZooKeeper also has the notion of ephemeral nodes. These znodes
- exist as long as the session that created the znode is active. When
- the session ends the znode is deleted. Because of this behavior
- ephemeral znodes are not allowed to have children.</p>
- <a name="Sequence+Nodes+--+Unique+Naming"></a>
- <h4>Sequence Nodes -- Unique Naming</h4>
- <p>When creating a znode you can also request that
- ZooKeeper append a monotonically increasing counter to the end
- of the path. This counter is unique to the parent znode. The
- counter has a format of %010d -- that is 10 digits with 0
- (zero) padding (the counter is formatted in this way to
- simplify sorting), i.e. "&lt;path&gt;0000000001". See
- <a href="recipes.html#sc_recipes_Queues">Queue
- Recipe</a> for an example use of this feature. Note: the
- counter used to store the next sequence number is a signed int
- (4bytes) maintained by the parent node, the counter will
- overflow when incremented beyond 2147483647 (resulting in a
- name "&lt;path&gt;-2147483648").</p>
- <a name="sc_timeInZk"></a>
- <h3 class="h4">Time in ZooKeeper</h3>
- <p>ZooKeeper tracks time in multiple ways:</p>
- <ul>
-
- <li>
-
- <p>
- <strong>Zxid</strong>
- </p>
-
- <p>Every change to the ZooKeeper state receives a stamp in the
- form of a <em>zxid</em> (ZooKeeper Transaction Id).
- This exposes the total ordering of all changes to ZooKeeper. Each
- change will have a unique zxid and if zxid1 is smaller than zxid2
- then zxid1 happened before zxid2.</p>
-
- </li>
-
- <li>
-
- <p>
- <strong>Version numbers</strong>
- </p>
-
- <p>Every change to a node will cause an increase to one of the
- version numbers of that node. The three version numbers are version
- (number of changes to the data of a znode), cversion (number of
- changes to the children of a znode), and aversion (number of changes
- to the ACL of a znode).</p>
-
- </li>
-
- <li>
-
- <p>
- <strong>Ticks</strong>
- </p>
-
- <p>When using multi-server ZooKeeper, servers use ticks to define
- timing of events such as status uploads, session timeouts,
- connection timeouts between peers, etc. The tick time is only
- indirectly exposed through the minimum session timeout (2 times the
- tick time); if a client requests a session timeout less than the
- minimum session timeout, the server will tell the client that the
- session timeout is actually the minimum session timeout.</p>
-
- </li>
-
- <li>
-
- <p>
- <strong>Real time</strong>
- </p>
-
- <p>ZooKeeper doesn't use real time, or clock time, at all except
- to put timestamps into the stat structure on znode creation and
- znode modification.</p>
-
- </li>
-
- </ul>
- <a name="sc_zkStatStructure"></a>
- <h3 class="h4">ZooKeeper Stat Structure</h3>
- <p>The Stat structure for each znode in ZooKeeper is made up of the
- following fields:</p>
- <ul>
-
- <li>
-
- <p>
- <strong>czxid</strong>
- </p>
-
- <p>The zxid of the change that caused this znode to be
- created.</p>
-
- </li>
-
- <li>
-
- <p>
- <strong>mzxid</strong>
- </p>
-
- <p>The zxid of the change that last modified this znode.</p>
-
- </li>
-
- <li>
-
- <p>
- <strong>ctime</strong>
- </p>
-
- <p>The time in milliseconds from epoch when this znode was
- created.</p>
-
- </li>
-
- <li>
-
- <p>
- <strong>mtime</strong>
- </p>
-
- <p>The time in milliseconds from epoch when this znode was last
- modified.</p>
-
- </li>
-
- <li>
-
- <p>
- <strong>version</strong>
- </p>
-
- <p>The number of changes to the data of this znode.</p>
-
- </li>
-
- <li>
-
- <p>
- <strong>cversion</strong>
- </p>
-
- <p>The number of changes to the children of this znode.</p>
-
- </li>
-
- <li>
-
- <p>
- <strong>aversion</strong>
- </p>
-
- <p>The number of changes to the ACL of this znode.</p>
-
- </li>
-
- <li>
-
- <p>
- <strong>ephemeralOwner</strong>
- </p>
-
- <p>The session id of the owner of this znode if the znode is an
- ephemeral node. If it is not an ephemeral node, it will be
- zero.</p>
-
- </li>
-
- <li>
-
- <p>
- <strong>dataLength</strong>
- </p>
-
- <p>The length of the data field of this znode.</p>
-
- </li>
-
- <li>
-
- <p>
- <strong>numChildren</strong>
- </p>
-
- <p>The number of children of this znode.</p>
-
- </li>
-
- </ul>
- </div>
-
- <a name="ch_zkSessions"></a>
- <h2 class="h3">ZooKeeper Sessions</h2>
- <div class="section">
- <p>A ZooKeeper client establishes a session with the ZooKeeper
- service by creating a handle to the service using a language
- binding. Once created, the handle starts off in the CONNECTING state
- and the client library tries to connect to one of the servers that
- make up the ZooKeeper service at which point it switches to the
- CONNECTED state. During normal operation the handle will be in one of these
- two states. If an unrecoverable error occurs, such as session
- expiration or authentication failure, or if the application explicitly
- closes the handle, the handle will move to the CLOSED state.
- The following figure shows the possible state transitions of a
- ZooKeeper client:</p>
- <img alt="" src="images/state_dia.jpg"><p>To create a client session the application code must provide
- a connection string containing a comma separated list of host:port pairs,
- each corresponding to a ZooKeeper server (e.g. "127.0.0.1:4545" or
- "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002"). The ZooKeeper
- client library will pick an arbitrary server and try to connect to
- it. If this connection fails, or if the client becomes
- disconnected from the server for any reason, the client will
- automatically try the next server in the list, until a connection
- is (re-)established.</p>
- <p>
- <strong>Added in 3.2.0</strong>: An
- optional "chroot" suffix may also be appended to the connection
- string. This will run the client commands while interpreting all
- paths relative to this root (similar to the unix chroot
- command). If used the example would look like:
- "127.0.0.1:4545/app/a" or
- "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002/app/a" where the
- client would be rooted at "/app/a" and all paths would be relative
- to this root - i.e. getting/setting/etc... "/foo/bar" would result
- in operations being run on "/app/a/foo/bar" (from the server
- perspective). This feature is particularly useful in multi-tenant
- environments where each user of a particular ZooKeeper service
- could be rooted differently. This makes re-use much simpler as
- each user can code his/her application as if it were rooted at
- "/", while actual location (say /app/a) could be determined at
- deployment time.</p>
- <p>When a client gets a handle to the ZooKeeper service,
- ZooKeeper creates a ZooKeeper session, represented as a 64-bit
- number, that it assigns to the client. If the client connects to a
- different ZooKeeper server, it will send the session id as a part
- of the connection handshake. As a security measure, the server
- creates a password for the session id that any ZooKeeper server
- can validate. The password is sent to the client with the session
- id when the client establishes the session. The client sends this
- password with the session id whenever it reestablishes the session
- with a new server.</p>
- <p>One of the parameters to the ZooKeeper client library call
- to create a ZooKeeper session is the session timeout in
- milliseconds. The client sends a requested timeout, the server
- responds with the timeout that it can give the client. The current
- implementation requires that the timeout be a minimum of 2 times
- the tickTime (as set in the server configuration) and a maximum of
- 20 times the tickTime. The ZooKeeper client API allows access to
- the negotiated timeout.</p>
- <p>When a client (session) becomes partitioned from the ZK
- serving cluster it will begin searching the list of servers that
- were specified during session creation. Eventually, when
- connectivity between the client and at least one of the servers is
- re-established, the session will either again transition to the
- "connected" state (if reconnected within the session timeout
- value) or it will transition to the "expired" state (if
- reconnected after the session timeout). It is not advisable to
- create a new session object (a new ZooKeeper.class or zookeeper
- handle in the c binding) for disconnection. The ZK client library
- will handle reconnect for you. In particular we have heuristics
- built into the client library to handle things like "herd effect",
- etc... Only create a new session when you are notified of session
- expiration (mandatory).</p>
- <p>Session expiration is managed by the ZooKeeper cluster
- itself, not by the client. When the ZK client establishes a
- session with the cluster it provides a "timeout" value detailed
- above. This value is used by the cluster to determine when the
- client's session expires. Expiration happens when the cluster
- does not hear from the client within the specified session timeout
- period (i.e. no heartbeat). At session expiration the cluster will
- delete any/all ephemeral nodes owned by that session and
- immediately notify any/all connected clients of the change (anyone
- watching those znodes). At this point the client of the expired
- session is still disconnected from the cluster; it will not be
- notified of the session expiration until/unless it is able to
- re-establish a connection to the cluster. The client will stay in
- disconnected state until the TCP connection is re-established with
- the cluster, at which point the watcher of the expired session
- will receive the "session expired" notification.</p>
- <p>Example state transitions for an expired session as seen by
- the expired session's watcher:</p>
- <ol>
-
- <li>
- <p>'connected' : session is established and client
- is communicating with cluster (client/server communication is
- operating properly)</p>
- </li>
-
- <li>
- <p>.... client is partitioned from the
- cluster</p>
- </li>
-
- <li>
- <p>'disconnected' : client has lost connectivity
- with the cluster</p>
- </li>
-
- <li>
- <p>.... time elapses, after 'timeout' period the
- cluster expires the session, nothing is seen by client as it is
- disconnected from cluster</p>
- </li>
-
- <li>
- <p>.... time elapses, the client regains network
- level connectivity with the cluster</p>
- </li>
-
- <li>
- <p>'expired' : eventually the client reconnects to
- the cluster, it is then notified of the
- expiration</p>
- </li>
-
- </ol>
- <p>Another parameter to the ZooKeeper session establishment
- call is the default watcher. Watchers are notified when any state
- change occurs in the client. For example if the client loses
- connectivity to the server the client will be notified, or if the
- client's session expires, etc... This watcher should consider the
- initial state to be disconnected (i.e. before any state change
- events are sent to the watcher by the client lib). In the case of
- a new connection, the first event sent to the watcher is typically
- the session connection event.</p>
- <p>The session is kept alive by requests sent by the client. If
- the session is idle for a period of time that would timeout the
- session, the client will send a PING request to keep the session
- alive. This PING request not only allows the ZooKeeper server to
- know that the client is still active, but it also allows the
- client to verify that its connection to the ZooKeeper server is
- still active. The timing of the PING is conservative enough to
- ensure reasonable time to detect a dead connection and reconnect
- to a new server.</p>
- <p>
- Once a connection to the server is successfully established
- (connected) there are basically two cases where the client lib generates
- connectionloss (the result code in c binding, exception in Java -- see
- the API documentation for binding specific details) when either a synchronous or
- asynchronous operation is performed and one of the following holds:
- </p>
- <ol>
-
- <li>
- <p>The application calls an operation on a session that is no
- longer alive/valid</p>
- </li>
-
- <li>
- <p>The ZooKeeper client disconnects from a server when there
- are pending operations to that server, i.e., there is a pending asynchronous call.
- </p>
- </li>
-
- </ol>
- <p>
- <strong>Added in 3.2.0 -- SessionMovedException</strong>. There is an internal
- exception that is generally not seen by clients called the SessionMovedException.
- This exception occurs because a request was received on a connection for a session
- which has been reestablished on a different server. The normal cause of this error is
- a client that sends a request to a server, but the network packet gets delayed, so
- the client times out and connects to a new server. When the delayed packet arrives at
- the first server, the old server detects that the session has moved, and closes the
- client connection. Clients normally do not see this error since they do not read
- from those old connections. (Old connections are usually closed.) One situation in which this
- condition can be seen is when two clients try to reestablish the same connection using
- a saved session id and password. One of the clients will reestablish the connection
- and the second client will be disconnected (causing the pair to attempt to re-establish
- their connection/session indefinitely).</p>
- <p>
- <strong>Updating the list of servers</strong>. We allow a client to
- update the connection string by providing a new comma separated list of host:port pairs,
- each corresponding to a ZooKeeper server. The function invokes a probabilistic load-balancing
- algorithm which may cause the client to disconnect from its current host with the goal
- to achieve expected uniform number of connections per server in the new list.
- In case the current host to which the client is connected is not in the new list
- this call will always cause the connection to be dropped. Otherwise, the decision
- is based on whether the number of servers has increased or decreased and by how much.
- </p>
- <p>
- For example, if the previous connection string contained 3 hosts and now the list contains
- these 3 hosts and 2 more hosts, 40% of clients connected to each of the 3 hosts will
- move to one of the new hosts in order to balance the load. The algorithm will cause the client
- to drop its connection to the current host to which it is connected with probability 0.4 and in this
- case cause the client to connect to one of the 2 new hosts, chosen at random.
- </p>
- <p>
- Another example -- suppose we have 5 hosts and now update the list to remove 2 of the hosts,
- the clients connected to the 3 remaining hosts will stay connected, whereas all clients connected
- to the 2 removed hosts will need to move to one of the 3 hosts, chosen at random. If the connection
- is dropped, the client moves to a special mode where it chooses a new server to connect to using the
- probabilistic algorithm, and not just round robin.
- </p>
- <p>
- In the first example, each client decides to disconnect with probability 0.4 but once the decision is
- made, it will try to connect to a random new server and only if it cannot connect to any of the new
- servers will it try to connect to the old ones. After finding a server, or trying all servers in the
- new list and failing to connect, the client moves back to the normal mode of operation where it picks
- an arbitrary server from the connectString and attempts to connect to it. If that fails, it will continue
- trying different random servers in round robin. (see above the algorithm used to initially choose a server)
- </p>
- </div>
-
- <a name="ch_zkWatches"></a>
- <h2 class="h3">ZooKeeper Watches</h2>
- <div class="section">
- <p>All of the read operations in ZooKeeper - <strong>getData()</strong>, <strong>getChildren()</strong>, and <strong>exists()</strong> - have the option of setting a watch as a
- side effect. Here is ZooKeeper's definition of a watch: a watch event is
- a one-time trigger, sent to the client that set the watch, which occurs when
- the data for which the watch was set changes. There are three key points
- to consider in this definition of a watch:</p>
- <ul>
-
- <li>
-
- <p>
- <strong>One-time trigger</strong>
- </p>
-
- <p>One watch event will be sent to the client when the data has changed.
- For example, if a client does a getData("/znode1", true) and later the
- data for /znode1 is changed or deleted, the client will get a watch
- event for /znode1. If /znode1 changes again, no watch event will be
- sent unless the client has done another read that sets a new
- watch.</p>
-
- </li>
-
- <li>
-
- <p>
- <strong>Sent to the client</strong>
- </p>
-
- <p>This implies that an event is on the way to the client, but may
- not reach the client before the successful return code to the change
- operation reaches the client that initiated the change. Watches are
- sent asynchronously to watchers. ZooKeeper provides an ordering
- guarantee: a client will never see a change for which it has set a
- watch until it first sees the watch event. Network delays or other
- factors may cause different clients to see watches and return codes
- from updates at different times. The key point is that everything seen
- by the different clients will have a consistent order.</p>
-
- </li>
-
- <li>
-
- <p>
- <strong>The data for which the watch was
- set</strong>
- </p>
-
- <p>This refers to the different ways a node can change. It
- helps to think of ZooKeeper as maintaining two lists of
- watches: data watches and child watches. getData() and
- exists() set data watches. getChildren() sets child
- watches. Alternatively, it may help to think of watches being
- set according to the kind of data returned. getData() and
- exists() return information about the data of the node,
- whereas getChildren() returns a list of children. Thus,
- setData() will trigger data watches for the znode being set
- (assuming the set is successful). A successful create() will
- trigger a data watch for the znode being created and a child
- watch for the parent znode. A successful delete() will trigger
- both a data watch and a child watch (since there can be no
- more children) for a znode being deleted as well as a child
- watch for the parent znode.</p>
-
- </li>
-
- </ul>
- <p>Watches are maintained locally at the ZooKeeper server to which the
- client is connected. This allows watches to be lightweight to set,
- maintain, and dispatch. When a client connects to a new server, the watch
- will be triggered for any session events. Watches will not be received
- while disconnected from a server. When a client reconnects, any previously
- registered watches will be reregistered and triggered if needed. In
- general this all occurs transparently. There is one case where a watch
- may be missed: a watch for the existence of a znode not yet created will
- be missed if the znode is created and deleted while disconnected.</p>
- <a name="sc_WatchSemantics"></a>
- <h3 class="h4">Semantics of Watches</h3>
- <p> We can set watches with the three calls that read the state of
- ZooKeeper: exists, getData, and getChildren. The following list details
- the events that a watch can trigger and the calls that enable them:
- </p>
- <ul>
-
- <li>
-
- <p>
- <strong>Created event:</strong>
- </p>
-
- <p>Enabled with a call to exists.</p>
-
- </li>
-
-
- <li>
-
- <p>
- <strong>Deleted event:</strong>
- </p>
-
- <p>Enabled with a call to exists, getData, and getChildren.</p>
-
- </li>
-
-
- <li>
-
- <p>
- <strong>Changed event:</strong>
- </p>
-
- <p>Enabled with a call to exists and getData.</p>
-
- </li>
-
-
- <li>
-
- <p>
- <strong>Child event:</strong>
- </p>
-
- <p>Enabled with a call to getChildren.</p>
-
- </li>
-
- </ul>
- <a name="sc_WatchRemoval"></a>
- <h3 class="h4">Remove Watches</h3>
- <p>We can remove the watches registered on a znode with a call to
- removeWatches. Also, a ZooKeeper client can remove watches locally even
- if there is no server connection by setting the local flag to true. The
- following list details the events which will be triggered after the
- successful watch removal.
- </p>
- <ul>
-
- <li>
-
- <p>
- <strong>Child Remove event:</strong>
- </p>
-
- <p>Watcher which was added with a call to getChildren.</p>
-
- </li>
-
-
- <li>
-
- <p>
- <strong>Data Remove event:</strong>
- </p>
-
- <p>Watcher which was added with a call to exists or getData.</p>
-
- </li>
-
- </ul>
- <a name="sc_WatchGuarantees"></a>
- <h3 class="h4">What ZooKeeper Guarantees about Watches</h3>
- <p>With regard to watches, ZooKeeper maintains these
- guarantees:</p>
- <ul>
-
- <li>
-
- <p>Watches are ordered with respect to other events, other
- watches, and asynchronous replies. The ZooKeeper client libraries
- ensure that everything is dispatched in order.</p>
-
- </li>
-
- </ul>
- <ul>
-
- <li>
-
- <p>A client will see a watch event for a znode it is watching
- before seeing the new data that corresponds to that znode.</p>
-
- </li>
-
- </ul>
- <ul>
-
- <li>
-
- <p>The order of watch events from ZooKeeper corresponds to the
- order of the updates as seen by the ZooKeeper service.</p>
-
- </li>
-
- </ul>
- <a name="sc_WatchRememberThese"></a>
- <h3 class="h4">Things to Remember about Watches</h3>
- <ul>
-
- <li>
-
- <p>Watches are one time triggers; if you get a watch event and
- you want to get notified of future changes, you must set another
- watch.</p>
-
- </li>
-
- </ul>
- <ul>
-
- <li>
-
- <p>Because watches are one time triggers and there is latency
- between getting the event and sending a new request to get a watch
- you cannot reliably see every change that happens to a node in
- ZooKeeper. Be prepared to handle the case where the znode changes
- multiple times between getting the event and setting the watch
- again. (You may not care, but at least realize it may
- happen.)</p>
-
- </li>
-
- </ul>
- <ul>
-
- <li>
-
- <p>A watch object, or function/context pair, will only be
- triggered once for a given notification. For example, if the same
- watch object is registered for an exists and a getData call for the
- same file and that file is then deleted, the watch object would
- only be invoked once with the deletion notification for the file.
- </p>
-
- </li>
-
- </ul>
- <ul>
-
- <li>
-
- <p>When you disconnect from a server (for example, when the
- server fails), you will not get any watches until the connection
- is reestablished. For this reason session events are sent to all
- outstanding watch handlers. Use session events to go into a safe
- mode: you will not be receiving events while disconnected, so your
- process should act conservatively in that mode.</p>
-
- </li>
-
- </ul>
- </div>
-
- <a name="sc_ZooKeeperAccessControl"></a>
- <h2 class="h3">ZooKeeper access control using ACLs</h2>
- <div class="section">
- <p>ZooKeeper uses ACLs to control access to its znodes (the
- data nodes of a ZooKeeper data tree). The ACL implementation is
- quite similar to UNIX file access permissions: it employs
- permission bits to allow/disallow various operations against a
- node and the scope to which the bits apply. Unlike standard UNIX
- permissions, a ZooKeeper node is not limited by the three standard
- scopes for user (owner of the file), group, and world
- (other). ZooKeeper does not have a notion of an owner of a
- znode. Instead, an ACL specifies sets of ids and permissions that
- are associated with those ids.</p>
- <p>Note also that an ACL pertains only to a specific znode. In
- particular it does not apply to children. For example, if
- <em>/app</em> is only readable by ip:172.16.16.1 and
- <em>/app/status</em> is world readable, anyone will
- be able to read <em>/app/status</em>; ACLs are not
- recursive.</p>
- <p>ZooKeeper supports pluggable authentication schemes. Ids are
- specified using the form <em>scheme:id</em>,
- where <em>scheme</em> is the authentication scheme
- that the id corresponds to. For
- example, <em>ip:172.16.16.1</em> is an id for a
- host with the address <em>172.16.16.1</em>.</p>
- <p>When a client connects to ZooKeeper and authenticates
- itself, ZooKeeper associates all the ids that correspond to a
- client with the client's connection. These ids are checked against
- the ACLs of znodes when a client tries to access a node. ACLs are
- made up of pairs of <em>(scheme:expression,
- perms)</em>. The format of
- the <em>expression</em> is specific to the scheme. For
- example, the pair <em>(ip:19.22.0.0/16, READ)</em>
- gives the <em>READ</em> permission to any clients with
- an IP address that starts with 19.22.</p>
- <a name="sc_ACLPermissions"></a>
- <h3 class="h4">ACL Permissions</h3>
- <p>ZooKeeper supports the following permissions:</p>
- <ul>
-
- <li>
- <p>
- <strong>CREATE</strong>: you can create a child node</p>
- </li>
-
- <li>
- <p>
- <strong>READ</strong>: you can get data from a node and list its children.</p>
- </li>
-
- <li>
- <p>
- <strong>WRITE</strong>: you can set data for a node</p>
- </li>
-
- <li>
- <p>
- <strong>DELETE</strong>: you can delete a child node</p>
- </li>
-
- <li>
- <p>
- <strong>ADMIN</strong>: you can set permissions</p>
- </li>
-
- </ul>
- <p>The <em>CREATE</em>
- and <em>DELETE</em> permissions have been broken out
- of the <em>WRITE</em> permission for finer grained
- access controls. The cases for <em>CREATE</em>
- and <em>DELETE</em> are the following:</p>
- <p>You want A to be able to do a set on a ZooKeeper node, but
- not be able to <em>CREATE</em>
- or <em>DELETE</em> children.</p>
- <p>
- <em>CREATE</em>
- without <em>DELETE</em>: clients create requests by
- creating ZooKeeper nodes in a parent directory. You want all
- clients to be able to add, but only request processor can
- delete. (This is kind of like the APPEND permission for
- files.)</p>
- <p>Also, the <em>ADMIN</em> permission is there
- since ZooKeeper doesn’t have a notion of file owner. In some
- sense the <em>ADMIN</em> permission designates the
- entity as the owner. ZooKeeper doesn’t support the LOOKUP
- permission (execute permission bit on directories to allow you
- to LOOKUP even though you can't list the directory). Everyone
- implicitly has LOOKUP permission. This allows you to stat a
- node, but nothing more. (The problem is, if you want to call
- zoo_exists() on a node that doesn't exist, there is no
- permission to check.)</p>
- <a name="sc_BuiltinACLSchemes"></a>
- <h4>Builtin ACL Schemes</h4>
- <p>ZooKeeper has the following built in schemes:</p>
- <ul>
-
- <li>
- <p>
- <strong>world</strong> has a
- single id, <em>anyone</em>, that represents
- anyone.</p>
- </li>
-
- <li>
- <p>
- <strong>auth</strong> doesn't
- use any id, represents any authenticated
- user.</p>
- </li>
-
- <li>
- <p>
- <strong>digest</strong> uses
- a <em>username:password</em> string to generate
- a SHA1 hash which is then used as an ACL ID
- identity. Authentication is done by sending
- the <em>username:password</em> in clear text. When
- used in the ACL the expression will be
- the <em>username:base64</em>
- encoded <em>SHA1</em>
- password <em>digest</em>.</p>
-
- </li>
-
- <li>
- <p>
- <strong>ip</strong> uses the
- client host IP as an ACL ID identity. The ACL expression is of
- the form <em>addr/bits</em> where the most
- significant <em>bits</em>
- of <em>addr</em> are matched against the most
- significant <em>bits</em> of the client host
- IP.</p>
- </li>
-
- </ul>
- <a name="ZooKeeper+C+client+API"></a>
- <h4>ZooKeeper C client API</h4>
- <p>The following constants are provided by the ZooKeeper C
- library:</p>
- <ul>
-
- <li>
- <p>
- <em>const</em> <em>int</em> ZOO_PERM_READ; //can read node’s value and list its children</p>
- </li>
-
- <li>
- <p>
- <em>const</em> <em>int</em> ZOO_PERM_WRITE;// can set the node’s value</p>
- </li>
-
- <li>
- <p>
- <em>const</em> <em>int</em> ZOO_PERM_CREATE; //can create children</p>
- </li>
-
- <li>
- <p>
- <em>const</em> <em>int</em> ZOO_PERM_DELETE;// can delete children</p>
- </li>
-
- <li>
- <p>
- <em>const</em> <em>int</em> ZOO_PERM_ADMIN; //can execute set_acl()</p>
- </li>
-
- <li>
- <p>
- <em>const</em> <em>int</em> ZOO_PERM_ALL;// all of the above flags OR’d together</p>
- </li>
-
- </ul>
- <p>The following are the standard ACL IDs:</p>
- <ul>
-
- <li>
- <p>
- <em>struct</em> Id ZOO_ANYONE_ID_UNSAFE; //(‘world’,’anyone’)</p>
- </li>
-
- <li>
- <p>
- <em>struct</em> Id ZOO_AUTH_IDS;// (‘auth’,’’)</p>
- </li>
-
- </ul>
- <p>ZOO_AUTH_IDS empty identity string should be interpreted as “the identity of the creator”.</p>
- <p>ZooKeeper client comes with three standard ACLs:</p>
- <ul>
-
- <li>
- <p>
- <em>struct</em> ACL_vector ZOO_OPEN_ACL_UNSAFE; //(ZOO_PERM_ALL,ZOO_ANYONE_ID_UNSAFE)</p>
- </li>
-
- <li>
- <p>
- <em>struct</em> ACL_vector ZOO_READ_ACL_UNSAFE;// (ZOO_PERM_READ, ZOO_ANYONE_ID_UNSAFE)</p>
- </li>
-
- <li>
- <p>
- <em>struct</em> ACL_vector ZOO_CREATOR_ALL_ACL; //(ZOO_PERM_ALL,ZOO_AUTH_IDS)</p>
- </li>
-
- </ul>
- <p>The ZOO_OPEN_ACL_UNSAFE is a completely open, free-for-all
- ACL: any application can execute any operation on the node and
- can create, list and delete its children. The
- ZOO_READ_ACL_UNSAFE is read-only access for any
- application. ZOO_CREATOR_ALL_ACL grants all permissions to the
- creator of the node. The creator must have been authenticated by
- the server (for example, using “<em>digest</em>”
- scheme) before it can create nodes with this ACL.</p>
- <p>The following ZooKeeper operations deal with ACLs:</p>
- <ul>
- <li>
-
- <p>
- <em>int</em> <em>zoo_add_auth</em>
- (zhandle_t *zh,<em>const</em> <em>char</em>*
- scheme,<em>const</em> <em>char</em>*
- cert, <em>int</em> certLen, void_completion_t
- completion, <em>const</em> <em>void</em>
- *data);</p>
-
- </li>
- </ul>
- <p>The application uses the zoo_add_auth function to
- authenticate itself to the server. The function can be called
- multiple times if the application wants to authenticate using
- different schemes and/or identities.</p>
- <ul>
- <li>
-
- <p>
- <em>int</em> <em>zoo_create</em>
- (zhandle_t *zh, <em>const</em> <em>char</em>
- *path, <em>const</em> <em>char</em>
- *value,<em>int</em>
- valuelen, <em>const</em> <em>struct</em>
- ACL_vector *acl, <em>int</em>
- flags,<em>char</em>
- *realpath, <em>int</em>
- max_realpath_len);</p>
-
- </li>
- </ul>
- <p>zoo_create(...) operation creates a new node. The acl
- parameter is a list of ACLs associated with the node. The parent
- node must have the CREATE permission bit set.</p>
- <ul>
- <li>
-
- <p>
- <em>int</em> <em>zoo_get_acl</em>
- (zhandle_t *zh, <em>const</em> <em>char</em>
- *path,<em>struct</em> ACL_vector
- *acl, <em>struct</em> Stat *stat);</p>
-
- </li>
- </ul>
- <p>This operation returns a node’s ACL info.</p>
- <ul>
- <li>
-
- <p>
- <em>int</em> <em>zoo_set_acl</em>
- (zhandle_t *zh, <em>const</em> <em>char</em>
- *path, <em>int</em>
- version,<em>const</em> <em>struct</em>
- ACL_vector *acl);</p>
-
- </li>
- </ul>
- <p>This function replaces node’s ACL list with a new one. The
- node must have the ADMIN permission set.</p>
- <p>Here is sample code that makes use of the above APIs to
- authenticate itself using the “<em>foo</em>” scheme
- and create an ephemeral node “/xyz” with create-only
- permissions.</p>
- <div class="note">
- <div class="label">Note</div>
- <div class="content">
- <p>This is a very simple example which is intended to show
- how to interact with ZooKeeper ACLs
- specifically. See <span class="codefrag filename">.../trunk/src/c/src/cli.c</span>
- for an example of a proper C client implementation</p>
-
- </div>
- </div>
- <pre class="code">
- #include <string.h>
- #include <errno.h>
- #include "zookeeper.h"
- static zhandle_t *zh;
- /**
- * In this example this method gets the cert for your
- * environment -- you must provide
- */
- char *foo_get_cert_once(char* id) { return 0; }
- /** Watcher function -- empty for this example, not something you should
- * do in real code */
- void watcher(zhandle_t *zzh, int type, int state, const char *path,
- void *watcherCtx) {}
- int main(int argc, char argv) {
- char buffer[512];
- char p[2048];
- char *cert=0;
- char appId[64];
- strcpy(appId, "example.foo_test");
- cert = foo_get_cert_once(appId);
- if(cert!=0) {
- fprintf(stderr,
- "Certificate for appid [%s] is [%s]\n",appId,cert);
- strncpy(p,cert, sizeof(p)-1);
- free(cert);
- } else {
- fprintf(stderr, "Certificate for appid [%s] not found\n",appId);
- strcpy(p, "dummy");
- }
- zoo_set_debug_level(ZOO_LOG_LEVEL_DEBUG);
- zh = zookeeper_init("localhost:3181", watcher, 10000, 0, 0, 0);
- if (!zh) {
- return errno;
- }
- if(zoo_add_auth(zh,"foo",p,strlen(p),0,0)!=ZOK)
- return 2;
- struct ACL CREATE_ONLY_ACL[] = {{ZOO_PERM_CREATE, ZOO_AUTH_IDS}};
- struct ACL_vector CREATE_ONLY = {1, CREATE_ONLY_ACL};
- int rc = zoo_create(zh,"/xyz","value", 5, &CREATE_ONLY, ZOO_EPHEMERAL,
- buffer, sizeof(buffer)-1);
- /** this operation will fail with a ZNOAUTH error */
- int buflen= sizeof(buffer);
- struct Stat stat;
- rc = zoo_get(zh, "/xyz", 0, buffer, &buflen, &stat);
- if (rc) {
- fprintf(stderr, "Error %d for %s\n", rc, __LINE__);
- }
- zookeeper_close(zh);
- return 0;
- }
- </pre>
- </div>
-
- <a name="sc_ZooKeeperPluggableAuthentication"></a>
- <h2 class="h3">Pluggable ZooKeeper authentication</h2>
- <div class="section">
- <p>ZooKeeper runs in a variety of different environments with
- various different authentication schemes, so it has a completely
- pluggable authentication framework. Even the builtin authentication
- schemes use the pluggable authentication framework.</p>
- <p>To understand how the authentication framework works, first you must
- understand the two main authentication operations. The framework
- first must authenticate the client. This is usually done as soon as
- the client connects to a server and consists of validating information
- sent from or gathered about a client and associating it with the connection.
- The second operation handled by the framework is finding the entries in an
- ACL that correspond to the client. ACL entries are <<em>idspec,
- permissions</em>> pairs. The <em>idspec</em> may be
- a simple string match against the authentication information associated
- with the connection or it may be an expression that is evaluated against that
- information. It is up to the implementation of the authentication plugin
- to do the match. Here is the interface that an authentication plugin must
- implement:</p>
- <pre class="code">
- public interface AuthenticationProvider {
- String getScheme();
- KeeperException.Code handleAuthentication(ServerCnxn cnxn, byte authData[]);
- boolean isValid(String id);
- boolean matches(String id, String aclExpr);
- boolean isAuthenticated();
- }
- </pre>
- <p>The first method <em>getScheme</em> returns the string
- that identifies the plugin. Because we support multiple methods of authentication,
- an authentication credential or an <em>idspec</em> will always be
- prefixed with <em>scheme:</em>. The ZooKeeper server uses the scheme
- returned by the authentication plugin to determine which ids the scheme
- applies to.</p>
- <p>
- <em>handleAuthentication</em> is called when a client
- sends authentication information to be associated with a connection. The
- client specifies the scheme to which the information corresponds. The
- ZooKeeper server passes the information to the authentication plugin whose
- <em>getScheme</em> matches the scheme passed by the client. The
- implementor of <em>handleAuthentication</em> will usually return
- an error if it determines that the information is bad, or it will associate information
- with the connection using <em>cnxn.getAuthInfo().add(new Id(getScheme(), data))</em>.
- </p>
- <p>The authentication plugin is involved in both setting and using ACLs. When an
- ACL is set for a znode, the ZooKeeper server will pass the id part of the entry to
- the <em>isValid(String id)</em> method. It is up to the plugin to verify
- that the id has a correct form. For example, <em>ip:172.16.0.0/16</em>
- is a valid id, but <em>ip:host.com</em> is not. If the new ACL includes
- an "auth" entry, <em>isAuthenticated</em> is used to see if the
- authentication information for this scheme that is associated with the connection
- should be added to the ACL. Some schemes
- should not be included in auth. For example, the IP address of the client is not
- considered as an id that should be added to the ACL if auth is specified.</p>
- <p>ZooKeeper invokes
- <em>matches(String id, String aclExpr)</em> when checking an ACL. It
- needs to match authentication information of the client against the relevant ACL
- entries. To find the entries which apply to the client, the ZooKeeper server will
- find the scheme of each entry and if there is authentication information
- from that client for that scheme, <em>matches(String id, String aclExpr)</em>
- will be called with <em>id</em> set to the authentication information
- that was previously added to the connection by <em>handleAuthentication</em> and
- <em>aclExpr</em> set to the id of the ACL entry. The authentication plugin
- uses its own logic and matching scheme to determine if <em>id</em> is included
- in <em>aclExpr</em>.
- </p>
- <p>There are two built in authentication plugins: <em>ip</em> and
- <em>digest</em>. Additional plugins can be added using system properties. At
- startup the ZooKeeper server will look for system properties that start with
- "zookeeper.authProvider." and interpret the value of those properties as the class name
- of an authentication plugin. These properties can be set using the
- <em>-Dzookeeper.authProvider.X=com.f.MyAuth</em> or adding entries such as
- the following in the server configuration file:</p>
- <pre class="code">
- authProvider.1=com.f.MyAuth
- authProvider.2=com.f.MyAuth2
- </pre>
- <p>Care should be taken to ensure that the suffix on the property is unique. If there are
- duplicates such as <em>-Dzookeeper.authProvider.X=com.f.MyAuth -Dzookeeper.authProvider.X=com.f.MyAuth2</em>,
- only one will be used. Also all servers must have the same plugins defined, otherwise clients using
- the authentication schemes provided by the plugins will have problems connecting to some servers.
- </p>
- </div>
-
-
- <a name="ch_zkGuarantees"></a>
- <h2 class="h3">Consistency Guarantees</h2>
- <div class="section">
- <p>ZooKeeper is a high performance, scalable service. Both reads and
- write operations are designed to be fast, though reads are faster than
- writes. The reason for this is that in the case of reads, ZooKeeper can
- serve older data, which in turn is due to ZooKeeper's consistency
- guarantees:</p>
- <dl>
-
- <dt>
- <term>Sequential Consistency</term>
- </dt>
- <dd>
- <p>Updates from a client will be applied in the order that they
- were sent.</p>
- </dd>
-
- <dt>
- <term>Atomicity</term>
- </dt>
- <dd>
- <p>Updates either succeed or fail -- there are no partial
- results.</p>
- </dd>
-
- <dt>
- <term>Single System Image</term>
- </dt>
- <dd>
- <p>A client will see the same view of the service regardless of
- the server that it connects to.</p>
- </dd>
-
- <dt>
- <term>Reliability</term>
- </dt>
- <dd>
- <p>Once an update has been applied, it will persist from that
- time forward until a client overwrites the update. This guarantee
- has two corollaries:</p>
- <ol>
-
- <li>
-
- <p>If a client gets a successful return code, the update will
- have been applied. On some failures (communication errors,
- timeouts, etc) the client will not know if the update has
- applied or not. We take steps to minimize the failures, but the
- guarantee is only present with successful return codes.
- (This is called the <em>monotonicity condition</em> in Paxos.)</p>
-
- </li>
-
- <li>
-
- <p>Any updates that are seen by the client, through a read
- request or successful update, will never be rolled back when
- recovering from server failures.</p>
-
- </li>
-
- </ol>
- </dd>
-
- <dt>
- <term>Timeliness</term>
- </dt>
- <dd>
- <p>The client's view of the system is guaranteed to be up-to-date
- within a certain time bound (on the order of tens of seconds).
- Either system changes will be seen by a client within this bound, or
- the client will detect a service outage.</p>
- </dd>
-
- </dl>
- <p>Using these consistency guarantees it is easy to build higher level
- functions such as leader election, barriers, queues, and read/write
- revocable locks solely at the ZooKeeper client (no additions needed to
- ZooKeeper). See <a href="recipes.html">Recipes and Solutions</a>
- for more details.</p>
- <div class="note">
- <div class="label">Note</div>
- <div class="content">
-
- <p>Sometimes developers mistakenly assume one other guarantee that
- ZooKeeper does <em>not</em> in fact make. This is:</p>
-
- <dl>
-
- <dt>
- <term>Simultaneously Consistent Cross-Client Views</term>
- </dt>
- <dd>
- <p>ZooKeeper does not guarantee that at every instant in
- time, two different clients will have identical views of
- ZooKeeper data. Due to factors like network delays, one client
- may perform an update before another client gets notified of the
- change. Consider the scenario of two clients, A and B. If client
- A sets the value of a znode /a from 0 to 1, then tells client B
- to read /a, client B may read the old value of 0, depending on
- which server it is connected to. If it
- is important that Client A and Client B read the same value,
- Client B should call the <strong>sync()</strong> method from the ZooKeeper API
- before it performs its read.</p>
- <p>So, ZooKeeper by itself doesn't guarantee that changes occur
- synchronously across all servers, but ZooKeeper
- primitives can be used to construct higher level functions that
- provide useful client synchronization. (For more information,
- see the <a href="recipes.html">ZooKeeper Recipes</a>.
- <em>[tbd:..]</em>).</p>
- </dd>
-
- </dl>
-
- </div>
- </div>
- </div>
-
- <a name="ch_bindings"></a>
- <h2 class="h3">Bindings</h2>
- <div class="section">
- <p>The ZooKeeper client libraries come in two languages: Java and C.
- The following sections describe these.</p>
- <a name="Java+Binding"></a>
- <h3 class="h4">Java Binding</h3>
- <p>There are two packages that make up the ZooKeeper Java binding:
- <strong>org.apache.zookeeper</strong> and <strong>org.apache.zookeeper.data</strong>. The rest of the
- packages that make up ZooKeeper are used internally or are part of the
- server implementation. The <strong>org.apache.zookeeper.data</strong> package is made up of
- generated classes that are used simply as containers.</p>
- <p>The main class used by a ZooKeeper Java client is the <strong>ZooKeeper</strong> class. Its two constructors differ only
- by an optional session id and password. ZooKeeper supports session
- recovery across instances of a process. A Java program may save its
- session id and password to stable storage, restart, and recover the
- session that was used by the earlier instance of the program.</p>
- <p>When a ZooKeeper object is created, two threads are created as
- well: an IO thread and an event thread. All IO happens on the IO thread
- (using Java NIO). All event callbacks happen on the event thread.
- Session maintenance such as reconnecting to ZooKeeper servers and
- maintaining heartbeat is done on the IO thread. Responses for
- synchronous methods are also processed in the IO thread. All responses
- to asynchronous methods and watch events are processed on the event
- thread. There are a few things to notice that result from this
- design:</p>
- <ul>
-
- <li>
-
- <p>All completions for asynchronous calls and watcher callbacks
- will be made in order, one at a time. The caller can do any
- processing they wish, but no other callbacks will be processed
- during that time.</p>
-
- </li>
-
- <li>
-
- <p>Callbacks do not block the processing of the IO thread or the
- processing of the synchronous calls.</p>
-
- </li>
-
- <li>
-
- <p>Synchronous calls may not return in the correct order. For
- example, assume a client does the following processing: issues an
- asynchronous read of node <strong>/a</strong> with
- <em>watch</em> set to true, and then in the completion
- callback of the read it does a synchronous read of <strong>/a</strong>. (Maybe not good practice, but not illegal
- either, and it makes for a simple example.)</p>
-
- <p>Note that if there is a change to <strong>/a</strong> between the asynchronous read and the
- synchronous read, the client library will receive the watch event
- saying <strong>/a</strong> changed before the
- response for the synchronous read, but because the completion
- callback is blocking the event queue, the synchronous read will
- return with the new value of <strong>/a</strong>
- before the watch event is processed.</p>
-
- </li>
-
- </ul>
- <p>Finally, the rules associated with shutdown are straightforward:
- once a ZooKeeper object is closed or receives a fatal event
- (SESSION_EXPIRED and AUTH_FAILED), the ZooKeeper object becomes invalid.
- On a close, the two threads shut down and any further access on the ZooKeeper
- handle is undefined behavior and should be avoided. </p>
- <a name="C+Binding"></a>
- <h3 class="h4">C Binding</h3>
- <p>The C binding has a single-threaded and multi-threaded library.
- The multi-threaded library is easiest to use and is most similar to the
- Java API. This library will create an IO thread and an event dispatch
- thread for handling connection maintenance and callbacks. The
- single-threaded library allows ZooKeeper to be used in event driven
- applications by exposing the event loop used in the multi-threaded
- library.</p>
- <p>The package includes two shared libraries: zookeeper_st and
- zookeeper_mt. The former only provides the asynchronous APIs and
- callbacks for integrating into the application's event loop. The only
- reason this library exists is to support the platforms where a
- <em>pthread</em> library is not available or is unstable
- (e.g. FreeBSD 4.x). In all other cases, application developers should
- link with zookeeper_mt, as it includes support for both Sync and Async
- API.</p>
- <a name="Installation"></a>
- <h4>Installation</h4>
- <p>If you're building the client from a check-out from the Apache
- repository, follow the steps outlined below. If you're building from a
- project source package downloaded from Apache, skip to step <strong>3</strong>.</p>
- <ol>
-
- <li>
-
- <p>Run <span class="codefrag command">ant compile_jute</span> from the ZooKeeper
- top level directory (<span class="codefrag filename">.../trunk</span>).
- This will create a directory named "generated" under
- <span class="codefrag filename">.../trunk/src/c</span>.</p>
-
- </li>
-
- <li>
-
- <p>Change directory to <span class="codefrag filename">.../trunk/src/c</span>
- and run <span class="codefrag command">autoreconf -if</span> to bootstrap <strong>autoconf</strong>, <strong>automake</strong> and <strong>libtool</strong>. Make sure you have <strong>autoconf version 2.59</strong> or greater installed.
- Skip to step <strong>4</strong>.</p>
-
- </li>
-
- <li>
-
- <p>If you are building from a project source package,
- unzip/untar the source tarball and cd to the
- <span class="codefrag filename">zookeeper-x.x.x/src/c</span> directory.</p>
-
- </li>
-
- <li>
-
- <p>Run <span class="codefrag command">./configure &lt;your-options&gt;</span> to
- generate the makefile. Here are some of the options the <strong>configure</strong> utility supports that can be
- useful in this step:</p>
-
- <ul>
-
- <li>
-
- <p>
- <span class="codefrag command">--enable-debug</span>
- </p>
-
- <p>Enables optimization and enables debug info compiler
- options. (Disabled by default.)</p>
-
- </li>
-
- <li>
-
- <p>
- <span class="codefrag command">--without-syncapi </span>
- </p>
-
- <p>Disables Sync API support; zookeeper_mt library won't be
- built. (Enabled by default.)</p>
-
- </li>
-
- <li>
-
- <p>
- <span class="codefrag command">--disable-static </span>
- </p>
-
- <p>Do not build static libraries. (Enabled by
- default.)</p>
-
- </li>
-
- <li>
-
- <p>
- <span class="codefrag command">--disable-shared</span>
- </p>
-
- <p>Do not build shared libraries. (Enabled by
- default.)</p>
-
- </li>
-
- </ul>
-
- <div class="note">
- <div class="label">Note</div>
- <div class="content">
-
- <p>See INSTALL for general information about running
- <strong>configure</strong>.</p>
-
- </div>
- </div>
-
- </li>
-
- <li>
-
- <p>Run <span class="codefrag command">make</span> or <span class="codefrag command">make
- install</span> to build the libraries and install them.</p>
-
- </li>
-
- <li>
-
- <p>To generate doxygen documentation for the ZooKeeper API, run
- <span class="codefrag command">make doxygen-doc</span>. All documentation will be
- placed in a new subfolder named docs. By default, this command
- only generates HTML. For information on other document formats,
- run <span class="codefrag command">./configure --help</span>
- </p>
-
- </li>
-
- </ol>
- <a name="Using+the+C+Client"></a>
- <h4>Using the C Client</h4>
- <p>You can test your client by running a ZooKeeper server (see
- instructions on the project wiki page on how to run it) and connecting
- to it using one of the cli applications that were built as part of the
- installation procedure. cli_mt (multithreaded, built against
- zookeeper_mt library) is shown in this example, but you could also use
- cli_st (singlethreaded, built against zookeeper_st library):</p>
- <p>
- <span class="codefrag command">$ cli_mt zookeeper_host:9876</span>
- </p>
- <p>This is a client application that gives you a shell for
- executing simple ZooKeeper commands. Once successfully started
- and connected to the server it displays a shell prompt. You
- can now enter ZooKeeper commands. For example, to create a
- node:</p>
- <p>
- <span class="codefrag command">> create /my_new_node</span>
- </p>
- <p>To verify that the node's been created:</p>
- <p>
- <span class="codefrag command">> ls /</span>
- </p>
- <p>You should see a list of nodes that are children of the root node
- "/".</p>
- <p>In order to be able to use the ZooKeeper API in your application
- you have to remember to</p>
- <ol>
-
- <li>
-
- <p>Include ZooKeeper header: #include
- &lt;zookeeper/zookeeper.h&gt;</p>
-
- </li>
-
- <li>
-
- <p>If you are building a multithreaded client, compile with
- -DTHREADED compiler flag to enable the multi-threaded version of
- the library, and then link against the
- <em>zookeeper_mt</em> library. If you are building a
- single-threaded client, do not compile with -DTHREADED, and be
- sure to link against the <em>zookeeper_st</em>
- library.</p>
-
- </li>
-
- </ol>
- <p>Refer to <a href="#ch_programStructureWithExample">Program Structure, with Simple Example</a>
- for examples of usage in Java and C.
- <em>[tbd]</em>
-
- </p>
- </div>
-
- <a name="ch_guideToZkOperations"></a>
- <h2 class="h3">Building Blocks: A Guide to ZooKeeper Operations</h2>
- <div class="section">
- <p>This section surveys all the operations a developer can perform
- against a ZooKeeper server. It is lower level information than the earlier
- concepts chapters in this manual, but higher level than the ZooKeeper API
- Reference. It covers these topics:</p>
- <ul>
-
- <li>
-
- <p>
- <a href="#sc_connectingToZk">Connecting to ZooKeeper</a>
- </p>
-
- </li>
-
- </ul>
- <a name="sc_errorsZk"></a>
- <h3 class="h4">Handling Errors</h3>
- <p>Both the Java and C client bindings may report errors. The Java client binding does so by throwing KeeperException; calling code() on the exception will return the specific error code. The C client binding returns an error code as defined in the enum ZOO_ERRORS. API callbacks indicate the result code for both language bindings. See the API documentation (javadoc for Java, doxygen for C) for full details on the possible errors and their meaning.</p>
- <a name="sc_connectingToZk"></a>
- <h3 class="h4">Connecting to ZooKeeper</h3>
- <p></p>
- <a name="sc_readOps"></a>
- <h3 class="h4">Read Operations</h3>
- <p></p>
- <a name="sc_writeOps"></a>
- <h3 class="h4">Write Operations</h3>
- <p></p>
- <a name="sc_handlingWatches"></a>
- <h3 class="h4">Handling Watches</h3>
- <p></p>
- <a name="sc_miscOps"></a>
- <h3 class="h4">Miscellaneous ZooKeeper Operations</h3>
- <p></p>
- </div>
-
- <a name="ch_programStructureWithExample"></a>
- <h2 class="h3">Program Structure, with Simple Example</h2>
- <div class="section">
- <p>
- <em>[tbd]</em>
- </p>
- </div>
-
- <a name="ch_gotchas"></a>
- <h2 class="h3">Gotchas: Common Problems and Troubleshooting</h2>
- <div class="section">
- <p>So now you know ZooKeeper. It's fast, simple, your application
- works, but wait ... something's wrong. Here are some pitfalls that
- ZooKeeper users fall into:</p>
- <ol>
-
- <li>
-
- <p>If you are using watches, you must look for the connected watch
- event. When a ZooKeeper client disconnects from a server, you will
- not receive notification of changes until reconnected. If you are
- watching for a znode to come into existence, you will miss the event
- if the znode is created and deleted while you are disconnected.</p>
-
- </li>
-
- <li>
-
- <p>You must test ZooKeeper server failures. The ZooKeeper service
- can survive failures as long as a majority of servers are active. The
- question to ask is: can your application handle it? In the real world
- a client's connection to ZooKeeper can break. (ZooKeeper server
- failures and network partitions are common reasons for connection
- loss.) The ZooKeeper client library takes care of recovering your
- connection and letting you know what happened, but you must make sure
- that you recover your state and any outstanding requests that failed.
- Find out if you got it right in the test lab, not in production - test
- with a ZooKeeper service made up of several servers and subject
- them to reboots.</p>
-
- </li>
-
- <li>
-
- <p>The list of ZooKeeper servers used by the client must match the
- list of ZooKeeper servers that each ZooKeeper server has. Things can
- work, although not optimally, if the client list is a subset of the
- real list of ZooKeeper servers, but not if the client lists ZooKeeper
- servers not in the ZooKeeper cluster.</p>
-
- </li>
-
- <li>
-
- <p>Be careful where you put that transaction log. The most
- performance-critical part of ZooKeeper is the transaction log.
- ZooKeeper must sync transactions to media before it returns a
- response. A dedicated transaction log device is key to consistent good
- performance. Putting the log on a busy device will adversely affect
- performance. If you only have one storage device, put trace files on
- NFS and increase the snapshotCount; it doesn't eliminate the problem,
- but it can mitigate it.</p>
-
- </li>
-
- <li>
-
- <p>Set your Java max heap size correctly. It is very important to
- <em>avoid swapping.</em> Going to disk unnecessarily will
- almost certainly degrade your performance unacceptably. Remember, in
- ZooKeeper, everything is ordered, so if one request hits the disk, all
- other queued requests hit the disk.</p>
-
- <p>To avoid swapping, try to set the heapsize to the amount of
- physical memory you have, minus the amount needed by the OS and cache.
- The best way to determine an optimal heap size for your configurations
- is to <em>run load tests</em>. If for some reason you
- can't, be conservative in your estimates and choose a number well
- below the limit that would cause your machine to swap. For example, on
- a 4G machine, a 3G heap is a conservative estimate to start
- with.</p>
-
- </li>
-
- </ol>
- </div>
-
- <a name="apx_linksToOtherInfo"></a>
- <appendix id="apx_linksToOtherInfo">
-
- <title>Links to Other Information</title>
-
- <p>Outside the formal documentation, there are several other sources of
- information for ZooKeeper developers.</p>
-
- <dl>
-
- <dt>
- <term>ZooKeeper Whitepaper <em>[tbd: find url]</em>
- </term>
- </dt>
- <dd>
- <p>The definitive discussion of ZooKeeper design and performance,
- by Yahoo! Research</p>
- </dd>
-
- <dt>
- <term>API Reference <em>[tbd: find url]</em>
- </term>
- </dt>
- <dd>
- <p>The complete reference to the ZooKeeper API</p>
- </dd>
-
- <dt>
- <term>
- <a href="http://us.dl1.yimg.com/download.yahoo.com/dl/ydn/zookeeper.m4v">ZooKeeper
- Talk at the Hadoop Summit 2008</a>
- </term>
- </dt>
- <dd>
- <p>A video introduction to ZooKeeper, by Benjamin Reed of Yahoo!
- Research</p>
- </dd>
-
- <dt>
- <term>
- <a href="https://cwiki.apache.org/confluence/display/ZOOKEEPER/Tutorial">Barrier and
- Queue Tutorial</a>
- </term>
- </dt>
- <dd>
- <p>The excellent Java tutorial by Flavio Junqueira, implementing
- simple barriers and producer-consumer queues using ZooKeeper.</p>
- </dd>
-
- <dt>
- <term>
- <a href="https://cwiki.apache.org/confluence/display/ZOOKEEPER/ZooKeeperArticles">ZooKeeper
- - A Reliable, Scalable Distributed Coordination System</a>
- </term>
- </dt>
- <dd>
- <p>An article by Todd Hoff (07/15/2008)</p>
- </dd>
-
- <dt>
- <term>
- <a href="recipes.html">ZooKeeper Recipes</a>
- </term>
- </dt>
- <dd>
- <p>Pseudo-level discussion of the implementation of various
- synchronization solutions with ZooKeeper: Event Handles, Queues,
- Locks, and Two-phase Commits.</p>
- </dd>
-
- <dt>
- <term>
- <em>[tbd]</em>
- </term>
- </dt>
- <dd>
- <p>Any other good sources anyone can think of...</p>
- </dd>
-
- </dl>
-
- </appendix>
- <p align="right">
- <font size="-2"></font>
- </p>
- </div>
- <!--+
- |end content
- +-->
- <div class="clearboth"> </div>
- </div>
- <div id="footer">
- <!--+
- |start bottomstrip
- +-->
- <div class="lastmodified">
- <script type="text/javascript"><!--
- document.write("Last Published: " + document.lastModified);
- // --></script>
- </div>
- <div class="copyright">
- Copyright ©
- 2008-2013 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
- </div>
- <!--+
- |end bottomstrip
- +-->
- </div>
- </body>
- </html>
|