
ZOOKEEPER-5. Upgrade Feature in Zookeeper server.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/zookeeper/trunk@705421 13f79535-47bb-0310-9956-ffa450edef68
Patrick D. Hunt, 16 years ago
Parent commit
82627a6417

+ 2 - 0
CHANGES.txt

@@ -35,6 +35,8 @@ Backward compatible changes:
 
   BUGFIXES: 
 
+  ZOOKEEPER-5. Upgrade Feature in Zookeeper server. (mahadev via phunt)
+
   ZOOKEEPER-194. Fix terminology in zookeeperAdmin.xml
   (Flavio Paiva Junqueira)
 

+ 7 - 0
bin/zkServer.sh

@@ -38,6 +38,13 @@ stop)
     echo kill | nc localhost $(grep clientPort $ZOOCFG | sed -e 's/.*=//')
     echo STOPPED
     ;;
+upgrade)
+    shift
+    echo "upgrading the servers to 3.*"
+    java "-Dzookeeper.log.dir=${ZOO_LOG_DIR}" "-Dzookeeper.root.logger=${ZOO_LOG4J_PROP}" \
+    -cp $CLASSPATH $JVMFLAGS org.apache.zookeeper.server.upgrade.UpgradeMain ${@} 
+    echo "Upgrading ... "
+    ;;
 restart)
     shift
     $0 stop ${@}

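The new upgrade case simply shifts off the subcommand and hands the remaining arguments to UpgradeMain (added below); a minimal sketch of the equivalent direct invocation, with placeholder directories standing in for the script arguments:

    import org.apache.zookeeper.server.upgrade.UpgradeMain;

    // Equivalent of: bin/zkServer.sh upgrade <dataDir> <snapShotDir>
    // The directory arguments below are placeholders, not paths from this commit.
    public class ZkServerUpgradeEquivalent {
        public static void main(String[] args) {
            UpgradeMain.main(new String[] { "/var/zookeeper/data", "/var/zookeeper/data" });
        }
    }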
+ 11 - 0
build.xml

@@ -52,6 +52,8 @@
     <property name="test.build.dir" value="${build.dir}/test" />
     <property name="test.src.dir" value="${src.dir}/java/test"/>
     <property name="test.log.dir" value="${test.build.dir}/logs" />
+    <property name="test.data.dir" value="${test.build.dir}/data" />
+    <property name="test.data.upgrade.dir" value="${test.data.dir}/upgrade" />
     <property name="test.tmp.dir" value="${test.build.dir}/tmp" />
     <property name="test.output" value="no" />
     <property name="test.timeout" value="900000" />
@@ -505,8 +507,15 @@
     <target name="test-init" depends="jar,compile-test">
         <delete dir="${test.log.dir}" />
         <delete dir="${test.tmp.dir}" />
+        <delete dir="${test.data.upgrade.dir}" />
+        <delete dir="${test.data.dir}" />
         <mkdir dir="${test.log.dir}" />
         <mkdir dir="${test.tmp.dir}" />
+        <mkdir dir="${test.data.dir}" />
+        <mkdir dir="${test.data.upgrade.dir}" />
+        <copy todir="${test.data.upgrade.dir}">
+            <fileset dir="${basedir}/src/java/test/data/upgrade"/>
+        </copy> 
     </target>
 
     <target name="junit.run">
@@ -519,6 +528,7 @@
                dir="${basedir}" timeout="${test.timeout}"
                errorProperty="tests.failed" failureProperty="tests.failed">
           <sysproperty key="build.test.dir" value="${test.tmp.dir}" />
+          <sysproperty key="test.data.dir" value="${test.data.dir}" />
           <sysproperty key="log4j.configuration"
                        value="file:${basedir}/conf/log4j.properties" />
           <classpath refid="test.classpath"/>
@@ -656,6 +666,7 @@
                maxmemory="${test.junit.maxmem}" dir="${basedir}" timeout="${test.timeout}"
                errorProperty="tests.failed" failureProperty="tests.failed">
             <sysproperty key="build.test.dir" value="${test.tmp.dir}" />
+            <sysproperty key="test.data.dir" value="${test.data.dir}" />
             <sysproperty key="log4j.configuration"
                          value="file:${basedir}/conf/log4j.properties" />
             <classpath>

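The new test.data.dir system property is how the forked JUnit VM locates the upgrade fixtures copied by test-init; a minimal sketch of reading it from a test, mirroring UpgradeTest further down (the build/test/data fallback matches the property's default value):

    import java.io.File;

    public class TestDataDirExample {
        // Directory populated by the test-init target; "upgrade" holds the copied fixtures.
        static final File TEST_DATA = new File(
                System.getProperty("test.data.dir", "build/test/data"));
        static final File UPGRADE_FIXTURES = new File(TEST_DATA, "upgrade");
    }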
+ 26 - 1
src/java/main/org/apache/zookeeper/server/DataNode.java

@@ -43,7 +43,15 @@ public class DataNode implements Record {
         // default rather than public constructor
     }
 
-    DataNode(DataNode parent, byte data[], Long acl, StatPersisted stat) {
+    /**
+     * create a DataNode with parent, data, acls and stat
+     * @param parent the parent of this DataNode
+     * @param data the data to be set
+     * @param acl the acls for this node
+     * @param stat the stat for this node.
+     */
+  
+    public DataNode(DataNode parent, byte data[], Long acl, StatPersisted stat) {
         this.parent = parent;
         this.data = data;
         this.acl = acl;
@@ -51,6 +59,23 @@ public class DataNode implements Record {
         this.children = new HashSet<String>();
     }
 
+    /**
+     * convenience method to set the children of this DataNode
+     * @param children the children to set on this node
+     */
+    public void setChildren(HashSet<String> children) {
+        this.children = children;
+    }
+    
+    /**
+     * convenience method to get the children
+     * @return the children of this datanode
+     */
+    public HashSet<String> getChildren() {
+        return this.children;
+    }
+    
     DataNode parent;
 
     byte data[];

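The constructor and the children accessors are opened up so that code outside the server core, here the upgrade conversion, can assemble nodes directly; a minimal sketch of that usage (the 0L acl value is a placeholder index into the tree's ACL map, and the data bytes are illustrative):

    import java.util.HashSet;
    import org.apache.zookeeper.data.StatPersisted;
    import org.apache.zookeeper.server.DataNode;

    public class BuildNodeExample {
        public static DataNode buildChild() {
            DataNode parent = new DataNode(null, new byte[0], 0L, new StatPersisted());
            DataNode child = new DataNode(parent, "hello".getBytes(), 0L, new StatPersisted());
            child.setChildren(new HashSet<String>()); // children can now be set wholesale
            parent.getChildren().add("child");        // and read back for traversal
            return child;
        }
    }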
+ 21 - 2
src/java/main/org/apache/zookeeper/server/DataTree.java

@@ -26,7 +26,6 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.TreeMap;
 import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.jute.Index;
@@ -75,7 +74,7 @@ public class DataTree {
     /**
      * This hashtable lists the paths of the ephemeral nodes of a session.
      */
-    private ConcurrentHashMap<Long, HashSet<String>> ephemerals = new ConcurrentHashMap<Long, HashSet<String>>();
+    private Map<Long, HashSet<String>> ephemerals = new ConcurrentHashMap<Long, HashSet<String>>();
 
     /**
      * this is map from longs to acl's. It saves acl's being stored 
@@ -112,6 +111,15 @@ public class DataTree {
         return cloned;
     }
     
+    public Map<Long, HashSet<String>> getEphemeralsMap() {
+        return ephemerals;
+    }
+    
+    public void setEphemerals(Map<Long, HashSet<String>> ephemerals) {
+        this.ephemerals = ephemerals;
+    }
+    
+    
     private long incrementIndex() {
        return ++aclIndex;
     }
@@ -178,6 +186,17 @@ public class DataTree {
         return ephemerals.keySet();
     }
 
+    /**
+     * an accessor method to allow raw construction
+     * of a datatree from a set of datanodes
+     * @param path the path of the datanode
+     * @param node the datanode corresponding to this path
+     */
+    public void addDataNode(String path, DataNode node) {
+        nodes.put(path, node);
+    }
+    
     public DataNode getNode(String path) {
         return nodes.get(path);
     }

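Together with the now-public DataNode constructor, addDataNode and setEphemerals let the upgrade code rebuild a DataTree node by node instead of going through createNode; a minimal sketch of that raw assembly ("/example" and the 0L acl index are placeholders, and addDataNode only inserts into the path hashtable):

    import java.util.HashSet;
    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;
    import org.apache.zookeeper.data.StatPersisted;
    import org.apache.zookeeper.server.DataNode;
    import org.apache.zookeeper.server.DataTree;

    public class BuildTreeExample {
        public static DataTree build() {
            DataTree dt = new DataTree();
            DataNode root = dt.getNode("");           // root is aliased at the empty path
            DataNode node = new DataNode(root, "x".getBytes(), 0L, new StatPersisted());
            dt.addDataNode("/example", node);
            Map<Long, HashSet<String>> eph = new ConcurrentHashMap<Long, HashSet<String>>();
            dt.setEphemerals(eph);                    // swap in a rebuilt ephemerals map
            return dt;
        }
    }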
+ 5 - 5
src/java/main/org/apache/zookeeper/server/WatchManager.java

@@ -43,11 +43,11 @@ public class WatchManager {
     private HashMap<Watcher, HashSet<String>> watch2Paths = 
         new HashMap<Watcher, HashSet<String>>();
 
-    synchronized int size(){
+    public synchronized int size(){
         return watchTable.size();
     }
 
-    synchronized void addWatch(String path, Watcher watcher) {
+    public synchronized void addWatch(String path, Watcher watcher) {
         HashSet<Watcher> list = watchTable.get(path);
         if (list == null) {
             list = new HashSet<Watcher>();
@@ -63,7 +63,7 @@ public class WatchManager {
         paths.add(path);
     }
 
-    synchronized void removeWatcher(Watcher watcher) {
+    public synchronized void removeWatcher(Watcher watcher) {
         HashSet<String> paths = watch2Paths.remove(watcher);
         if (paths == null) {
             return;
@@ -79,11 +79,11 @@ public class WatchManager {
         }
     }
 
-    Set<Watcher> triggerWatch(String path, EventType type) {
+    public Set<Watcher> triggerWatch(String path, EventType type) {
         return triggerWatch(path, type, null);
     }
     
-    Set<Watcher> triggerWatch(String path, EventType type, Set<Watcher> supress) {
+    public Set<Watcher> triggerWatch(String path, EventType type, Set<Watcher> supress) {
         WatchedEvent e = new WatchedEvent(type,
                 KeeperState.SyncConnected, path);
         HashSet<Watcher> watchers;

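These methods were previously package-private; making them public lets DataTreeV1 in the new org.apache.zookeeper.server.upgrade package keep its own watch managers. A small sketch of that cross-package usage (the path and the no-op watcher are illustrative):

    import org.apache.zookeeper.WatchedEvent;
    import org.apache.zookeeper.Watcher;
    import org.apache.zookeeper.Watcher.Event.EventType;
    import org.apache.zookeeper.server.WatchManager;

    public class WatchExample {
        public static void run() {
            WatchManager dataWatches = new WatchManager();
            Watcher w = new Watcher() {               // no-op watcher, for illustration only
                public void process(WatchedEvent event) { }
            };
            dataWatches.addWatch("/a", w);
            dataWatches.triggerWatch("/a", EventType.NodeDataChanged);
        }
    }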
+ 12 - 4
src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java

@@ -49,6 +49,8 @@ public class FileTxnSnapLog {
     File snapDir;
     TxnLog txnLog;
     SnapShot snapLog;
+    public final static int VERSION = 2;
+    public final static String version = "version-";
     
     private static final Logger LOG = Logger.getLogger(FileTxnSnapLog.class);
     
@@ -70,10 +72,16 @@ public class FileTxnSnapLog {
      * @param snapDir the snapshot directory
      */
     public FileTxnSnapLog(File dataDir, File snapDir) {
-        this.dataDir = dataDir;
-        this.snapDir = snapDir;
-        txnLog = new FileTxnLog(dataDir);
-        snapLog = new FileSnap(snapDir);
+        this.dataDir = new File(dataDir, version + VERSION);
+        this.snapDir = new File(snapDir, version + VERSION);
+        if (!this.dataDir.exists()) {
+            this.dataDir.mkdirs();
+        }
+        if (!this.snapDir.exists()) {
+            this.snapDir.mkdirs();
+        }
+        txnLog = new FileTxnLog(this.dataDir);
+        snapLog = new FileSnap(this.snapDir);
     }
     
     /**

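With this change both directories gain a version-2 subdirectory (version + VERSION), created on demand, so that old-format and new-format files never mix; a minimal sketch of the resulting construction and layout (the concrete paths and file names are illustrative, not taken from this commit):

    import java.io.File;
    import org.apache.zookeeper.server.persistence.FileTxnSnapLog;

    public class VersionedDirsExample {
        public static void main(String[] args) {
            File dataDir = new File("/var/zookeeper/data");
            File snapDir = new File("/var/zookeeper/data");
            FileTxnSnapLog snapLog = new FileTxnSnapLog(dataDir, snapDir);
            // transaction logs now land under /var/zookeeper/data/version-2/log.*
            // and snapshots under /var/zookeeper/data/version-2/snapshot.*
        }
    }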
+ 129 - 0
src/java/main/org/apache/zookeeper/server/upgrade/DataNodeV1.java

@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.server.upgrade;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+
+import org.apache.jute.Index;
+import org.apache.jute.InputArchive;
+import org.apache.jute.OutputArchive;
+import org.apache.jute.Record;
+import org.apache.zookeeper.data.ACL;
+import org.apache.zookeeper.data.Stat;
+import org.apache.zookeeper.data.StatPersistedV1;
+
+/**
+ * This class contains the data for a node in the data tree.
+ * <p>
+ * A data node contains a reference to its parent, a byte array as its data, an
+ * array of ACLs, a stat object, and a set of its children's paths.
+ * 
+ */
+public class DataNodeV1 implements Record {
+    DataNodeV1() {
+        // default rather than public constructor
+    }
+
+    DataNodeV1(DataNodeV1 parent, byte data[], List<ACL> acl, StatPersistedV1 stat) {
+        this.parent = parent;
+        this.data = data;
+        this.acl = acl;
+        this.stat = stat;
+        this.children = new HashSet<String>();
+    }
+    
+
+    /**
+     * convenience method to set the children of this DataNode
+     * @param children the children to set on this node
+     */
+    public void setChildren(HashSet<String> children) {
+        this.children = children;
+    }
+    
+    /**
+     * convenience method to get the children
+     * @return the children of this datanode
+     */
+    public HashSet<String> getChildren() {
+        return this.children;
+    }
+    
+    DataNodeV1 parent;
+
+    byte data[];
+
+    List<ACL> acl;
+
+    public StatPersistedV1 stat;
+
+    HashSet<String> children = new HashSet<String>();
+
+    public void copyStat(Stat to) {
+        to.setAversion(stat.getAversion());
+        to.setCtime(stat.getCtime());
+        to.setCversion(stat.getCversion());
+        to.setCzxid(stat.getCzxid());
+        to.setMtime(stat.getMtime());
+        to.setMzxid(stat.getMzxid());
+        to.setVersion(stat.getVersion());
+        to.setEphemeralOwner(stat.getEphemeralOwner());
+        to.setDataLength(data.length);
+        to.setNumChildren(children.size());
+    }
+
+    public void deserialize(InputArchive archive, String tag)
+            throws IOException {
+        archive.startRecord("node");
+        data = archive.readBuffer("data");
+        Index i = archive.startVector("acl");
+        if (i != null) {
+            acl = new ArrayList<ACL>();
+            while (!i.done()) {
+                ACL a = new ACL();
+                a.deserialize(archive, "aclEntry");
+                acl.add(a);
+                i.incr();
+            }
+        }
+        archive.endVector("acl");
+        stat = new StatPersistedV1();
+        stat.deserialize(archive, "stat");
+        archive.endRecord("node");
+    }
+
+    synchronized public void serialize(OutputArchive archive, String tag)
+            throws IOException {
+        archive.startRecord(this, "node");
+        archive.writeBuffer(data, "data");
+        archive.startVector(acl, "acl");
+        if (acl != null) {
+            for (ACL a : acl) {
+                a.serialize(archive, "aclEntry");
+            }
+        }
+        archive.endVector(acl, "acl");
+        stat.serialize(archive, "stat");
+        archive.endRecord(this, "node");
+    }
+}

+ 582 - 0
src/java/main/org/apache/zookeeper/server/upgrade/DataTreeV1.java

@@ -0,0 +1,582 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.server.upgrade;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.jute.InputArchive;
+import org.apache.jute.OutputArchive;
+import org.apache.jute.Record;
+import org.apache.log4j.Logger;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.Watcher;
+import org.apache.zookeeper.Watcher.Event.EventType;
+import org.apache.zookeeper.KeeperException.Code;
+import org.apache.zookeeper.Watcher.Event;
+import org.apache.zookeeper.ZooDefs.OpCode;
+import org.apache.zookeeper.data.ACL;
+import org.apache.zookeeper.data.Stat;
+import org.apache.zookeeper.data.StatPersistedV1;
+import org.apache.zookeeper.server.WatchManager;
+import org.apache.zookeeper.server.ZooTrace;
+import org.apache.zookeeper.txn.CreateTxn;
+import org.apache.zookeeper.txn.DeleteTxn;
+import org.apache.zookeeper.txn.ErrorTxn;
+import org.apache.zookeeper.txn.SetACLTxn;
+import org.apache.zookeeper.txn.SetDataTxn;
+import org.apache.zookeeper.txn.TxnHeader;
+
+/**
+ * This class maintains the tree data structure. It doesn't have any networking
+ * or client connection code in it so that it can be tested in a standalone
+ * way.
+ * <p>
+ * The tree maintains two parallel data structures: a hashtable that maps from
+ * full paths to DataNodes and a tree of DataNodes. All accesses to a path are
+ * through the hashtable. The tree is traversed only when serializing to disk.
+ */
+public class DataTreeV1 {
+    private static final Logger LOG = Logger.getLogger(DataTreeV1.class);
+
+    /**
+     * This hashtable provides a fast lookup to the datanodes. The tree is the
+     * source of truth and is where all the locking occurs
+     */
+    private ConcurrentHashMap<String, DataNodeV1> nodes = new ConcurrentHashMap<String, DataNodeV1>();
+
+    private WatchManager dataWatches = new WatchManager();
+
+    private WatchManager childWatches = new WatchManager();
+
+    /**
+     * This hashtable lists the paths of the ephemeral nodes of a session.
+     */
+    private Map<Long, HashSet<String>> ephemerals = new ConcurrentHashMap<Long, HashSet<String>>();
+
+    /** A debug string * */
+    private String debug = "debug";
+    
+    /**
+     * return the ephemerals for this tree
+     * @return the ephemerals for this tree
+     */
+    public Map<Long, HashSet<String>>  getEphemeralsMap() {
+        return this.ephemerals;
+    }
+    
+    public void setEphemeralsMap(Map<Long, HashSet<String>> ephemerals) {
+        this.ephemerals = ephemerals;
+    }
+    
+    @SuppressWarnings("unchecked")
+    public HashSet<String> getEphemerals(long sessionId) {
+        HashSet<String> retv = ephemerals.get(sessionId);
+        if (retv == null) {
+            return new HashSet<String>();
+        }
+        HashSet<String> cloned = null;
+        synchronized(retv) {
+            cloned =  (HashSet<String>) retv.clone();
+        }
+        return cloned;
+    }
+    
+    public Collection<Long> getSessions() {
+        return ephemerals.keySet();
+    }
+
+    public DataNodeV1 getNode(String path) {
+        return nodes.get(path);
+    }
+
+    /**
+     * This is a pointer to the root of the DataTree. It is the source of truth,
+     * but we usually use the nodes hashmap to find nodes in the tree.
+     */
+    private DataNodeV1 root = new DataNodeV1(null, new byte[0], null, new StatPersistedV1());
+
+    public DataTreeV1() {
+        /* Rather than fight it, let root have an alias */
+        nodes.put("", root);
+        nodes.put("/", root);
+    }
+
+    static public void copyStatPersisted(StatPersistedV1 from, StatPersistedV1 to) {
+        to.setAversion(from.getAversion());
+        to.setCtime(from.getCtime());
+        to.setCversion(from.getCversion());
+        to.setCzxid(from.getCzxid());
+        to.setMtime(from.getMtime());
+        to.setMzxid(from.getMzxid());
+        to.setVersion(from.getVersion());
+        to.setEphemeralOwner(from.getEphemeralOwner());
+    }
+
+    static public void copyStat(Stat from, Stat to) {
+        to.setAversion(from.getAversion());
+        to.setCtime(from.getCtime());
+        to.setCversion(from.getCversion());
+        to.setCzxid(from.getCzxid());
+        to.setMtime(from.getMtime());
+        to.setMzxid(from.getMzxid());
+        to.setVersion(from.getVersion());
+        to.setEphemeralOwner(from.getEphemeralOwner());
+        to.setDataLength(from.getDataLength());
+        to.setNumChildren(from.getNumChildren());
+    }
+
+
+    // public void remooveInterest(String path, Watcher nw) {
+    // DataNode n = nodes.get(path);
+    // if (n == null) {
+    // synchronized (nonExistentWatches) {
+    // HashSet<Watcher> list = nonExistentWatches.get(path);
+    // if (list != null) {
+    // list.remove(nw);
+    // }
+    // }
+    // }
+    // synchronized (n) {
+    // n.dataWatchers.remove(nw);
+    // n.childWatchers.remove(nw);
+    // }
+    // }
+
+    /**
+     * @param path
+     * @param data
+     * @param acl
+     * @param ephemeralOwner
+     *                the session id that owns this node. -1 indicates this is
+     *                not an ephemeral node.
+     * @param zxid
+     * @param time
+     * @return the path of the created node
+     * @throws KeeperException
+     */
+    public String createNode(String path, byte data[], List<ACL> acl,
+            long ephemeralOwner, long zxid, long time) 
+            throws KeeperException.NoNodeException, KeeperException.NodeExistsException {
+        int lastSlash = path.lastIndexOf('/');
+        String parentName = path.substring(0, lastSlash);
+        String childName = path.substring(lastSlash + 1);
+        StatPersistedV1 stat = new StatPersistedV1();
+        stat.setCtime(time);
+        stat.setMtime(time);
+        stat.setCzxid(zxid);
+        stat.setMzxid(zxid);
+        stat.setVersion(0);
+        stat.setAversion(0);
+        stat.setEphemeralOwner(ephemeralOwner);
+        DataNodeV1 parent = nodes.get(parentName);
+        if (parent == null) {
+            throw new KeeperException.NoNodeException();
+        }
+        synchronized (parent) {
+            if (parent.children.contains(childName)) {
+                throw new KeeperException.NodeExistsException();
+            }
+            int cver = parent.stat.getCversion();
+            cver++;
+            parent.stat.setCversion(cver);
+            DataNodeV1 child = new DataNodeV1(parent, data, acl, stat);
+            parent.children.add(childName);
+            nodes.put(path, child);
+            if (ephemeralOwner != 0) {
+                HashSet<String> list = ephemerals.get(ephemeralOwner);
+                if (list == null) {
+                    list = new HashSet<String>();
+                    ephemerals.put(ephemeralOwner, list);
+                }
+                synchronized(list) {
+                    list.add(path);
+                }
+            }
+        }
+        dataWatches.triggerWatch(path, Event.EventType.NodeCreated);
+        childWatches.triggerWatch(parentName.equals("")?"/":parentName, Event.EventType.NodeChildrenChanged);
+        return path;
+    }
+
+    public void deleteNode(String path) throws KeeperException.NoNodeException {
+        int lastSlash = path.lastIndexOf('/');
+        String parentName = path.substring(0, lastSlash);
+        String childName = path.substring(lastSlash + 1);
+        DataNodeV1 node = nodes.get(path);
+        if (node == null) {
+            throw new KeeperException.NoNodeException();
+        }
+        nodes.remove(path);
+        DataNodeV1 parent = nodes.get(parentName);
+        if (parent == null) {
+            throw new KeeperException.NoNodeException();
+        }
+        synchronized (parent) {
+            parent.children.remove(childName);
+            parent.stat.setCversion(parent.stat.getCversion() + 1);
+            long eowner = node.stat.getEphemeralOwner();
+            if (eowner != 0) {
+                HashSet<String> nodes = ephemerals.get(eowner);
+                if (nodes != null) {
+                    synchronized(nodes) {
+                        nodes.remove(path);
+                    }
+                }
+            }
+            node.parent = null;
+        }
+        Set<Watcher> processed =
+        dataWatches.triggerWatch(path, EventType.NodeDeleted);
+        childWatches.triggerWatch(path, EventType.NodeDeleted, processed);
+        childWatches.triggerWatch(parentName.equals("")?"/":parentName, EventType.NodeChildrenChanged);
+    }
+
+    public Stat setData(String path, byte data[], int version, long zxid,
+            long time) throws KeeperException.NoNodeException {
+        Stat s = new Stat();
+        DataNodeV1 n = nodes.get(path);
+        if (n == null) {
+            throw new KeeperException.NoNodeException();
+        }
+        synchronized (n) {
+            n.data = data;
+            n.stat.setMtime(time);
+            n.stat.setMzxid(zxid);
+            n.stat.setVersion(version);
+            n.copyStat(s);
+        }
+        dataWatches.triggerWatch(path, EventType.NodeDataChanged);
+        return s;
+    }
+
+    public byte[] getData(String path, Stat stat, Watcher watcher) throws KeeperException.NoNodeException {
+        DataNodeV1 n = nodes.get(path);
+        if (n == null) {
+            throw new KeeperException.NoNodeException();
+        }
+        synchronized (n) {
+            n.copyStat(stat);
+            if (watcher != null) {
+                dataWatches.addWatch(path, watcher);
+            }
+            return n.data;
+        }
+    }
+
+    public Stat statNode(String path, Watcher watcher) throws KeeperException.NoNodeException {
+        Stat stat = new Stat();
+        DataNodeV1 n = nodes.get(path);
+        if (watcher != null) {
+            dataWatches.addWatch(path, watcher);
+        }
+        if (n == null) {
+            throw new KeeperException.NoNodeException();
+        }
+        synchronized (n) {
+            n.copyStat(stat);
+            return stat;
+        }
+    }
+
+    public ArrayList<String> getChildren(String path, Stat stat, Watcher watcher) throws KeeperException.NoNodeException {
+        DataNodeV1 n = nodes.get(path);
+        if (n == null) {
+            throw new KeeperException.NoNodeException();
+        }
+        synchronized (n) {
+            ArrayList<String> children = new ArrayList<String>();
+            children.addAll(n.children);
+            if (watcher != null) {
+                childWatches.addWatch(path, watcher);
+            }
+            return children;
+        }
+    }
+
+    public Stat setACL(String path, List<ACL> acl, int version) throws KeeperException.NoNodeException {
+        Stat stat = new Stat();
+        DataNodeV1 n = nodes.get(path);
+        if (n == null) {
+            throw new KeeperException.NoNodeException();
+        }
+        synchronized (n) {
+            n.stat.setAversion(version);
+            n.acl = acl;
+            n.copyStat(stat);
+            return stat;
+        }
+    }
+
+    @SuppressWarnings("unchecked")
+    public List<ACL> getACL(String path, Stat stat) throws KeeperException.NoNodeException {
+        DataNodeV1 n = nodes.get(path);
+        if (n == null) {
+            throw new KeeperException.NoNodeException();
+        }
+        synchronized (n) {
+            n.copyStat(stat);
+            return new ArrayList<ACL>(n.acl);
+        }
+    }
+
+    static public class ProcessTxnResult {
+        public long clientId;
+
+        public int cxid;
+
+        public long zxid;
+
+        public int err;
+
+        public int type;
+
+        public String path;
+
+        public Stat stat;
+
+        /**
+         * Equality is defined as the clientId and the cxid being the same. This
+         * allows us to use hash tables to track completion of transactions.
+         *
+         * @see java.lang.Object#equals(java.lang.Object)
+         */
+        @Override
+        public boolean equals(Object o) {
+            if (o instanceof ProcessTxnResult) {
+                ProcessTxnResult other = (ProcessTxnResult) o;
+                return other.clientId == clientId && other.cxid == cxid;
+            }
+            return false;
+        }
+
+        /**
+         * See equals() to find the rationale for how this hashcode is generated.
+         *
+         * @see ProcessTxnResult#equals(Object)
+         * @see java.lang.Object#hashCode()
+         */
+        @Override
+        public int hashCode() {
+            return (int) ((clientId ^ cxid) % Integer.MAX_VALUE);
+        }
+
+    }
+
+    public volatile long lastProcessedZxid = 0;
+
+    @SuppressWarnings("unchecked")
+    public ProcessTxnResult processTxn(TxnHeader header, Record txn) {
+        ProcessTxnResult rc = new ProcessTxnResult();
+
+        try {
+            rc.clientId = header.getClientId();
+            rc.cxid = header.getCxid();
+            rc.zxid = header.getZxid();
+            rc.type = header.getType();
+            rc.err = 0;
+            if (rc.zxid > lastProcessedZxid) {
+                lastProcessedZxid = rc.zxid;
+            }
+            switch (header.getType()) {
+            case OpCode.create:
+                CreateTxn createTxn = (CreateTxn) txn;
+                debug = "Create transaction for " + createTxn.getPath();
+                createNode(createTxn.getPath(), createTxn.getData(), createTxn
+                        .getAcl(), createTxn.getEphemeral() ? header
+                        .getClientId() : 0, header.getZxid(), header.getTime());
+                rc.path = createTxn.getPath();
+                break;
+            case OpCode.delete:
+                DeleteTxn deleteTxn = (DeleteTxn) txn;
+                debug = "Delete transaction for " + deleteTxn.getPath();
+                deleteNode(deleteTxn.getPath());
+                break;
+            case OpCode.setData:
+                SetDataTxn setDataTxn = (SetDataTxn) txn;
+                debug = "Set data for  transaction for " + setDataTxn.getPath();
+                rc.stat = setData(setDataTxn.getPath(), setDataTxn.getData(),
+                        setDataTxn.getVersion(), header.getZxid(), header
+                                .getTime());
+                break;
+            case OpCode.setACL:
+                SetACLTxn setACLTxn = (SetACLTxn) txn;
+                rc.stat = setACL(setACLTxn.getPath(), setACLTxn.getAcl(),
+                        setACLTxn.getVersion());
+                break;
+            case OpCode.closeSession:
+                killSession(header.getClientId());
+                break;
+            case OpCode.error:
+                ErrorTxn errTxn = (ErrorTxn) txn;
+                rc.err = errTxn.getErr();
+                break;
+            }
+        } catch (KeeperException e) {
+            // These are expected errors since we take a lazy snapshot
+            if (initialized
+                    || (e.getCode() != Code.NoNode && e.getCode() != Code.NodeExists)) {
+                LOG.warn(debug);
+                LOG.error("FIXMSG",e);
+            }
+        }
+        return rc;
+    }
+
+    void killSession(long session) {
+        // the list is already removed from the ephemerals
+        // so we do not have to worry about synchronizing on
+        // the list. This is only called from FinalRequestProcessor
+        // so there is no need for synchronization. The list is not
+        // changed here. Only create and delete change the list which
+        // are again called from FinalRequestProcessor in sequence.
+        HashSet<String> list = ephemerals.remove(session);
+        if (list != null) {
+            for (String path : list) {
+                try {
+                    deleteNode(path);
+                    ZooTrace.logTraceMessage(LOG,
+                                             ZooTrace.SESSION_TRACE_MASK,
+                                             "Deleting ephemeral node "
+                                             + path + " for session 0x"
+                                             + Long.toHexString(session));
+                } catch (KeeperException e) {
+                    LOG.error("FIXMSG",e);
+                }
+            }
+        }
+    }
+
+    /**
+     * this method uses a stringbuilder to create a new
+     * path for children. This is faster than string
+     * appends ( str1 + str2).
+     * @param oa OutputArchive to write to.
+     * @param path a string builder.
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    void serializeNode(OutputArchive oa, StringBuilder path)
+            throws IOException, InterruptedException {
+        String pathString = path.toString();
+        DataNodeV1 node = getNode(pathString);
+        if (node == null) {
+            return;
+        }
+        String children[] = null;
+        synchronized (node) {
+            scount++;
+            oa.writeString(pathString, "path");
+            oa.writeRecord(node, "node");
+            children = node.children.toArray(new String[node.children.size()]);
+        }
+        path.append('/');
+        int off = path.length();
+        if (children != null) {
+            for (String child : children) {
+                // since this is a single buffer being reused,
+                // we need to truncate the previous
+                // bytes of the string.
+                path.delete(off, Integer.MAX_VALUE);
+                path.append(child);
+                serializeNode(oa, path);
+            }
+        }
+    }
+
+    int scount;
+
+    public boolean initialized = false;
+
+    public void serialize(OutputArchive oa, String tag) throws IOException,
+            InterruptedException {
+        scount = 0;
+        serializeNode(oa, new StringBuilder(""));
+        // / marks end of stream
+        // we need to check if clear had been called in between the snapshot.
+        if (root != null) {
+            oa.writeString("/", "path");
+        }
+    }
+
+    public void deserialize(InputArchive ia, String tag) throws IOException {
+        nodes.clear();
+        String path = ia.readString("path");
+        while (!path.equals("/")) {
+            DataNodeV1 node = new DataNodeV1();
+            ia.readRecord(node, "node");
+            nodes.put(path, node);
+            int lastSlash = path.lastIndexOf('/');
+            if (lastSlash == -1) {
+                root = node;
+            } else {
+                String parentPath = path.substring(0, lastSlash);
+                node.parent = nodes.get(parentPath);
+                node.parent.children.add(path.substring(lastSlash + 1));
+                long eowner = node.stat.getEphemeralOwner();
+                if (eowner != 0) {
+                    HashSet<String> list = ephemerals.get(eowner);
+                    if (list == null) {
+                        list = new HashSet<String>();
+                        ephemerals.put(eowner, list);
+                    }
+                    list.add(path);
+                }
+            }
+            path = ia.readString("path");
+        }
+        nodes.put("/", root);
+    }
+
+    public String dumpEphemerals() {
+        Set<Long> keys = ephemerals.keySet();
+        StringBuffer sb = new StringBuffer("Sessions with Ephemerals ("
+                + keys.size() + "):\n");
+        for (long k : keys) {
+            sb.append("0x" + Long.toHexString(k));
+            sb.append(":\n");
+            HashSet<String> tmp = ephemerals.get(k);
+            synchronized(tmp) {
+                for (String path : tmp) {
+                    sb.append("\t" + path + "\n");
+                }
+            }
+        }
+        return sb.toString();
+    }
+
+    public void removeCnxn(Watcher watcher) {
+        dataWatches.removeWatcher(watcher);
+        childWatches.removeWatcher(watcher);
+    }
+
+    public void clear() {
+        root = null;
+        nodes.clear();
+        ephemerals.clear();
+        // dataWatches = null;
+        // childWatches = null;
+    }
+}

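DataTreeV1 mirrors the old on-disk format: the upgrade path deserializes a version-1 snapshot into it and then replays transaction logs through processTxn. A condensed sketch of the snapshot half, following deserializeSnapshot/loadThisSnapShot in UpgradeSnapShotV1 below (the snapshot path is an assumption):

    import java.io.BufferedInputStream;
    import java.io.FileInputStream;
    import java.io.IOException;
    import org.apache.jute.BinaryInputArchive;
    import org.apache.jute.InputArchive;
    import org.apache.zookeeper.server.upgrade.DataTreeV1;

    public class LoadV1Snapshot {
        public static DataTreeV1 load(String snapshotPath) throws IOException {
            InputArchive ia = BinaryInputArchive.getArchive(
                    new BufferedInputStream(new FileInputStream(snapshotPath)));
            int count = ia.readInt("count");          // number of sessions in the snapshot
            while (count-- > 0) {
                ia.readLong("id");                    // session id (discarded here)
                ia.readInt("timeout");                // session timeout (discarded here)
            }
            DataTreeV1 oldTree = new DataTreeV1();
            oldTree.deserialize(ia, "tree");          // rebuild the old tree
            return oldTree;
        }
    }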
+ 181 - 0
src/java/main/org/apache/zookeeper/server/upgrade/UpgradeMain.java

@@ -0,0 +1,181 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.server.upgrade;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.log4j.Logger;
+import org.apache.zookeeper.server.DataTree;
+import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
+
+/**
+ * This class upgrades the older database
+ * to the new database format for the ZooKeeper
+ * servers.
+ * The way to run it is
+ * java -classpath zookeeper.jar org.apache.zookeeper.server.upgrade.UpgradeMain dataDir snapShotDir
+ * or using the zookeeper scripts with bin/zkServer.sh upgrade dataDir snapShotDir.
+ * It moves the old files into dataDir/version-1 and snapShotDir/version-1 backup
+ * directories, from which they can be restored into dataDir and snapShotDir if needed.
+ */
+public class UpgradeMain {
+    File snapShotDir;
+    File dataDir;
+    File bkupsnapShotDir;
+    File bkupdataDir;
+    File currentdataDir;
+    File currentsnapShotDir;
+    
+    private static final Logger LOG = Logger.getLogger(UpgradeMain.class);
+    private static final String USAGE = "Usage: UpgradeMain dataDir snapShotDir";
+    private static final int LASTVERSION = 1;
+    private static final int CURRENTVERSION = FileTxnSnapLog.VERSION;
+    private static final String dirName = FileTxnSnapLog.version;
+    private static final String manual = "Please take manual steps to " +
+    		"sanitize your database.\n Please read the upgrade manual";
+    
+     /**
+     * upgrade class that takes the two file 
+     * directories.
+     * @param dataDir the directory that contains the 
+     * transaction logs
+     * @param snapShotDir the directory that contains 
+     * the snapshots 
+     */
+    public UpgradeMain(File dataDir, File snapShotDir) {
+        this.snapShotDir = snapShotDir; 
+        this.dataDir = dataDir;
+        this.bkupdataDir = new File(dataDir, dirName + LASTVERSION);
+        this.bkupsnapShotDir = new File(snapShotDir, dirName + LASTVERSION );
+        this.currentsnapShotDir = new File(snapShotDir, dirName + CURRENTVERSION);
+        this.currentdataDir = new File(dataDir, dirName + CURRENTVERSION);
+    }
+ 
+    /**
+     * create all the bkup directories and the current
+     * database directories
+     * @throws IOException
+     */
+    private void createAllDirs() throws IOException {
+        String error = "backup directory " + bkupdataDir + " already exists";
+        LOG.info("Creating previous version data dir " + bkupdataDir);
+        if (!bkupdataDir.mkdirs()) {
+            LOG.error(error);
+            LOG.error(manual);
+            throw new IOException(error);
+        }
+        LOG.info("Creating previous version snapshot dir " + bkupdataDir);
+        if (!bkupsnapShotDir.mkdirs() && !bkupsnapShotDir.exists()) {
+            LOG.error(error);
+            LOG.error(manual);
+            throw new IOException(error);
+        }
+        error = "current directory " + currentdataDir + " already exists";
+        LOG.info("Creating current data dir " + currentdataDir);
+        if (!currentdataDir.mkdirs()) {
+            LOG.error(error);
+            LOG.error(manual);
+            throw new IOException(error);
+        }
+        LOG.info("Creating current snapshot dir " + currentdataDir);
+        if (!currentsnapShotDir.mkdirs() && !currentsnapShotDir.exists()) {
+            LOG.error(error);
+            LOG.error(manual);
+            throw new IOException(error);
+        }
+    }
+    
+    /**
+     * copy files from srcdir to dstdir that have the string 
+     * filter in the srcdir filenames
+     * @param srcDir the source directory
+     * @param dstDir the destination directory
+     * @param filter the filter of filenames that 
+     * need to be copied.
+     * @throws IOException
+     */
+    void copyFiles(File srcDir, File dstDir, String filter) throws IOException {
+        File[] list = srcDir.listFiles();
+        for (File file: list) {
+            String name = file.getName();
+            if (name.startsWith(filter)) {
+                // we need to copy this file
+                File dest = new File(dstDir, name);
+                LOG.info("Renaming " + file + " to " + dest);
+                if (!file.renameTo(dest)) {
+                    throw new IOException("Unable to rename " 
+                            + file + " to " +  dest);
+                }
+            }
+        }
+    }
+    
+    /**
+     * run the upgrade
+     * @throws IOException if the upgrade fails
+     */
+    public void runUpgrade() throws IOException {
+        if (!dataDir.exists()) {
+            throw new IOException(dataDir + " does not exist");
+        }
+        if (!snapShotDir.exists()) {
+            throw new IOException(snapShotDir + " does not exist");
+        }
+        // create the backup directories
+        createAllDirs();
+        //copy all the files for backup
+        try {
+            copyFiles(dataDir, bkupdataDir, "log");
+            copyFiles(snapShotDir, bkupsnapShotDir, "snapshot");
+        } catch(IOException io) {
+            LOG.error("Failed in backing up.");
+            throw io;
+        }
+
+        // everything is backed up
+        // read old database and create 
+        // an old snapshot
+        UpgradeSnapShotV1 upgrade = new UpgradeSnapShotV1(bkupdataDir, 
+                bkupsnapShotDir);
+        LOG.info("Creating new data tree");
+        DataTree dt = upgrade.getNewDataTree();
+        FileTxnSnapLog filesnapLog = new FileTxnSnapLog(dataDir, 
+                snapShotDir);
+        LOG.info("snapshotting the new datatree");
+        filesnapLog.save(dt, upgrade.getSessionWithTimeOuts());
+        //done saving.
+        LOG.info("Upgrade is complete");
+    }
+    
+    public static void main(String[] argv) {
+        if (argv.length < 2) {
+            LOG.error(USAGE);
+            System.exit(-1);
+        }
+        try {
+            UpgradeMain upgrade = new UpgradeMain(new File(argv[0]), new File(argv[1]));
+            upgrade.runUpgrade();
+        } catch(Throwable th) {
+            LOG.error("Upgrade Error: Please read the " +
+            		"docs for manual failure recovery ", th);
+        }
+    }
+}

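Besides the command-line entry point, the class can be driven programmatically, which is what the new UpgradeTest does; a minimal sketch (paths are placeholders, and as in the test, dataDir and snapShotDir may point at the same directory):

    import java.io.File;
    import java.io.IOException;
    import org.apache.zookeeper.server.upgrade.UpgradeMain;

    public class UpgradeExample {
        public static void main(String[] args) throws IOException {
            File dataDir = new File("/var/zookeeper/data");      // assumed location
            File snapShotDir = new File("/var/zookeeper/data");  // assumed location
            new UpgradeMain(dataDir, snapShotDir).runUpgrade();
        }
    }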
+ 33 - 0
src/java/main/org/apache/zookeeper/server/upgrade/UpgradeSnapShot.java

@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.server.upgrade;
+
+import java.io.IOException;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.zookeeper.server.DataTree;
+
+/**
+ * interface for snapshot conversion.
+ *
+ */
+public interface UpgradeSnapShot {
+    public DataTree getNewDataTree() throws IOException;
+    public ConcurrentHashMap<Long, Integer> getSessionWithTimeOuts();
+}

+ 323 - 0
src/java/main/org/apache/zookeeper/server/upgrade/UpgradeSnapShotV1.java

@@ -0,0 +1,323 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.server.upgrade;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.EOFException;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.jute.BinaryInputArchive;
+import org.apache.jute.InputArchive;
+import org.apache.jute.Record;
+import org.apache.log4j.Logger;
+import org.apache.zookeeper.ZooDefs.OpCode;
+import org.apache.zookeeper.data.StatPersisted;
+import org.apache.zookeeper.data.StatPersistedV1;
+import org.apache.zookeeper.server.DataNode;
+import org.apache.zookeeper.server.DataTree;
+import org.apache.zookeeper.server.Request;
+import org.apache.zookeeper.server.ZooTrace;
+import org.apache.zookeeper.server.persistence.FileTxnLog;
+import org.apache.zookeeper.server.persistence.Util;
+import org.apache.zookeeper.server.util.SerializeUtils;
+import org.apache.zookeeper.txn.CreateSessionTxn;
+import org.apache.zookeeper.txn.TxnHeader;
+
+/**
+ * This class reads the old snapshot
+ * and the old dataDir, rebuilds the
+ * old data tree, and converts it into
+ * the new snapshot format for
+ * upgrading.
+ */
+public class UpgradeSnapShotV1 implements UpgradeSnapShot {
+    private static final Logger LOG = Logger.getLogger(UpgradeSnapShotV1.class);
+    
+    ConcurrentHashMap<Long, Integer> sessionsWithTimeouts = 
+        new ConcurrentHashMap<Long, Integer>();
+    File dataDir;
+    File snapShotDir;
+    DataTreeV1 oldDataTree;
+   
+    /**
+     * upgrade from version 1 to version 2
+     * @param dataDir
+     * @param snapShotDir
+     */
+    public UpgradeSnapShotV1(File dataDir, File snapShotDir) {
+        this.dataDir = dataDir;
+        this.snapShotDir = snapShotDir;
+        oldDataTree = new DataTreeV1();
+    }
+    
+    /**
+     * deserialize from an InputArchive
+     * @param oldTree the tree to be created
+     * @param ia the input archive to be read from
+     * @param sessions the sessions to be created
+     * @throws IOException 
+     */
+    private void deserializeSnapshot(DataTreeV1 oldTree, InputArchive ia,
+            Map<Long, Integer> sessions) throws IOException {
+        int count = ia.readInt("count");
+        while (count > 0) {
+            long id = ia.readLong("id");
+            int to = ia.readInt("timeout");
+            sessions.put(id, to);
+            ZooTrace.logTraceMessage(LOG, ZooTrace.SESSION_TRACE_MASK,
+                    "loadData --- session in archive: " + id
+                    + " with timeout: " + to);
+            count--;
+        }
+        oldTree.deserialize(ia, "tree");
+    }
+    
+    /**
+     * play the log from this logstream into the datatree
+     * @param logStream
+     * @return
+     * @throws IOException
+     */
+    public long playLog(InputArchive logStream) throws IOException {
+        long highestZxid = 0;
+        try {
+            while (true) {
+                byte[] bytes = logStream.readBuffer("txnEntry");
+                if (bytes.length == 0) {
+                    // Since we preallocate, we define EOF to be an
+                    // empty transaction
+                    throw new EOFException();
+                }
+                InputArchive ia = BinaryInputArchive
+                        .getArchive(new ByteArrayInputStream(bytes));
+                TxnHeader hdr = new TxnHeader();
+                Record txn = SerializeUtils.deserializeTxn(ia, hdr);
+                if (logStream.readByte("EOR") != 'B') {
+                    LOG.warn("Last transaction was partial.");
+                    throw new EOFException();
+                }
+                if (hdr.getZxid() <= highestZxid && highestZxid != 0) {
+                    LOG.error(highestZxid + "(higestZxid) >= "
+                            + hdr.getZxid() + "(next log) for type "
+                            + hdr.getType());
+                } else {
+                    highestZxid = hdr.getZxid();
+                }
+                switch (hdr.getType()) {
+                case OpCode.createSession:
+                    sessionsWithTimeouts.put(hdr.getClientId(),
+                            ((CreateSessionTxn) txn).getTimeOut());
+                    ZooTrace.logTraceMessage(LOG,
+                                             ZooTrace.SESSION_TRACE_MASK,
+                            "playLog --- create session in log: 0x"
+                                    + Long.toHexString(hdr.getClientId())
+                                    + " with timeout: "
+                                    + ((CreateSessionTxn) txn).getTimeOut());
+                    // give dataTree a chance to sync its lastProcessedZxid
+                    oldDataTree.processTxn(hdr, txn);
+                    break;
+                case OpCode.closeSession:
+                    sessionsWithTimeouts.remove(hdr.getClientId());
+                    ZooTrace.logTraceMessage(LOG,
+                            ZooTrace.SESSION_TRACE_MASK,
+                            "playLog --- close session in log: 0x"
+                                    + Long.toHexString(hdr.getClientId()));
+                    oldDataTree.processTxn(hdr, txn);
+                    break;
+                default:
+                    oldDataTree.processTxn(hdr, txn);
+                }
+                Request r = new Request(null, 0, hdr.getCxid(), hdr.getType(),
+                        null, null);
+                r.txn = txn;
+                r.hdr = hdr;
+                r.zxid = hdr.getZxid();
+            }
+        } catch (EOFException e) {
+            // expected in some cases - see comments in try block
+        }
+        return highestZxid;
+    }
+
+   
+    
+    /**
+     * apply the log files to the datatree
+     * @param oldTree the datatree to apply the logs to
+     * @param logFiles the logs to be applied
+     * @throws IOException
+     */
+    private long processLogFiles(DataTreeV1 oldTree, 
+            File[] logFiles) throws IOException {
+        long zxid = 0;
+        for (File f: logFiles) { 
+            LOG.warn("Processing log file: " + f);
+            InputStream logIs = 
+                new BufferedInputStream(new FileInputStream(f));
+            zxid = playLog(BinaryInputArchive.getArchive(logIs));
+            logIs.close();
+        }
+        return zxid;
+    }
+    
+    /**
+     * create the old snapshot database
+     * apply logs to it and create the final
+     * database
+     * @throws IOException
+     */
+    private void loadThisSnapShot() throws IOException {  
+        // pick the most recent snapshot
+        File snapshot = findMostRecentSnapshot();
+        if (snapshot == null) {
+            throw new IOException("Invalid snapshots " +
+            		"or not snapshots in " + snapShotDir);
+        }
+        InputStream inputstream = new BufferedInputStream(
+                new FileInputStream(snapshot));
+        InputArchive ia = BinaryInputArchive.getArchive(inputstream);
+        deserializeSnapshot(oldDataTree, ia, sessionsWithTimeouts);
+        //ok done with the snapshot 
+        // now apply the logs
+        long snapshotZxid = oldDataTree.lastProcessedZxid;
+        File[] files = FileTxnLog.getLogFiles(
+                dataDir.listFiles(), snapshotZxid);
+        long zxid = processLogFiles(oldDataTree, files);
+        //check for this zxid to be sane
+        if (zxid != oldDataTree.lastProcessedZxid) {
+            LOG.error("Zxids not equal " + " log zxid " +
+                    zxid + " datatree processed " + oldDataTree.lastProcessedZxid);
+        }
+    }
+    
+    /**
+     * find the most recent snapshot 
+     * in the snapshot directory
+     * @return
+     * @throws IOException
+     */
+    private File findMostRecentSnapshot() throws IOException {
+        List<File> files = Util.sortDataDir(snapShotDir.listFiles(),
+                "snapshot", false);
+        for (File f: files) {
+            if (Util.isValidSnapshot(f))
+                return f;
+        }
+        return null;
+    }
+    
+    /**
+     * convert the old stat to new stat
+     * @param oldStat the old stat
+     * @return the new stat
+     */
+    private StatPersisted convertStat(StatPersistedV1 oldStat) {
+        StatPersisted stat = new StatPersisted();
+        stat.setAversion(oldStat.getAversion());
+        stat.setCtime(oldStat.getCtime());
+        stat.setCversion(oldStat.getCversion());
+        stat.setCzxid(oldStat.getCzxid());
+        stat.setEphemeralOwner(oldStat.getEphemeralOwner());
+        stat.setMtime(oldStat.getMtime());
+        stat.setMzxid(oldStat.getMzxid());
+        stat.setVersion(oldStat.getVersion());
+        return stat;
+    }
+    
+    /**
+     * convert a given old datanode to new datanode
+     * @param dt the new datatree
+     * @param parent the parent of the datanode to be constructed
+     * @param oldDataNode the old datanode 
+     * @return the new datanode
+     */
+    private DataNode convertDataNode(DataTree dt, DataNode parent, 
+            DataNodeV1 oldDataNode) {
+        StatPersisted stat = convertStat(oldDataNode.stat);
+        DataNode dataNode =  new DataNode(parent, oldDataNode.data,
+                dt.convertAcls(oldDataNode.acl), stat);
+        dataNode.setChildren(oldDataNode.children);
+        return dataNode;
+    }
+    
+    /**
+     * recurse through the old datatree and construct the 
+     * new data tree
+     * @param dataTree the new datatree to be constructed
+     * @param path the path to start with
+     */
+    private void recurseThroughDataTree(DataTree dataTree, String path) {
+        if (path == null)
+            return;
+        DataNodeV1 oldDataNode = oldDataTree.getNode(path);
+        HashSet<String> children = oldDataNode.children;
+        DataNode parent = null;
+        if ("".equals(path)) {
+            parent = null;
+        }
+        else {
+            int lastSlash = path.lastIndexOf('/');
+            String parentPath = path.substring(0, lastSlash);
+            parent = dataTree.getNode(parentPath);
+        }
+        DataNode thisDatNode = convertDataNode(dataTree, parent,
+                                    oldDataNode);
+        dataTree.addDataNode(path, thisDatNode);
+        if (children == null || children.size() == 0) {
+            return;
+        }
+        else {
+            for (String child: children) {
+                recurseThroughDataTree(dataTree, path + "/" +child);
+            }
+        }
+    }   
+    
+    private DataTree convertThisSnapShot() throws IOException {
+        // create a datatree 
+        DataTree dataTree = new DataTree();
+        DataNodeV1 oldDataNode = oldDataTree.getNode("");
+        if (oldDataNode == null) {
+            //should never happen
+            LOG.error("Upgrading from an empty snapshot.");
+        }
+        
+        recurseThroughDataTree(dataTree, "");
+        dataTree.lastProcessedZxid = oldDataTree.lastProcessedZxid;
+        return dataTree;
+    }
+    
+    public DataTree getNewDataTree() throws IOException {
+        loadThisSnapShot();
+        DataTree dt = convertThisSnapShot();
+        return dt;
+    }
+    
+    public ConcurrentHashMap<Long, Integer> getSessionWithTimeOuts() {
+        return this.sessionsWithTimeouts;
+    }
+}

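Putting the pieces together, the conversion pipeline driven by UpgradeMain.runUpgrade is: build the old tree from the version-1 files, convert it, and persist it through the version-2 FileTxnSnapLog; a condensed sketch of that flow (directory arguments are placeholders):

    import java.io.File;
    import java.io.IOException;
    import org.apache.zookeeper.server.DataTree;
    import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
    import org.apache.zookeeper.server.upgrade.UpgradeSnapShot;
    import org.apache.zookeeper.server.upgrade.UpgradeSnapShotV1;

    public class ConvertSnapshot {
        public static void convert(File oldDataDir, File oldSnapDir,
                                   File newDataDir, File newSnapDir) throws IOException {
            UpgradeSnapShot upgrade = new UpgradeSnapShotV1(oldDataDir, oldSnapDir);
            DataTree dt = upgrade.getNewDataTree();               // old snapshot + logs -> new tree
            FileTxnSnapLog snapLog = new FileTxnSnapLog(newDataDir, newSnapDir);
            snapLog.save(dt, upgrade.getSessionWithTimeOuts());   // write the version-2 snapshot
        }
    }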
+ 110 - 0
src/java/test/org/apache/zookeeper/test/UpgradeTest.java

@@ -0,0 +1,110 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.test;
+
+import static org.apache.zookeeper.test.ClientBase.CONNECTION_TIMEOUT;
+
+import java.io.File;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+
+import junit.framework.TestCase;
+
+import org.apache.log4j.Logger;
+import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.WatchedEvent;
+import org.apache.zookeeper.Watcher;
+import org.apache.zookeeper.ZooKeeper;
+import org.apache.zookeeper.Watcher.Event.KeeperState;
+import org.apache.zookeeper.ZooDefs.Ids;
+import org.apache.zookeeper.data.Stat;
+import org.apache.zookeeper.server.NIOServerCnxn;
+import org.apache.zookeeper.server.ServerStats;
+import org.apache.zookeeper.server.SyncRequestProcessor;
+import org.apache.zookeeper.server.ZooKeeperServer;
+import org.apache.zookeeper.server.upgrade.UpgradeMain;
+
+public class UpgradeTest extends TestCase implements Watcher {
+    private final static Logger LOG = Logger.getLogger(UpgradeTest.class);
+    private static String HOSTPORT = "127.0.0.1:2359";
+    ZooKeeperServer zks;
+    private static final File testData = new File(
+            System.getProperty("test.data.dir", "build/test/data"));
+    private CountDownLatch startSignal;
+    
+    @Override
+    protected void setUp() throws Exception {
+        LOG.info("STARTING " + getName());
+        ServerStats.registerAsConcrete();
+    }
+    @Override
+    protected void tearDown() throws Exception {
+        ServerStats.unregister();
+        LOG.info("FINISHED " + getName());
+    }
+    
+    /**
+     * test the upgrade
+     * @throws Exception
+     */
+    public void testUpgrade() throws Exception {
+        File upgradeDir = new File(testData, "upgrade");
+        UpgradeMain upgrade = new UpgradeMain(upgradeDir, upgradeDir);
+        upgrade.runUpgrade();
+        zks = new ZooKeeperServer(upgradeDir, upgradeDir, 3000);
+        SyncRequestProcessor.snapCount = 1000;
+        final int PORT = Integer.parseInt(HOSTPORT.split(":")[1]);
+        NIOServerCnxn.Factory f = new NIOServerCnxn.Factory(PORT);
+        f.startup(zks);
+        LOG.info("starting up the zookeeper server .. waiting");
+        assertTrue("waiting for server being up", 
+                ClientBase.waitForServerUp(HOSTPORT,CONNECTION_TIMEOUT));
+        ZooKeeper zk = new ZooKeeper(HOSTPORT, 20000, this);
+        Stat stat = zk.exists("/", false);
+        List<String> children = zk.getChildren("/", false);
+        Collections.sort(children);
+        for (int i=0; i < 10; i++) {
+            assertTrue("data tree sanity check",
+                    ("test-"+ i).equals(children.get(i)));
+        }
+        //try creating one node
+        zk.create("/upgrade","upgrade".getBytes(), Ids.OPEN_ACL_UNSAFE,
+                CreateMode.PERSISTENT);
+        // check if its there
+        if (zk.exists("/upgrade",false) == null) {
+            assertTrue(false);
+        }
+        // bring down the server
+        f.shutdown();
+        assertTrue("waiting for server down",
+                   ClientBase.waitForServerDown(HOSTPORT,
+                           ClientBase.CONNECTION_TIMEOUT));
+        
+    }
+    
+    public void process(WatchedEvent event) {
+        LOG.info("Event:" + event.getState() + " " + event.getType() + " " + event.getPath());
+        if (event.getState() == KeeperState.SyncConnected
+                && startSignal != null && startSignal.getCount() > 0)
+        {              
+            startSignal.countDown();      
+        }
+    }
+}

+ 13 - 0
src/zookeeper.jute

@@ -49,7 +49,20 @@ module org.apache.zookeeper.data {
         int aversion;    // acl version
         long ephemeralOwner; // owner id if ephemeral, 0 otw
     }
+
+   // information explicitly stored by the version 1 database of servers 
+   class StatPersistedV1 {
+       long czxid; //created zxid
+       long mzxid; //last modified zxid
+       long ctime; //created
+       long mtime; //last modified
+       int version; //version
+       int cversion; //child version
+       int aversion; //acl version
+       long ephemeralOwner; //owner id if ephemeral. 0 otw
+    }
 }
+
 module org.apache.zookeeper.proto {
     class op_result_t {
         int rc;