Browse Source

HDFS-6293. Issues with OIV processing PB-based fsimages. Contributed by Kihwal Lee.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1594439 13f79535-47bb-0310-9956-ffa450edef68
Kihwal Lee 11 years ago
parent
commit
97f58955a6
34 changed files with 3519 additions and 44 deletions
  1. 2 0
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  2. 3 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
  3. 1 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
  4. 75 10
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java
  5. 57 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java
  6. 8 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java
  7. 19 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
  8. 381 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java
  9. 213 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java
  10. 46 11
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
  11. 6 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
  12. 56 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java
  13. 16 3
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
  14. 6 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
  15. 13 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiff.java
  16. 52 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java
  17. 20 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileDiff.java
  18. 9 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java
  19. 72 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java
  20. 17 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
  21. 172 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/DelimitedImageVisitor.java
  22. 36 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/DepthCounter.java
  23. 193 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java
  24. 83 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoader.java
  25. 821 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java
  26. 212 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java
  27. 111 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/IndentedImageVisitor.java
  28. 178 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsImageVisitor.java
  29. 118 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/NameDistributionVisitor.java
  30. 274 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewer.java
  31. 109 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TextWriterImageVisitor.java
  32. 88 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java
  33. 43 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
  34. 9 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -356,6 +356,8 @@ Release 2.5.0 - UNRELEASED
 
     HDFS-6186. Pause deletion of blocks when the namenode starts up. (jing9)
 
+    HDFS-6293. Issues with OIV processing PB-based fsimages. (kihwal)
+
   OPTIMIZATIONS
 
     HDFS-6214. Webhdfs has poor throughput for files >2GB (daryn)

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs

@@ -49,6 +49,7 @@ function print_usage(){
   echo "  balancer             run a cluster balancing utility"
   echo "  jmxget               get JMX exported values from NameNode or DataNode."
   echo "  oiv                  apply the offline fsimage viewer to an fsimage"
+  echo "  oiv_legacy           apply the offline fsimage viewer to an legacy fsimage"
   echo "  oev                  apply the offline edits viewer to an edits file"
   echo "  fetchdt              fetch a delegation token from the NameNode"
   echo "  getconf              get config values from configuration"
@@ -161,6 +162,8 @@ elif [ "$COMMAND" = "jmxget" ] ; then
   CLASS=org.apache.hadoop.hdfs.tools.JMXGet
 elif [ "$COMMAND" = "oiv" ] ; then
   CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
+elif [ "$COMMAND" = "oiv_legacy" ] ; then
+  CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
 elif [ "$COMMAND" = "oev" ] ; then
   CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
 elif [ "$COMMAND" = "fetchdt" ] ; then

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -497,6 +497,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String  DFS_SECONDARY_NAMENODE_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY = "dfs.secondary.namenode.kerberos.internal.spnego.principal";
   public static final String  DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY = "dfs.namenode.name.cache.threshold";
   public static final int     DFS_NAMENODE_NAME_CACHE_THRESHOLD_DEFAULT = 10;
+  public static final String  DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY = "dfs.namenode.legacy-oiv-image.dir";
   
   public static final String  DFS_NAMESERVICES = "dfs.nameservices";
   public static final String  DFS_NAMESERVICE_ID = "dfs.nameservice.id";

+ 75 - 10
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java

@@ -18,9 +18,16 @@
 
 package org.apache.hadoop.hdfs.security.token.delegation;
 
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.protobuf.ByteString;
+import java.io.DataInput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map.Entry;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -43,13 +50,9 @@ import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager;
 import org.apache.hadoop.security.token.delegation.DelegationKey;
 
-import java.io.DataInput;
-import java.io.IOException;
-import java.io.InterruptedIOException;
-import java.net.InetSocketAddress;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map.Entry;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import com.google.protobuf.ByteString;
 
 /**
  * A HDFS specific delegation token secret manager.
@@ -211,6 +214,18 @@ public class DelegationTokenSecretManager
     }
   }
 
+  /**
+   * Store the current state of the SecretManager for persistence
+   *
+   * @param out Output stream for writing into fsimage.
+   * @param sdPath String storage directory path
+   * @throws IOException
+   */
+  public synchronized void saveSecretManagerStateCompat(DataOutputStream out,
+      String sdPath) throws IOException {
+    serializerCompat.save(out, sdPath);
+  }
+
   public synchronized SecretManagerState saveSecretManagerState() {
     SecretManagerSection s = SecretManagerSection.newBuilder()
         .setCurrentId(currentId)
@@ -406,6 +421,56 @@ public class DelegationTokenSecretManager
       loadCurrentTokens(in);
     }
 
+    private void save(DataOutputStream out, String sdPath) throws IOException {
+      out.writeInt(currentId);
+      saveAllKeys(out, sdPath);
+      out.writeInt(delegationTokenSequenceNumber);
+      saveCurrentTokens(out, sdPath);
+    }
+
+    /**
+     * Private helper methods to save delegation keys and tokens in fsimage
+     */
+    private synchronized void saveCurrentTokens(DataOutputStream out,
+        String sdPath) throws IOException {
+      StartupProgress prog = NameNode.getStartupProgress();
+      Step step = new Step(StepType.DELEGATION_TOKENS, sdPath);
+      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
+      prog.setTotal(Phase.SAVING_CHECKPOINT, step, currentTokens.size());
+      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
+      out.writeInt(currentTokens.size());
+      Iterator<DelegationTokenIdentifier> iter = currentTokens.keySet()
+          .iterator();
+      while (iter.hasNext()) {
+        DelegationTokenIdentifier id = iter.next();
+        id.write(out);
+        DelegationTokenInformation info = currentTokens.get(id);
+        out.writeLong(info.getRenewDate());
+        counter.increment();
+      }
+      prog.endStep(Phase.SAVING_CHECKPOINT, step);
+    }
+
+    /*
+     * Save the current state of allKeys; the step total is the key count.
+     */
+    private synchronized void saveAllKeys(DataOutputStream out, String sdPath)
+        throws IOException {
+      StartupProgress prog = NameNode.getStartupProgress();
+      Step step = new Step(StepType.DELEGATION_KEYS, sdPath);
+      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
+      prog.setTotal(Phase.SAVING_CHECKPOINT, step, allKeys.size());
+      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
+      out.writeInt(allKeys.size());
+      Iterator<Integer> iter = allKeys.keySet().iterator();
+      while (iter.hasNext()) {
+        Integer key = iter.next();
+        allKeys.get(key).write(out);
+        counter.increment();
+      }
+      prog.endStep(Phase.SAVING_CHECKPOINT, step);
+    }
+
     /**
      * Private helper methods to load Delegation tokens from fsimage
      */

+ 57 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java

@@ -27,6 +27,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT;
 
 import java.io.DataInput;
+import java.io.DataOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -61,10 +62,10 @@ import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveStats;
 import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
 import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
-import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
-import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
+import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
+import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
 import org.apache.hadoop.hdfs.protocolPB.PBHelper;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
@@ -953,6 +954,18 @@ public final class CacheManager {
     }
   }
 
+  /**
+   * Saves the current state of the CacheManager to the DataOutput. Used
+   * to persist CacheManager state in the FSImage.
+   * @param out DataOutput to persist state
+   * @param sdPath path of the storage directory
+   * @throws IOException
+   */
+  public void saveStateCompat(DataOutputStream out, String sdPath)
+      throws IOException {
+    serializerCompat.save(out, sdPath);
+  }
+
   public PersistState saveState() throws IOException {
     ArrayList<CachePoolInfoProto> pools = Lists
         .newArrayListWithCapacity(cachePools.size());
@@ -1072,6 +1085,12 @@ public final class CacheManager {
   }
 
   private final class SerializerCompat {
+    private void save(DataOutputStream out, String sdPath) throws IOException {
+      out.writeLong(nextDirectiveId);
+      savePools(out, sdPath);
+      saveDirectives(out, sdPath);
+    }
+
     private void load(DataInput in) throws IOException {
       nextDirectiveId = in.readLong();
       // pools need to be loaded first since directives point to their parent pool
@@ -1079,6 +1098,42 @@ public final class CacheManager {
       loadDirectives(in);
     }
 
+    /**
+     * Save cache pools to fsimage
+     */
+    private void savePools(DataOutputStream out,
+        String sdPath) throws IOException {
+      StartupProgress prog = NameNode.getStartupProgress();
+      Step step = new Step(StepType.CACHE_POOLS, sdPath);
+      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
+      prog.setTotal(Phase.SAVING_CHECKPOINT, step, cachePools.size());
+      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
+      out.writeInt(cachePools.size());
+      for (CachePool pool: cachePools.values()) {
+        FSImageSerialization.writeCachePoolInfo(out, pool.getInfo(true));
+        counter.increment();
+      }
+      prog.endStep(Phase.SAVING_CHECKPOINT, step);
+    }
+
+    /*
+     * Save cache entries to fsimage
+     */
+    private void saveDirectives(DataOutputStream out, String sdPath)
+        throws IOException {
+      StartupProgress prog = NameNode.getStartupProgress();
+      Step step = new Step(StepType.CACHE_ENTRIES, sdPath);
+      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
+      prog.setTotal(Phase.SAVING_CHECKPOINT, step, directivesById.size());
+      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
+      out.writeInt(directivesById.size());
+      for (CacheDirective directive : directivesById.values()) {
+        FSImageSerialization.writeCacheDirectiveInfo(out, directive.toInfo());
+        counter.increment();
+      }
+      prog.endStep(Phase.SAVING_CHECKPOINT, step);
+    }
+
     /**
      * Load cache pools from fsimage
      */

+ 8 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java

@@ -41,6 +41,9 @@ public class CheckpointConf {
 
   /** maxium number of retries when merge errors occur */
   private final int maxRetriesOnMergeError;
+
+  /** The output dir for legacy OIV image */
+  private final String legacyOivImageDir;
   
   public CheckpointConf(Configuration conf) {
     checkpointCheckPeriod = conf.getLong(
@@ -53,6 +56,7 @@ public class CheckpointConf {
                                   DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT);
     maxRetriesOnMergeError = conf.getInt(DFS_NAMENODE_CHECKPOINT_MAX_RETRIES_KEY,
                                   DFS_NAMENODE_CHECKPOINT_MAX_RETRIES_DEFAULT);
+    legacyOivImageDir = conf.get(DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY);
     warnForDeprecatedConfigs(conf);
   }
   
@@ -83,4 +87,8 @@ public class CheckpointConf {
   public int getMaxRetriesOnMergeError() {
     return maxRetriesOnMergeError;
   }
+
+  public String getLegacyOivImageDir() {
+    return legacyOivImageDir;
+  }
 }

+ 19 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java

@@ -934,6 +934,25 @@ public class FSImage implements Closeable {
     storage.setMostRecentCheckpointInfo(txid, Time.now());
   }
 
+  /**
+   * Save FSimage in the legacy format. This is not for NN consumption,
+   * but for tools like OIV.
+   */
+  public void saveLegacyOIVImage(FSNamesystem source, String targetDir,
+      Canceler canceler) throws IOException {
+    FSImageCompression compression =
+        FSImageCompression.createCompression(conf);
+    long txid = getLastAppliedOrWrittenTxId();
+    SaveNamespaceContext ctx = new SaveNamespaceContext(source, txid,
+        canceler);
+    FSImageFormat.Saver saver = new FSImageFormat.Saver(ctx);
+    String imageFileName = NNStorage.getLegacyOIVImageFileName(txid);
+    File imageFile = new File(targetDir, imageFileName);
+    saver.save(imageFile, compression);
+    archivalManager.purgeOldLegacyOIVImages(targetDir, txid);
+  }
+
+
   /**
    * FSImageSaver is being run in a separate thread when saving
    * FSImage. There is one thread per each copy of the image.

+ 381 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java

@@ -21,14 +21,20 @@ import static org.apache.hadoop.util.Time.now;
 
 import java.io.DataInput;
 import java.io.DataInputStream;
+import java.io.DataOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.security.DigestInputStream;
+import java.security.DigestOutputStream;
 import java.security.MessageDigest;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 
@@ -50,6 +56,7 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
@@ -60,6 +67,7 @@ import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
+import org.apache.hadoop.hdfs.util.ReadOnlyList;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.MD5Hash;
 import org.apache.hadoop.io.Text;
@@ -69,8 +77,105 @@ import com.google.common.base.Preconditions;
 import com.google.common.annotations.VisibleForTesting;
 
 /**
- * This class loads and stores the FSImage of the NameNode. The file
- * src/main/proto/fsimage.proto describes the on-disk layout of the FSImage.
+ * Contains inner classes for reading or writing the on-disk format for
+ * FSImages.
+ *
+ * In particular, the format of the FSImage looks like:
+ * <pre>
+ * FSImage {
+ *   layoutVersion: int, namespaceID: int, numberItemsInFSDirectoryTree: long,
+ *   namesystemGenerationStampV1: long, namesystemGenerationStampV2: long,
+ *   generationStampAtBlockIdSwitch: long, lastAllocatedBlockId: long,
+ *   transactionID: long, snapshotCounter: int, numberOfSnapshots: int,
+ *   numOfSnapshottableDirs: int,
+ *   {FSDirectoryTree, FilesUnderConstruction, SecretManagerState} (can be compressed)
+ * }
+ *
+ * FSDirectoryTree (if {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is supported) {
+ *   INodeInfo of root, numberOfChildren of root: int
+ *   [list of INodeInfo of root's children],
+ *   [list of INodeDirectoryInfo of root's directory children]
+ * }
+ *
+ * FSDirectoryTree (if {@link Feature#FSIMAGE_NAME_OPTIMIZATION} not supported){
+ *   [list of INodeInfo of INodes in topological order]
+ * }
+ *
+ * INodeInfo {
+ *   {
+ *     localName: short + byte[]
+ *   } when {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is supported
+ *   or
+ *   {
+ *     fullPath: byte[]
+ *   } when {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is not supported
+ *   replicationFactor: short, modificationTime: long,
+ *   accessTime: long, preferredBlockSize: long,
+ *   numberOfBlocks: int (-1 for INodeDirectory, -2 for INodeSymLink),
+ *   {
+ *     nsQuota: long, dsQuota: long,
+ *     {
+ *       isINodeSnapshottable: byte,
+ *       isINodeWithSnapshot: byte (if isINodeSnapshottable is false)
+ *     } (when {@link Feature#SNAPSHOT} is supported),
+ *     fsPermission: short, PermissionStatus
+ *   } for INodeDirectory
+ *   or
+ *   {
+ *     symlinkString, fsPermission: short, PermissionStatus
+ *   } for INodeSymlink
+ *   or
+ *   {
+ *     [list of BlockInfo]
+ *     [list of FileDiff]
+ *     {
+ *       isINodeFileUnderConstructionSnapshot: byte,
+ *       {clientName: short + byte[], clientMachine: short + byte[]} (when
+ *       isINodeFileUnderConstructionSnapshot is true),
+ *     } (when {@link Feature#SNAPSHOT} is supported and writing snapshotINode),
+ *     fsPermission: short, PermissionStatus
+ *   } for INodeFile
+ * }
+ *
+ * INodeDirectoryInfo {
+ *   fullPath of the directory: short + byte[],
+ *   numberOfChildren: int, [list of INodeInfo of children INode],
+ *   {
+ *     numberOfSnapshots: int,
+ *     [list of Snapshot] (when NumberOfSnapshots is positive),
+ *     numberOfDirectoryDiffs: int,
+ *     [list of DirectoryDiff] (NumberOfDirectoryDiffs is positive),
+ *     number of children that are directories,
+ *     [list of INodeDirectoryInfo of the directory children] (includes
+ *     snapshot copies of deleted sub-directories)
+ *   } (when {@link Feature#SNAPSHOT} is supported),
+ * }
+ *
+ * Snapshot {
+ *   snapshotID: int, root of Snapshot: INodeDirectoryInfo (its local name is
+ *   the name of the snapshot)
+ * }
+ *
+ * DirectoryDiff {
+ *   full path of the root of the associated Snapshot: short + byte[],
+ *   childrenSize: int,
+ *   isSnapshotRoot: byte,
+ *   snapshotINodeIsNotNull: byte (when isSnapshotRoot is false),
+ *   snapshotINode: INodeDirectory (when SnapshotINodeIsNotNull is true), Diff
+ * }
+ *
+ * Diff {
+ *   createdListSize: int, [Local name of INode in created list],
+ *   deletedListSize: int, [INode in deleted list: INodeInfo]
+ * }
+ *
+ * FileDiff {
+ *   full path of the root of the associated Snapshot: short + byte[],
+ *   fileSize: long,
+ *   snapshotINodeIsNotNull: byte,
+ *   snapshotINode: INodeFile (when SnapshotINodeIsNotNull is true), Diff
+ * }
+ * </pre>
  */
 @InterfaceAudience.Private
 @InterfaceStability.Evolving
@@ -580,6 +685,11 @@ public class FSImageFormat {
       }
     }
 
+    /** @return The FSDirectory of the namesystem where the fsimage is loaded */
+    public FSDirectory getFSDirectoryInLoading() {
+      return namesystem.dir;
+    }
+
     public INode loadINodeWithLocalName(boolean isSnapshotINode, DataInput in,
         boolean updateINodeMap) throws IOException {
       return loadINodeWithLocalName(isSnapshotINode, in, updateINodeMap, null);
@@ -1009,7 +1119,7 @@ public class FSImageFormat {
       + " option to automatically rename these paths during upgrade.";
 
   /**
-   * Same as {@link #renameReservedPathsOnUpgrade}, but for a single
+   * Same as {@link #renameReservedPathsOnUpgrade(String)}, but for a single
    * byte array path component.
    */
   private static byte[] renameReservedComponentOnUpgrade(byte[] component,
@@ -1029,7 +1139,7 @@ public class FSImageFormat {
   }
 
   /**
-   * Same as {@link #renameReservedPathsOnUpgrade}, but for a single
+   * Same as {@link #renameReservedPathsOnUpgrade(String)}, but for a single
    * byte array path component.
    */
   private static byte[] renameReservedRootComponentOnUpgrade(byte[] component,
@@ -1050,4 +1160,271 @@ public class FSImageFormat {
     }
     return component;
   }
+
+  /**
+   * A one-shot class responsible for writing an image file.
+   * The save() function should be called once, after which the getter
+   * functions may be used to retrieve information about the file that was written.
+   *
+   * This is replaced by the PB-based FSImage. The class is to maintain
+   * compatibility for the external fsimage tool.
+   */
+  @Deprecated
+  static class Saver {
+    private static final int LAYOUT_VERSION = -51;
+    private final SaveNamespaceContext context;
+    /** Set to true once an image has been written */
+    private boolean saved = false;
+
+    /** The MD5 checksum of the file that was written */
+    private MD5Hash savedDigest;
+    private final ReferenceMap referenceMap = new ReferenceMap();
+
+    private final Map<Long, INodeFile> snapshotUCMap =
+        new HashMap<Long, INodeFile>();
+
+    /** @throws IllegalStateException if the instance has not yet saved an image */
+    private void checkSaved() {
+      if (!saved) {
+        throw new IllegalStateException("FSImageSaver has not saved an image");
+      }
+    }
+
+    /** @throws IllegalStateException if the instance has already saved an image */
+    private void checkNotSaved() {
+      if (saved) {
+        throw new IllegalStateException("FSImageSaver has already saved an image");
+      }
+    }
+
+
+    Saver(SaveNamespaceContext context) {
+      this.context = context;
+    }
+
+    /**
+     * Return the MD5 checksum of the image file that was saved.
+     */
+    MD5Hash getSavedDigest() {
+      checkSaved();
+      return savedDigest;
+    }
+
+    void save(File newFile, FSImageCompression compression) throws IOException {
+      checkNotSaved();
+
+      final FSNamesystem sourceNamesystem = context.getSourceNamesystem();
+      final INodeDirectory rootDir = sourceNamesystem.dir.rootDir;
+      final long numINodes = rootDir.getDirectoryWithQuotaFeature()
+          .getSpaceConsumed().get(Quota.NAMESPACE);
+      String sdPath = newFile.getParentFile().getParentFile().getAbsolutePath();
+      Step step = new Step(StepType.INODES, sdPath);
+      StartupProgress prog = NameNode.getStartupProgress();
+      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
+      prog.setTotal(Phase.SAVING_CHECKPOINT, step, numINodes);
+      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
+      long startTime = now();
+      //
+      // Write out data
+      //
+      MessageDigest digester = MD5Hash.getDigester();
+      FileOutputStream fout = new FileOutputStream(newFile);
+      DigestOutputStream fos = new DigestOutputStream(fout, digester);
+      DataOutputStream out = new DataOutputStream(fos);
+      try {
+        out.writeInt(LAYOUT_VERSION);
+        LayoutFlags.write(out);
+        // We use the non-locked version of getNamespaceInfo here since
+        // the coordinating thread of saveNamespace already has read-locked
+        // the namespace for us. If we attempt to take another readlock
+        // from the actual saver thread, there's a potential of a
+        // fairness-related deadlock. See the comments on HDFS-2223.
+        out.writeInt(sourceNamesystem.unprotectedGetNamespaceInfo()
+            .getNamespaceID());
+        out.writeLong(numINodes);
+        out.writeLong(sourceNamesystem.getGenerationStampV1());
+        out.writeLong(sourceNamesystem.getGenerationStampV2());
+        out.writeLong(sourceNamesystem.getGenerationStampAtblockIdSwitch());
+        out.writeLong(sourceNamesystem.getLastAllocatedBlockId());
+        out.writeLong(context.getTxId());
+        out.writeLong(sourceNamesystem.getLastInodeId());
+
+
+        sourceNamesystem.getSnapshotManager().write(out);
+
+        // write compression info and set up compressed stream
+        out = compression.writeHeaderAndWrapStream(fos);
+        LOG.info("Saving image file " + newFile +
+                 " using " + compression);
+
+        // save the root
+        saveINode2Image(rootDir, out, false, referenceMap, counter);
+        // save the rest of the nodes
+        saveImage(rootDir, out, true, false, counter);
+        prog.endStep(Phase.SAVING_CHECKPOINT, step);
+        // Now that the step is finished, set counter equal to total to adjust
+        // for possible under-counting due to reference inodes.
+        prog.setCount(Phase.SAVING_CHECKPOINT, step, numINodes);
+        // save files under construction
+        // TODO: for HDFS-5428, since we cannot break the compatibility of
+        // fsimage, we store part of the under-construction files that are only
+        // in snapshots in this "under-construction-file" section. As a
+        // temporary solution, we use "/.reserved/.inodes/<inodeid>" as their
+        // paths, so that when loading fsimage we do not put them into the lease
+        // map. In the future, we can remove this hack when we can bump the
+        // layout version.
+        sourceNamesystem.saveFilesUnderConstruction(out, snapshotUCMap);
+
+        context.checkCancelled();
+        sourceNamesystem.saveSecretManagerStateCompat(out, sdPath);
+        context.checkCancelled();
+        sourceNamesystem.getCacheManager().saveStateCompat(out, sdPath);
+        context.checkCancelled();
+        out.flush();
+        context.checkCancelled();
+        fout.getChannel().force(true);
+      } finally {
+        out.close();
+      }
+
+      saved = true;
+      // set md5 of the saved image
+      savedDigest = new MD5Hash(digester.digest());
+
+      LOG.info("Image file " + newFile + " of size " + newFile.length() +
+          " bytes saved in " + (now() - startTime)/1000 + " seconds.");
+    }
+
+    /**
+     * Save children INodes.
+     * @param children The list of children INodes
+     * @param out The DataOutputStream to write
+     * @param inSnapshot Whether the parent directory or its ancestor is in
+     *                   the deleted list of some snapshot (caused by rename or
+     *                   deletion)
+     * @param counter Counter to increment for namenode startup progress
+     * @return Number of children that are directory
+     */
+    private int saveChildren(ReadOnlyList<INode> children,
+        DataOutputStream out, boolean inSnapshot, Counter counter)
+        throws IOException {
+      // Write normal children INode.
+      out.writeInt(children.size());
+      int dirNum = 0;
+      int i = 0;
+      for(INode child : children) {
+        // print all children first
+        // TODO: for HDFS-5428, we cannot change the format/content of fsimage
+        // here, thus even if the parent directory is in snapshot, we still
+        // do not handle INodeUC as those stored in deleted list
+        saveINode2Image(child, out, false, referenceMap, counter);
+        if (child.isDirectory()) {
+          dirNum++;
+        } else if (inSnapshot && child.isFile()
+            && child.asFile().isUnderConstruction()) {
+          this.snapshotUCMap.put(child.getId(), child.asFile());
+        }
+        if (i++ % 50 == 0) {
+          context.checkCancelled();
+        }
+      }
+      return dirNum;
+    }
+
+    /**
+     * Save file tree image starting from the given root.
+     * This is a recursive procedure, which first saves all children and
+     * snapshot diffs of a current directory and then moves inside the
+     * sub-directories.
+     *
+     * @param current The current node
+     * @param out The DataOutputStream to write the image
+     * @param toSaveSubtree Whether or not to save the subtree to fsimage. For
+     *                      reference node, its subtree may already have been
+     *                      saved before. When false, only the inode id is
+     *                      written.
+     * @param inSnapshot Whether the current directory is in snapshot
+     * @param counter Counter to increment for namenode startup progress
+     * @throws IOException if writing to {@code out} fails
+     */
+    private void saveImage(INodeDirectory current, DataOutputStream out,
+        boolean toSaveSubtree, boolean inSnapshot, Counter counter)
+        throws IOException {
+      // 1. Write the inode id of the directory
+      out.writeLong(current.getId());
+
+      if (!toSaveSubtree) {
+        return;
+      }
+
+      final ReadOnlyList<INode> children = current
+          .getChildrenList(Snapshot.CURRENT_STATE_ID);
+      int dirNum = 0;
+      List<INodeDirectory> snapshotDirs = null;
+      DirectoryWithSnapshotFeature sf = current.getDirectoryWithSnapshotFeature();
+      if (sf != null) {
+        // Directories reachable only through snapshots are counted as
+        // sub-directories too; they are saved after the current children.
+        snapshotDirs = new ArrayList<INodeDirectory>();
+        sf.getSnapshotDirectory(snapshotDirs);
+        dirNum += snapshotDirs.size();
+      }
+
+      // 2. Write INodeDirectorySnapshottable#snapshotsByNames to record all
+      // Snapshots
+      if (current instanceof INodeDirectorySnapshottable) {
+        INodeDirectorySnapshottable snapshottableNode =
+            (INodeDirectorySnapshottable) current;
+        SnapshotFSImageFormat.saveSnapshots(snapshottableNode, out);
+      } else {
+        out.writeInt(-1); // # of snapshots
+      }
+
+      // 3. Write children INode
+      dirNum += saveChildren(children, out, inSnapshot, counter);
+
+      // 4. Write DirectoryDiff lists, if there is any.
+      SnapshotFSImageFormat.saveDirectoryDiffList(current, out, referenceMap);
+
+      // Write sub-tree of sub-directories, including possible snapshots of
+      // deleted sub-directories
+      out.writeInt(dirNum); // the number of sub-directories
+      for(INode child : children) {
+        if(!child.isDirectory()) {
+          continue;
+        }
+        // make sure we only save the subtree under a reference node once
+        boolean toSave = child.isReference() ?
+            referenceMap.toProcessSubtree(child.getId()) : true;
+        saveImage(child.asDirectory(), out, toSave, inSnapshot, counter);
+      }
+      if (snapshotDirs != null) {
+        for (INodeDirectory subDir : snapshotDirs) {
+          // make sure we only save the subtree under a reference node once
+          boolean toSave = subDir.getParentReference() != null ?
+              referenceMap.toProcessSubtree(subDir.getId()) : true;
+          // These directories come from snapshots, so inSnapshot is forced
+          // to true for their subtrees.
+          saveImage(subDir, out, toSave, true, counter);
+        }
+      }
+    }
+
+    /**
+     * Writes one inode through {@link FSImageSerialization} and then bumps
+     * the startup-progress counter, except for reference inodes.
+     *
+     * @param inode INode to save
+     * @param out DataOutputStream to receive inode
+     * @param writeUnderConstruction boolean true if this is under construction
+     * @param referenceMap ReferenceMap containing reference inodes
+     * @param counter Counter to increment for namenode startup progress
+     * @throws IOException thrown if there is an I/O error
+     */
+    private void saveINode2Image(INode inode, DataOutputStream out,
+        boolean writeUnderConstruction, ReferenceMap referenceMap,
+        Counter counter) throws IOException {
+      FSImageSerialization.saveINode2Image(inode, out, writeUnderConstruction,
+        referenceMap);
+      if (inode instanceof INodeReference) {
+        // Reference inodes are deliberately left out of the count: at this
+        // point it is too difficult to tell whether the reference counts
+        // toward quota.
+        return;
+      }
+      counter.increment();
+    }
+  }
 }

+ 213 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java

@@ -17,6 +17,11 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.Path;
@@ -31,20 +36,21 @@ import org.apache.hadoop.hdfs.protocol.LayoutVersion;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
 import org.apache.hadoop.hdfs.util.XMLUtils;
 import org.apache.hadoop.hdfs.util.XMLUtils.InvalidXmlException;
 import org.apache.hadoop.hdfs.util.XMLUtils.Stanza;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.ShortWritable;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableUtils;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
-import java.io.IOException;
+import com.google.common.base.Preconditions;
 
 /**
  * Static utility functions for serializing various pieces of data in the correct
@@ -82,6 +88,26 @@ public class FSImageSerialization {
     final ShortWritable U_SHORT = new ShortWritable();
     final IntWritable U_INT = new IntWritable();
     final LongWritable U_LONG = new LongWritable();
+    final FsPermission FILE_PERM = new FsPermission((short) 0);
+  }
+
+  /**
+   * Writes the user name, group name and permission bits of an inode.
+   * The {@link FsPermission} instance held in {@code TL_DATA} is reused as
+   * a scratch object and overwritten with the inode's permission bits.
+   */
+  private static void writePermissionStatus(INodeAttributes inode,
+      DataOutput out) throws IOException {
+    final FsPermission scratch = TL_DATA.get().FILE_PERM;
+    scratch.fromShort(inode.getFsPermissionShort());
+    final String owner = inode.getUserName();
+    final String group = inode.getGroupName();
+    PermissionStatus.write(out, owner, group, scratch);
+  }
+
+  private static void writeBlocks(final Block[] blocks,
+      final DataOutput out) throws IOException {
+    if (blocks == null) {
+      out.writeInt(0);
+    } else {
+      out.writeInt(blocks.length);
+      for (Block blk : blocks) {
+        blk.write(out);
+      }
+    }
   }
 
   // Helper function that reads in an INodeUnderConstruction
@@ -127,6 +153,183 @@ public class FSImageSerialization {
     return file;
   }
 
+  // Helper function that writes an INodeUnderConstruction
+  // into the output stream
+  //
+  static void writeINodeUnderConstruction(DataOutputStream out, INodeFile cons,
+      String path) throws IOException {
+    writeString(path, out);
+    out.writeLong(cons.getId());
+    out.writeShort(cons.getFileReplication());
+    out.writeLong(cons.getModificationTime());
+    out.writeLong(cons.getPreferredBlockSize());
+
+    writeBlocks(cons.getBlocks(), out);
+    cons.getPermissionStatus().write(out);
+
+    // Lease holder: client name, then client machine.
+    FileUnderConstructionFeature uc = cons.getFileUnderConstructionFeature();
+    writeString(uc.getClientName(), out);
+    writeString(uc.getClientMachine(), out);
+
+    out.writeInt(0); //  do not store locations of last block
+  }
+
+  /**
+   * Serialize a {@link INodeFile} node
+   * @param file The node to write
+   * @param out The {@link DataOutput} where the fields are written
+   * @param writeUnderConstruction Whether to write the under-construction
+   *          flag and, when the file is under construction, the client
+   *          name and client machine
+   */
+  public static void writeINodeFile(INodeFile file, DataOutput out,
+      boolean writeUnderConstruction) throws IOException {
+    writeLocalName(file, out);
+    out.writeLong(file.getId());
+    out.writeShort(file.getFileReplication());
+    out.writeLong(file.getModificationTime());
+    out.writeLong(file.getAccessTime());
+    out.writeLong(file.getPreferredBlockSize());
+
+    writeBlocks(file.getBlocks(), out);
+    SnapshotFSImageFormat.saveFileDiffList(file, out);
+
+    if (writeUnderConstruction) {
+      // The flag is always written; the lease holder only when it is set.
+      final boolean underConstruction = file.isUnderConstruction();
+      out.writeBoolean(underConstruction);
+      if (underConstruction) {
+        final FileUnderConstructionFeature feature =
+            file.getFileUnderConstructionFeature();
+        writeString(feature.getClientName(), out);
+        writeString(feature.getClientMachine(), out);
+      }
+    }
+
+    writePermissionStatus(file, out);
+  }
+
+  /**
+   * Serialize an {@link INodeFileAttributes}.
+   * Fields are written in this order: local name, permission status,
+   * modification time, access time, replication, preferred block size.
+   */
+  public static void writeINodeFileAttributes(INodeFileAttributes file,
+      DataOutput out) throws IOException {
+    writeLocalName(file, out);
+    writePermissionStatus(file, out);
+    out.writeLong(file.getModificationTime());
+    out.writeLong(file.getAccessTime());
+
+    out.writeShort(file.getFileReplication());
+    out.writeLong(file.getPreferredBlockSize());
+  }
+
+  /** Writes the namespace quota followed by the diskspace quota. */
+  private static void writeQuota(Quota.Counts quota, DataOutput out)
+      throws IOException {
+    out.writeLong(quota.get(Quota.NAMESPACE));
+    out.writeLong(quota.get(Quota.DISKSPACE));
+  }
+
+  /**
+   * Serialize a {@link INodeDirectory}. The slots that only apply to files
+   * (replication, access time, preferred block size) are written as zero
+   * and the block count slot is written as -1.
+   * @param node The node to write
+   * @param out The {@link DataOutput} where the fields are written
+   */
+  public static void writeINodeDirectory(INodeDirectory node, DataOutput out)
+      throws IOException {
+    writeLocalName(node, out);
+    out.writeLong(node.getId());
+    out.writeShort(0);  // replication
+    out.writeLong(node.getModificationTime());
+    out.writeLong(0);   // access time
+    out.writeLong(0);   // preferred block size
+    out.writeInt(-1);   // # of blocks
+
+    writeQuota(node.getQuotaCounts(), out);
+
+    // Snapshottable flag; the with-snapshot flag follows only when the
+    // directory is not snapshottable.
+    final boolean snapshottable = node instanceof INodeDirectorySnapshottable;
+    out.writeBoolean(snapshottable);
+    if (!snapshottable) {
+      out.writeBoolean(node.isWithSnapshot());
+    }
+
+    writePermissionStatus(node, out);
+  }
+
+  /**
+   * Serialize a {@link INodeDirectoryAttributes}: local name, permission
+   * status, modification time and quota counts, in that order.
+   * @param a The attributes to write
+   * @param out The {@link DataOutput} where the fields are written
+   */
+  public static void writeINodeDirectoryAttributes(
+      INodeDirectoryAttributes a, DataOutput out) throws IOException {
+    writeLocalName(a, out);
+    writePermissionStatus(a, out);
+    out.writeLong(a.getModificationTime());
+    writeQuota(a.getQuotaCounts(), out);
+  }
+
+  /**
+   * Serialize a {@link INodeSymlink} node. The replication, time and block
+   * size slots are written as zero, followed by the symlink target string
+   * and the permission status.
+   * @param node The node to write
+   * @param out The {@link DataOutput} where the fields are written
+   */
+  private static void writeINodeSymlink(INodeSymlink node, DataOutput out)
+      throws IOException {
+    writeLocalName(node, out);
+    out.writeLong(node.getId());
+    out.writeShort(0);  // replication
+    out.writeLong(0);   // modification time
+    out.writeLong(0);   // access time
+    out.writeLong(0);   // preferred block size
+    // NOTE(review): -2 in the block-count slot presumably tells the loader
+    // this record is a symlink -- confirm against the reader.
+    out.writeInt(-2);   // # of blocks
+
+    Text.writeString(out, node.getSymlinkString());
+    writePermissionStatus(node, out);
+  }
+
+  /**
+   * Serialize a {@link INodeReference} node: the common inode header with
+   * -3 in the block-count slot, a flag telling whether the reference is a
+   * {@link INodeReference.WithName}, the matching snapshot id, and finally
+   * the referred {@link INodeReference.WithCount} via the reference map.
+   */
+  private static void writeINodeReference(INodeReference ref, DataOutput out,
+      boolean writeUnderConstruction, ReferenceMap referenceMap
+      ) throws IOException {
+    writeLocalName(ref, out);
+    out.writeLong(ref.getId());
+    out.writeShort(0);  // replication
+    out.writeLong(0);   // modification time
+    out.writeLong(0);   // access time
+    out.writeLong(0);   // preferred block size
+    out.writeInt(-3);   // # of blocks
+
+    final boolean isWithName = ref instanceof INodeReference.WithName;
+    out.writeBoolean(isWithName);
+
+    if (isWithName) {
+      // last snapshot id
+      out.writeInt(((INodeReference.WithName) ref).getLastSnapshotId());
+    } else {
+      // Anything that is not a WithName must be a DstReference.
+      Preconditions.checkState(ref instanceof INodeReference.DstReference);
+      // dst snapshot id
+      out.writeInt(((INodeReference.DstReference) ref).getDstSnapshotId());
+    }
+
+    final INodeReference.WithCount referred
+        = (INodeReference.WithCount)ref.getReferredINode();
+    referenceMap.writeINodeReferenceWithCount(referred, out,
+        writeUnderConstruction);
+  }
+
+  /**
+   * Save one inode's attributes to the image, dispatching on the inode
+   * kind. The reference check comes first, then directory, symlink and
+   * file; an inode matching none of these is not written.
+   */
+  public static void saveINode2Image(INode node, DataOutput out,
+      boolean writeUnderConstruction, ReferenceMap referenceMap)
+      throws IOException {
+    if (node.isReference()) {
+      writeINodeReference(node.asReference(), out, writeUnderConstruction,
+          referenceMap);
+      return;
+    }
+    if (node.isDirectory()) {
+      writeINodeDirectory(node.asDirectory(), out);
+      return;
+    }
+    if (node.isSymlink()) {
+      writeINodeSymlink(node.asSymlink(), out);
+      return;
+    }
+    if (node.isFile()) {
+      writeINodeFile(node.asFile(), out, writeUnderConstruction);
+    }
+  }
+
   // This should be reverted to package private once the ImageLoader
   // code is moved into this package. This method should not be called
   // by other code.
@@ -226,6 +429,12 @@ public class FSImageSerialization {
     in.readFully(createdNodeName);
     return createdNodeName;
   }
+
+  /** Writes the inode's local name bytes via {@link #writeBytes}. */
+  private static void writeLocalName(INodeAttributes inode, DataOutput out)
+      throws IOException {
+    writeBytes(inode.getLocalNameBytes(), out);
+  }
   
   public static void writeBytes(byte[] data, DataOutput out)
       throws IOException {

+ 46 - 11
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -85,17 +85,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
 import static org.apache.hadoop.util.Time.now;
 
-import java.io.BufferedWriter;
-import java.io.ByteArrayInputStream;
-import java.io.DataInput;
-import java.io.DataInputStream;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.io.StringWriter;
+import java.io.*;
 import java.lang.management.ManagementFactory;
 import java.net.InetAddress;
 import java.net.URI;
@@ -6091,6 +6081,42 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     leaseManager.changeLease(src, dst);
   }
 
+  /**
+   * Serializes leases.
+   */
+  void saveFilesUnderConstruction(DataOutputStream out,
+      Map<Long, INodeFile> snapshotUCMap) throws IOException {
+    // This is run by an inferior thread of saveNamespace, which holds a read
+    // lock on our behalf. If we took the read lock here, we could block
+    // for fairness if a writer is waiting on the lock.
+    synchronized (leaseManager) {
+      Map<String, INodeFile> nodes = leaseManager.getINodesUnderConstruction();
+      for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) {
+        // TODO: for HDFS-5428, because of rename operations, some
+        // under-construction files that are
+        // in the current fs directory can also be captured in the
+        // snapshotUCMap. We should remove them from the snapshotUCMap.
+        snapshotUCMap.remove(entry.getValue().getId());
+      }
+
+      out.writeInt(nodes.size() + snapshotUCMap.size()); // write the size
+      for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) {
+        FSImageSerialization.writeINodeUnderConstruction(
+            out, entry.getValue(), entry.getKey());
+      }
+      for (Map.Entry<Long, INodeFile> entry : snapshotUCMap.entrySet()) {
+        // for those snapshot INodeFileUC, we use "/.reserved/.inodes/<inodeid>"
+        // as their paths
+        StringBuilder b = new StringBuilder();
+        b.append(FSDirectory.DOT_RESERVED_PATH_PREFIX)
+            .append(Path.SEPARATOR).append(FSDirectory.DOT_INODES_STRING)
+            .append(Path.SEPARATOR).append(entry.getValue().getId());
+        FSImageSerialization.writeINodeUnderConstruction(
+            out, entry.getValue(), b.toString());
+      }
+    }
+  }
+
   /**
    * @return all the under-construction files in the lease map
    */
@@ -6377,6 +6403,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     getEditLog().logSync();
   }
 
+  /**
+   * Saves the state of the delegation token secret manager by delegating
+   * to {@code dtSecretManager.saveSecretManagerStateCompat}.
+   * @param out stream to which the secret manager state is written
+   * @param sdPath String storage directory path
+   */
+  void saveSecretManagerStateCompat(DataOutputStream out, String sdPath)
+      throws IOException {
+    dtSecretManager.saveSecretManagerStateCompat(out, sdPath);
+  }
+
   SecretManagerState saveSecretManagerState() {
     return dtSecretManager.saveSecretManagerState();
   }

+ 6 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java

@@ -77,7 +77,8 @@ public class NNStorage extends Storage implements Closeable,
     IMAGE_ROLLBACK("fsimage_rollback"),
     EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format
     EDITS_INPROGRESS ("edits_inprogress"),
-    EDITS_TMP ("edits_tmp");
+    EDITS_TMP ("edits_tmp"),
+    IMAGE_LEGACY_OIV ("fsimage_legacy_oiv");  // For pre-PB format
 
     private String fileName = null;
     private NameNodeFile(String name) { this.fileName = name; }
@@ -693,6 +694,10 @@ public class NNStorage extends Storage implements Closeable,
     return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid);
   }
 
+  /**
+   * @param txid transaction id the image corresponds to
+   * @return the file name of the legacy (pre-PB format) OIV image for the
+   *         given txid, built from {@link NameNodeFile#IMAGE_LEGACY_OIV}
+   */
+  public static String getLegacyOIVImageFileName(long txid) {
+    return getNameNodeFileName(NameNodeFile.IMAGE_LEGACY_OIV, txid);
+  }
+
   private static String getNameNodeFileName(NameNodeFile nnf, long txid) {
     return String.format("%s_%019d", nnf.getName(), txid);
   }

+ 56 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java

@@ -18,11 +18,13 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
 import java.io.File;
+import java.io.FilenameFilter;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.EnumSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.TreeSet;
 
@@ -233,4 +235,58 @@ public class NNStorageRetentionManager {
       }      
     }
   }
+
+  /**
+   * Delete old OIV fsimages. Since the target dir is not a full blown
+   * storage directory, we simply list and keep the latest ones. For the
+   * same reason, no storage inspector is used.
+   *
+   * The images with the lowest transaction ids are deleted until at most
+   * {@code numCheckpointsToRetain} remain. Failures are logged and never
+   * thrown: a directory that cannot be listed is skipped with a warning.
+   *
+   * @param dir directory that holds the legacy OIV images
+   * @param txid not consulted by this method
+   */
+  void purgeOldLegacyOIVImages(String dir, long txid) {
+    File oivImageDir = new File(dir);
+    final String oivImagePrefix = NameNodeFile.IMAGE_LEGACY_OIV.getName();
+    String filesInStorage[];
+
+    // Get the listing
+    filesInStorage = oivImageDir.list(new FilenameFilter() {
+      @Override
+      public boolean accept(File dir, String name) {
+        return name.matches(oivImagePrefix + "_(\\d+)");
+      }
+    });
+
+    // File#list returns null when the path is not a directory or an I/O
+    // error occurs. There is nothing to purge then, and dereferencing the
+    // result would throw a NullPointerException.
+    if (filesInStorage == null) {
+      LOG.warn("Unable to list legacy OIV images in " + oivImageDir);
+      return;
+    }
+
+    // Check whether there is any work to do.
+    if (filesInStorage.length <= numCheckpointsToRetain) {
+      return;
+    }
+
+    // Create a sorted list of txids from the file names.
+    TreeSet<Long> sortedTxIds = new TreeSet<Long>();
+    for (String fName : filesInStorage) {
+      // Extract the transaction id from the file name.
+      long fTxId;
+      try {
+        fTxId = Long.parseLong(fName.substring(oivImagePrefix.length() + 1));
+      } catch (NumberFormatException nfe) {
+        // This should not happen since we have already filtered it.
+        // Log and continue.
+        LOG.warn("Invalid file name. Skipping " + fName);
+        continue;
+      }
+      sortedTxIds.add(Long.valueOf(fTxId));
+    }
+
+    // The set iterates in ascending txid order, so the oldest images are
+    // deleted first.
+    int numFilesToDelete = sortedTxIds.size() - numCheckpointsToRetain;
+    Iterator<Long> iter = sortedTxIds.iterator();
+    while (numFilesToDelete > 0 && iter.hasNext()) {
+      long txIdVal = iter.next().longValue();
+      String fileName = NNStorage.getLegacyOIVImageFileName(txIdVal);
+      LOG.info("Deleting " + fileName);
+      File fileToDelete = new File(oivImageDir, fileName);
+      if (!fileToDelete.delete()) {
+        // deletion failed.
+        LOG.warn("Failed to delete image file: " + fileToDelete);
+      }
+      numFilesToDelete--;
+    }
+  }
 }

+ 16 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java

@@ -62,6 +62,7 @@ import org.apache.hadoop.hdfs.server.namenode.NNStorageRetentionManager.StorageP
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
+import org.apache.hadoop.hdfs.util.Canceler;
 import org.apache.hadoop.http.HttpConfig;
 import org.apache.hadoop.http.HttpServer2;
 import org.apache.hadoop.io.MD5Hash;
@@ -125,6 +126,7 @@ public class SecondaryNameNode implements Runnable,
 
   private Thread checkpointThread;
   private ObjectName nameNodeStatusBeanName;
+  private String legacyOivImageDir;
 
   @Override
   public String toString() {
@@ -289,6 +291,9 @@ public class SecondaryNameNode implements Runnable,
           NetUtils.getHostPortString(httpsAddress));
     }
 
+    legacyOivImageDir = conf.get(
+        DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY);
+
     LOG.info("Checkpoint Period   :" + checkpointConf.getPeriod() + " secs "
         + "(" + checkpointConf.getPeriod() / 60 + " min)");
     LOG.info("Log Size Trigger    :" + checkpointConf.getTxnCount() + " txns");
@@ -497,6 +502,7 @@ public class SecondaryNameNode implements Runnable,
    * @return if the image is fetched from primary or not
    */
   @VisibleForTesting
+  @SuppressWarnings("deprecated")
   public boolean doCheckpoint() throws IOException {
     checkpointImage.ensureCurrentDirExists();
     NNStorage dstStorage = checkpointImage.getStorage();
@@ -559,11 +565,18 @@ public class SecondaryNameNode implements Runnable,
 
     LOG.warn("Checkpoint done. New Image Size: " 
              + dstStorage.getFsImageName(txid).length());
-    
+
+    if (legacyOivImageDir != null && !legacyOivImageDir.isEmpty()) {
+      try {
+        checkpointImage.saveLegacyOIVImage(namesystem, legacyOivImageDir,
+            new Canceler());
+      } catch (IOException e) {
+        LOG.warn("Failed to write legacy OIV image: ", e);
+      }
+    }
     return loadImage;
   }
-  
-  
+
   /**
    * @param opts The parameters passed to this program.
    * @exception Exception if the filesystem does not exist.

+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java

@@ -183,6 +183,12 @@ public class StandbyCheckpointer {
       txid = img.getStorage().getMostRecentCheckpointTxId();
       assert txid == thisCheckpointTxId : "expected to save checkpoint at txid=" +
         thisCheckpointTxId + " but instead saved at txid=" + txid;
+
+      // Save the legacy OIV image, if the output dir is defined.
+      String outputDir = checkpointConf.getLegacyOivImageDir();
+      if (outputDir != null && !outputDir.isEmpty()) {
+        img.saveLegacyOIVImage(namesystem, outputDir, canceler);
+      }
     } finally {
       namesystem.longReadUnlock();
     }

+ 13 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiff.java

@@ -17,13 +17,17 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
-import com.google.common.base.Preconditions;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
 import org.apache.hadoop.hdfs.server.namenode.INode;
 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.INodeAttributes;
 import org.apache.hadoop.hdfs.server.namenode.Quota;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
 
-import java.util.List;
+import com.google.common.base.Preconditions;
 
 /**
  * The difference of an inode between in two snapshots.
@@ -128,4 +132,11 @@ abstract class AbstractINodeDiff<N extends INode,
     return getClass().getSimpleName() + ": " + this.getSnapshotId() + " (post="
         + (posteriorDiff == null? null: posteriorDiff.getSnapshotId()) + ")";
   }
+
+  /** Writes the snapshot id of this diff as an int. */
+  void writeSnapshot(DataOutput out) throws IOException {
+    out.writeInt(snapshotId);
+  }
+
+  /**
+   * Serializes this diff to {@code out}.
+   * @param out destination of the serialized diff
+   * @param referenceMap reference map made available to implementations
+   */
+  abstract void write(DataOutput out, ReferenceMap referenceMap
+      ) throws IOException;
 }

+ 52 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java

@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
+import java.io.DataOutput;
+import java.io.IOException;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -32,6 +34,7 @@ import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry;
 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffType;
 import org.apache.hadoop.hdfs.server.namenode.Content;
 import org.apache.hadoop.hdfs.server.namenode.ContentSummaryComputationContext;
+import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
 import org.apache.hadoop.hdfs.server.namenode.INode;
 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
@@ -39,6 +42,7 @@ import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryAttributes;
 import org.apache.hadoop.hdfs.server.namenode.INodeFile;
 import org.apache.hadoop.hdfs.server.namenode.INodeReference;
 import org.apache.hadoop.hdfs.server.namenode.Quota;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
 import org.apache.hadoop.hdfs.util.Diff;
 import org.apache.hadoop.hdfs.util.Diff.Container;
 import org.apache.hadoop.hdfs.util.Diff.ListType;
@@ -120,6 +124,35 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
       return counts;
     }
 
+    /**
+     * Serialize {@link #created}: the list size, then for every node the
+     * length of its local name (as a short) followed by the name bytes.
+     */
+    private void writeCreated(DataOutput out) throws IOException {
+      final List<INode> created = getList(ListType.CREATED);
+      out.writeInt(created.size());
+      for (INode node : created) {
+        // For INode in created list, we only need to record its local name
+        byte[] name = node.getLocalNameBytes();
+        out.writeShort(name.length);
+        out.write(name);
+      }
+    }
+
+    /**
+     * Serialize {@link #deleted}: the list size, then every node in full
+     * via {@link FSImageSerialization#saveINode2Image} with
+     * under-construction information enabled.
+     */
+    private void writeDeleted(DataOutput out,
+        ReferenceMap referenceMap) throws IOException {
+      final List<INode> deleted = getList(ListType.DELETED);
+      out.writeInt(deleted.size());
+      for (INode node : deleted) {
+        FSImageSerialization.saveINode2Image(node, out, true, referenceMap);
+      }
+    }
+
+    /** Serialize to out: the created list first, then the deleted list. */
+    private void write(DataOutput out, ReferenceMap referenceMap
+        ) throws IOException {
+      writeCreated(out);
+      writeDeleted(out, referenceMap);
+    }
+
     /** Get the list of INodeDirectory contained in the deleted list */
     private void getDirsInDeleted(List<INodeDirectory> dirList) {
       for (INode node : getList(ListType.DELETED)) {
@@ -314,6 +347,25 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
       return childrenSize;
     }
 
+    @Override
+    void write(DataOutput out, ReferenceMap referenceMap) throws IOException {
+      writeSnapshot(out);
+      out.writeInt(childrenSize);
+
+      // Write snapshotINode
+      out.writeBoolean(isSnapshotRoot);
+      if (!isSnapshotRoot) {
+        if (snapshotINode != null) {
+          out.writeBoolean(true);
+          FSImageSerialization.writeINodeDirectoryAttributes(snapshotINode, out);
+        } else {
+          out.writeBoolean(false);
+        }
+      }
+      // Write diff. Node need to write poseriorDiff, since diffs is a list.
+      diff.write(out, referenceMap);
+    }
+
     @Override
     Quota.Counts destroyDiffAndCollectBlocks(INodeDirectory currentINode,
         BlocksMapUpdateInfo collectedBlocks, final List<INode> removedINodes) {

+ 20 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileDiff.java

@@ -17,13 +17,17 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
 import org.apache.hadoop.hdfs.server.namenode.INode;
 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.INodeFile;
 import org.apache.hadoop.hdfs.server.namenode.INodeFileAttributes;
 import org.apache.hadoop.hdfs.server.namenode.Quota;
-
-import java.util.List;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
 
 /**
  * The difference of an {@link INodeFile} between two snapshots.
@@ -66,6 +70,20 @@ public class FileDiff extends
         + (snapshotINode == null? "?": snapshotINode.getFileReplication());
   }
 
+  @Override
+  void write(DataOutput out, ReferenceMap referenceMap) throws IOException {
+    writeSnapshot(out);
+    out.writeLong(fileSize);
+
+    // write snapshotINode
+    if (snapshotINode != null) {
+      out.writeBoolean(true);
+      FSImageSerialization.writeINodeFileAttributes(snapshotINode, out);
+    } else {
+      out.writeBoolean(false);
+    }
+  }
+
   @Override
   Quota.Counts destroyDiffAndCollectBlocks(INodeFile currentINode,
       BlocksMapUpdateInfo collectedBlocks, final List<INode> removedINodes) {

+ 9 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
 import java.io.DataInput;
+import java.io.DataOutput;
 import java.io.IOException;
 import java.text.SimpleDateFormat;
 import java.util.Arrays;
@@ -30,6 +31,7 @@ import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.server.namenode.AclFeature;
 import org.apache.hadoop.hdfs.server.namenode.FSImageFormat;
+import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
 import org.apache.hadoop.hdfs.server.namenode.INode;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
 import org.apache.hadoop.hdfs.util.ReadOnlyList;
@@ -214,4 +216,11 @@ public class Snapshot implements Comparable<byte[]> {
   public String toString() {
     return getClass().getSimpleName() + "." + root.getLocalName() + "(id=" + id + ")";
   }
+
+  /** Serialize the fields to out: the snapshot id, then the root directory. */
+  void write(DataOutput out) throws IOException {
+    out.writeInt(id);
+    // write root
+    FSImageSerialization.writeINodeDirectory(root, out);
+  }
 }

+ 72 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java

@@ -29,21 +29,75 @@ import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.server.namenode.FSImageFormat;
 import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
 import org.apache.hadoop.hdfs.server.namenode.INode;
+import org.apache.hadoop.hdfs.server.namenode.INodeAttributes;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryAttributes;
+import org.apache.hadoop.hdfs.server.namenode.INodeFile;
 import org.apache.hadoop.hdfs.server.namenode.INodeFileAttributes;
 import org.apache.hadoop.hdfs.server.namenode.INodeReference;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiff;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiffList;
 import org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff;
 import org.apache.hadoop.hdfs.util.Diff.ListType;
-import org.apache.hadoop.hdfs.server.namenode.FSImageFormat.Loader;
+import org.apache.hadoop.hdfs.util.ReadOnlyList;
 
 /**
  * A helper class defining static methods for reading/writing snapshot related
  * information from/to FSImage.
  */
 public class SnapshotFSImageFormat {
+  /**
+   * Save snapshots and snapshot quota for a snapshottable directory.
+   * The record consists of the number of snapshots, the id of every snapshot
+   * in the order returned by {@code getSnapshotsByNames()}, and finally the
+   * snapshot quota.
+   *
+   * @param current The directory that the snapshots belong to.
+   * @param out The {@link DataOutput} to write.
+   * @throws IOException if writing to {@code out} fails
+   */
+  public static void saveSnapshots(INodeDirectorySnapshottable current,
+      DataOutput out) throws IOException {
+    final ReadOnlyList<Snapshot> byName = current.getSnapshotsByNames();
+
+    // a count, then one id per snapshot
+    out.writeInt(byName.size());
+    for (Snapshot snapshot : byName) {
+      out.writeInt(snapshot.getId());
+    }
+
+    // the snapshot quota closes the record
+    out.writeInt(current.getSnapshotQuota());
+  }
+
+  /**
+   * Save a list of inode diffs.
+   * Writes -1 when there is no diff list. Otherwise writes the number of
+   * diffs followed by every diff, starting with the last element of the list.
+   *
+   * @param diffs The diff list to save; may be null.
+   * @param out The {@link DataOutput} to write.
+   * @param referenceMap Passed through to the write method of each diff.
+   * @throws IOException if writing to {@code out} fails
+   */
+  private static <N extends INode, A extends INodeAttributes, D extends AbstractINodeDiff<N, A, D>>
+      void saveINodeDiffs(final AbstractINodeDiffList<N, A, D> diffs,
+      final DataOutput out, ReferenceMap referenceMap) throws IOException {
+    if (diffs == null) {
+      out.writeInt(-1); // no diffs
+      return;
+    }
+
+    final List<D> ordered = diffs.asList();
+    int remaining = ordered.size();
+    out.writeInt(remaining);
+    // Record the diffs in reversed order, so that we can find the correct
+    // reference for INodes in the created list when loading the FSImage
+    while (remaining > 0) {
+      remaining--;
+      ordered.get(remaining).write(out, referenceMap);
+    }
+  }
+
+  /**
+   * Save the diff list of a directory.
+   *
+   * @param dir The directory whose diffs are saved.
+   * @param out The {@link DataOutput} to write.
+   * @param referenceMap Handed on to the diffs when they are written.
+   * @throws IOException if writing to {@code out} fails
+   */
+  public static void saveDirectoryDiffList(final INodeDirectory dir,
+      final DataOutput out, final ReferenceMap referenceMap
+      ) throws IOException {
+    final DirectoryDiffList dirDiffs = dir.getDiffs();
+    saveINodeDiffs(dirDiffs, out, referenceMap);
+  }
+
+  /**
+   * Save the diff list of a file.
+   *
+   * @param file The file whose diffs are saved.
+   * @param out The {@link DataOutput} to write.
+   * @throws IOException if writing to {@code out} fails
+   */
+  public static void saveFileDiffList(final INodeFile file,
+      final DataOutput out) throws IOException {
+    final FileDiffList fileDiffs = file.getDiffs();
+    // file diffs are written without a reference map
+    saveINodeDiffs(fileDiffs, out, null);
+  }
+
   public static FileDiffList loadFileDiffList(DataInput in,
       FSImageFormat.Loader loader) throws IOException {
     final int size = in.readInt();
@@ -264,6 +318,23 @@ public class SnapshotFSImageFormat {
      * Used to record whether the subtree of the reference node has been saved 
      */
     private final Map<Long, Long> dirMap = new HashMap<Long, Long>();
+
+    /**
+     * Write a WithCount reference node. A boolean is written first: true
+     * when the id of the node has not been recorded in this map yet, in which
+     * case the referred inode is saved in full and the node is recorded;
+     * false otherwise, in which case only the id is written.
+     *
+     * @param withCount the reference node to write
+     * @param out the stream to write to
+     * @param writeUnderConstruction passed on when the referred inode is saved
+     * @throws IOException if writing to {@code out} fails
+     */
+    public void writeINodeReferenceWithCount(
+        INodeReference.WithCount withCount, DataOutput out,
+        boolean writeUnderConstruction) throws IOException {
+      final long id = withCount.getId();
+
+      if (referenceMap.containsKey(id)) {
+        // already saved earlier: a back reference by id is enough
+        out.writeBoolean(false);
+        out.writeLong(id);
+        return;
+      }
+
+      // first occurrence: save the referred inode itself, then remember it
+      out.writeBoolean(true);
+      FSImageSerialization.saveINode2Image(withCount.getReferredINode(), out,
+          writeUnderConstruction, this);
+      referenceMap.put(id, withCount);
+    }
     
     public boolean toProcessSubtree(long id) {
       if (dirMap.containsKey(id)) {

+ 17 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
 import java.io.DataInput;
+import java.io.DataOutput;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -287,6 +288,22 @@ public class SnapshotManager implements SnapshotStatsMXBean {
     return snapshottables.values().toArray(
         new INodeDirectorySnapshottable[snapshottables.size()]);
   }
+
+  /**
+   * Write {@link #snapshotCounter}, {@link #numSnapshots},
+   * and all snapshots to the DataOutput.
+   * The snapshots are written directory by directory, in the order returned
+   * by {@code getSnapshotsByNames()} of each snapshottable directory.
+   *
+   * @param out the stream to write to
+   * @throws IOException if writing to {@code out} fails
+   */
+  public void write(DataOutput out) throws IOException {
+    // header: the counter, then the current number of snapshots
+    out.writeInt(snapshotCounter);
+    out.writeInt(numSnapshots.get());
+
+    // body: every snapshot of every snapshottable directory
+    for (INodeDirectorySnapshottable dir : snapshottables.values()) {
+      for (Snapshot snapshot : dir.getSnapshotsByNames()) {
+        snapshot.write(out);
+      }
+    }
+  }
   
   /**
    * Read values of {@link #snapshotCounter}, {@link #numSnapshots}, and

+ 172 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/DelimitedImageVisitor.java

@@ -0,0 +1,172 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+
+/**
+ * A DelimitedImageVisitor generates a text representation of the fsimage,
+ * with each element separated by a delimiter string.  All of the elements
+ * common to both inodes and inodes-under-construction are included. When
+ * processing an fsimage with a layout version that did not include an
+ * element, such as AccessTime, the output file will include a column
+ * for the value, but no value will be included.
+ *
+ * Individual block information for each file is not currently included.
+ *
+ * The default delimiter is tab, as this is an unlikely value to be included
+ * in an inode path or other text metadata.  The delimiter value can be set
+ * via the constructor.
+ */
+class DelimitedImageVisitor extends TextWriterImageVisitor {
+  private static final String defaultDelimiter = "\t";
+
+  // Stack of the enclosing elements that have been entered and not yet left
+  final private LinkedList<ImageElement> elemQ = new LinkedList<ImageElement>();
+  // Sum of the NUM_BYTES values seen for the inode being processed
+  private long fileSize = 0L;
+  // Elements of fsimage we're interested in tracking
+  private final Collection<ImageElement> elementsToTrack;
+  // Values for each of the elements in elementsToTrack
+  private final AbstractMap<ImageElement, String> elements =
+                                            new HashMap<ImageElement, String>();
+  private final String delimiter;
+
+  {
+    // This array determines what elements are tracked and the order
+    // in which they are output
+    final ImageElement[] columns = {
+        ImageElement.INODE_PATH,
+        ImageElement.REPLICATION,
+        ImageElement.MODIFICATION_TIME,
+        ImageElement.ACCESS_TIME,
+        ImageElement.BLOCK_SIZE,
+        ImageElement.NUM_BLOCKS,
+        ImageElement.NUM_BYTES,
+        ImageElement.NS_QUOTA,
+        ImageElement.DS_QUOTA,
+        ImageElement.PERMISSION_STRING,
+        ImageElement.USER_NAME,
+        ImageElement.GROUP_NAME
+    };
+
+    elementsToTrack = new ArrayList<ImageElement>();
+    Collections.addAll(elementsToTrack, columns);
+  }
+
+  /**
+   * Create a visitor that writes to the given file only, using the
+   * default delimiter.
+   */
+  public DelimitedImageVisitor(String filename) throws IOException {
+    this(filename, false);
+  }
+
+  /**
+   * Create a visitor that uses the default delimiter.
+   */
+  public DelimitedImageVisitor(String outputFile, boolean printToScreen)
+                                                           throws IOException {
+    this(outputFile, printToScreen, defaultDelimiter);
+  }
+
+  /**
+   * Create a visitor that separates the columns with the given delimiter.
+   */
+  public DelimitedImageVisitor(String outputFile, boolean printToScreen,
+                               String delimiter) throws IOException {
+    super(outputFile, printToScreen);
+    this.delimiter = delimiter;
+    reset();
+  }
+
+  /**
+   * Reset the values of the elements we're tracking in order to handle
+   * the next file
+   */
+  private void reset() {
+    elements.clear();
+    for (ImageElement tracked : elementsToTrack) {
+      elements.put(tracked, null);
+    }
+    fileSize = 0L;
+  }
+
+  @Override
+  void leaveEnclosingElement() throws IOException {
+    final ImageElement left = elemQ.pop();
+    final boolean inodeDone = left == ImageElement.INODE
+        || left == ImageElement.INODE_UNDER_CONSTRUCTION;
+    if (!inodeDone) {
+      return;
+    }
+
+    // We're done with an inode: write out our results and start over
+    writeLine();
+    write("\n");
+    reset();
+  }
+
+  /**
+   * Iterate through all the elements we're tracking and, if a value was
+   * recorded for it, write it out.  A delimiter is written between columns
+   * whether or not a value is present.
+   */
+  private void writeLine() throws IOException {
+    boolean firstColumn = true;
+
+    for (ImageElement column : elementsToTrack) {
+      if (!firstColumn) {
+        write(delimiter);
+      }
+      firstColumn = false;
+
+      // the file size is accumulated separately, not kept in the map
+      final String text = (column == ImageElement.NUM_BYTES)
+          ? String.valueOf(fileSize)
+          : elements.get(column);
+      if (text != null) {
+        write(text);
+      }
+    }
+  }
+
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    // Explicitly label the root path
+    String recorded = value;
+    if (element == ImageElement.INODE_PATH && value.equals("")) {
+      recorded = "/";
+    }
+
+    if (element == ImageElement.NUM_BYTES) {
+      // Special case of file size, which is sum of the num bytes in each block
+      fileSize += Long.parseLong(recorded);
+    } else if (elements.containsKey(element)) {
+      elements.put(element, recorded);
+    }
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+    elemQ.push(element);
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element, ImageElement key,
+      String value) throws IOException {
+    // Special case as numBlocks is an attribute of the blocks element
+    final boolean trackedBlockCount = key == ImageElement.NUM_BLOCKS
+        && elements.containsKey(ImageElement.NUM_BLOCKS);
+    if (trackedBlockCount) {
+      elements.put(key, value);
+    }
+
+    elemQ.push(element);
+  }
+
+  @Override
+  void start() throws IOException { /* Nothing to do */ }
+}

+ 36 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/DepthCounter.java

@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * Utility class for tracking descent into the structure of the
+ * Visitor class (ImageVisitor, EditsVisitor etc.)
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class DepthCounter {
+  /** Current nesting level; starts at zero. */
+  private int depth = 0;
+
+  /** Go one level deeper. */
+  public void incLevel() {
+    depth += 1;
+  }
+
+  /** Go one level up; a counter that is not positive is left untouched. */
+  public void decLevel() {
+    if (depth > 0) {
+      depth -= 1;
+    }
+  }
+
+  /** @return the current level */
+  public int getLevel() {
+    return depth;
+  }
+}
+

+ 193 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java

@@ -0,0 +1,193 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.LinkedList;
+
+/**
+ * File size distribution visitor.
+ * 
+ * <h3>Description.</h3>
+ * This is the tool for analyzing file sizes in the namespace image.
+ * In order to run the tool one should define a range of integers
+ * <tt>[0, maxSize]</tt> by specifying <tt>maxSize</tt> and a <tt>step</tt>.
+ * The range of integers is divided into segments of size <tt>step</tt>: 
+ * <tt>[0, s<sub>1</sub>, ..., s<sub>n-1</sub>, maxSize]</tt>,
+ * and the visitor calculates how many files in the system fall into 
+ * each segment <tt>(s<sub>i-1</sub>, s<sub>i</sub>]</tt>; empty files are
+ * counted at <tt>0</tt>.
+ * Note that files larger than <tt>maxSize</tt> always fall into 
+ * the very last segment.
+ * 
+ * <h3>Input.</h3>
+ * <ul>
+ * <li><tt>filename</tt> specifies the location of the image file;</li>
+ * <li><tt>maxSize</tt> determines the range <tt>[0, maxSize]</tt> of files
+ * sizes considered by the visitor;</li>
+ * <li><tt>step</tt> the range is divided into segments of size step.</li>
+ * </ul>
+ *
+ * <h3>Output.</h3>
+ * The output file is formatted as a tab separated two column table:
+ * Size and NumFiles. Where Size is the upper bound of the segment,
+ * and NumFiles is the number of files from the image whose size falls in 
+ * this segment.
+ */
+class FileDistributionVisitor extends TextWriterImageVisitor {
+  // Stack of the enclosing elements that have been entered and not yet left
+  final private LinkedList<ImageElement> elemS = new LinkedList<ImageElement>();
+
+  private final static long MAX_SIZE_DEFAULT = 0x2000000000L;   // 1/8 TB = 2^37
+  private final static int INTERVAL_DEFAULT = 0x200000;         // 2 MB = 2^21
+
+  // distribution[i] counts the files whose size rounds up to i * step
+  private int[] distribution;
+  private long maxSize;
+  private int step;
+
+  private int totalFiles;
+  private int totalDirectories;
+  private int totalBlocks;
+  private long totalSpace;
+  private long maxFileSize;
+
+  // Data collected for the inode that is currently being visited
+  private FileContext current;
+
+  private boolean inInode = false;
+
+  /**
+   * File or directory information.
+   */
+  private static class FileContext {
+    String path;
+    long fileSize;
+    int numBlocks;
+    int replication;
+  }
+
+  /**
+   * @param filename output file for the distribution table
+   * @param maxSize upper end of the size range; 0 selects the default
+   * @param step segment size; 0 selects the default
+   * @throws IOException if the range would need too many segments
+   */
+  public FileDistributionVisitor(String filename,
+                                 long maxSize,
+                                 int step) throws IOException {
+    super(filename, false);
+    this.maxSize = (maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize);
+    this.step = (step == 0 ? INTERVAL_DEFAULT : step);
+    long numIntervals = this.maxSize / this.step;
+    if(numIntervals >= Integer.MAX_VALUE)
+      throw new IOException("Too many distribution intervals " + numIntervals);
+    this.distribution = new int[1 + (int)(numIntervals)];
+    this.totalFiles = 0;
+    this.totalDirectories = 0;
+    this.totalBlocks = 0;
+    this.totalSpace = 0;
+    this.maxFileSize = 0;
+  }
+
+  @Override
+  void start() throws IOException {}
+
+  @Override
+  void finish() throws IOException {
+    output();
+    super.finish();
+  }
+
+  @Override
+  void finishAbnormally() throws IOException {
+    System.out.println("*** Image processing finished abnormally.  Ending ***");
+    output();
+    super.finishAbnormally();
+  }
+
+  /**
+   * Write the distribution table to the output file and print the totals
+   * to standard output.
+   */
+  private void output() throws IOException {
+    // write the distribution into the output file
+    write("Size\tNumFiles\n");
+    for(int i = 0; i < distribution.length; i++)
+      write(((long)i * step) + "\t" + distribution[i] + "\n");
+    System.out.println("totalFiles = " + totalFiles);
+    System.out.println("totalDirectories = " + totalDirectories);
+    System.out.println("totalBlocks = " + totalBlocks);
+    System.out.println("totalSpace = " + totalSpace);
+    System.out.println("maxFileSize = " + maxFileSize);
+  }
+
+  @Override
+  void leaveEnclosingElement() throws IOException {
+    ImageElement elem = elemS.pop();
+
+    if(elem != ImageElement.INODE &&
+       elem != ImageElement.INODE_UNDER_CONSTRUCTION)
+      return;
+    inInode = false;
+    // a negative block count marks a directory
+    if(current.numBlocks < 0) {
+      totalDirectories ++;
+      return;
+    }
+    totalFiles++;
+    totalBlocks += current.numBlocks;
+    totalSpace += current.fileSize * current.replication;
+    if(maxFileSize < current.fileSize)
+      maxFileSize = current.fileSize;
+    int high;
+    if(current.fileSize > maxSize) {
+      high = distribution.length-1;
+    } else {
+      high = (int)Math.ceil((double)current.fileSize / step);
+      // When maxSize is not a multiple of step, a size close to maxSize
+      // rounds up to distribution.length.  Count such files in the last
+      // segment instead of indexing past the end of the array.
+      if(high >= distribution.length)
+        high = distribution.length-1;
+    }
+    distribution[high]++;
+    if(totalFiles % 1000000 == 1)
+      System.out.println("Files processed: " + totalFiles
+          + "  Current: " + current.path);
+  }
+
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    if(inInode) {
+      switch(element) {
+      case INODE_PATH:
+        current.path = (value.equals("") ? "/" : value);
+        break;
+      case REPLICATION:
+        current.replication = Integer.parseInt(value);
+        break;
+      case NUM_BYTES:
+        // the file size is the sum of the sizes of its blocks
+        current.fileSize += Long.parseLong(value);
+        break;
+      default:
+        break;
+      }
+    }
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+    elemS.push(element);
+    if(element == ImageElement.INODE ||
+       element == ImageElement.INODE_UNDER_CONSTRUCTION) {
+      current = new FileContext();
+      inInode = true;
+    }
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, String value) throws IOException {
+    elemS.push(element);
+    if(element == ImageElement.INODE ||
+       element == ImageElement.INODE_UNDER_CONSTRUCTION) {
+      // Start from a fresh context here as well, so that the data of a
+      // previously visited inode is never carried over into this one.
+      current = new FileContext();
+      inInode = true;
+    } else if(element == ImageElement.BLOCKS)
+      current.numBlocks = Integer.parseInt(value);
+  }
+}

+ 83 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoader.java

@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * An ImageLoader can accept a DataInputStream to an Hadoop FSImage file
+ * and walk over its structure using the supplied ImageVisitor.
+ *
+ * Each implementation of ImageLoader is designed to rapidly process an
+ * image file.  As long as minor changes are made from one layout version
+ * to another, it is acceptable to tweak one implementation to read the next.
+ * However, if the layout version changes enough that it would make a
+ * processor slow or difficult to read, another processor should be created.
+ * This allows each processor to quickly read an image without getting
+ * bogged down in dealing with significant differences between layout versions.
+ */
+interface ImageLoader {
+
+  /**
+   * Walk over the image available from the stream and report it to the
+   * visitor.
+   *
+   * @param in DataInputStream pointing to an Hadoop FSImage file
+   * @param v Visitor to apply to the FSImage file
+   * @param enumerateBlocks Should visitor visit each of the file blocks?
+   * @throws IOException if the image cannot be read or processed
+   */
+  void loadImage(DataInputStream in, ImageVisitor v,
+      boolean enumerateBlocks) throws IOException;
+
+  /**
+   * Can this processor handle the specified version of FSImage file?
+   *
+   * @param version FSImage version file
+   * @return True if this instance can process the file
+   */
+  boolean canLoadVersion(int version);
+
+  /**
+   * Factory for obtaining version of image loader that can read
+   * a particular image format.
+   */
+  @InterfaceAudience.Private
+  public static class LoaderFactory {
+    // Java doesn't support static methods on interfaces, which necessitates
+    // this factory class
+
+    /**
+     * Find an image loader capable of interpreting the specified
+     * layout version number.  If none, return null;
+     *
+     * @param version fsimage layout version number to be processed
+     * @return ImageLoader that can interpret specified version, or null
+     */
+    public static ImageLoader getLoader(int version) {
+      // Easy to add more image processors as they are written
+      final ImageLoader[] candidates = { new ImageLoaderCurrent() };
+
+      // the first candidate that accepts the version wins
+      ImageLoader match = null;
+      for (int i = 0; i < candidates.length && match == null; i++) {
+        if (candidates[i].canLoadVersion(version)) {
+          match = candidates[i];
+        }
+      }
+
+      return match;
+    }
+  }
+}

+ 821 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java

@@ -0,0 +1,821 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
+import org.apache.hadoop.hdfs.protocol.LayoutFlags;
+import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
+import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
+import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
+import org.apache.hadoop.hdfs.server.namenode.INodeId;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeLayoutVersion;
+import org.apache.hadoop.hdfs.tools.offlineImageViewer.ImageVisitor.ImageElement;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.security.token.delegation.DelegationKey;
+
+/**
+ * ImageLoaderCurrent processes Hadoop FSImage files and walks over
+ * them using a provided ImageVisitor, calling the visitor at each element
+ * enumerated below.
+ *
+ * The only difference between v18 and v19 was the utilization of the
+ * stickybit.  Therefore, the same viewer can read either format.
+ *
+ * Versions -19 fsimage layout (with changes from -16 up):
+ * Image version (int)
+ * Namespace ID (int)
+ * NumFiles (long)
+ * Generation stamp (long)
+ * INodes (count = NumFiles)
+ *  INode
+ *    Path (String)
+ *    Replication (short)
+ *    Modification Time (long as date)
+ *    Access Time (long) // added in -16
+ *    Block size (long)
+ *    Num blocks (int)
+ *    Blocks (count = Num blocks)
+ *      Block
+ *        Block ID (long)
+ *        Num bytes (long)
+ *        Generation stamp (long)
+ *    Namespace Quota (long)
+ *    Diskspace Quota (long) // added in -18
+ *    Permissions
+ *      Username (String)
+ *      Groupname (String)
+ *      OctalPerms (short -> String)  // Modified in -19
+ *    Symlink (String) // added in -23
+ * NumINodesUnderConstruction (int)
+ * INodesUnderConstruction (count = NumINodesUnderConstruction)
+ *  INodeUnderConstruction
+ *    Path (bytes as string)
+ *    Replication (short)
+ *    Modification time (long as date)
+ *    Preferred block size (long)
+ *    Num blocks (int)
+ *    Blocks
+ *      Block
+ *        Block ID (long)
+ *        Num bytes (long)
+ *        Generation stamp (long)
+ *    Permissions
+ *      Username (String)
+ *      Groupname (String)
+ *      OctalPerms (short -> String)
+ *    Client Name (String)
+ *    Client Machine (String)
+ *    NumLocations (int)
+ *    DatanodeDescriptors (count = numLocations) // not loaded into memory
+ *      short                                    // but still in file
+ *      long
+ *      string
+ *      long
+ *      int
+ *      string
+ *      string
+ *      enum
+ *    CurrentDelegationKeyId (int)
+ *    NumDelegationKeys (int)
+ *      DelegationKeys (count = NumDelegationKeys)
+ *        DelegationKeyLength (vint)
+ *        DelegationKey (bytes)
+ *    DelegationTokenSequenceNumber (int)
+ *    NumDelegationTokens (int)
+ *    DelegationTokens (count = NumDelegationTokens)
+ *      DelegationTokenIdentifier
+ *        owner (String)
+ *        renewer (String)
+ *        realUser (String)
+ *        issueDate (vlong)
+ *        maxDate (vlong)
+ *        sequenceNumber (vint)
+ *        masterKeyId (vint)
+ *      expiryTime (long)     
+ *
+ */
+class ImageLoaderCurrent implements ImageLoader {
+  // Formatter with a minute-resolution pattern; the places where it is
+  // applied are not part of the code shown next to this declaration.
+  protected final DateFormat dateFormat = 
+                                      new SimpleDateFormat("yyyy-MM-dd HH:mm");
+  // Layout versions accepted by canLoadVersion(). Note that -29 is not
+  // in the list.
+  private static int[] versions = { -16, -17, -18, -19, -20, -21, -22, -23,
+      -24, -25, -26, -27, -28, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39,
+      -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51 };
+  // Layout version of the image being processed; read from the stream
+  // by loadImage()
+  private int imageVersion = 0;
+  
+  // Both maps are emptied by loadImage() once the inode section has been
+  // processed. NOTE(review): presumably keyed by inode id - confirm against
+  // the code that fills them.
+  private final Map<Long, Boolean> subtreeMap = new HashMap<Long, Boolean>();
+  private final Map<Long, String> dirNodeMap = new HashMap<Long, String>();
+
+  /* (non-Javadoc)
+   * @see ImageLoader#canLoadVersion(int)
+   */
+  @Override
+  public boolean canLoadVersion(int version) {
+    // linear scan of the supported layout versions
+    boolean supported = false;
+    for (int i = 0; i < versions.length && !supported; i++) {
+      supported = (versions[i] == version);
+    }
+    return supported;
+  }
+
+  /* (non-Javadoc)
+   * @see ImageLoader#loadImage(java.io.DataInputStream, ImageVisitor, boolean)
+   *
+   * Reads the image sections strictly in the order in which they appear in
+   * the stream and reports them to the visitor. The visitor is always
+   * finished: normally when the whole image was walked, abnormally otherwise.
+   *
+   * NOTE(review): the interface names the flag enumerateBlocks while this
+   * implementation names it skipBlocks; the two names suggest opposite
+   * meanings - confirm which one the callers and processINodes() expect.
+   */
+  @Override
+  public void loadImage(DataInputStream in, ImageVisitor v,
+      boolean skipBlocks) throws IOException {
+    // set only after every section was processed; checked in finally
+    boolean done = false;
+    try {
+      v.start();
+      v.visitEnclosingElement(ImageElement.FS_IMAGE);
+
+      // the layout version comes first and decides which optional
+      // sections are expected below
+      imageVersion = in.readInt();
+      if( !canLoadVersion(imageVersion))
+        throw new IOException("Cannot process fslayout version " + imageVersion);
+      if (NameNodeLayoutVersion.supports(Feature.ADD_LAYOUT_FLAGS, imageVersion)) {
+        LayoutFlags.read(in);
+      }
+
+      v.visit(ImageElement.IMAGE_VERSION, imageVersion);
+      v.visit(ImageElement.NAMESPACE_ID, in.readInt());
+
+      // not reported here; handed to processINodes() further down
+      long numInodes = in.readLong();
+
+      v.visit(ImageElement.GENERATION_STAMP, in.readLong());
+
+      if (NameNodeLayoutVersion.supports(Feature.SEQUENTIAL_BLOCK_ID, imageVersion)) {
+        v.visit(ImageElement.GENERATION_STAMP_V2, in.readLong());
+        v.visit(ImageElement.GENERATION_STAMP_V1_LIMIT, in.readLong());
+        v.visit(ImageElement.LAST_ALLOCATED_BLOCK_ID, in.readLong());
+      }
+
+      if (NameNodeLayoutVersion.supports(Feature.STORED_TXIDS, imageVersion)) {
+        v.visit(ImageElement.TRANSACTION_ID, in.readLong());
+      }
+      
+      if (NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion)) {
+        v.visit(ImageElement.LAST_INODE_ID, in.readLong());
+      }
+      
+      // snapshot section: counter, total number of snapshots, then one
+      // record per snapshot
+      boolean supportSnapshot = NameNodeLayoutVersion.supports(Feature.SNAPSHOT,
+          imageVersion);
+      if (supportSnapshot) {
+        v.visit(ImageElement.SNAPSHOT_COUNTER, in.readInt());
+        int numSnapshots = in.readInt();
+        v.visit(ImageElement.NUM_SNAPSHOTS_TOTAL, numSnapshots);
+        for (int i = 0; i < numSnapshots; i++) {
+          processSnapshot(in, v);
+        }
+      }
+      
+      if (NameNodeLayoutVersion.supports(Feature.FSIMAGE_COMPRESSION, imageVersion)) {
+        boolean isCompressed = in.readBoolean();
+        v.visit(ImageElement.IS_COMPRESSED, String.valueOf(isCompressed));
+        if (isCompressed) {
+          String codecClassName = Text.readString(in);
+          v.visit(ImageElement.COMPRESS_CODEC, codecClassName);
+          CompressionCodecFactory codecFac = new CompressionCodecFactory(
+              new Configuration());
+          CompressionCodec codec = codecFac.getCodecByClassName(codecClassName);
+          if (codec == null) {
+            throw new IOException("Image compression codec not supported: "
+                + codecClassName);
+          }
+          // everything after this point is read through the codec stream
+          in = new DataInputStream(codec.createInputStream(in));
+        }
+      }
+      processINodes(in, v, numInodes, skipBlocks, supportSnapshot);
+      // the maps are not needed once the inode section is done
+      subtreeMap.clear();
+      dirNodeMap.clear();
+
+      processINodesUC(in, v, skipBlocks);
+
+      if (NameNodeLayoutVersion.supports(Feature.DELEGATION_TOKEN, imageVersion)) {
+        processDelegationTokens(in, v);
+      }
+      
+      if (NameNodeLayoutVersion.supports(Feature.CACHING, imageVersion)) {
+        processCacheManagerState(in, v);
+      }
+      v.leaveEnclosingElement(); // FSImage
+      done = true;
+    } finally {
+      // reached on success and on any exception thrown above
+      if (done) {
+        v.finish();
+      } else {
+        v.finishAbnormally();
+      }
+    }
+  }
+
+  /**
+   * Walk the CacheManager section of the fsimage: the next entry id,
+   * followed by every cache pool and then every cache entry.
+   *
+   * @param in DataInputStream to process
+   * @param v Visitor to walk over records
+   * @throws IOException if reading the stream or notifying the visitor fails
+   */
+  private void processCacheManagerState(DataInputStream in, ImageVisitor v)
+      throws IOException {
+    final long nextEntryId = in.readLong();
+    v.visit(ImageElement.CACHE_NEXT_ENTRY_ID, nextEntryId);
+
+    // Each pool record is: name, owner/group/permission, weight.
+    for (int poolsLeft = in.readInt(); poolsLeft > 0; poolsLeft--) {
+      final String poolName = Text.readString(in);
+      v.visit(ImageElement.CACHE_POOL_NAME, poolName);
+      processCachePoolPermission(in, v);
+      final int weight = in.readInt();
+      v.visit(ImageElement.CACHE_POOL_WEIGHT, weight);
+    }
+
+    // Each entry record is: path, replication, name of the owning pool.
+    for (int entriesLeft = in.readInt(); entriesLeft > 0; entriesLeft--) {
+      final String entryPath = Text.readString(in);
+      v.visit(ImageElement.CACHE_ENTRY_PATH, entryPath);
+      final short entryReplication = in.readShort();
+      v.visit(ImageElement.CACHE_ENTRY_REPLICATION, entryReplication);
+      final String entryPool = Text.readString(in);
+      v.visit(ImageElement.CACHE_ENTRY_POOL_NAME, entryPool);
+    }
+  }
+  /**
+   * Process the Delegation Token related section in fsimage.
+   * 
+   * @param in DataInputStream to process
+   * @param v Visitor to walk over records
+   */
+  private void processDelegationTokens(DataInputStream in, ImageVisitor v)
+      throws IOException {
+    v.visit(ImageElement.CURRENT_DELEGATION_KEY_ID, in.readInt());
+    int numDKeys = in.readInt();
+    v.visitEnclosingElement(ImageElement.DELEGATION_KEYS,
+        ImageElement.NUM_DELEGATION_KEYS, numDKeys);
+    for(int i =0; i < numDKeys; i++) {
+      DelegationKey key = new DelegationKey();
+      key.readFields(in);
+      v.visit(ImageElement.DELEGATION_KEY, key.toString());
+    }
+    v.leaveEnclosingElement();
+    v.visit(ImageElement.DELEGATION_TOKEN_SEQUENCE_NUMBER, in.readInt());
+    int numDTokens = in.readInt();
+    v.visitEnclosingElement(ImageElement.DELEGATION_TOKENS,
+        ImageElement.NUM_DELEGATION_TOKENS, numDTokens);
+    for(int i=0; i<numDTokens; i++){
+      DelegationTokenIdentifier id = new  DelegationTokenIdentifier();
+      id.readFields(in);
+      long expiryTime = in.readLong();
+      v.visitEnclosingElement(ImageElement.DELEGATION_TOKEN_IDENTIFIER);
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_KIND,
+          id.getKind().toString());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_SEQNO,
+          id.getSequenceNumber());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_OWNER,
+          id.getOwner().toString());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_RENEWER,
+          id.getRenewer().toString());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_REALUSER,
+          id.getRealUser().toString());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_ISSUE_DATE,
+          id.getIssueDate());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_MAX_DATE,
+          id.getMaxDate());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME,
+          expiryTime);
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_MASTER_KEY_ID,
+          id.getMasterKeyId());
+      v.leaveEnclosingElement(); // DELEGATION_TOKEN_IDENTIFIER
+    }
+    v.leaveEnclosingElement(); // DELEGATION_TOKENS
+  }
+
+  /**
+   * Process the INodes under construction section of the fsimage.
+   *
+   * @param in DataInputStream to process
+   * @param v Visitor to walk over inodes
+   * @param skipBlocks Walk over each block?
+   */
+  private void processINodesUC(DataInputStream in, ImageVisitor v,
+      boolean skipBlocks) throws IOException {
+    int numINUC = in.readInt();
+
+    v.visitEnclosingElement(ImageElement.INODES_UNDER_CONSTRUCTION,
+                           ImageElement.NUM_INODES_UNDER_CONSTRUCTION, numINUC);
+
+    for(int i = 0; i < numINUC; i++) {
+      v.visitEnclosingElement(ImageElement.INODE_UNDER_CONSTRUCTION);
+      byte [] name = FSImageSerialization.readBytes(in);
+      String n = new String(name, "UTF8");
+      v.visit(ImageElement.INODE_PATH, n);
+      
+      if (NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion)) {
+        long inodeId = in.readLong();
+        v.visit(ImageElement.INODE_ID, inodeId);
+      }
+      
+      v.visit(ImageElement.REPLICATION, in.readShort());
+      v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
+
+      v.visit(ImageElement.PREFERRED_BLOCK_SIZE, in.readLong());
+      int numBlocks = in.readInt();
+      processBlocks(in, v, numBlocks, skipBlocks);
+
+      processPermission(in, v);
+      v.visit(ImageElement.CLIENT_NAME, FSImageSerialization.readString(in));
+      v.visit(ImageElement.CLIENT_MACHINE, FSImageSerialization.readString(in));
+
+      // Skip over the datanode descriptors, which are still stored in the
+      // file but are not used by the datanode or loaded into memory
+      int numLocs = in.readInt();
+      for(int j = 0; j < numLocs; j++) {
+        in.readShort();
+        in.readLong();
+        in.readLong();
+        in.readLong();
+        in.readInt();
+        FSImageSerialization.readString(in);
+        FSImageSerialization.readString(in);
+        WritableUtils.readEnum(in, AdminStates.class);
+      }
+
+      v.leaveEnclosingElement(); // INodeUnderConstruction
+    }
+
+    v.leaveEnclosingElement(); // INodesUnderConstruction
+  }
+
+  /**
+   * Process the blocks section of the fsimage.
+   *
+   * @param in Datastream to process
+   * @param v Visitor to walk over inodes
+   * @param skipBlocks Walk over each block?
+   */
+  private void processBlocks(DataInputStream in, ImageVisitor v,
+      int numBlocks, boolean skipBlocks) throws IOException {
+    v.visitEnclosingElement(ImageElement.BLOCKS,
+                            ImageElement.NUM_BLOCKS, numBlocks);
+    
+    // directory or symlink or reference node, no blocks to process    
+    if(numBlocks < 0) { 
+      v.leaveEnclosingElement(); // Blocks
+      return;
+    }
+    
+    if(skipBlocks) {
+      int bytesToSkip = ((Long.SIZE * 3 /* fields */) / 8 /*bits*/) * numBlocks;
+      if(in.skipBytes(bytesToSkip) != bytesToSkip)
+        throw new IOException("Error skipping over blocks");
+      
+    } else {
+      for(int j = 0; j < numBlocks; j++) {
+        v.visitEnclosingElement(ImageElement.BLOCK);
+        v.visit(ImageElement.BLOCK_ID, in.readLong());
+        v.visit(ImageElement.NUM_BYTES, in.readLong());
+        v.visit(ImageElement.GENERATION_STAMP, in.readLong());
+        v.leaveEnclosingElement(); // Block
+      }
+    }
+    v.leaveEnclosingElement(); // Blocks
+  }
+
+  /**
+   * Extract the INode permissions stored in the fsimage file.
+   *
+   * @param in Datastream to process
+   * @param v Visitor to walk over inodes
+   */
+  private void processPermission(DataInputStream in, ImageVisitor v)
+      throws IOException {
+    v.visitEnclosingElement(ImageElement.PERMISSIONS);
+    v.visit(ImageElement.USER_NAME, Text.readString(in));
+    v.visit(ImageElement.GROUP_NAME, Text.readString(in));
+    FsPermission fsp = new FsPermission(in.readShort());
+    v.visit(ImageElement.PERMISSION_STRING, fsp.toString());
+    v.leaveEnclosingElement(); // Permissions
+  }
+
+  /**
+   * Extract CachePool permissions stored in the fsimage file.
+   *
+   * @param in Datastream to process
+   * @param v Visitor to walk over inodes
+   */
+  private void processCachePoolPermission(DataInputStream in, ImageVisitor v)
+      throws IOException {
+    v.visitEnclosingElement(ImageElement.PERMISSIONS);
+    v.visit(ImageElement.CACHE_POOL_OWNER_NAME, Text.readString(in));
+    v.visit(ImageElement.CACHE_POOL_GROUP_NAME, Text.readString(in));
+    FsPermission fsp = new FsPermission(in.readShort());
+    v.visit(ImageElement.CACHE_POOL_PERMISSION_STRING, fsp.toString());
+    v.leaveEnclosingElement(); // Permissions
+  }
+
+  /**
+   * Process the INode records stored in the fsimage.
+   *
+   * @param in Datastream to process
+   * @param v Visitor to walk over INodes
+   * @param numInodes Number of INodes stored in file
+   * @param skipBlocks Process all the blocks within the INode?
+   * @param supportSnapshot Whether or not the imageVersion supports snapshot
+   * @throws VisitException
+   * @throws IOException
+   */
+  private void processINodes(DataInputStream in, ImageVisitor v,
+      long numInodes, boolean skipBlocks, boolean supportSnapshot)
+      throws IOException {
+    v.visitEnclosingElement(ImageElement.INODES,
+        ImageElement.NUM_INODES, numInodes);
+    
+    if (NameNodeLayoutVersion.supports(Feature.FSIMAGE_NAME_OPTIMIZATION, imageVersion)) {
+      if (!supportSnapshot) {
+        processLocalNameINodes(in, v, numInodes, skipBlocks);
+      } else {
+        processLocalNameINodesWithSnapshot(in, v, skipBlocks);
+      }
+    } else { // full path name
+      processFullNameINodes(in, v, numInodes, skipBlocks);
+    }
+
+    
+    v.leaveEnclosingElement(); // INodes
+  }
+  
+  /**
+   * Process image with full path name
+   * 
+   * @param in image stream
+   * @param v visitor
+   * @param numInodes number of indoes to read
+   * @param skipBlocks skip blocks or not
+   * @throws IOException if there is any error occurs
+   */
+  private void processLocalNameINodes(DataInputStream in, ImageVisitor v,
+      long numInodes, boolean skipBlocks) throws IOException {
+    // process root
+    processINode(in, v, skipBlocks, "", false);
+    numInodes--;
+    while (numInodes > 0) {
+      numInodes -= processDirectory(in, v, skipBlocks);
+    }
+  }
+  
+  private int processDirectory(DataInputStream in, ImageVisitor v,
+     boolean skipBlocks) throws IOException {
+    String parentName = FSImageSerialization.readString(in);
+    return processChildren(in, v, skipBlocks, parentName);
+  }
+  
+  /**
+   * Process image with local path name and snapshot support
+   * 
+   * @param in image stream
+   * @param v visitor
+   * @param skipBlocks skip blocks or not
+   */
+  private void processLocalNameINodesWithSnapshot(DataInputStream in,
+      ImageVisitor v, boolean skipBlocks) throws IOException {
+    // process root
+    processINode(in, v, skipBlocks, "", false);
+    processDirectoryWithSnapshot(in, v, skipBlocks);
+  }
+  
+  /**
+   * Recursively process one directory of a snapshot-capable image:
+   * its snapshots, children, directory diff list and sub-directories.
+   *
+   * A directory recorded in subtreeMap (i.e. one reached through a
+   * reference node) is only walked the first time it is encountered.
+   *
+   * @param in image stream
+   * @param v visitor
+   * @param skipBlocks skip blocks or not
+   * @throws IOException if any error occurs
+   */
+  private void processDirectoryWithSnapshot(DataInputStream in, ImageVisitor v,
+      boolean skipBlocks) throws IOException {
+    // 1. directory inode id, used to look up the path recorded earlier
+    final long dirId = in.readLong();
+    final String dirPath = dirNodeMap.remove(dirId);
+
+    final Boolean alreadyVisited = subtreeMap.get(dirId);
+    if (Boolean.TRUE.equals(alreadyVisited)) {
+      return; // subtree was walked on an earlier visit
+    }
+    if (alreadyVisited != null) {
+      // linked by a reference node and seen for the first time
+      subtreeMap.put(dirId, true);
+    }
+    // a null entry means no reference node links here, so no revisit
+
+    // 2. possible snapshots
+    processSnapshots(in, v, dirPath);
+    // 3. children nodes
+    processChildren(in, v, skipBlocks, dirPath);
+    // 4. possible directory diff list
+    processDirectoryDiffList(in, v, dirPath);
+
+    // 5. descend into each sub-directory
+    for (int subTreesLeft = in.readInt(); subTreesLeft > 0; subTreesLeft--) {
+      processDirectoryWithSnapshot(in, v, skipBlocks);
+    }
+  }
+  
+  /**
+   * Process the snapshot list of a directory. A negative snapshot count
+   * means there is nothing further to read for this directory.
+   *
+   * @param in image stream
+   * @param v visitor
+   * @param rootName path of the directory (currently unused)
+   * @throws IOException if any error occurs
+   */
+  private void processSnapshots(DataInputStream in, ImageVisitor v,
+      String rootName) throws IOException {
+    final int snapshotCount = in.readInt();
+    if (snapshotCount < 0) {
+      return;
+    }
+
+    v.visitEnclosingElement(ImageElement.SNAPSHOTS,
+        ImageElement.NUM_SNAPSHOTS, snapshotCount);
+    for (int left = snapshotCount; left > 0; left--) {
+      // only the snapshot id is stored per entry
+      v.visitEnclosingElement(ImageElement.SNAPSHOT);
+      final int snapshotId = in.readInt();
+      v.visit(ImageElement.SNAPSHOT_ID, snapshotId);
+      v.leaveEnclosingElement(); // SNAPSHOT
+    }
+    final int quota = in.readInt();
+    v.visit(ImageElement.SNAPSHOT_QUOTA, quota);
+    v.leaveEnclosingElement(); // SNAPSHOTS
+  }
+  
+  private void processSnapshot(DataInputStream in, ImageVisitor v)
+      throws IOException {
+    v.visitEnclosingElement(ImageElement.SNAPSHOT);
+    v.visit(ImageElement.SNAPSHOT_ID, in.readInt());
+    // process root of snapshot
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_ROOT);
+    processINode(in, v, true, "", false);
+    v.leaveEnclosingElement();
+    v.leaveEnclosingElement();
+  }
+  
+  /**
+   * Process the list of directory diffs of a directory. A negative count
+   * means there is no diff list to read.
+   *
+   * @param in image stream
+   * @param v visitor
+   * @param currentINodeName path of the directory the diffs belong to
+   * @throws IOException if any error occurs
+   */
+  private void processDirectoryDiffList(DataInputStream in, ImageVisitor v,
+      String currentINodeName) throws IOException {
+    final int diffCount = in.readInt();
+    if (diffCount < 0) {
+      return;
+    }
+
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFFS,
+        ImageElement.NUM_SNAPSHOT_DIR_DIFF, diffCount);
+    // directory diffs are stored in reverse chronological order
+    for (int left = diffCount; left > 0; left--) {
+      processDirectoryDiff(in, v, currentINodeName);
+    }
+    v.leaveEnclosingElement(); // SNAPSHOT_DIR_DIFFS
+  }
+  
+  private void processDirectoryDiff(DataInputStream in, ImageVisitor v,
+      String currentINodeName) throws IOException {
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFF);
+    int snapshotId = in.readInt();
+    v.visit(ImageElement.SNAPSHOT_DIFF_SNAPSHOTID, snapshotId);
+    v.visit(ImageElement.SNAPSHOT_DIR_DIFF_CHILDREN_SIZE, in.readInt());
+    
+    // process snapshotINode
+    boolean useRoot = in.readBoolean();
+    if (!useRoot) {
+      if (in.readBoolean()) {
+        v.visitEnclosingElement(ImageElement.SNAPSHOT_INODE_DIRECTORY_ATTRIBUTES);
+        if (NameNodeLayoutVersion.supports(Feature.OPTIMIZE_SNAPSHOT_INODES, imageVersion)) {
+          processINodeDirectoryAttributes(in, v, currentINodeName);
+        } else {
+          processINode(in, v, true, currentINodeName, true);
+        }
+        v.leaveEnclosingElement();
+      }
+    }
+    
+    // process createdList
+    int createdSize = in.readInt();
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFF_CREATEDLIST,
+        ImageElement.SNAPSHOT_DIR_DIFF_CREATEDLIST_SIZE, createdSize);
+    for (int i = 0; i < createdSize; i++) {
+      String createdNode = FSImageSerialization.readString(in);
+      v.visit(ImageElement.SNAPSHOT_DIR_DIFF_CREATED_INODE, createdNode);
+    }
+    v.leaveEnclosingElement();
+    
+    // process deletedList
+    int deletedSize = in.readInt();
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFF_DELETEDLIST,
+        ImageElement.SNAPSHOT_DIR_DIFF_DELETEDLIST_SIZE, deletedSize);
+    for (int i = 0; i < deletedSize; i++) {
+      v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFF_DELETED_INODE);
+      processINode(in, v, false, currentINodeName, true);
+      v.leaveEnclosingElement();
+    }
+    v.leaveEnclosingElement();
+    v.leaveEnclosingElement();
+  }
+
+  /**
+   * Process the compact attribute record of a directory snapshot copy:
+   * path, permissions, modification time and the two quotas.
+   *
+   * @param in image stream
+   * @param v visitor
+   * @param parentName name of the parent, used to build the path
+   * @throws IOException if any error occurs
+   */
+  private void processINodeDirectoryAttributes(DataInputStream in, ImageVisitor v,
+      String parentName) throws IOException {
+    v.visit(ImageElement.INODE_PATH, readINodePath(in, parentName));
+    processPermission(in, v);
+
+    final long modTime = in.readLong();
+    v.visit(ImageElement.MODIFICATION_TIME, formatDate(modTime));
+
+    final long nsQuota = in.readLong();
+    v.visit(ImageElement.NS_QUOTA, nsQuota);
+    final long dsQuota = in.readLong();
+    v.visit(ImageElement.DS_QUOTA, dsQuota);
+  }
+
+  /** Process children under a directory */
+  private int processChildren(DataInputStream in, ImageVisitor v,
+      boolean skipBlocks, String parentName) throws IOException {
+    int numChildren = in.readInt();
+    for (int i = 0; i < numChildren; i++) {
+      processINode(in, v, skipBlocks, parentName, false);
+    }
+    return numChildren;
+  }
+  
+  /**
+   * Process an image in which every inode is stored with its full path
+   * name; the inodes are simply read one after another.
+   *
+   * @param in image stream
+   * @param v visitor
+   * @param numInodes number of inodes to read
+   * @param skipBlocks skip blocks or not
+   * @throws IOException if any error occurs
+   */
+  private void processFullNameINodes(DataInputStream in, ImageVisitor v,
+      long numInodes, boolean skipBlocks) throws IOException {
+    // a null parent name tells processINode the stored name is a full path
+    long left = numInodes;
+    while (left > 0) {
+      processINode(in, v, skipBlocks, null, false);
+      left--;
+    }
+  }
+ 
+  private String readINodePath(DataInputStream in, String parentName)
+      throws IOException {
+    String pathName = FSImageSerialization.readString(in);
+    if (parentName != null) {  // local name
+      pathName = "/" + pathName;
+      if (!"/".equals(parentName)) { // children of non-root directory
+        pathName = parentName + pathName;
+      }
+    }
+    return pathName;
+  }
+
+  /**
+   * Process a single INode record.
+   *
+   * The common header (path, optional inode id, replication, times,
+   * block size, block count and blocks) is read first. The block count
+   * then selects the record type:
+   * non-negative = file, -1 = directory, -2 = symlink, -3 = reference node.
+   *
+   * @param in image stream
+   * @param v visitor
+   * @param skipBlocks skip blocks or not
+   * @param parentName the name of its parent node
+   * @param isSnapshotCopy whether or not the inode is a snapshot copy
+   * @throws IOException if reading the stream or notifying the visitor fails
+   */
+  private void processINode(DataInputStream in, ImageVisitor v,
+      boolean skipBlocks, String parentName, boolean isSnapshotCopy)
+      throws IOException {
+    boolean supportSnapshot =
+        NameNodeLayoutVersion.supports(Feature.SNAPSHOT, imageVersion);
+    boolean supportInodeId =
+        NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion);
+
+    v.visitEnclosingElement(ImageElement.INODE);
+    final String pathName = readINodePath(in, parentName);
+    v.visit(ImageElement.INODE_PATH, pathName);
+
+    // Layouts without inode ids fall back to the grandfather id.
+    long inodeId = INodeId.GRANDFATHER_INODE_ID;
+    if (supportInodeId) {
+      inodeId = in.readLong();
+      v.visit(ImageElement.INODE_ID, inodeId);
+    }
+    v.visit(ImageElement.REPLICATION, in.readShort());
+    v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
+    if (NameNodeLayoutVersion.supports(Feature.FILE_ACCESS_TIME, imageVersion)) {
+      v.visit(ImageElement.ACCESS_TIME, formatDate(in.readLong()));
+    }
+    v.visit(ImageElement.BLOCK_SIZE, in.readLong());
+    int numBlocks = in.readInt();
+
+    processBlocks(in, v, numBlocks, skipBlocks);
+
+    if (numBlocks >= 0) { // File
+      if (supportSnapshot) {
+        // make sure subtreeMap only contains entry for directory
+        subtreeMap.remove(inodeId);
+        // process file diffs
+        processFileDiffList(in, v, parentName);
+        if (isSnapshotCopy) {
+          boolean underConstruction = in.readBoolean();
+          if (underConstruction) {
+            v.visit(ImageElement.CLIENT_NAME,
+                FSImageSerialization.readString(in));
+            v.visit(ImageElement.CLIENT_MACHINE,
+                FSImageSerialization.readString(in));
+          }
+        }
+      }
+      processPermission(in, v);
+    } else if (numBlocks == -1) { // Directory
+      if (supportSnapshot && supportInodeId) {
+        // remember the path so the directory walk can look it up by id
+        dirNodeMap.put(inodeId, pathName);
+      }
+      // numBlocks is known to be -1 here, so the quotas are always read
+      v.visit(ImageElement.NS_QUOTA, in.readLong());
+      if (NameNodeLayoutVersion.supports(Feature.DISKSPACE_QUOTA, imageVersion)) {
+        v.visit(ImageElement.DS_QUOTA, in.readLong());
+      }
+      if (supportSnapshot) {
+        boolean snapshottable = in.readBoolean();
+        if (!snapshottable) {
+          // the with-snapshot flag is only stored for non-snapshottable dirs
+          boolean withSnapshot = in.readBoolean();
+          v.visit(ImageElement.IS_WITHSNAPSHOT_DIR, Boolean.toString(withSnapshot));
+        } else {
+          v.visit(ImageElement.IS_SNAPSHOTTABLE_DIR, Boolean.toString(snapshottable));
+        }
+      }
+      processPermission(in, v);
+    } else if (numBlocks == -2) { // Symlink
+      v.visit(ImageElement.SYMLINK, Text.readString(in));
+      processPermission(in, v);
+    } else if (numBlocks == -3) { // reference node
+      final boolean isWithName = in.readBoolean();
+      int snapshotId = in.readInt();
+      if (isWithName) {
+        v.visit(ImageElement.SNAPSHOT_LAST_SNAPSHOT_ID, snapshotId);
+      } else {
+        v.visit(ImageElement.SNAPSHOT_DST_SNAPSHOT_ID, snapshotId);
+      }
+
+      final boolean firstReferred = in.readBoolean();
+      if (firstReferred) {
+        // if a subtree is linked by multiple "parents", the corresponding dir
+        // must be referred by a reference node. we put the reference node into
+        // the subtreeMap here and let its value be false. when we later visit
+        // the subtree for the first time, we change the value to true.
+        subtreeMap.put(inodeId, false);
+        v.visitEnclosingElement(ImageElement.SNAPSHOT_REF_INODE);
+        processINode(in, v, skipBlocks, parentName, isSnapshotCopy);
+        v.leaveEnclosingElement();  // referred inode
+      } else {
+        v.visit(ImageElement.SNAPSHOT_REF_INODE_ID, in.readLong());
+      }
+    }
+
+    v.leaveEnclosingElement(); // INode
+  }
+
+  /**
+   * Process the compact attribute record of a file snapshot copy: path,
+   * permissions, modification time, optional access time, replication
+   * and block size.
+   *
+   * @param in image stream
+   * @param v visitor
+   * @param parentName name of the parent, used to build the path
+   * @throws IOException if any error occurs
+   */
+  private void processINodeFileAttributes(DataInputStream in, ImageVisitor v,
+      String parentName) throws IOException {
+    v.visit(ImageElement.INODE_PATH, readINodePath(in, parentName));
+    processPermission(in, v);
+
+    final long modTime = in.readLong();
+    v.visit(ImageElement.MODIFICATION_TIME, formatDate(modTime));
+
+    // access time is only stored by layouts that support it
+    final boolean hasAccessTime =
+        NameNodeLayoutVersion.supports(Feature.FILE_ACCESS_TIME, imageVersion);
+    if (hasAccessTime) {
+      final long accessTime = in.readLong();
+      v.visit(ImageElement.ACCESS_TIME, formatDate(accessTime));
+    }
+
+    final short replication = in.readShort();
+    v.visit(ImageElement.REPLICATION, replication);
+    final long blockSize = in.readLong();
+    v.visit(ImageElement.BLOCK_SIZE, blockSize);
+  }
+  
+  /**
+   * Process the list of file diffs of a file. A negative size means
+   * there is no diff list to read.
+   *
+   * @param in image stream
+   * @param v visitor
+   * @param currentINodeName name passed on to each file diff
+   * @throws IOException if any error occurs
+   */
+  private void processFileDiffList(DataInputStream in, ImageVisitor v,
+      String currentINodeName) throws IOException {
+    final int diffCount = in.readInt();
+    if (diffCount < 0) {
+      return;
+    }
+
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_FILE_DIFFS,
+        ImageElement.NUM_SNAPSHOT_FILE_DIFF, diffCount);
+    for (int left = diffCount; left > 0; left--) {
+      processFileDiff(in, v, currentINodeName);
+    }
+    v.leaveEnclosingElement(); // SNAPSHOT_FILE_DIFFS
+  }
+  
+  private void processFileDiff(DataInputStream in, ImageVisitor v,
+      String currentINodeName) throws IOException {
+    int snapshotId = in.readInt();
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_FILE_DIFF,
+        ImageElement.SNAPSHOT_DIFF_SNAPSHOTID, snapshotId);
+    v.visit(ImageElement.SNAPSHOT_FILE_SIZE, in.readLong());
+    if (in.readBoolean()) {
+      v.visitEnclosingElement(ImageElement.SNAPSHOT_INODE_FILE_ATTRIBUTES);
+      if (NameNodeLayoutVersion.supports(Feature.OPTIMIZE_SNAPSHOT_INODES, imageVersion)) {
+        processINodeFileAttributes(in, v, currentINodeName);
+      } else {
+        processINode(in, v, true, currentINodeName, true);
+      }
+      v.leaveEnclosingElement();
+    }
+    v.leaveEnclosingElement();
+  }
+  
+  /**
+   * Helper method to format dates during processing.
+   * @param date Date as read from image file
+   * @return String version of date format
+   */
+  private String formatDate(long date) {
+    return dateFormat.format(new Date(date));
+  }
+}

+ 212 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java

@@ -0,0 +1,212 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+
+/**
+ * An implementation of ImageVisitor can traverse the structure of a
+ * Hadoop fsimage and respond to each of the structures within the file.
+ *
+ * Callbacks come in two flavours: {@code visit} for a plain element with a
+ * value, and {@code visitEnclosingElement} / {@code leaveEnclosingElement}
+ * for an element that contains other elements.
+ */
+abstract class ImageVisitor {
+
+  /**
+   * Structural elements of an FSImage that may be encountered within the
+   * file. ImageVisitors are able to handle processing any of these elements.
+   */
+  public enum ImageElement {
+    FS_IMAGE,
+    IMAGE_VERSION,
+    NAMESPACE_ID,
+    IS_COMPRESSED,
+    COMPRESS_CODEC,
+    LAYOUT_VERSION,
+    NUM_INODES,
+    GENERATION_STAMP,
+    GENERATION_STAMP_V2,
+    GENERATION_STAMP_V1_LIMIT,
+    LAST_ALLOCATED_BLOCK_ID,
+    INODES,
+    INODE,
+    INODE_PATH,
+    REPLICATION,
+    MODIFICATION_TIME,
+    ACCESS_TIME,
+    BLOCK_SIZE,
+    NUM_BLOCKS,
+    BLOCKS,
+    BLOCK,
+    BLOCK_ID,
+    NUM_BYTES,
+    NS_QUOTA,
+    DS_QUOTA,
+    PERMISSIONS,
+    SYMLINK,
+    NUM_INODES_UNDER_CONSTRUCTION,
+    INODES_UNDER_CONSTRUCTION,
+    INODE_UNDER_CONSTRUCTION,
+    PREFERRED_BLOCK_SIZE,
+    CLIENT_NAME,
+    CLIENT_MACHINE,
+    USER_NAME,
+    GROUP_NAME,
+    PERMISSION_STRING,
+    CURRENT_DELEGATION_KEY_ID,
+    NUM_DELEGATION_KEYS,
+    DELEGATION_KEYS,
+    DELEGATION_KEY,
+    DELEGATION_TOKEN_SEQUENCE_NUMBER,
+    NUM_DELEGATION_TOKENS,
+    DELEGATION_TOKENS,
+    DELEGATION_TOKEN_IDENTIFIER,
+    DELEGATION_TOKEN_IDENTIFIER_KIND,
+    DELEGATION_TOKEN_IDENTIFIER_SEQNO,
+    DELEGATION_TOKEN_IDENTIFIER_OWNER,
+    DELEGATION_TOKEN_IDENTIFIER_RENEWER,
+    DELEGATION_TOKEN_IDENTIFIER_REALUSER,
+    DELEGATION_TOKEN_IDENTIFIER_ISSUE_DATE,
+    DELEGATION_TOKEN_IDENTIFIER_MAX_DATE,
+    DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME,
+    DELEGATION_TOKEN_IDENTIFIER_MASTER_KEY_ID,
+    TRANSACTION_ID,
+    LAST_INODE_ID,
+    INODE_ID,
+
+    SNAPSHOT_COUNTER,
+    NUM_SNAPSHOTS_TOTAL,
+    NUM_SNAPSHOTS,
+    SNAPSHOTS,
+    SNAPSHOT,
+    SNAPSHOT_ID,
+    SNAPSHOT_ROOT,
+    SNAPSHOT_QUOTA,
+    NUM_SNAPSHOT_DIR_DIFF,
+    SNAPSHOT_DIR_DIFFS,
+    SNAPSHOT_DIR_DIFF,
+    SNAPSHOT_DIFF_SNAPSHOTID,
+    SNAPSHOT_DIR_DIFF_CHILDREN_SIZE,
+    SNAPSHOT_INODE_FILE_ATTRIBUTES,
+    SNAPSHOT_INODE_DIRECTORY_ATTRIBUTES,
+    SNAPSHOT_DIR_DIFF_CREATEDLIST,
+    SNAPSHOT_DIR_DIFF_CREATEDLIST_SIZE,
+    SNAPSHOT_DIR_DIFF_CREATED_INODE,
+    SNAPSHOT_DIR_DIFF_DELETEDLIST,
+    SNAPSHOT_DIR_DIFF_DELETEDLIST_SIZE,
+    SNAPSHOT_DIR_DIFF_DELETED_INODE,
+    IS_SNAPSHOTTABLE_DIR,
+    IS_WITHSNAPSHOT_DIR,
+    SNAPSHOT_FILE_DIFFS,
+    SNAPSHOT_FILE_DIFF,
+    NUM_SNAPSHOT_FILE_DIFF,
+    SNAPSHOT_FILE_SIZE,
+    SNAPSHOT_DST_SNAPSHOT_ID,
+    SNAPSHOT_LAST_SNAPSHOT_ID,
+    SNAPSHOT_REF_INODE_ID,
+    SNAPSHOT_REF_INODE,
+
+    CACHE_NEXT_ENTRY_ID,
+    CACHE_NUM_POOLS,
+    CACHE_POOL_NAME,
+    CACHE_POOL_OWNER_NAME,
+    CACHE_POOL_GROUP_NAME,
+    CACHE_POOL_PERMISSION_STRING,
+    CACHE_POOL_WEIGHT,
+    CACHE_NUM_ENTRIES,
+    CACHE_ENTRY_PATH,
+    CACHE_ENTRY_REPLICATION,
+    CACHE_ENTRY_POOL_NAME
+  }
+  
+  /**
+   * Begin visiting the fsimage structure.  Opportunity to perform
+   * any initialization necessary for the implementing visitor.
+   *
+   * @throws IOException if the visitor fails to initialize
+   */
+  abstract void start() throws IOException;
+
+  /**
+   * Finish visiting the fsimage structure.  Opportunity to perform any
+   * clean up necessary for the implementing visitor.
+   *
+   * @throws IOException if the visitor fails to clean up
+   */
+  abstract void finish() throws IOException;
+
+  /**
+   * Finish visiting the fsimage structure after an error has occurred
+   * during the processing.  Opportunity to perform any clean up necessary
+   * for the implementing visitor.
+   *
+   * @throws IOException if the visitor fails to clean up
+   */
+  abstract void finishAbnormally() throws IOException;
+
+  /**
+   * Visit non enclosing element of fsimage with specified value.
+   *
+   * @param element FSImage element
+   * @param value Element's value
+   * @throws IOException if the visitor fails to handle the element
+   */
+  abstract void visit(ImageElement element, String value) throws IOException;
+
+  // Convenience methods to automatically convert numeric value types to
+  // strings. Both forward to visit(ImageElement, String); subclasses may
+  // override them to format particular elements differently.
+  void visit(ImageElement element, int value) throws IOException {
+    visit(element, Integer.toString(value));
+  }
+
+  void visit(ImageElement element, long value) throws IOException {
+    visit(element, Long.toString(value));
+  }
+
+  /**
+   * Begin visiting an element that encloses another element, such as
+   * the beginning of the list of blocks that comprise a file.
+   *
+   * @param element Element being visited
+   * @throws IOException if the visitor fails to handle the element
+   */
+  abstract void visitEnclosingElement(ImageElement element)
+     throws IOException;
+
+  /**
+   * Begin visiting an element that encloses another element, such as
+   * the beginning of the list of blocks that comprise a file.
+   *
+   * Also provide an additional key and value for the element, such as the
+   * number of items within the element.
+   *
+   * @param element Element being visited
+   * @param key Key describing the element being visited
+   * @param value Value associated with element being visited
+   * @throws IOException if the visitor fails to handle the element
+   */
+  abstract void visitEnclosingElement(ImageElement element,
+      ImageElement key, String value) throws IOException;
+
+  // Convenience methods to automatically convert value types to strings.
+  // Both forward to visitEnclosingElement(ImageElement, ImageElement, String).
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, int value)
+     throws IOException {
+    visitEnclosingElement(element, key, Integer.toString(value));
+  }
+
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, long value)
+     throws IOException {
+    visitEnclosingElement(element, key, Long.toString(value));
+  }
+
+  /**
+   * Leave current enclosing element.  Called, for instance, at the end of
+   * processing the blocks that comprise a file.
+   *
+   * @throws IOException if the visitor fails to handle the event
+   */
+  abstract void leaveEnclosingElement() throws IOException;
+}

+ 111 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/IndentedImageVisitor.java

@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.Date;
+
+/**
+ * IndentedImageVisitor walks over an FSImage and displays its structure 
+ * using indenting to organize sections within the image file.
+ *
+ * Every enclosing element increases the indent by one level (two spaces);
+ * leaving it decreases the indent again.
+ */
+class IndentedImageVisitor extends TextWriterImageVisitor {
+  
+  public IndentedImageVisitor(String filename) throws IOException {
+    super(filename);
+  }
+
+  public IndentedImageVisitor(String filename, boolean printToScreen) throws IOException {
+    super(filename, printToScreen);
+  }
+
+  final private DepthCounter dc = new DepthCounter();// to track leading spacing
+
+  /** Nothing to set up before the walk begins. */
+  @Override
+  void start() throws IOException {}
+
+  @Override
+  void finish() throws IOException { super.finish(); }
+
+  /** Announce the abnormal termination on stdout, then clean up. */
+  @Override
+  void finishAbnormally() throws IOException {
+    System.out.println("*** Image processing finished abnormally.  Ending ***");
+    super.finishAbnormally();
+  }
+
+  @Override
+  void leaveEnclosingElement() throws IOException {
+    dc.decLevel();
+  }
+
+  /** Print "ELEMENT = value" at the current indent. */
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    printIndents();
+    write(element + " = " + value + "\n");
+  }
+
+  /**
+   * Print a long value; the delegation token expiry, issue and max dates
+   * are rendered as dates rather than raw numbers.
+   */
+  @Override
+  void visit(ImageElement element, long value) throws IOException {
+    if ((element == ImageElement.DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME) || 
+        (element == ImageElement.DELEGATION_TOKEN_IDENTIFIER_ISSUE_DATE) || 
+        (element == ImageElement.DELEGATION_TOKEN_IDENTIFIER_MAX_DATE)) {
+      visit(element, new Date(value).toString());
+    } else {
+      visit(element, Long.toString(value));
+    }
+  }
+  
+  /** Print the element name, then indent everything nested inside it. */
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+    printIndents();
+    write(element + "\n");
+    dc.incLevel();
+  }
+
+  // Print element, along with associated key/value pair, in brackets
+  @Override
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, String value)
+      throws IOException {
+    printIndents();
+    write(element + " [" + key + " = " + value + "]\n");
+    dc.incLevel();
+  }
+
+  /**
+  * Pre-built indent strings, two spaces per level, indexed by depth.
+  * FsImages can potentially be millions of lines long, so caching can
+  * significantly speed up output.
+  */
+  final private static String [] indents = { "",
+                                             "  ",
+                                             "    ",
+                                             "      ",
+                                             "        ",
+                                             "          ",
+                                             "            "};
+
+  /**
+   * Print the leading spaces for the current level.
+   *
+   * Levels covered by the cache are written in one call. Deeper levels
+   * (possible with nested snapshot/reference structures) are written one
+   * unit at a time, using the same two-spaces-per-level width as the
+   * cached entries so the output stays aligned.
+   */
+  private void printIndents() throws IOException {
+    final int level = dc.getLevel();
+    if (level >= 0 && level < indents.length) {
+      write(indents[level]);
+      return;
+    }
+    // indents[1] is exactly one level's worth of spaces
+    for (int i = 0; i < level; i++) {
+      write(indents[1]);
+    }
+  }
+}

+ 178 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsImageVisitor.java

@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.Formatter;
+import java.util.LinkedList;
+
+/**
+ * LsImageVisitor displays the blocks of the namespace in a format very similar
+ * to the output of ls/lsr.  Entries are marked as directories or not,
+ * permissions listed, replication, username and groupname, along with size,
+ * modification date and full path.
+ *
+ * Note: A significant difference between the output of the lsr command
+ * and this image visitor is that this class cannot sort the file entries;
+ * they are listed in the order they are stored within the fsimage file.
+ * Therefore, the output of this class cannot be directly compared to the
+ * output of the lsr command.
+ */
+class LsImageVisitor extends TextWriterImageVisitor {
+  // Column widths of the ls-style line that follows the permission string.
+  private final static int widthRepl = 2;
+  private final static int widthUser = 8;
+  private final static int widthGroup = 10;
+  private final static int widthSize = 10;
+  private final static int widthMod = 10;
+  // Columns: replication, user, group, size, modification time, path.
+  private final static String lsStr = " %" + widthRepl + "s %" + widthUser +
+                                       "s %" + widthGroup + "s %" + widthSize +
+                                       "d %" + widthMod + "s %s";
+
+  // Stack of the enclosing elements that are currently open.
+  final private LinkedList<ImageElement> elemQ = new LinkedList<ImageElement>();
+
+  // Values gathered for the inode currently being visited.
+  private int numBlocks;
+  private String perms;
+  private int replication;
+  private String username;
+  private String group;
+  private long filesize;
+  private String modTime;
+  private String path;
+  private String linkTarget;
+
+  // True between entering an INODE element and printing its line.
+  private boolean inInode = false;
+  final private StringBuilder sb = new StringBuilder();
+  final private Formatter formatter = new Formatter(sb);
+
+  public LsImageVisitor(String filename) throws IOException {
+    super(filename);
+  }
+
+  public LsImageVisitor(String filename, boolean printToScreen) throws IOException {
+    super(filename, printToScreen);
+  }
+
+  /**
+   * Start a new line of output, reset values.  Every per-inode value is
+   * cleared, including the modification time, so that nothing carries over
+   * from the previously printed inode.
+   */
+  private void newLine() {
+    numBlocks = 0;
+    perms = username = group = path = linkTarget = modTime = "";
+    filesize = 0L;
+    replication = 0;
+
+    inInode = true;
+  }
+
+  /**
+   * All the values have been gathered.  Write them out as one ls-style
+   * line.  A negative block count is rendered as a directory ("d"), and a
+   * replication that is not positive is shown as "-".
+   *
+   * @throws IOException if writing to the output fails
+   */
+  private void printLine() throws IOException {
+    sb.append(numBlocks < 0 ? "d" : "-");
+    sb.append(perms);
+
+    if (0 != linkTarget.length()) {
+      path = path + " -> " + linkTarget;
+    }
+    formatter.format(lsStr, replication > 0 ? replication : "-",
+                           username, group, filesize, modTime, path);
+    sb.append("\n");
+
+    write(sb.toString());
+    sb.setLength(0); // clear string builder
+
+    inInode = false;
+  }
+
+  /**
+   * Record that an enclosing element has been entered and, when it is an
+   * inode, start gathering values for a fresh output line.
+   */
+  private void enterElement(ImageElement element) {
+    elemQ.push(element);
+    if (element == ImageElement.INODE) {
+      newLine();
+    }
+  }
+
+  @Override
+  void start() throws IOException {}
+
+  @Override
+  void finish() throws IOException {
+    super.finish();
+  }
+
+  @Override
+  void finishAbnormally() throws IOException {
+    System.out.println("Input ended unexpectedly.");
+    super.finishAbnormally();
+  }
+
+  /**
+   * Leave the innermost open element; leaving an inode prints its line.
+   *
+   * @throws IOException if no element is open, or if writing fails
+   */
+  @Override
+  void leaveEnclosingElement() throws IOException {
+    // Report unbalanced input as an IOException rather than letting the
+    // unchecked NoSuchElementException from pop() escape.
+    if (elemQ.isEmpty()) {
+      throw new IOException("Tried to exit non-existent enclosing element " +
+                "in FSImage file");
+    }
+
+    ImageElement elem = elemQ.pop();
+
+    if (elem == ImageElement.INODE) {
+      printLine();
+    }
+  }
+
+  // Maintain state of location within the image tree and record
+  // values needed to display the inode in ls-style format.
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    if (!inInode) {
+      return;
+    }
+    switch (element) {
+    case INODE_PATH:
+      // An empty path is shown as the root directory.
+      path = value.equals("") ? "/" : value;
+      break;
+    case PERMISSION_STRING:
+      perms = value;
+      break;
+    case REPLICATION:
+      replication = Integer.parseInt(value);
+      break;
+    case USER_NAME:
+      username = value;
+      break;
+    case GROUP_NAME:
+      group = value;
+      break;
+    case NUM_BYTES:
+      // Accumulated: NUM_BYTES may be visited several times per inode.
+      filesize += Long.parseLong(value);
+      break;
+    case MODIFICATION_TIME:
+      modTime = value;
+      break;
+    case SYMLINK:
+      linkTarget = value;
+      break;
+    default:
+      // This is OK.  We're not looking for all the values.
+      break;
+    }
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+    enterElement(element);
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, String value) throws IOException {
+    enterElement(element);
+    // The value attached to a BLOCKS element is taken as the block count.
+    if (element == ImageElement.BLOCKS) {
+      numBlocks = Integer.parseInt(value);
+    }
+  }
+}

+ 118 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/NameDistributionVisitor.java

@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * File name distribution visitor.
+ * <p>
+ * It analyzes file names in fsimage and prints the following information:
+ * <ul>
+ * <li>Number of unique file names</li>
+ * <li>Number of file names and the corresponding number range of files that
+ * use these same names</li>
+ * <li>Heap saved if the file name objects are reused</li>
+ * </ul>
+ */
+@InterfaceAudience.Private
+public class NameDistributionVisitor extends TextWriterImageVisitor {
+  // Number of times each file name (last path component) was seen.
+  HashMap<String, Integer> counts = new HashMap<String, Integer>();
+
+  public NameDistributionVisitor(String filename, boolean printToScreen)
+      throws IOException {
+    super(filename, printToScreen);
+  }
+
+  /**
+   * Write the name-reuse report and close the output.  Names are grouped
+   * into buckets by how many times they are used; for every bucket the
+   * number of names, the number of files using them and the estimated heap
+   * savings are printed, followed by the total savings.
+   *
+   * @throws IOException if writing to the output fails
+   */
+  @Override
+  void finish() throws IOException {
+    // Fixed per-name overhead, in bytes, added to the name length when
+    // estimating the savings.
+    final int BYTEARRAY_OVERHEAD = 24;
+
+    write("Total unique file names " + counts.size());
+    // Columns: Frequency of file occurrence, savings in heap, total files using
+    // the name and number of file names
+    final long stats[][] = { { 100000, 0, 0, 0 },
+                             { 10000, 0, 0, 0 },
+                             { 1000, 0, 0, 0 },
+                             { 100, 0, 0, 0 },
+                             { 10, 0, 0, 0 },
+                             { 5, 0, 0, 0 },
+                             { 4, 0, 0, 0 },
+                             { 3, 0, 0, 0 },
+                             { 2, 0, 0, 0 }};
+
+    int highbound = Integer.MIN_VALUE;
+    for (Entry<String, Integer> entry : counts.entrySet()) {
+      final int uses = entry.getValue();
+      highbound = Math.max(highbound, uses);
+      // Buckets are ordered from the highest threshold down, so the first
+      // match is the bucket this name belongs to.
+      for (int i = 0; i < stats.length; i++) {
+        if (uses >= stats[i][0]) {
+          // Multiply as long: with long names and large use counts the
+          // product does not fit in an int.
+          stats[i][1] += (long) (BYTEARRAY_OVERHEAD + entry.getKey().length())
+              * (uses - 1);
+          stats[i][2] += uses;
+          stats[i][3]++;
+          break;
+        }
+      }
+    }
+
+    long lowbound = 0;
+    long totalsavings = 0;
+    for (long[] stat : stats) {
+      lowbound = stat[0];
+      totalsavings += stat[1];
+      String range = lowbound == highbound ? " " + lowbound :
+          " between " + lowbound + "-" + highbound;
+      write("\n" + stat[3] + " names are used by " + stat[2] + " files"
+          + range + " times. Heap savings ~" + stat[1] + " bytes.");
+      // The next (lower) bucket ends just below this bucket's threshold.
+      highbound = (int) stat[0] - 1;
+    }
+    write("\n\nTotal saved heap ~" + totalsavings + " bytes.\n");
+    super.finish();
+  }
+
+  /**
+   * Count one more use of the last path component of every inode path.
+   */
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    if (element != ImageElement.INODE_PATH) {
+      return;
+    }
+    final String filename = value.substring(value.lastIndexOf('/') + 1);
+    final Integer seen = counts.get(filename);
+    counts.put(filename, seen == null ? 1 : seen + 1);
+  }
+
+  @Override
+  void leaveEnclosingElement() throws IOException {
+  }
+
+  @Override
+  void start() throws IOException {
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element, ImageElement key,
+      String value) throws IOException {
+  }
+}

+ 274 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewer.java

@@ -0,0 +1,274 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
+import java.io.EOFException;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.PositionTrackingInputStream;
+
+/**
+ * OfflineImageViewer to dump the contents of an Hadoop image file to XML
+ * or the console.  Main entry point into utility, either via the
+ * command line or programatically.
+ */
+@InterfaceAudience.Private
+public class OfflineImageViewer {
+  public static final Log LOG = LogFactory.getLog(OfflineImageViewer.class);
+  
+  private final static String usage = 
+    "Usage: bin/hdfs oiv [OPTIONS] -i INPUTFILE -o OUTPUTFILE\n" +
+    "Offline Image Viewer\n" + 
+    "View a Hadoop fsimage INPUTFILE using the specified PROCESSOR,\n" +
+    "saving the results in OUTPUTFILE.\n" +
+    "\n" +
+    "The oiv utility will attempt to parse correctly formed image files\n" +
+    "and will abort fail with mal-formed image files.\n" +
+    "\n" +
+    "The tool works offline and does not require a running cluster in\n" +
+    "order to process an image file.\n" +
+    "\n" +
+    "The following image processors are available:\n" +
+    "  * Ls: The default image processor generates an lsr-style listing\n" +
+    "    of the files in the namespace, with the same fields in the same\n" +
+    "    order.  Note that in order to correctly determine file sizes,\n" +
+    "    this formatter cannot skip blocks and will override the\n" +
+    "    -skipBlocks option.\n" +
+    "  * Indented: This processor enumerates over all of the elements in\n" +
+    "    the fsimage file, using levels of indentation to delineate\n" +
+    "    sections within the file.\n" +
+    "  * Delimited: Generate a text file with all of the elements common\n" +
+    "    to both inodes and inodes-under-construction, separated by a\n" +
+    "    delimiter. The default delimiter is \u0001, though this may be\n" +
+    "    changed via the -delimiter argument. This processor also overrides\n" +
+    "    the -skipBlocks option for the same reason as the Ls processor\n" +
+    "  * XML: This processor creates an XML document with all elements of\n" +
+    "    the fsimage enumerated, suitable for further analysis by XML\n" +
+    "    tools.\n" +
+    "  * FileDistribution: This processor analyzes the file size\n" +
+    "    distribution in the image.\n" +
+    "    -maxSize specifies the range [0, maxSize] of file sizes to be\n" +
+    "     analyzed (128GB by default).\n" +
+    "    -step defines the granularity of the distribution. (2MB by default)\n" +
+    "  * NameDistribution: This processor analyzes the file names\n" +
+    "    in the image and prints total number of file names and how frequently\n" +
+    "    file names are reused.\n" +
+    "\n" + 
+    "Required command line arguments:\n" +
+    "-i,--inputFile <arg>   FSImage file to process.\n" +
+    "-o,--outputFile <arg>  Name of output file. If the specified\n" +
+    "                       file exists, it will be overwritten.\n" +
+    "\n" + 
+    "Optional command line arguments:\n" +
+    "-p,--processor <arg>   Select which type of processor to apply\n" +
+    "                       against image file." +
+    " (Ls|XML|Delimited|Indented|FileDistribution).\n" +
+    "-h,--help              Display usage information and exit\n" +
+    "-printToScreen         For processors that write to a file, also\n" +
+    "                       output to screen. On large image files this\n" +
+    "                       will dramatically increase processing time.\n" +
+    "-skipBlocks            Skip inodes' blocks information. May\n" +
+    "                       significantly decrease output.\n" +
+    "                       (default = false).\n" +
+    "-delimiter <arg>       Delimiting string to use with Delimited processor\n";
+
+  private final boolean skipBlocks;
+  private final String inputFile;
+  private final ImageVisitor processor;
+  
+  public OfflineImageViewer(String inputFile, ImageVisitor processor, 
+             boolean skipBlocks) {
+    this.inputFile = inputFile;
+    this.processor = processor;
+    this.skipBlocks = skipBlocks;
+  }
+
+  /**
+   * Process image file.
+   */
+  public void go() throws IOException  {
+    DataInputStream in = null;
+    PositionTrackingInputStream tracker = null;
+    ImageLoader fsip = null;
+    boolean done = false;
+    try {
+      tracker = new PositionTrackingInputStream(new BufferedInputStream(
+               new FileInputStream(new File(inputFile))));
+      in = new DataInputStream(tracker);
+
+      int imageVersionFile = findImageVersion(in);
+
+      fsip = ImageLoader.LoaderFactory.getLoader(imageVersionFile);
+
+      if(fsip == null) 
+        throw new IOException("No image processor to read version " +
+            imageVersionFile + " is available.");
+      fsip.loadImage(in, processor, skipBlocks);
+      done = true;
+    } finally {
+      if (!done) {
+        LOG.error("image loading failed at offset " + tracker.getPos());
+      }
+      IOUtils.cleanup(LOG, in, tracker);
+    }
+  }
+
+  /**
+   * Check an fsimage datainputstream's version number.
+   *
+   * The datainput stream is returned at the same point as it was passed in;
+   * this method has no effect on the datainputstream's read pointer.
+   *
+   * @param in Datainputstream of fsimage
+   * @return Filesystem layout version of fsimage represented by stream
+   * @throws IOException If problem reading from in
+   */
+  private int findImageVersion(DataInputStream in) throws IOException {
+    in.mark(42); // arbitrary amount, resetting immediately
+
+    int version = in.readInt();
+    in.reset();
+
+    return version;
+  }
+  
+  /**
+   * Build command-line options and descriptions
+   */
+  public static Options buildOptions() {
+    Options options = new Options();
+
+    // Build in/output file arguments, which are required, but there is no 
+    // addOption method that can specify this
+    OptionBuilder.isRequired();
+    OptionBuilder.hasArgs();
+    OptionBuilder.withLongOpt("outputFile");
+    options.addOption(OptionBuilder.create("o"));
+    
+    OptionBuilder.isRequired();
+    OptionBuilder.hasArgs();
+    OptionBuilder.withLongOpt("inputFile");
+    options.addOption(OptionBuilder.create("i"));
+    
+    options.addOption("p", "processor", true, "");
+    options.addOption("h", "help", false, "");
+    options.addOption("skipBlocks", false, "");
+    options.addOption("printToScreen", false, "");
+    options.addOption("delimiter", true, "");
+
+    return options;
+  }
+  
+  /**
+   * Entry point to command-line-driven operation.  User may specify
+   * options and start fsimage viewer from the command line.  Program
+   * will process image file and exit cleanly or, if an error is
+   * encountered, inform user and exit.
+   *
+   * @param args Command line options
+   * @throws IOException 
+   */
+  public static void main(String[] args) throws IOException {
+    Options options = buildOptions();
+    if(args.length == 0) {
+      printUsage();
+      return;
+    }
+    
+    CommandLineParser parser = new PosixParser();
+    CommandLine cmd;
+
+    try {
+      cmd = parser.parse(options, args);
+    } catch (ParseException e) {
+      System.out.println("Error parsing command-line options: ");
+      printUsage();
+      return;
+    }
+
+    if(cmd.hasOption("h")) { // print help and exit
+      printUsage();
+      return;
+    }
+
+    boolean skipBlocks = cmd.hasOption("skipBlocks");
+    boolean printToScreen = cmd.hasOption("printToScreen");
+    String inputFile = cmd.getOptionValue("i");
+    String processor = cmd.getOptionValue("p", "Ls");
+    String outputFile = cmd.getOptionValue("o");
+    String delimiter = cmd.getOptionValue("delimiter");
+    
+    if( !(delimiter == null || processor.equals("Delimited")) ) {
+      System.out.println("Can only specify -delimiter with Delimited processor");
+      printUsage();
+      return;
+    }
+    
+    ImageVisitor v;
+    if(processor.equals("Indented")) {
+      v = new IndentedImageVisitor(outputFile, printToScreen);
+    } else if (processor.equals("XML")) {
+      v = new XmlImageVisitor(outputFile, printToScreen);
+    } else if (processor.equals("Delimited")) {
+      v = delimiter == null ?  
+                 new DelimitedImageVisitor(outputFile, printToScreen) :
+                 new DelimitedImageVisitor(outputFile, printToScreen, delimiter);
+      skipBlocks = false;
+    } else if (processor.equals("FileDistribution")) {
+      long maxSize = Long.parseLong(cmd.getOptionValue("maxSize", "0"));
+      int step = Integer.parseInt(cmd.getOptionValue("step", "0"));
+      v = new FileDistributionVisitor(outputFile, maxSize, step);
+    } else if (processor.equals("NameDistribution")) {
+      v = new NameDistributionVisitor(outputFile, printToScreen);
+    } else {
+      v = new LsImageVisitor(outputFile, printToScreen);
+      skipBlocks = false;
+    }
+    
+    try {
+      OfflineImageViewer d = new OfflineImageViewer(inputFile, v, skipBlocks);
+      d.go();
+    } catch (EOFException e) {
+      System.err.println("Input file ended unexpectedly.  Exiting");
+    } catch(IOException e) {
+      System.err.println("Encountered exception.  Exiting: " + e.getMessage());
+    }
+  }
+
+  /**
+   * Print application usage instructions.
+   */
+  private static void printUsage() {
+    System.out.println(usage);
+  }
+}

+ 109 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TextWriterImageVisitor.java

@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+
+import com.google.common.base.Charsets;
+
+/**
+ * TextWriterImageVisitor mixes in the ability for ImageVisitor
+ * implementations to easily write their output to a text file.
+ *
+ * Implementing classes should be sure to call the super methods for the
+ * constructors, finish and finishAbnormally methods, in order that the
+ * underlying file may be opened and closed correctly.
+ *
+ * Note, this class does not add newlines to text written to file or (if
+ * enabled) screen.  This is the implementing class' responsibility.
+ */
+abstract class TextWriterImageVisitor extends ImageVisitor {
+  // Whether everything written to the file is also echoed to System.out.
+  private boolean printToScreen = false;
+  // Cleared once the writer has been closed or a write has failed.
+  private boolean okToWrite = false;
+  final private OutputStreamWriter fw;
+
+  /**
+   * Create a processor that writes to the file named.
+   *
+   * @param filename Name of file to write output to
+   */
+  public TextWriterImageVisitor(String filename) throws IOException {
+    this(filename, false);
+  }
+
+  /**
+   * Create a processor that writes to the file named and may or may not
+   * also output to the screen, as specified.  The file is written as UTF-8.
+   *
+   * @param filename Name of file to write output to
+   * @param printToScreen Mirror output to screen?
+   */
+  public TextWriterImageVisitor(String filename, boolean printToScreen)
+         throws IOException {
+    super();
+    this.printToScreen = printToScreen;
+    fw = new OutputStreamWriter(new FileOutputStream(filename), Charsets.UTF_8);
+    okToWrite = true;
+  }
+  
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.hdfs.tools.offlineImageViewer.ImageVisitor#finish()
+   */
+  @Override
+  void finish() throws IOException {
+    close();
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.hdfs.tools.offlineImageViewer.ImageVisitor#finishAbnormally()
+   */
+  @Override
+  void finishAbnormally() throws IOException {
+    close();
+  }
+
+  /**
+   * Close output stream and prevent further writing.  Writing is disabled
+   * even when closing the stream itself fails.
+   */
+  private void close() throws IOException {
+    try {
+      fw.close();
+    } finally {
+      okToWrite = false;
+    }
+  }
+
+  /**
+   * Write parameter to output file (and possibly screen).
+   *
+   * @param toWrite Text to write to file
+   * @throws IOException if the output is no longer open for writing, or if
+   *         the write itself fails (which also disables further writing)
+   */
+  protected void write(String toWrite) throws IOException  {
+    if(!okToWrite)
+      throw new IOException("file not open for writing.");
+
+    if(printToScreen)
+      System.out.print(toWrite);
+
+    try {
+      fw.write(toWrite);
+    } catch (IOException e) {
+      okToWrite = false;
+      throw e;
+    }
+  }
+}

+ 88 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java

@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.LinkedList;
+
+/**
+ * An XmlImageVisitor walks over an fsimage structure and writes out
+ * an equivalent XML document that contains the fsimage's components.
+ */
+public class XmlImageVisitor extends TextWriterImageVisitor {
+  final private LinkedList<ImageElement> tagQ =
+                                          new LinkedList<ImageElement>();
+
+  public XmlImageVisitor(String filename) throws IOException {
+    super(filename, false);
+  }
+
+  public XmlImageVisitor(String filename, boolean printToScreen)
+       throws IOException {
+    super(filename, printToScreen);
+  }
+
+  @Override
+  void finish() throws IOException {
+    super.finish();
+  }
+
+  @Override
+  void finishAbnormally() throws IOException {
+    write("\n<!-- Error processing image file.  Exiting -->\n");
+    super.finishAbnormally();
+  }
+
+  @Override
+  void leaveEnclosingElement() throws IOException {
+    if(tagQ.size() == 0)
+      throw new IOException("Tried to exit non-existent enclosing element " +
+                "in FSImage file");
+
+    ImageElement element = tagQ.pop();
+    write("</" + element.toString() + ">\n");
+  }
+
+  @Override
+  void start() throws IOException {
+    write("<?xml version=\"1.0\" ?>\n");
+  }
+
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    writeTag(element.toString(), value);
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+    write("<" + element.toString() + ">\n");
+    tagQ.push(element);
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, String value)
+       throws IOException {
+    write("<" + element.toString() + " " + key + "=\"" + value +"\">\n");
+    tagQ.push(element);
+  }
+
+  private void writeTag(String tag, String value) throws IOException {
+    write("<" + tag + ">" + value + "</" + tag + ">\n");
+  }
+}

+ 43 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java

@@ -27,6 +27,7 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -43,6 +44,7 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 
+import com.google.common.io.Files;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -87,7 +89,6 @@ import org.apache.hadoop.util.ExitUtil.ExitException;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.log4j.Level;
 import org.junit.After;
-import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.ArgumentMatcher;
@@ -1084,7 +1085,7 @@ public class TestCheckpoint {
       
       FSDirectory secondaryFsDir = secondary.getFSNamesystem().dir;
       INode rootInMap = secondaryFsDir.getInode(secondaryFsDir.rootDir.getId());
-      Assert.assertSame(rootInMap, secondaryFsDir.rootDir);
+      assertSame(rootInMap, secondaryFsDir.rootDir);
       
       fileSys.delete(tmpDir, true);
       fileSys.mkdirs(tmpDir);
@@ -2404,6 +2405,46 @@ public class TestCheckpoint {
     }
   }
 
+  @Test
+  public void testLegacyOivImage() throws Exception {
+    MiniDFSCluster cluster = null;
+    SecondaryNameNode secondary = null;
+    File tmpDir = Files.createTempDir();
+    Configuration conf = new HdfsConfiguration();
+    conf.set(DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY,
+        tmpDir.getAbsolutePath());
+    conf.set(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY,
+        "2");
+
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+              .format(true).build();
+
+      secondary = startSecondaryNameNode(conf);
+
+      // Checkpoint once
+      secondary.doCheckpoint();
+      String files1[] = tmpDir.list();
+      assertEquals("Only one file is expected", 1, files1.length);
+
+      // Perform more checkpointngs and check whether retention management
+      // is working.
+      secondary.doCheckpoint();
+      secondary.doCheckpoint();
+      String files2[] = tmpDir.list();
+      assertEquals("Two files are expected", 2, files2.length);
+
+      // Verify that the first file is deleted.
+      for (String fName : files2) {
+        assertFalse(fName.equals(files1[0]));
+      }
+    } finally {
+      cleanup(secondary);
+      cleanup(cluster);
+      tmpDir.delete();
+    }
+  }
+
   private static void cleanup(SecondaryNameNode snn) {
     if (snn != null) {
       try {

+ 9 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java

@@ -66,24 +66,28 @@ import com.google.common.base.Supplier;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
-
+import com.google.common.io.Files;
 
 public class TestStandbyCheckpoints {
   private static final int NUM_DIRS_IN_LOG = 200000;
   protected MiniDFSCluster cluster;
   protected NameNode nn0, nn1;
   protected FileSystem fs;
+  protected File tmpOivImgDir;
   
   private static final Log LOG = LogFactory.getLog(TestStandbyCheckpoints.class);
 
   @SuppressWarnings("rawtypes")
   @Before
   public void setupCluster() throws Exception {
+    tmpOivImgDir = Files.createTempDir();
     Configuration conf = new Configuration();
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1);
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 5);
     conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
-    
+    conf.set(DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY,
+        tmpOivImgDir.getAbsolutePath());
+
     // Dial down the retention of extra edits and checkpoints. This is to
     // help catch regressions of HDFS-4238 (SBN should not purge shared edits)
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY, 1);
@@ -129,6 +133,9 @@ public class TestStandbyCheckpoints {
     // Once the standby catches up, it should notice that it needs to
     // do a checkpoint and save one to its local directories.
     HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12));
+
+    // It should have saved the oiv image too.
+    assertEquals("One file is expected", 1, tmpOivImgDir.list().length);
     
     // It should also upload it back to the active.
     HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(12));